123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- import logging
- import json
- import os
- import subprocess
- from pathlib import Path
- from time import sleep
- from threading import Thread
- def run_kinit(
- kinit_principal: str,
- kinit_interval: int,
- keytab_path: Path,
- krb_cache: Path,
- ):
- """使用 kinit 保持 Kerberos 认证状态. 在 daemon 线程中调用.
- 参数:
- kinit_principal: kerberos principal 名称
- kinit_interval: 执行 kinit 的时间间隔 (秒)
- keytab_path: keytab 存储路径
- krb_cache: kerberos ticket 缓存路径
- """
- while True:
- proc = subprocess.run(
- f"kinit -kt '{keytab_path}' '{kinit_principal}' && chmod go+r '{krb_cache}'",
- shell=True,
- text=True,
- capture_output=True,
- check=False,
- )
- try:
- proc.check_returncode()
- except subprocess.CalledProcessError:
- logging.warning(proc.stderr)
- sleep(kinit_interval)
- def main():
- backend_base_url = os.environ.get("BACKEND_BASE_URL")
- sparkmagic_conf_dir = os.environ.get("SPARKMAGIC_CONF_DIR")
- livy_server_url = os.environ.get("LIVY_SERVER_URL")
- classpath = os.environ.get("CLASSPATH")
- hadoop_home = os.environ["HADOOP_HOME"]
- hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
- kinit_principal = os.environ.get("KINIT_PRINCIPAL", "aidevuser")
- kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 60 * 60))
- keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))
- use_krb = keytab_path.exists()
- krb_cache = "/tmp/krb5cc"
- os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
- if not use_krb:
- logging.info("Kerberos keytab not found, using local root")
- kinit_principal = "root"
- # 配置 backend
- if backend_base_url is not None:
- setting_dir = Path("/usr/local/share/jupyter/lab/settings/")
- if not setting_dir.exists():
- setting_dir.mkdir(exist_ok=True, parents=True)
- overrides = setting_dir / "overrides.json"
- if overrides.exists():
- overrides_conf = json.loads(overrides.read_text())
- else:
- overrides_conf = {}
- overrides_conf["@jupyterlab/jldbq-extension:plugin"] = {
- "flaskBackend": backend_base_url
- }
- overrides.write_text(json.dumps(overrides_conf, indent=2))
- # 配置 sparkmagic
- if sparkmagic_conf_dir is None:
- sparkmagic_conf_dir = "/etc/sparkmagic"
- os.environ["SPARKMAGIC_CONF_DIR"] = sparkmagic_conf_dir
- conf_file = Path(sparkmagic_conf_dir) / "config.json"
- if not conf_file.exists():
- raise FileNotFoundError(
- f"cannot find sparkmagic config file at {conf_file.resolve()}"
- )
- if livy_server_url is not None:
- conf = json.loads(conf_file.read_text())
- conf["kernel_python_credentials"]["url"] = livy_server_url
- conf_file.write_text(json.dumps(conf, indent=2))
- # 设置 hadoop classpath (for jupyter-hdfscm)
- cp_proc = subprocess.run(
- f"{hadoop_home}/bin/hadoop classpath --glob",
- shell=True,
- capture_output=True,
- check=True,
- text=True,
- )
- if classpath is None:
- os.environ["CLASSPATH"] = cp_proc.stdout.strip()
- else:
- os.environ["CLASSPATH"] = cp_proc.stdout.strip() + ":" + classpath
- # jupyter-hdfscm
- with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
- fp.write(
- "\nc.HDFSContentsManager.root_dir_template = '/user/"
- + kinit_principal
- + "/jupyter/{username}/'\n"
- )
- fp.write("c.ServerApp.contents_manager_class = 'hdfscm.HDFSContentsManager'\n")
- # 设置 kerberos 认证
- if use_krb:
- Thread(
- daemon=True,
- target=run_kinit,
- name="run_kinit",
- args=(kinit_principal, kinit_interval, keytab_path, krb_cache),
- ).start()
- subprocess.run("jupyterhub -f /etc/jupyterhub/config.py", shell=True)
- if __name__ == "__main__":
- main()
|