"""Container entrypoint: configure JupyterLab backend, sparkmagic, the Hadoop
classpath, jupyter-hdfscm and Kerberos renewal, then launch JupyterHub."""

import json
import logging
import os
import subprocess
from pathlib import Path
from threading import Thread
from time import sleep


def run_kinit(
    kinit_principal: str,
    kinit_interval: int,
    keytab_path: Path,
    krb_cache: str,
) -> None:
    """Keep the Kerberos ticket fresh by re-running ``kinit`` forever.

    Intended to be run in a daemon thread.

    Args:
        kinit_principal: Kerberos principal name.
        kinit_interval: Seconds to sleep between ``kinit`` invocations.
        keytab_path: Path to the keytab file.
        krb_cache: Path of the Kerberos ticket cache; made group/other
            readable so spawned single-user servers can reuse it.
    """
    while True:
        # List-form argv (shell=False) avoids shell injection through the
        # principal or path values.
        proc = subprocess.run(
            ["kinit", "-kt", str(keytab_path), kinit_principal],
            text=True,
            capture_output=True,
            check=False,
        )
        if proc.returncode != 0:
            logging.warning(proc.stderr)
        else:
            # Only relax cache permissions after a successful kinit.
            chmod = subprocess.run(
                ["chmod", "go+r", str(krb_cache)],
                text=True,
                capture_output=True,
                check=False,
            )
            if chmod.returncode != 0:
                logging.warning(chmod.stderr)
        sleep(kinit_interval)


def _configure_backend(backend_base_url: str) -> None:
    """Point the jldbq JupyterLab extension at the flask backend via overrides.json."""
    setting_dir = Path("/usr/local/share/jupyter/lab/settings/")
    setting_dir.mkdir(exist_ok=True, parents=True)
    overrides = setting_dir / "overrides.json"
    # Merge into any existing overrides instead of clobbering the file.
    if overrides.exists():
        overrides_conf = json.loads(overrides.read_text())
    else:
        overrides_conf = {}
    overrides_conf["@jupyterlab/jldbq-extension:plugin"] = {
        "flaskBackend": backend_base_url
    }
    overrides.write_text(json.dumps(overrides_conf, indent=2))


def _configure_sparkmagic(sparkmagic_conf_dir, livy_server_url) -> None:
    """Validate the sparkmagic config and optionally rewrite its Livy URL.

    Raises:
        FileNotFoundError: if no config.json exists in the conf directory.
    """
    if sparkmagic_conf_dir is None:
        sparkmagic_conf_dir = "/etc/sparkmagic"
    os.environ["SPARKMAGIC_CONF_DIR"] = sparkmagic_conf_dir
    conf_file = Path(sparkmagic_conf_dir) / "config.json"
    if not conf_file.exists():
        raise FileNotFoundError(
            f"cannot find sparkmagic config file at {conf_file.resolve()}"
        )
    if livy_server_url is not None:
        conf = json.loads(conf_file.read_text())
        conf["kernel_python_credentials"]["url"] = livy_server_url
        conf_file.write_text(json.dumps(conf, indent=2))


def _export_hadoop_classpath(hadoop_home: str, classpath) -> None:
    """Export CLASSPATH for jupyter-hdfscm, appending any pre-existing value."""
    cp_proc = subprocess.run(
        [f"{hadoop_home}/bin/hadoop", "classpath", "--glob"],
        capture_output=True,
        check=True,
        text=True,
    )
    hadoop_cp = cp_proc.stdout.strip()
    if classpath is None:
        os.environ["CLASSPATH"] = hadoop_cp
    else:
        os.environ["CLASSPATH"] = hadoop_cp + ":" + classpath


def _configure_hdfscm(kinit_principal: str) -> None:
    """Append jupyter-hdfscm contents-manager settings to the lab config."""
    with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
        fp.write(
            "\nc.HDFSContentsManager.root_dir_template = '/user/"
            + kinit_principal
            + "/jupyter/{username}/'\n"
        )
        fp.write("c.ServerApp.contents_manager_class = 'hdfscm.HDFSContentsManager'\n")


def main() -> None:
    """Read configuration from the environment, wire everything up, start jupyterhub."""
    # Without explicit configuration the root logger drops INFO records,
    # hiding the keytab-fallback message below.
    logging.basicConfig(level=logging.INFO)

    backend_base_url = os.environ.get("BACKEND_BASE_URL")
    sparkmagic_conf_dir = os.environ.get("SPARKMAGIC_CONF_DIR")
    livy_server_url = os.environ.get("LIVY_SERVER_URL")
    classpath = os.environ.get("CLASSPATH")
    hadoop_home = os.environ["HADOOP_HOME"]
    hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
    kinit_principal = os.environ.get("KINIT_PRINCIPAL", "aidevuser")
    kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 60 * 60))
    keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))

    use_krb = keytab_path.exists()
    krb_cache = "/tmp/krb5cc"
    os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
    if not use_krb:
        logging.info("Kerberos keytab not found, using local root")
        kinit_principal = "root"

    # Configure the jldbq backend extension.
    if backend_base_url is not None:
        _configure_backend(backend_base_url)

    # Configure sparkmagic.
    _configure_sparkmagic(sparkmagic_conf_dir, livy_server_url)

    # Set the hadoop classpath (for jupyter-hdfscm).
    _export_hadoop_classpath(hadoop_home, classpath)

    # jupyter-hdfscm contents manager.
    _configure_hdfscm(kinit_principal)

    # Keep Kerberos authentication alive in the background.
    if use_krb:
        Thread(
            daemon=True,
            target=run_kinit,
            name="run_kinit",
            args=(kinit_principal, kinit_interval, keytab_path, krb_cache),
        ).start()

    subprocess.run(["jupyterhub", "-f", "/etc/jupyterhub/config.py"], check=False)


if __name__ == "__main__":
    main()