import json
import os
import subprocess
from pathlib import Path
from threading import Thread
from time import sleep

hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))
use_krb = keytab_path.exists()

if use_krb:
    kinit_principal = os.environ.get("KINIT_PRINCIPAL", "aidevuser")
    kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 3600))
    krb_cache = "/tmp/krb5cc"
    os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
else:
    print("!! KEYTAB NOT FOUND !!")
    kinit_principal = "root"

# Point the HDFS contents manager at the per-user notebook directory.
with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
    fp.write(
        "\nc.HDFSContentsManager.root_dir_template = "
        f"'/user/{kinit_principal}/jupyter/{{username}}/'\n"
    )

# Locate the sparkmagic configuration, falling back to the default location.
if (conf_dir := os.environ.get("SPARKMAGIC_CONF_DIR")) is None:
    conf_dir = "/etc/sparkmagic"
    os.environ["SPARKMAGIC_CONF_DIR"] = conf_dir
conf_file = Path(conf_dir) / "config.json"
if not conf_file.exists():
    raise FileNotFoundError(f"cannot find sparkmagic config file at {conf_file.resolve()}")

# Expose the full Hadoop classpath to child processes.
proc = subprocess.run(
    "$HADOOP_HOME/bin/hadoop classpath --glob",
    shell=True, capture_output=True, check=True, text=True,
)
os.environ["CLASSPATH"] = proc.stdout.strip()

# If a Livy server URL is provided, write it into the sparkmagic config.
if (url := os.environ.get("LIVY_SERVER_URL")) is not None:
    conf = json.loads(conf_file.read_text())
    conf["kernel_python_credentials"]["url"] = url
    conf_file.write_text(json.dumps(conf, indent=2))

if use_krb:
    def run_kinit():
        # Periodically refresh the Kerberos ticket from the keytab and keep the
        # ticket cache readable by the single-user notebook processes.
        while True:
            subprocess.run(
                f"kinit -kt '{keytab_path}' '{kinit_principal}' && chmod go+r '{krb_cache}'",
                shell=True, capture_output=True,
            )
            sleep(kinit_interval)

    Thread(daemon=True, target=run_kinit, name="run_kinit").start()

subprocess.run("jupyterhub -f /etc/jupyterhub/config.py", shell=True)