1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
"""Container entrypoint: configure Kerberos, Jupyter, and sparkmagic, then run JupyterHub.

Requires HADOOP_CONF_DIR. Optional environment: KEYTAB_PATH, KINIT_PRINCIPAL,
KINIT_INTERVAL (seconds), SPARKMAGIC_CONF_DIR, LIVY_SERVER_URL.
"""
import json
import os
import subprocess
from pathlib import Path
from threading import Thread
from time import sleep

# Required — fail fast with a KeyError if the Hadoop config dir is not set.
hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))
use_krb = keytab_path.exists()

if use_krb:
    # BUG FIX: this previously read the misspelled env var "KINIT_PINCIPAL".
    # Read the correctly-spelled name first, falling back to the old typo so
    # existing deployments that set the misspelled variable keep working.
    kinit_principal = os.environ.get(
        "KINIT_PRINCIPAL", os.environ.get("KINIT_PINCIPAL", "aidevuser")
    )
    # How often (seconds) to renew the ticket; default 6 hours.
    kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 3600))
    krb_cache = "/tmp/krb5cc"
    # Point all Kerberos-aware clients at one fixed credential-cache file.
    os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
else:
    print("!! KEYTAB NOT FOUND !!")
    kinit_principal = 'root'

# Append the per-user HDFS notebook root to the JupyterLab config.
with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
    fp.write("\nc.HDFSContentsManager.root_dir_template = '/user/" + kinit_principal + "/jupyter/{username}/'\n")

# Ensure SPARKMAGIC_CONF_DIR is exported and its config file exists.
if (conf_dir := os.environ.get("SPARKMAGIC_CONF_DIR")) is None:
    conf_dir = "/etc/sparkmagic"
    os.environ["SPARKMAGIC_CONF_DIR"] = conf_dir
conf_file = Path(conf_dir) / "config.json"
if not conf_file.exists():
    raise FileNotFoundError(f"cannot find sparkmagic config file at {conf_file.resolve()}")

# Export the expanded Hadoop classpath so JVM-backed clients can locate the jars.
proc = subprocess.run(
    "$HADOOP_HOME/bin/hadoop classpath --glob",
    shell=True, capture_output=True, check=True, text=True,
)
os.environ["CLASSPATH"] = proc.stdout.strip()

# If a Livy endpoint was supplied, rewrite sparkmagic's config to point at it.
if (url := os.environ.get("LIVY_SERVER_URL")) is not None:
    conf = json.loads(conf_file.read_text())
    conf["kernel_python_credentials"]["url"] = url
    conf_file.write_text(json.dumps(conf, indent=2))

if use_krb:
    def run_kinit():
        """Renew the Kerberos ticket cache forever, sleeping between renewals."""
        while True:
            # Best effort: failures are swallowed and retried on the next cycle.
            subprocess.run(
                f"kinit -kt '{keytab_path}' '{kinit_principal}' && chmod go+r '{krb_cache}'",
                shell=True, capture_output=True,
            )
            sleep(kinit_interval)

    Thread(daemon=True, target=run_kinit, name="run_kinit").start()

# Foreground process — JupyterHub keeps the container alive until it exits.
# (Was a pointless f-string with no placeholders; now a plain literal.)
subprocess.run("jupyterhub -f /etc/jupyterhub/config.py", shell=True)
|