"""Container entrypoint: set up Kerberos + sparkmagic, then launch JupyterHub.

Reads configuration from environment variables:
  HADOOP_CONF_DIR   (required) Hadoop client configuration directory.
  KEYTAB_PATH       Keytab location; defaults to $HADOOP_CONF_DIR/user.keytab.
  KINIT_PRINCIPAL   Kerberos principal (default "aidevuser"). The historical
                    misspelling KINIT_PINCIPAL is still honored as a fallback.
  KINIT_INTERVAL    Seconds between ticket refreshes (default 6h).
  SPARKMAGIC_CONF_DIR  sparkmagic config dir (default /etc/sparkmagic).
  LIVY_SERVER_URL   If set, rewritten into the sparkmagic config.
"""
import json
import os
import subprocess
from pathlib import Path
from threading import Thread
from time import sleep

hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))
# Kerberos is enabled solely by the presence of a keytab file.
use_krb = keytab_path.exists()

if use_krb:
    # Prefer the correctly spelled KINIT_PRINCIPAL, but keep reading the old
    # misspelled KINIT_PINCIPAL so existing deployments are not broken.
    kinit_principal = os.environ.get(
        "KINIT_PRINCIPAL", os.environ.get("KINIT_PINCIPAL", "aidevuser")
    )
    # Refresh the ticket every 6 hours by default so it never expires.
    kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 3600))
    krb_cache = "/tmp/krb5cc"
    # One shared credential cache; made group/other-readable below so spawned
    # single-user servers can reuse it.
    os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
else:
    print("!! KEYTAB NOT FOUND !!")
    kinit_principal = 'root'

# Point the HDFS contents manager at a per-user directory under the
# principal's HDFS home. {username} is expanded later by JupyterHub.
with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
    fp.write(
        "\nc.HDFSContentsManager.root_dir_template = '/user/"
        + kinit_principal
        + "/jupyter/{username}/'\n"
    )

# Resolve the sparkmagic config dir, exporting the default if it was unset.
if (conf_dir := os.environ.get("SPARKMAGIC_CONF_DIR")) is None:
    conf_dir = "/etc/sparkmagic"
    os.environ["SPARKMAGIC_CONF_DIR"] = conf_dir
conf_file = Path(conf_dir) / "config.json"
if not conf_file.exists():
    raise FileNotFoundError(f"cannot find sparkmagic config file at {conf_file.resolve()}")

# Export the full Hadoop classpath for child processes.
# shell=True is required so the shell expands $HADOOP_HOME.
proc = subprocess.run(
    "$HADOOP_HOME/bin/hadoop classpath --glob",
    shell=True,
    capture_output=True,
    check=True,
    text=True,
)
os.environ["CLASSPATH"] = proc.stdout.strip()

# Optionally rewrite the Livy endpoint in the sparkmagic config file.
if (url := os.environ.get("LIVY_SERVER_URL")) is not None:
    conf = json.loads(conf_file.read_text())
    conf["kernel_python_credentials"]["url"] = url
    conf_file.write_text(json.dumps(conf, indent=2))

if use_krb:
    def run_kinit():
        """Refresh the Kerberos ticket forever, sleeping between refreshes."""
        while True:
            result = subprocess.run(
                f"kinit -kt '{keytab_path}' '{kinit_principal}' && chmod go+r '{krb_cache}'",
                shell=True,
                capture_output=True,
                text=True,
            )
            # Best-effort refresh: report failures instead of silently
            # discarding them, but keep the loop alive.
            if result.returncode != 0:
                print(f"kinit failed (rc={result.returncode}): {result.stderr.strip()}")
            sleep(kinit_interval)

    # Daemon thread: dies with the main jupyterhub process below.
    Thread(daemon=True, target=run_kinit, name="run_kinit").start()

# Hand over to JupyterHub; this blocks for the lifetime of the container.
subprocess.run("jupyterhub -f /etc/jupyterhub/config.py", shell=True)