# run.py (4.0 KB)
  1. import logging
  2. import json
  3. import os
  4. import subprocess
  5. from pathlib import Path
  6. from time import sleep
  7. from threading import Thread
  8. def run_kinit(
  9. kinit_principal: str,
  10. kinit_interval: int,
  11. keytab_path: Path,
  12. krb_cache: Path,
  13. ):
  14. """使用 kinit 保持 Kerberos 认证状态. 在 daemon 线程中调用.
  15. 参数:
  16. kinit_principal: kerberos principal 名称
  17. kinit_interval: 执行 kinit 的时间间隔 (秒)
  18. keytab_path: keytab 存储路径
  19. krb_cache: kerberos ticket 缓存路径
  20. """
  21. while True:
  22. proc = subprocess.run(
  23. f"kinit -kt '{keytab_path}' '{kinit_principal}' && chmod go+r '{krb_cache}'",
  24. shell=True,
  25. text=True,
  26. capture_output=True,
  27. check=False,
  28. )
  29. try:
  30. proc.check_returncode()
  31. except subprocess.CalledProcessError:
  32. logging.warning(proc.stderr)
  33. sleep(kinit_interval)
  34. def main():
  35. backend_base_url = os.environ.get("BACKEND_BASE_URL")
  36. sparkmagic_conf_dir = os.environ.get("SPARKMAGIC_CONF_DIR")
  37. livy_server_url = os.environ.get("LIVY_SERVER_URL")
  38. classpath = os.environ.get("CLASSPATH")
  39. hadoop_home = os.environ["HADOOP_HOME"]
  40. hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
  41. kinit_principal = os.environ.get("KINIT_PRINCIPAL", "aidevuser")
  42. kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 60 * 60))
  43. keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))
  44. use_krb = keytab_path.exists()
  45. krb_cache = "/tmp/krb5cc"
  46. os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
  47. if not use_krb:
  48. logging.info("Kerberos keytab not found, using local root")
  49. kinit_principal = "root"
  50. # 配置 backend
  51. if backend_base_url is not None:
  52. setting_dir = Path("/usr/local/share/jupyter/lab/settings/")
  53. if not setting_dir.exists():
  54. setting_dir.mkdir(exist_ok=True, parents=True)
  55. overrides = setting_dir / "overrides.json"
  56. if overrides.exists():
  57. overrides_conf = json.loads(overrides.read_text())
  58. else:
  59. overrides_conf = {}
  60. overrides_conf["@jupyterlab/jldbq-extension:plugin"] = {
  61. "flaskBackend": backend_base_url
  62. }
  63. overrides.write_text(json.dumps(overrides_conf, indent=2))
  64. # 配置 sparkmagic
  65. if sparkmagic_conf_dir is None:
  66. sparkmagic_conf_dir = "/etc/sparkmagic"
  67. os.environ["SPARKMAGIC_CONF_DIR"] = sparkmagic_conf_dir
  68. conf_file = Path(sparkmagic_conf_dir) / "config.json"
  69. if not conf_file.exists():
  70. raise FileNotFoundError(
  71. f"cannot find sparkmagic config file at {conf_file.resolve()}"
  72. )
  73. if livy_server_url is not None:
  74. conf = json.loads(conf_file.read_text())
  75. conf["kernel_python_credentials"]["url"] = livy_server_url
  76. conf_file.write_text(json.dumps(conf, indent=2))
  77. # 设置 hadoop classpath (for jupyter-hdfscm)
  78. cp_proc = subprocess.run(
  79. f"{hadoop_home}/bin/hadoop classpath --glob",
  80. shell=True,
  81. capture_output=True,
  82. check=True,
  83. text=True,
  84. )
  85. if classpath is None:
  86. os.environ["CLASSPATH"] = cp_proc.stdout.strip()
  87. else:
  88. os.environ["CLASSPATH"] = cp_proc.stdout.strip() + ":" + classpath
  89. # jupyter-hdfscm
  90. with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
  91. fp.write(
  92. "\nc.HDFSContentsManager.root_dir_template = '/user/"
  93. + kinit_principal
  94. + "/jupyter/{username}/'\n"
  95. )
  96. fp.write("c.ServerApp.contents_manager_class = 'hdfscm.HDFSContentsManager'\n")
  97. # 设置 kerberos 认证
  98. if use_krb:
  99. Thread(
  100. daemon=True,
  101. target=run_kinit,
  102. name="run_kinit",
  103. args=(kinit_principal, kinit_interval, keytab_path, krb_cache),
  104. ).start()
  105. subprocess.run("jupyterhub -f /etc/jupyterhub/config.py", shell=True)
# Script entry point: call main() only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()