run.py 3.3 KB

import logging
import json
import os
import subprocess
from pathlib import Path
from time import sleep
from threading import Thread


def run_kinit(
    kinit_principal: str,
    kinit_interval: int,
    keytab_path: Path,
    krb_cache: Path,
):
    """Keep the Kerberos ticket fresh by running kinit periodically.

    Meant to be called in a daemon thread.

    Args:
        kinit_principal: Kerberos principal name
        kinit_interval: interval between kinit runs, in seconds
        keytab_path: path to the keytab file
        krb_cache: path to the Kerberos ticket cache
    """
    while True:
        # Renew the ticket, then open the cache so other processes can read it.
        proc = subprocess.run(
            f"kinit -kt '{keytab_path}' '{kinit_principal}' && chmod go+r '{krb_cache}'",
            shell=True,
            text=True,
            capture_output=True,
            check=False,
        )
        try:
            proc.check_returncode()
        except subprocess.CalledProcessError:
            logging.warning(proc.stderr)
        sleep(kinit_interval)
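

# With the defaults used in main() below (KEYTAB_PATH and KINIT_PRINCIPAL unset),
# each loop iteration runs a command equivalent to:
#   kinit -kt "$HADOOP_CONF_DIR/user.keytab" aidevuser && chmod go+r /tmp/krb5cc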
def main():
    # The root logger defaults to WARNING; raise it so logging.info() below shows.
    logging.basicConfig(level=logging.INFO)
    sparkmagic_conf_dir = os.environ.get("SPARKMAGIC_CONF_DIR")
    livy_server_url = os.environ.get("LIVY_SERVER_URL")
    classpath = os.environ.get("CLASSPATH")
    hadoop_home = os.environ["HADOOP_HOME"]
    hadoop_conf_dir = os.environ["HADOOP_CONF_DIR"]
    kinit_principal = os.environ.get("KINIT_PRINCIPAL", "aidevuser")
    kinit_interval = int(os.environ.get("KINIT_INTERVAL", 6 * 60 * 60))
    keytab_path = Path(os.environ.get("KEYTAB_PATH", f"{hadoop_conf_dir}/user.keytab"))
    # Use Kerberos only when a keytab is actually present.
    use_krb = keytab_path.exists()
    krb_cache = Path("/tmp/krb5cc")
    os.environ["KRB5CCNAME"] = f"FILE:{krb_cache}"
    if not use_krb:
        logging.info("Kerberos keytab not found, using local root")
        kinit_principal = "root"
    # Configure sparkmagic
    if sparkmagic_conf_dir is None:
        sparkmagic_conf_dir = "/etc/sparkmagic"
        os.environ["SPARKMAGIC_CONF_DIR"] = sparkmagic_conf_dir
    conf_file = Path(sparkmagic_conf_dir) / "config.json"
    if not conf_file.exists():
        raise FileNotFoundError(
            f"cannot find sparkmagic config file at {conf_file.resolve()}"
        )
    # Point the sparkmagic kernels at the Livy server if one was given.
    if livy_server_url is not None:
        conf = json.loads(conf_file.read_text())
        conf["kernel_python_credentials"]["url"] = livy_server_url
        conf_file.write_text(json.dumps(conf, indent=2))
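    # After the rewrite, config.json contains a fragment like the following
    # (URL illustrative):
    #   "kernel_python_credentials": {"url": "http://livy:8998", ...}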
    # Build the Hadoop classpath (needed by jupyter-hdfscm).
    cp_proc = subprocess.run(
        f"{hadoop_home}/bin/hadoop classpath --glob",
        shell=True,
        capture_output=True,
        check=True,
        text=True,
    )
    if classpath is None:
        os.environ["CLASSPATH"] = cp_proc.stdout.strip()
    else:
        os.environ["CLASSPATH"] = cp_proc.stdout.strip() + ":" + classpath
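    # `hadoop classpath --glob` expands wildcard entries into concrete jar paths,
    # producing something like (illustrative):
    #   /opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/a.jar:...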
    # jupyter-hdfscm: store notebooks in HDFS.
    with open("/etc/jupyter/jupyter_lab_config.py", "a") as fp:
        fp.write(
            "\nc.HDFSContentsManager.root_dir_template = '/user/"
            + kinit_principal
            + "/jupyter/{username}/'\n"
        )
        fp.write("c.ServerApp.contents_manager_class = 'hdfscm.HDFSContentsManager'\n")
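    # With the default principal, the appended configuration reads:
    #   c.HDFSContentsManager.root_dir_template = '/user/aidevuser/jupyter/{username}/'
    #   c.ServerApp.contents_manager_class = 'hdfscm.HDFSContentsManager'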
    # Keep Kerberos authentication alive in the background.
    if use_krb:
        Thread(
            daemon=True,
            target=run_kinit,
            name="run_kinit",
            args=(kinit_principal, kinit_interval, keytab_path, krb_cache),
        ).start()
    subprocess.run("jupyterhub -f /etc/jupyterhub/config.py", shell=True)


if __name__ == "__main__":
    main()