operator.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. #
  2. # Copyright 2018-2022 Elyra Authors
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import os
  17. from typing import List
  18. from typing import Optional
  19. from elyra._version import __version__
  20. """
  21. This helper builder constructs the bootstrapping arguments to be used as the driver for elyra's generic components
  22. in Apache Airflow
  23. """
  24. # Inputs and Outputs separator character. If updated,
  25. # same-named variable in bootstrapper.py must be updated!
  26. INOUT_SEPARATOR = ";"
  27. ELYRA_GITHUB_ORG = os.getenv("ELYRA_GITHUB_ORG", "elyra-ai")
  28. ELYRA_GITHUB_BRANCH = os.getenv("ELYRA_GITHUB_BRANCH", "main" if "dev" in __version__ else "v" + __version__)
  29. ELYRA_BOOTSCRIPT_URL = os.getenv(
  30. "ELYRA_BOOTSTRAP_SCRIPT_URL",
  31. f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/elyra/{ELYRA_GITHUB_BRANCH}/elyra/airflow/bootstrapper.py",
  32. )
  33. ELYRA_REQUIREMENTS_URL = os.getenv(
  34. "ELYRA_REQUIREMENTS_URL",
  35. f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/"
  36. f"elyra/{ELYRA_GITHUB_BRANCH}/etc/generic/requirements-elyra.txt",
  37. )
  38. ELYRA_REQUIREMENTS_URL_PY37 = os.getenv(
  39. "ELYRA_REQUIREMENTS_URL_PY37",
  40. f"https://raw.githubusercontent.com/{ELYRA_GITHUB_ORG}/"
  41. f"elyra/{ELYRA_GITHUB_BRANCH}/etc/generic/requirements-elyra-py37.txt",
  42. )
  43. class BootscriptBuilder(object):
  44. def __init__(
  45. self,
  46. filename: str,
  47. pipeline_name: str,
  48. cos_endpoint: str,
  49. cos_bucket: str,
  50. cos_directory: str,
  51. cos_dependencies_archive: str,
  52. inputs: Optional[List[str]] = None,
  53. outputs: Optional[List[str]] = None,
  54. ):
  55. """
  56. This helper builder constructs the bootstrapping arguments to be used as the driver for
  57. elyra's generic components in Apache Airflow
  58. :param filename: name of the file to execute
  59. :param pipeline_name: name of the pipeline
  60. :param :cos_endpoint: object storage endpoint e.g weaikish1.fyre.ibm.com:30442
  61. :param :cos_bucket: bucket to retrieve archive from
  62. :param :cos_directory: name of the directory in the object storage bucket to pull
  63. :param :cos_dependencies_archive: archive file name to get from object storage bucket e.g archive1.tar.gz
  64. :param inputs: comma delimited list of files to be consumed/are required by the filename
  65. :param outputs: comma delimited list of files produced by the filename
  66. """
  67. self.arguments = []
  68. self.cos_endpoint = cos_endpoint
  69. self.cos_bucket = cos_bucket
  70. self.cos_directory = cos_directory
  71. self.cos_dependencies_archive = cos_dependencies_archive
  72. self.filename = filename
  73. self.pipeline_name = pipeline_name
  74. self.outputs = outputs
  75. self.inputs = inputs
  76. self.container_work_dir_root_path = "./"
  77. self.container_work_dir_name = "jupyter-work-dir/"
  78. self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
  79. if not filename:
  80. raise ValueError("You need to provide a filename for the operation.")
  81. @property
  82. def container_cmd(self):
  83. common_curl_options = "--fail -H 'Cache-Control: no-cache'"
  84. self.arguments = [
  85. f"mkdir -p {self.container_work_dir} && cd {self.container_work_dir} && "
  86. f"echo 'Downloading {ELYRA_BOOTSCRIPT_URL}' && "
  87. f"curl {common_curl_options} -L {ELYRA_BOOTSCRIPT_URL} --output bootstrapper.py && "
  88. f"echo 'Downloading {ELYRA_REQUIREMENTS_URL}' && "
  89. f"echo 'Downloading {ELYRA_REQUIREMENTS_URL_PY37}' && "
  90. f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL_PY37} --output requirements-elyra-py37.txt && "
  91. f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL} "
  92. f"--output requirements-elyra.txt && "
  93. "python3 -m pip install packaging && "
  94. "python3 -m pip freeze > requirements-current.txt && "
  95. "python3 bootstrapper.py "
  96. f"--pipeline-name '{self.pipeline_name}' "
  97. f"--cos-endpoint {self.cos_endpoint} "
  98. f"--cos-bucket {self.cos_bucket} "
  99. f"--cos-directory '{self.cos_directory}' "
  100. f"--cos-dependencies-archive '{self.cos_dependencies_archive}' "
  101. f"--file '{self.filename}' "
  102. ]
  103. if self.inputs:
  104. inputs_str = self._artifact_list_to_str(self.inputs)
  105. self.arguments.append(f"--inputs '{inputs_str}' ")
  106. if self.outputs:
  107. outputs_str = self._artifact_list_to_str(self.outputs)
  108. self.arguments.append(f"--outputs '{outputs_str}' ")
  109. argument_string = "".join(self.arguments)
  110. return argument_string
  111. def _artifact_list_to_str(self, pipeline_array):
  112. trimmed_artifact_list = []
  113. for artifact_name in pipeline_array:
  114. if INOUT_SEPARATOR in artifact_name: # if INOUT_SEPARATOR is in name, throw since this is our separator
  115. raise ValueError(f"Illegal character ({INOUT_SEPARATOR}) found in filename '{artifact_name}'.")
  116. trimmed_artifact_list.append(artifact_name.strip())
  117. return INOUT_SEPARATOR.join(trimmed_artifact_list)