component.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. #
  2. # Copyright 2018-2022 Elyra Authors
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from abc import abstractmethod
  17. from enum import Enum
  18. from importlib import import_module
  19. import json
  20. from logging import Logger
  21. from types import SimpleNamespace
  22. from typing import Any
  23. from typing import Dict
  24. from typing import List
  25. from typing import Optional
  26. from typing import Tuple
  27. from traitlets.config import LoggingConfigurable
  28. # Rather than importing only the CatalogEntry class needed in the Component parse
  29. # type hint below, the catalog_connector module must be imported in its
  30. # entirety in order to avoid a circular reference issue
  31. try:
  32. from elyra.pipeline import catalog_connector
  33. except ImportError:
  34. import sys
  35. catalog_connector = sys.modules[__package__ + ".catalog_connector"]
  36. from elyra.pipeline.runtime_type import RuntimeProcessorType
  37. class ComponentParameter(object):
  38. """
  39. Represents a single property for a pipeline component
  40. """
  41. def __init__(
  42. self,
  43. id: str,
  44. name: str,
  45. data_type: str,
  46. value: str,
  47. description: str,
  48. required: bool = False,
  49. control: str = "custom",
  50. control_id: str = "StringControl",
  51. one_of_control_types: Optional[List[Tuple[str, str, str]]] = None,
  52. default_control_type: str = "StringControl",
  53. default_data_type: str = "string",
  54. allow_no_options: Optional[bool] = False,
  55. items: Optional[List[str]] = None,
  56. ):
  57. """
  58. :param id: Unique identifier for a property
  59. :param name: The name of the property for display
  60. :param data_type: The type that the property value takes on
  61. :param value: The default value of the property
  62. :param description: A description of the property for display
  63. :param control: The control of the property on the display, e.g. custom or readonly
  64. :param control_id: The control type of the property, if the control is 'custom', e.g. StringControl, EnumControl
  65. :param one_of_control_types: A list of control types to be used when 'OneOfControl' type is the primary control
  66. :param default_control_type: The default control type to use when 'OneOfControl type is the primary control
  67. :param items: For properties with a control of 'EnumControl', the items making up the enum
  68. :param required: Whether the property is required
  69. """
  70. if not id:
  71. raise ValueError("Invalid component: Missing field 'id'.")
  72. if not name:
  73. raise ValueError("Invalid component: Missing field 'name'.")
  74. self._ref = id
  75. self._name = name
  76. self._data_type = data_type
  77. self._value = value
  78. self._description = description
  79. self._control = control
  80. self._control_id = control_id
  81. self._one_of_control_types = one_of_control_types
  82. self._default_control_type = default_control_type
  83. self._default_data_type = default_data_type
  84. self._allow_no_options = allow_no_options
  85. self._items = items or []
  86. # Check description for information about 'required' parameter
  87. if "not optional" in description.lower() or (
  88. "required" in description.lower()
  89. and "not required" not in description.lower()
  90. and "n't required" not in description.lower()
  91. ):
  92. required = True
  93. self._required = required
  94. @property
  95. def ref(self) -> str:
  96. return self._ref
  97. @property
  98. def name(self) -> str:
  99. return self._name
  100. @property
  101. def data_type(self) -> str:
  102. return self._data_type
  103. @property
  104. def value(self) -> str:
  105. return self._value
  106. @property
  107. def description(self) -> str:
  108. return self._description
  109. @property
  110. def control(self) -> str:
  111. return self._control
  112. @property
  113. def control_id(self) -> str:
  114. return self._control_id
  115. @property
  116. def one_of_control_types(self) -> List[Tuple[str, str, str]]:
  117. """
  118. The `OneOfControl` controller is an encapsulating control ID that allows users to select
  119. between multiple input types when configuring the component. For instance, in Airflow, a
  120. component parameter can take in, as input, both a value as well as an output from a parent
  121. node.
  122. When using the `OneOfControl` as the primary control ID for the component parameter,
  123. `one_of_control_types` provides canvas a list of control IDs that will be used by the
  124. `OneOfControl` controller. These control IDs are what allow the user to select different
  125. types of inputs.
  126. :return: A list of 3-tuples containing the default_control_type, data_type, label associated with controller
  127. """
  128. return self._one_of_control_types
  129. @property
  130. def default_control_type(self) -> str:
  131. """
  132. The `default_control_type` is the control type that will be displayed by default when
  133. first opening the component's parameters in the pipeline editor.
  134. """
  135. return self._default_control_type
  136. @property
  137. def default_data_type(self) -> str:
  138. """
  139. The `default_data_type` is the first data type that is assigned to this specific parameter
  140. after parsing the component specification.
  141. """
  142. return self._default_data_type
  143. @property
  144. def allow_no_options(self) -> bool:
  145. return self._allow_no_options
  146. @property
  147. def items(self) -> List[str]:
  148. return self._items
  149. @property
  150. def required(self) -> bool:
  151. return bool(self._required)
  152. class Component(object):
  153. """
  154. Represents a generic or runtime-specific component
  155. """
  156. def __init__(
  157. self,
  158. id: str,
  159. name: str,
  160. description: Optional[str],
  161. catalog_type: str,
  162. component_reference: Any,
  163. definition: Optional[str] = None,
  164. runtime_type: Optional[str] = None,
  165. op: Optional[str] = None,
  166. categories: Optional[List[str]] = None,
  167. properties: Optional[List[ComponentParameter]] = None,
  168. extensions: Optional[List[str]] = None,
  169. parameter_refs: Optional[dict] = None,
  170. package_name: Optional[str] = None,
  171. ):
  172. """
  173. :param id: Unique identifier for a component
  174. :param name: The name of the component for display
  175. :param description: The description of the component
  176. :param catalog_type: Indicates the type of component definition resource
  177. location; one of ['url', filename', 'directory]
  178. :param component_reference: Source information to help locate the component definition
  179. :param definition: The content of the specification file for this component
  180. :param runtime_type: The runtime type of the component (e.g. KUBEFLOW_PIPELINES, APACHE_AIRFLOW, etc.)
  181. :param op: The operation name of the component; used by generic components in rendering the palette
  182. :param categories: A list of categories that this component belongs to; used to organize component
  183. in the palette
  184. :param properties: The set of properties for the component
  185. :param extensions: The file extension used by the component
  186. :param package_name: The fully qualified package name (excluding class name) of the file associated
  187. with this component
  188. """
  189. if not id:
  190. raise ValueError("Invalid component: Missing field 'id'.")
  191. if not name:
  192. raise ValueError("Invalid component: Missing field 'name'.")
  193. self._id = id
  194. self._name = name
  195. self._description = description
  196. self._catalog_type = catalog_type
  197. self._component_reference = component_reference
  198. self._definition = definition
  199. self._runtime_type = runtime_type
  200. self._op = op
  201. self._categories = categories or []
  202. self._properties = properties
  203. if not parameter_refs:
  204. if self._catalog_type == "elyra":
  205. parameter_refs = {"filehandler": "filename"}
  206. else:
  207. parameter_refs = {}
  208. if self._catalog_type == "elyra" and extensions and not parameter_refs.get("filehandler"):
  209. Component._log_warning(
  210. f"Component '{self._id}' specifies extensions '{extensions}' but \
  211. no entry in the 'parameter_ref' dictionary for 'filehandler' and \
  212. cannot participate in drag and drop functionality as a result."
  213. )
  214. self._extensions = extensions
  215. self._parameter_refs = parameter_refs
  216. self._package_name = package_name
  217. @property
  218. def id(self) -> str:
  219. return self._id
  220. @property
  221. def name(self) -> str:
  222. return self._name
  223. @property
  224. def description(self) -> Optional[str]:
  225. return self._description
  226. @property
  227. def catalog_type(self) -> str:
  228. return self._catalog_type
  229. @property
  230. def component_reference(self) -> Any:
  231. return self._component_reference
  232. @property
  233. def component_source(self) -> str:
  234. """
  235. Informational property consisting of the catalog type from which
  236. this component originates and the reference information used to
  237. locate it within that catalog.
  238. """
  239. return json.dumps({"catalog_type": self.catalog_type, "component_ref": self.component_reference})
  240. @property
  241. def definition(self) -> str:
  242. return self._definition
  243. @property
  244. def runtime_type(self) -> Optional[RuntimeProcessorType]:
  245. return self._runtime_type
  246. @property
  247. def op(self) -> Optional[str]:
  248. if self._op:
  249. return self._op
  250. else:
  251. return self._id
  252. @property
  253. def categories(self) -> List[str]:
  254. return self._categories
  255. @property
  256. def properties(self) -> Optional[List[ComponentParameter]]:
  257. return self._properties
  258. @property
  259. def extensions(self) -> Optional[List[str]]:
  260. return self._extensions
  261. @property
  262. def parameter_refs(self) -> dict:
  263. return self._parameter_refs
  264. @property
  265. def import_statement(self) -> Optional[str]:
  266. if not self._package_name:
  267. return None
  268. return f"from {self._package_name} import {self._name}"
  269. @property
  270. def input_properties(self) -> List[ComponentParameter]:
  271. return [prop for prop in self._properties if prop.data_type != "outputpath"]
  272. @property
  273. def output_properties(self) -> List[ComponentParameter]:
  274. return [prop for prop in self._properties if prop.data_type == "outputpath"]
  275. @property
  276. def file_extension(self) -> Optional[str]:
  277. """
  278. The file extension of the definition file representing this
  279. Component.
  280. """
  281. return self.extensions[0] if self.extensions else None
  282. @staticmethod
  283. def _log_warning(msg: str, logger: Optional[Logger] = None):
  284. if logger:
  285. logger.warning(msg)
  286. else:
  287. print(f"WARNING: {msg}")
  288. class ComponentParser(LoggingConfigurable): # ABC
  289. component_platform: RuntimeProcessorType = None
  290. _file_types: List[str] = None
  291. _parser_class_map: Dict[str, str] = {
  292. "APACHE_AIRFLOW": "elyra.pipeline.airflow.component_parser_airflow:AirflowComponentParser",
  293. "KUBEFLOW_PIPELINES": "elyra.pipeline.kfp.component_parser_kfp:KfpComponentParser",
  294. }
  295. @classmethod
  296. def create_instance(cls, platform: RuntimeProcessorType) -> "ComponentParser":
  297. """
  298. Class method that creates the appropriate instance of ComponentParser based on platform type name.
  299. """
  300. try:
  301. module_name, class_name = cls._parser_class_map[platform.name].split(":")
  302. module = import_module(module_name)
  303. return getattr(module, class_name)()
  304. except Exception as e:
  305. raise RuntimeError(f"Could not get appropriate ComponentParser class: {e}")
  306. @property
  307. def file_types(self) -> List[str]:
  308. return self._file_types
  309. @abstractmethod
  310. def parse(self, catalog_entry: "catalog_connector.CatalogEntry") -> Optional[List[Component]]:
  311. """
  312. Parse a component definition given in the catalog entry and return
  313. a list of fully-qualified Component objects
  314. """
  315. raise NotImplementedError()
  316. def _format_description(self, description: str, data_type: str) -> str:
  317. """
  318. Adds parameter type information parsed from component specification
  319. to parameter description.
  320. """
  321. if description:
  322. return f"{description} (type: {data_type})"
  323. return f"(type: {data_type})"
  324. def determine_type_information(self, parsed_type: str) -> SimpleNamespace:
  325. """
  326. Takes the type information of a component parameter as parsed from the component
  327. specification and returns a new type that is one of several standard options.
  328. """
  329. parsed_type_lowered = parsed_type.lower()
  330. data_type_info: SimpleNamespace
  331. # Determine if this is a "container type"
  332. # Prefer types that occur in a clause of the form "[type] of ..." (i.e., "container" types)
  333. # E.g. "a dictionary of key/value pairs" will produce the type "dictionary"
  334. container_types = ["dictionary", "dict", "set", "list", "array", "arr"]
  335. for option in container_types:
  336. if option in parsed_type_lowered:
  337. data_type = option
  338. if data_type in ["dict", "dictionary"]:
  339. data_type = "dictionary"
  340. default_value = {}
  341. else: # data_type is one of ['list', 'set', 'array', 'arr']
  342. data_type = "list"
  343. default_value = []
  344. # Since we know the type, create our return value and bail
  345. data_type_info = ComponentParser.create_data_type_info(
  346. parsed_data=parsed_type_lowered, data_type=data_type, default_value=default_value
  347. )
  348. break
  349. else: # None of the container types were found...
  350. # Standardize type names
  351. if any(word in parsed_type_lowered for word in ["str", "string"]):
  352. data_type_info = ComponentParser.create_data_type_info(
  353. parsed_data=parsed_type_lowered, data_type="string"
  354. )
  355. elif any(word in parsed_type_lowered for word in ["int", "integer", "number"]):
  356. data_type_info = ComponentParser.create_data_type_info(
  357. parsed_data=parsed_type_lowered,
  358. data_type="number",
  359. control_id="NumberControl",
  360. default_control_type="NumberControl",
  361. default_value=0,
  362. )
  363. elif any(word in parsed_type_lowered for word in ["float"]):
  364. data_type_info = ComponentParser.create_data_type_info(
  365. parsed_data=parsed_type_lowered,
  366. data_type="number",
  367. control_id="NumberControl",
  368. default_control_type="NumberControl",
  369. default_value=0.0,
  370. )
  371. elif any(word in parsed_type_lowered for word in ["bool", "boolean"]):
  372. data_type_info = ComponentParser.create_data_type_info(
  373. parsed_data=parsed_type_lowered,
  374. data_type="boolean",
  375. control_id="BooleanControl",
  376. default_control_type="BooleanControl",
  377. default_value=False,
  378. )
  379. else: # Let this be undetermined. Callers should check for this status and adjust
  380. data_type_info = ComponentParser.create_data_type_info(
  381. parsed_data=parsed_type_lowered, data_type="string", undetermined=True
  382. )
  383. return data_type_info
  384. @staticmethod
  385. def create_data_type_info(
  386. parsed_data: str,
  387. data_type: str = "string",
  388. default_data_type: str = "string",
  389. data_label: str = None,
  390. default_value: Any = "",
  391. required: bool = True,
  392. one_of_control_types: Optional[List[Tuple[str, str, str]]] = None,
  393. control_id: str = "StringControl",
  394. default_control_type: str = "StringControl",
  395. allow_no_options: Optional[bool] = False,
  396. control: str = "custom",
  397. undetermined: bool = False,
  398. ) -> SimpleNamespace:
  399. """Returns a SimpleNamespace instance that contains the current state of data-type parsing.
  400. This method is called by ComponentParser.determine_type_information() and used by subclass
  401. implementations to determine the current state of parsing a data-type.
  402. The instance will indicate that the base ComponentParser could not determine the actual data-type
  403. via a `True` value in its `undetermined` attribute, in which case subclass implementations
  404. are advised to attempt further parsing. In such cases, the rest of the attributes of the instance
  405. will reflect a 'string' data type as that is the most flexible data_type and, hence, the default.
  406. """
  407. dti = SimpleNamespace(
  408. parsed_data=parsed_data,
  409. data_type=data_type,
  410. default_data_type=default_data_type,
  411. data_label=data_label or ControllerMap[control_id].value,
  412. default_value=default_value,
  413. required=required,
  414. default_control_type=default_control_type,
  415. one_of_control_types=one_of_control_types,
  416. control_id=control_id,
  417. allow_no_options=allow_no_options,
  418. control=control,
  419. undetermined=undetermined,
  420. )
  421. return dti
  422. class ControllerMap(Enum):
  423. StringControl = "Please enter a string value :"
  424. NumberControl = "Please enter a number value :"
  425. BooleanControl = "Please select or deselect the checkbox :"
  426. NestedEnumControl = "Please select an output from a parent :"