test_component_parser_airflow.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. #
  2. # Copyright 2018-2022 Elyra Authors
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import json
  17. import os
  18. from subprocess import CompletedProcess
  19. from subprocess import run
  20. from conftest import AIRFLOW_COMPONENT_CACHE_INSTANCE
  21. from conftest import TEST_CATALOG_NAME
  22. import jupyter_core.paths
  23. import pytest
  24. from elyra.metadata.metadata import Metadata
  25. from elyra.pipeline.catalog_connector import CatalogEntry
  26. from elyra.pipeline.catalog_connector import FilesystemComponentCatalogConnector
  27. from elyra.pipeline.catalog_connector import UrlComponentCatalogConnector
  28. from elyra.pipeline.component import ComponentParser
  29. from elyra.pipeline.component_catalog import ComponentCache
  30. from elyra.pipeline.component_metadata import ComponentCatalogMetadata
  31. from elyra.pipeline.runtime_type import RuntimeProcessorType
# Path of the "components" directory beneath the first entry of jupyter_core's ENV_JUPYTER_PATH.
# NOTE(review): not referenced in the visible part of this module - confirm it is still needed.
COMPONENT_CATALOG_DIRECTORY = os.path.join(jupyter_core.paths.ENV_JUPYTER_PATH[0], "components")
# Runtime processor type handed to the component cache, parser and catalog metadata by the tests below.
RUNTIME_PROCESSOR = RuntimeProcessorType.APACHE_AIRFLOW
  34. @pytest.fixture
  35. def invalid_url(request):
  36. return request.param
  37. def _get_resource_path(filename):
  38. root = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
  39. resource_path = os.path.join(root, "..", "..", "..", "tests/pipeline", "resources", "components", filename)
  40. resource_path = os.path.normpath(resource_path)
  41. return resource_path
  42. @pytest.mark.parametrize("catalog_instance", [AIRFLOW_COMPONENT_CACHE_INSTANCE], indirect=True)
  43. def test_component_catalog_can_load_components_from_registries(catalog_instance, component_cache):
  44. components = component_cache.get_all_components(RUNTIME_PROCESSOR)
  45. assert len(components) > 0
  46. @pytest.mark.parametrize("create_inprocess", [True, False])
  47. async def test_modify_component_catalogs(component_cache, metadata_manager_with_teardown, create_inprocess):
  48. # Get initial set of components
  49. initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)
  50. # Create new registry instance with a single URL-based component
  51. urls = [
  52. "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components/"
  53. "airflow_test_operator.py"
  54. ]
  55. instance_metadata = {
  56. "description": "A test registry",
  57. "runtime_type": RUNTIME_PROCESSOR.name,
  58. "categories": ["New Components"],
  59. "paths": urls,
  60. }
  61. registry_instance = Metadata(
  62. schema_name="url-catalog", name=TEST_CATALOG_NAME, display_name="New Test Registry", metadata=instance_metadata
  63. )
  64. if create_inprocess:
  65. metadata_manager_with_teardown.create(TEST_CATALOG_NAME, registry_instance)
  66. else:
  67. res: CompletedProcess = run(
  68. [
  69. "elyra-metadata",
  70. "install",
  71. "component-catalogs",
  72. f"--schema_name={registry_instance.schema_name}",
  73. f"--json={registry_instance.to_json()}",
  74. f"--name={TEST_CATALOG_NAME}",
  75. ]
  76. )
  77. assert res.returncode == 0
  78. # Wait for update to complete
  79. component_cache.wait_for_all_cache_tasks()
  80. # Get new set of components from all active registries, including added test registry
  81. components_after_create = component_cache.get_all_components(RUNTIME_PROCESSOR)
  82. assert len(components_after_create) == len(initial_components) + 3
  83. added_component_names = [component.name for component in components_after_create]
  84. assert "TestOperator" in added_component_names
  85. assert "TestOperatorNoInputs" not in added_component_names
  86. # Modify the test registry to add an additional path to
  87. urls.append(
  88. "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/tests/pipeline/resources/components"
  89. "/airflow_test_operator_no_inputs.py"
  90. )
  91. metadata_manager_with_teardown.update(TEST_CATALOG_NAME, registry_instance)
  92. # Wait for update to complete
  93. component_cache.wait_for_all_cache_tasks()
  94. # Get set of components from all active registries, including modified test registry
  95. components_after_update = component_cache.get_all_components(RUNTIME_PROCESSOR)
  96. assert len(components_after_update) == len(initial_components) + 4
  97. modified_component_names = [component.name for component in components_after_update]
  98. assert "TestOperator" in modified_component_names
  99. assert "TestOperatorNoInputs" in modified_component_names
  100. # Delete the test registry
  101. metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)
  102. # Wait for update to complete
  103. component_cache.wait_for_all_cache_tasks()
  104. # Check that components remaining after delete are the same as before the new catalog was added
  105. components_after_remove = component_cache.get_all_components(RUNTIME_PROCESSOR)
  106. assert len(components_after_remove) == len(initial_components)
  107. @pytest.mark.parametrize("create_inprocess", [True, False])
  108. async def test_directory_based_component_catalog(component_cache, metadata_manager_with_teardown, create_inprocess):
  109. # Get initial set of components
  110. initial_components = component_cache.get_all_components(RUNTIME_PROCESSOR)
  111. # Create new directory-based registry instance with components in ../../test/resources/components
  112. registry_path = _get_resource_path("")
  113. instance_metadata = {
  114. "description": "A test registry",
  115. "runtime_type": RUNTIME_PROCESSOR.name,
  116. "categories": ["New Components"],
  117. "paths": [registry_path],
  118. }
  119. registry_instance = Metadata(
  120. schema_name="local-directory-catalog",
  121. name=TEST_CATALOG_NAME,
  122. display_name="New Test Registry",
  123. metadata=instance_metadata,
  124. )
  125. if create_inprocess:
  126. metadata_manager_with_teardown.create(TEST_CATALOG_NAME, registry_instance)
  127. else:
  128. res: CompletedProcess = run(
  129. [
  130. "elyra-metadata",
  131. "install",
  132. "component-catalogs",
  133. f"--schema_name={registry_instance.schema_name}",
  134. f"--json={registry_instance.to_json()}",
  135. f"--name={TEST_CATALOG_NAME}",
  136. ]
  137. )
  138. assert res.returncode == 0
  139. # Wait for update to complete
  140. component_cache.wait_for_all_cache_tasks()
  141. # Get new set of components from all active registries, including added test registry
  142. components_after_create = component_cache.get_all_components(RUNTIME_PROCESSOR)
  143. assert len(components_after_create) == len(initial_components) + 6
  144. # Check that all relevant components from the new registry have been added
  145. added_component_names = [component.name for component in components_after_create]
  146. assert "TestOperator" in added_component_names
  147. assert "TestOperatorNoInputs" in added_component_names
  148. # Delete the test registry and wait for updates to complete
  149. metadata_manager_with_teardown.remove(TEST_CATALOG_NAME)
  150. component_cache.wait_for_all_cache_tasks()
  151. def test_parse_airflow_component_file():
  152. # Define the appropriate reader for a filesystem-type component definition
  153. airflow_supported_file_types = [".py"]
  154. reader = FilesystemComponentCatalogConnector(airflow_supported_file_types)
  155. # Read contents of given path
  156. path = _get_resource_path("airflow_test_operator.py")
  157. catalog_entry_data = {"path": path}
  158. # Construct a catalog instance
  159. catalog_type = "local-file-catalog"
  160. catalog_instance = ComponentCatalogMetadata(
  161. schema_name=catalog_type, metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name}
  162. )
  163. # Build the catalog entry data structures required for parsing
  164. entry_data = reader.get_entry_data(catalog_entry_data, {})
  165. catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["path"])
  166. # Parse the component entry
  167. parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
  168. components = parser.parse(catalog_entry)
  169. assert len(components) == 3 # TestOperator, DeriveFromTestOperator, and DeriveFromImportedOperator
  170. # Split components list into its constituent operators
  171. components = sorted(components, key=lambda component: component.id)
  172. import_test_op, derive_test_op, test_op = components[0], components[1], components[2]
  173. # Helper method to retrieve the requested parameter value from the dictionary
  174. def get_parameter_value(param_name):
  175. property_dict = properties_json["current_parameters"][param_name]
  176. return property_dict[property_dict["activeControl"]]
  177. # Helper method to retrieve the requested parameter info from the dictionary
  178. def get_parameter_format(param_name, control_id="StringControl"):
  179. param_info = None
  180. for prop_info in properties_json["uihints"]["parameter_info"]:
  181. if prop_info.get("parameter_ref") == param_name:
  182. param_info = prop_info["data"]["controls"][control_id]["format"]
  183. break
  184. return param_info
  185. # Helper method to retrieve the requested parameter description from the dictionary
  186. def get_parameter_description(param_name):
  187. param_desc = None
  188. for prop_info in properties_json["uihints"]["parameter_info"]:
  189. if prop_info.get("parameter_ref") == param_name:
  190. param_desc = prop_info["description"]["default"]
  191. break
  192. return param_desc
  193. # Helper method to retrieve whether the requested parameter is required
  194. def get_parameter_required(param_name):
  195. param_info = None
  196. for prop_info in properties_json["uihints"]["parameter_info"]:
  197. if prop_info.get("parameter_ref") == param_name:
  198. param_info = prop_info["data"]["required"]
  199. break
  200. return param_info
  201. # Retrieve properties for TestOperator
  202. # Test Operator does not include type hints for the init function args
  203. properties_json = ComponentCache.to_canvas_properties(test_op)
  204. # Ensure system parameters are not prefixed and hold correct values
  205. assert properties_json["current_parameters"]["label"] == ""
  206. component_source = json.dumps({"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference})
  207. assert properties_json["current_parameters"]["component_source"] == component_source
  208. # Ensure component parameters are prefixed with 'elyra_' and values are as expected
  209. assert get_parameter_value("elyra_str_no_default") == ""
  210. assert get_parameter_value("elyra_str_default") == "default"
  211. assert get_parameter_value("elyra_str_empty") == ""
  212. assert get_parameter_value("elyra_str_not_in_docstring") == ""
  213. assert get_parameter_value("elyra_bool_no_default") is False
  214. assert get_parameter_value("elyra_bool_default_false") is False
  215. assert get_parameter_value("elyra_bool_default_true") is True
  216. assert get_parameter_value("elyra_bool_not_in_docstring") is False
  217. assert get_parameter_value("elyra_int_no_default") == 0
  218. assert get_parameter_value("elyra_int_default_zero") == 0
  219. assert get_parameter_value("elyra_int_default_non_zero") == 2
  220. assert get_parameter_value("elyra_int_not_in_docstring") == 3
  221. assert get_parameter_value("elyra_dict_default_is_none") == "{}" # {}
  222. assert get_parameter_value("elyra_list_default_is_none") == "[]" # []
  223. # Ensure that type information is inferred correctly for properties that
  224. # define 'unusual' types, such as 'a dictionary of lists'
  225. assert get_parameter_format("elyra_unusual_type_dict") == "dictionary"
  226. assert get_parameter_format("elyra_unusual_type_list") == "list"
  227. # Ensure that type information falls back to string if no type hint present
  228. # and no ':type: <type info>' phrase found in docstring
  229. assert get_parameter_format("elyra_fallback_type") == "string"
  230. # Ensure component parameters are marked as required in the correct circumstances
  231. # (parameter is required if there is no default value provided or if a type hint
  232. # does not include 'Optional[...]')
  233. assert get_parameter_required("elyra_str_no_default") is True
  234. assert get_parameter_required("elyra_str_default") is False
  235. assert get_parameter_required("elyra_str_empty") is False
  236. # Ensure descriptions are rendered properly with type hint in parentheses
  237. assert (
  238. get_parameter_description("elyra_unusual_type_dict") == "a dictionary parameter with the "
  239. "phrase 'list' in type description "
  240. "(type: a dictionary of arrays)"
  241. )
  242. assert (
  243. get_parameter_description("elyra_unusual_type_list") == "a list parameter with the phrase "
  244. "'string' in type description "
  245. "(type: a list of strings)"
  246. )
  247. assert get_parameter_description("elyra_fallback_type") == "(type: str)"
  248. # Ensure that a long description with line wrapping and a backslash escape has rendered
  249. # (and hence did not raise an error during json.loads in the properties API request)
  250. parsed_description = """a string parameter with a very long description
  251. that wraps lines and also has an escaped underscore in it, as shown here: (\_) # noqa W605"""
  252. modified_description = parsed_description.replace("\n", " ") + " (type: str)" # modify desc acc. to parser rules
  253. assert get_parameter_description("elyra_long_description_property") == modified_description
  254. # Retrieve properties for DeriveFromTestOperator
  255. # DeriveFromTestOperator includes type hints for all init arguments
  256. properties_json = ComponentCache.to_canvas_properties(derive_test_op)
  257. # Ensure default values are parsed correct in the case where type hints are present
  258. assert get_parameter_value("elyra_str_default") == "default"
  259. assert get_parameter_value("elyra_bool_default") is True
  260. assert get_parameter_value("elyra_int_default") == 2
  261. # Ensure component parameters are prefixed with 'elyra_' and types are as expected
  262. # in the case when a type hint is provided (and regardless of whether or not the
  263. # parameter type is included in the docstring)
  264. assert get_parameter_format("elyra_str_no_default") == "string"
  265. assert get_parameter_format("elyra_str_default") == "string"
  266. assert get_parameter_format("elyra_str_optional_default") == "string"
  267. assert get_parameter_format("elyra_str_not_in_docstring") == "string"
  268. assert get_parameter_format("elyra_bool_no_default", "BooleanControl") == "boolean"
  269. assert get_parameter_format("elyra_bool_default", "BooleanControl") == "boolean"
  270. assert get_parameter_format("elyra_bool_not_in_docstring", "BooleanControl") == "boolean"
  271. assert get_parameter_format("elyra_int_no_default", "NumberControl") == "number"
  272. assert get_parameter_format("elyra_int_default", "NumberControl") == "number"
  273. assert get_parameter_format("elyra_int_not_in_docstring", "NumberControl") == "number"
  274. assert get_parameter_format("elyra_list_optional_default") == "list"
  275. # Ensure component parameters are marked as required in the correct circumstances
  276. assert get_parameter_required("elyra_str_no_default") is True
  277. assert get_parameter_required("elyra_str_default") is False
  278. assert get_parameter_required("elyra_str_optional_default") is False
  279. assert get_parameter_required("elyra_str_not_in_docstring") is True
  280. # Retrieve properties for DeriveFromImportedOperator
  281. # DeriveFromImportedOperator includes type hints for dictionary and
  282. # list values to test the more complex parsing required in this case
  283. properties_json = ComponentCache.to_canvas_properties(import_test_op)
  284. # Ensure component parameters are prefixed with 'elyra_' and types are as expected
  285. assert get_parameter_format("elyra_dict_no_default") == "dictionary"
  286. assert get_parameter_format("elyra_dict_optional_no_default") == "dictionary"
  287. assert get_parameter_format("elyra_nested_dict_default") == "dictionary"
  288. assert get_parameter_format("elyra_dict_not_in_docstring") == "dictionary"
  289. assert get_parameter_format("elyra_list_no_default") == "list"
  290. assert get_parameter_format("elyra_list_optional_no_default") == "list"
  291. assert get_parameter_format("elyra_list_default") == "list"
  292. assert get_parameter_format("elyra_list_optional_default") == "list"
  293. assert get_parameter_format("elyra_list_not_in_docstring") == "list"
  294. assert get_parameter_value("elyra_dict_no_default") == "{}"
  295. assert get_parameter_value("elyra_list_no_default") == "[]"
  296. def test_parse_airflow_component_url():
  297. # Define the appropriate reader for a URL-type component definition
  298. airflow_supported_file_types = [".py"]
  299. reader = UrlComponentCatalogConnector(airflow_supported_file_types)
  300. # Read contents of given path
  301. url = "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/bash_operator.py" # noqa: E501
  302. catalog_entry_data = {"url": url}
  303. # Construct a catalog instance
  304. catalog_type = "url-catalog"
  305. catalog_instance = ComponentCatalogMetadata(
  306. schema_name=catalog_type, metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name}
  307. )
  308. # Build the catalog entry data structures required for parsing
  309. entry_data = reader.get_entry_data(catalog_entry_data, {})
  310. catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["url"])
  311. # Parse the component entry
  312. parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
  313. component = parser.parse(catalog_entry)[0]
  314. properties_json = ComponentCache.to_canvas_properties(component)
  315. # Ensure component parameters are prefixed, and system parameters are not, and hold correct values
  316. assert properties_json["current_parameters"]["label"] == ""
  317. # Helper method to retrieve the requested parameter value from the dictionary
  318. def get_parameter(param_name):
  319. property_dict = properties_json["current_parameters"][param_name]
  320. return property_dict[property_dict["activeControl"]]
  321. component_source = json.dumps({"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference})
  322. assert properties_json["current_parameters"]["component_source"] == component_source
  323. assert get_parameter("elyra_bash_command") == ""
  324. assert get_parameter("elyra_xcom_push") is True
  325. assert get_parameter("elyra_env") == "{}" # {}
  326. assert get_parameter("elyra_output_encoding") == "utf-8"
  327. def test_parse_airflow_component_file_no_inputs():
  328. # Define the appropriate reader for a filesystem-type component definition
  329. airflow_supported_file_types = [".py"]
  330. reader = FilesystemComponentCatalogConnector(airflow_supported_file_types)
  331. # Read contents of given path
  332. path = _get_resource_path("airflow_test_operator_no_inputs.py")
  333. catalog_entry_data = {"path": path}
  334. # Construct a catalog instance
  335. catalog_type = "local-file-catalog"
  336. catalog_instance = ComponentCatalogMetadata(
  337. schema_name=catalog_type, metadata={"categories": ["Test"], "runtime_type": RUNTIME_PROCESSOR.name}
  338. )
  339. # Build the catalog entry data structures required for parsing
  340. entry_data = reader.get_entry_data(catalog_entry_data, {})
  341. catalog_entry = CatalogEntry(entry_data, catalog_entry_data, catalog_instance, ["path"])
  342. # Parse the component entry
  343. parser = ComponentParser.create_instance(platform=RUNTIME_PROCESSOR)
  344. no_input_op = parser.parse(catalog_entry)[0]
  345. properties_json = ComponentCache.to_canvas_properties(no_input_op)
  346. # Properties JSON should only include the two parameters common to every
  347. # component:'label' and 'component_source'
  348. num_common_params = 2
  349. assert len(properties_json["current_parameters"].keys()) == num_common_params
  350. assert len(properties_json["parameters"]) == num_common_params
  351. assert len(properties_json["uihints"]["parameter_info"]) == num_common_params
  352. # Total number of groups includes one for each parameter, plus 1 for the component_source header
  353. # (Airflow does not include an output header since there are no formally defined outputs)
  354. num_groups = num_common_params + 1
  355. assert len(properties_json["uihints"]["group_info"][0]["group_info"]) == num_groups
  356. # Ensure that template still renders the two common parameters correctly
  357. assert properties_json["current_parameters"]["label"] == ""
  358. component_source = json.dumps({"catalog_type": catalog_type, "component_ref": catalog_entry.entry_reference})
  359. assert properties_json["current_parameters"]["component_source"] == component_source
  360. @pytest.mark.parametrize(
  361. "invalid_url",
  362. [
  363. "https://nourl.py", # test an invalid host
  364. "https://raw.githubusercontent.com/elyra-ai/elyra/master/elyra/\
  365. pipeline/tests/resources/components/invalid_file.py", # test an invalid file
  366. ],
  367. indirect=True,
  368. )
  369. async def test_parse_components_invalid_url(invalid_url):
  370. # Define the appropriate reader for a Url-type component definition
  371. airflow_supported_file_types = [".py"]
  372. reader = UrlComponentCatalogConnector(airflow_supported_file_types)
  373. # Get path to an invalid component definition file and read contents
  374. entry_data = reader.get_entry_data({"url": invalid_url}, {})
  375. assert entry_data is None