catalog_connector.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655
  1. #
  2. # Copyright 2018-2022 Elyra Authors
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. from abc import abstractmethod
  17. from copy import deepcopy
  18. import hashlib
  19. from http import HTTPStatus
  20. import os
  21. from pathlib import Path
  22. from queue import Empty
  23. from queue import Queue
  24. from threading import Thread
  25. from typing import Any
  26. from typing import Dict
  27. from typing import List
  28. from typing import Optional
  29. from deprecation import deprecated
  30. from jupyter_core.paths import ENV_JUPYTER_PATH
  31. import requests
  32. from traitlets.config import LoggingConfigurable
  33. from traitlets.traitlets import default
  34. from traitlets.traitlets import Integer
  35. from elyra._version import __version__
  36. from elyra.metadata.metadata import Metadata
  37. from elyra.pipeline.component import Component
  38. from elyra.pipeline.component import ComponentParameter
  39. from elyra.pipeline.runtime_type import RuntimeProcessorType
  40. class EntryData(object):
  41. """
  42. An object representing the data retrieved from a single entry of a catalog, which,
  43. at minimum, includes the string definition of the corresponding component(s)
  44. """
  45. definition: str = None
  46. file_extension: str = None
  47. def __init__(self, definition: str, file_extension: Optional[str] = None, **kwargs):
  48. if isinstance(definition, (bytes, bytearray)):
  49. definition = definition.decode("utf-8")
  50. self.definition = definition
  51. self.file_extension = file_extension
  52. class AirflowEntryData(EntryData):
  53. """
  54. An Airflow-specific EntryData object that includes the fully-qualified package
  55. name (excluding class name) that represents the definition file.
  56. """
  57. package_name: str = None
  58. def __init__(self, definition: str, file_extension: Optional[str] = None, **kwargs):
  59. super().__init__(definition, file_extension, **kwargs)
  60. self.package_name = kwargs.get("package_name")
  61. class KfpEntryData(EntryData):
  62. """
  63. A KFP-specific EntryData object
  64. """
  65. pass
  66. class CatalogEntry(object):
  67. """
  68. An object corresponding to a single entry of a component catalog, which has a
  69. unique id, a string definition, a dict of identifying key-value pairs, and
  70. other associated metadata.
  71. """
  72. id: str
  73. entry_data: EntryData
  74. entry_reference: Any
  75. catalog_type: str
  76. runtime_type: RuntimeProcessorType
  77. categories: List[str]
  78. def __init__(self, entry_data: EntryData, entry_reference: Any, catalog_instance: Metadata, hash_keys: List[str]):
  79. self.entry_data = entry_data
  80. self.entry_reference = entry_reference
  81. self.catalog_type = catalog_instance.schema_name
  82. self.runtime_type = catalog_instance.runtime_type.name # noqa
  83. self.categories = catalog_instance.metadata.get("categories", [])
  84. self.id = self.compute_unique_id(hash_keys)
  85. def compute_unique_id(self, hash_keys: List[str]) -> str:
  86. """
  87. Computes a unique id for the given component based on the schema name of the
  88. catalog connector type and any information specific to that component-catalog-type
  89. combination as given in hash_keys.
  90. :param hash_keys: the list of keys (present in the catalog_entry_data dict)
  91. whose values will be used to construct the hash
  92. :returns: a unique component id of the form '<catalog-type>:<hash_of_entry_info>'
  93. """
  94. hash_str = ""
  95. for key in hash_keys:
  96. if not self.entry_reference.get(key):
  97. # Catalog entry does not have key - build hash without it
  98. continue
  99. hash_str = hash_str + str(self.entry_reference[key]) + ":"
  100. hash_str = hash_str[:-1]
  101. # Use only the first 12 characters of the resulting hash
  102. hash_digest = f"{hashlib.sha256(hash_str.encode()).hexdigest()[:12]}"
  103. return f"{self.catalog_type}:{hash_digest}"
  104. def get_component(
  105. self, id: str, name: str, description: str, properties: List[ComponentParameter], file_extension: str
  106. ) -> Component:
  107. """
  108. Construct a Component object given the arguments (as parsed from the definition file)
  109. and the relevant information from the catalog from which the component originates.
  110. """
  111. params = {
  112. "id": id,
  113. "name": name,
  114. "description": description,
  115. "properties": properties,
  116. "catalog_type": self.catalog_type,
  117. "component_reference": self.entry_reference,
  118. "definition": self.entry_data.definition,
  119. "runtime_type": self.runtime_type,
  120. "categories": self.categories,
  121. "extensions": [self.entry_data.file_extension or file_extension],
  122. }
  123. if isinstance(self.entry_data, AirflowEntryData):
  124. params["package_name"] = self.entry_data.package_name
  125. return Component(**params)
  126. class ComponentCatalogConnector(LoggingConfigurable):
  127. """
  128. Abstract class to model component_entry readers that can read components from different locations
  129. """
  130. max_threads_default = 3
  131. max_readers_env = "ELYRA_CATALOG_CONNECTOR_MAX_READERS"
  132. max_readers = Integer(
  133. max_threads_default,
  134. help="""Sets the maximum number of reader threads to be used to read
  135. catalog entries in parallel""",
  136. ).tag(config=True)
  137. @default("max_readers")
  138. def max_readers_default(self):
  139. max_reader_threads = ComponentCatalogConnector.max_threads_default
  140. try:
  141. max_reader_threads = int(os.getenv(self.max_readers_env, max_reader_threads))
  142. except ValueError:
  143. self.log.info(
  144. f"Unable to parse environmental variable {self.max_readers_env}, "
  145. f"using the default value of {self.max_threads_default}"
  146. )
  147. return max_reader_threads
  148. def __init__(self, file_types: List[str], **kwargs):
  149. super().__init__(**kwargs)
  150. self._file_types = file_types
  151. @abstractmethod
  152. def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
  153. """
  154. Returns a list of catalog_entry_data dictionary instances, one per entry in the given catalog.
  155. Each catalog_entry_data dictionary contains the information needed to access a single component
  156. definition. The form that each catalog_entry_data takes is determined by the unique requirements
  157. of the reader class.
  158. For example, the FilesystemCatalogConnector includes both a base directory ('base_dir') key-value
  159. pair and a relative path ('path') key-value pair in its 'catalog_entry_data' dict. Both fields
  160. are needed in order to access the corresponding definition in get_entry_data().
  161. Every catalog_entry_data should contain each of the keys returned in get_hash_keys() to ensure
  162. uniqueness and portability among entries. For the same reason, no two catalog entries should have
  163. equivalent catalog_entry_data dictionaries.
  164. :param catalog_metadata: the dictionary form of the metadata associated with a single catalog;
  165. the general structure is given in the example below
  166. example:
  167. {
  168. "description": "...", # only present if a description is added
  169. "runtime_type": "...", # must be present
  170. "categories": ["category1", "category2", ...], # may be an empty array
  171. "your_property1": value1,
  172. "your_property2": value2,
  173. ...
  174. }
  175. :returns: a list of catalog entry dictionaries, each of which contains the information
  176. needed to access a component definition in get_entry_data()
  177. """
  178. raise NotImplementedError("abstract method 'get_catalog_entries()' must be implemented")
  179. @deprecated(
  180. deprecated_in="3.7.0",
  181. removed_in="4.0",
  182. current_version=__version__,
  183. details="Implement the get_entry_data function instead",
  184. )
  185. def read_catalog_entry(self, catalog_entry_data: Dict[str, Any], catalog_metadata: Dict[str, Any]) -> Optional[str]:
  186. """
  187. DEPRECATED. Will be removed in 4.0. get_entry_data() must be implemented instead.
  188. Reads a component definition for a single catalog entry using the catalog_entry_data returned
  189. from get_catalog_entries() and, if needed, the catalog metadata.
  190. :param catalog_entry_data: a dictionary that contains the information needed to read the content
  191. of the component definition; below is an example data structure returned
  192. from get_catalog_entries()
  193. example:
  194. {
  195. "directory_path": "/Users/path/to/directory",
  196. "relative_path": "subdir/file.py"
  197. }
  198. :param catalog_metadata: the metadata associated with the catalog in which this catalog entry is
  199. stored; this is the same dictionary that is passed into get_catalog_entries();
  200. in addition to catalog_entry_data, catalog_metadata may also be
  201. needed to read the component definition for certain types of catalogs
  202. :returns: the content of the given catalog entry's definition in string form, if found, or None;
  203. if None is returned, this catalog entry is skipped and a warning message logged
  204. """
  205. raise NotImplementedError("abstract method 'read_catalog_entry()' must be implemented")
  206. def get_entry_data(
  207. self, catalog_entry_data: Dict[str, Any], catalog_metadata: Dict[str, Any]
  208. ) -> Optional[EntryData]:
  209. """
  210. Reads a component definition (and other information-of-interest) for a single catalog entry and
  211. creates an EntryData object to represent it. Uses the catalog_entry_data returned from
  212. get_catalog_entries() and, if needed, the catalog metadata to retrieve the definition.
  213. :param catalog_entry_data: a dictionary that contains the information needed to read the content of
  214. the component definition; below is an example data structure returned from get_catalog_entries()
  215. example:
  216. {
  217. "directory_path": "/Users/path/to/directory",
  218. "relative_path": "subdir/file.py"
  219. }
  220. :param catalog_metadata: the metadata associated with the catalog in which this catalog entry is
  221. stored; this is the same dictionary that is passed into get_catalog_entries(); in addition to
  222. catalog_entry_data, catalog_metadata may also be needed to read the component definition for
  223. certain types of catalogs
  224. :returns: an EntryData object representing the definition (and other identifying info) for a single
  225. catalog entry; if None is returned, this catalog entry is skipped and a warning message logged
  226. """
  227. raise NotImplementedError("method 'get_entry_data()' must be overridden")
  228. @classmethod
  229. def get_hash_keys(cls) -> List[Any]:
  230. """
  231. Provides a list of keys, available in the 'catalog_entry_data' dictionary, whose values
  232. will be used to construct a unique hash id for each entry with the given catalog type.
  233. This function has been changed to a class method as of version 3.7. Connectors that still
  234. implement this function as an abstract method will be supported in a fallback scenario.
  235. Besides being a means to uniquely identify a single component (catalog entry), the hash id
  236. also enables pipeline portability across installations when the keys returned here are
  237. chosen strategically. For example, the FilesystemCatalogConnector includes both a base
  238. directory key-value pair and a relative path key-value pair in its 'catalog_entry_data' dict.
  239. Both fields are required to access the component definition in get_entry_data(), but
  240. only the relative path field is used to create the unique hash. This allows a component
  241. that has the same relative path defined in two separate a catalogs in two separate
  242. installations to resolve to the same unique id in each, and therefore to be portable across
  243. pipelines in these installations.
  244. To ensure the hash is unique, no two catalog entries can have the same key-value pairs
  245. over the set of keys returned by this function. If two entries resolve to the same hash,
  246. the one whose definition is read last will overwrite the other(s).
  247. Example:
  248. Given a set of keys ['key1', 'key2', 'key3'], the below two catalog_entry_data dictionaries
  249. will produce unique hashes. The same can not be said, however, if the set of keys
  250. returned is ['key2', 'key3'].
  251. component_entry_data for entry1: component_entry_data for entry2:
  252. { {
  253. 'key1': 'value1', 'key1': 'value4',
  254. 'key2': 'value2', 'key2': 'value2',
  255. 'key3': 'value3' 'key3': 'value3'
  256. } {
  257. Additionally, every catalog_entry_data dict should include each key in the set returned
  258. here. If this is not the case, a catalog entry's portability and uniqueness may be negatively
  259. affected.
  260. :returns: a list of keys
  261. """
  262. raise NotImplementedError("abstract method 'get_hash_keys()' must be implemented")
  263. def read_component_definitions(self, catalog_instance: Metadata) -> List[CatalogEntry]:
  264. """
  265. This function compiles the definitions of all catalog entries in a given catalog.
  266. Catalog entry data is first retrieved for each entry in the given catalog. This data is added
  267. to a queue, and a number of reader threads ('max_reader' or fewer) are started.
  268. Each reader thread pulls the data for a singe catalog entry from the queue and uses it to read
  269. the definition associated with that entry.
  270. As a mutable object, the 'catalog_entry_map' provides a means to retrieve a return value for
  271. each thread. If a thread is able to successfully read the content of the given catalog entry,
  272. a unique hash is created for the entry and a mapping is added to the catalog_entry_map.
  273. The catalog_instance Metadata parameter will have the following attributes of interest in
  274. addition to a few additional attributes used internally:
  275. :param catalog_instance: the Metadata instance for this catalog; below is an example instance
  276. example:
  277. display_name: str = "Catalog Name"
  278. schema_name: str = "connector-type"
  279. metadata: Dict[str, Any] = {
  280. "description": "...", # only present if a description is added
  281. "runtime": "...", # must be present
  282. "categories": ["category1", "category2", ...], # may be an empty array
  283. "your_property1": value1,
  284. "your_property2": value2,
  285. ...
  286. }
  287. :returns: a mapping of a unique component ids to their definition and identifying data
  288. """
  289. catalog_entry_q = Queue()
  290. catalog_entries: List[CatalogEntry] = []
  291. try:
  292. # Retrieve list of keys that will be used to construct
  293. # the catalog entry hash for each entry in the catalog
  294. try:
  295. # Attempt to use get_hash_keys as class method (Elyra version 3.7+)
  296. keys_to_hash = ComponentCatalogConnector.get_hash_keys()
  297. except Exception:
  298. # Fall back to using abstract method (version 3.6 and earlier)
  299. keys_to_hash = self.get_hash_keys()
  300. # Add display_name attribute to the metadata dictionary
  301. catalog_metadata = deepcopy(catalog_instance.metadata)
  302. catalog_metadata["display_name"] = catalog_instance.display_name
  303. # Add catalog entry data dictionaries to the thread queue
  304. for entry in self.get_catalog_entries(catalog_metadata):
  305. catalog_entry_q.put_nowait(entry)
  306. except NotImplementedError as e:
  307. err_msg = f"{self.__class__.__name__} does not meet the requirements of a catalog connector class: {e}"
  308. self.log.error(err_msg)
  309. except Exception as e:
  310. err_msg = f"Could not get catalog entry information for catalog '{catalog_instance.display_name}': {e}"
  311. # Dump stack trace with error message
  312. self.log.exception(err_msg)
  313. def read_with_thread():
  314. """
  315. Gets a catalog entry data dictionary from the queue and attempts to read corresponding definition
  316. """
  317. while not catalog_entry_q.empty():
  318. try:
  319. # Pull a catalog entry dictionary from the queue
  320. catalog_entry_data = catalog_entry_q.get(timeout=0.1)
  321. except Empty:
  322. continue
  323. try:
  324. # Read the entry definition given its returned data and the catalog entry data
  325. self.log.debug(
  326. f"Attempting read of definition for catalog entry with identifying information: "
  327. f"{str(catalog_entry_data)}..."
  328. )
  329. try:
  330. # Attempt to get an EntryData object from get_entry_data first
  331. entry_data: EntryData = self.get_entry_data(
  332. catalog_entry_data=catalog_entry_data, catalog_metadata=catalog_metadata
  333. )
  334. except NotImplementedError:
  335. # Connector class does not implement get_catalog_definition and we must
  336. # manually coerce this entry's returned values into a EntryData object
  337. definition = self.read_catalog_entry(
  338. catalog_entry_data=catalog_entry_data, catalog_metadata=catalog_metadata
  339. )
  340. entry_data: EntryData = EntryData(definition=definition)
  341. # Ignore this entry if no definition content is returned
  342. if not entry_data or not entry_data.definition:
  343. self.log.warning(
  344. f"No definition content found for catalog entry with identifying information: "
  345. f"{catalog_entry_data}. Skipping..."
  346. )
  347. catalog_entry_q.task_done()
  348. continue
  349. # Create a CatalogEntry object with the returned EntryData and other
  350. # necessary information from the catalog instance and connector class
  351. catalog_entry = CatalogEntry(
  352. entry_data=entry_data,
  353. entry_reference=catalog_entry_data,
  354. catalog_instance=catalog_instance,
  355. hash_keys=keys_to_hash,
  356. )
  357. catalog_entries.append(catalog_entry)
  358. except NotImplementedError as e:
  359. msg = f"{self.__class__.__name__} does not meet the requirements of a catalog connector class: {e}."
  360. self.log.error(msg)
  361. except Exception as e:
  362. # Dump stack trace with error message and continue
  363. self.log.exception(
  364. f"Could not read definition for catalog entry with identifying information: "
  365. f"{str(catalog_entry_data)}: {e}"
  366. )
  367. # Mark this thread's read as complete
  368. catalog_entry_q.task_done()
  369. # Start 'max_reader' reader threads if catalog includes more than 'max_reader'
  370. # number of catalog entries, else start one thread per entry
  371. num_threads = min(catalog_entry_q.qsize(), self.max_readers)
  372. for i in range(num_threads):
  373. Thread(target=read_with_thread).start()
  374. # Wait for all queued entries to be processed
  375. catalog_entry_q.join()
  376. return catalog_entries
  377. class FilesystemComponentCatalogConnector(ComponentCatalogConnector):
  378. """
  379. Read a singular component definition from the local filesystem
  380. """
  381. def get_absolute_path(self, path: str) -> str:
  382. """
  383. Determines the absolute location of a given path. Error checking is delegated to
  384. the calling function
  385. """
  386. # Expand path to include user home if necessary
  387. path = os.path.expanduser(path)
  388. # Check for absolute path
  389. if os.path.isabs(path):
  390. return path
  391. # If path is still not absolute, default to the Jupyter share location
  392. return os.path.join(ENV_JUPYTER_PATH[0], "components", path)
  393. def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
  394. """
  395. Returns a list of catalog_entry_data dictionary instances, one per entry in the given catalog.
  396. :returns: a list of component_entry_data; for the FilesystemComponentCatalogConnector class this
  397. takes the form:
  398. {
  399. 'base_dir': 'base/directory/for/file', # can be empty
  400. 'path': 'path/to/definition_in_local_fs.ext' # may be relative or absolute
  401. }
  402. """
  403. catalog_entry_data = []
  404. base_dir = catalog_metadata.get("base_path", "")
  405. if base_dir:
  406. base_dir = self.get_absolute_path(base_dir)
  407. if not os.path.exists(base_dir):
  408. # If the base directory is not found, skip this catalog
  409. self.log.warning(f"Base directory does not exist -> {base_dir}")
  410. return catalog_entry_data
  411. for path in catalog_metadata.get("paths"):
  412. path = os.path.expanduser(path)
  413. if not base_dir and not os.path.isabs(path):
  414. base_dir = os.path.join(ENV_JUPYTER_PATH[0], "components")
  415. catalog_entry_data.append({"base_dir": base_dir, "path": path})
  416. return catalog_entry_data
  417. def get_entry_data(
  418. self, catalog_entry_data: Dict[str, Any], catalog_metadata: Dict[str, Any]
  419. ) -> Optional[EntryData]:
  420. """
  421. Reads a component definition (and other information-of-interest) for a single catalog entry and
  422. creates an EntryData object to represent it. Uses the catalog_entry_data returned from
  423. get_catalog_entries() and, if needed, the catalog metadata to retrieve the definition.
  424. :param catalog_entry_data: for the Filesystem- and DirectoryComponentCatalogConnector classes,
  425. this includes 'path' and 'base_dir' keys
  426. :param catalog_metadata: Filesystem- and DirectoryComponentCatalogConnector classes do not need this
  427. field to read individual catalog entries
  428. """
  429. path = os.path.join(catalog_entry_data.get("base_dir", ""), catalog_entry_data.get("path"))
  430. if not os.path.exists(path):
  431. self.log.warning(f"Invalid location for component: {path}")
  432. else:
  433. with open(path, "r") as f:
  434. return EntryData(definition=f.read())
  435. return None
  436. @classmethod
  437. def get_hash_keys(cls) -> List[Any]:
  438. """
  439. For the Filesystem- and DirectoryComponentCatalogConnector classes, only the
  440. 'path' value is needed from the catalog_entry_data dictionary to construct a
  441. unique hash id for a single catalog entry
  442. """
  443. return ["path"]
  444. class DirectoryComponentCatalogConnector(FilesystemComponentCatalogConnector):
  445. """
  446. Read component definitions from a local directory
  447. """
  448. def get_relative_path_from_base(self, base_dir: str, file_path: str) -> str:
  449. """
  450. Determines the relative portion of a path from the given base directory.
  451. :param base_dir: the absolute path to a base directory to compare against
  452. :param file_path: the absolute path to a file within the given base directory
  453. :returns: the path to the given file relative to the given base directory
  454. Example:
  455. given: base_path = "/path/to/folder"
  456. given: absolute_path = "/path/to/folder/nested/file.py"
  457. returns: 'nested/file.py'
  458. """
  459. base_list = base_dir.split("/")
  460. absolute_list = file_path.split("/")
  461. while base_list:
  462. base_list = base_list[1:]
  463. absolute_list = absolute_list[1:]
  464. return "/".join(absolute_list)
  465. def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
  466. """
  467. Returns a list of catalog_entry_data dictionary instances, one per entry in the given catalog.
  468. :returns: a list of component_entry_data; for the DirectoryComponentCatalogConnector class this
  469. takes the form
  470. {
  471. 'base_dir': 'base/directory/for/files', # given in base_path
  472. 'path': 'path/to/definition_in_local_fs.ext' # may be relative or absolute
  473. }
  474. """
  475. catalog_entry_data = []
  476. for dir_path in catalog_metadata.get("paths"):
  477. base_dir = self.get_absolute_path(dir_path)
  478. if not os.path.exists(base_dir):
  479. self.log.warning(f"Invalid directory -> {base_dir}")
  480. continue
  481. # Include '**/' in the glob pattern if files in subdirectories should be included
  482. recursive_flag = "**/" if catalog_metadata.get("include_subdirs", False) else ""
  483. patterns = [f"{recursive_flag}*{file_type}" for file_type in self._file_types]
  484. for file_pattern in patterns:
  485. catalog_entry_data.extend(
  486. [
  487. {"base_dir": base_dir, "path": self.get_relative_path_from_base(base_dir, str(absolute_path))}
  488. for absolute_path in Path(base_dir).glob(file_pattern)
  489. ]
  490. )
  491. return catalog_entry_data
  492. class UrlComponentCatalogConnector(ComponentCatalogConnector):
  493. """
  494. Read a singular component definition from a url
  495. """
  496. def get_catalog_entries(self, catalog_metadata: Dict[str, Any]) -> List[Dict[str, Any]]:
  497. """
  498. Returns a list of catalog_entry_data dictionary instances, one per entry in the given catalog.
  499. :returns: a list of component_entry_data; for the UrlComponentCatalogConnector class this takes
  500. the form:
  501. {
  502. 'url': 'url_of_remote_component_definition'
  503. }
  504. """
  505. return [{"url": url} for url in catalog_metadata.get("paths")]
  506. def get_entry_data(
  507. self, catalog_entry_data: Dict[str, Any], catalog_metadata: Dict[str, Any]
  508. ) -> Optional[EntryData]:
  509. """
  510. Reads a component definition (and other information-of-interest) for a single catalog entry and
  511. creates an EntryData object to represent it. Uses the catalog_entry_data returned from
  512. get_catalog_entries() and, if needed, the catalog metadata to retrieve the definition.
  513. :param catalog_entry_data: for the UrlComponentCatalogConnector class this includes a 'url' key
  514. :param catalog_metadata: UrlComponentCatalogConnector does not need this field to read
  515. individual catalog entries
  516. """
  517. url = catalog_entry_data.get("url")
  518. try:
  519. res = requests.get(url)
  520. except Exception as e:
  521. self.log.warning(f"Failed to connect to URL for component: {url}: {e}")
  522. else:
  523. if res.status_code != HTTPStatus.OK:
  524. self.log.warning(f"Invalid location for component: {url} (HTTP code {res.status_code})")
  525. else:
  526. return EntryData(definition=res.text)
  527. return None
  528. @classmethod
  529. def get_hash_keys(cls) -> List[Any]:
  530. """
  531. For the UrlComponentCatalogConnector class, only the 'url' value is needed
  532. from the catalog_entry_data dictionary to construct a unique hash id for a
  533. single catalog entry
  534. """
  535. return ["url"]