test_bootstrapper.py 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853
  1. #
  2. # Copyright 2018-2022 Elyra Authors
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import hashlib
  17. import json
  18. import logging
  19. import os
  20. from pathlib import Path
  21. import subprocess
  22. from subprocess import CalledProcessError
  23. from subprocess import CompletedProcess
  24. from subprocess import run
  25. import sys
  26. from tempfile import TemporaryFile
  27. import time
  28. from typing import Optional
  29. import minio
  30. import mock
  31. import nbformat
  32. import papermill
  33. import pytest
  34. from elyra.kfp import bootstrapper
  35. # To run this test from an IDE:
  36. # 1. set PYTHONPATH='`path-to-repo`/etc/docker-scripts' and working directory to `path-to-repo`
  37. # 2. Manually launch test_minio container: docker run --name test_minio -d -p 9000:9000 minio/minio server /data
  38. # (this is located in Makefile)
  39. #
  40. # NOTE: Any changes to elyra/tests/kfp/resources/test-notebookA.ipynb require an
  41. # update of elyra/tests/kfp/resources/test-archive.tgz using the command below:
  42. # tar -cvzf test-archive.tgz test-notebookA.ipynb
  43. MINIO_HOST_PORT = os.getenv("MINIO_HOST_PORT", "127.0.0.1:9000")
  44. ELYRA_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
  45. RESOURCES_DIR = os.path.join(ELYRA_ROOT_DIR, "elyra", "tests", "kfp", "resources")
  46. @pytest.fixture(scope="module", autouse=True)
  47. def start_minio():
  48. """Start the minio container to simulate COS."""
  49. # The docker run command will fail if an instance of the test_minio container is running.
  50. # We'll make a "silent" attempt to start. If that fails, assume its due to the container
  51. # conflict, force its shutdown, and try once more. If successful, yield the minio instance
  52. # but also shutdown on the flip-side of the yield (when the fixture is cleaned up).
  53. #
  54. # Although actions like SIGINT (ctrl-C) should still trigger the post-yield logic, urgent
  55. # interrupts like SIGQUIT (ctrl-\) or multiple SIGINTs can still orphan the container, so
  56. # we still need the pre-yield behavior.
  57. minio = start_minio_container(False)
  58. if minio is None: # Got a failure. Shutdown (assumed) container and try once more.
  59. stop_minio_container()
  60. minio = start_minio_container(True)
  61. time.sleep(3) # give container a chance to start
  62. yield minio
  63. stop_minio_container()
  64. def start_minio_container(raise_on_failure: bool = False) -> Optional[CompletedProcess]:
  65. minio = None
  66. try:
  67. minio = run(
  68. ["docker", "run", "--name", "test_minio", "-d", "-p", "9000:9000", "minio/minio", "server", "/data"],
  69. check=True,
  70. )
  71. except CalledProcessError as ex:
  72. if raise_on_failure:
  73. raise RuntimeError(f"Error executing docker process: {ex}") from ex
  74. return minio
  75. def stop_minio_container():
  76. run(["docker", "rm", "-f", "test_minio"], check=True)
  77. @pytest.fixture(scope="function")
  78. def s3_setup():
  79. bucket_name = "test-bucket"
  80. cos_client = minio.Minio(MINIO_HOST_PORT, access_key="minioadmin", secret_key="minioadmin", secure=False)
  81. cos_client.make_bucket(bucket_name)
  82. yield cos_client
  83. cleanup_files = cos_client.list_objects(bucket_name, recursive=True)
  84. for file in cleanup_files:
  85. cos_client.remove_object(bucket_name, file.object_name)
  86. cos_client.remove_bucket(bucket_name)
  87. def main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict):
  88. """Primary body for main method testing..."""
  89. monkeypatch.setattr(bootstrapper.OpUtil, "parse_arguments", lambda x: argument_dict)
  90. monkeypatch.setattr(bootstrapper.OpUtil, "package_install", mock.Mock(return_value=True))
  91. monkeypatch.setenv("AWS_ACCESS_KEY_ID", "minioadmin")
  92. monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "minioadmin")
  93. monkeypatch.setenv("TEST_ENV_VAR1", "test_env_var1")
  94. s3_setup.fput_object(
  95. bucket_name=argument_dict["cos-bucket"],
  96. object_name="test-directory/test-file.txt",
  97. file_path=os.path.join(RESOURCES_DIR, "test-requirements-elyra.txt"),
  98. )
  99. s3_setup.fput_object(
  100. bucket_name=argument_dict["cos-bucket"],
  101. object_name="test-directory/test,file.txt",
  102. file_path=os.path.join(RESOURCES_DIR, "test-bad-requirements-elyra.txt"),
  103. )
  104. s3_setup.fput_object(
  105. bucket_name=argument_dict["cos-bucket"],
  106. object_name="test-directory/test-archive.tgz",
  107. file_path=os.path.join(RESOURCES_DIR, "test-archive.tgz"),
  108. )
  109. with tmpdir.as_cwd():
  110. bootstrapper.main()
  111. test_file_list = [
  112. "test-archive.tgz",
  113. "test-file.txt",
  114. "test,file.txt",
  115. "test-file/test-file-copy.txt",
  116. "test-file/test,file/test,file-copy.txt",
  117. "test-notebookA.ipynb",
  118. "test-notebookA-output.ipynb",
  119. "test-notebookA.html",
  120. ]
  121. # Ensure working directory has all the files.
  122. for file in test_file_list:
  123. assert os.path.isfile(file)
  124. # Ensure upload directory has all the files EXCEPT the output notebook
  125. # since it was it is uploaded as the input notebook (test-notebookA.ipynb)
  126. # (which is included in the archive at start).
  127. for file in test_file_list:
  128. if file != "test-notebookA-output.ipynb":
  129. assert s3_setup.stat_object(
  130. bucket_name=argument_dict["cos-bucket"], object_name="test-directory/" + file
  131. )
  132. if file == "test-notebookA.html":
  133. with open("test-notebookA.html") as html_file:
  134. assert "TEST_ENV_VAR1: test_env_var1" in html_file.read()
  135. def _get_operation_instance(monkeypatch, s3_setup):
  136. config = {
  137. "cos-endpoint": "http://" + MINIO_HOST_PORT,
  138. "cos-user": "minioadmin",
  139. "cos-password": "minioadmin",
  140. "cos-bucket": "test-bucket",
  141. "filepath": "untitled.ipynb",
  142. }
  143. op = bootstrapper.FileOpBase.get_instance(**config)
  144. # use the same minio instance used by the test
  145. # to avoid access denied errors when two minio
  146. # instances exist
  147. monkeypatch.setattr(op, "cos_client", s3_setup)
  148. return op
  149. def test_main_method(monkeypatch, s3_setup, tmpdir):
  150. argument_dict = {
  151. "cos-endpoint": "http://" + MINIO_HOST_PORT,
  152. "cos-bucket": "test-bucket",
  153. "cos-directory": "test-directory",
  154. "cos-dependencies-archive": "test-archive.tgz",
  155. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  156. "inputs": "test-file.txt;test,file.txt",
  157. "outputs": "test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt",
  158. "user-volume-path": None,
  159. }
  160. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  161. def test_main_method_with_wildcard_outputs(monkeypatch, s3_setup, tmpdir):
  162. argument_dict = {
  163. "cos-endpoint": "http://" + MINIO_HOST_PORT,
  164. "cos-bucket": "test-bucket",
  165. "cos-directory": "test-directory",
  166. "cos-dependencies-archive": "test-archive.tgz",
  167. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  168. "inputs": "test-file.txt;test,file.txt",
  169. "outputs": "test-file/*",
  170. "user-volume-path": None,
  171. }
  172. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  173. def test_main_method_with_dir_outputs(monkeypatch, s3_setup, tmpdir):
  174. argument_dict = {
  175. "cos-endpoint": "http://" + MINIO_HOST_PORT,
  176. "cos-bucket": "test-bucket",
  177. "cos-directory": "test-directory",
  178. "cos-dependencies-archive": "test-archive.tgz",
  179. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  180. "inputs": "test-file.txt;test,file.txt",
  181. "outputs": "test-file", # this is the directory that contains the outputs
  182. "user-volume-path": None,
  183. }
  184. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  185. def is_writable_dir(path):
  186. """Helper method determines whether 'path' is a writable directory"""
  187. try:
  188. with TemporaryFile(mode="w", dir=path) as t:
  189. t.write("1")
  190. return True
  191. except Exception:
  192. return False
  193. def remove_file(filename, fail_ok=True):
  194. """Removes filename. If fail_ok is False an assert is raised
  195. if removal failed for any reason, e.g. filenotfound
  196. """
  197. try:
  198. os.remove(filename)
  199. except OSError as ose:
  200. if fail_ok is False:
  201. raise AssertionError(f"Cannot remove {filename}: {str(ose)} {ose}")
  202. def test_process_metrics_method_not_writable_dir(monkeypatch, s3_setup, tmpdir):
  203. """Test for process_metrics_and_metadata
  204. Validates that the method can handle output directory that is not writable
  205. """
  206. # remove "default" output file if it already exists
  207. output_metadata_file = Path("/tmp") / "mlpipeline-ui-metadata.json"
  208. remove_file(output_metadata_file)
  209. try:
  210. monkeypatch.setenv("ELYRA_WRITABLE_CONTAINER_DIR", "/good/time/to/fail")
  211. argument_dict = {
  212. "cos-endpoint": f"http://{MINIO_HOST_PORT}",
  213. "cos-bucket": "test-bucket",
  214. "cos-directory": "test-directory",
  215. "cos-dependencies-archive": "test-archive.tgz",
  216. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  217. "inputs": "test-file.txt;test,file.txt",
  218. "outputs": "test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt",
  219. "user-volume-path": None,
  220. }
  221. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  222. except Exception as ex:
  223. print(f"Writable dir test failed: {str(ex)} {ex}")
  224. assert False
  225. assert output_metadata_file.exists() is False
  226. def test_process_metrics_method_no_metadata_file(monkeypatch, s3_setup, tmpdir):
  227. """Test for process_metrics_and_metadata
  228. Verifies that the method produces a valid KFP UI metadata file if
  229. the node's script | notebook did not generate this metadata file.
  230. """
  231. argument_dict = {
  232. "cos-endpoint": "http://" + MINIO_HOST_PORT,
  233. "cos-bucket": "test-bucket",
  234. "cos-directory": "test-directory",
  235. "cos-dependencies-archive": "test-archive.tgz",
  236. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  237. "inputs": "test-file.txt;test,file.txt",
  238. "outputs": "test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt",
  239. "user-volume-path": None,
  240. }
  241. output_path = Path(tmpdir)
  242. # metadata file name and location
  243. metadata_file = output_path / "mlpipeline-ui-metadata.json"
  244. # remove file if it already exists
  245. remove_file(metadata_file)
  246. # override the default output directory to make this test platform
  247. # independent
  248. monkeypatch.setenv("ELYRA_WRITABLE_CONTAINER_DIR", str(tmpdir))
  249. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  250. # process_metrics should have generated a file named mlpipeline-ui-metadata.json
  251. # in tmpdir
  252. try:
  253. with open(metadata_file, "r") as f:
  254. metadata = json.load(f)
  255. assert metadata.get("outputs") is not None
  256. assert isinstance(metadata["outputs"], list)
  257. assert len(metadata["outputs"]) == 1
  258. assert metadata["outputs"][0]["storage"] == "inline"
  259. assert metadata["outputs"][0]["type"] == "markdown"
  260. assert (
  261. f"{argument_dict['cos-endpoint']}/{argument_dict['cos-bucket']}/{argument_dict['cos-directory']}"
  262. in metadata["outputs"][0]["source"]
  263. )
  264. assert argument_dict["cos-dependencies-archive"] in metadata["outputs"][0]["source"]
  265. except AssertionError:
  266. raise
  267. except Exception as ex:
  268. # Potential reasons for failures:
  269. # file not found, invalid JSON
  270. print(f'Validation of "{str(ex)}" failed: {ex}')
  271. assert False
  272. def test_process_metrics_method_valid_metadata_file(monkeypatch, s3_setup, tmpdir):
  273. """Test for process_metrics_and_metadata
  274. Verifies that the method produces a valid KFP UI metadata file if
  275. the node's script | notebook generated this metadata file.
  276. """
  277. argument_dict = {
  278. "cos-endpoint": "http://" + MINIO_HOST_PORT,
  279. "cos-bucket": "test-bucket",
  280. "cos-directory": "test-directory",
  281. "cos-dependencies-archive": "test-archive.tgz",
  282. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  283. "inputs": "test-file.txt;test,file.txt",
  284. "outputs": "test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt",
  285. "user-volume-path": None,
  286. }
  287. output_path = Path(tmpdir)
  288. # metadata file name and location
  289. input_metadata_file = "mlpipeline-ui-metadata.json"
  290. output_metadata_file = output_path / input_metadata_file
  291. # remove output_metadata_file if it already exists
  292. remove_file(output_metadata_file)
  293. #
  294. # Simulate some custom metadata that the script | notebook produced
  295. #
  296. custom_metadata = {
  297. "some_property": "some property value",
  298. "outputs": [{"source": "gs://project/bucket/file.md", "type": "markdown"}],
  299. }
  300. with tmpdir.as_cwd():
  301. with open(input_metadata_file, "w") as f:
  302. json.dump(custom_metadata, f)
  303. # override the default output directory to make this test platform
  304. # independent
  305. monkeypatch.setenv("ELYRA_WRITABLE_CONTAINER_DIR", str(tmpdir))
  306. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  307. # output_metadata_file should now exist
  308. try:
  309. with open(output_metadata_file, "r") as f:
  310. metadata = json.load(f)
  311. assert metadata.get("some_property") is not None
  312. assert metadata["some_property"] == custom_metadata["some_property"]
  313. assert metadata.get("outputs") is not None
  314. assert isinstance(metadata["outputs"], list)
  315. assert len(metadata["outputs"]) == 2
  316. for output in metadata["outputs"]:
  317. if output.get("storage") is not None:
  318. assert output["storage"] == "inline"
  319. assert output["type"] == "markdown"
  320. assert (
  321. f"{argument_dict['cos-endpoint']}/{argument_dict['cos-bucket']}/{argument_dict['cos-directory']}" # noqa
  322. in output["source"]
  323. )
  324. assert argument_dict["cos-dependencies-archive"] in output["source"]
  325. else:
  326. assert output["type"] == custom_metadata["outputs"][0]["type"]
  327. assert output["source"] == custom_metadata["outputs"][0]["source"]
  328. except AssertionError:
  329. raise
  330. except Exception as ex:
  331. # Potential reasons for failures:
  332. # file not found, invalid JSON
  333. print(f'Validation of "{str(ex)}" failed: {ex}')
  334. assert False
  335. def test_process_metrics_method_invalid_metadata_file(monkeypatch, s3_setup, tmpdir):
  336. """Test for process_metrics_and_metadata
  337. Verifies that the method produces a valid KFP UI metadata file if
  338. the node's script | notebook generated an invalid metadata file.
  339. """
  340. argument_dict = {
  341. "cos-endpoint": f"http://{MINIO_HOST_PORT}",
  342. "cos-bucket": "test-bucket",
  343. "cos-directory": "test-directory",
  344. "cos-dependencies-archive": "test-archive.tgz",
  345. "filepath": os.path.join(RESOURCES_DIR, "test-notebookA.ipynb"),
  346. "inputs": "test-file.txt;test,file.txt",
  347. "outputs": "test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt",
  348. "user-volume-path": None,
  349. }
  350. output_path = Path(tmpdir)
  351. # metadata file name and location
  352. input_metadata_file = "mlpipeline-ui-metadata.json"
  353. output_metadata_file = output_path / input_metadata_file
  354. # remove output_metadata_file if it already exists
  355. remove_file(output_metadata_file)
  356. #
  357. # Populate the metadata file with some custom data that's not JSON
  358. #
  359. with tmpdir.as_cwd():
  360. with open(input_metadata_file, "w") as f:
  361. f.write("I am not a valid JSON data structure")
  362. f.write("1,2,3,4,5,6,7")
  363. # override the default output directory to make this test platform
  364. # independent
  365. monkeypatch.setenv("ELYRA_WRITABLE_CONTAINER_DIR", str(tmpdir))
  366. main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
  367. # process_metrics replaces the existing metadata file
  368. # because its content cannot be merged
  369. try:
  370. with open(output_metadata_file, "r") as f:
  371. metadata = json.load(f)
  372. assert metadata.get("outputs") is not None
  373. assert isinstance(metadata["outputs"], list)
  374. assert len(metadata["outputs"]) == 1
  375. assert metadata["outputs"][0]["storage"] == "inline"
  376. assert metadata["outputs"][0]["type"] == "markdown"
  377. assert (
  378. f"{argument_dict['cos-endpoint']}/{argument_dict['cos-bucket']}/{argument_dict['cos-directory']}"
  379. in metadata["outputs"][0]["source"]
  380. )
  381. assert argument_dict["cos-dependencies-archive"] in metadata["outputs"][0]["source"]
  382. except AssertionError:
  383. raise
  384. except Exception as ex:
  385. # Potential reasons for failures:
  386. # file not found, invalid JSON
  387. print(f'Validation of "{str(ex)}" failed: {ex}')
  388. assert False
  389. def test_fail_bad_notebook_main_method(monkeypatch, s3_setup, tmpdir):
  390. argument_dict = {
  391. "cos-endpoint": f"http://{MINIO_HOST_PORT}",
  392. "cos-bucket": "test-bucket",
  393. "cos-directory": "test-directory",
  394. "cos-dependencies-archive": "test-bad-archiveB.tgz",
  395. "filepath": os.path.join(RESOURCES_DIR, "test-bad-notebookB.ipynb"),
  396. "inputs": "test-file.txt",
  397. "outputs": "test-file/test-copy-file.txt",
  398. "user-volume-path": None,
  399. }
  400. monkeypatch.setattr(bootstrapper.OpUtil, "parse_arguments", lambda x: argument_dict)
  401. monkeypatch.setattr(bootstrapper.OpUtil, "package_install", mock.Mock(return_value=True))
  402. mocked_func = mock.Mock(
  403. return_value="default",
  404. side_effect=[
  405. "test-bad-archiveB.tgz",
  406. "test-file.txt",
  407. "test-bad-notebookB-output.ipynb",
  408. "test-bad-notebookB.html",
  409. "test-file.txt",
  410. ],
  411. )
  412. monkeypatch.setattr(bootstrapper.FileOpBase, "get_object_storage_filename", mocked_func)
  413. monkeypatch.setenv("AWS_ACCESS_KEY_ID", "minioadmin")
  414. monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "minioadmin")
  415. s3_setup.fput_object(
  416. bucket_name=argument_dict["cos-bucket"],
  417. object_name="test-file.txt",
  418. file_path=os.path.join(ELYRA_ROOT_DIR, "README.md"),
  419. )
  420. s3_setup.fput_object(
  421. bucket_name=argument_dict["cos-bucket"],
  422. object_name="test-bad-archiveB.tgz",
  423. file_path=os.path.join(RESOURCES_DIR, "test-bad-archiveB.tgz"),
  424. )
  425. with tmpdir.as_cwd():
  426. with pytest.raises(papermill.exceptions.PapermillExecutionError):
  427. bootstrapper.main()
  428. def test_package_installation(monkeypatch, virtualenv):
  429. elyra_dict = {
  430. "ipykernel": "5.3.0",
  431. "ansiwrap": "0.8.4",
  432. "packaging": "20.0",
  433. "text-extensions-for-pandas": "0.0.1-prealpha",
  434. }
  435. to_install_dict = {
  436. "bleach": "3.1.5",
  437. "ansiwrap": "0.7.0",
  438. "packaging": "20.4",
  439. "text-extensions-for-pandas": "0.0.1-prealpha",
  440. }
  441. correct_dict = {
  442. "ipykernel": "5.3.0",
  443. "ansiwrap": "0.8.4",
  444. "packaging": "20.4",
  445. "text-extensions-for-pandas": "0.0.1-prealpha",
  446. }
  447. mocked_func = mock.Mock(return_value="default", side_effect=[elyra_dict, to_install_dict])
  448. monkeypatch.setattr(bootstrapper.OpUtil, "package_list_to_dict", mocked_func)
  449. monkeypatch.setattr(sys, "executable", virtualenv.python)
  450. virtualenv.run("python3 -m pip install bleach==3.1.5")
  451. virtualenv.run("python3 -m pip install ansiwrap==0.7.0")
  452. virtualenv.run("python3 -m pip install packaging==20.4")
  453. virtualenv.run(
  454. "python3 -m pip install git+https://github.com/akchinSTC/"
  455. "text-extensions-for-pandas@3de5ce17ab0493dcdf88b51e8727f580c08d6997"
  456. )
  457. bootstrapper.OpUtil.package_install(user_volume_path=None)
  458. virtual_env_dict = {}
  459. output = virtualenv.run("python3 -m pip freeze", capture=True)
  460. print("This is the [pip freeze] output :\n" + output)
  461. for line in output.strip().split("\n"):
  462. if " @ " in line:
  463. package_name, package_version = line.strip("\n").split(sep=" @ ")
  464. elif "===" in line:
  465. package_name, package_version = line.strip("\n").split(sep="===")
  466. else:
  467. package_name, package_version = line.strip("\n").split(sep="==")
  468. virtual_env_dict[package_name] = package_version
  469. for package, version in correct_dict.items():
  470. assert virtual_env_dict[package] == version
  471. def test_package_installation_with_target_path(monkeypatch, virtualenv, tmpdir):
  472. # TODO : Need to add test for direct-source e.g. ' @ '
  473. elyra_dict = {
  474. "ipykernel": "5.3.0",
  475. "ansiwrap": "0.8.4",
  476. "packaging": "20.0",
  477. "text-extensions-for-pandas": "0.0.1-prealpha",
  478. }
  479. to_install_dict = {
  480. "bleach": "3.1.5",
  481. "ansiwrap": "0.7.0",
  482. "packaging": "21.0",
  483. "text-extensions-for-pandas": "0.0.1-prealpha",
  484. }
  485. correct_dict = {
  486. "ipykernel": "5.3.0",
  487. "ansiwrap": "0.8.4",
  488. "packaging": "21.0",
  489. "text-extensions-for-pandas": "0.0.1-prealpha",
  490. }
  491. mocked_func = mock.Mock(return_value="default", side_effect=[elyra_dict, to_install_dict])
  492. monkeypatch.setattr(bootstrapper.OpUtil, "package_list_to_dict", mocked_func)
  493. monkeypatch.setattr(sys, "executable", virtualenv.python)
  494. virtualenv.run("python3 -m pip install --upgrade pip")
  495. virtualenv.run(f"python3 -m pip install --target={tmpdir} bleach==3.1.5")
  496. virtualenv.run(f"python3 -m pip install --target={tmpdir} ansiwrap==0.7.0")
  497. virtualenv.run(f"python3 -m pip install --target={tmpdir} packaging==20.9")
  498. virtualenv.run(
  499. f"python3 -m pip install --target={tmpdir} git+https://github.com/akchinSTC/"
  500. "text-extensions-for-pandas@3de5ce17ab0493dcdf88b51e8727f580c08d6997"
  501. )
  502. bootstrapper.OpUtil.package_install(user_volume_path=str(tmpdir))
  503. virtual_env_dict = {}
  504. output = virtualenv.run(f"python3 -m pip freeze --path={tmpdir}", capture=True)
  505. print("This is the [pip freeze] output :\n" + output)
  506. for line in output.strip().split("\n"):
  507. if " @ " in line:
  508. package_name, package_version = line.strip("\n").split(sep=" @ ")
  509. elif "===" in line:
  510. package_name, package_version = line.strip("\n").split(sep="===")
  511. else:
  512. package_name, package_version = line.strip("\n").split(sep="==")
  513. virtual_env_dict[package_name] = package_version
  514. for package, version in correct_dict.items():
  515. assert virtual_env_dict[package].split(".")[0] == version.split(".")[0]
  516. def test_convert_notebook_to_html(tmpdir):
  517. notebook_file = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
  518. notebook_output_html_file = "test-notebookA.html"
  519. with tmpdir.as_cwd():
  520. bootstrapper.NotebookFileOp.convert_notebook_to_html(notebook_file, notebook_output_html_file)
  521. assert os.path.isfile(notebook_output_html_file)
  522. # Validate that an html file got generated from the notebook
  523. with open(notebook_output_html_file, "r") as html_file:
  524. html_data = html_file.read()
  525. assert html_data.startswith("<!DOCTYPE html>")
  526. assert "TEST_ENV_VAR1" in html_data # from os.getenv("TEST_ENV_VAR1")
  527. assert html_data.endswith("</html>\n")
  528. def test_fail_convert_notebook_to_html(tmpdir):
  529. notebook_file = os.path.join(RESOURCES_DIR, "test-bad-notebookA.ipynb")
  530. notebook_output_html_file = "bad-notebookA.html"
  531. with tmpdir.as_cwd():
  532. # Recent versions raising typeError due to #1130
  533. # https://github.com/jupyter/nbconvert/pull/1130
  534. with pytest.raises((TypeError, nbformat.validator.NotebookValidationError)):
  535. bootstrapper.NotebookFileOp.convert_notebook_to_html(notebook_file, notebook_output_html_file)
  536. def test_get_file_object_store(monkeypatch, s3_setup, tmpdir):
  537. file_to_get = "README.md"
  538. bucket_name = "test-bucket"
  539. s3_setup.fput_object(
  540. bucket_name=bucket_name, object_name=file_to_get, file_path=os.path.join(ELYRA_ROOT_DIR, file_to_get)
  541. )
  542. with tmpdir.as_cwd():
  543. op = _get_operation_instance(monkeypatch, s3_setup)
  544. op.get_file_from_object_storage(file_to_get)
  545. assert os.path.isfile(file_to_get)
  546. assert _fileChecksum(file_to_get) == _fileChecksum(os.path.join(ELYRA_ROOT_DIR, file_to_get))
  547. def test_fail_get_file_object_store(monkeypatch, s3_setup, tmpdir):
  548. file_to_get = "test-file.txt"
  549. with tmpdir.as_cwd():
  550. with pytest.raises(minio.error.S3Error) as exc_info:
  551. op = _get_operation_instance(monkeypatch, s3_setup)
  552. op.get_file_from_object_storage(file_to_get=file_to_get)
  553. assert exc_info.value.code == "NoSuchKey"
  554. def test_put_file_object_store(monkeypatch, s3_setup, tmpdir):
  555. bucket_name = "test-bucket"
  556. file_to_put = "LICENSE"
  557. op = _get_operation_instance(monkeypatch, s3_setup)
  558. op.put_file_to_object_storage(object_name=file_to_put, file_to_upload=os.path.join(ELYRA_ROOT_DIR, file_to_put))
  559. with tmpdir.as_cwd():
  560. s3_setup.fget_object(bucket_name, file_to_put, file_to_put)
  561. assert os.path.isfile(file_to_put)
  562. assert _fileChecksum(file_to_put) == _fileChecksum(os.path.join(ELYRA_ROOT_DIR, file_to_put))
  563. def test_fail_invalid_filename_put_file_object_store(monkeypatch, s3_setup):
  564. file_to_put = "LICENSE_NOT_HERE"
  565. with pytest.raises(FileNotFoundError):
  566. op = _get_operation_instance(monkeypatch, s3_setup)
  567. op.put_file_to_object_storage(file_to_upload=file_to_put)
  568. def test_fail_bucket_put_file_object_store(monkeypatch, s3_setup):
  569. bucket_name = "test-bucket-not-exist"
  570. file_to_put = "LICENSE"
  571. with pytest.raises(minio.error.S3Error) as exc_info:
  572. op = _get_operation_instance(monkeypatch, s3_setup)
  573. monkeypatch.setattr(op, "cos_bucket", bucket_name)
  574. op.put_file_to_object_storage(file_to_upload=os.path.join(ELYRA_ROOT_DIR, file_to_put))
  575. assert exc_info.value.code == "NoSuchBucket"
  576. def test_find_best_kernel_nb(tmpdir):
  577. source_nb_file = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
  578. nb_file = os.path.join(tmpdir, "test-notebookA.ipynb")
  579. # "Copy" nb file to destination - this test does not update the kernel or language.
  580. nb = nbformat.read(source_nb_file, 4)
  581. nbformat.write(nb, nb_file)
  582. with tmpdir.as_cwd():
  583. kernel_name = bootstrapper.NotebookFileOp.find_best_kernel(nb_file)
  584. assert kernel_name == nb.metadata.kernelspec["name"]
  585. def test_find_best_kernel_lang(tmpdir, caplog):
  586. caplog.set_level(logging.INFO)
  587. source_nb_file = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
  588. nb_file = os.path.join(tmpdir, "test-notebookA.ipynb")
  589. # "Copy" nb file to destination after updating the kernel name - forcing a language match
  590. nb = nbformat.read(source_nb_file, 4)
  591. nb.metadata.kernelspec["name"] = "test-kernel"
  592. nb.metadata.kernelspec["language"] = "PYTHON" # test case-insensitivity
  593. nbformat.write(nb, nb_file)
  594. with tmpdir.as_cwd():
  595. kernel_name = bootstrapper.NotebookFileOp.find_best_kernel(nb_file)
  596. assert kernel_name == "python3"
  597. assert len(caplog.records) == 1
  598. assert caplog.records[0].message.startswith("Matched kernel by language (PYTHON)")
  599. def test_find_best_kernel_nomatch(tmpdir, caplog):
  600. source_nb_file = os.path.join(RESOURCES_DIR, "test-notebookA.ipynb")
  601. nb_file = os.path.join(tmpdir, "test-notebookA.ipynb")
  602. # "Copy" nb file to destination after updating the kernel name and language - forcing use of updated name
  603. nb = nbformat.read(source_nb_file, 4)
  604. nb.metadata.kernelspec["name"] = "test-kernel"
  605. nb.metadata.kernelspec["language"] = "test-language"
  606. nbformat.write(nb, nb_file)
  607. with tmpdir.as_cwd():
  608. kernel_name = bootstrapper.NotebookFileOp.find_best_kernel(nb_file)
  609. assert kernel_name == "test-kernel"
  610. assert len(caplog.records) == 1
  611. assert caplog.records[0].message.startswith("Reverting back to missing notebook kernel 'test-kernel'")
  612. def test_parse_arguments():
  613. test_args = [
  614. "-e",
  615. "http://test.me.now",
  616. "-d",
  617. "test-directory",
  618. "-t",
  619. "test-archive.tgz",
  620. "-f",
  621. "test-notebook.ipynb",
  622. "-b",
  623. "test-bucket",
  624. "-p",
  625. "/tmp/lib",
  626. "-n",
  627. "test-pipeline",
  628. ]
  629. args_dict = bootstrapper.OpUtil.parse_arguments(test_args)
  630. assert args_dict["cos-endpoint"] == "http://test.me.now"
  631. assert args_dict["cos-directory"] == "test-directory"
  632. assert args_dict["cos-dependencies-archive"] == "test-archive.tgz"
  633. assert args_dict["cos-bucket"] == "test-bucket"
  634. assert args_dict["filepath"] == "test-notebook.ipynb"
  635. assert args_dict["user-volume-path"] == "/tmp/lib"
  636. assert args_dict["pipeline-name"] == "test-pipeline"
  637. assert not args_dict["inputs"]
  638. assert not args_dict["outputs"]
  639. def test_fail_missing_notebook_parse_arguments():
  640. test_args = ["-e", "http://test.me.now", "-d", "test-directory", "-t", "test-archive.tgz", "-b", "test-bucket"]
  641. with pytest.raises(SystemExit):
  642. bootstrapper.OpUtil.parse_arguments(test_args)
  643. def test_fail_missing_endpoint_parse_arguments():
  644. test_args = ["-d", "test-directory", "-t", "test-archive.tgz", "-f", "test-notebook.ipynb", "-b", "test-bucket"]
  645. with pytest.raises(SystemExit):
  646. bootstrapper.OpUtil.parse_arguments(test_args)
  647. def test_fail_missing_archive_parse_arguments():
  648. test_args = ["-e", "http://test.me.now", "-d", "test-directory", "-f", "test-notebook.ipynb", "-b", "test-bucket"]
  649. with pytest.raises(SystemExit):
  650. bootstrapper.OpUtil.parse_arguments(test_args)
  651. def test_fail_missing_bucket_parse_arguments():
  652. test_args = [
  653. "-e",
  654. "http://test.me.now",
  655. "-d",
  656. "test-directory",
  657. "-t",
  658. "test-archive.tgz",
  659. "-f",
  660. "test-notebook.ipynb",
  661. ]
  662. with pytest.raises(SystemExit):
  663. bootstrapper.OpUtil.parse_arguments(test_args)
  664. def test_fail_missing_directory_parse_arguments():
  665. test_args = ["-e", "http://test.me.now", "-t", "test-archive.tgz", "-f", "test-notebook.ipynb", "-b", "test-bucket"]
  666. with pytest.raises(SystemExit):
  667. bootstrapper.OpUtil.parse_arguments(test_args)
  668. def test_requirements_file(monkeypatch, tmpdir, caplog):
  669. elyra_requirements_file = Path(__file__).parent / "resources/test-requirements-elyra.txt"
  670. elyra_correct_number_of_packages = 19
  671. elyra_list_dict = bootstrapper.OpUtil.package_list_to_dict(elyra_requirements_file)
  672. assert len(elyra_list_dict) == elyra_correct_number_of_packages
  673. current_requirements_file = Path(__file__).parent / "resources/test-requirements-current.txt"
  674. current_correct_number_of_packages = 15
  675. current_list_dict = bootstrapper.OpUtil.package_list_to_dict(current_requirements_file)
  676. assert len(current_list_dict) == current_correct_number_of_packages
  677. mocked_package_list_to_dict = mock.Mock(return_value="default", side_effect=[elyra_list_dict, current_list_dict])
  678. monkeypatch.setattr(bootstrapper.OpUtil, "package_list_to_dict", mocked_package_list_to_dict)
  679. mocked_subprocess_run = mock.Mock(return_value="default")
  680. monkeypatch.setattr(subprocess, "run", mocked_subprocess_run)
  681. bootstrapper.OpUtil.package_install(user_volume_path=str(tmpdir))
  682. assert "WARNING: Source package 'jupyter-client' found already installed as an editable package" in caplog.text
  683. assert "WARNING: Source package 'requests' found already installed as an editable package" in caplog.text
  684. assert "WARNING: Source package 'tornado' found already installed from git" in caplog.text
  685. def test_fail_requirements_file_bad_delimiter():
  686. bad_requirements_file = Path(__file__).parent / "resources/test-bad-requirements-elyra.txt"
  687. with open(bad_requirements_file, "r") as f:
  688. file_content = f.readlines()
  689. valid_package_list = [
  690. line.strip("\n").split("==")[0] for line in file_content if not line.startswith("#") and "==" in line
  691. ]
  692. package_dict = bootstrapper.OpUtil.package_list_to_dict(bad_requirements_file)
  693. assert valid_package_list == list(package_dict.keys())
  694. def _fileChecksum(filename):
  695. hasher = hashlib.sha256()
  696. with open(filename, "rb") as afile:
  697. buf = afile.read(65536)
  698. while len(buf) > 0:
  699. hasher.update(buf)
  700. buf = afile.read(65536)
  701. checksum = hasher.hexdigest()
  702. return checksum