def test_lakecreator_extractor(zip_file, kube: TestClient):
    """Run the Example-2 extraction notebook as an OrbitJob for one zip file.

    Looks up the schema matching ``zip_file`` to decide the target S3 folder,
    submits the notebook in the ``lake-creator`` namespace, waits for the job
    to finish, and asserts it completed successfully.

    Args:
        zip_file: parametrized fixture; a dict with the S3 object ``Key``.
        kube: cluster client fixture (required for cluster access).
    """
    bucketName = lake_creator_config.get("bucketName")
    extractedFolder = lake_creator_config.get("extractedFolder")
    schemas = get_schemas(bucketName, 'landing/cms/schema/')
    file = zip_file['Key']
    schema = get_schema(schemas, file)
    # schema[0] may be empty/None when no schema subfolder applies; the
    # notebook then extracts into per-file subdirectories (use_subdirs=True).
    s3_data_folder = os.path.join(extractedFolder, schema[0] if schema[0] else "")
    notebook_to_run = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            # generateName lets the API server append a unique suffix.
            "generateName": "test-orbit-job-lake-creator-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {"concurrentProcesses": 1},
                "podSetting": "orbit-runner-support-small"
            },
            "tasks": [{
                "notebookName": "Example-2-Extract-Files.ipynb",
                "sourcePath": "/home/jovyan/shared/samples/notebooks/A-LakeCreator",
                "targetPath": "/home/jovyan/shared/regression/notebooks/A-LakeCreator",
                "params": {
                    "bucketName": bucketName,
                    "zipFileName": file,
                    "targetFolder": s3_data_folder,
                    "use_subdirs": "False" if schema[0] else "True"
                }
            }]
        }
    }
    logger.info(notebook_to_run)
    lakecreator = OrbitJobCustomApiObject(notebook_to_run)
    lakecreator.create(namespace="lake-creator")
    # Wait until the OrbitJob resource is created/ready.
    lakecreator.wait_until_ready(timeout=60)
    # Wait for the underlying job to run to completion (or fail).
    lakecreator.wait_until_job_completes(timeout=600)
    current_status = lakecreator.get_status().get("orbitJobOperator").get(
        "jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup before asserting so the resource is removed even on failure.
    lakecreator.delete()
    assert current_status == JOB_COMPLETION_STATUS
def test_lakecreator_lf(kube: TestClient):
    """Run the Lake Formation secured-database notebook as an OrbitJob.

    Submits Example-4 in the ``lake-creator`` namespace on the large runner
    podsetting, waits up to 30 minutes for completion, and asserts success.

    Args:
        kube: cluster client fixture (required for cluster access).
    """
    notebook_to_run = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            # generateName lets the API server append a unique suffix.
            "generateName": "test-orbit-job-lake-creator-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {"concurrentProcesses": 1},
                "podSetting": "orbit-runner-support-large",
            },
            "tasks": [{
                "notebookName": "Example-4-LakeFormation-Secured-DB.ipynb",
                "sourcePath": "/home/jovyan/shared/samples/notebooks/A-LakeCreator",
                "targetPath": "/home/jovyan/shared/regression/notebooks/A-LakeCreator",
            }]
        }
    }
    logger.info(f"notebook_to_run={notebook_to_run}")
    lakecreator = OrbitJobCustomApiObject(notebook_to_run)
    lakecreator.create(namespace="lake-creator")
    # Wait until the OrbitJob resource is created/ready.
    lakecreator.wait_until_ready(timeout=60)
    # Wait for the underlying job to run to completion (or fail).
    lakecreator.wait_until_job_completes(timeout=1800)
    current_status = lakecreator.get_status().get("orbitJobOperator").get(
        "jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup before asserting so the resource is removed even on failure.
    lakecreator.delete()
    assert current_status == JOB_COMPLETION_STATUS
def test_lakeuser_notebooks_xfail(kube: TestClient) -> None:
    """Run the deliberate-failure notebook and assert the job reports FAILED.

    Args:
        kube: cluster client fixture (required for cluster access).
    """
    target = {
        "name": "Example-90-Failure-Behavior.ipynb",
        "folder": "B-DataAnalyst"
    }
    task = {
        "notebookName": target['name'],
        "sourcePath": f"shared/samples/notebooks/{target['folder']}",
        "targetPath": f"shared/regression/notebooks/{target['folder']}",
        "params": {}
    }
    job_manifest = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-user-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {
                    "concurrentProcesses": 1
                },
                "podSetting": "orbit-runner-support-small"
            },
            "tasks": [task]
        }
    }
    logger.info(job_manifest)
    job = OrbitJobCustomApiObject(job_manifest)
    job.create(namespace="lake-user")
    # Block until the OrbitJob resource exists and is ready.
    job.wait_until_ready(timeout=120)
    # Block until the job run finishes one way or the other.
    job.wait_until_job_completes(timeout=1200)
    status = job.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={status}")
    # Tear down the resource before asserting.
    job.delete()
    # This notebook is expected to fail; anything else is a regression.
    assert status == JOB_FAILED_STATUS
def test_lakeuser_notebooks(notebook_to_run, kube: TestClient) -> None:
    """Run a parametrized lake-user notebook as an OrbitJob and assert success.

    Args:
        notebook_to_run: parametrized fixture; dict with ``name`` and ``folder``.
        kube: cluster client fixture (required for cluster access).
    """
    logger.info(f"notebook_to_run={notebook_to_run}")
    stem = notebook_to_run['name'].split(".")[0]
    # SageMaker notebooks need the xlarge runner; everything else runs on large.
    if stem in lake_creator_list_of_files['sagemaker_notebooks_list']:
        podsetting_name = "orbit-runner-support-xlarge"
    else:
        podsetting_name = "orbit-runner-support-large"
    job_manifest = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-user-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {
                    "concurrentProcesses": 1
                },
                "podSetting": podsetting_name
            },
            "tasks": [{
                "notebookName": notebook_to_run['name'],
                "sourcePath": f"shared/samples/notebooks/{notebook_to_run['folder']}",
                "targetPath": f"shared/regression/notebooks/{notebook_to_run['folder']}",
                "params": {}
            }]
        }
    }
    logger.info(job_manifest)
    job = OrbitJobCustomApiObject(job_manifest)
    job.create(namespace="lake-user")
    # Block until the OrbitJob resource exists and is ready.
    job.wait_until_ready(timeout=120)
    # Block until the job run finishes one way or the other.
    job.wait_until_job_completes(timeout=7200)
    status = job.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={status}")
    # Tear down the resource before asserting.
    job.delete()
    assert status == JOB_COMPLETION_STATUS
def test_lakeadmin_2_image_with_apps(kube: TestClient) -> None:
    """Run the '2-Image_with_apps' admin notebook as an OrbitJob and assert success.

    Args:
        kube: cluster client fixture (required for cluster access).
    """
    import copy

    # BUG FIX: the original aliased the module-level LAKE_ADMIN_JOB template
    # and mutated it in place, leaking this test's generateName/tasks into
    # every other test that shares the template. Deep-copy it instead.
    lake_admin_job_image_with_apps = copy.deepcopy(LAKE_ADMIN_JOB)
    lake_admin_job_image_with_apps["metadata"]["generateName"] = "test-orbit-job-lake-admin-image-with-apps-"
    lake_admin_job_image_with_apps["spec"]["tasks"] = [{
        "notebookName": "2-Image_with_apps.ipynb",
        "sourcePath": "shared/samples/notebooks/M-Admin",
        "targetPath": "shared/regression/notebooks/M-Admin",
        "params": {}
    }]
    logger.info(lake_admin_job_image_with_apps)
    lakeadmin = OrbitJobCustomApiObject(lake_admin_job_image_with_apps)
    lakeadmin.create(namespace="lake-admin")
    # Wait until the OrbitJob resource is created/ready.
    lakeadmin.wait_until_ready(timeout=120)
    # Wait (up to 2h, polling every 30s) for the job to finish.
    lakeadmin.wait_until_job_completes(timeout=7200, interval=30)
    current_status = lakeadmin.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup before asserting so the resource is removed even on failure.
    lakeadmin.delete()
    assert current_status == JOB_COMPLETION_STATUS
def test_lakecreator_glue_table_creator(datafile, kube: TestClient):
    """Run the Athena database-load notebook as an OrbitJob for one data file.

    Resolves the schema for ``datafile``, submits Example-3 in the
    ``lake-creator`` namespace with a unique target prefix, waits for
    completion, and asserts success.

    Args:
        datafile: parametrized fixture; an S3 key (path string) to load.
        kube: cluster client fixture (required for cluster access).
    """
    from datetime import datetime  # hoisted from mid-body to function top

    region = workspace.get("region")
    bucket_name = lake_creator_config.get("bucketName")
    database_name = lake_creator_config.get("database_name")
    schemas = get_schemas(bucket_name, 'landing/cms/schema/')
    p = Path(datafile).parent
    # CONSISTENCY FIX: use logger.info like the sibling tests, not print().
    logger.info(f"Path={str(p)}")
    schema = get_schema(schemas, datafile)
    # Unique suffix so concurrent/repeated runs get distinct target prefixes.
    datetimestring = datetime.now().strftime("%m%d%Y%H%M%S%f")
    notebook_to_run = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            # generateName lets the API server append a unique suffix.
            "generateName": "test-orbit-job-lake-creator-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {"concurrentProcesses": 1},
                "podSetting": "orbit-runner-support-small",
                "env": [{
                    'name': 'AWS_ORBIT_S3_BUCKET',
                    'value': bucket_name
                }]
            },
            "tasks": [{
                "notebookName": "Example-3-Load-Database-Athena.ipynb",
                "sourcePath": "/home/jovyan/shared/samples/notebooks/A-LakeCreator",
                "targetPath": "/home/jovyan/shared/regression/notebooks/A-LakeCreator",
                "targetPrefix": f"unsecured-{datetimestring}",
                "params": {
                    "source_bucket_name": bucket_name,
                    "target_bucket_name": bucket_name,
                    "database_name": database_name,
                    "schema_dir": "landing/cms/schema",
                    "file_path": str(p),
                    "region": region
                }
            }]
        }
    }
    logger.info(notebook_to_run)
    lakecreator = OrbitJobCustomApiObject(notebook_to_run)
    lakecreator.create(namespace="lake-creator")
    # Wait until the OrbitJob resource is created/ready.
    lakecreator.wait_until_ready(timeout=60)
    # Wait for the underlying job to run to completion (or fail).
    lakecreator.wait_until_job_completes(timeout=1200)
    current_status = lakecreator.get_status().get("orbitJobOperator").get(
        "jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup before asserting so the resource is removed even on failure.
    lakecreator.delete()
    assert current_status == JOB_COMPLETION_STATUS