def execute_sagemaker_remote_tests(process_index, image, global_pytest_cache, pytest_cache_params):
    """
    Run pytest in a virtual env for a particular image. Creates a custom directory for each
    process for the pytest cache file, and stores the pytest cache in a shared dict.

    Expected to run via multiprocessing

    :param process_index: id for the process; used to create a custom cache dir
    :param image: ECR url
    :param global_pytest_cache: shared Manager().dict() for cache merging
    :param pytest_cache_params: parameters required for building the s3 file path
    """
    account_id = os.getenv("ACCOUNT_ID", boto3.client("sts").get_caller_identity()["Account"])
    pytest_cache_util = PytestCache(boto3.client("s3"), account_id)
    pytest_command, path, tag, job_type = generate_sagemaker_pytest_cmd(image, SAGEMAKER_REMOTE_TEST_TYPE)
    context = Context()
    with context.cd(path):
        context.run(f"virtualenv {tag}")
        with context.prefix(f"source {tag}/bin/activate"):
            context.run("pip install -r requirements.txt", warn=True)
            pytest_cache_util.download_pytest_cache_from_s3_to_local(
                path, **pytest_cache_params, custom_cache_directory=str(process_index)
            )
            # Add -o cache_dir with a per-process directory so parallel runs do not clobber each other's cache
            pytest_command += f" -o cache_dir={os.path.join(str(process_index), '.pytest_cache')}"
            res = context.run(pytest_command, warn=True)
            metrics_utils.send_test_result_metrics(res.return_code)
            cache_json = pytest_cache_util.convert_pytest_cache_file_to_json(
                path, custom_cache_directory=str(process_index)
            )
            global_pytest_cache.update(cache_json)
            if res.failed:
                raise DLCSageMakerRemoteTestFailure(
                    f"{pytest_command} failed with error code: {res.return_code}\n"
                    f"Traceback:\n{res.stdout}"
                )
    return None
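
# Usage sketch (illustrative, not part of this module): because execute_sagemaker_remote_tests
# takes a process index and a shared Manager().dict(), it is meant to be fanned out with one
# process per image. The driver below is an assumption about the calling side; `images` and
# `pytest_cache_params` are placeholders for values assembled elsewhere.
from multiprocessing import Manager, Pool


def run_sagemaker_remote_tests_sketch(images, pytest_cache_params):
    with Manager() as manager:
        # Shared dict that every worker updates with its per-process pytest cache
        global_pytest_cache = manager.dict()
        with Pool(processes=len(images)) as pool:
            # A DLCSageMakerRemoteTestFailure raised in a worker re-raises here
            pool.starmap(
                execute_sagemaker_remote_tests,
                [
                    (index, image, global_pytest_cache, pytest_cache_params)
                    for index, image in enumerate(images)
                ],
            )
        # Snapshot the merged cache before the manager shuts down
        return global_pytest_cache.copy()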
def send_scheduler_requests(requester, image):
    """
    Send a PR test request through the requester, and wait for the response.
    If the test completed or encountered a runtime error, create local XML reports;
    if scheduling failed, raise with the failure reason.

    :param requester: JobRequester object
    :param image: <string> ECR URI
    """
    # Note: 3 is the max number of instances required for any test, so tests are scheduled conservatively here.
    identifier = requester.send_request(image, "PR", 3)
    image_tag = image.split(":")[-1]
    report_path = os.path.join(os.getcwd(), "test", f"{image_tag}.xml")
    while True:
        query_status_response = requester.query_status(identifier)
        test_status = query_status_response["status"]
        if test_status == "completed":
            LOGGER.info(f"Test for image {image} completed.")
            logs_response = requester.receive_logs(identifier)
            LOGGER.info(
                f"Receive logs success for ticket {identifier.ticket_name}, report path: {report_path}"
            )
            print_log_stream(logs_response)
            metrics_utils.send_test_result_metrics(0)
            with open(report_path, "w") as xml_report:
                xml_report.write(logs_response["XML_REPORT"])
            break
        elif test_status == "runtimeError":
            logs_response = requester.receive_logs(identifier)
            with open(report_path, "w") as xml_report:
                xml_report.write(logs_response["XML_REPORT"])
            print_log_stream(logs_response)
            metrics_utils.send_test_result_metrics(1)
            raise Exception(f"Test for image {image} ran into runtime error.")
        elif test_status == "failed":
            metrics_utils.send_test_result_metrics(1)
            raise Exception(
                f"Scheduling failed for image {image}. Reason: {query_status_response['reason']}"
            )
        # Still pending: wait before polling again (the interval here is an assumed default, not from the source)
        time.sleep(30)
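
# Usage sketch (illustrative, not part of this module): send_scheduler_requests blocks until its
# ticket resolves, so a driver typically issues one call per image concurrently. This assumes a
# JobRequester can be constructed without arguments and shared across threads; the real
# constructor and thread-safety guarantees may differ.
from multiprocessing.pool import ThreadPool


def run_scheduled_pr_tests_sketch(images):
    requester = JobRequester()  # assumption: the real constructor may take credentials/config
    with ThreadPool(processes=len(images)) as pool:
        # Failures surface here as the exceptions raised inside send_scheduler_requests
        pool.starmap(send_scheduler_requests, [(requester, image) for image in images])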