def test_package(self):
    """E2E of the 'train' package step: an empty result object must fail
    the output contract; after the sample package script populates it and
    run metadata plus an execution profile are stamped on, it must pass.

    Fixes vs. original: step_id/run_id are stored as str (consistent with
    the other step tests), and randint receives ints — float arguments
    such as 7e9 raise ValueError on Python >= 3.11.
    """
    step_name = "train"
    expected_results_schema_type = "package_results"  # MUST BE A LOADED SCHEMA
    expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

    step_execution_object = StepExecution(self.input_parameters,
                                          self.execution_parameters)

    results_ml_object = MLObject()
    results_ml_object.set_type(
        schema_type=expected_results_schema_type,
        schema_version=expected_results_schema_version,
    )

    # Should error due to missing fields
    with self.assertRaises(ValueError) as context:
        verify_result_contract(
            results_ml_object,
            expected_results_schema_type,
            expected_results_schema_version,
            step_name,
        )

    self.assertTrue(
        f"Error verifying result object for '{step_name}.output'" in str(
            context.exception))

    # These names are read by the exec'd sample script below — do not rename.
    result_ml_object_schema_type = expected_results_schema_type  # noqa
    result_ml_object_schema_version = expected_results_schema_version  # noqa

    exec(
        (Path("tests") / "sample_package_execution.py").read_text(),
        globals(),
        locals(),
    )

    results_ml_object.run_date = datetime.datetime.now()
    # str(...) for consistency with the sibling tests; raw uuid.UUID
    # objects would fail string-typed schema fields.
    results_ml_object.step_id = str(uuid.uuid4())
    results_ml_object.run_id = str(uuid.uuid4())

    results_ml_object.execution_profile.system_memory_utilization = random()
    # randint requires ints — float literals (7e9) raise on Python >= 3.11.
    results_ml_object.execution_profile.network_traffic_in_bytes = randint(
        int(7e9), int(9e10))
    results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
    results_ml_object.execution_profile.disk_io_utilization = random()
    results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
        random())
    results_ml_object.execution_profile.cpu_utilization = random()
    results_ml_object.execution_profile.gpu_utilization = random()
    results_ml_object.execution_profile.gpu_memory_allocation = random()

    self.assertTrue(
        verify_result_contract(
            results_ml_object,
            expected_results_schema_type,
            expected_results_schema_version,
            step_name,
        ))
def test_e2e(self):
    """End-to-end: an empty result object must fail the output contract,
    while the object produced by StepExecution.execute must satisfy it."""
    MLSchema.populate_registry()
    MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" / "schemas")

    # Sample parameters for the step under test (intentionally empty here).
    input_parameters = {
        # Put sample required input parameters here
    }
    execution_parameters = {
        # Put sample required execution parameters here
    }

    # The only per-step settings in this test.
    step_name = "process_data"
    expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
    expected_results_schema_version = "0.0.1"  # MUST BE A SEMVER

    executor = StepExecution(input_parameters, execution_parameters)

    empty_result = MLObject()
    empty_result.set_type(
        schema_type=expected_results_schema_type,
        schema_version=expected_results_schema_version,
    )

    # An object with no fields populated must be rejected.
    with self.assertRaises(ValueError) as raised:
        verify_result_contract(
            empty_result,
            expected_results_schema_type,
            expected_results_schema_version,
            step_name,
        )
    expected_msg = f"Error verifying result object for '{step_name}.output'"
    self.assertTrue(expected_msg in str(raised.exception))

    # A real execution should produce a contract-conforming object.
    produced = executor.execute(
        result_object_schema_type=expected_results_schema_type,
        result_object_schema_version=expected_results_schema_version,
    )
    produced.run_date = datetime.datetime.now()
    produced.step_id = str(uuid.uuid4())
    produced.run_id = str(uuid.uuid4())

    self.assertTrue(
        verify_result_contract(produced, expected_results_schema_type,
                               expected_results_schema_version, step_name))
def test_process_data(self):
    """Full E2E of Process Data: an empty result object must fail the
    output contract; after the sample execution script populates a fresh
    object and run metadata plus an execution profile are stamped on, it
    must pass.

    Fix vs. original: randint receives ints — float arguments such as 7e9
    raise ValueError on Python >= 3.11.
    """
    # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
    step_name = "process_data"
    expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
    expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

    results_ml_object = MLObject()
    results_ml_object.set_type(
        schema_type=expected_results_schema_type,
        schema_version=expected_results_schema_version,
    )

    # Should error due to missing fields
    with self.assertRaises(ValueError) as context:
        verify_result_contract(
            results_ml_object,
            expected_results_schema_type,
            expected_results_schema_version,
            step_name,
        )

    self.assertTrue(
        f"Error verifying result object for '{step_name}.output'" in str(
            context.exception))

    # Fresh object; the exec'd sample script below is expected to set its
    # type and populate it. It reads the result_ml_object_* names from
    # locals(), so do not rename them.
    results_ml_object = MLObject()
    result_ml_object_schema_type = expected_results_schema_type  # noqa
    result_ml_object_schema_version = expected_results_schema_version  # noqa

    exec(
        (Path("tests") / "sample_process_data_execution.py").read_text(),
        globals(),
        locals(),
    )

    results_ml_object.run_date = datetime.datetime.now()
    results_ml_object.step_id = str(uuid.uuid4())
    results_ml_object.run_id = str(uuid.uuid4())

    results_ml_object.execution_profile.system_memory_utilization = random()
    # randint requires ints — float literals (7e9) raise on Python >= 3.11.
    results_ml_object.execution_profile.network_traffic_in_bytes = randint(
        int(7e9), int(9e10))
    results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
    results_ml_object.execution_profile.disk_io_utilization = random()
    results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
        random())
    results_ml_object.execution_profile.cpu_utilization = random()
    results_ml_object.execution_profile.gpu_utilization = random()
    results_ml_object.execution_profile.gpu_memory_allocation = random()

    self.assertTrue(
        verify_result_contract(
            results_ml_object,
            expected_results_schema_type,
            expected_results_schema_version,
            step_name,
        ))
def execute_step(
    execution_file: str,
    workflow_object: MLObject,
    input_object: MLObject,
    execution_object: MLObject,
    step_name,
    run_id,
):
    """Execute one workflow step and return its validated result object.

    If execution_file is None, runs the default StepExecution path;
    otherwise executes the given Python file, which is expected to
    populate 'results_ml_object'. The result is stamped with run metadata
    and validated against the step's output contract.

    Raises KnownException when the execution file is missing or when the
    step yields no usable results_ml_object.

    Fix vs. original: the log message used '${execution_file}' — a
    shell-style placeholder that rendered a stray literal '$' inside the
    f-string.
    """
    rootLogger = setupLogger().get_root_logger()

    results_ml_object = MLObject()

    if execution_file is None:
        msg = "Did not find any value for INPUT_EXECUTION_FILE, using /src/step_execution.py"
        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)
        print("{:>15}".format("ok"))  # Finished loading from environment

        step_execution_object = StepExecution(input_object, execution_object)
        results_ml_object = step_execution_object.execute(
            result_object_schema_type=workflow_object.steps[
                step_name
            ].output.schema_type,
            result_object_schema_version=workflow_object.steps[
                step_name
            ].output.schema_version,
        )
    else:
        msg = f"Executing '{execution_file}' (found in INPUT_EXECUTION_FILE env var)"
        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        execution_file_path = Path(execution_file)
        if execution_file_path.exists() is False:
            raise KnownException(
                f"'{execution_file}' was provided as the file, but it does not appear to exist at {str(execution_file_path.resolve())} -- exiting."
            )

        # The below are used in the execution file
        result_ml_object_schema_type = workflow_object.steps[  # noqa
            step_name
        ].output.schema_type
        result_ml_object_schema_version = workflow_object.steps[  # noqa
            step_name
        ].output.schema_version

        # NOTE(review): assignments inside exec() do not rebind this
        # function's locals in Python 3; the script is presumably expected
        # to mutate results_ml_object in place (set_type + field writes) —
        # confirm against the sample execution files.
        exec(execution_file_path.read_text(), globals(), locals())

        print("{:>15}".format("ok"))  # Finished executing step

    # Guard against execution files that never produced a usable result.
    if (results_ml_object is None) or (len(results_ml_object) == 0):
        raise KnownException(
            "No value was assigned to the variable 'results_ml_object' -- exiting."
        )
    elif isinstance(results_ml_object, MLObject) is False:
        raise KnownException(
            "The variable 'results_ml_object' was not of type MLObject -- exiting."
        )

    results_ml_object.run_id = run_id
    results_ml_object.step_id = str(uuid.uuid4())
    results_ml_object.run_date = datetime.datetime.now().isoformat()

    # Using the below to validate the object, even though we already have it created.
    load_contract_object(
        parameters=results_ml_object.dict_without_internal_variables(),
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="output",
    )

    return results_ml_object
def main(self):
    """Drive a three-step demo pipeline (process_data -> train -> package):
    run each step's sample container with generated parameters and chain
    each step's base64-encoded output into the next step's input."""
    # Demo wish-list kept for reference; not used by the code below.
    c = """
- Dashboard for runs
-- Size
-- Likelihood of bias
-- Time for run
-- Accuracy
- Filter by version
- Look up at top version and show metadata going in and out
- Show bad input (e.g. it's null) and what happens when you run it
- Show when you add a new step - how you can compare those with other versions
"""  # noqa

    credentials = Credentials.metastore_credentials_prod
    MLSchema.append_schema_to_registry(Path(".parameters") / "schemas")

    repo_name = "mlspec"
    # Captures the base64 payload the action prints via '::set-output';
    # the trailing '\\\\' in the pattern matches a literal backslash.
    output_regex = "::set-output name=output_base64_encoded::(.*?)\\\\"

    # Random run start within 60 days of 2020-01-01 (5184000 s = 60 days).
    run_date_start = datetime.datetime(2020, 1, 1) + datetime.timedelta(
        seconds=random.randrange(0, 5184000))
    run_id = str(uuid.uuid4())

    # ---- Step 1: process_data -------------------------------------------
    step_name = "process_data"

    # Synthetic data-source descriptor fed to the step as input parameters.
    data_source = MLObject()
    data_source.set_type("500.0.1", "data_source")
    data_source.run_id = run_id
    data_source.step_id = str(uuid.uuid4())
    data_source.run_date = str(run_date_start.isoformat())
    data_source.source_id = str(uuid.uuid4())
    data_source.source_uri = f"https://internal.contoso.com/datasets/raw_nlp_data-{run_date_start.strftime('%Y-%m-%d')}-{get_random_md5()}"  # noqa
    data_source.extended_properties = {}

    # Randomized execution parameters for the processing run.
    data_process_run = MLObject()
    data_process_run.set_type("500.0.1", "data_process_run")
    data_process_run.nodes = random.randrange(1, 4) * 2
    data_process_run.cpu_per_node = f"{random.randrange(2,8) * 2}"
    data_process_run.ram_per_node = f"{random.randrange(1,16) * 8}Gi"
    data_process_run.gpu_required = (random.randrange(1, 2) % 2) == 0
    data_process_run.output_root_path = (
        "https://internal.contoso.com/datasets/processed_data/")
    data_process_run.base_image = random_base_image()
    data_process_run.machine_type = random_machine_type()
    data_process_run.run_id = run_id
    data_process_run.step_id = str(uuid.uuid4())
    data_process_run.run_date = str(run_date_start.isoformat())
    data_process_run.extended_properties = {}

    # Environment variables for the container, expressed as YAML.
    environment_dict = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_raw: {data_source.dict_without_internal_variables()}
INPUT_execution_parameters_raw: {data_process_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
""")

    self.run_container(repo_name, "mlspeclib-action-samples-process-data",
                       environment_dict)

    # Scrape the step's base64-encoded output from the captured stdout.
    buff_val = self.buffer.getvalue()
    m = re.search(output_regex, buff_val)
    process_data_encoded_val = m.group(1)
    # Below is for debugging, we're ok leaving it in base64 encoded
    # process_data_output_value = base64.urlsafe_b64decode(process_data_encoded_val)

    # Reset the captured-stdout buffer before the next container run.
    self.buffer.truncate(0)
    self.buffer.seek(0)

    # ---- Step 2: train ---------------------------------------------------
    step_name = "train"

    # Randomized training-run execution parameters.
    training_run = MLObject()
    training_run.set_type("500.0.1", "training_run")
    training_run.nodes = random.randrange(1, 4) * 2
    training_run.cpu_per_node = random.randrange(2, 8) * 2
    training_run.ram_per_node = f"{random.randrange(1,16) * 8}Gi"
    training_run.gpu_required = (random.randrange(1, 2) % 2) == 0
    training_run.output_path = "test/models/output"
    # Learning rate drawn from {1, 0.1, ..., 1e-4}.
    training_run.training_params.learning_rate = 1 / (pow(
        10, random.randint(0, 4)))
    training_run.training_params.loss = random.random()
    training_run.training_params.batch_size = random.randrange(1, 5) * 500
    training_run.training_params.epoch = random.randrange(1, 8) * 25
    training_run.training_params.optimizer = ["SGD"]
    training_run.training_params.other_tags = {
        "pii": False,
        "data_sha": "8b03f70"
    }
    training_run.extended_properties = {}

    # Note: the previous step's output is chained in via
    # INPUT_input_parameters_base64.
    environment_dict_train = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_base64: {process_data_encoded_val}
INPUT_execution_parameters_raw: {training_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
""")

    self.run_container(repo_name, "mlspeclib-action-samples-train",
                       environment_dict_train)

    buff_val = self.buffer.getvalue()
    m = re.search(output_regex, buff_val)
    train_encoded_val = m.group(1)
    # train_output_value = base64.urlsafe_b64decode(train_encoded_val)

    # Reset captured stdout again before the final step.
    self.buffer.truncate(0)
    self.buffer.seek(0)

    # ---- Step 3: package -------------------------------------------------
    step_name = "package"

    # Packaging-run execution parameters (container build settings).
    package_run = MLObject()
    package_run.set_type("500.0.1", "package_run")
    package_run.run_id = run_id
    package_run.step_id = str(uuid.uuid4())
    package_run.run_date = run_date_start.isoformat()
    package_run.model_source = "/nfs/trained_models/nlp"
    package_run.container_registry = f"https://registry.hub.docker.com/v1/repositories/contoso/nlp/{get_random_md5()}"  # noqa
    package_run.agent_pool = "nlp-build-pool"
    package_run.build_args = ["arg1", "arg2", "arg3"]
    package_run.extended_properties = {}
    package_run.secrets = {
        "credentials": "AZURE_CREDENTIALS",
        "docker_username": "******",
        "docker_password": "******",
    }

    environment_dict_package = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_base64: {train_encoded_val}
INPUT_execution_parameters_raw: {package_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
""")

    self.run_container(repo_name, "mlspeclib-action-samples-package",
                       environment_dict_package)

    # Final step's decoded output is printed for inspection.
    buff_val = self.buffer.getvalue()
    m = re.search(output_regex, buff_val)
    encoded_val = m.group(1)
    print(base64.urlsafe_b64decode(encoded_val))
    self.buffer.flush()
def test_e2e(self):
    """End-to-end of process_data: an empty result object must fail the
    output contract; the executed step's result, once stamped with run
    metadata and an execution profile, must pass it.

    Fix vs. original: randint receives ints — float arguments such as 7e9
    raise ValueError on Python >= 3.11.
    """
    MLSchema.populate_registry()
    MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" / "schemas")

    # Execute step
    input_parameters = {
        # Put sample required input parameters here
    }
    execution_parameters = {
        # Put sample required execution parameters here
    }

    # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
    step_name = "process_data"
    expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
    expected_results_schema_version = "500.0.1"  # MUST BE A SEMVER

    step_execution_object = StepExecution(input_parameters,
                                          execution_parameters)

    results_object = MLObject()
    results_object.set_type(
        schema_type=expected_results_schema_type,
        schema_version=expected_results_schema_version,
    )

    # Should error due to missing fields
    with self.assertRaises(ValueError) as context:
        verify_result_contract(
            results_object,
            expected_results_schema_type,
            expected_results_schema_version,
            step_name,
        )

    self.assertTrue(
        f"Error verifying result object for '{step_name}.output'" in str(
            context.exception))

    results_object = step_execution_object.execute(
        result_object_schema_type=expected_results_schema_type,
        result_object_schema_version=expected_results_schema_version,
    )

    results_object.run_date = datetime.datetime.now()
    results_object.step_id = str(uuid.uuid4())
    results_object.run_id = str(uuid.uuid4())

    results_object.execution_profile.system_memory_utilization = random()
    # randint requires ints — float literals (7e9) raise on Python >= 3.11.
    results_object.execution_profile.network_traffic_in_bytes = randint(
        int(7e9), int(9e10))
    results_object.execution_profile.gpu_temperature = randint(70, 130)
    results_object.execution_profile.disk_io_utilization = random()
    results_object.execution_profile.gpu_percent_of_time_accessing_memory = (
        random())
    results_object.execution_profile.cpu_utilization = random()
    results_object.execution_profile.gpu_utilization = random()
    results_object.execution_profile.gpu_memory_allocation = random()

    self.assertTrue(
        verify_result_contract(results_object, expected_results_schema_type,
                               expected_results_schema_version, step_name))