Exemplo n.º 1
0
    def test_package(self):
        step_name = "train"
        expected_results_schema_type = "package_results"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(self.input_parameters,
                                              self.execution_parameters)

        results_ml_object = MLObject()
        results_ml_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        result_ml_object_schema_type = expected_results_schema_type
        result_ml_object_schema_version = expected_results_schema_version

        exec(
            (Path("tests") / "sample_package_execution.py").read_text(),
            globals(),
            locals(),
        )

        results_ml_object.run_date = datetime.datetime.now()
        results_ml_object.step_id = uuid.uuid4()
        results_ml_object.run_id = uuid.uuid4()

        results_ml_object.execution_profile.system_memory_utilization = random(
        )
        results_ml_object.execution_profile.network_traffic_in_bytes = randint(
            7e9, 9e10)
        results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
        results_ml_object.execution_profile.disk_io_utilization = random()
        results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
            random())
        results_ml_object.execution_profile.cpu_utilization = random()
        results_ml_object.execution_profile.gpu_utilization = random()
        results_ml_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            ))
    def test_e2e(self):
        MLSchema.populate_registry()
        MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" /
                                           "schemas")

        # Execute step
        input_parameters = {
            # Put sample required input parameters here
        }

        execution_parameters = {
            # Put sample required execution parameters here
        }

        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "0.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(input_parameters,
                                              execution_parameters)

        results_object = MLObject()
        results_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        results_object = step_execution_object.execute(
            result_object_schema_type=expected_results_schema_type,
            result_object_schema_version=expected_results_schema_version,
        )

        results_object.run_date = datetime.datetime.now()
        results_object.step_id = uuid.uuid4()
        results_object.run_id = uuid.uuid4()

        self.assertTrue(
            verify_result_contract(results_object,
                                   expected_results_schema_type,
                                   expected_results_schema_version, step_name))
Exemplo n.º 3
0
def mlobject_from_dict(schema_type, schema_version, dict_value):
    ml_object = MLObject()
    ml_object.set_type(
        schema_version=schema_version,
        schema_type=schema_type,
    )
    dict_value['schema_type'] = schema_type
    dict_value['schema_version'] = schema_version
    MLObject.update_tree(ml_object, dict_value)
    errors = ml_object.validate()

    if errors:
        return None, errors
    else:
        return ml_object, None
    def execute(self, result_object_schema_type, result_object_schema_version):
        # Create Result object
        results_object = MLObject()
        results_object.set_type(
            schema_type=result_object_schema_type,
            schema_version=result_object_schema_version,
        )

        # Mocked up results
        return_dict = YAML.safe_load(
            """
servable: True
package_size: 1029310298
tested_platforms: ['kubeflow', 'azureml', 'sagemaker']
model_source:
    servable_model:
        data_store: 's3'
        bucket: 'nlp-bucket'
        path: 'a231653454ca8e07f42adc7941aeec6b'
serving_container_image:
    container_image_url: 'https://hub.docker.com/repository/docker/contoso/nlp-base-images'
"""
        )

        results_object.servable = return_dict["servable"]
        results_object.tested_platforms = return_dict["tested_platforms"]
        results_object.package_size = return_dict["package_size"]
        results_object.model_source.servable_model.data_store = return_dict[
            "model_source"
        ]["servable_model"]["data_store"]
        results_object.model_source.servable_model.bucket = return_dict["model_source"][
            "servable_model"
        ]["bucket"]
        results_object.model_source.servable_model.path = return_dict["model_source"][
            "servable_model"
        ]["path"]
        results_object.serving_container_image.container_image_url = return_dict[
            "serving_container_image"
        ]["container_image_url"]

        return results_object
    def execute(self, result_object_schema_type, result_object_schema_version):
        # Create Result object
        results_object = MLObject()
        results_object.set_type(
            schema_type=result_object_schema_type,
            schema_version=result_object_schema_version,
        )

        # Mocked up results
        return_dict = {
            "data_output_path": str(Path("tests/data/data_output.csv")),
            "data_statistics_path": str(Path("tests/data/data_stats.csv")),
            "data_schemas_path": str(Path("tests/data/data_schemas.yaml")),
            "feature_file_path": str(Path("tests/data/feature_file.yaml")),
        }

        results_object.data_output_path = return_dict["data_output_path"]
        results_object.data_statistics_path = return_dict["data_statistics_path"]
        results_object.data_schemas_path = return_dict["data_schemas_path"]
        results_object.feature_file_path = return_dict["feature_file_path"]

        return results_object
    def execute(self, result_object_schema_type, result_object_schema_version):
        # Create Result object
        results_object = MLObject()
        results_object.set_type(
            schema_type=result_object_schema_type,
            schema_version=result_object_schema_version,
        )

        # Mocked up results
        return_dict = {
            "training_execution_id": uuid.uuid4(),
            "accuracy": float(f"{randrange(93000,99999)/100000}"),
            "global_step": int(f"{randrange(50,150) * 100}"),
            "loss": float(f"{randrange(10000,99999)/1000000}")
        }

        results_object.training_execution_id = return_dict[
            "training_execution_id"]
        results_object.accuracy = return_dict["accuracy"]
        results_object.global_step = return_dict["global_step"]
        results_object.loss = return_dict["loss"]

        return results_object
    def execute(self, result_object_schema_type, result_object_schema_version):
        # Create Result object
        results_object = MLObject()
        results_object.set_type(
            schema_type=result_object_schema_type,
            schema_version=result_object_schema_version,
        )

        # Mocked up results
        return_dict = {
            "training_execution_id": str(uuid.uuid4()),
            "accuracy": float(random.randrange(0, 100) / 100),
            "global_step": 10**random.randrange(2, 4),
            "loss": float(random.randrange(1000, 9999) / 1000),
        }

        results_object.training_execution_id = return_dict[
            "training_execution_id"]
        results_object.accuracy = return_dict["accuracy"]
        results_object.global_step = return_dict["global_step"]
        results_object.loss = return_dict["loss"]

        errors = results_object.validate()  # noqa
        return results_object
Exemplo n.º 8
0
from mlspeclib import MLSchema, MLObject
from random import randint, random

results_ml_object = MLObject()

results_ml_object.set_type(
    schema_type=result_ml_object_schema_type,
    schema_version=result_ml_object_schema_version,
)

# Execute code below. Examples:
"""
1) Execute on the worker node this step is running on

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

dataset = pd.read_csv('Weather.csv') # Example dataset

X = dataset['MinTemp'].values.reshape(-1,1)
y = dataset['MaxTemp'].values.reshape(-1,1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
regressor = LinearRegression()
regressor.fit(X_train, y_train) # Training the algorithm

y_pred = regressor.predict(X_test)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
Exemplo n.º 9
0
    def test_process_data(self):
        """
        Full E2E of Process Data
        """
        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "9999.0.1"  # MUST BE A SEMVER

        results_ml_object = MLObject()
        results_ml_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        results_ml_object = MLObject()

        result_ml_object_schema_type = expected_results_schema_type
        result_ml_object_schema_version = expected_results_schema_version

        exec(
            (Path("tests") / "sample_process_data_execution.py").read_text(),
            globals(),
            locals(),
        )

        results_ml_object.run_date = datetime.datetime.now()
        results_ml_object.step_id = str(uuid.uuid4())
        results_ml_object.run_id = str(uuid.uuid4())

        results_ml_object.execution_profile.system_memory_utilization = random(
        )
        results_ml_object.execution_profile.network_traffic_in_bytes = randint(
            7e9, 9e10)
        results_ml_object.execution_profile.gpu_temperature = randint(70, 130)
        results_ml_object.execution_profile.disk_io_utilization = random()
        results_ml_object.execution_profile.gpu_percent_of_time_accessing_memory = (
            random())
        results_ml_object.execution_profile.cpu_utilization = random()
        results_ml_object.execution_profile.gpu_utilization = random()
        results_ml_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(
                results_ml_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            ))
Exemplo n.º 10
0
def sub_main():
    rootLogger = setupLogger().get_root_logger()

    # Loading input values
    msg = "::debug::Loading input values"
    print_left_message("Loading variables from environment...")
    rootLogger.debug(msg)

    parameters = convert_environment_variables_to_dict()

    print("{:>15}".format("ok"))  # Finished loading from environment

    parameters.INPUT_SCHEMAS_DIRECTORY = os.environ.get("INPUT_SCHEMAS_DIRECTORY")

    if "INPUT_SCHEMAS_GIT_URL" in os.environ and os.environ.get != "":
        parameters.INPUT_SCHEMAS_GIT_URL = os.environ.get("INPUT_SCHEMAS_GIT_URL")
        print_left_message(
            f"Downloading schemas from {parameters.INPUT_SCHEMAS_GIT_URL}..."
        )
        try:
            git.Git(parameters.INPUT_SCHEMAS_DIRECTORY).clone(
                parameters.INPUT_SCHEMAS_GIT_URL, str(uuid.uuid4()), depth=1
            )
            # TODO: Authenticate with GH Token?
            print("{:>15}".format("ok"))  # Finished loading from GIT URL
        except GitCommandError as gce:
            raise KnownException(
                f"Trying to read from the git repo ({parameters.INPUT_SCHEMAS_GIT_URL}) and write to the directory ({parameters.INPUT_SCHEMAS_DIRECTORY}). Full error follows: {str(gce)}"
            )

    print_left_message("Appending schemas to registry...")
    MLSchema.append_schema_to_registry(Path(parameters.INPUT_SCHEMAS_DIRECTORY))
    print("{:>15}".format("ok"))  # Finished loading registry

    parameters.previous_step_name = os.environ.get("INPUT_PREVIOUS_STEP_NAME", "")
    parameters.next_step_name = os.environ.get("INPUT_NEXT_STEP_NAME", "")
    rootLogger.debug("::debug:: Finished main")

    # Load metastore credentials

    rootLogger.debug("::debug:: Loading credentials")
    print_left_message("Loading and validating metastore credentials...")
    metastore_cred_string_blob = os.environ.get("INPUT_METASTORE_CREDENTIALS")

    metastore_credentials_packed = YAML.safe_load(metastore_cred_string_blob)
    metastore_credentials_string = base64.urlsafe_b64decode(
        metastore_credentials_packed
    ).decode("utf-8")
    metastore_credentials = YAML.safe_load(metastore_credentials_string)

    report_found_params(
        ["url", "key", "database_name", "container_name"], metastore_credentials
    )
    print("{:>15}".format("ok"))  # Finished loading and validating metastore
    rootLogger.debug("::debug::Starting metastore connection")

    print_left_message("Starting connection to metastore...")
    ms = load_metastore_connection(metastore_credentials_packed)
    print("{:>15}".format("ok"))  # Finished connecting to metastore

    workflow_node_id = os.environ.get("INPUT_WORKFLOW_NODE_ID")
    if workflow_node_id == "":
        raise KnownException(
            "INPUT_WORKFLOW_NODE_ID - No workflow node id was provided."
        )

    print_left_message(f"Loading workflow object ID: '{workflow_node_id}' ...")
    workflow_object = load_workflow_object(workflow_node_id, ms)
    print("{:>15}".format("ok"))  # Finished loading workload abject

    rootLogger.debug("::debug::Loading input parameters")
    print_left_message("Loading input parameters ...")
    input_parameters = load_parameters("INPUT", ms)
    print("{:>15}".format("ok"))  # Finished loading input parameters from metastore

    rootLogger.debug("::debug::Loading execution parameters file")
    print_left_message("Loading execution parameters ...")
    execution_parameters = load_parameters("EXECUTION", ms)
    print(
        "{:>15}".format("ok")
    )  # Finished loading execution  parameters from metastore

    step_name = parameters.INPUT_STEP_NAME
    print_left_message(f"Loading contract for '{step_name}.input' ...")
    input_object = load_contract_object(
        parameters=input_parameters,
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="input",
    )
    print(
        "{:>15}".format("ok")
    )  # Finished loading execution  parameters from metastore

    print(f"Attaching step info to input for '{step_name}.input' ... ")
    input_node_id = ms.attach_step_info(
        input_object,
        workflow_object.schema_version,
        workflow_node_id,
        step_name,
        "input",
    )
    print(f"     Input Node ID: {input_node_id}")  # Finished attaching step ID to input

    rootLogger.debug(f"Successfully saved: {input_object}")

    # TODO don't hard code any of these
    exec_dict = execution_parameters
    exec_dict["run_id"] = parameters.GITHUB_RUN_ID
    exec_dict["run_date"] = datetime.datetime.now()
    exec_dict["step_id"] = str(uuid.uuid4())

    print_left_message(f"Loading contract for '{step_name}.execution' ...")
    execution_object = load_contract_object(
        parameters=exec_dict,
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="execution",
    )
    print(
        "{:>15}".format("ok")
    )  # Finished loading execution  parameters from metastore

    rootLogger.debug(f"Successfully loaded and validated execution: {execution_object}")

    print(f"Attaching step info to input for '{step_name}.execution' ... ")
    execution_node_id = ms.attach_step_info(
        execution_object,
        workflow_object.schema_version,
        workflow_node_id,
        step_name,
        "execution",
    )
    rootLogger.debug(f"Successfully saved: {execution_object}")
    print(
        f"      Execution Node ID: {execution_node_id}"
    )  # Finished attaching step ID to input

    # Branching between use step_execution.py or execution file.
    execution_file = os.environ.get("INPUT_EXECUTION_FILE")

    print_left_message("Executing step ... ")
    print("{:>15}".format("ok"))  # Starting executing step
    results_ml_object = execute_step(
        execution_file,
        workflow_object,
        input_object,
        execution_object,
        step_name,
        parameters.GITHUB_RUN_ID,
    )
    print_left_message("Finished executing step ... ")
    print("{:>15}".format("ok"))  # Starting executing step

    # TODO: Need to add next and previous steps to attach_step_info
    print(f"Attaching step info to output for '{step_name}.output' ... ")
    output_node_id = ms.attach_step_info(
        results_ml_object,
        workflow_object.schema_version,
        workflow_node_id,
        step_name,
        "output",
    )
    print(
        f"      Output Node ID: {output_node_id}"
    )  # Finished attaching step ID to output

    dict_conversion = results_ml_object.dict_without_internal_variables()

    string_io_handle = StringIO()
    YAML.SafeDumper.add_representer(uuid.UUID, repr_uuid)
    YAML.safe_dump(dict_conversion, string_io_handle)
    yaml_conversion = string_io_handle.getvalue()

    encode_to_utf8_bytes = yaml_conversion.encode("utf-8")
    base64_encode = base64.urlsafe_b64encode(encode_to_utf8_bytes)
    final_encode_to_utf8 = str(base64_encode, "utf-8")

    # Recording raw log info
    # logBuffer.flush()
    # log_contents = logBuffer.getvalue()

    log_object = MLObject()
    log_object.set_type(schema_version="0.1.0", schema_type="log")
    log_object.run_id = parameters.GITHUB_RUN_ID
    log_object.step_name = step_name
    log_object.run_date = datetime.datetime.now()
    log_object.raw_log = (
        "NO RAW LOGS YET (NEED TO FIGURE OUT WHERE I CAN PUSH A LARGE OBJECT)"
    )
    # log_object.raw_log = log_contents
    log_object.log_property_bag = {}

    # errors = log_object.validate()

    log_node_id = ms.attach_step_info(
        log_object, workflow_object.schema_version, workflow_node_id, step_name, "log"
    )

    rootLogger.debug(
        f"::set-output name=output_raw::{results_ml_object.dict_without_internal_variables()}"
    )

    print("Printing output ... \n \n")
    logger = setupLogger()
    output_message = ""
    output_message += f"{logger.print_and_log('output_raw', results_ml_object.dict_without_internal_variables())}\n"
    output_message += (
        f"{logger.print_and_log('output_base64_encoded', final_encode_to_utf8)}\n"
    )
    output_message += f"{logger.print_and_log('input_node_id', input_node_id)}\n"
    output_message += (
        f"{logger.print_and_log('execution_node_id', execution_node_id)}\n"
    )
    output_message += f"{logger.print_and_log('output_node_id', output_node_id)}\n"
    output_message += f"{logger.print_and_log('log_node_id', log_node_id)}\n"

    rootLogger.debug(f"Complete output: \n {output_message}")
    print("\n\n... finished printing output")  # Finished printing output

    print_left_message("Generating /output_message.txt ...")
    if is_docker():
        Path("/output_message.txt").write_text(output_message)
    else:
        fp = tempfile.TemporaryFile()
        fp.write(output_message.encode("utf-8"))
    print("{:>15}".format("ok"))  # Finished printing output
Exemplo n.º 11
0
    def main(self):
        c = """
- Dashboard for runs
-- Size
-- Likelihood of bias
-- Time for run
-- Accuracy
- Filter by version
- Look up at top version and show metadata going in and out

- Show bad input (e.g. it's null) and what happens when you run it
- Show when you add a new step - how you can compare those with other versions

""" # noqa
        credentials = Credentials.metastore_credentials_prod
        MLSchema.append_schema_to_registry(Path(".parameters") / "schemas")
        repo_name = "mlspec"
        output_regex = "::set-output name=output_base64_encoded::(.*?)\\\\"

        run_date_start = datetime.datetime(2020, 1, 1) + datetime.timedelta(
            seconds=random.randrange(0, 5184000))
        run_id = str(uuid.uuid4())

        step_name = "process_data"
        data_source = MLObject()
        data_source.set_type("500.0.1", "data_source")
        data_source.run_id = run_id
        data_source.step_id = str(uuid.uuid4())
        data_source.run_date = str(run_date_start.isoformat())
        data_source.source_id = str(uuid.uuid4())
        data_source.source_uri = f"https://internal.contoso.com/datasets/raw_nlp_data-{run_date_start.strftime('%Y-%m-%d')}-{get_random_md5()}"  # noqa
        data_source.extended_properties = {}

        data_process_run = MLObject()
        data_process_run.set_type("500.0.1", "data_process_run")
        data_process_run.nodes = random.randrange(1, 4) * 2
        data_process_run.cpu_per_node = f"{random.randrange(2,8) * 2}"
        data_process_run.ram_per_node = f"{random.randrange(1,16) * 8}Gi"
        data_process_run.gpu_required = (random.randrange(1, 2) % 2) == 0
        data_process_run.output_root_path = (
            "https://internal.contoso.com/datasets/processed_data/")
        data_process_run.base_image = random_base_image()
        data_process_run.machine_type = random_machine_type()
        data_process_run.run_id = run_id
        data_process_run.step_id = str(uuid.uuid4())
        data_process_run.run_date = str(run_date_start.isoformat())
        data_process_run.extended_properties = {}

        environment_dict = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_raw: {data_source.dict_without_internal_variables()}
INPUT_execution_parameters_raw: {data_process_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
        """)

        self.run_container(repo_name, "mlspeclib-action-samples-process-data",
                           environment_dict)

        buff_val = self.buffer.getvalue()
        m = re.search(output_regex, buff_val)
        process_data_encoded_val = m.group(1)

        # Below is for debugging, we're ok leaving it in base64 encoded
        # process_data_output_value = base64.urlsafe_b64decode(process_data_encoded_val)
        self.buffer.truncate(0)
        self.buffer.seek(0)

        step_name = "train"
        training_run = MLObject()
        training_run.set_type("500.0.1", "training_run")
        training_run.nodes = random.randrange(1, 4) * 2
        training_run.cpu_per_node = random.randrange(2, 8) * 2
        training_run.ram_per_node = f"{random.randrange(1,16) * 8}Gi"
        training_run.gpu_required = (random.randrange(1, 2) % 2) == 0
        training_run.output_path = "test/models/output"
        training_run.training_params.learning_rate = 1 / (pow(
            10, random.randint(0, 4)))
        training_run.training_params.loss = random.random()
        training_run.training_params.batch_size = random.randrange(1, 5) * 500
        training_run.training_params.epoch = random.randrange(1, 8) * 25
        training_run.training_params.optimizer = ["SGD"]
        training_run.training_params.other_tags = {
            "pii": False,
            "data_sha": "8b03f70"
        }
        training_run.extended_properties = {}

        environment_dict_train = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_base64: {process_data_encoded_val}
INPUT_execution_parameters_raw: {training_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
        """)

        self.run_container(repo_name, "mlspeclib-action-samples-train",
                           environment_dict_train)

        buff_val = self.buffer.getvalue()
        m = re.search(output_regex, buff_val)
        train_encoded_val = m.group(1)
        # train_output_value = base64.urlsafe_b64decode(train_encoded_val)
        self.buffer.truncate(0)
        self.buffer.seek(0)

        step_name = "package"
        package_run = MLObject()
        package_run.set_type("500.0.1", "package_run")
        package_run.run_id = run_id
        package_run.step_id = str(uuid.uuid4())
        package_run.run_date = run_date_start.isoformat()
        package_run.model_source = "/nfs/trained_models/nlp"
        package_run.container_registry = f"https://registry.hub.docker.com/v1/repositories/contoso/nlp/{get_random_md5()}"  # noqa
        package_run.agent_pool = "nlp-build-pool"
        package_run.build_args = ["arg1", "arg2", "arg3"]
        package_run.extended_properties = {}
        package_run.secrets = {
            "credentials": "AZURE_CREDENTIALS",
            "docker_username": "******",
            "docker_password": "******",
        }

        environment_dict_package = YAML.safe_load(f"""
INPUT_schemas_directory: '.parameters/schemas'
INPUT_schemas_git_url: 'https://github.com/mlspec/mlspeclib-action-samples-schemas.git'
INPUT_workflow_node_id: 'workflow|500.0.1|31ca83ed-8263-4c8c-8672-7a2163a34725'
INPUT_step_name: {step_name}
INPUT_input_parameters_base64: {train_encoded_val}
INPUT_execution_parameters_raw: {package_run.dict_without_internal_variables()}
INPUT_METASTORE_CREDENTIALS: {credentials}
GITHUB_RUN_ID: {str(run_id)}
GITHUB_WORKSPACE: '/src'
        """)

        self.run_container(repo_name, "mlspeclib-action-samples-package",
                           environment_dict_package)

        buff_val = self.buffer.getvalue()
        m = re.search(output_regex, buff_val)
        encoded_val = m.group(1)
        print(base64.urlsafe_b64decode(encoded_val))
        self.buffer.flush()
    def test_e2e(self):
        MLSchema.populate_registry()
        MLSchema.append_schema_to_registry(Path.cwd() / ".parameters" /
                                           "schemas")

        # Execute step
        input_parameters = {
            # Put sample required input parameters here
        }

        execution_parameters = {
            # Put sample required execution parameters here
        }

        # THESE SHOULD BE THE ONLY SETTINGS FOR THIS FILE
        step_name = "process_data"
        expected_results_schema_type = "data_result"  # MUST BE A LOADED SCHEMA
        expected_results_schema_version = "500.0.1"  # MUST BE A SEMVER

        step_execution_object = StepExecution(input_parameters,
                                              execution_parameters)

        results_object = MLObject()
        results_object.set_type(
            schema_type=expected_results_schema_type,
            schema_version=expected_results_schema_version,
        )

        # Should error due to missing fields
        with self.assertRaises(ValueError) as context:
            verify_result_contract(
                results_object,
                expected_results_schema_type,
                expected_results_schema_version,
                step_name,
            )

        self.assertTrue(
            f"Error verifying result object for '{step_name}.output'" in str(
                context.exception))

        results_object = step_execution_object.execute(
            result_object_schema_type=expected_results_schema_type,
            result_object_schema_version=expected_results_schema_version,
        )

        results_object.run_date = datetime.datetime.now()
        results_object.step_id = str(uuid.uuid4())
        results_object.run_id = str(uuid.uuid4())

        results_object.execution_profile.system_memory_utilization = random()
        results_object.execution_profile.network_traffic_in_bytes = randint(
            7e9, 9e10)
        results_object.execution_profile.gpu_temperature = randint(70, 130)
        results_object.execution_profile.disk_io_utilization = random()
        results_object.execution_profile.gpu_percent_of_time_accessing_memory = random(
        )
        results_object.execution_profile.cpu_utilization = random()
        results_object.execution_profile.gpu_utilization = random()
        results_object.execution_profile.gpu_memory_allocation = random()

        self.assertTrue(
            verify_result_contract(results_object,
                                   expected_results_schema_type,
                                   expected_results_schema_version, step_name))