Example #1
from pathlib import Path
from pprint import PrettyPrinter

from mlspeclib import MLObject, MLSchema

pp = PrettyPrinter(indent=4)

# Register mlspeclib's built-in schemas, then any project-specific ones in ./schemas,
# before loading any objects against them.
MLSchema.populate_registry()
MLSchema.append_schema_to_registry(Path("schemas"))

(sample_task_object,
 err) = MLObject.create_object_from_file('tasks/download_data.yaml')

if err != {}:
    pp.pprint(err)
else:
    pp.pprint(sample_task_object.inputs)
    pp.pprint(sample_task_object.outputs)

# Load and validate every task spec in tasks/.
load_list = list(Path('tasks').glob("*.yaml"))

for this_file in load_list:
    file_text = this_file.read_text()
    (loaded_object, err) = MLObject.create_object_from_string(file_text)
    if err != {}:
        print(f"ERROR LOADING FILE: {this_file}")
        pp.pprint(err)
        print("\n\n")
    else:
        print(f"File loaded: {this_file}")
        pp.pprint(loaded_object)
        print("\n\n")

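For context, here is a sketch of what a task spec loaded above could contain, this time fed through the string API; the schema_type and the inputs/outputs fields are hypothetical and would have to match a schema registered from ./schemas:

# Hypothetical task spec; field names must match a registered schema.
sample_yaml = """
schema_version: 0.0.1
schema_type: task
inputs:
  data_uri: s3://sample-bucket/raw
outputs:
  data_path: datasets/raw.csv
"""

(task_object, err) = MLObject.create_object_from_string(sample_yaml)
if err != {}:
    pp.pprint(err)
else:
    pp.pprint(task_object.inputs)
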
Example #2
def load_contract_object(
    parameters: dict, workflow_object: MLObject, step_name: str, contract_type: str
):
    """ Creates an MLObject based on an input string, and validates it against the workflow object
    and step_name provided.

    Will fail if the .validate() fails on the object or the schema mismatches what is seen in the
    workflow.
    """
    rootLogger = setupLogger().get_root_logger()

    if contract_type not in CONTRACT_TYPES:
        raise KnownException(
            f"{contract_type} not in the expected list of contract types: {CONTRACT_TYPES}."
        )

    if isinstance(parameters, dict):
        parameters_string = YAML.safe_dump(parameters)
    elif isinstance(parameters, str):
        parameters_string = parameters
    else:
        raise KnownException(
            f"load_contract_object was called with neither a string nor a dict. Value: {parameters}"
        )

    (contract_object, errors) = MLObject.create_object_from_string(parameters_string)

    if errors is not None and len(errors) > 0:
        rootLogger.debug(f"{contract_type} object loading errors: {errors}")
        raise KnownException(
            f"Error when trying to validate the contract object {step_name}.{contract_type}. Errors: {errors}"
        )

    if step_name not in workflow_object["steps"]:
        raise KnownException(
            f"Workflow object does not contain the step '{step_name}'."
        )

    if contract_type not in workflow_object["steps"][step_name]:
        raise KnownException(
            f"Workflow object for step '{step_name}' does not contain a spec for the contract type: '{contract_type}'."
        )

    step_contract = workflow_object["steps"][step_name][contract_type]
    if (contract_object.schema_type != step_contract.schema_type) or (
        contract_object.schema_version != step_contract.schema_version
    ):
        raise_schema_mismatch(
            expected_type=step_contract.schema_type,
            actual_type=contract_object.schema_type,
            expected_version=step_contract.schema_version,
            actual_version=contract_object.schema_version,
        )

    rootLogger.debug(
        f"Successfully loaded and validated contract object: {contract_object.schema_type} on step {step_name}.{contract_type}"
    )
    return contract_object
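
A minimal usage sketch for the function above; the step name, contract type, and payload fields are hypothetical, and it assumes the schemas and the workflow object were already loaded as in Example #1:

input_parameters = {
    "schema_type": "task_input",      # hypothetical schema identifiers
    "schema_version": "0.0.1",
    "data_path": "datasets/raw.csv",  # hypothetical payload field
}

# workflow_object would come from MLObject.create_object_from_file(...) as in Example #1.
input_object = load_contract_object(
    parameters=input_parameters,
    workflow_object=workflow_object,
    step_name="download_data",
    contract_type="inputs",
)
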
Example #3
    def test_e2e(self):
        parameters_from_environment = {}

        integration_tests_dir = parameters_from_environment.get(
            "INPUT_integration_tests_directory", "integration")
        # parameters_dir_name = parameters_from_environment.get("INPUT_parameters_directory", "/src/.parameters")
        variables_file_name = parameters_from_environment.get(
            "INPUT_integration_tests_variable_file_name",
            "integration_test_variables.yaml",
        )

        print(os.environ)

        # Forward all GitHub Action inputs (INPUT_*) from the environment.
        for var in os.environ:
            if "INPUT" in var:
                parameters_from_environment[var] = os.environ[var]

        parameters_from_file = {}
        parameters_file_location = Path(
            integration_tests_dir) / variables_file_name
        if parameters_file_location.exists():
            parameters_from_file = YAML.safe_load(
                parameters_file_location.read_text("utf-8"))

        # Building everything into parameters that we'll eventually write to environment variables to execute Docker
        parameters = {**parameters_from_file, **parameters_from_environment}
        schemas_dir_name = parameters.get(
            "INPUT_integration_tests_schemas_dir_name",
            "/src/parameters/schemas")

        repo_name = parameters.get("INPUT_container_repo_name")
        container_name = parameters.get("INPUT_container_name")

        parameters["INPUT_workflow_version"] = parameters.get(
            "INPUT_workflow_version",
            str("999999999999.9." + str(random.randint(0, 9999))))
        workflow_version = parameters["INPUT_workflow_version"]

        MLSchema.append_schema_to_registry(Path(schemas_dir_name))

        workflow_input = parameters.get("INPUT_workflow")
        if isinstance(workflow_input, dict):
            workflow_string = YAML.safe_dump(workflow_input)
        else:
            workflow_string = workflow_input

        workflow_dict = YAML.safe_load(workflow_string)
        workflow_dict["workflow_version"] = workflow_version
        workflow_dict["run_id"] = str(uuid.uuid4())
        parameters["GITHUB_RUN_ID"] = workflow_dict["run_id"]
        parameters["GITHUB_WORKSPACE"] = "/src"

        workflow_dict["step_id"] = str(uuid.uuid4())
        workflow_dict["run_date"] = datetime.datetime.now()

        workflow_string = YAML.safe_dump(workflow_dict)
        (workflow_object,
         errors) = MLObject.create_object_from_string(workflow_string)

        credentials_packed = parameters_from_environment.get(
            "INPUT_METASTORE_CREDENTIALS", None)

        if credentials_packed is None:
            credentials_packed = (Path(integration_tests_dir) /
                                  "metastore_credentials.yaml").read_text(
                                      encoding="utf-8")

        # TODO Sometimes secrets have no spacer. Should figure this out.
        parameters["INPUT_METASTORE_CREDENTIALS"] = credentials_packed

        ms = Metastore(credentials_packed)
        debug_args = ""
        environment_vars_list = []

        workflow_node_id = None
        try:
            workflow_node_id = ms.create_workflow_node(workflow_object,
                                                       workflow_dict["run_id"])
            ms.create_workflow_steps(workflow_node_id, workflow_object)
            parameters["INPUT_workflow_node_id"] = workflow_node_id

            for param in parameters:
                if isinstance(parameters[param], dict):
                    env_value = YAML.safe_dump(parameters[param])
                else:
                    env_value = parameters[param]
                debug_args += f" -e '{param}={env_value}'"
                environment_vars_list.append("-e")
                environment_vars_list.append(f"{param}={env_value}")

            exec_statement = [
                "docker",
                "pull",
                f"{repo_name}/{container_name}",
            ]

            print(f"docker pull --no-cache {repo_name}/{container_name}")
            # self.rootLogger.debug(f"exec_statement = {exec_statement}")

            p = subprocess.Popen(exec_statement,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            out, err = p.communicate()
            # self.rootLogger.debug(f"out = {str(out)}")
            # self.rootLogger.debug(f"error = {str(err)}")
            # self.assertTrue(str(err, "utf-8") == "")

            exec_statement = (["docker", "run"] + environment_vars_list +
                              [f"{repo_name}/{container_name}"])

            # print(f"args statement: '{debug_args}'")
            print(
                f"docker run \\\n {debug_args} \\\n -ti --entrypoint=/bin/bash {repo_name}/{container_name}"
            )
            # self.rootLogger.debug(f"exec_statement = {exec_statement}")

            p = subprocess.Popen(exec_statement,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            out, err = p.communicate()
            self.rootLogger.debug(f"out = {str(out)}")
            self.rootLogger.debug(f"error = {str(err)}")
            self.assertEqual(str(err, "utf-8"), "")
            result = ms.execute_query(
                f"g.V().has('workflow_node_id', '{workflow_node_id}').count()")
            self.assertEqual(result[0], 8)

        finally:
            try:
                if workflow_node_id is not None:
                    ms.execute_query(
                        f"g.V().has('workflow_node_id', '{workflow_node_id}').drop()"
                    )
            except ValueError:
                pass
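
As an aside, the two Popen/communicate pairs in this test could also be written with subprocess.run, which bundles spawning, output capture, and decoding into one call; a minimal sketch using the same repo_name, container_name, and environment_vars_list values:

import subprocess

# Pull the image; text=True decodes stdout/stderr to str.
pull = subprocess.run(
    ["docker", "pull", f"{repo_name}/{container_name}"],
    capture_output=True, text=True)
print(pull.stdout)

# Run the container with the accumulated -e environment arguments.
run = subprocess.run(
    ["docker", "run"] + environment_vars_list + [f"{repo_name}/{container_name}"],
    capture_output=True, text=True)
assert run.stderr == ""
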
Example #4
    Path('tests/sample_process_data_execution.py').absolute())

credentials_packed = parameters["INPUT_METASTORE_CREDENTIALS"]

ms = Metastore(credentials_packed)

workflow_dict = parameters["INPUT_WORKFLOW"]
workflow_dict["workflow_version"] = str("999999999999.9." +
                                        str(random.randint(0, 9999)))
workflow_dict["run_id"] = str(uuid.uuid4())
workflow_dict["step_id"] = str(uuid.uuid4())
workflow_dict["run_date"] = datetime.datetime.now()

MLSchema.append_schema_to_registry(Path("tests/schemas_for_test"))

(workflow_object, err) = MLObject.create_object_from_string(
    YAML.safe_dump(workflow_dict))
if len(err) != 0:
    raise ValueError(f"Error creating mock workflow_object. Errors: {err}")

parameters["INPUT_WORKFLOW_NODE_ID"] = ms.create_workflow_node(
    workflow_object, str(uuid.uuid4()))

parameters["GITHUB_RUN_ID"] = workflow_dict["run_id"]
parameters["GITHUB_WORKSPACE"] = "/src"

rootLogger.debug(os.environ)
bar = buffer.getvalue()

environment_vars = ""

for param in parameters: