示例#1
0
def main():
    """Run ``sub_main`` and turn expected failures into a logged exit code 1.

    Expected failures are:

    * any ``KnownException`` or ``RegistryError``;
    * a ``KeyError`` whose text mentions one of ``url``, ``key``,
      ``database_name`` or ``container_name``;
    * a ``FileNotFoundError`` whose text contains ``"No files ending in"``.

    Each of those is logged at CRITICAL level and the process exits with
    status 1. Any other ``KeyError`` / ``FileNotFoundError`` is re-raised
    untouched so the original traceback is preserved.
    """
    # Local import: sys.exit() is always available, whereas the bare exit()
    # helper is only injected by the ``site`` module.
    import sys

    # NOTE(review): when the variable is set this is the raw string, so a value
    # such as "false" is still truthy if setupLogger only tests truthiness --
    # confirm against setupLogger.
    turn_debug_on = os.environ.get("INPUT_ACTIONS_STEP_DEBUG", False)

    rootLogger = setupLogger(turn_debug_on).get_root_logger()

    try:
        sub_main()
    except KnownException as ve:
        rootLogger.critical(str(ve))
        sys.exit(1)
    except KeyError as ke:
        matches = ("url", "key", "database_name", "container_name")
        if not any(x in str(ke) for x in matches):
            # Unexpected key error: bare raise keeps the original traceback.
            raise
        rootLogger.critical(str(ke))
        sys.exit(1)
    except FileNotFoundError as fnfe:
        if "No files ending in" not in str(fnfe):
            raise
        rootLogger.critical(str(fnfe))
        sys.exit(1)
    except RegistryError as registry_error:
        # Named so that it does not shadow the stdlib ``re`` module.
        rootLogger.critical(str(registry_error))
        sys.exit(1)
    def test_main_no_input(self):
        """
        Unit test to check the main function with no inputs: it must exit
        with status 1 and the log buffer must contain the missing-value
        message.
        """
        buffer = setupLogger().get_buffer()
        with self.assertRaises(SystemExit) as context:
            main()

        self.assertEqual(context.exception.code, 1)
        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn("No value provided for", str(buffer.getvalue()))
    def test_setup_logger_returns(self):
        """
        Check the pair returned by setupLogger().get_loggers(): the logger is
        expected to be the root logger, messages logged through it must show
        up in the buffer, and its debug method must be patchable.
        """
        (rootLogger, buffer) = setupLogger().get_loggers()
        # assertTrue(a, b) treats b as the failure message and never compares
        # the two values, so the comparison has to be spelled out.
        self.assertEqual(rootLogger, logging.getLogger())

        message_string = "Test log message"
        rootLogger.warning(message_string)

        # unittest assertion instead of a bare assert (stripped under -O).
        self.assertIn(message_string, buffer.getvalue())

        with patch.object(rootLogger, "debug") as mock_log:
            rootLogger.debug("foo")
            mock_log.assert_called_once_with("foo")
    def __init__(self, input_params, execution_params):
        """Keep both parameter sets on the instance and debug-log those given.

        Args:
            input_params: input parameters for the step; logged unless None.
            execution_params: execution parameters for the step; logged
                unless None.
        """
        self.input_params, self.execution_params = input_params, execution_params
        self.rootLogger = setupLogger().get_root_logger()

        # Execute all work in here.

        # Report whichever parameter sets were actually supplied.
        log_debug = self.rootLogger.debug
        if input_params is not None:
            log_debug(f"Input params: {input_params}")

        if execution_params is not None:
            log_debug(f"Execution params: {execution_params}")
示例#5
0
    def __init__(self, input_params, execution_params):
        """Keep both parameter sets on the instance and debug-log them.

        Args:
            input_params: input parameters for the step; logged unless None.
            execution_params: execution parameters for the step; required.

        Raises:
            KnownException: if ``execution_params`` is None.
        """
        self.input_params, self.execution_params = input_params, execution_params
        self.logger = setupLogger().get_root_logger()

        # Execute all work in here.

        # Input parameters are optional and only reported when present.
        if input_params is not None:
            self.logger.debug(f"Input params: {input_params}")

        # Execution parameters are mandatory.
        if execution_params is None:
            raise KnownException("No execution parameters provided.")
        self.logger.debug(f"Execution params: {execution_params}")
示例#6
0
def load_workflow_object(
    workflow_node_id: str, metastore_connection: Metastore
) -> MLObject:
    """Fetch the workflow object stored under ``workflow_node_id``.

    Args:
        workflow_node_id: id handed to ``get_workflow_object``.
        metastore_connection: connection used to look the workflow up.

    Returns:
        The workflow object, or ``None`` when the metastore returned a
        non-empty collection of loading errors alongside it.

    Raises:
        KnownException: if nothing was loaded, or the loaded object has no
            ``steps`` field.
    """
    log = setupLogger().get_root_logger()
    lookup_result = metastore_connection.get_workflow_object(workflow_node_id)
    (workflow_object, errors) = lookup_result

    if workflow_object is None:
        raise KnownException(
            f"No workflow loaded when attempting to load workflow node id: {workflow_node_id}"
        )

    if "steps" not in workflow_object:
        raise KnownException("Workflow object does not contain the field 'steps'.")

    # The errors are only written to the debug log; callers see them as None.
    log.debug(f"Workflow loading errors: {errors}")
    loaded_cleanly = errors is None or len(errors) == 0
    return workflow_object if loaded_cleanly else None
 def setUp(self):
     """Store the pair returned by get_loggers() and switch the logger to DEBUG."""
     logger_pair = setupLogger().get_loggers()
     self.rootLogger, self._buffer = logger_pair
     self.rootLogger.setLevel(logging.DEBUG)
 def setUp(self):
     """Store the pair returned by get_loggers() on the test instance."""
     logger_pair = setupLogger().get_loggers()
     self.rootLogger, self._buffer = logger_pair
    def setUp(self):
        """Store the logger pair and register the schemas used by the tests."""
        logger_pair = setupLogger().get_loggers()
        self.rootLogger, self._buffer = logger_pair

        MLSchema.populate_registry()
        # Test-only schemas live under <cwd>/tests/schemas_for_test.
        test_schema_dir = Path.cwd() / "tests" / "schemas_for_test"
        MLSchema.append_schema_to_registry(test_schema_dir)
示例#10
0
def sub_main():
    """Run one workflow step end to end and publish its results.

    In order, the function:

    1. loads parameters from the environment and appends the schemas found in
       ``INPUT_SCHEMAS_DIRECTORY`` to the registry, first cloning
       ``INPUT_SCHEMAS_GIT_URL`` into that directory when a URL is given;
    2. decodes ``INPUT_METASTORE_CREDENTIALS`` (YAML wrapping a url-safe
       base64 YAML document), checks the expected fields and connects to the
       metastore;
    3. loads the workflow object plus the input and execution parameters,
       validates them as contracts and attaches them to the workflow;
    4. executes the step and attaches its output and a log object;
    5. prints/logs the outputs and writes the combined output message.

    Raises:
        KnownException: if the schema repository cannot be cloned or no
            workflow node id was provided. Helpers called from here may
            raise further exceptions.
    """
    rootLogger = setupLogger().get_root_logger()

    # Loading input values
    msg = "::debug::Loading input values"
    print_left_message("Loading variables from environment...")
    rootLogger.debug(msg)

    parameters = convert_environment_variables_to_dict()

    print("{:>15}".format("ok"))  # Finished loading from environment

    parameters.INPUT_SCHEMAS_DIRECTORY = os.environ.get("INPUT_SCHEMAS_DIRECTORY")

    # Only clone when a URL was actually supplied. The previous check compared
    # the os.environ.get *method* with "" and was therefore always true, so an
    # empty variable still triggered a clone attempt.
    schemas_git_url = os.environ.get("INPUT_SCHEMAS_GIT_URL", "")
    if schemas_git_url != "":
        parameters.INPUT_SCHEMAS_GIT_URL = schemas_git_url
        print_left_message(
            f"Downloading schemas from {parameters.INPUT_SCHEMAS_GIT_URL}..."
        )
        try:
            # Shallow clone into a uniquely named sub-directory.
            git.Git(parameters.INPUT_SCHEMAS_DIRECTORY).clone(
                parameters.INPUT_SCHEMAS_GIT_URL, str(uuid.uuid4()), depth=1
            )
            # TODO: Authenticate with GH Token?
            print("{:>15}".format("ok"))  # Finished loading from GIT URL
        except GitCommandError as gce:
            raise KnownException(
                f"Trying to read from the git repo ({parameters.INPUT_SCHEMAS_GIT_URL}) and write to the directory ({parameters.INPUT_SCHEMAS_DIRECTORY}). Full error follows: {str(gce)}"
            ) from gce

    print_left_message("Appending schemas to registry...")
    MLSchema.append_schema_to_registry(Path(parameters.INPUT_SCHEMAS_DIRECTORY))
    print("{:>15}".format("ok"))  # Finished loading registry

    parameters.previous_step_name = os.environ.get("INPUT_PREVIOUS_STEP_NAME", "")
    parameters.next_step_name = os.environ.get("INPUT_NEXT_STEP_NAME", "")
    rootLogger.debug("::debug:: Finished main")

    # Load metastore credentials

    rootLogger.debug("::debug:: Loading credentials")
    print_left_message("Loading and validating metastore credentials...")
    metastore_cred_string_blob = os.environ.get("INPUT_METASTORE_CREDENTIALS")

    # The variable holds YAML whose payload is a url-safe base64 string, which
    # in turn decodes to the YAML document with the credential fields.
    metastore_credentials_packed = YAML.safe_load(metastore_cred_string_blob)
    metastore_credentials_string = base64.urlsafe_b64decode(
        metastore_credentials_packed
    ).decode("utf-8")
    metastore_credentials = YAML.safe_load(metastore_credentials_string)

    report_found_params(
        ["url", "key", "database_name", "container_name"], metastore_credentials
    )
    print("{:>15}".format("ok"))  # Finished loading and validating metastore
    rootLogger.debug("::debug::Starting metastore connection")

    print_left_message("Starting connection to metastore...")
    # The connection helper receives the still-packed credentials.
    ms = load_metastore_connection(metastore_credentials_packed)
    print("{:>15}".format("ok"))  # Finished connecting to metastore

    workflow_node_id = os.environ.get("INPUT_WORKFLOW_NODE_ID")
    # Covers both an unset variable (None) and an empty one ("").
    if not workflow_node_id:
        raise KnownException(
            "INPUT_WORKFLOW_NODE_ID - No workflow node id was provided."
        )

    print_left_message(f"Loading workflow object ID: '{workflow_node_id}' ...")
    workflow_object = load_workflow_object(workflow_node_id, ms)
    print("{:>15}".format("ok"))  # Finished loading workflow object

    rootLogger.debug("::debug::Loading input parameters")
    print_left_message("Loading input parameters ...")
    input_parameters = load_parameters("INPUT", ms)
    print("{:>15}".format("ok"))  # Finished loading input parameters from metastore

    rootLogger.debug("::debug::Loading execution parameters file")
    print_left_message("Loading execution parameters ...")
    execution_parameters = load_parameters("EXECUTION", ms)
    print(
        "{:>15}".format("ok")
    )  # Finished loading execution parameters from metastore

    step_name = parameters.INPUT_STEP_NAME
    print_left_message(f"Loading contract for '{step_name}.input' ...")
    input_object = load_contract_object(
        parameters=input_parameters,
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="input",
    )
    print("{:>15}".format("ok"))  # Finished loading the input contract

    print(f"Attaching step info to input for '{step_name}.input' ... ")
    input_node_id = ms.attach_step_info(
        input_object,
        workflow_object.schema_version,
        workflow_node_id,
        step_name,
        "input",
    )
    print(f"     Input Node ID: {input_node_id}")  # Finished attaching step ID to input

    rootLogger.debug(f"Successfully saved: {input_object}")

    # TODO don't hard code any of these
    # NOTE: exec_dict aliases execution_parameters, so the run metadata below
    # is added to that same object.
    exec_dict = execution_parameters
    exec_dict["run_id"] = parameters.GITHUB_RUN_ID
    exec_dict["run_date"] = datetime.datetime.now()
    exec_dict["step_id"] = str(uuid.uuid4())

    print_left_message(f"Loading contract for '{step_name}.execution' ...")
    execution_object = load_contract_object(
        parameters=exec_dict,
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="execution",
    )
    print("{:>15}".format("ok"))  # Finished loading the execution contract

    rootLogger.debug(f"Successfully loaded and validated execution: {execution_object}")

    print(f"Attaching step info to input for '{step_name}.execution' ... ")
    execution_node_id = ms.attach_step_info(
        execution_object,
        workflow_object.schema_version,
        workflow_node_id,
        step_name,
        "execution",
    )
    rootLogger.debug(f"Successfully saved: {execution_object}")
    print(
        f"      Execution Node ID: {execution_node_id}"
    )  # Finished attaching step ID to execution

    # Branching between use step_execution.py or execution file.
    execution_file = os.environ.get("INPUT_EXECUTION_FILE")

    print_left_message("Executing step ... ")
    print("{:>15}".format("ok"))  # Starting executing step
    results_ml_object = execute_step(
        execution_file,
        workflow_object,
        input_object,
        execution_object,
        step_name,
        parameters.GITHUB_RUN_ID,
    )
    print_left_message("Finished executing step ... ")
    print("{:>15}".format("ok"))  # Finished executing step

    # TODO: Need to add next and previous steps to attach_step_info
    print(f"Attaching step info to output for '{step_name}.output' ... ")
    output_node_id = ms.attach_step_info(
        results_ml_object,
        workflow_object.schema_version,
        workflow_node_id,
        step_name,
        "output",
    )
    print(
        f"      Output Node ID: {output_node_id}"
    )  # Finished attaching step ID to output

    dict_conversion = results_ml_object.dict_without_internal_variables()

    # Serialise the result to YAML, then to url-safe base64 text.
    string_io_handle = StringIO()
    YAML.SafeDumper.add_representer(uuid.UUID, repr_uuid)
    YAML.safe_dump(dict_conversion, string_io_handle)
    yaml_conversion = string_io_handle.getvalue()

    encode_to_utf8_bytes = yaml_conversion.encode("utf-8")
    base64_encode = base64.urlsafe_b64encode(encode_to_utf8_bytes)
    final_encode_to_utf8 = str(base64_encode, "utf-8")

    # Recording raw log info
    # logBuffer.flush()
    # log_contents = logBuffer.getvalue()

    log_object = MLObject()
    log_object.set_type(schema_version="0.1.0", schema_type="log")
    log_object.run_id = parameters.GITHUB_RUN_ID
    log_object.step_name = step_name
    log_object.run_date = datetime.datetime.now()
    log_object.raw_log = (
        "NO RAW LOGS YET (NEED TO FIGURE OUT WHERE I CAN PUSH A LARGE OBJECT)"
    )
    # log_object.raw_log = log_contents
    log_object.log_property_bag = {}

    # errors = log_object.validate()

    log_node_id = ms.attach_step_info(
        log_object, workflow_object.schema_version, workflow_node_id, step_name, "log"
    )

    rootLogger.debug(
        f"::set-output name=output_raw::{results_ml_object.dict_without_internal_variables()}"
    )

    print("Printing output ... \n \n")
    logger = setupLogger()
    output_message = ""
    output_message += f"{logger.print_and_log('output_raw', results_ml_object.dict_without_internal_variables())}\n"
    output_message += (
        f"{logger.print_and_log('output_base64_encoded', final_encode_to_utf8)}\n"
    )
    output_message += f"{logger.print_and_log('input_node_id', input_node_id)}\n"
    output_message += (
        f"{logger.print_and_log('execution_node_id', execution_node_id)}\n"
    )
    output_message += f"{logger.print_and_log('output_node_id', output_node_id)}\n"
    output_message += f"{logger.print_and_log('log_node_id', log_node_id)}\n"

    rootLogger.debug(f"Complete output: \n {output_message}")
    print("\n\n... finished printing output")  # Finished printing output

    print_left_message("Generating /output_message.txt ...")
    if is_docker():
        Path("/output_message.txt").write_text(output_message)
    else:
        # Outside docker the message only goes to a throw-away temp file; the
        # with-block closes the handle instead of leaking it.
        with tempfile.TemporaryFile() as fp:
            fp.write(output_message.encode("utf-8"))
    print("{:>15}".format("ok"))  # Finished generating the output message
示例#11
0
def execute_step(
    execution_file: str,
    workflow_object: MLObject,
    input_object: MLObject,
    execution_object: MLObject,
    step_name,
    run_id,
):
    """Execute a workflow step and return its validated result object.

    When ``execution_file`` is ``None`` the step is run through
    ``StepExecution``; otherwise the file's source is executed with this
    function's local variables available to it, and whatever it leaves in
    ``results_ml_object`` (mutated in place or rebound) is the result.

    Args:
        execution_file: path of a Python file to execute, or ``None``.
        workflow_object: workflow whose ``steps[step_name].output`` names the
            expected result schema type and version.
        input_object: input contract object for the step.
        execution_object: execution contract object for the step.
        step_name: name of the step inside ``workflow_object.steps``.
        run_id: stored on the result as ``run_id``.

    Returns:
        The result ``MLObject`` with ``run_id``, ``step_id`` and ``run_date``
        set, after it passed ``load_contract_object`` for the "output"
        contract.

    Raises:
        KnownException: if ``execution_file`` does not exist, or if the result
            is missing, empty, or not an ``MLObject``.
    """

    rootLogger = setupLogger().get_root_logger()

    results_ml_object = MLObject()

    if execution_file is None:
        msg = "Did not find any value for INPUT_EXECUTION_FILE, using /src/step_execution.py"

        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        print("{:>15}".format("ok"))  # Finished loading from environment

        output_spec = workflow_object.steps[step_name].output
        step_execution_object = StepExecution(input_object, execution_object)
        results_ml_object = step_execution_object.execute(
            result_object_schema_type=output_spec.schema_type,
            result_object_schema_version=output_spec.schema_version,
        )

    else:
        # The "$" that used to precede the placeholder was a stray character
        # that ended up verbatim in the message.
        msg = f"Executing '{execution_file}' (found in INPUT_EXECUTION_FILE env var)"

        print_left_message(msg)
        rootLogger.debug("::debug::" + msg)

        execution_file_path = Path(execution_file)

        if not execution_file_path.exists():
            raise KnownException(
                f"'{execution_file}' was provided as the file, but it does not appear to exist at {str(execution_file_path.resolve())} -- exiting."
            )

        # The below are used in the execution file
        result_ml_object_schema_type = workflow_object.steps[  # noqa
            step_name
        ].output.schema_type
        result_ml_object_schema_version = workflow_object.steps[  # noqa
            step_name
        ].output.schema_version

        # SECURITY: this runs arbitrary Python taken from INPUT_EXECUTION_FILE;
        # only point it at trusted files.
        #
        # The code runs against an explicit copy of the locals: writes made
        # through locals() never rebind this function's variables, so a file
        # that *assigns* results_ml_object would otherwise be ignored.
        exec_namespace = dict(locals())
        exec(execution_file_path.read_text(), globals(), exec_namespace)
        results_ml_object = exec_namespace.get("results_ml_object")

        print("{:>15}".format("ok"))  # Finished executing step

    # Type check before len() so a non-sized value gives a KnownException
    # rather than a TypeError.
    if results_ml_object is None:
        raise KnownException(
            "No value was assigned to the variable 'results_ml_object' -- exiting."
        )
    if not isinstance(results_ml_object, MLObject):
        raise KnownException(
            "The variable 'results_ml_object' was not of type MLObject -- exiting."
        )
    if len(results_ml_object) == 0:
        raise KnownException(
            "No value was assigned to the variable 'results_ml_object' -- exiting."
        )

    results_ml_object.run_id = run_id
    results_ml_object.step_id = str(uuid.uuid4())
    results_ml_object.run_date = datetime.datetime.now().isoformat()

    # Using the below to validate the object, even though we already have it created.
    load_contract_object(
        parameters=results_ml_object.dict_without_internal_variables(),
        workflow_object=workflow_object,
        step_name=step_name,
        contract_type="output",
    )

    return results_ml_object
示例#12
0
def load_contract_object(
    parameters: dict, workflow_object: MLObject, step_name: str, contract_type: str
):
    """Build an MLObject from ``parameters`` and check it against the workflow.

    ``parameters`` may be a dict (dumped to YAML first) or an already
    serialised string. The resulting object must load without errors, and its
    schema type and version must equal the ones the workflow declares for
    ``steps[step_name][contract_type]``.

    Returns:
        The created contract object.

    Raises:
        KnownException: for an unknown ``contract_type``, an unsupported
            ``parameters`` type, object loading errors, or a step/contract
            missing from the workflow. A schema mismatch is reported through
            ``raise_schema_mismatch``.
    """
    log = setupLogger().get_root_logger()

    if contract_type not in CONTRACT_TYPES:
        raise KnownException(
            f"{contract_type} not in the expected list of contract types: {CONTRACT_TYPES}."
        )

    # Normalise the parameters to a single string representation.
    if isinstance(parameters, str):
        serialized_parameters = parameters
    elif isinstance(parameters, dict):
        serialized_parameters = YAML.safe_dump(parameters)
    else:
        raise KnownException(
            f"load_contract_object was called with neither a string nor a dict. Value: {parameters}"
        )

    creation_result = MLObject.create_object_from_string(serialized_parameters)
    (contract_object, errors) = creation_result

    has_errors = errors is not None and len(errors) > 0
    if has_errors:
        log.debug(f"{contract_type} object loading errors: {errors}")
        raise KnownException(
            f"Error when trying to validate the contract object {step_name}.{contract_type}. Errors: {errors}"
        )

    workflow_steps = workflow_object["steps"]
    if step_name not in workflow_steps:
        raise KnownException(
            f"Workflow object does not contain the step '{step_name}'."
        )

    step_spec = workflow_steps[step_name]
    if contract_type not in step_spec:
        raise KnownException(
            f"Workflow object for step '{step_name}' does not contain a spec for the contract type: '{contract_type}'."
        )

    # Both the schema type and the schema version have to agree with the spec.
    expected = step_spec[contract_type]
    type_differs = contract_object.schema_type != expected.schema_type
    if type_differs or contract_object.schema_version != expected.schema_version:
        raise_schema_mismatch(
            expected_type=expected.schema_type,
            actual_type=contract_object.schema_type,
            expected_version=expected.schema_version,
            actual_version=contract_object.schema_version,
        )

    log.debug(
        f"Successfully loaded and validated contract object: {contract_object.schema_type} on step {step_name}.{contract_type}"
    )
    return contract_object
示例#13
0
import tempfile
import shutil

# Make ./src (when present) and the current working directory importable so
# the project modules imported below can be found.
if Path("src").exists():
    sys.path.append(str(Path("src")))
sys.path.append(str(Path.cwd().resolve()))
from main import main  # noqa

from utils.utils import setupLogger  # noqa

RUN_TYPES = [
    "main", "entrypoint.sh", "container interactive", "container pure"
]
# Environment values are strings, so RUN_TYPE has to be converted before it
# can be used as a list index (the default 0 selects "main").
RUN_TYPE = RUN_TYPES[int(os.environ.get("RUN_TYPE", 0))]

(rootLogger, buffer) = setupLogger().get_loggers()

# Dump the whole environment to the debug log.
for i in os.environ:
    rootLogger.debug(f"{i}:\t{os.environ.get(i)}")

# Parameters for the run come from tests/env_variables.yaml.
parameters = YAML.safe_load(
    (Path("tests") / "env_variables.yaml").read_text("utf-8"))

for param in parameters:
    # Log the parameter currently being processed (the previous message used
    # a stale loop variable left over from an earlier loop).
    rootLogger.debug(f"{param}:\t{parameters[param]}")
    # Nested mappings are serialised to YAML text; other values are used as is.
    if isinstance(parameters[param], dict):
        env_value = YAML.safe_dump(parameters[param])
    else:
        env_value = parameters[param]