Пример #1
0
def log_component_run(
        component_run: ComponentRun,
        set_dependencies_from_inputs=True,
        staleness_threshold: int = (60 * 60 * 24 * 30),
):
    """Record a client-facing ComponentRun through a ``Store``.

    The run is serialized with ``to_dictionary()``, its fields are copied
    onto a fresh store-side run object, and that object is committed.

    Args:
        component_run: The run to record.
        set_dependencies_from_inputs: When truthy, the store is asked to
            set the run's dependencies from its inputs.
        staleness_threshold: Forwarded unchanged to
            ``store.commit_component_run``. The default evaluates to
            60 * 60 * 24 * 30 (presumably seconds, i.e. 30 days -- confirm
            against the Store implementation).
    """
    store = Store(_db_uri)
    fields = component_run.to_dictionary()
    run_record = store.initialize_empty_component_run(
        component_run.component_name)

    def _as_pointers(entries):
        # Resolve every client-side I/O entry through the store.
        return [
            store.get_io_pointer(
                entry.name, entry.value, pointer_type=entry.pointer_type)
            for entry in entries
        ]

    # Timestamps and notes are copied over only when they are truthy.
    start_ts = fields["start_timestamp"]
    if start_ts:
        run_record.set_start_timestamp(start_ts)

    end_ts = fields["end_timestamp"]
    if end_ts:
        run_record.set_end_timestamp(end_ts)

    notes = fields["notes"]
    if notes:
        run_record.add_notes(notes)

    # Git/code metadata is always forwarded, whatever its value.
    run_record.set_git_hash(fields["git_hash"])
    run_record.set_git_tags(fields["git_tags"])
    run_record.set_code_snapshot(fields["code_snapshot"])

    # Attach inputs first, then outputs.
    run_record.add_inputs(_as_pointers(fields["inputs"]))
    run_record.add_outputs(_as_pointers(fields["outputs"]))

    # Create the component if it does not exist yet (empty description
    # and owner arguments are passed through as-is).
    create_component(component_run.component_name, "", "")

    # Optionally let the store set dependencies from the inputs.
    if set_dependencies_from_inputs:
        store.set_dependencies_from_inputs(run_record)

    # Dependencies named explicitly on the run: link the first entry of
    # the one-item history the store returns for each of them.
    for dependency_name in fields["dependencies"]:
        upstream_run = store.get_history(dependency_name, 1)[0]
        run_record.set_upstream(upstream_run)

    store.commit_component_run(run_record,
                               staleness_threshold=staleness_threshold)
Пример #2
0
class TestComponentRun(unittest.TestCase):
    """Unit tests for the client-facing ComponentRun object."""

    def setUp(self):
        """Build a fresh run, its expected serialization and mock I/O."""
        self.mock_component_run = ComponentRun("mock_component_run")
        # What to_dictionary() is expected to return for a run that was
        # only given a component name.
        self.mock_component_run_dict = {
            "component_name": "mock_component_run",
            "notes": "",
            "inputs": [],
            "outputs": [],
            "git_hash": None,
            "git_tags": None,
            "code_snapshot": None,
            "start_timestamp": None,
            "end_timestamp": None,
            "dependencies": [],
            "id": None,
            "stale": [],
            "test_result": None,
            "mlflow_run_id": None,
            "mlflow_run_params": None,
            "mlflow_run_metrics": None,
        }

        self.mock_inputs = [
            IOPointer("mock_input_1"),
            IOPointer("mock_input_2"),
        ]
        self.mock_outputs = [
            IOPointer("mock_output_1"),
            IOPointer("mock_output_2"),
        ]

    def testSerialize(self):
        """
        Test the serialization functionality.
        """
        self.assertEqual(
            self.mock_component_run.to_dictionary(),
            self.mock_component_run_dict,
        )

    def testSetStartEndError(self):
        """
        Test that setting start and end ts as non
        datetime types throws an error.
        """

        with self.assertRaises(TypeError):
            self.mock_component_run.set_start_timestamp("incorrect_type")

        with self.assertRaises(TypeError):
            self.mock_component_run.set_end_timestamp("incorrect_type")

    def testAddInputOutput(self):
        """
        Test adding inputs and outputs one at a time.
        """
        cr = copy.deepcopy(self.mock_component_run)
        for inp in self.mock_inputs:
            cr.add_input(inp)
        for out in self.mock_outputs:
            cr.add_output(out)

        # The expected values are derived from sets, whose iteration order
        # is not guaranteed, so compare contents without regard to order.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddInputsOutputs(self):
        """
        Test adding inputs and outputs as whole lists.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_outputs(self.mock_outputs)

        # Order-insensitive comparison; see testAddInputOutput.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddDuplicateInputs(self):
        """
        Test that adding the same inputs twice does not duplicate them.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_inputs(self.mock_inputs)

        # assertCountEqual also checks multiplicity, so a duplicated
        # input would still make this fail.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))

    def testAddNotes(self):
        """
        Test that a string assigned to notes is read back unchanged.
        """
        cr = copy.deepcopy(self.mock_component_run)
        expected_output = "this is a test note"
        cr.notes = expected_output

        self.assertEqual(cr.notes, expected_output)

    def testAddNotesError(self):
        """
        Test that adding non-str input to the notes attribute
        gives a TypeError
        """
        with self.assertRaises(TypeError):
            self.mock_component_run.notes = ["incorrect_type"]
Пример #3
0
class TestComponentRun(unittest.TestCase):
    """Unit tests for the client-facing ComponentRun object."""

    def setUp(self):
        """Build a fresh run, its expected serialization and mock I/O."""
        self.mock_component_run = ComponentRun("mock_component_run")
        # What to_dictionary() is expected to return for a run that was
        # only given a component name.
        self.mock_component_run_dict = {
            "component_name": "mock_component_run",
            "inputs": [],
            "outputs": [],
            "git_hash": None,
            "code_snapshot": None,
            "start_timestamp": None,
            "end_timestamp": None,
            "dependencies": [],
            "id": None,
            "stale": [],
        }

        self.mock_inputs = [IOPointer("mock_input_1"), IOPointer("mock_input_2")]
        self.mock_outputs = [IOPointer("mock_output_1"), IOPointer("mock_output_2")]

    def testSerialize(self):
        """
        Test the serialization functionality.
        """
        self.assertEqual(
            self.mock_component_run.to_dictionary(), self.mock_component_run_dict
        )

    def testSetStartEndError(self):
        """
        Test that setting start and end ts as non datetime types throws an error.
        """

        with self.assertRaises(TypeError):
            self.mock_component_run.set_start_timestamp("incorrect_type")

        with self.assertRaises(TypeError):
            self.mock_component_run.set_end_timestamp("incorrect_type")

    def testAddInputOutput(self):
        """
        Test adding inputs and outputs one at a time.
        """
        cr = copy.deepcopy(self.mock_component_run)
        for inp in self.mock_inputs:
            cr.add_input(inp)
        for out in self.mock_outputs:
            cr.add_output(out)

        # The expected values are derived from sets, whose iteration order
        # is not guaranteed, so compare contents without regard to order.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddInputsOutputs(self):
        """
        Test adding inputs and outputs as whole lists.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_outputs(self.mock_outputs)

        # Order-insensitive comparison; see testAddInputOutput.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))
        self.assertCountEqual(cr.outputs, list(set(self.mock_outputs)))

    def testAddDuplicateInputs(self):
        """
        Test that adding the same inputs twice does not duplicate them.
        """
        cr = copy.deepcopy(self.mock_component_run)
        cr.add_inputs(self.mock_inputs)
        cr.add_inputs(self.mock_inputs)

        # assertCountEqual also checks multiplicity, so a duplicated
        # input would still make this fail.
        self.assertCountEqual(cr.inputs, list(set(self.mock_inputs)))