Exemplo n.º 1
0
    def set_dependencies_from_inputs(self, component_run: ComponentRun):
        """Link component_run to the runs that produced its inputs.

        Collects the names of component_run's inputs and, for every recorded
        output path name equal to one of them, finds the largest ComponentRun
        id stored in the output association table (presumably the most recent
        producer of that path). All ComponentRuns found this way are passed
        together to ``component_run.set_upstream``. Nothing is changed when
        the run has no inputs or when no producer is found.

        Args:
            component_run: Run whose ``inputs`` are matched against recorded
                outputs; its upstream dependencies are set in place.
        """
        input_ids = [inp.name for inp in component_run.inputs]
        if not input_ids:
            return

        output_name = component_run_output_association.c.output_path_name
        latest_run_id = func.max(
            component_run_output_association.c.component_run_id)

        # One row per matched output path, holding the largest id of any run
        # recorded as writing to that path.
        rows = (self.session.query(latest_run_id).group_by(output_name).filter(
            output_name.in_(input_ids)).all())
        match_ids = [row[0] for row in rows]
        if not match_ids:
            # No recorded output matches any input, so there is nothing to
            # look up; returning here avoids a query with an empty IN list.
            return

        matches = (self.session.query(ComponentRun).filter(
            ComponentRun.id.in_(match_ids)).all())
        if not matches:
            return

        # Every matched producer (one per input path) becomes an upstream
        # dependency of this run.
        component_run.set_upstream(matches)
Exemplo n.º 2
0
    def commit_component_run(
            self,
            component_run: ComponentRun,
            staleness_threshold: int = (60 * 60 * 24 * 30),
    ):
        """Validate a component run, flag stale dependencies, and persist it.

        The run is first checked with ``check_completeness``; an informational
        message from that check is logged. Each dependency is then inspected
        and staleness messages are attached to the run before it is added to
        the session and committed.

        Args:
            component_run: The fully initialized run to store.
            staleness_threshold: Number of seconds between a dependency's
                start and this run's start above which the dependency is
                reported as stale. Defaults to 30 days.

        Raises:
            RuntimeError: If the completeness check reports failure.
        """
        completeness = component_run.check_completeness()
        if not completeness["success"]:
            raise RuntimeError(completeness["msg"])
        if completeness["msg"]:
            logging.info(completeness["msg"])

        seconds_per_day = 60 * 60 * 24
        for upstream in component_run.dependencies:
            # Staleness by age: the dependency started more than
            # staleness_threshold seconds before this run did.
            elapsed = (component_run.start_timestamp -
                       upstream.start_timestamp).total_seconds()
            if elapsed > staleness_threshold:
                days_diff = int(elapsed // seconds_per_day)
                component_run.add_staleness_message(
                    f"{upstream.component_name} (ID {upstream.id}) was run "
                    f"{days_diff} days ago.")

            # Staleness by supersession: the dependency's component has runs
            # between the dependency's start and this run's start.
            history = self.get_history(
                upstream.component_name,
                limit=None,
                date_lower=upstream.start_timestamp,
                date_upper=component_run.start_timestamp,
            )
            others = [run for run in history if component_run.id != run.id]
            # One entry is discounted from the total (presumably the
            # dependency itself, which falls inside the queried window).
            fresher_count = len(others) - 1
            if fresher_count > 0:
                noun = "run" if fresher_count == 1 else "runs"
                component_run.add_staleness_message(
                    f"{upstream.component_name} (ID {upstream.id}) has "
                    f"{fresher_count} fresher {noun} that "
                    "began before this component run started.")

        # Surface any collected staleness messages to the user
        if len(component_run.stale) > 0:
            logging.warning(component_run.stale)

        # Remove duplicate labels on every input and output pointer
        for pointer in component_run.inputs:
            pointer.dedup_labels()
        for pointer in component_run.outputs:
            pointer.dedup_labels()

        # Persist the run
        self.session.add(component_run)
        commit_note = (
            f"Committing ComponentRun {component_run.id} of type "
            f'"{component_run.component_name}" to the database.')
        logging.info(commit_note)
        self.session.commit()
Exemplo n.º 3
0
    def commit_component_run(
            self,
            component_run: ComponentRun,
            staleness_threshold: int = (60 * 60 * 24 * 30),
    ):
        """Commits a fully initialized component run to the DB.

        The run is checked with ``check_completeness`` first. Each dependency
        is then examined for staleness, and any findings are attached to the
        run through ``add_staleness_message`` before the run is added to the
        session and committed.

        Args:
            component_run: The run to validate and store.
            staleness_threshold: Number of seconds between a dependency's
                start and this run's start above which the dependency is
                reported as stale. Defaults to 30 days.

        Raises:
            RuntimeError: If the completeness check reports failure.
        """
        status_dict = component_run.check_completeness()
        if not status_dict["success"]:
            raise RuntimeError(status_dict["msg"])

        if status_dict["msg"]:
            logging.info(status_dict["msg"])

        # Check for staleness. https://github.com/loglabs/mltrace/issues/165#issue-891397631
        for dep in component_run.dependencies:
            # First case: more than staleness_threshold seconds separate the
            # dependency's start from this run's start
            time_diff = (component_run.start_timestamp -
                         dep.start_timestamp).total_seconds()
            if time_diff > staleness_threshold:
                days_diff = int(time_diff // (60 * 60 * 24))
                component_run.add_staleness_message(
                    f"{dep.component_name} (ID {dep.id}) was run {days_diff} days ago."
                )
            # Second case: there is a newer run of the dependency
            fresher_runs = self.get_history(
                dep.component_name,
                limit=None,
                date_lower=dep.start_timestamp,
                date_upper=component_run.start_timestamp,
            )
            # One entry in the window is discounted (presumably the dependency
            # itself). Only report when something is left over; an empty
            # history must not yield a "-1 fresher run(s)" message.
            if len(fresher_runs) > 1:
                component_run.add_staleness_message(
                    f"{dep.component_name} (ID {dep.id}) has {len(fresher_runs) - 1} fresher run(s) that began before this component run started."
                )

        # Commit to DB
        self.session.add(component_run)
        logging.info(
            f'Committing ComponentRun of type "{component_run.component_name}" to the database.'
        )
        self.session.commit()
Exemplo n.º 4
0
    def commit_component_run(self, component_run: ComponentRun):
        """Validate a component run and write it to the database.

        Args:
            component_run: The fully initialized run to store.

        Raises:
            RuntimeError: If ``check_completeness`` reports failure; the
                exception carries the message from that check.
        """
        completeness = component_run.check_completeness()
        succeeded = completeness["success"]
        note = completeness["msg"]

        if not succeeded:
            raise RuntimeError(note)

        # A non-empty message on success is informational only
        if note:
            logging.info(note)

        # Persist the run
        self.session.add(component_run)
        commit_note = f'Committing ComponentRun of type "{component_run.component_name}" to the database.'
        logging.info(commit_note)
        self.session.commit()
Exemplo n.º 5
0
    def testCompletenessWithStartEndDeps(self):
        """A run with timestamps, I/O, and an upstream run passes the
        completeness check without any warning message."""
        cr = copy.deepcopy(self.mock_component_run)
        cr.set_start_timestamp()
        cr.set_end_timestamp()

        # Add I/O and dependencies
        cr.add_input(IOPointer("input"))
        cr.add_output(IOPointer("output"))
        cr.set_upstream(ComponentRun("mock_upstream"))

        status = cr.check_completeness()
        self.assertTrue(status["success"])

        # Assert that there are no warning statements; assertEqual reports
        # the actual length (and the message itself) when this fails.
        msg = status["msg"]
        self.assertEqual(len(msg), 0, f"unexpected warning message: {msg!r}")
Exemplo n.º 6
0
class TestComponentRun(unittest.TestCase):
    """Tests for ComponentRun.check_completeness at increasing levels of
    populated information."""

    def setUp(self):
        # Bare run with only a component name; each test works on a copy.
        self.mock_component_run = ComponentRun("mock")

    def testCompletenessWithNoInfo(self):
        """A run with nothing but a name fails the completeness check."""
        status = self.mock_component_run.check_completeness()
        self.assertFalse(status["success"])

    def testCompletenessWithStartEnd(self):
        """Start and end timestamps alone are enough to pass, with
        a warning message."""
        cr = copy.deepcopy(self.mock_component_run)
        cr.set_start_timestamp()
        cr.set_end_timestamp()

        status = cr.check_completeness()
        self.assertTrue(status["success"])

        # Assert that there are warning statements
        # because of no I/O or dependencies
        msg = status["msg"]
        self.assertGreater(len(msg), 0)

    def testCompletenessWithStartEndDeps(self):
        """Timestamps plus I/O and an upstream run pass with no warning
        message."""
        cr = copy.deepcopy(self.mock_component_run)
        cr.set_start_timestamp()
        cr.set_end_timestamp()

        # Add I/O and dependencies
        cr.add_input(IOPointer("input"))
        cr.add_output(IOPointer("output"))
        cr.set_upstream(ComponentRun("another_component"))

        status = cr.check_completeness()
        self.assertTrue(status["success"])

        # Assert that there are no warning statements; assertEqual reports
        # the actual length (and the message itself) when this fails.
        msg = status["msg"]
        self.assertEqual(len(msg), 0, f"unexpected warning message: {msg!r}")
Exemplo n.º 7
0
 def setUp(self):
     """Create the ComponentRun named "mock" that the tests use as a
     fixture."""
     fixture = ComponentRun("mock")
     self.mock_component_run = fixture
Exemplo n.º 8
0
 def initialize_empty_component_run(self,
                                    component_name: str) -> ComponentRun:
     """Build a new ComponentRun for the named component and return it.

     The run is only constructed here; nothing is added to the session or
     written to the database."""
     return ComponentRun(component_name=component_name)