Пример #1
0
def compute_metric(
    task_name: str,
    metric_fn: typing.Callable,
    window_size: int = None,
):
    """Ask the store to compute a metric for a task.

    A Store is opened on the module-level ``_db_uri`` and the three
    arguments are forwarded positionally to ``Store.compute_metric``.
    The store's result, if any, is not returned.
    """
    Store(_db_uri).compute_metric(task_name, metric_fn, window_size)
Пример #2
0
def log_output(
    task_name: str,
    identifier: str,
    val: float,
):
    """Record an output value for ``identifier`` under ``task_name``.

    The arguments are forwarded by keyword to ``Store.log_output`` on a
    Store opened with the module-level ``_db_uri``. Nothing is returned.
    """
    Store(_db_uri).log_output(
        identifier=identifier,
        task_name=task_name,
        val=val,
    )
Пример #3
0
def review_flagged_outputs():
    """Return the store's review of the currently flagged outputs.

    Delegates to ``Store.review_flagged_outputs``, which is expected to
    find the ComponentRuns common to the group of flagged outputs and
    return them with their occurrence counts, sorted by descending count
    and then alphabetically.
    """
    flagged_review = Store(_db_uri).review_flagged_outputs()
    return flagged_review
Пример #4
0
def get_history(component_name: str,
                limit: int = 10) -> typing.List[ComponentRun]:
    """Fetch a component's run history as client-facing ComponentRuns.

    Args:
        component_name: name of the component whose history is requested.
        limit: forwarded to ``Store.get_history`` as its second argument.

    Returns:
        One ComponentRun per store-side run, in the order the store
        returned them.
    """
    store = Store(_db_uri)

    def _to_client_run(run):
        # Read the relationship attributes before copying ``__dict__``,
        # exactly as the store-side objects were read originally.
        input_dicts = [
            IOPointer.from_dictionary(pointer.__dict__).to_dictionary()
            for pointer in run.inputs
        ]
        output_dicts = [
            IOPointer.from_dictionary(pointer.__dict__).to_dictionary()
            for pointer in run.outputs
        ]
        upstream_names = [
            upstream.component_name for upstream in run.dependencies
        ]

        fields = copy.deepcopy(run.__dict__)
        fields["inputs"] = input_dicts
        fields["outputs"] = output_dicts
        fields["dependencies"] = upstream_names
        return ComponentRun.from_dictionary(fields)

    return [
        _to_client_run(run)
        for run in store.get_history(component_name, limit)
    ]
Пример #5
0
def get_io_pointer(io_pointer_id: str,
                   io_pointer_val: typing.Any = None,
                   create=True):
    """Look up an IO pointer in the store and return it as an IOPointer.

    Args:
        io_pointer_id: identifier passed to ``Store.get_io_pointer``.
        io_pointer_val: optional value passed along with the identifier.
        create: forwarded as the ``create`` keyword of the store call.

    Returns:
        An IOPointer built from the store-side object's ``__dict__``.
    """
    stored_pointer = Store(_db_uri).get_io_pointer(
        io_pointer_id, io_pointer_val, create=create)
    return IOPointer.from_dictionary(stored_pointer.__dict__)
Пример #6
0
def create_component(name: str,
                     description: str,
                     owner: str,
                     tags: typing.Optional[typing.List[str]] = None):
    """Creates a component entity in the database.

    Args:
        name: component name.
        description: free-text description of the component.
        owner: owner of the component.
        tags: optional list of tag names. When omitted, a fresh empty list
            is passed to the store for every call, so no list object is
            shared between calls.
    """
    # A literal ``[]`` default would be created once and shared by all calls;
    # build a new list per call instead.
    if tags is None:
        tags = []
    store = Store(_db_uri)
    store.create_component(name, description, owner, tags)
Пример #7
0
def log_feedback(
    task_name: str,
    identifier: str,
    val: float,
):
    """Record a feedback value for ``identifier`` under ``task_name``.

    The arguments are forwarded by keyword to ``Store.log_feedback`` on a
    Store opened with the module-level ``_db_uri``. Nothing is returned.
    """
    Store(_db_uri).log_feedback(
        identifier=identifier,
        task_name=task_name,
        val=val,
    )
Пример #8
0
def log_component_run(
        component_run: ComponentRun,
        set_dependencies_from_inputs=True,
        staleness_threshold: int = (60 * 60 * 24 * 30),
):
    """Persist a client-facing ComponentRun through the Store.

    The run is flattened with ``to_dictionary()``, copied attribute by
    attribute onto a fresh store-side run, linked to its dependencies and
    committed.

    Args:
        component_run: the client-facing run to log.
        set_dependencies_from_inputs: when truthy, the store is asked to
            set dependencies from the run's inputs before the explicitly
            listed dependencies are added.
        staleness_threshold: forwarded unchanged to
            ``Store.commit_component_run``. The default is
            60 * 60 * 24 * 30 (presumably seconds, i.e. 30 days; confirm
            against the Store implementation).
    """
    store = Store(_db_uri)
    fields = component_run.to_dictionary()
    sql_run = store.initialize_empty_component_run(
        component_run.component_name)

    # Timestamps and notes are optional: copy them only when truthy.
    start_ts = fields["start_timestamp"]
    if start_ts:
        sql_run.set_start_timestamp(start_ts)

    end_ts = fields["end_timestamp"]
    if end_ts:
        sql_run.set_end_timestamp(end_ts)

    notes = fields["notes"]
    if notes:
        sql_run.add_notes(notes)

    # Code-version attributes are always copied, whatever their value.
    sql_run.set_git_hash(fields["git_hash"])
    sql_run.set_git_tags(fields["git_tags"])
    sql_run.set_code_snapshot(fields["code_snapshot"])

    def _resolve(pointers):
        """Map client-side pointers to the store's IO pointer objects."""
        return [
            store.get_io_pointer(pointer.name,
                                 pointer.value,
                                 pointer_type=pointer.pointer_type)
            for pointer in pointers
        ]

    # Inputs are resolved and attached before outputs.
    sql_run.add_inputs(_resolve(fields["inputs"]))
    sql_run.add_outputs(_resolve(fields["outputs"]))

    # Make sure a component with this name exists.
    create_component(component_run.component_name, "", "")

    # Dependencies inferred by the store from the inputs.
    if set_dependencies_from_inputs:
        store.set_dependencies_from_inputs(sql_run)

    # Dependencies named explicitly on the run: link to the first entry of
    # each dependency's history (fetched with a limit of 1).
    for dependency_name in fields["dependencies"]:
        upstream_run = store.get_history(dependency_name, 1)[0]
        sql_run.set_upstream(upstream_run)

    store.commit_component_run(sql_run,
                               staleness_threshold=staleness_threshold)
Пример #9
0
def get_component_information(component_name: str) -> Component:
    """Fetch one component and return it as a client-facing Component.

    Args:
        component_name: name passed to ``Store.get_component``.

    Returns:
        A Component built from the store-side object's attributes, with
        ``tags`` replaced by the list of tag names.

    Raises:
        RuntimeError: if the store returns a falsy result for the name.
    """
    stored = Store(_db_uri).get_component(component_name)
    if not stored:
        raise RuntimeError(f"Component with name {component_name} not found.")

    tag_names = [stored_tag.name for stored_tag in stored.tags]
    fields = copy.deepcopy(stored.__dict__)
    fields["tags"] = tag_names
    return Component.from_dictionary(fields)
Пример #10
0
def get_components_with_tag(tag: str) -> typing.List[Component]:
    """Return client-facing Components for everything the store lists
    under ``tag``.

    Each store-side component is copied into a dictionary, its ``tags``
    entry is replaced by the list of tag names, and a Component is built
    from that dictionary.
    """
    store = Store(_db_uri)

    def _to_client_component(stored):
        tag_names = [stored_tag.name for stored_tag in stored.tags]
        fields = copy.deepcopy(stored.__dict__)
        fields["tags"] = tag_names
        return Component.from_dictionary(fields)

    return [
        _to_client_component(stored)
        for stored in store.get_components_with_tag(tag)
    ]
Пример #11
0
def get_components(tag="", owner="") -> typing.List[Component]:
    """Return client-facing Components selected by tag and/or owner.

    ``tag`` and ``owner`` are forwarded by keyword to
    ``Store.get_components``; with the empty-string defaults the store is
    expected to return all components. Each result has its ``tags``
    replaced by the list of tag names before conversion.
    """
    store = Store(_db_uri)

    def _to_client_component(stored):
        tag_names = [stored_tag.name for stored_tag in stored.tags]
        fields = copy.deepcopy(stored.__dict__)
        fields["tags"] = tag_names
        return Component.from_dictionary(fields)

    return [
        _to_client_component(stored)
        for stored in store.get_components(tag=tag, owner=owner)
    ]
Пример #12
0
def get_component_run_information(component_run_id: str) -> ComponentRun:
    """Returns a ComponentRun object.

    Args:
        component_run_id: id passed to ``Store.get_component_run``.

    Returns:
        A ComponentRun built from the store-side run. Inputs and outputs
        are converted to IOPointer dictionaries, dependencies to component
        names, and a truthy code snapshot is decoded from UTF-8 bytes to
        ``str``.

    Raises:
        RuntimeError: if the store returns a falsy result for the id.
    """
    store = Store(_db_uri)
    cr = store.get_component_run(component_run_id)
    if not cr:
        # Report the requested id; the bare name ``id`` would interpolate
        # the builtin function instead.
        raise RuntimeError(
            f"Component run with id {component_run_id} not found.")
    inputs = [
        IOPointer.from_dictionary(iop.__dict__).to_dictionary() for iop in cr.inputs
    ]
    outputs = [
        IOPointer.from_dictionary(iop.__dict__).to_dictionary() for iop in cr.outputs
    ]
    dependencies = [dep.component_name for dep in cr.dependencies]
    d = copy.deepcopy(cr.__dict__)
    if cr.code_snapshot:
        # The snapshot is stored as bytes; expose it to clients as text.
        d.update({"code_snapshot": str(cr.code_snapshot.decode("utf-8"))})
    d.update({"inputs": inputs, "outputs": outputs, "dependencies": dependencies})
    return ComponentRun.from_dictionary(d)
Пример #13
0
def backtrace(output_pointer: str):
    """Trace an output pointer back through the runs that led to it.

    Delegates to ``Store.trace`` and converts every store-side run into a
    client-facing ComponentRun.

    Returns:
        A list of ``(level, ComponentRun)`` tuples, where level is how
        many hops the run is from the run that produced the output.
    """
    store = Store(_db_uri)

    def _to_client_run(run):
        # Unlike the history helpers, pointers stay IOPointer objects here.
        input_pointers = [
            IOPointer.from_dictionary(pointer.__dict__)
            for pointer in run.inputs
        ]
        output_pointers = [
            IOPointer.from_dictionary(pointer.__dict__)
            for pointer in run.outputs
        ]
        upstream_names = [
            upstream.component_name for upstream in run.dependencies
        ]

        fields = copy.deepcopy(run.__dict__)
        fields["inputs"] = input_pointers
        fields["outputs"] = output_pointers
        fields["dependencies"] = upstream_names
        return ComponentRun.from_dictionary(fields)

    return [
        (level, _to_client_run(run))
        for level, run in store.trace(output_pointer)
    ]
Пример #14
0
def get_history(
    component_name: str,
    limit: int = 10,
    date_lower: typing.Union[datetime, str] = datetime.min,
    date_upper: typing.Union[datetime, str] = datetime.max,
) -> typing.List[ComponentRun]:
    """Fetch a component's run history within an optional date window.

    Args:
        component_name: name of the component whose history is requested.
        limit: forwarded to ``Store.get_history``.
        date_lower: lower bound forwarded to the store; a falsy value is
            replaced by ``datetime.min``.
        date_upper: upper bound forwarded to the store; a falsy value is
            replaced by ``datetime.max``.

    Returns:
        One client-facing ComponentRun per store-side run, in the order
        the store returned them.
    """
    store = Store(_db_uri)

    # Falsy bounds (e.g. None or "") fall back to the widest window.
    date_lower = date_lower or datetime.min
    date_upper = date_upper or datetime.max

    def _to_client_run(run):
        input_dicts = [
            IOPointer.from_dictionary(pointer.__dict__).to_dictionary()
            for pointer in run.inputs
        ]
        output_dicts = [
            IOPointer.from_dictionary(pointer.__dict__).to_dictionary()
            for pointer in run.outputs
        ]
        upstream_names = [
            upstream.component_name for upstream in run.dependencies
        ]

        fields = copy.deepcopy(run.__dict__)
        fields["inputs"] = input_dicts
        fields["outputs"] = output_dicts
        fields["dependencies"] = upstream_names
        return ComponentRun.from_dictionary(fields)

    stored_runs = store.get_history(component_name, limit, date_lower,
                                    date_upper)
    return [_to_client_run(run) for run in stored_runs]
Пример #15
0
def tag_component(component_name: str, tags: typing.List[str]):
    """Attach ``tags`` to the component named ``component_name``.

    Delegates to ``Store.add_tags_to_component``; nothing is returned.
    """
    Store(_db_uri).add_tags_to_component(component_name, tags)
Пример #16
0
def web_trace(output_id: str):
    """Return whatever ``Store.web_trace`` produces for ``output_id``."""
    trace_tree = Store(_db_uri).web_trace(output_id)
    return trace_tree
Пример #17
0
def get_recent_run_ids(limit: int = 50):
    """Return the most recent component run ids reported by the store.

    ``limit`` is forwarded to ``Store.get_recent_run_ids``.
    """
    recent_ids = Store(_db_uri).get_recent_run_ids(limit)
    return recent_ids
Пример #18
0
def clean_db():
    """Delete the database and reinitialize its tables.

    All of the work happens inside the Store constructor when it is
    called with ``delete_first=True``; the instance itself is not needed.
    """
    Store(_db_uri, delete_first=True)
Пример #19
0
def retrieve_retracted_labels():
    """Return the result of ``Store.retrieve_deleted_labels``."""
    deleted_labels = Store(_db_uri).retrieve_deleted_labels()
    return deleted_labels
Пример #20
0
def retrieve_io_pointers_for_label(label_id: str):
    """Return the IO pointers the store associates with ``label_id``.

    Each store-side pointer is converted to a client-facing IOPointer
    built from its ``__dict__``.
    """
    stored_pointers = Store(_db_uri).retrieve_io_pointers_for_label(label_id)
    client_pointers = []
    for stored_pointer in stored_pointers:
        client_pointers.append(
            IOPointer.from_dictionary(stored_pointer.__dict__))
    return client_pointers
Пример #21
0
def get_labels() -> typing.List[str]:
    """Return the ``id`` of every label returned by ``Store.get_all_labels``."""
    label_ids = []
    for label in Store(_db_uri).get_all_labels():
        label_ids.append(label.id)
    return label_ids
Пример #22
0
        def wrapper(*args, **kwargs):
            """Run ``func`` under a tracer and log the run to the store.

            Uses these names from the enclosing scope: ``func``,
            ``component_name``, ``input_vars``, ``output_vars``, ``inputs``,
            ``outputs`` and ``endpoint``.

            While ``func`` executes, a trace function inspects the local
            variables of its frame on return and records the values of the
            variables named in ``input_vars`` / ``output_vars`` as IO
            pointers. Afterwards the explicitly listed ``inputs`` /
            ``outputs`` are added, dependencies are set from the inputs,
            the git hash and a source snapshot are attached when available,
            and the run is committed.

            Returns:
                Whatever ``func(*args, **kwargs)`` returns.
            """
            # Get function information
            filename = inspect.getfile(func)
            function_name = func.__name__

            # Construct component run object
            store = Store(_db_uri)
            component_run = store.initialize_empty_component_run(
                component_name)
            component_run.set_start_timestamp()

            # Define trace helper
            def trace_helper(frame, event, arg):
                if event != "return":
                    # Per the sys.settrace contract a local trace function
                    # returns itself to keep tracing this scope; returning
                    # None is documented as turning tracing off.
                    return trace_helper

                logging.info(f"Inspecting {frame.f_code.co_filename}")
                input_pointers = []
                output_pointers = []
                local_vars = frame.f_locals
                # Add input_vars and output_vars as pointers
                for var in input_vars:
                    if var not in local_vars:
                        logging.debug(
                            f"Variable {var} not in current stack frame.")
                        continue
                    val = local_vars[var]
                    # Identity check: ``== None`` invokes the value's own
                    # __eq__, which may not return a plain bool.
                    if val is None:
                        logging.debug(f"Variable {var} has value {val}.")
                        continue
                    if isinstance(val, list):
                        input_pointers += store.get_io_pointers(val)
                    else:
                        input_pointers.append(store.get_io_pointer(str(val)))
                for var in output_vars:
                    if var not in local_vars:
                        logging.debug(
                            f"Variable {var} not in current stack frame.")
                        continue
                    val = local_vars[var]
                    if val is None:
                        logging.debug(f"Variable {var} has value {val}.")
                        continue
                    # Outputs are registered as ENDPOINT pointers when the
                    # enclosing ``endpoint`` flag is set.
                    if isinstance(val, list):
                        output_pointers += (store.get_io_pointers(
                            val, PointerTypeEnum.ENDPOINT) if endpoint else
                                            store.get_io_pointers(val))
                    else:
                        output_pointers += ([
                            store.get_io_pointer(str(val),
                                                 PointerTypeEnum.ENDPOINT)
                        ] if endpoint else [store.get_io_pointer(str(val))])
                component_run.add_inputs(input_pointers)
                component_run.add_outputs(output_pointers)

            # Define tracer
            def tracer(frame, event, arg):
                # Only the frame of ``func`` itself gets a local tracer.
                if event == "call":
                    if (frame.f_code.co_name == function_name
                            and frame.f_code.co_filename == filename):
                        return trace_helper
                    return

            # Run function under the tracer, then put back whatever tracer
            # (debugger, coverage, ...) was installed before.
            previous_tracer = sys.gettrace()
            sys.settrace(tracer)
            try:
                value = func(*args, **kwargs)
            finally:
                sys.settrace(previous_tracer)

            # Log relevant info
            component_run.set_end_timestamp()
            input_pointers = [store.get_io_pointer(inp) for inp in inputs]
            output_pointers = ([
                store.get_io_pointer(out, PointerTypeEnum.ENDPOINT)
                for out in outputs
            ] if endpoint else [store.get_io_pointer(out) for out in outputs])
            component_run.add_inputs(input_pointers)
            component_run.add_outputs(output_pointers)
            store.set_dependencies_from_inputs(component_run)

            # Add code versions (best effort: a missing repo is not an error)
            try:
                repo = git.Repo(search_parent_directories=True)
                component_run.set_git_hash(str(repo.head.object.hexsha))
            except Exception:
                logging.info("No git repo found.")

            # Add source code if its UTF-8 encoding is less than 2^16 bytes.
            # UTF-8 accepts any source text, whereas ASCII raises on the
            # first non-ASCII character.
            func_source_code = inspect.getsource(func)
            code_snapshot = func_source_code.encode("utf-8")
            if len(code_snapshot) < 2**16:
                component_run.set_code_snapshot(code_snapshot)

            # Commit component run object to the DB
            store.commit_component_run(component_run)

            return value
Пример #23
0
class TestStore(unittest.TestCase):
    """Tests for Store: components, runs, tags, IO pointers and traces."""

    def setUp(self):
        """Build a Store from the "test" URI before each test."""
        self.store = Store("test")

    def _make_component(self):
        """Create the component that most tests operate on."""
        self.store.create_component("test_component", "test_description", "shreya")

    def _timed_run(self):
        """Return a new "test_component" run with both timestamps set."""
        run = self.store.initialize_empty_component_run("test_component")
        run.set_start_timestamp()
        run.set_end_timestamp()
        return run

    def _commit_with_dependencies(self, run):
        """Set the run's dependencies from its inputs, then commit it."""
        self.store.set_dependencies_from_inputs(run)
        self.store.commit_component_run(run)

    def testComponent(self):
        """A created component is retrievable by name and by owner."""
        self._make_component()
        fetched = self.store.get_component("test_component")
        self.assertEqual(fetched.name, "test_component")

        # Retrieve components with owner
        owned = self.store.get_components_with_owner("shreya")
        self.assertEqual(1, len(owned))

    def testCompleteComponentRun(self):
        """A fully populated run is committed and shows up in the history."""
        self._make_component()

        # Create component run
        run = self._timed_run()
        run.add_input(IOPointer("inp"))
        run.add_output(IOPointer("out"))
        self.store.commit_component_run(run)

        # Test retrieval
        history = self.store.get_history("test_component", limit=None)
        self.assertEqual(1, len(history))
        self.assertEqual(history[0], run)

    def testIncompleteComponentRun(self):
        """Committing a run without timestamps raises RuntimeError."""
        self._make_component()

        # Deliberately skip the timestamps
        bare_run = self.store.initialize_empty_component_run("test_component")
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(bare_run)

    def testTags(self):
        """Tags added after creation are returned with the component."""
        self._make_component()

        # Add tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag2"])

        # Test retrieval
        fetched = self.store.get_component("test_component")
        tag_names = [tag.name for tag in fetched.tags]
        self.assertEqual(fetched.name, "test_component")
        self.assertEqual(set(tag_names), {"tag1", "tag2"})

    def testDuplicateTags(self):
        """Adding the same tag twice stores it once."""
        self._make_component()

        # Add duplicate tags
        self.store.add_tags_to_component("test_component", ["tag1", "tag1"])

        # Test retrieval
        fetched = self.store.get_component("test_component")
        tag_names = [tag.name for tag in fetched.tags]
        self.assertEqual(fetched.name, "test_component")
        self.assertEqual(tag_names, ["tag1"])

    def testIOPointer(self):
        """get_io_pointer raises without create, and is stable with it."""
        # Test there is no IOPointer
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Create IOPointer, then fetch it again
        first_fetch = self.store.get_io_pointer("iop")
        second_fetch = self.store.get_io_pointer("iop")

        self.assertEqual(first_fetch, second_fetch)

    def testIOPointers(self):
        """Bulk pointer retrieval returns the same pointers on repeat."""
        names = [f"iop_{i}" for i in range(100)]
        first_batch = self.store.get_io_pointers(names)
        second_batch = self.store.get_io_pointers(names)

        self.assertEqual(set(first_batch), set(second_batch))

    def testSetDependenciesFromInputs(self):
        """A run consuming two outputs depends on a producer of each."""
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # For each output pointer, create the component and then two runs
        # that read "inp" and produce that pointer.
        for produced in (out, another_out):
            self._make_component()
            for _ in range(2):
                producer = self._timed_run()
                producer.add_input(inp)
                producer.add_output(produced)
                self.store.commit_component_run(producer)

        # Create new component run that consumes both output pointers
        consumer = self._timed_run()
        consumer.add_inputs([out, another_out])
        self._commit_with_dependencies(consumer)

        # Retrieve latest component run and check dependencies
        history = self.store.get_history("test_component", limit=None)
        latest_dependencies = history[0].dependencies
        self.assertTrue(history[1] in latest_dependencies)
        self.assertTrue(history[3] in latest_dependencies)

    def _set_up_computation(self):
        """Build the small DAG of four runs used by the trace tests."""
        # Create component and IOPointers
        self._make_component()
        first, second, third, fourth = [
            self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)
        ]

        # Runs 1 and 2: no inputs, both produce the first pointer
        for _ in range(2):
            source_run = self._timed_run()
            source_run.add_output(first)
            self._commit_with_dependencies(source_run)

        # Run 3: consumes the first pointer, produces the second and third
        middle_run = self._timed_run()
        middle_run.add_input(first)
        middle_run.add_outputs([second, third])
        self._commit_with_dependencies(middle_run)

        # Run 4: consumes the third pointer, produces the fourth
        final_run = self._timed_run()
        final_run.add_input(third)
        final_run.add_output(fourth)
        self._commit_with_dependencies(final_run)

    def testTrace(self):
        """Tracing iop_4 walks back through runs 4, 3 and 2."""
        self._set_up_computation()

        # Call trace functionality
        levels_and_ids = [
            (level, run.id) for level, run in self.store.trace("iop_4")
        ]

        self.assertEqual(levels_and_ids, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        """Tracing an unknown pointer raises RuntimeError in both APIs."""
        for trace_fn in (self.store.trace, self.store.web_trace):
            with self.assertRaises(RuntimeError):
                trace_fn("some_weird_pointer")

    def testWebTrace(self):
        """web_trace returns the expected nested node structure."""
        self._set_up_computation()

        # Expected tree, built from the innermost run outwards.
        # The ordering is nondeterministic.
        run_2_node = {
            "id": "componentrun_2",
            "label": "test_component",
            "hasCaret": True,
            "isExpanded": True,
            "childNodes": [
                {
                    "id": "iopointer_iop_1",
                    "label": "iop_1",
                    "hasCaret": False,
                    "parent": "componentrun_2",
                }
            ],
        }
        run_3_node = {
            "id": "componentrun_3",
            "label": "test_component",
            "hasCaret": True,
            "isExpanded": True,
            "childNodes": [
                {
                    "id": "iopointer_iop_2",
                    "label": "iop_2",
                    "hasCaret": False,
                    "parent": "componentrun_3",
                },
                {
                    "id": "iopointer_iop_3",
                    "label": "iop_3",
                    "hasCaret": False,
                    "parent": "componentrun_3",
                },
                run_2_node,
            ],
        }
        run_4_node = {
            "id": "componentrun_4",
            "label": "test_component",
            "hasCaret": True,
            "isExpanded": True,
            "childNodes": [
                {
                    "id": "iopointer_iop_4",
                    "label": "iop_4",
                    "hasCaret": False,
                    "parent": "componentrun_4",
                },
                run_3_node,
            ],
        }
        expected_res = [run_4_node]

        actual = self.store.web_trace("iop_4")

        self.assertEqual(actual, expected_res)
Пример #24
0
class TestDags(unittest.TestCase):
    """Tests Store.trace on differently shaped graphs of component runs."""

    def setUp(self):
        """Build a Store from the "test" URI before each test."""
        self.store = Store("test")

    def testLinkedList(self):
        """A chain of runs is traced back from its final output."""
        # Create chain of component runs
        expected_result = []
        num_runs = 10
        for i in range(1, num_runs + 1):
            self.store.create_component(f"mock_component_{i}", "", "")
            inp = self.store.get_io_pointer(f"iop_{i}")
            out = self.store.get_io_pointer(f"iop_{i + 1}")
            cr = self.store.initialize_empty_component_run(
                f"mock_component_{i}")
            cr.set_start_timestamp()
            cr.set_end_timestamp()
            cr.add_input(inp)
            cr.add_output(out)
            self.store.set_dependencies_from_inputs(cr)
            self.store.commit_component_run(cr)
            expected_result.append((num_runs - i, i))

        # Reverse the expected result
        expected_result.reverse()

        # Trace the final output
        trace = self.store.trace("iop_11")
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(expected_result, level_id)

    def testVersionedComputation(self):
        """Repeated identical runs trace back to the latest run only."""
        # Run the same computation many times
        self.store.create_component("mock_component", "", "")
        num_runs = 10
        for i in range(1, num_runs + 1):
            inp = self.store.get_io_pointer("inp")
            out = self.store.get_io_pointer("out")
            cr = self.store.initialize_empty_component_run("mock_component")
            cr.set_start_timestamp()
            cr.set_end_timestamp()
            cr.add_input(inp)
            cr.add_output(out)
            self.store.set_dependencies_from_inputs(cr)
            self.store.commit_component_run(cr)

        # Trace the out pointer. Only most recent run ID should show.
        trace = self.store.trace("out")
        self.assertEqual(len(trace), 1)
        self.assertEqual(trace[0][0], 0)
        self.assertEqual(trace[0][1].id, num_runs)

    def testTree(self):
        """A binary tree of runs is traced from its last output pointer."""
        # Create a binary tree of component runs, num_levels levels deep
        # (currently 2: one root run and its two child runs)
        num_levels = 2
        global cr_counter
        global iop_counter
        cr_counter = 1
        iop_counter = 1

        def create_tree(level, inp):
            """Create one run consuming ``inp`` and recurse on its outputs."""
            if level == num_levels:
                return

            global cr_counter
            global iop_counter

            self.store.create_component(f"mock_component_{cr_counter}", "", "")
            cr = self.store.initialize_empty_component_run(
                f"mock_component_{cr_counter}")
            cr_counter += 1
            cr.set_start_timestamp()
            cr.set_end_timestamp()

            # Create output pointers
            out1 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1
            out2 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1

            # Add and commit component run
            cr.add_input(inp)
            cr.add_outputs([out1, out2])
            self.store.set_dependencies_from_inputs(cr)
            self.store.commit_component_run(cr)

            # Create left and right trees
            create_tree(level + 1, out1)
            create_tree(level + 1, out2)

        # Create first input pointer and tree of computation
        inp = self.store.get_io_pointer(f"iop_{iop_counter}")
        iop_counter += 1
        create_tree(0, inp)

        # Grab last iop id and trace it
        last_iop_id = f"iop_{iop_counter - 1}"
        trace = self.store.trace(last_iop_id)
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(level_id, [(0, 3), (1, 1)])

    def testCycle(self):
        """Two runs feeding each other's pointers still trace cleanly."""
        # Create cycle. Since dependencies are versioned, we shouldn't run
        # into problems.
        # Create io pointers and components
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")

        # Create component runs
        cr = self.store.initialize_empty_component_run("component_1")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(iop1)
        cr.add_output(iop2)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        cr = self.store.initialize_empty_component_run("component_2")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(iop2)
        cr.add_output(iop1)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Trace both pointers
        trace_1 = [(level, cr.id) for level, cr in self.store.trace("iop1")]
        trace_2 = [(level, cr.id) for level, cr in self.store.trace("iop2")]
        self.assertEqual(trace_1, [(0, 2), (1, 1)])
        self.assertEqual(trace_2, [(0, 1)])

    def testStaleUpdate(self):
        """Depending on a run that has a fresher sibling is flagged stale."""
        # Create computation with stale update.
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        iop3 = self.store.get_io_pointer("iop3")
        iop4 = self.store.get_io_pointer("iop4")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")

        # Create first component
        cr = self.store.initialize_empty_component_run("component_1")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(iop1)
        cr.add_output(iop2)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Create second component run
        cr = self.store.initialize_empty_component_run("component_1")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(iop1)
        cr.add_output(iop3)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Create third component run that depends on the first (stale update)
        cr = self.store.initialize_empty_component_run("component_2")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(iop2)
        cr.add_output(iop4)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Trace iop4
        trace = [(level, cr.id, cr.stale)
                 for level, cr in self.store.trace("iop4")]
        res = [
            (
                0,
                3,
                [
                    "component_1 (ID 1) has 1 fresher run that began " +
                    "before this component run started."
                ],
            ),
            (1, 1, []),
        ]
        self.assertEqual(trace, res)

    def testStaleTime(self):
        """Depending on a run that started long ago is flagged stale."""
        # Local import: only this test needs timedelta.
        from datetime import timedelta

        # Create computation with stale update.
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        iop3 = self.store.get_io_pointer("iop3")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")
        now = datetime.utcnow()

        # Create first component, started 60 days ago. Plain day arithmetic
        # is used because replacing only the month/year of ``now`` raises
        # ValueError when today's day number does not exist in the target
        # month (e.g. running on the 30th of April).
        cr = self.store.initialize_empty_component_run("component_1")
        start_date = now - timedelta(days=60)
        cr.set_start_timestamp(start_date)
        cr.set_end_timestamp()
        cr.add_input(iop1)
        cr.add_output(iop2)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Create second component run
        cr = self.store.initialize_empty_component_run("component_2")
        cr.set_start_timestamp()
        cr.set_end_timestamp()
        cr.add_input(iop2)
        cr.add_output(iop3)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Trace
        trace = [(level, cr.id, cr.stale)
                 for level, cr in self.store.trace("iop3")]
        res = [
            (
                0,
                2,
                [
                    "component_1 (ID 1) was run " +
                    f"{(now - start_date).days} days" + " ago."
                ],
            ),
            (1, 1, []),
        ]
        self.assertEqual(trace, res)
Пример #25
0
class TestDags(unittest.TestCase):
    """Checks ``Store.trace`` over differently shaped graphs of component runs.

    Every test builds its graph through ``self.store`` and then compares the
    ``(level, component run id)`` pairs produced by tracing an output pointer.
    """

    def setUp(self):
        """Create the ``Store("test")`` instance used by each test."""
        self.store = Store("test")

    def _begin_run(self, component_name):
        """Return a new run of ``component_name`` with start and end stamped."""
        run = self.store.initialize_empty_component_run(component_name)
        run.set_start_timestamp()
        run.set_end_timestamp()
        return run

    def testLinkedList(self):
        """A chain of runs is traced from the newest run back to the oldest."""
        # Create chain of component runs: run i reads iop_i, writes iop_{i+1}
        num_runs = 10
        for i in range(1, num_runs + 1):
            self.store.create_component(f"mock_component_{i}", "", "")
            inp = self.store.get_io_pointer(f"iop_{i}")
            out = self.store.get_io_pointer(f"iop_{i + 1}")
            cr = self._begin_run(f"mock_component_{i}")
            cr.add_input(inp)
            cr.add_output(out)
            self.store.set_dependencies_from_inputs(cr)
            self.store.commit_component_run(cr)

        # The last run sits at level 0 and the first run at the deepest level.
        expected_result = [(num_runs - i, i) for i in range(num_runs, 0, -1)]

        # Trace the final output
        trace = self.store.trace(f"iop_{num_runs + 1}")
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(expected_result, level_id)

    def testVersionedComputation(self):
        """Repeated runs with the same output trace only to the latest run."""
        # Run the same computation many times
        self.store.create_component("mock_component", "", "")
        num_runs = 10
        for _ in range(num_runs):
            inp = self.store.get_io_pointer("inp")
            out = self.store.get_io_pointer("out")
            cr = self._begin_run("mock_component")
            cr.add_input(inp)
            cr.add_output(out)
            self.store.set_dependencies_from_inputs(cr)
            self.store.commit_component_run(cr)

        # Trace the out pointer. Only most recent run ID should show.
        trace = self.store.trace("out")
        self.assertEqual(len(trace), 1)
        self.assertEqual(trace[0][0], 0)
        self.assertEqual(trace[0][1].id, num_runs)

    def testTree(self):
        """Tracing a leaf of a binary tree of runs returns its ancestor path."""
        # Create a binary tree of component runs, ``num_levels`` levels deep
        num_levels = 2
        # Counters live in this test's scope (not in module globals) so that
        # no state leaks out of the test.
        cr_counter = 1
        iop_counter = 1

        def create_tree(level, inp):
            """Commit one run reading ``inp`` with two outputs, then recurse."""
            nonlocal cr_counter, iop_counter

            if level == num_levels:
                return

            self.store.create_component(f"mock_component_{cr_counter}", "", "")
            cr = self._begin_run(f"mock_component_{cr_counter}")
            cr_counter += 1

            # Create output pointers
            out1 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1
            out2 = self.store.get_io_pointer(f"iop_{iop_counter}")
            iop_counter += 1

            # Add and commit component run
            cr.add_input(inp)
            cr.add_outputs([out1, out2])
            self.store.set_dependencies_from_inputs(cr)
            self.store.commit_component_run(cr)

            # Create left and right trees
            create_tree(level + 1, out1)
            create_tree(level + 1, out2)

        # Create first input pointer and tree of computation
        root_inp = self.store.get_io_pointer(f"iop_{iop_counter}")
        iop_counter += 1
        create_tree(0, root_inp)

        # Grab last iop id and trace it
        last_iop_id = f"iop_{iop_counter - 1}"
        trace = self.store.trace(last_iop_id)
        level_id = [(level, cr.id) for level, cr in trace]
        self.assertEqual(level_id, [(0, 3), (1, 1)])

    def testCycle(self):
        """Two runs whose pointers form a cycle can still be traced."""
        # Create cycle. Since dependencies are versioned, we shouldn't run
        # into problems.
        # Create io pointers and components
        iop1 = self.store.get_io_pointer("iop1")
        iop2 = self.store.get_io_pointer("iop2")
        self.store.create_component("component_1", "", "")
        self.store.create_component("component_2", "", "")

        # Create component runs: component_1 maps iop1 -> iop2 ...
        cr = self._begin_run("component_1")
        cr.add_input(iop1)
        cr.add_output(iop2)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # ... and component_2 maps iop2 back to iop1.
        cr = self._begin_run("component_2")
        cr.add_input(iop2)
        cr.add_output(iop1)
        self.store.set_dependencies_from_inputs(cr)
        self.store.commit_component_run(cr)

        # Trace both pointers
        trace_1 = [(level, cr.id) for level, cr in self.store.trace("iop1")]
        trace_2 = [(level, cr.id) for level, cr in self.store.trace("iop2")]
        self.assertEqual(trace_1, [(0, 2), (1, 1)])
        self.assertEqual(trace_2, [(0, 1)])
Пример #26
0
def create_labels(label_ids: typing.List[str]):
    """Hand ``label_ids`` to ``Store.get_labels`` on a store opened at ``_db_uri``.

    The value returned by ``get_labels`` is discarded; this function returns
    ``None``.

    NOTE(review): presumably ``get_labels`` creates any labels that do not
    exist yet, which would explain this function's name -- confirm against
    the Store implementation.
    """
    Store(_db_uri).get_labels(label_ids)
Пример #27
0
class TestStore(unittest.TestCase):
    """Tests for ``Store``: components, runs, tags, IO pointers, traces and
    flagged-output review."""

    def setUp(self):
        """Create the ``Store("test")`` instance used by each test."""
        self.store = Store("test")

    def _timed_run(self, component_name):
        """Return a new run of ``component_name`` with start and end stamped."""
        run = self.store.initialize_empty_component_run(component_name)
        run.set_start_timestamp()
        run.set_end_timestamp()
        return run

    def testComponent(self):
        """A created component can be fetched by name and by owner."""
        self.store.create_component(
            "test_component", "test_description", "shreya"
        )
        fetched = self.store.get_component("test_component")
        self.assertEqual(fetched.name, "test_component")

        # Lookup by owner should find exactly that component
        owned = self.store.get_components(owner="shreya")
        self.assertEqual(1, len(owned))

    def testCompleteComponentRun(self):
        """A fully populated run is committed and comes back from history."""
        self.store.create_component(
            "test_component", "test_description", "shreya"
        )

        # Build and commit the run
        run = self._timed_run("test_component")
        run.add_input(IOPointer("inp"))
        run.add_output(IOPointer("out"))
        self.store.commit_component_run(run)

        # The history holds exactly this run
        history = self.store.get_history("test_component", limit=None)
        self.assertEqual(1, len(history))
        self.assertEqual(history[0], run)

    def testLogComponentRunWithoutComponentCreated(self):
        """A run can be committed without calling create_component first."""
        run = self._timed_run("test_component_new")
        run.add_input(IOPointer("inp"))
        run.add_output(IOPointer("out"))
        self.store.commit_component_run(run)

        # The history holds exactly this run
        history = self.store.get_history("test_component_new", limit=None)
        self.assertEqual(1, len(history))
        self.assertEqual(history[0], run)

    def testIncompleteComponentRun(self):
        """Committing a run with no timestamps set raises RuntimeError."""
        self.store.create_component(
            "test_component", "test_description", "shreya"
        )

        # Freshly initialized run, nothing filled in
        run = self.store.initialize_empty_component_run("test_component")
        with self.assertRaises(RuntimeError):
            self.store.commit_component_run(run)

    def testTags(self):
        """Tags added after creation are visible on the component."""
        self.store.create_component(
            "test_component", "test_description", "shreya"
        )

        self.store.add_tags_to_component("test_component", ["tag1", "tag2"])

        fetched = self.store.get_component("test_component")
        tag_names = [tag.name for tag in fetched.tags]
        self.assertEqual(fetched.name, "test_component")
        self.assertEqual(set(tag_names), {"tag1", "tag2"})

    def testDuplicateTags(self):
        """Adding the same tag twice leaves a single tag on the component."""
        self.store.create_component(
            "test_component", "test_description", "shreya"
        )

        self.store.add_tags_to_component("test_component", ["tag1", "tag1"])

        fetched = self.store.get_component("test_component")
        tag_names = [tag.name for tag in fetched.tags]
        self.assertEqual(fetched.name, "test_component")
        self.assertEqual(tag_names, ["tag1"])

    def testIOPointer(self):
        """get_io_pointer raises with create=False, else returns equal pointers."""
        # Nothing exists yet, so a non-creating lookup must fail
        with self.assertRaises(RuntimeError):
            self.store.get_io_pointer("iop", create=False)

        # Default lookup twice: both results compare equal
        first = self.store.get_io_pointer("iop")
        second = self.store.get_io_pointer("iop")

        self.assertEqual(first, second)

    def testIOPointers(self):
        """Two bulk lookups of the same names return the same set of pointers."""
        names = [f"iop_{i}" for i in range(100)]
        first_batch = self.store.get_io_pointers(names)
        second_batch = self.store.get_io_pointers(names)

        self.assertEqual(set(first_batch), set(second_batch))

    def testKVIOPointer(self):
        """Looking up a pointer by name and value twice gives equal pointers."""
        iop_name = "name"
        iop_value = "value"

        first = self.store.get_io_pointer(iop_name, iop_value)
        second = self.store.get_io_pointer(iop_name, iop_value)

        self.assertEqual(first, second)

    def testSetDependenciesFromInputs(self):
        """Dependencies resolve to the latest run that produced each input."""
        # Create IO pointers
        inp = self.store.get_io_pointer("inp")
        out = self.store.get_io_pointer("out")
        another_out = self.store.get_io_pointer("another_out")

        # For each output pointer in turn: (re)create the component, then
        # commit two runs that both produce that pointer.
        for produced in (out, another_out):
            self.store.create_component(
                "test_component", "test_description", "shreya"
            )
            for _ in range(2):
                run = self._timed_run("test_component")
                run.add_input(inp)
                run.add_output(produced)
                self.store.commit_component_run(run)

        # Final run consumes both output pointers
        run = self._timed_run("test_component")
        run.add_inputs([out, another_out])
        self.store.set_dependencies_from_inputs(run)
        self.store.commit_component_run(run)

        # Check the dependencies of the first entry in the history against
        # the entries at positions 1 and 3.
        history = self.store.get_history("test_component", limit=None)
        self.assertTrue(history[1] in history[0].dependencies)
        self.assertTrue(history[3] in history[0].dependencies)

    def _set_up_computation(self):
        """Commit four runs: 1 and 2 both write iop_1, 3 reads iop_1 and
        writes iop_2/iop_3, 4 reads iop_3 and writes iop_4."""
        for i in range(1, 5):
            self.store.create_component(
                f"test_component_{i}", "test_description", "shreya"
            )

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # Components 1 and 2 each produce the first pointer, in that order
        for producer in ("test_component_1", "test_component_2"):
            run = self._timed_run(producer)
            run.add_output(iop[0])
            self.store.set_dependencies_from_inputs(run)
            self.store.commit_component_run(run)

        third = self._timed_run("test_component_3")
        third.add_input(iop[0])
        third.add_outputs([iop[1], iop[2]])
        self.store.set_dependencies_from_inputs(third)
        self.store.commit_component_run(third)

        fourth = self._timed_run("test_component_4")
        fourth.add_input(iop[2])
        fourth.add_output(iop[3])
        self.store.set_dependencies_from_inputs(fourth)
        self.store.commit_component_run(fourth)

    def testTrace(self):
        """Tracing iop_4 walks runs 4 -> 3 -> 2."""
        self._set_up_computation()

        pairs = [(level, cr.id) for level, cr in self.store.trace("iop_4")]

        self.assertEqual(pairs, [(0, 4), (1, 3), (2, 2)])

    def testEmptyTrace(self):
        """Tracing an unknown pointer raises RuntimeError in both trace APIs."""
        with self.assertRaises(RuntimeError):
            self.store.trace("some_weird_pointer")
        with self.assertRaises(RuntimeError):
            self.store.web_trace("some_weird_pointer")

    def testWebTrace(self):
        """web_trace for iop_4 returns the expected nested node structure."""
        self._set_up_computation()

        # Expected tree, assembled from the innermost run outwards.
        # NOTE(review): child ordering was described as nondeterministic, but
        # the assertion below compares lists exactly -- confirm it is stable.
        run_2_node = {
            "id": "componentrun_2",
            "label": "test_component_2",
            "hasCaret": True,
            "isExpanded": True,
            "stale": [],
            "childNodes": [
                {
                    "id": "iopointer_iop_1",
                    "label": "iop_1",
                    "hasCaret": False,
                    "parent": "componentrun_2",
                },
            ],
        }
        run_3_node = {
            "id": "componentrun_3",
            "label": "test_component_3",
            "hasCaret": True,
            "isExpanded": True,
            "stale": [],
            "childNodes": [
                {
                    "id": "iopointer_iop_2",
                    "label": "iop_2",
                    "hasCaret": False,
                    "parent": "componentrun_3",
                },
                {
                    "id": "iopointer_iop_3",
                    "label": "iop_3",
                    "hasCaret": False,
                    "parent": "componentrun_3",
                },
                run_2_node,
            ],
        }
        run_4_node = {
            "id": "componentrun_4",
            "label": "test_component_4",
            "hasCaret": True,
            "isExpanded": True,
            "stale": [],
            "childNodes": [
                {
                    "id": "iopointer_iop_4",
                    "label": "iop_4",
                    "hasCaret": False,
                    "parent": "componentrun_4",
                },
                run_3_node,
            ],
        }
        expected_res = [run_4_node]

        web_trace = self.store.web_trace("iop_4")

        self.assertEqual(web_trace, expected_res)

    def testBasicFlaggedOutputs(self):
        """Reviewing two flagged outputs counts the runs they trace back to."""
        # Create components and iopointers
        for component in ("test_component_A", "test_component_B"):
            self.store.create_component(
                component, "test_description", "shreya"
            )

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 5)]

        # First pipeline: A produces iop_1 and iop_2, B turns iop_1 into iop_3
        run_a = self._timed_run("test_component_A")
        run_a.add_outputs([iop[0], iop[1]])
        self.store.set_dependencies_from_inputs(run_a)
        self.store.commit_component_run(run_a)

        run_b1 = self._timed_run("test_component_B")
        run_b1.add_input(iop[0])
        run_b1.add_output(iop[2])
        self.store.set_dependencies_from_inputs(run_b1)
        self.store.commit_component_run(run_b1)

        # Second pipeline: B turns iop_2 into iop_4
        run_b2 = self._timed_run("test_component_B")
        run_b2.add_input(iop[1])
        run_b2.add_output(iop[3])
        self.store.set_dependencies_from_inputs(run_b2)
        self.store.commit_component_run(run_b2)

        # Flag iop_3 and iop_4
        for flagged in ("iop_3", "iop_4"):
            self.store.set_io_pointer_flag(flagged, True)

        # The review should list run 1 (A) twice and each B run once
        _, reviewed = self.store.review_flagged_outputs()
        res = [(cr.id, count) for cr, count in reviewed]
        expected_res = [(1, 2), (3, 1), (2, 1)]
        self.assertEqual(res, expected_res)

    def testManyFlaggedOutputs(self):
        """Reviewing four flagged outputs ranks shared upstream runs first."""
        # Create components and iopointers
        for component in (
            "test_component_A",
            "test_component_B",
            "test_component_C",
        ):
            self.store.create_component(
                component, "test_description", "shreya"
            )

        iop = [self.store.get_io_pointer(f"iop_{i}") for i in range(1, 8)]

        # A produces iop_1 and iop_2
        run_a = self._timed_run("test_component_A")
        run_a.add_outputs([iop[0], iop[1]])
        self.store.set_dependencies_from_inputs(run_a)
        self.store.commit_component_run(run_a)

        # B turns iop_1 into iop_3
        run_b = self._timed_run("test_component_B")
        run_b.add_input(iop[0])
        run_b.add_output(iop[2])
        self.store.set_dependencies_from_inputs(run_b)
        self.store.commit_component_run(run_b)

        # Four pipelines end in a C run; every C run reads iop_2 and iop_3
        # and writes its own output (iop_4 .. iop_7).
        for produced in iop[3:7]:
            run_c = self._timed_run("test_component_C")
            run_c.add_inputs([iop[1], iop[2]])
            run_c.add_output(produced)
            self.store.set_dependencies_from_inputs(run_c)
            self.store.commit_component_run(run_c)

        # Flag all four C outputs
        for flagged in ("iop_4", "iop_5", "iop_6", "iop_7"):
            self.store.set_io_pointer_flag(flagged, True)

        _, reviewed = self.store.review_flagged_outputs()
        res = [(cr.component_name, cr.id, count) for cr, count in reviewed]
        expected_res = [
            ("test_component_B", 2, 4),
            ("test_component_A", 1, 4),
            ("test_component_C", 6, 1),
            ("test_component_C", 5, 1),
            ("test_component_C", 4, 1),
            ("test_component_C", 3, 1),
        ]
        self.assertEqual(res, expected_res)
Пример #28
0
class Task(object):
    """Handle for one named task: logs outputs and feedback to a ``Store``
    and computes metrics over them.

    Attributes are plain instance fields: ``task_name`` (the name passed to
    every store call), ``store`` (the backing ``Store``) and ``metrics``
    (the list of metrics added through ``registerMetric``).
    """

    def __init__(self, task_name: str):
        """Remember ``task_name`` and open a store at the client's DB URI."""
        self.task_name = task_name
        self.store = Store(clientUtils.get_db_uri())
        self.metrics = []
        # TODO(shreyashankar): Add metric cache

    def registerMetric(self, metric: Metric, create_view: bool = True):
        """Append ``metric`` to ``self.metrics``.

        When ``create_view`` is true, ``store.create_view`` is first called
        with the task name and the metric's ``window_size``.
        """
        if create_view:
            self.store.create_view(self.task_name, metric.window_size)
        self.metrics.append(metric)

    def computeMetrics(self, use_views=True):
        """Evaluate every registered metric.

        Returns a dict mapping ``metric.getIdentifier()`` to the value the
        store computed, using ``compute_metric_from_view`` when ``use_views``
        is true and ``compute_metric`` otherwise.
        """
        results = {}
        for metric in self.metrics:
            if use_views:
                compute = self.store.compute_metric_from_view
            else:
                compute = self.store.compute_metric
            value = compute(
                self.task_name, metric.fn, window_size=metric.window_size
            )
            results[metric.getIdentifier()] = value
        return results

    def logOutput(
        self,
        output_value: float,
        identifier: str,
    ):
        """Log one output value under ``identifier`` for this task."""
        self.store.log_output(
            task_name=self.task_name,
            identifier=identifier,
            val=output_value,
        )

    def logOutputs(
        self,
        output_values: typing.List[float],
        identifiers: typing.List[str],
    ):
        """Log a batch of output values with their identifiers."""
        self.store.log_outputs(
            task_name=self.task_name,
            identifiers=identifiers,
            vals=output_values,
        )

    def logFeedbacks(
        self,
        feedback_values: typing.List[float],
        identifiers: typing.List[str],
    ):
        """Log a batch of feedback values with their identifiers."""
        self.store.log_feedbacks(
            task_name=self.task_name,
            identifiers=identifiers,
            vals=feedback_values,
        )

    def logFeedback(
        self,
        feedback_value: float,
        identifier: str,
    ):
        """Log one feedback value under ``identifier`` for this task."""
        self.store.log_feedback(
            task_name=self.task_name,
            identifier=identifier,
            val=feedback_value,
        )

    def _read_table(self, tablename, limit, window_size):
        """Forward a read of ``tablename`` for this task to the store."""
        return self.store.get_outputs_or_feedback(
            self.task_name,
            tablename=tablename,
            limit=limit,
            window_size=window_size,
        )

    def getOutputs(self, limit: int = None, window_size: int = None):
        """Return the store's ``"output_table"`` rows for this task."""
        return self._read_table("output_table", limit, window_size)

    def getFeedback(self, limit: int = None, window_size: int = None):
        """Return the store's ``"feedback_table"`` rows for this task."""
        return self._read_table("feedback_table", limit, window_size)

    def computeMetric(
        self,
        metric: typing.Union[typing.Callable, str],
        window_size: int = None,
    ):
        """Compute a single metric through ``store.compute_metric``.

        ``metric`` is used directly when it is callable; otherwise it is
        resolved with ``get_metric_function`` first.
        """
        if callable(metric):
            metric_fn = metric
        else:
            metric_fn = get_metric_function(metric)

        return self.store.compute_metric(
            self.task_name, metric_fn, window_size=window_size
        )
Пример #29
0
 def setUp(self):
     """Build a ``Store`` from the literal ``"test"`` and keep it on the
     instance as ``self.store``."""
     test_store = Store("test")
     self.store = test_store
Пример #30
0
 def __init__(self, task_name: str):
     """Store ``task_name``, open a ``Store`` at the URI returned by
     ``clientUtils.get_db_uri()``, and start with an empty metrics list."""
     self.task_name = task_name
     db_uri = clientUtils.get_db_uri()
     self.store = Store(db_uri)
     self.metrics = list()