Exemplos de Flow em Python, exemplos de metaflow.client.Flow em Python

Exemplo n.º 1

0

Exibir arquivo

    def __init__(self, flow):
        """Load the client-side run for ``flow`` and sanity-check it.

        Stores ``flow`` and the matching client run on the instance, checks
        that the step names declared by the flow match the step ids recorded
        in the run, then runs the namespace checks.

        NOTE(review): ``self.run_id`` is read here but never assigned in this
        method -- presumably supplied by the enclosing class or a base class;
        confirm.
        """
        from metaflow.client import Flow, get_namespace

        self.flow = flow
        self.run = Flow(flow.name)[self.run_id]

        # Compare both sides in sorted order so iteration order is irrelevant.
        declared_steps = sorted(step.name for step in flow)
        recorded_steps = sorted(step.id for step in self.run)
        assert_equals(declared_steps, recorded_steps)

        self._test_namespace()

Exemplo n.º 2

0

Exibir arquivo

Arquivo: tag_cli.py Projeto: zillow/metaflow

def _get_client_run_obj(obj, run_id, user_namespace):
    """Resolve ``run_id`` of the flow attached to ``obj`` into a client ``Run``.

    Args:
        obj: object exposing ``flow.name``.
        run_id: bare run id (must not contain "/"), or ``None``.
        user_namespace: value handed to ``namespace()`` before each lookup.

    Returns:
        The ``Run`` built from the pathspec ``"<flow name>/<run_id>"``.

    Raises:
        CommandException: if the flow cannot be found, the flow or run is not
            visible in ``user_namespace``, ``run_id`` is ``None`` (the message
            then names the latest run id), ``run_id`` contains "/", or the
            run does not exist.
    """
    flow_name = obj.flow.name

    # Probe the flow first so that two distinct mistakes get distinct messages:
    #   1. the flow is tagged before it has ever been run
    #   2. --namespace contains a typo
    try:
        namespace(user_namespace)
        Flow(pathspec=flow_name)
    except MetaflowNotFound:
        raise CommandException(
            "No run found for *%s*. Please run the flow before tagging." %
            flow_name)
    except MetaflowNamespaceMismatch:
        raise CommandException(
            "No run found for *%s* in namespace *%s*. You can switch the namespace using --namespace"
            % (flow_name, user_namespace))

    # Without a run id we cannot continue; point the user at the latest run.
    if run_id is None:
        latest_run_id = Flow(pathspec=flow_name).latest_run.id
        raise CommandException(
            "Please specify a run-id using --run-id.\n"
            "*%s*'s latest run in namespace *%s* has id *%s*." %
            (flow_name, user_namespace, latest_run_id))

    # Only a bare run id is accepted; anything containing "/" is rejected.
    if len(run_id.split("/")) != 1:
        raise CommandException("Run-id *%s* is not a valid run-id" % run_id)
    path_spec = "%s/%s" % (flow_name, run_id)

    # Fetch the run itself, again separating two cases:
    #   1. --run-id contains a typo / the run does not exist
    #   2. the run exists but not in the default/specified namespace
    try:
        namespace(user_namespace)
        return Run(pathspec=path_spec)
    except MetaflowNotFound:
        raise CommandException("No run *%s* found for flow *%s*" %
                               (path_spec, flow_name))
    except MetaflowNamespaceMismatch:
        raise CommandException(
            "Run *%s* for flow *%s* does not belong to namespace *%s*\n" %
            (path_spec, flow_name, user_namespace))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: metadata_check.py Projeto: usedata-analytics/usedata-metaflow

    def _test_namespace(self):
        """Check namespace handling of the Metaflow client for this run.

        Verifies, in order, that the default namespace is derived from the
        ``METAFLOW_USER`` environment variable, that the run is listed under
        the flow, that switching namespace takes effect, that fetching the
        run from a foreign namespace raises ``MetaflowNamespaceMismatch``,
        and that the run is reachable with the namespace set to ``None``.

        The client namespace is process-wide state, so it is reset with
        ``default_namespace()`` in a ``finally`` clause: a failing check must
        not leave later code running under ``user:nobody`` or ``None``.
        """
        from metaflow.client import Flow, get_namespace, namespace, default_namespace
        from metaflow.exception import MetaflowNamespaceMismatch
        import os

        try:
            # test 1) METAFLOW_USER should be the default
            assert_equals("user:%s" % os.environ.get("METAFLOW_USER"),
                          get_namespace())
            # test 2) Run should be in the listing
            assert_equals(True, self.run_id
                          in [run.id for run in Flow(self.flow.name)])
            # test 3) changing namespace should change namespace
            namespace("user:nobody")
            assert_equals(get_namespace(), "user:nobody")
            # test 4) fetching results in the incorrect namespace should fail
            assert_exception(lambda: Flow(self.flow.name)[self.run_id],
                             MetaflowNamespaceMismatch)
            # test 5) global namespace should work
            namespace(None)
            assert_equals(get_namespace(), None)
            Flow(self.flow.name)[self.run_id]
        finally:
            # Always restore the default namespace, even when a check failed.
            default_namespace()

Exemplo n.º 4

0

Exibir arquivo

Arquivo: tag_cli.py Projeto: zillow/metaflow

def tag_list(
    obj,
    run_id,
    hide_system_tags,
    list_all,
    my_runs,
    group_by_tag,
    group_by_run,
    flat,
    arg_run_id,
):
    """List the tags of one run, or of the runs of the flow attached to ``obj``.

    A single run is selected through ``run_id`` or ``arg_run_id`` (not both).
    Otherwise every run of the flow is used, looked up after calling
    ``namespace()`` with ``resolve_identity()`` when ``my_runs`` is set and
    with ``None`` when it is not. When neither a run nor a listing mode is
    given, ``list_all`` is assumed.

    Tags are collected per run (``group_by_run``), per tag (``group_by_tag``)
    or merged into one set keyed by the comma-joined pathspecs. With ``flat``
    the merged tags are echoed one per ``obj.echo`` call; otherwise the
    groupings are handed to ``_print_tags_for_runs_by_groups``.

    Raises:
        CommandException: on conflicting options, or when the flow has never
            been run.
        MetaflowInternalError: if ``flat`` is set but the tags did not
            collapse into exactly one group.
    """
    _set_current(obj)

    # With no run given and no listing mode chosen, behave like --all.
    if run_id is None and arg_run_id is None and not (list_all or my_runs):
        list_all = True

    if list_all and my_runs:
        raise CommandException(
            "Option --all cannot be used together with --my-runs.")

    if not (run_id is None or arg_run_id is None):
        raise CommandException(
            "Specify a run either using --run-id or as an argument but not both"
        )

    if arg_run_id is not None:
        run_id = arg_run_id

    grouped = group_by_run or group_by_tag

    if group_by_run and group_by_tag:
        raise CommandException(
            "Option --group-by-tag cannot be used with --group-by-run")

    if flat and grouped:
        raise CommandException(
            "Option --flat cannot be used with any --group-by-* option")

    # Both mappings share the same kind of key: a run pathspec, a tag, or the
    # placeholder "_" while ungrouped tags are still being accumulated.
    system_tags_by_group = {}
    all_tags_by_group = {}

    def _collect(_run):
        """Fold the tags of a single run into the two grouping dicts."""
        if group_by_run:
            if hide_system_tags:
                all_tags_by_group[
                    _run.pathspec] = _run.tags - _run.system_tags
                return
            system_tags_by_group[_run.pathspec] = _run.system_tags
            all_tags_by_group[_run.pathspec] = _run.tags
            return

        if group_by_tag:
            for user_tag in _run.tags - _run.system_tags:
                all_tags_by_group.setdefault(user_tag,
                                             []).append(_run.pathspec)
            if not hide_system_tags:
                for sys_tag in _run.system_tags:
                    system_tags_by_group.setdefault(sys_tag, []).append(
                        _run.pathspec)
            return

        # Ungrouped: accumulate everything under the placeholder key.
        if hide_system_tags:
            all_tags_by_group.setdefault("_", set()).update(
                _run.tags.difference(_run.system_tags))
            return
        system_tags_by_group.setdefault("_", set()).update(_run.system_tags)
        all_tags_by_group.setdefault("_", set()).update(_run.tags)

    if list_all or my_runs:
        namespace(resolve_identity() if my_runs else None)
        try:
            flow = Flow(pathspec=obj.flow.name)
        except MetaflowNotFound:
            raise CommandException(
                "Cannot list tags because the flow %s has never been run." %
                (obj.flow.name, ))
        selected_runs = flow.runs()
    else:
        selected_runs = (_get_client_run_obj(obj, run_id, None), )

    pathspecs = []
    for selected in selected_runs:
        _collect(selected)
        pathspecs.append(selected.pathspec)

    if not grouped:
        # Re-key the accumulated sets by the list of matching runs so the
        # runs can be printed alongside the tags if needed.
        joined_pathspecs = ",".join(pathspecs)
        for grouping in (system_tags_by_group, all_tags_by_group):
            grouping[joined_pathspecs] = grouping.get("_", set())
            grouping.pop("_", None)

    if flat:
        if len(all_tags_by_group) != 1:
            raise MetaflowInternalError("Failed to flatten tag set")
        (only_tags, ) = all_tags_by_group.values()
        for tag in only_tags:
            obj.echo(tag)
        return

    _print_tags_for_runs_by_groups(obj, system_tags_by_group,
                                   all_tags_by_group, group_by_tag)

Exemplo n.º 5

0

Exibir arquivo

class MetadataCheck(MetaflowCheck):
    """Checker that inspects a finished run through the Metaflow client API.

    NOTE(review): ``self.run_id`` is read throughout but never assigned in
    this class -- presumably provided by ``MetaflowCheck``; confirm.
    """

    def __init__(self, flow):
        """Load the client run for ``flow`` and sanity-check it.

        Checks that the step names declared by ``flow`` match the step ids
        recorded in the run, then runs the namespace checks.
        """
        from metaflow.client import Flow

        self.flow = flow
        self.run = Flow(flow.name)[self.run_id]
        assert_equals(sorted(step.name for step in flow),
                      sorted(step.id for step in self.run))
        self._test_namespace()

    def _test_namespace(self):
        """Check namespace handling of the Metaflow client for this run.

        The client namespace is process-wide state, so it is reset with
        ``default_namespace()`` in a ``finally`` clause: a failing check must
        not leave later code running under ``user:nobody`` or ``None``.
        """
        from metaflow.client import Flow, get_namespace, namespace, default_namespace
        from metaflow.exception import MetaflowNamespaceMismatch
        import os

        try:
            # test 1) METAFLOW_USER should be the default
            assert_equals("user:%s" % os.environ.get("METAFLOW_USER"),
                          get_namespace())
            # test 2) Run should be in the listing
            assert_equals(True, self.run_id
                          in [run.id for run in Flow(self.flow.name)])
            # test 3) changing namespace should change namespace
            namespace("user:nobody")
            assert_equals(get_namespace(), "user:nobody")
            # test 4) fetching results in the incorrect namespace should fail
            assert_exception(lambda: Flow(self.flow.name)[self.run_id],
                             MetaflowNamespaceMismatch)
            # test 5) global namespace should work
            namespace(None)
            assert_equals(get_namespace(), None)
            Flow(self.flow.name)[self.run_id]
        finally:
            # Always restore the default namespace, even when a check failed.
            default_namespace()

    def get_run(self):
        """Return the client run object loaded in ``__init__``."""
        return self.run

    def assert_artifact(self, step, name, value, fields=None):
        """Assert that artifact ``name`` has the expected content in ``step``.

        Args:
            step: name of the step whose tasks are inspected.
            name: artifact name.
            value: expected artifact value; compared only when ``fields`` is
                empty or ``None``.
            fields: optional mapping of key -> expected value. When given,
                the artifact (JSON-decoded first if it is string-like) must
                be a dict containing every listed key with that value.

        Returns:
            True when every task of the step satisfies the expectation.

        Raises:
            AssertArtifactFailed: on the first task that does not.
        """
        for task, artifacts in self.artifact_dict(step, name).items():
            if name not in artifacts:
                raise AssertArtifactFailed("Task '%s' expected %s=%s but "
                                           "the key was not found" %
                                           (task, name, truncate(value)))
            artifact = artifacts[name]
            if fields:
                # Decode and validate once per task rather than once per
                # field: neither step depends on the field being checked.
                if is_stringish(artifact):
                    data = json.loads(artifact)
                else:
                    data = artifact
                if not isinstance(data, dict):
                    raise AssertArtifactFailed(
                        "Task '%s' expected %s to be a dictionary (got %s)"
                        % (task, name, type(data)))
                for field, v in fields.items():
                    if data.get(field, None) != v:
                        raise AssertArtifactFailed(
                            "Task '%s' expected %s[%s]=%r but got %s[%s]=%s"
                            % (
                                task,
                                name,
                                field,
                                truncate(v),
                                name,
                                field,
                                truncate(data.get(field, None)),
                            ))
            elif artifact != value:
                raise AssertArtifactFailed(
                    "Task '%s' expected %s=%r but got %s=%s" %
                    (task, name, truncate(value), name,
                     truncate(artifact)))
        return True

    def artifact_dict(self, step, name):
        """Map each task id of ``step`` to ``{name: <artifact data>}``."""
        return {task.id: {name: task[name].data} for task in self.run[step]}

    def artifact_dict_if_exists(self, step, name):
        """Like ``artifact_dict`` but only for tasks where ``name in task``."""
        return {
            task.id: {
                name: task[name].data
            }
            for task in self.run[step] if name in task
        }

    def assert_log(self, step, logtype, value, exact_match=True):
        """Assert the concatenated ``logtype`` log of ``step`` matches ``value``.

        An exact match always passes; with ``exact_match=False`` it is enough
        for ``value`` to be contained in the log.

        Returns:
            True on success.

        Raises:
            AssertLogFailed: when the log does not match.
        """
        log_value = self.get_log(step, logtype)
        if log_value == value:
            return True
        elif not exact_match and value in log_value:
            return True
        else:
            raise AssertLogFailed(
                "Step '%s' expected task.%s='%s' but got task.%s='%s'" %
                (step, logtype, repr(value), logtype, repr(log_value)))

    def list_cards(self, step, task, card_type=None):
        """Describe the cards of ``step``/``task``.

        Returns:
            ``None`` when ``get_card`` raises ``CardNotPresentException`` or
            returns ``None``; otherwise a dict with the task ``pathspec`` and
            a ``cards`` list holding ``hash``, ``id``, ``type`` and
            ``filename`` (last "/"-separated component of the card path) for
            each card.
        """
        from metaflow.plugins.cards.exception import CardNotPresentException

        try:
            card_iter = self.get_card(step, task, card_type)
        except CardNotPresentException:
            card_iter = None

        if card_iter is None:
            return
        pathspec = self.run[step][task].pathspec
        list_data = dict(pathspec=pathspec, cards=[])
        if len(card_iter) > 0:
            list_data["cards"] = [
                dict(
                    hash=card.hash,
                    id=card.id,
                    type=card.type,
                    filename=card.path.split("/")[-1],
                ) for card in card_iter
            ]
        return list_data

    def assert_card(
        self,
        step,
        task,
        card_type,
        value,
        card_hash=None,
        card_id=None,
        exact_match=True,
    ):
        """Assert that a card of ``step``/``task`` has the expected content.

        Args:
            step: step name.
            task: task id within the step.
            card_type: card type passed to ``get_card``.
            value: expected card content.
            card_hash: optional substring of the card hash used to pick one
                card; without it the first card is used.
            card_id: optional card id passed to ``get_card``.
            exact_match: require equality when True, containment otherwise.

        Returns:
            True on success.

        Raises:
            AssertCardFailed: when the content does not match, including the
                case where no card could be found.
        """
        from metaflow.plugins.cards.exception import CardNotPresentException

        try:
            card_iter = self.get_card(step, task, card_type, card_id=card_id)
        except CardNotPresentException:
            card_iter = None
        card_data = None
        # Since there are many cards possible for a taskspec, we check for hash to assert a single card.
        # If the id argument is present then there will be a single card anyway.
        if card_iter is not None:
            if len(card_iter) > 0:
                if card_hash is None:
                    card_data = card_iter[0].get()
                else:
                    card_filter = [c for c in card_iter if card_hash in c.hash]
                    card_data = None if len(
                        card_filter) == 0 else card_filter[0].get()
        if exact_match:
            failed = card_data != value
        else:
            # A missing card (card_data is None) cannot contain anything.
            # Checking for None first reports it as an assertion failure
            # instead of letting ``value not in None`` raise a TypeError.
            failed = card_data is None or value not in card_data
        if failed:
            raise AssertCardFailed(
                "Task '%s/%s' expected %s card with content '%s' but got '%s'"
                % (self.run_id, step, card_type, repr(value), repr(card_data)))
        return True

    def get_log(self, step, logtype):
        """Concatenate attribute ``logtype`` of every task in ``step``."""
        return "".join(getattr(task, logtype) for task in self.run[step])

    def get_card(self, step, task, card_type, card_id=None):
        """Return ``get_cards`` output for ``step``/``task``, filtered by type and id."""
        from metaflow.cards import get_cards

        iterator = get_cards(self.run[step][task], type=card_type, id=card_id)
        return iterator

    def get_user_tags(self):
        """Return ``user_tags`` of the run."""
        return self.run.user_tags

    def get_system_tags(self):
        """Return ``system_tags`` of the run."""
        return self.run.system_tags

    def add_tag(self, tag):
        """Forward to ``run.add_tag``."""
        return self.run.add_tag(tag)

    def add_tags(self, tags):
        """Forward to ``run.add_tags``."""
        return self.run.add_tags(tags)

    def remove_tag(self, tag):
        """Forward to ``run.remove_tag``."""
        return self.run.remove_tag(tag)

    def remove_tags(self, tags):
        """Forward to ``run.remove_tags``."""
        return self.run.remove_tags(tags)

    def replace_tag(self, tag_to_remove, tag_to_add):
        """Forward to ``run.replace_tag``."""
        return self.run.replace_tag(tag_to_remove, tag_to_add)

    def replace_tags(self, tags_to_remove, tags_to_add):
        """Forward to ``run.replace_tags``."""
        return self.run.replace_tags(tags_to_remove, tags_to_add)