def __init__(self, flow):
    """Load the client-side run for *flow* and sanity-check it.

    Fetches ``Flow(flow.name)[self.run_id]`` through the Metaflow client,
    asserts that the sorted step names of *flow* equal the sorted step ids
    recorded for that run, and finally runs ``self._test_namespace()``.

    NOTE(review): ``self.run_id`` is read here but never assigned in this
    method; presumably it is provided by the enclosing class or the test
    harness before construction -- confirm.
    """
    from metaflow.client import Flow

    self.flow = flow
    self.run = Flow(flow.name)[self.run_id]

    expected_steps = sorted(step.name for step in flow)
    recorded_steps = sorted(step.id for step in self.run)
    assert_equals(expected_steps, recorded_steps)

    self._test_namespace()
def _get_client_run_obj(obj, run_id, user_namespace):
    """Return the client ``Run`` for *run_id* of the flow held by *obj*.

    The lookup happens in *user_namespace* (passed to ``namespace()``), and
    every failure mode is converted into a ``CommandException`` carrying a
    user-oriented message.

    Args:
        obj: object exposing the current flow as ``obj.flow`` (only
            ``obj.flow.name`` is read).
        run_id: id of the run, without the flow name; ``None`` triggers an
            error that mentions the id of the flow's latest run.
        user_namespace: namespace handed to ``namespace()`` before lookups.

    Returns:
        The ``Run`` object for ``"<flow name>/<run_id>"``.

    Raises:
        CommandException: if the flow or the run cannot be found, if either
            is outside *user_namespace*, if *run_id* is ``None``, or if
            *run_id* contains a ``/``.
    """
    flow_name = obj.flow.name

    # Probe the flow first so that two common mistakes get a clear message:
    #   1. the user tags a new flow before it has ever been run
    #   2. the user makes a typo in --namespace
    try:
        namespace(user_namespace)
        Flow(pathspec=flow_name)
    except MetaflowNotFound:
        raise CommandException(
            "No run found for *%s*. Please run the flow before tagging." % flow_name
        )
    except MetaflowNamespaceMismatch:
        raise CommandException(
            "No run found for *%s* in namespace *%s*. You can switch the namespace using --namespace"
            % (flow_name, user_namespace)
        )

    # Without a run id we cannot continue; point the user at the latest run.
    if run_id is None:
        latest_run_id = Flow(pathspec=flow_name).latest_run.id
        raise CommandException(
            "Please specify a run-id using --run-id.\n"
            "*%s*'s latest run in namespace *%s* has id *%s*."
            % (flow_name, user_namespace, latest_run_id)
        )

    # A run id must be a single path component (no flow-name prefix).
    if "/" in run_id:
        raise CommandException("Run-id *%s* is not a valid run-id" % run_id)
    path_spec = "%s/%s" % (flow_name, run_id)

    # Fetch the run itself, again covering two user mistakes:
    #   1. a typo in --run-id
    #   2. a run id that does not exist in the default/specified namespace
    try:
        namespace(user_namespace)
        found_run = Run(pathspec=path_spec)
    except MetaflowNotFound:
        raise CommandException(
            "No run *%s* found for flow *%s*" % (path_spec, flow_name)
        )
    except MetaflowNamespaceMismatch:
        raise CommandException(
            "Run *%s* for flow *%s* does not belong to namespace *%s*\n"
            % (path_spec, flow_name, user_namespace)
        )
    else:
        return found_run
def _test_namespace(self):
    """Exercise the client namespace API against this checker's run.

    Runs five checks in order:

    1. the current namespace equals ``"user:<METAFLOW_USER>"``;
    2. ``self.run_id`` appears among the runs listed by ``Flow``;
    3. ``namespace("user:nobody")`` changes what ``get_namespace()`` returns;
    4. fetching the run while in that namespace raises
       ``MetaflowNamespaceMismatch``;
    5. the global namespace (``namespace(None)``) can fetch the run.

    ``default_namespace()`` is called on the way out even when a check
    fails, so a failure does not leave the namespace switched for code that
    runs afterwards.
    """
    from metaflow.client import Flow, get_namespace, namespace, default_namespace
    from metaflow.exception import MetaflowNamespaceMismatch
    import os

    # test 1) METAFLOW_USER should be the default
    assert_equals("user:%s" % os.environ.get("METAFLOW_USER"), get_namespace())
    # test 2) Run should be in the listing
    assert_equals(True, self.run_id in [run.id for run in Flow(self.flow.name)])

    # Everything below changes the namespace, so guarantee it is restored.
    try:
        # test 3) changing namespace should change namespace
        namespace("user:nobody")
        assert_equals(get_namespace(), "user:nobody")
        # test 4) fetching results in the incorrect namespace should fail
        assert_exception(
            lambda: Flow(self.flow.name)[self.run_id], MetaflowNamespaceMismatch
        )
        # test 5) global namespace should work
        namespace(None)
        assert_equals(get_namespace(), None)
        Flow(self.flow.name)[self.run_id]
    finally:
        default_namespace()
def tag_list(
    obj,
    run_id,
    hide_system_tags,
    list_all,
    my_runs,
    group_by_tag,
    group_by_run,
    flat,
    arg_run_id,
):
    """Print the tags of one run, or of many runs, of the current flow.

    Args:
        obj: command context; ``obj.flow.name`` names the flow and
            ``obj.echo`` is used for ``flat`` output.
        run_id: run id supplied with ``--run-id`` (or ``None``).
        hide_system_tags: when true, system tags are subtracted from the
            listed tags and no separate system-tag grouping is collected.
        list_all: list tags across all runs (``--all``); becomes the default
            when no run and no ``--my-runs`` is given.
        my_runs: list tags across the runs in the namespace returned by
            ``resolve_identity()`` (``--my-runs``).
        group_by_tag: group the output by tag (``--group-by-tag``).
        group_by_run: group the output by run (``--group-by-run``).
        flat: echo one tag per line instead of the grouped printout
            (``--flat``).
        arg_run_id: run id supplied as a positional argument (or ``None``).

    Raises:
        CommandException: on conflicting options, or when the flow has never
            been run.
        MetaflowInternalError: if ``flat`` output is requested but the tags
            did not collapse into exactly one group.
    """
    _set_current(obj)

    # Neither a run nor a scope was requested: assume list_all by default.
    no_run_given = run_id is None and arg_run_id is None
    if no_run_given and not (list_all or my_runs):
        list_all = True

    if list_all and my_runs:
        raise CommandException(
            "Option --all cannot be used together with --my-runs."
        )
    if run_id is not None and arg_run_id is not None:
        raise CommandException(
            "Specify a run either using --run-id or as an argument but not both"
        )
    if arg_run_id is not None:
        run_id = arg_run_id
    if group_by_run and group_by_tag:
        raise CommandException(
            "Option --group-by-tag cannot be used with --group-by-run"
        )
    if flat and (group_by_run or group_by_tag):
        raise CommandException(
            "Option --flat cannot be used with any --group-by-* option"
        )

    # Both dicts are keyed by run pathspec, by tag, or by the placeholder "_"
    # (ungrouped), depending on the grouping mode.
    system_groups = {}
    all_groups = {}

    def _collect(a_run):
        # Fold the tags of a single run into the two grouping dicts.
        spec = a_run.pathspec

        if group_by_run:
            if hide_system_tags:
                all_groups[spec] = a_run.tags - a_run.system_tags
            else:
                system_groups[spec] = a_run.system_tags
                all_groups[spec] = a_run.tags
            return

        if group_by_tag:
            for user_tag in a_run.tags - a_run.system_tags:
                all_groups.setdefault(user_tag, []).append(spec)
            if not hide_system_tags:
                for sys_tag in a_run.system_tags:
                    system_groups.setdefault(sys_tag, []).append(spec)
            return

        # Ungrouped: accumulate everything under the "_" placeholder.
        if hide_system_tags:
            all_groups.setdefault("_", set()).update(
                a_run.tags.difference(a_run.system_tags)
            )
        else:
            system_groups.setdefault("_", set()).update(a_run.system_tags)
            all_groups.setdefault("_", set()).update(a_run.tags)

    if list_all or my_runs:
        namespace(resolve_identity() if my_runs else None)
        try:
            flow = Flow(pathspec=obj.flow.name)
        except MetaflowNotFound:
            raise CommandException(
                "Cannot list tags because the flow %s has never been run."
                % (obj.flow.name,)
            )
        selected_runs = flow.runs()
    else:
        selected_runs = [_get_client_run_obj(obj, run_id, None)]

    pathspecs = []
    for each_run in selected_runs:
        _collect(each_run)
        pathspecs.append(each_run.pathspec)

    if not (group_by_run or group_by_tag):
        # Re-key the placeholder group with the list of all matching runs so
        # they can be printed out if needed.
        combined_key = ",".join(pathspecs)
        system_groups[combined_key] = system_groups.get("_", set())
        all_groups[combined_key] = all_groups.get("_", set())
        system_groups.pop("_", None)
        all_groups.pop("_", None)

    if not flat:
        _print_tags_for_runs_by_groups(obj, system_groups, all_groups, group_by_tag)
        return

    if len(all_groups) != 1:
        raise MetaflowInternalError("Failed to flatten tag set")
    for tag_set in all_groups.values():
        for tag in tag_set:
            obj.echo(tag)
class MetadataCheck(MetaflowCheck):
    """Checker that inspects a run through the Metaflow client API.

    The run is fetched as ``Flow(flow.name)[self.run_id]`` and kept in
    ``self.run``; every assertion and accessor below reads from that object.

    NOTE(review): ``self.run_id`` is read but never assigned in this class;
    presumably it is provided by ``MetaflowCheck`` or by the test harness --
    confirm.
    """

    def __init__(self, flow):
        """Fetch the run for *flow*, compare its steps, test namespaces."""
        from metaflow.client import Flow

        self.flow = flow
        self.run = Flow(flow.name)[self.run_id]
        assert_equals(
            sorted(step.name for step in flow),
            sorted(step.id for step in self.run),
        )
        self._test_namespace()

    def _test_namespace(self):
        """Exercise the client namespace API against ``self.run_id``.

        ``default_namespace()`` is called on the way out even when a check
        fails, so a failure does not leave the namespace switched for code
        that runs afterwards.
        """
        from metaflow.client import Flow, get_namespace, namespace, default_namespace
        from metaflow.exception import MetaflowNamespaceMismatch
        import os

        # test 1) METAFLOW_USER should be the default
        assert_equals("user:%s" % os.environ.get("METAFLOW_USER"), get_namespace())
        # test 2) Run should be in the listing
        assert_equals(True, self.run_id in [run.id for run in Flow(self.flow.name)])

        # Everything below changes the namespace, so guarantee it is restored.
        try:
            # test 3) changing namespace should change namespace
            namespace("user:nobody")
            assert_equals(get_namespace(), "user:nobody")
            # test 4) fetching results in the incorrect namespace should fail
            assert_exception(
                lambda: Flow(self.flow.name)[self.run_id], MetaflowNamespaceMismatch
            )
            # test 5) global namespace should work
            namespace(None)
            assert_equals(get_namespace(), None)
            Flow(self.flow.name)[self.run_id]
        finally:
            default_namespace()

    def get_run(self):
        """Return the client run object held by this checker."""
        return self.run

    def assert_artifact(self, step, name, value, fields=None):
        """Assert that artifact *name* has the expected content in *step*.

        For every task of *step*: when *fields* is given (and non-empty) the
        artifact, JSON-decoded first if it is string-like, must be a dict
        whose entries match every ``field: value`` pair in *fields*;
        otherwise the artifact must equal *value*.

        Returns:
            ``True`` when every task passes.

        Raises:
            AssertArtifactFailed: on the first task that does not match, or
                whose artifact dict lacks *name*.
        """
        for task, artifacts in self.artifact_dict(step, name).items():
            if name not in artifacts:
                raise AssertArtifactFailed(
                    "Task '%s' expected %s=%s but "
                    "the key was not found" % (task, name, truncate(value))
                )

            artifact = artifacts[name]
            if fields:
                # Decode and type-check once per task rather than per field.
                if is_stringish(artifact):
                    data = json.loads(artifact)
                else:
                    data = artifact
                if not isinstance(data, dict):
                    raise AssertArtifactFailed(
                        "Task '%s' expected %s to be a dictionary (got %s)"
                        % (task, name, type(data))
                    )
                for field, v in fields.items():
                    if data.get(field, None) != v:
                        raise AssertArtifactFailed(
                            "Task '%s' expected %s[%s]=%r but got %s[%s]=%s"
                            % (
                                task,
                                name,
                                field,
                                truncate(v),
                                name,
                                field,
                                truncate(data.get(field, None)),
                            )
                        )
            elif artifact != value:
                raise AssertArtifactFailed(
                    "Task '%s' expected %s=%r but got %s=%s"
                    % (task, name, truncate(value), name, truncate(artifact))
                )
        return True

    def artifact_dict(self, step, name):
        """Map each task id of *step* to ``{name: <artifact data>}``."""
        return {task.id: {name: task[name].data} for task in self.run[step]}

    def artifact_dict_if_exists(self, step, name):
        """Like ``artifact_dict`` but skip tasks that do not contain *name*."""
        return {
            task.id: {name: task[name].data}
            for task in self.run[step]
            if name in task
        }

    def assert_log(self, step, logtype, value, exact_match=True):
        """Assert on the concatenated *logtype* log of all tasks in *step*.

        Passes when the log equals *value*, or, with ``exact_match=False``,
        when *value* is contained in the log.

        Returns:
            ``True`` on success.

        Raises:
            AssertLogFailed: when the log does not match.
        """
        log_value = self.get_log(step, logtype)
        if log_value == value:
            return True
        elif not exact_match and value in log_value:
            return True
        else:
            raise AssertLogFailed(
                "Step '%s' expected task.%s='%s' but got task.%s='%s'"
                % (step, logtype, repr(value), logtype, repr(log_value))
            )

    def list_cards(self, step, task, card_type=None):
        """Describe the cards of ``step/task``.

        Returns:
            ``None`` when ``get_card`` raises ``CardNotPresentException`` (or
            yields ``None``); otherwise a dict with the task ``pathspec`` and
            a ``cards`` list holding ``hash``, ``id``, ``type`` and
            ``filename`` (last component of the card path) for each card.
        """
        from metaflow.plugins.cards.exception import CardNotPresentException

        try:
            card_iter = self.get_card(step, task, card_type)
        except CardNotPresentException:
            card_iter = None

        if card_iter is None:
            return
        pathspec = self.run[step][task].pathspec
        list_data = dict(pathspec=pathspec, cards=[])
        if len(card_iter) > 0:
            list_data["cards"] = [
                dict(
                    hash=card.hash,
                    id=card.id,
                    type=card.type,
                    filename=card.path.split("/")[-1],
                )
                for card in card_iter
            ]
        return list_data

    def assert_card(
        self,
        step,
        task,
        card_type,
        value,
        card_hash=None,
        card_id=None,
        exact_match=True,
    ):
        """Assert on the content of a card attached to ``step/task``.

        The card is the first one returned by ``get_card`` or, when
        *card_hash* is given, the first one whose hash contains *card_hash*.
        With ``exact_match=True`` its content must equal *value* (a missing
        card compares as ``None``); with ``exact_match=False`` *value* must
        be contained in the content, and a missing card is a failure.

        Returns:
            ``True`` on success.

        Raises:
            AssertCardFailed: when the card content does not match.
        """
        from metaflow.plugins.cards.exception import CardNotPresentException

        try:
            card_iter = self.get_card(step, task, card_type, card_id=card_id)
        except CardNotPresentException:
            card_iter = None

        card_data = None
        # Since there are many cards possible for a taskspec, we check for
        # hash to assert a single card. If the id argument is present then
        # there will be a single card anyway.
        if card_iter is not None and len(card_iter) > 0:
            if card_hash is None:
                card_data = card_iter[0].get()
            else:
                card_filter = [c for c in card_iter if card_hash in c.hash]
                card_data = None if len(card_filter) == 0 else card_filter[0].get()

        if exact_match:
            mismatch = card_data != value
        else:
            # A missing card cannot contain the value; report it as a failed
            # assertion instead of letting `value not in None` raise TypeError.
            mismatch = card_data is None or value not in card_data

        if mismatch:
            raise AssertCardFailed(
                "Task '%s/%s' expected %s card with content '%s' but got '%s'"
                % (self.run_id, step, card_type, repr(value), repr(card_data))
            )
        return True

    def get_log(self, step, logtype):
        """Concatenate attribute *logtype* of every task in *step*."""
        return "".join(getattr(task, logtype) for task in self.run[step])

    def get_card(self, step, task, card_type, card_id=None):
        """Return ``get_cards`` for ``step/task`` filtered by type and id."""
        from metaflow.cards import get_cards

        iterator = get_cards(self.run[step][task], type=card_type, id=card_id)
        return iterator

    def get_user_tags(self):
        """Return ``self.run.user_tags``."""
        return self.run.user_tags

    def get_system_tags(self):
        """Return ``self.run.system_tags``."""
        return self.run.system_tags

    def add_tag(self, tag):
        """Delegate to ``self.run.add_tag``."""
        return self.run.add_tag(tag)

    def add_tags(self, tags):
        """Delegate to ``self.run.add_tags``."""
        return self.run.add_tags(tags)

    def remove_tag(self, tag):
        """Delegate to ``self.run.remove_tag``."""
        return self.run.remove_tag(tag)

    def remove_tags(self, tags):
        """Delegate to ``self.run.remove_tags``."""
        return self.run.remove_tags(tags)

    def replace_tag(self, tag_to_remove, tag_to_add):
        """Delegate to ``self.run.replace_tag``."""
        return self.run.replace_tag(tag_to_remove, tag_to_add)

    def replace_tags(self, tags_to_remove, tags_to_add):
        """Delegate to ``self.run.replace_tags``."""
        return self.run.replace_tags(tags_to_remove, tags_to_add)