def logs(obj, input_path, stdout=None, stderr=None, both=None):
    types = set()
    if stdout:
        types.add('stdout')
        both = False
    if stderr:
        types.add('stderr')
        both = False
    if both:
        types.update(('stdout', 'stderr'))

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split('/')
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException("input_path should either be run_id/step_name "
                               "or run_id/step_name/task_id")

    if obj.datastore.datastore_root is None:
        obj.datastore.datastore_root = obj.datastore.get_datastore_root_from_config(
            obj.echo, create_on_absent=False)
    if obj.datastore.datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable" %
            (obj.datastore.TYPE).upper())

    from metaflow.datastore.datastore_set import MetaflowDatastoreSet
    datastore_set = MetaflowDatastoreSet(obj.datastore,
                                         obj.flow.name,
                                         run_id,
                                         steps=[step_name],
                                         metadata=obj.metadata,
                                         monitor=obj.monitor,
                                         event_logger=obj.event_logger)
    if task_id:
        ds_list = [datastore_set.get_with_pathspec(input_path)]
    else:
        ds_list = list(datastore_set)  # get all tasks

    for ds in ds_list:
        echo('Dumping logs of run_id=*{run_id}* '
             'step=*{step}* task_id=*{task_id}*'.format(run_id=ds.run_id,
                                                        step=ds.step_name,
                                                        task_id=ds.task_id),
             fg='magenta')
        for typ in ('stdout', 'stderr'):
            if typ in types:
                echo(typ, bold=True)
                click.secho(ds.load_log(typ).decode('UTF-8', errors='replace'),
                            nl=False)
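
# Both `logs` and `dump` repeat the same pathspec parsing block. A minimal
# sketch of a helper that could factor it out -- the name `parse_pathspec`
# is hypothetical and not part of Metaflow's API:
#
# from metaflow.exception import CommandException
#
# def parse_pathspec(input_path):
#     parts = input_path.split('/')
#     if len(parts) == 2:
#         return parts[0], parts[1], None  # run_id, step_name, no task_id
#     if len(parts) == 3:
#         return tuple(parts)              # run_id, step_name, task_id
#     raise CommandException("input_path should either be run_id/step_name "
#                            "or run_id/step_name/task_id")
#
# Usage: run_id, step_name, task_id = parse_pathspec(input_path)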
def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
    output = {}
    kwargs = {
        'show_private': private,
        'max_value_size': max_value_size,
        'include': {t for t in include.split(',') if t}
    }

    if obj.datastore.datastore_root is None:
        obj.datastore.datastore_root = obj.datastore.get_datastore_root_from_config(
            obj.echo, create_on_absent=False)
    if obj.datastore.datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable" %
            (obj.datastore.TYPE).upper())

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split('/')
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException("input_path should either be run_id/step_name "
                               "or run_id/step_name/task_id")

    from metaflow.datastore.datastore_set import MetaflowDatastoreSet
    datastore_set = MetaflowDatastoreSet(
        obj.datastore,
        obj.flow.name,
        run_id,
        steps=[step_name],
        metadata=obj.metadata,
        monitor=obj.monitor,
        event_logger=obj.event_logger,
        prefetch_data_artifacts=kwargs.get('include'))

    if task_id:
        ds_list = [datastore_set.get_with_pathspec(input_path)]
    else:
        ds_list = list(datastore_set)  # get all tasks

    for ds in ds_list:
        echo('Dumping output of run_id=*{run_id}* '
             'step=*{step}* task_id=*{task_id}*'.format(run_id=ds.run_id,
                                                        step=ds.step_name,
                                                        task_id=ds.task_id),
             fg='magenta')
        if file is None:
            echo_always(ds.format(**kwargs),
                        highlight='green',
                        highlight_bold=False,
                        err=False)
        else:
            output[ds.pathspec] = ds.to_dict(**kwargs)

    if file is not None:
        with open(file, 'wb') as f:
            pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)
        echo('Artifacts written to *%s*' % file)
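
# A minimal sketch of reading back the pickle that `dump --file` writes above.
# The file is keyed by task pathspec, each value being the dict produced by
# `ds.to_dict(**kwargs)`; the file name `artifacts.pkl` is hypothetical.
#
# import pickle
#
# with open('artifacts.pkl', 'rb') as f:  # hypothetical `dump --file artifacts.pkl` output
#     artifacts = pickle.load(f)
#
# for pathspec, values in artifacts.items():
#     # each key is a run_id/step_name/task_id pathspec; `values` maps
#     # artifact names to their (possibly size-limited) values
#     print(pathspec, sorted(values))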
def logs(obj, input_path, stdout=None, stderr=None, both=None, timestamps=False):
    types = set()
    if stdout:
        types.add('stdout')
        both = False
    if stderr:
        types.add('stderr')
        both = False
    if both:
        types.update(('stdout', 'stderr'))
    streams = list(sorted(types, reverse=True))

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    parts = input_path.split('/')
    if len(parts) == 2:
        run_id, step_name = parts
        task_id = None
    elif len(parts) == 3:
        run_id, step_name, task_id = parts
    else:
        raise CommandException("input_path should either be run_id/step_name "
                               "or run_id/step_name/task_id")

    if obj.datastore.datastore_root is None:
        obj.datastore.datastore_root = obj.datastore.get_datastore_root_from_config(
            obj.echo, create_on_absent=False)
    if obj.datastore.datastore_root is None:
        raise CommandException(
            "Could not find the location of the datastore -- did you correctly set the "
            "METAFLOW_DATASTORE_SYSROOT_%s environment variable" %
            (obj.datastore.TYPE).upper())

    if task_id:
        ds_list = [
            obj.datastore(obj.flow.name,
                          run_id=run_id,
                          step_name=step_name,
                          task_id=task_id,
                          mode='r',
                          allow_unsuccessful=True)
        ]
    else:
        from metaflow.datastore.datastore_set import MetaflowDatastoreSet
        datastore_set = MetaflowDatastoreSet(obj.datastore,
                                             obj.flow.name,
                                             run_id,
                                             steps=[step_name],
                                             metadata=obj.metadata,
                                             monitor=obj.monitor,
                                             event_logger=obj.event_logger)
        # get all successful tasks
        ds_list = list(datastore_set)

    if ds_list:

        def echo_unicode(line, **kwargs):
            click.secho(line.decode('UTF-8', errors='replace'), **kwargs)

        # old-style logs are non-mflog-style logs
        maybe_old_style = True
        for ds in ds_list:
            echo('Dumping logs of run_id=*{run_id}* '
                 'step=*{step}* task_id=*{task_id}*'.format(run_id=ds.run_id,
                                                            step=ds.step_name,
                                                            task_id=ds.task_id),
                 fg='magenta')

            for stream in streams:
                echo(stream, bold=True)
                logs = ds.load_logs(LOG_SOURCES, stream)
                if any(data for _, data in logs):
                    # attempt to read new, mflog-style logs
                    for line in mflog.merge_logs([blob for _, blob in logs]):
                        if timestamps:
                            ts = mflog.utc_to_local(line.utc_tstamp)
                            tstamp = ts.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
                            click.secho(tstamp + ' ',
                                        fg=LOGGER_TIMESTAMP,
                                        nl=False)
                        echo_unicode(line.msg)
                    maybe_old_style = False
                elif maybe_old_style:
                    # if they are not available, we may be looking at
                    # a legacy run (unless we have seen new-style data already
                    # for another stream). This returns an empty string if
                    # nothing is found
                    log = ds.load_log_legacy(stream)
                    if log and timestamps:
                        raise CommandException("We can't show --timestamps for "
                                               "old runs. Sorry!")
                    echo_unicode(log, nl=False)
    elif len(parts) == 2:
        # TODO if the datastore provided a way to find unsuccessful task IDs,
        # we could handle this case automatically
        raise CommandException("Successful tasks were not found at the given "
                               "path. You can see logs for unsuccessful tasks "
                               "by giving an exact task ID using the "
                               "run_id/step_name/task_id format.")
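
# Illustration of the timestamp truncation used above: strftime('%f') yields
# microseconds (6 digits), so the [:-3] slice trims the rendered timestamp to
# millisecond precision. The sample datetime below is arbitrary.
#
# from datetime import datetime
#
# ts = datetime(2021, 3, 14, 9, 26, 53, 589793)
# print(ts.strftime('%Y-%m-%d %H:%M:%S.%f'))       # 2021-03-14 09:26:53.589793
# print(ts.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3])  # 2021-03-14 09:26:53.589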