Example #1
def get_log_content(task: Task, logtype: str):
    # NOTE: this re-implements some of the client logic from _load_log(self, stream)
    # for backwards compatibility of different log types.
    # Necessary due to the client not exposing a stdout/stderr property that would
    # contain the optional timestamps.
    stream = 'stderr' if logtype == STDERR else 'stdout'
    log_location = task.metadata_dict.get('log_location_%s' % stream)
    if log_location:
        return [
            (None, line)
            for line in task._load_log_legacy(log_location, stream).split("\n")
        ]
    else:
        return [(_datetime_to_epoch(datetime), line)
                for datetime, line in task.loglines(stream)]
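A hedged usage sketch for the helper above: print a task's stderr log with its optional epoch timestamps. The pathspec is hypothetical, and passing the literal "stderr" assumes STDERR in the helper is that string constant.

from metaflow import Task

task = Task("HelloFlow/12/start/24", attempt=0)  # hypothetical pathspec
for epoch, line in get_log_content(task, "stderr"):
    # epoch is None for legacy logs stored without timestamps
    prefix = "" if epoch is None else "[%s] " % epoch
    print(prefix + line)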
Example #2
    def check_results(self, flow, checker):
        run = checker.get_run()
        if run is None:
            # very basic sanity check for CLI
            for step in flow:
                checker.assert_artifact(step.name, 'step_name', step.name)
                checker.assert_artifact(step.name,
                                        'project_names',
                                        {'current_singleton'})
        else:
            from metaflow import Task
            task_data = run.data.task_data
            for pathspec, uuid in task_data.items():
                assert_equals(Task(pathspec).data.uuid, uuid)
            for step in run:
                for task in step:
                    assert_equals(task.data.step_name, step.id)
                    pathspec = '/'.join(task.pathspec.split('/')[-4:])
                    assert_equals(task.data.uuid, task_data[pathspec])
            assert_equals(run.data.project_names, {'current_singleton'})
            assert_equals(run.data.branch_names, {'user.tester'})
            assert_equals(run.data.project_flow_names,
                          {'current_singleton.user.tester.CurrentSingletonTestFlow'})
            assert_equals(run.data.is_production, {False})
            assert_equals(run.data.flow_names, {run.parent.id})
            assert_equals(run.data.run_ids, {run.id})
            assert_equals(run.data.origin_run_ids, {None})
            assert_equals(run.data.namespaces, {'user:tester'})
            assert_equals(run.data.usernames, {'tester'})
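One detail worth noting above: run.data reads artifacts through the run's end task, which is why values aggregated at the end step are visible on the run itself. A minimal sketch of that equivalence, with a hypothetical run id:

from metaflow import Run

run = Run("CurrentSingletonTestFlow/1234")  # hypothetical run id
# Run.data is a shorthand for the artifacts of the run's 'end' task
assert run.data.project_names == run["end"].task.data.project_names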
Example #3
    def execute(cls,
                message=None,
                keys=None,
                existing_keys={},
                stream_output=None,
                invalidate_cache=False,
                **kwargs):

        results = {}
        # params
        task_dict = message['task']
        attempt = int(task_dict.get('attempt_id', 0))
        limit = message['limit']
        page = message['page']
        logtype = message['logtype']
        reverse = message['reverse_order']
        output_raw = message['raw_log']
        pathspec = pathspec_for_task(task_dict)

        # keys
        log_key = log_cache_id(task_dict, logtype)
        result_key = log_result_id(task_dict, logtype, limit, page, reverse,
                                   output_raw)

        previous_log_file = existing_keys.get(log_key, None)
        previous_log_size = json.loads(previous_log_file).get(
            "log_size", None) if previous_log_file else None

        log_size_changed = False  # keep track if we loaded new content
        with streamed_errors(stream_output):
            task = Task(pathspec, attempt=attempt)
            # check if log has grown since last time.
            current_size = get_log_size(task, logtype)
            log_size_changed = previous_log_size is None or previous_log_size != current_size

            if log_size_changed:
                content = get_log_content(task, logtype)
                results[log_key] = json.dumps({
                    "log_size": current_size,
                    "content": content
                })
            else:
                results = {**existing_keys}

        if log_size_changed or result_key not in existing_keys:
            results[result_key] = json.dumps(
                paginated_result(
                    json.loads(results[log_key])["content"], page, limit,
                    reverse, output_raw))

        return results
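For reference, a hedged sketch of the message shape this cache action expects, reconstructed from the reads at the top of execute(); the action class name and the task dict field names are assumptions, not confirmed by the snippet:

message = {
    "task": {                      # consumed by pathspec_for_task()
        "flow_id": "HelloFlow",    # hypothetical field names and values
        "run_number": "12",
        "step_name": "start",
        "task_id": "24",
        "attempt_id": 0,
    },
    "limit": 100,
    "page": 1,
    "logtype": "stdout",
    "reverse_order": False,
    "raw_log": False,
}
results = GetLogFile.execute(message=message, existing_keys={})  # hypothetical class name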
Example #4
    def step_all(self):
        from metaflow import current, Task

        run = Task(current.pathspec).parent.parent
        for i in range(7):
            tag = str(i)
            run.add_tag(tag)
            assert tag in run.user_tags
            run.remove_tag(tag)
            assert tag not in run.user_tags
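The same tag mutation API also supports atomic replacement; a minimal sketch against a hypothetical run:

from metaflow import Run

run = Run("TagMutationFlow/77")  # hypothetical run id
run.add_tag("stage:dev")
run.replace_tag("stage:dev", "stage:prod")  # remove and add in one call
assert "stage:prod" in run.user_tags and "stage:dev" not in run.user_tags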
Example #5
    def fetch_data(cls, pathspec: str,
                   stream_output: Callable[[object], None]):
        """
        Fetch data using Metaflow Client.

        Parameters
        ----------
        pathspec : str
            Task pathspec with attempt id as last component:
                "FlowId/RunNumber/StepName/TaskId/0"
        stream_output : Callable[[object], None]
            Stream output callable from execute() that accepts a JSON serializable object.
            Used for generic messaging.

        Errors can be streamed to the cache client using `stream_output` in combination
        with the error_event_msg helper. This way failures won't be cached for individual
        artifacts, making it necessary to retry fetching during the next attempt
        (which adds significant overhead/delay).

        Stream error example:
            stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str()))
        """
        try:
            pathspec_without_attempt, attempt_id = unpack_pathspec_with_attempt_id(
                pathspec)
            task = Task(pathspec_without_attempt, attempt=attempt_id)
        except MetaflowNotFound:
            return False  # Skip cache persist if Task cannot be found

        if '_task_ok' not in task:
            # Skip cache persist if _task_ok artifact cannot be found
            return False

        values = {}
        for artifact_name in ['_task_ok', '_foreach_stack']:
            if artifact_name in task:
                artifact = task[artifact_name]
                if artifact.size < MAX_S3_SIZE:
                    values[artifact_name] = artifact.data
                else:
                    return [
                        False, 'artifact-too-large',
                        "{}: {} bytes".format(artifact.pathspec, artifact.size)
                    ]

        return [True, values]
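For context, a sketch of what unpack_pathspec_with_attempt_id presumably does, judging from the docstring above; the real helper lives elsewhere in this codebase:

def unpack_pathspec_with_attempt_id(pathspec):
    # "FlowId/RunNumber/StepName/TaskId/0" -> ("FlowId/RunNumber/StepName/TaskId", 0)
    *components, attempt_id = pathspec.split("/")
    return "/".join(components), int(attempt_id)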
Example #6
    def get_run_output_data(self):
        # Time how long reading all artifacts of the run's end task takes.
        start = time.time()
        task_wrapper = Task(self._run.end_task.pathspec)

        return_dataset = {}

        for data in task_wrapper.artifacts:
            print(data.pathspec)
            wrapper = {
                "data": data.data,
                "artifact_name": data.path_components[-1],
                "finished_at": data.finished_at
            }
            return_dataset[wrapper['artifact_name']] = wrapper
        end = time.time()
        print(end - start)
        return return_dataset
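A hedged usage sketch; the wrapper class name and constructor are hypothetical, since only the method is shown above:

from metaflow import Run

fetcher = RunOutputReader(Run("HelloFlow/12"))  # hypothetical class and run id
outputs = fetcher.get_run_output_data()
print(sorted(outputs))  # artifact names of the run's end task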
Example #7
    def fetch_data(cls, pathspec: str, stream_output: Callable[[str], None]):
        """
        Fetch data using Metaflow Client.

        Parameters
        ----------
        pathspec : str
            Task pathspec
                "FlowId/RunNumber/StepName/TaskId"
        stream_output : Callable[[object], None]
            Stream output callable from execute() that accepts a JSON serializable object.
            Used for generic messaging.

        Errors can be streamed to the cache client using `stream_output` in combination
        with the error_event_msg helper. This way failures won't be cached for individual
        artifacts, making it necessary to retry fetching during the next attempt
        (which adds significant overhead/delay).

        Stream error example:
            stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str()))
        """
        def _card_item(card):
            return {
                "id": card.id,
                "type": card.type,
                "html": card.get()
            }
        try:
            with streamed_errors(stream_output):
                task = Task("{}".format(pathspec))
                cards = {card.hash: _card_item(card) for card in get_cards(task)}
        except Exception:
            # NOTE: return false in order not to cache this
            # since parameters might be available later
            return False

        return [True, cards]
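A hedged usage sketch for the same card client API: write every card of a task to an HTML file. The pathspec is hypothetical; get_cards, card.hash, and card.get() are used exactly as in the snippet above.

from metaflow import Task
from metaflow.cards import get_cards

task = Task("CardFlow/3/start/9")  # hypothetical pathspec
for card in get_cards(task):
    with open("card_%s.html" % card.hash, "w") as f:
        f.write(card.get())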