    def _wait_for_mapper_tasks(self, flow, step_name):
        """
        When launching a multinode task with UBF, we need to wait for the
        secondary tasks to finish cleanly and produce their output before
        exiting the main task. Otherwise the main task finishing would cause
        the secondary nodes to terminate immediately, and possibly prematurely.
        """
        import time

        from metaflow import Step, current  # imported here to avoid a circular dependency

        TIMEOUT = 600
        last_completion_timeout = time.time() + TIMEOUT
        print("Waiting for batch secondary tasks to finish")
        while last_completion_timeout > time.time():
            time.sleep(2)
            try:
                step_path = "%s/%s/%s" % (flow.name, current.run_id, step_name)
                tasks = list(Step(step_path))
                if len(tasks) == len(flow._control_mapper_tasks):
                    # Check finished_at rather than task.finished, which
                    # fails here for some reason.
                    if all(task.finished_at is not None for task in tasks):
                        return True
                else:
                    print(
                        "Waiting for all parallel tasks to finish. "
                        "Finished: {}/{}".format(
                            len(tasks), len(flow._control_mapper_tasks)
                        )
                    )
            except Exception:
                # The step or its tasks may not exist in the metadata store
                # yet; keep polling until the timeout.
                pass
        raise Exception(
            "Batch secondary workers did not finish in %s seconds" % TIMEOUT)
Example #2
    @classmethod
    def execute(cls,
                message=None,
                keys=None,
                existing_keys=None,  # avoid a mutable default argument
                stream_output=None,
                invalidate_cache=False,
                **kwargs):
        results = {}
        flow_id = message['flow_id']
        run_number = message['run_number']

        # Exactly one 'dag:result' key is expected among the requested keys.
        result_key = [key for key in keys if key.startswith('dag:result')][0]

        with streamed_errors(stream_output):
            run = Run("{}/{}".format(flow_id, run_number))
            param_step = Step("{}/_parameters".format(run.pathspec))
            try:
                dag = DataArtifact("{}/_graph_info".format(
                    param_step.task.pathspec)).data
            except MetaflowNotFound:
                dag = generate_dag(run)

            results[result_key] = json.dumps(dag)

        return results
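
A hedged sketch of how a caller might invoke this cache action; GenerateDag is a stand-in name for the class defining execute(), and the message and key shapes simply mirror the snippet:

# Hypothetical invocation; GenerateDag and the key format are assumptions
# inferred from the snippet above, not a confirmed API.
message = {'flow_id': 'MyFlow', 'run_number': '42'}
keys = ['dag:result:MyFlow/42']

results = GenerateDag.execute(
    message=message,
    keys=keys,
    stream_output=print,  # any callable accepting a JSON-serializable object
)
# results maps the 'dag:result:...' key to a JSON-encoded DAG description.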
Example #3
def resolve_task_from_pathspec(flow_name, pathspec):
    """
    resolves a task object for the pathspec query on the CLI.
    Args:
        flow_name : (str) : name of flow
        pathspec (str) : can be `stepname` / `runid/stepname` / `runid/stepname/taskid`

    Returns:
        metaflow.Task | None
    """
    from metaflow import Flow, Step, Task
    from metaflow.exception import MetaflowNotFound

    # since pathspec can have many variations.
    pthsplits = pathspec.split("/")
    task = None
    run_id = None
    resolving_from = "task_pathspec"
    if len(path_parts) == 1:
        # This means `stepname`
        resolving_from = "stepname"
        latest_run = Flow(flow_name).latest_run
        if latest_run is not None:
            run_id = latest_run.pathspec
            try:
                task = latest_run[pathspec].task
            except KeyError:
                pass
    elif len(path_parts) == 2:
        # This means `runid/stepname`
        namespace(None)
        resolving_from = "step_pathspec"
        try:
            task = Step("/".join([flow_name, pathspec])).task
        except MetaflowNotFound:
            pass
    elif len(path_parts) == 3:
        # This means `runid/stepname/taskid`
        namespace(None)
        resolving_from = "task_pathspec"
        try:
            task = Task("/".join([flow_name, pathspec]))
        except MetaflowNotFound:
            pass
    else:
        # Raise an exception for an invalid pathspec format.
        raise CommandException(
            msg="The PATHSPEC argument should be of the form 'stepname' or "
            "'<runid>/<stepname>' or '<runid>/<stepname>/<taskid>'"
        )

    if task is None:
        # The task could not be resolved for the query.
        raise TaskNotFoundException(pathspec, resolving_from, run_id=run_id)

    return task
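
The three accepted pathspec forms map onto the three branches above; a usage sketch with placeholder identifiers:

# 'MyFlow', '42', 'train', and '123' are hypothetical identifiers.
task = resolve_task_from_pathspec("MyFlow", "train")         # step in the latest run
task = resolve_task_from_pathspec("MyFlow", "42/train")      # step in a specific run
task = resolve_task_from_pathspec("MyFlow", "42/train/123")  # a specific task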
Example #4
    def get_step(self, run: Run, step_name: str) -> Step:
        """
        Get step function returns metaflow `Step`
        object for given `Run` and `step_name`

        :param run: metaflow.Run
        :param step_name: step name
        :return: metaflow.client.Step
        """
        flow_id = run._object['flow_id']
        return Step(f'{flow_id}/{run.id}/{step_name}')
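
A quick usage sketch, assuming client is an instance of the (unnamed) class that defines get_step(); the flow and run ids are placeholders:

from metaflow import Run

run = Run("MyFlow/42")                # hypothetical run pathspec
step = client.get_step(run, "start")  # metaflow.client.Step for the start step
print(step.task.finished_at)          # a datetime once the task has finished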
    @classmethod
    def fetch_data(cls, pathspec: str, stream_output: Callable[[object], None]):
        """
        Fetch data using the Metaflow Client.

        Parameters
        ----------
        pathspec : str
            Run pathspec: "FlowId/RunNumber"
        stream_output : Callable[[object], None]
            Stream output callable from execute() that accepts a JSON-serializable
            object. Used for generic messaging.

        Errors can be streamed to the cache client using `stream_output` in
        combination with the error_event_msg helper. This way failures are not
        cached for individual artifacts, so fetching is retried on the next
        attempt (at the cost of significant overhead/delay).

        Stream error example:
            stream_output(error_event_msg(str(ex), "s3-not-found", get_traceback_str()))
        """
        try:
            with streamed_errors(stream_output):
                step = Step("{}/_parameters".format(pathspec))
        except Exception:
            # NOTE: return False in order not to cache this,
            # since the parameters might be available later.
            return False

        values = {}
        for artifact_name, artifact in step.task.artifacts._asdict().items():
            # Exclude the following internal-only artifacts from the results:
            #   - artifacts prefixed with an underscore (_)
            #   - the 'name' and 'script_name' artifacts
            if artifact_name.startswith('_') or artifact_name in ('name', 'script_name'):
                continue
            try:
                if artifact.size < MAX_S3_SIZE:
                    values[artifact_name] = artifact.data
                else:
                    values[artifact_name] = "Artifact too large: {} bytes".format(
                        artifact.size
                    )
            except Exception as ex:
                values[artifact_name] = str(ex)

        return [True, values]
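
A hedged sketch of a caller; ParamsFetcher is a stand-in name for the class that defines fetch_data():

# Hypothetical invocation; ParamsFetcher is an assumed class name.
result = ParamsFetcher.fetch_data("MyFlow/42", stream_output=print)
if result is False:
    print("Parameters not available yet; the result was not cached.")
else:
    success, values = result
    print(values)  # artifact name -> value, size notice, or error string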