示例#1
0
def compute_step_keys_to_execute(graphene_info, external_pipeline, execution_params):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(execution_params, "execution_params", ExecutionParams)

    instance = graphene_info.context.instance

    if not execution_params.step_keys and is_resume_retry(execution_params):
        # Get step keys from parent_run_id if it's a resume/retry
        external_execution_plan = get_external_execution_plan_or_raise(
            graphene_info=graphene_info,
            external_pipeline=external_pipeline,
            mode=execution_params.mode,
            run_config=execution_params.run_config,
            step_keys_to_execute=None,
            known_state=None,
        )
        return get_retry_steps_from_execution_plan(
            instance, external_execution_plan, execution_params.execution_metadata.parent_run_id
        )
    else:
        known_state = None
        if execution_params.execution_metadata.parent_run_id:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(execution_params.execution_metadata.parent_run_id),
                execution_params.step_keys,
            )

        return execution_params.step_keys, known_state
示例#2
0
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    full_external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=None,
        known_state=None,
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None

    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None

        parent_run_id = last_run.run_id
        root_run_id = last_run.root_run_id or last_run.run_id
        tags = merge_dicts(
            tags,
            {
                RESUME_RETRY_TAG: "true",
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id,
            },
        )
        step_keys_to_execute, known_state = get_retry_steps_from_execution_plan(
            instance, full_external_execution_plan, parent_run_id)
    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id
                       or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(tags, {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id
            })
        step_keys_to_execute = backfill_job.reexecution_steps
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None

    if step_keys_to_execute:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            partition_data.run_config,
            external_partition_set.mode,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
        )
    else:
        external_execution_plan = full_external_execution_plan

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.
        execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=frozenset(external_partition_set.solid_selection)
        if external_partition_set.solid_selection else None,
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )