def create_run_for_pipeline(
    self,
    pipeline_def,
    execution_plan=None,
    run_id=None,
    environment_dict=None,
    mode=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    solid_selection=None,
):
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.execution.plan.plan import ExecutionPlan
    from dagster.core.snap import snapshot_from_execution_plan

    check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
    check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    # note that solids_to_execute is required to execute the solid subset, which is the
    # frozenset version of the previous solid_subset.
    # solid_selection is not required and will not be converted to solids_to_execute here,
    # i.e. this function doesn't handle solid queries.
    # solid_selection is only used to pass the user queries further down.
    check.opt_set_param(solids_to_execute, 'solids_to_execute', of_type=str)
    check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

    if solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            # for the case when pipeline_def is created by ExecutablePipeline or ExternalPipeline
            check.invariant(
                solids_to_execute == pipeline_def.solids_to_execute,
                'Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} '
                'that conflicts with solids_to_execute arg {solids_to_execute}'.format(
                    pipeline_solids_to_execute=str_format_list(pipeline_def.solids_to_execute),
                    solids_to_execute=str_format_list(solids_to_execute),
                ),
            )
        else:
            # for cases when `create_run_for_pipeline` is directly called
            pipeline_def = pipeline_def.get_pipeline_subset_def(solids_to_execute=solids_to_execute)

    if execution_plan is None:
        execution_plan = create_execution_plan(
            pipeline_def,
            environment_dict=environment_dict,
            mode=mode,
            step_keys_to_execute=step_keys_to_execute,
        )

    return self.create_run(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=check.opt_str_param(mode, 'mode', default=pipeline_def.get_default_mode_name()),
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
        execution_plan_snapshot=snapshot_from_execution_plan(
            execution_plan, pipeline_def.get_pipeline_snapshot_id()
        ),
        parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(),
    )
def has_enum_value(self, value):
    check.invariant(self.kind == ConfigTypeKind.ENUM)
    for enum_value in self.enum_values:
        if enum_value.value == value:
            return True
    return False
def __init__(self, event):
    super().__init__()
    self._event = check.inst_param(event, "event", EventLogEntry)
    check.invariant(
        isinstance(event.dagster_event.step_materialization_data, StepMaterializationData)
    )
def on_modified(self, event):
    check.invariant(event.src_path == self._log_path)
    self._process_log()
def cp_object(self, src, dst):
    check.invariant(dst not in self.values, "key {} already in use".format(dst))
    check.invariant(src in self.values, "key {} not present".format(src))
    self.values[dst] = self.values[src]
    return src, dst
def reexecute_pipeline_iterator(
    pipeline: Union[IPipeline, PipelineDefinition],
    parent_run_id: str,
    run_config: Optional[dict] = None,
    step_selection: Optional[List[str]] = None,
    mode: Optional[str] = None,
    preset: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    instance: DagsterInstance = None,
) -> Iterator[DagsterEvent]:
    """Reexecute a pipeline iteratively.

    Rather than package up the result of running a pipeline into a single object, like
    :py:func:`reexecute_pipeline`, this function yields the stream of events resulting from
    pipeline reexecution.

    This is intended to allow the caller to handle these events on a streaming basis in whatever
    way is appropriate.

    Parameters:
        pipeline (Union[IPipeline, PipelineDefinition]): The pipeline to execute.
        parent_run_id (str): The id of the previous run to reexecute. The run must exist in the
            instance.
        run_config (Optional[dict]): The environment configuration that parametrizes this run, as
            a dict.
        step_selection (Optional[List[str]]): A list of step selection queries (including single
            step keys) to execute. For example:

            - ``['some_solid']``: selects ``some_solid`` itself.
            - ``['*some_solid']``: select ``some_solid`` and all its ancestors (upstream
              dependencies).
            - ``['*some_solid+++']``: select ``some_solid``, all its ancestors, and its descendants
              (downstream dependencies) within 3 levels down.
            - ``['*some_solid', 'other_solid_a', 'other_solid_b+']``: select ``some_solid`` and all
              its ancestors, ``other_solid_a`` itself, and ``other_solid_b`` and its direct child
              solids.
        mode (Optional[str]): The name of the pipeline mode to use. You may not set both ``mode``
            and ``preset``.
        preset (Optional[str]): The name of the pipeline preset to use. You may not set both
            ``mode`` and ``preset``.
        tags (Optional[Dict[str, Any]]): Arbitrary key-value pairs that will be added to pipeline
            logs.
        instance (Optional[DagsterInstance]): The instance to execute against. If this is ``None``,
            an ephemeral instance will be used, and no artifacts will be persisted from the run.

    Returns:
        Iterator[DagsterEvent]: The stream of events resulting from pipeline reexecution.
""" check.opt_list_param(step_selection, "step_selection", of_type=str) check.str_param(parent_run_id, "parent_run_id") with ephemeral_instance_if_missing(instance) as execute_instance: (pipeline, run_config, mode, tags, _, _) = _check_execute_pipeline_args( pipeline=pipeline, run_config=run_config, mode=mode, preset=preset, tags=tags, solid_selection=None, ) parent_pipeline_run = execute_instance.get_run_by_id(parent_run_id) check.invariant( parent_pipeline_run, "No parent run with id {parent_run_id} found in instance.".format( parent_run_id=parent_run_id), ) step_keys_to_execute: Optional[List[str]] = None execution_plan: Optional[ExecutionPlan] = None # resolve step selection DSL queries using parent execution information if step_selection: step_keys_to_execute, execution_plan = _resolve_reexecute_step_selection( execute_instance, pipeline, mode, run_config, parent_pipeline_run, step_selection, ) pipeline_run = execute_instance.create_run_for_pipeline( pipeline_def=pipeline.get_definition(), run_config=run_config, execution_plan=execution_plan, mode=mode, tags=tags, solid_selection=parent_pipeline_run.solid_selection, solids_to_execute=parent_pipeline_run.solids_to_execute, # convert to frozenset https://github.com/dagster-io/dagster/issues/2914 step_keys_to_execute=list(step_keys_to_execute) if step_keys_to_execute else None, root_run_id=parent_pipeline_run.root_run_id or parent_pipeline_run.run_id, parent_run_id=parent_pipeline_run.run_id, ) return execute_run_iterator(pipeline, pipeline_run, execute_instance)
def _wrapped_fn(context: SensorEvaluationContext):
    # initialize the cursor to (most recent event id, current timestamp) when:
    # * it's the first time starting the sensor
    # * or the cursor isn't in a valid format (backcompat)
    if context.cursor is None or not RunStatusSensorCursor.is_valid(context.cursor):
        most_recent_event_records = list(
            context.instance.get_event_records(ascending=False, limit=1)
        )
        most_recent_event_id = (
            most_recent_event_records[0].storage_id if len(most_recent_event_records) == 1 else -1
        )

        new_cursor = RunStatusSensorCursor(
            update_timestamp=pendulum.now("UTC").isoformat(),
            record_id=most_recent_event_id,
        )
        context.update_cursor(new_cursor.to_json())
        yield SkipReason(f"Initiating {name}. Set cursor to {new_cursor}")
        return

    record_id, update_timestamp = RunStatusSensorCursor.from_json(context.cursor)

    # Fetch events after the cursor id
    # * we move the cursor forward to the latest visited event's id to avoid revisits
    # * when the daemon is down, because we persist the cursor info, we can pick up where we
    #   left off and backfill alerts for the qualified events (up to 5 at a time) during the
    #   downtime
    # Note: this is a cross-run query which requires extra handling in sqlite, see details in
    # SqliteEventLogStorage.
    event_records = context.instance.get_event_records(
        EventRecordsFilter(
            after_cursor=RunShardedEventsCursor(
                id=record_id, run_updated_after=pendulum.parse(update_timestamp)
            ),
            event_type=PIPELINE_RUN_STATUS_TO_EVENT_TYPE[pipeline_run_status],
        ),
        ascending=True,
        limit=5,
    )

    for event_record in event_records:
        event_log_entry = event_record.event_log_entry
        storage_id = event_record.storage_id

        # get run info
        run_records = context.instance.get_run_records(
            filters=PipelineRunsFilter(run_ids=[event_log_entry.run_id])
        )
        check.invariant(len(run_records) == 1)
        pipeline_run = run_records[0].pipeline_run
        update_timestamp = run_records[0].update_timestamp

        # skip if any of the following is true:
        if (
            # the pipeline does not have a repository (manually executed)
            not pipeline_run.external_pipeline_origin
            # the pipeline does not belong to the current repository
            or pipeline_run.external_pipeline_origin.external_repository_origin.repository_name
            != context.repository_name
            # the pipeline is not selected
            or (pipeline_selection and pipeline_run.pipeline_name not in pipeline_selection)
        ):
            context.update_cursor(
                RunStatusSensorCursor(
                    record_id=storage_id, update_timestamp=update_timestamp.isoformat()
                ).to_json()
            )
            continue

        serializable_error = None

        try:
            with user_code_error_boundary(
                RunStatusSensorExecutionError,
                lambda: f'Error occurred during the execution sensor "{name}".',
            ):
                # one user code invocation maps to one failure event
                run_status_sensor_fn(
                    RunStatusSensorContext(
                        sensor_name=name,
                        pipeline_run=pipeline_run,
                        dagster_event=event_log_entry.dagster_event,
                    )
                )
        except RunStatusSensorExecutionError as run_status_sensor_execution_error:
            # When the user code errors, we report the error to the sensor tick, not the original run.
            serializable_error = serializable_error_info_from_exc_info(
                run_status_sensor_execution_error.original_exc_info
            )

        context.update_cursor(
            RunStatusSensorCursor(
                record_id=storage_id, update_timestamp=update_timestamp.isoformat()
            ).to_json()
        )

        # Yield a PipelineRunReaction to indicate execution success/failure.
        # The sensor machinery will
        # * report back to the original run on success
        # * update the cursor and job state
        yield PipelineRunReaction(
            pipeline_run=pipeline_run,
            error=serializable_error,
        )
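# The wrapper above is what a decorator along the lines of `@run_status_sensor` produces. A hedged
# sketch of the user-facing side, assuming `run_status_sensor`, `RunStatusSensorContext`, and
# `PipelineRunStatus` are importable from `dagster` (as in recent releases); `notify` is a
# placeholder for user alerting code.
from dagster import PipelineRunStatus, RunStatusSensorContext, run_status_sensor

@run_status_sensor(pipeline_run_status=PipelineRunStatus.FAILURE)
def report_failures(context: RunStatusSensorContext):
    # Invoked once per matching run-status event; the cursor handling above ensures each
    # event is visited at most once, even across daemon restarts.
    notify(f"Run {context.pipeline_run.run_id} of {context.pipeline_run.pipeline_name} failed")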
def __init__(self, bucket, client=None, prefix="dagster"):
    self.bucket = check.str_param(bucket, "bucket")
    self.client = client or storage.Client()
    self.bucket_obj = self.client.get_bucket(bucket)
    check.invariant(self.bucket_obj.exists())
    self.prefix = check.str_param(prefix, "prefix")
def __init__(
    self,
    name: str,
    pipeline_name: Optional[str] = None,
    partition_fn: Optional[Callable[..., Union[List[Partition[T]], List[str]]]] = None,
    solid_selection: Optional[List[str]] = None,
    mode: Optional[str] = None,
    run_config_fn_for_partition: Callable[[Partition[T]], Any] = lambda _partition: {},
    tags_fn_for_partition: Callable[[Partition[T]], Optional[Dict[str, str]]] = lambda _partition: {},
    partitions_def: Optional[PartitionsDefinition[T]] = None,  # pylint: disable=unsubscriptable-object
    job_name: Optional[str] = None,
):
    check.invariant(
        partition_fn is not None or partitions_def is not None,
        "One of `partition_fn` or `partitions_def` must be supplied.",
    )
    check.invariant(
        not (partition_fn and partitions_def),
        "Only one of `partition_fn` or `partitions_def` must be supplied.",
    )
    check.invariant(
        not (pipeline_name and job_name),
        "Only one of `job_name` and `pipeline_name` must be supplied.",
    )

    _wrap_partition_fn = None

    if partition_fn is not None:
        partition_fn_param_count = len(inspect.signature(partition_fn).parameters)

        def _wrap_partition(x: Union[str, Partition]) -> Partition:
            if isinstance(x, Partition):
                return x
            if isinstance(x, str):
                return Partition(x)
            raise DagsterInvalidDefinitionError(
                "Expected <Partition> | <str>, received {type}".format(type=type(x))
            )

        def _wrap_partition_fn(current_time=None) -> List[Partition]:
            if not current_time:
                current_time = pendulum.now("UTC")

            check.callable_param(partition_fn, "partition_fn")

            if partition_fn_param_count == 1:
                obj_list = cast(
                    Callable[..., List[Union[Partition[T], str]]],
                    partition_fn,
                )(current_time)
            else:
                obj_list = partition_fn()  # type: ignore

            return [_wrap_partition(obj) for obj in obj_list]

    self._name = check_valid_name(name)
    self._pipeline_name = check.opt_str_param(pipeline_name, "pipeline_name")
    self._job_name = check.opt_str_param(job_name, "job_name")
    self._partition_fn = _wrap_partition_fn
    self._solid_selection = check.opt_nullable_list_param(solid_selection, "solid_selection", of_type=str)
    self._mode = check.opt_str_param(mode, "mode", DEFAULT_MODE_NAME)
    self._user_defined_run_config_fn_for_partition = check.callable_param(
        run_config_fn_for_partition, "run_config_fn_for_partition"
    )
    self._user_defined_tags_fn_for_partition = check.callable_param(
        tags_fn_for_partition, "tags_fn_for_partition"
    )
    check.opt_inst_param(partitions_def, "partitions_def", PartitionsDefinition)
    if partitions_def is not None:
        self._partitions_def = partitions_def
    else:
        if partition_fn is None:
            check.failed("One of `partition_fn` or `partitions_def` must be supplied.")
        self._partitions_def = DynamicPartitionsDefinition(partition_fn=_wrap_partition_fn)
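# A hedged construction example, assuming this __init__ belongs to PartitionSetDefinition (the
# class name is inferred from the parameter names, not stated in the snippet); the pipeline name
# and run-config shape are placeholders.
from dagster import PartitionSetDefinition

date_partition_set = PartitionSetDefinition(
    name="date_partitions",
    pipeline_name="ingest_pipeline",  # placeholder
    # plain strings are wrapped into Partition objects by _wrap_partition above
    partition_fn=lambda: ["2021-01-01", "2021-01-02"],
    run_config_fn_for_partition=lambda partition: {
        "solids": {"ingest": {"config": {"date": partition.value}}}  # placeholder config shape
    },
)
# Passing both partition_fn and partitions_def (or neither) trips the invariants above.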
def __init__(
    self,
    service_account_name,
    instance_config_map,
    postgres_password_secret=None,
    dagster_home=None,
    job_image=None,
    image_pull_policy=None,
    image_pull_secrets=None,
    load_incluster_config=True,
    kubeconfig_file=None,
    inst_data=None,
    job_namespace="default",
    env_config_maps=None,
    env_secrets=None,
    env_vars=None,
    k8s_client_batch_api=None,
    volume_mounts=None,
    volumes=None,
    labels=None,
    fail_pod_on_run_failure=None,
):
    self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)
    self.job_namespace = check.str_param(job_namespace, "job_namespace")

    self.load_incluster_config = load_incluster_config
    self.kubeconfig_file = kubeconfig_file
    if load_incluster_config:
        check.invariant(
            kubeconfig_file is None,
            "`kubeconfig_file` is set but `load_incluster_config` is True.",
        )
        kubernetes.config.load_incluster_config()
    else:
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")
        kubernetes.config.load_kube_config(kubeconfig_file)

    self._fixed_batch_api = k8s_client_batch_api

    self._job_config = None
    self._job_image = check.opt_str_param(job_image, "job_image")
    self.dagster_home = check.str_param(dagster_home, "dagster_home")
    self._image_pull_policy = check.opt_str_param(image_pull_policy, "image_pull_policy", "IfNotPresent")
    self._image_pull_secrets = check.opt_list_param(image_pull_secrets, "image_pull_secrets", of_type=dict)
    self._service_account_name = check.str_param(service_account_name, "service_account_name")
    self.instance_config_map = check.str_param(instance_config_map, "instance_config_map")
    self.postgres_password_secret = check.opt_str_param(postgres_password_secret, "postgres_password_secret")
    self._env_config_maps = check.opt_list_param(env_config_maps, "env_config_maps", of_type=str)
    self._env_secrets = check.opt_list_param(env_secrets, "env_secrets", of_type=str)
    self._env_vars = check.opt_list_param(env_vars, "env_vars", of_type=str)
    self._volume_mounts = check.opt_list_param(volume_mounts, "volume_mounts")
    self._volumes = check.opt_list_param(volumes, "volumes")
    self._labels = check.opt_dict_param(labels, "labels", key_type=str, value_type=str)
    self._fail_pod_on_run_failure = check.opt_bool_param(fail_pod_on_run_failure, "fail_pod_on_run_failure")

    super().__init__()
def create_and_launch_partition_backfill(graphene_info, backfill_params):
    from ...schema.backfill import GraphenePartitionBackfillSuccess
    from ...schema.errors import GraphenePartitionSetNotFoundError, GraphenePythonError

    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector")
    )
    location = graphene_info.context.get_repository_location(repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    matches = [
        partition_set
        for partition_set in repository.get_external_partition_sets()
        if partition_set.name == partition_set_selector.get("partitionSetName")
    ]
    if not matches:
        return GraphenePartitionSetNotFoundError(partition_set_name)

    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}".format(
            num=len(matches), partition_set_name=partition_set_name
        ),
    )
    external_partition_set = next(iter(matches))

    external_pipeline = repository.get_full_external_pipeline(external_partition_set.pipeline_name)
    pipeline_selector = PipelineSelector(
        location_name=location.name,
        repository_name=repository.name,
        pipeline_name=external_pipeline.name,
        solid_selection=external_partition_set.solid_selection,
    )

    partition_names = backfill_params.get("partitionNames")

    backfill_id = make_new_backfill_id()
    result = graphene_info.context.get_external_partition_set_execution_param_data(
        repository.handle, partition_set_name, partition_names
    )

    if isinstance(result, ExternalPartitionExecutionErrorData):
        return GraphenePythonError(result.error)

    assert isinstance(result, ExternalPartitionSetExecutionParamData)

    launched_run_ids = []
    execution_param_list = _build_execution_param_list_for_backfill(
        graphene_info.context.instance,
        result.partition_data,
        backfill_id,
        backfill_params,
        pipeline_selector,
        external_partition_set,
    )

    for execution_params in execution_param_list:
        pipeline_run = create_valid_pipeline_run(graphene_info, external_pipeline, execution_params)
        graphene_info.context.instance.submit_run(pipeline_run.run_id, external_pipeline)
        launched_run_ids.append(pipeline_run.run_id)

    return GraphenePartitionBackfillSuccess(
        backfill_id=backfill_id, launched_run_ids=launched_run_ids
    )
def do_composition(
    decorator_name,
    graph_name,
    fn,
    provided_input_defs,
    provided_output_defs,
    config_schema,
    config_fn,
    ignore_output_from_composition_fn,
):
    """
    This is a function used by both @pipeline and @composite_solid to implement their
    composition function, which is our DSL for constructing a dependency graph.

    Args:
        decorator_name (str): Name of the calling decorator. e.g. "@pipeline",
            "@composite_solid", "@graph"
        graph_name (str): User-defined name of the definition being constructed
        fn (Callable): The composition function to be called.
        provided_input_defs (List[InputDefinition]): List of input definitions
            explicitly provided to the decorator by the user.
        provided_output_defs (List[OutputDefinition]): List of output definitions
            explicitly provided to the decorator by the user.
        config_schema (Any): Config schema provided to decorator by user.
        config_fn (Callable): Config fn provided to decorator by user.
        ignore_output_from_composition_fn (Bool): Because of backwards compatibility
            issues, pipelines ignore the return value out of the mapping if
            the user has not explicitly provided the output definitions.
            This should be removed in 0.10.0.
    """
    actual_input_defs = (
        provided_input_defs
        if provided_input_defs is not None
        else infer_input_definitions_for_graph(decorator_name, graph_name, fn)
    )

    actual_output_defs, outputs_are_explicit = (
        (provided_output_defs, True)
        if provided_output_defs is not None
        else (
            infer_output_definitions(decorator_name, graph_name, fn),
            has_explicit_return_type(fn),
        )
    )

    positional_inputs = validate_solid_fn(
        decorator_name, graph_name, fn, actual_input_defs, exclude_nothing=False
    )

    kwargs = {input_def.name: InputMappingNode(input_def) for input_def in actual_input_defs}

    output = None
    returned_mapping = None
    enter_composition(graph_name, decorator_name)
    try:
        output = fn(**kwargs)
        if ignore_output_from_composition_fn:
            if output is not None:
                warnings.warn(
                    "You have returned a value out of a @pipeline-decorated function. "
                    "This currently has no effect on behavior, but will after 0.10.0 is "
                    "released. In order to preserve existing behavior, do not return "
                    "anything out of this function. Pipelines (and their successor, graphs) "
                    "will have meaningful outputs just like composite solids do today, "
                    "and the return value will be meaningful.",
                    stacklevel=3,
                )
            output = None

        returned_mapping = composite_mapping_from_output(output, actual_output_defs, graph_name)
    finally:
        context = exit_composition(returned_mapping)

    check.invariant(
        context.name == graph_name,
        "Composition context stack desync: received context for "
        '"{context.name}" expected "{graph_name}"'.format(context=context, graph_name=graph_name),
    )

    # line up mappings in definition order
    input_mappings = []
    for defn in actual_input_defs:
        mappings = [
            mapping for mapping in context.input_mappings if mapping.definition.name == defn.name
        ]
        if len(mappings) == 0:
            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{graph_name}' has unmapped input '{input_name}'. "
                "Remove it or pass it to the appropriate solid invocation.".format(
                    decorator_name=decorator_name, graph_name=graph_name, input_name=defn.name
                )
            )

        input_mappings += mappings

    output_mappings = []
    for defn in actual_output_defs:
        mapping = context.output_mapping_dict.get(defn.name)
        if mapping is None:
            # if we inferred output_defs we will be flexible and either take a mapping or not
            if not outputs_are_explicit:
                continue

            # if we are ignoring the output, disregard this unsatisfied mapping
            if ignore_output_from_composition_fn:
                continue

            raise DagsterInvalidDefinitionError(
                "{decorator_name} '{graph_name}' has unmapped output '{output_name}'. "
                "Remove it or return a value from the appropriate solid invocation.".format(
                    decorator_name=decorator_name, graph_name=graph_name, output_name=defn.name
                )
            )
        output_mappings.append(mapping)

    config_mapping = _get_validated_config_mapping(graph_name, config_schema, config_fn)

    return (
        input_mappings,
        output_mappings,
        context.dependencies,
        context.solid_defs,
        config_mapping,
        positional_inputs,
    )
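# For context, the composition DSL that do_composition implements is what executes inside a
# @composite_solid or @pipeline body. A minimal sketch of the user-facing side (solid names are
# illustrative, not taken from the source):
from dagster import composite_solid, pipeline, solid

@solid
def add_one(_, x: int) -> int:
    return x + 1

@composite_solid
def add_two(x: int) -> int:
    # Each invocation below is recorded on the composition context that do_composition
    # enters and exits; the return value becomes the output mapping.
    return add_one(add_one(x))

@pipeline
def arithmetic_pipeline():
    add_two()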
def solid_def_named(self, name):
    check.str_param(name, 'name')
    check.invariant(name in self._all_solid_defs, '{} not found'.format(name))
    return self._all_solid_defs[name]
def _construct_run_with_snapshots(
    self,
    pipeline_name,
    run_id,
    environment_dict,
    mode,
    solids_to_execute,
    step_keys_to_execute,
    status,
    tags,
    root_run_id,
    parent_run_id,
    pipeline_snapshot,
    execution_plan_snapshot,
    parent_pipeline_snapshot,
    solid_selection=None,
):
    # https://github.com/dagster-io/dagster/issues/2403
    if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
        if AIRFLOW_EXECUTION_DATE_STR not in tags:
            tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc().isoformat()

    pipeline_run = PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=environment_dict,
        mode=mode,
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
    )

    if pipeline_snapshot is not None:
        from dagster.core.snap import create_pipeline_snapshot_id

        if pipeline_snapshot.lineage_snapshot:
            if not self._run_storage.has_pipeline_snapshot(
                pipeline_snapshot.lineage_snapshot.parent_snapshot_id
            ):
                check.invariant(
                    create_pipeline_snapshot_id(parent_pipeline_snapshot)
                    == pipeline_snapshot.lineage_snapshot.parent_snapshot_id,
                    'Parent pipeline snapshot id out of sync with passed parent pipeline snapshot',
                )

                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    parent_pipeline_snapshot
                )
                check.invariant(
                    pipeline_snapshot.lineage_snapshot.parent_snapshot_id
                    == returned_pipeline_snapshot_id
                )

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        if not self._run_storage.has_pipeline_snapshot(pipeline_snapshot_id):
            returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(pipeline_snapshot)
            check.invariant(pipeline_snapshot_id == returned_pipeline_snapshot_id)

        pipeline_run = pipeline_run.with_pipeline_snapshot_id(pipeline_snapshot_id)

    if execution_plan_snapshot is not None:
        from dagster.core.snap import create_execution_plan_snapshot_id

        check.invariant(execution_plan_snapshot.pipeline_snapshot_id == pipeline_snapshot_id)

        check.invariant(
            set(step_keys_to_execute) == set(execution_plan_snapshot.step_keys_to_execute)
            if step_keys_to_execute
            else set(execution_plan_snapshot.step_keys_to_execute)
            == set([step.key for step in execution_plan_snapshot.steps]),
            'We encode step_keys_to_execute twice in our stack, unfortunately. This check '
            'ensures that they are consistent. We check that step_keys_to_execute in the plan '
            'matches the step_keys_to_execute params if it is set. If it is not, this indicates '
            'a full execution plan, and so we verify that.',
        )

        execution_plan_snapshot_id = create_execution_plan_snapshot_id(execution_plan_snapshot)

        if not self._run_storage.has_execution_plan_snapshot(execution_plan_snapshot_id):
            returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                execution_plan_snapshot
            )
            check.invariant(execution_plan_snapshot_id == returned_execution_plan_snapshot_id)

        pipeline_run = pipeline_run.with_execution_plan_snapshot_id(execution_plan_snapshot_id)

    return pipeline_run
def execute_run(
    pipeline: IPipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    raise_on_error: bool = False,
) -> PipelineExecutionResult:
    """Executes an existing pipeline run synchronously.

    Synchronous version of execute_run_iterator.

    Args:
        pipeline (IPipeline): The pipeline to execute.
        pipeline_run (PipelineRun): The run to execute.
        instance (DagsterInstance): The instance in which the run has been created.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``False``.

    Returns:
        PipelineExecutionResult: The result of the execution.
    """
    if isinstance(pipeline, PipelineDefinition):
        raise DagsterInvariantViolationError(
            "execute_run requires an IPipeline but received a PipelineDefinition "
            "directly instead. To support hand-off to other processes provide a "
            "ReconstructablePipeline which can be done using reconstructable(). For in "
            "process only execution you can use InMemoryPipeline."
        )

    check.inst_param(pipeline, "pipeline", IPipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(message, pipeline_run)
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )
    pipeline_def = pipeline.get_definition()
    if pipeline_run.solids_to_execute:
        if isinstance(pipeline_def, PipelineSubsetDefinition):
            check.invariant(
                pipeline_run.solids_to_execute == pipeline.solids_to_execute,
                "Cannot execute PipelineRun with solids_to_execute {solids_to_execute} that "
                "conflicts with pipeline subset {pipeline_solids_to_execute}.".format(
                    pipeline_solids_to_execute=str_format_set(pipeline.solids_to_execute),
                    solids_to_execute=str_format_set(pipeline_run.solids_to_execute),
                ),
            )
        else:
            # when `execute_run` is directly called, the sub pipeline hasn't been created
            # note that when we receive the solids to execute via PipelineRun, it won't support
            # solid selection query syntax
            pipeline = pipeline.subset_for_execution_from_existing_pipeline(
                pipeline_run.solids_to_execute
            )

    execution_plan = _get_execution_plan_from_run(pipeline, pipeline_run, instance)

    if is_memoized_run(pipeline_run.tags):
        environment_config = EnvironmentConfig.build(
            pipeline.get_definition(), pipeline_run.run_config, pipeline_run.mode
        )
        execution_plan = resolve_memoized_execution_plan(
            execution_plan,
            pipeline.get_definition(),
            pipeline_run.run_config,
            instance,
            environment_config,
        )

    output_capture: Optional[Dict[StepOutputHandle, Any]] = {}

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PipelineExecutionContextManager(
            pipeline=pipeline,
            execution_plan=execution_plan,
            pipeline_run=pipeline_run,
            instance=instance,
            run_config=pipeline_run.run_config,
            raise_on_error=raise_on_error,
            output_capture=output_capture,
        ),
    )
    event_list = list(_execute_run_iterable)
    pipeline_context = _execute_run_iterable.pipeline_context

    return PipelineExecutionResult(
        pipeline.get_definition(),
        pipeline_run.run_id,
        event_list,
        lambda: scoped_pipeline_context(
            execution_plan,
            pipeline,
            pipeline_run.run_config,
            pipeline_run,
            instance,
            intermediate_storage=pipeline_context.intermediate_storage,
        ),
        output_capture=output_capture,
    )
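# For orientation, a hedged sketch of the intended call pattern: execute_run operates on an
# IPipeline (e.g. the result of reconstructable()) plus a run that already exists in the instance.
# The pipeline name and the import path of execute_run are assumptions, not taken from the snippet.
from dagster import DagsterInstance, reconstructable
from dagster.core.execution.api import execute_run  # import path assumed

# `my_pipeline` is a placeholder for a module-level @pipeline definition.
recon_pipeline = reconstructable(my_pipeline)
instance = DagsterInstance.get()
pipeline_run = instance.create_run_for_pipeline(pipeline_def=recon_pipeline.get_definition())
result = execute_run(recon_pipeline, pipeline_run, instance, raise_on_error=True)
assert result.success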
def get_optional_inner_type(ttype):
    check.invariant(
        is_closed_python_optional_type(ttype),
        'type must pass is_closed_python_optional_type check',
    )
    return ttype.__args__[0]
def reexecute_pipeline(
    pipeline: Union[IPipeline, PipelineDefinition],
    parent_run_id: str,
    run_config: Optional[dict] = None,
    step_selection: Optional[List[str]] = None,
    mode: Optional[str] = None,
    preset: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    instance: DagsterInstance = None,
    raise_on_error: bool = True,
) -> PipelineExecutionResult:
    """Reexecute an existing pipeline run.

    Users will typically call this API when testing pipeline reexecution, or running standalone
    scripts.

    Parameters:
        pipeline (Union[IPipeline, PipelineDefinition]): The pipeline to execute.
        parent_run_id (str): The id of the previous run to reexecute. The run must exist in the
            instance.
        run_config (Optional[dict]): The environment configuration that parametrizes this run, as
            a dict.
        step_selection (Optional[List[str]]): A list of step selection queries (including single
            step keys) to execute. For example:

            - ``['some_solid']``: selects ``some_solid`` itself.
            - ``['*some_solid']``: select ``some_solid`` and all its ancestors (upstream
              dependencies).
            - ``['*some_solid+++']``: select ``some_solid``, all its ancestors, and its descendants
              (downstream dependencies) within 3 levels down.
            - ``['*some_solid', 'other_solid_a', 'other_solid_b+']``: select ``some_solid`` and all
              its ancestors, ``other_solid_a`` itself, and ``other_solid_b`` and its direct child
              solids.
        mode (Optional[str]): The name of the pipeline mode to use. You may not set both ``mode``
            and ``preset``.
        preset (Optional[str]): The name of the pipeline preset to use. You may not set both
            ``mode`` and ``preset``.
        tags (Optional[Dict[str, Any]]): Arbitrary key-value pairs that will be added to pipeline
            logs.
        instance (Optional[DagsterInstance]): The instance to execute against. If this is ``None``,
            an ephemeral instance will be used, and no artifacts will be persisted from the run.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``, since this is the most useful behavior in test.

    Returns:
        :py:class:`PipelineExecutionResult`: The result of pipeline execution.

    For the asynchronous version, see :py:func:`reexecute_pipeline_iterator`.
""" check.opt_list_param(step_selection, "step_selection", of_type=str) check.str_param(parent_run_id, "parent_run_id") with ephemeral_instance_if_missing(instance) as execute_instance: (pipeline, run_config, mode, tags, _, _) = _check_execute_pipeline_args( pipeline=pipeline, run_config=run_config, mode=mode, preset=preset, tags=tags, ) parent_pipeline_run = execute_instance.get_run_by_id(parent_run_id) check.invariant( parent_pipeline_run, "No parent run with id {parent_run_id} found in instance.".format( parent_run_id=parent_run_id), ) step_keys_to_execute: Optional[List[str]] = None execution_plan: Optional[ExecutionPlan] = None # resolve step selection DSL queries using parent execution information if step_selection: step_keys_to_execute, execution_plan = _resolve_reexecute_step_selection( execute_instance, pipeline, mode, run_config, parent_pipeline_run, step_selection, ) pipeline_run = execute_instance.create_run_for_pipeline( pipeline_def=pipeline.get_definition(), execution_plan=execution_plan, run_config=run_config, mode=mode, tags=tags, solid_selection=parent_pipeline_run.solid_selection, solids_to_execute=parent_pipeline_run.solids_to_execute, # convert to frozenset https://github.com/dagster-io/dagster/issues/2914 step_keys_to_execute=list(step_keys_to_execute) if step_keys_to_execute else None, root_run_id=parent_pipeline_run.root_run_id or parent_pipeline_run.run_id, parent_run_id=parent_pipeline_run.run_id, ) return execute_run( pipeline, pipeline_run, execute_instance, raise_on_error=raise_on_error, )
def default_value(self):
    check.invariant(self.default_provided, 'Asking for default value when none was provided')
    return self._default_value
def _check_execute_pipeline_args(
    pipeline: Union[PipelineDefinition, IPipeline],
    run_config: Optional[dict],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Dict[str, Any]],
    solid_selection: Optional[List[str]] = None,
) -> Tuple[
    IPipeline,
    Optional[dict],
    Optional[str],
    Dict[str, Any],
    FrozenSet[str],
    Optional[List[str]],
]:
    pipeline = _check_pipeline(pipeline)
    pipeline_def = pipeline.get_definition()
    check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)

    run_config = check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.invariant(
        not (mode is not None and preset is not None),
        "You may set only one of `mode` (got {mode}) or `preset` (got {preset}).".format(
            mode=mode, preset=preset
        ),
    )

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if preset is not None:
        pipeline_preset = pipeline_def.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                "The environment set in preset '{preset}' does not agree with the environment "
                "passed in the `run_config` argument.".format(preset=preset),
            )
            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None or solid_selection == pipeline_preset.solid_selection,
                "The solid_selection set in preset '{preset}', {preset_subset}, does not agree with "
                "the `solid_selection` argument: {solid_selection}".format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            "Mode {mode} does not agree with the mode set in preset '{preset}': "
            "('{preset_mode}')".format(preset=preset, preset_mode=pipeline_preset.mode, mode=mode),
        )
        mode = pipeline_preset.mode

        tags = merge_dicts(pipeline_preset.tags, tags)

    if mode is not None:
        if not pipeline_def.has_mode_definition(mode):
            raise DagsterInvariantViolationError(
                (
                    "You have attempted to execute pipeline {name} with mode {mode}. "
                    "Available modes: {modes}"
                ).format(name=pipeline_def.name, mode=mode, modes=pipeline_def.available_modes)
            )
    else:
        if pipeline_def.is_multi_mode:
            raise DagsterInvariantViolationError(
                (
                    "Pipeline {name} has multiple modes (Available modes: {modes}) and you have "
                    "attempted to execute it without specifying a mode. Set "
                    "mode property on the PipelineRun object."
                ).format(name=pipeline_def.name, modes=pipeline_def.available_modes)
            )
        mode = pipeline_def.get_default_mode_name()

    tags = merge_dicts(pipeline_def.tags, tags)

    # generate pipeline subset from the given solid_selection
    if solid_selection:
        pipeline = pipeline.subset_for_execution(solid_selection)

    return (
        pipeline,
        run_config,
        mode,
        tags,
        pipeline.solids_to_execute,
        solid_selection,
    )
def default_value_as_json_str(self):
    check.invariant(self.default_provided, 'Asking for default value when none was provided')
    return serialize_value(self.default_value)
def _execute_step_k8s_job(
    _self,
    instance_ref_dict,
    step_keys,
    environment_dict,
    mode,
    repo_name,
    repo_location_name,
    run_id,
    job_config_dict,
    job_namespace,
    load_incluster_config,
    resources=None,
    kubeconfig_file=None,
):
    '''Run step execution in a K8s job pod.
    '''
    from dagster_k8s import DagsterK8sJobConfig, construct_dagster_graphql_k8s_job
    from dagster_k8s.utils import get_pod_names_in_job, retrieve_pod_logs, wait_for_job_success

    import kubernetes

    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.invariant(
        len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
    )
    check.dict_param(environment_dict, 'environment_dict')
    check.str_param(mode, 'mode')
    check.str_param(repo_name, 'repo_name')
    check.str_param(repo_location_name, 'repo_location_name')
    check.str_param(run_id, 'run_id')

    # Celery will serialize this as a list
    job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
    check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
    check.str_param(job_namespace, 'job_namespace')
    check.bool_param(load_incluster_config, 'load_incluster_config')
    resources = check.opt_inst_param(
        resources, 'resources', kubernetes.client.V1ResourceRequirements
    )
    check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

    # For when launched via DinD or running the cluster
    if load_incluster_config:
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config(kubeconfig_file)

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    step_keys_str = ", ".join(step_keys)

    # Ensure we stay below k8s name length limits
    k8s_name_key = _get_k8s_name_key(run_id, step_keys)
    job_name = 'dagster-stepjob-%s' % k8s_name_key
    pod_name = 'dagster-stepjob-%s' % k8s_name_key

    variables = {
        'executionParams': {
            'runConfigData': environment_dict,
            'mode': mode,
            'selector': {
                'repositoryLocationName': repo_location_name,
                'repositoryName': repo_name,
                'pipelineName': pipeline_run.pipeline_name,
            },
            'executionMetadata': {'runId': run_id},
            'stepKeys': step_keys,
        }
    }

    args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]

    job = construct_dagster_graphql_k8s_job(job_config, args, job_name, resources, pod_name)

    # Running list of events generated from this task execution
    events = []

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        'Executing steps {} in Kubernetes job {}'.format(step_keys_str, job.metadata.name),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, 'Step keys'),
                EventMetadataEntry.text(job.metadata.name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_config.job_image, 'Job image'),
                EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                EventMetadataEntry.text(str(job_config.image_pull_secrets), 'Image pull secrets'),
                EventMetadataEntry.text(str(job_config.service_account_name), 'Service account name'),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryK8sJobEngine,
        # validated above that step_keys is length 1, and it is not possible to use ETH or the
        # execution plan in this function (Celery K8s workers should not have access to user code)
        step_key=step_keys[0],
    )
    events.append(engine_event)

    kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

    wait_for_job_success(job.metadata.name, namespace=job_namespace)
    pod_names = get_pod_names_in_job(job.metadata.name, namespace=job_namespace)

    # Post engine event for log retrieval
    engine_event = instance.report_engine_event(
        'Retrieving logs from Kubernetes Job pods',
        pipeline_run,
        EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
        CeleryK8sJobEngine,
        step_key=step_keys[0],
    )
    events.append(engine_event)

    logs = []
    for pod_name in pod_names:
        raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
        logs += raw_logs.split('\n')

    res = parse_raw_log_lines(logs)
    handle_execution_errors(res, 'executePlan')
    step_events = handle_execute_plan_result(res)

    events += step_events

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events
def read_unary_response(output_file):
    messages = list(ipc_read_event_stream(output_file))
    check.invariant(len(messages) == 1)
    return messages[0]
def get_image(name):
    """Retrieve the image information from the list defined above."""
    image = next((img for img in list_images() if img.image == name), None)
    check.invariant(image is not None, "could not find image {}".format(name))
    return image
def get_currently_upgrading_instance():
    global _UPGRADING_INSTANCE  # pylint: disable=global-statement
    check.invariant(_UPGRADING_INSTANCE is not None, "currently upgrading instance not set")
    return _UPGRADING_INSTANCE
def non_scalar_type_key(self):
    check.invariant(self.kind == ConfigTypeKind.SCALAR_UNION)
    return self.type_param_keys[1]
def __init__(
    self,
    server_termination_event,
    loadable_target_origin=None,
    heartbeat=False,
    heartbeat_timeout=30,
    lazy_load_user_code=False,
    fixed_server_id=None,
):
    super(DagsterApiServer, self).__init__()

    check.bool_param(heartbeat, "heartbeat")
    check.int_param(heartbeat_timeout, "heartbeat_timeout")
    check.invariant(heartbeat_timeout > 0, "heartbeat_timeout must be greater than 0")

    self._server_termination_event = check.inst_param(
        server_termination_event, "server_termination_event", ThreadingEventType
    )
    self._loadable_target_origin = check.opt_inst_param(
        loadable_target_origin, "loadable_target_origin", LoadableTargetOrigin
    )

    # Each server is initialized with a unique UUID. This UUID is used by clients to track when
    # servers are replaced and is used for cache invalidation and reloading.
    self._server_id = check.opt_str_param(fixed_server_id, "fixed_server_id", str(uuid.uuid4()))

    # The client tells the server to shut down by calling ShutdownServer (or by failing to send a
    # heartbeat), at which point this event is set. The cleanup thread will then set the server
    # termination event once all current executions have finished, which will stop the server.
    self._shutdown_once_executions_finish_event = threading.Event()

    # Dict[str, (multiprocessing.Process, DagsterInstance)]
    self._executions = {}
    # Dict[str, multiprocessing.Event]
    self._termination_events = {}
    self._termination_times = {}
    self._execution_lock = threading.Lock()

    self._repository_symbols_and_code_pointers = LazyRepositorySymbolsAndCodePointers(
        loadable_target_origin
    )
    if not lazy_load_user_code:
        self._repository_symbols_and_code_pointers.load()

    self.__last_heartbeat_time = time.time()
    if heartbeat:
        self.__heartbeat_thread = threading.Thread(
            target=self._heartbeat_thread,
            args=(heartbeat_timeout,),
            name="grpc-server-heartbeat",
        )
        self.__heartbeat_thread.daemon = True
        self.__heartbeat_thread.start()
    else:
        self.__heartbeat_thread = None

    self.__cleanup_thread = threading.Thread(
        target=self._cleanup_thread, args=(), name="grpc-server-cleanup"
    )
    self.__cleanup_thread.daemon = True
    self.__cleanup_thread.start()
def inner_type_key(self):
    # valid for Noneable and Array
    check.invariant(self.kind == ConfigTypeKind.NONEABLE or self.kind == ConfigTypeKind.ARRAY)
    check.invariant(len(self.type_param_keys) == 1)
    return self.type_param_keys[0]
def __init__( self, host="localhost", port=None, socket=None, max_workers=None, loadable_target_origin=None, heartbeat=False, heartbeat_timeout=30, lazy_load_user_code=False, ipc_output_file=None, fixed_server_id=None, ): check.opt_str_param(host, "host") check.opt_int_param(port, "port") check.opt_str_param(socket, "socket") check.opt_int_param(max_workers, "max_workers") check.opt_inst_param(loadable_target_origin, "loadable_target_origin", LoadableTargetOrigin) check.invariant( port is not None if seven.IS_WINDOWS else True, "You must pass a valid `port` on Windows: `socket` not supported.", ) check.invariant( (port or socket) and not (port and socket), "You must pass one and only one of `port` or `socket`.", ) check.invariant( host is not None if port else True, "Must provide a host when serving on a port", ) check.bool_param(heartbeat, "heartbeat") check.int_param(heartbeat_timeout, "heartbeat_timeout") self._ipc_output_file = check.opt_str_param(ipc_output_file, "ipc_output_file") check.opt_str_param(fixed_server_id, "fixed_server_id") check.invariant(heartbeat_timeout > 0, "heartbeat_timeout must be greater than 0") check.invariant( max_workers is None or max_workers > 1 if heartbeat else True, "max_workers must be greater than 1 or set to None if heartbeat is True. " "If set to None, the server will use the gRPC default.", ) self.server = grpc.server(ThreadPoolExecutor(max_workers=max_workers)) self._server_termination_event = threading.Event() try: self._api_servicer = DagsterApiServer( server_termination_event=self._server_termination_event, loadable_target_origin=loadable_target_origin, heartbeat=heartbeat, heartbeat_timeout=heartbeat_timeout, lazy_load_user_code=lazy_load_user_code, fixed_server_id=fixed_server_id, ) except Exception: if self._ipc_output_file: with ipc_write_stream(self._ipc_output_file) as ipc_stream: ipc_stream.send( GrpcServerLoadErrorEvent( error_info=serializable_error_info_from_exc_info( sys.exc_info()))) raise # Create a health check servicer self._health_servicer = health.HealthServicer() health_pb2_grpc.add_HealthServicer_to_server(self._health_servicer, self.server) add_DagsterApiServicer_to_server(self._api_servicer, self.server) if port: server_address = host + ":" + str(port) else: server_address = "unix:" + os.path.abspath(socket) # grpc.Server.add_insecure_port returns: # - 0 on failure # - port number when a port is successfully bound # - 1 when a UDS is successfully bound res = self.server.add_insecure_port(server_address) if socket and res != 1: if self._ipc_output_file: with ipc_write_stream(self._ipc_output_file) as ipc_stream: ipc_stream.send(GrpcServerFailedToBindEvent()) raise CouldNotBindGrpcServerToAddress(socket) if port and res != port: if self._ipc_output_file: with ipc_write_stream(self._ipc_output_file) as ipc_stream: ipc_stream.send(GrpcServerFailedToBindEvent()) raise CouldNotBindGrpcServerToAddress(port)
def execute_in_process(
    self,
    run_config: Optional[Dict[str, Any]] = None,
    instance: Optional["DagsterInstance"] = None,
    partition_key: Optional[str] = None,
    raise_on_error: bool = True,
    op_selection: Optional[List[str]] = None,
    run_id: Optional[str] = None,
) -> "ExecuteInProcessResult":
    """
    Execute the Job in-process, gathering results in-memory.

    The `executor_def` on the Job will be ignored, and replaced with the in-process executor.
    If using the default `io_manager`, it will switch from filesystem to in-memory.

    Args:
        run_config (Optional[Dict[str, Any]]): The configuration for the run.
        instance (Optional[DagsterInstance]): The instance to execute against, an ephemeral one
            will be used if none provided.
        partition_key (Optional[str]): The string partition key that specifies the run config to
            execute. Can only be used to select run config for jobs with partitioned config.
        raise_on_error (Optional[bool]): Whether or not to raise exceptions when they occur.
            Defaults to ``True``.
        op_selection (Optional[List[str]]): A list of op selection queries (including single op
            names) to execute. For example:

            * ``['some_op']``: selects ``some_op`` itself.
            * ``['*some_op']``: select ``some_op`` and all its ancestors (upstream dependencies).
            * ``['*some_op+++']``: select ``some_op``, all its ancestors, and its descendants
              (downstream dependencies) within 3 levels down.
            * ``['*some_op', 'other_op_a', 'other_op_b+']``: select ``some_op`` and all its
              ancestors, ``other_op_a`` itself, and ``other_op_b`` and its direct child ops.

    Returns:
        :py:class:`~dagster.ExecuteInProcessResult`
    """
    from dagster.core.definitions.executor_definition import execute_in_process_executor
    from dagster.core.execution.execute_in_process import core_execute_in_process

    run_config = check.opt_dict_param(run_config, "run_config")
    op_selection = check.opt_list_param(op_selection, "op_selection", str)
    partition_key = check.opt_str_param(partition_key, "partition_key")

    check.invariant(
        len(self._mode_definitions) == 1,
        "execute_in_process only supported on job / single mode pipeline",
    )

    base_mode = self.get_mode_definition()
    # create an ephemeral in process mode by replacing the executor_def and
    # switching the default fs io_manager to in mem, if another was not set
    in_proc_mode = ModeDefinition(
        name="in_process",
        executor_defs=[execute_in_process_executor],
        resource_defs=_swap_default_io_man(base_mode.resource_defs, self),
        logger_defs=base_mode.loggers,
        _config_mapping=base_mode.config_mapping,
        _partitioned_config=base_mode.partitioned_config,
    )

    ephemeral_job = JobDefinition(
        name=self._name,
        graph_def=self._graph_def,
        mode_def=in_proc_mode,
        hook_defs=self.hook_defs,
        tags=self.tags,
        op_retry_policy=self._solid_retry_policy,
        version_strategy=self.version_strategy,
    ).get_job_def_for_op_selection(op_selection)

    tags = None
    if partition_key:
        if not base_mode.partitioned_config:
            check.failed(
                f"Provided partition key `{partition_key}` for job `{self._name}` without a partitioned config"
            )
        check.invariant(
            not run_config,
            "Cannot provide both run_config and partition_key arguments to `execute_in_process`",
        )
        partition_set = self.get_partition_set_def()
        if not partition_set:
            check.failed(
                f"Provided partition key `{partition_key}` for job `{self._name}` without a partitioned config"
            )
        partition = partition_set.get_partition(partition_key)
        run_config = partition_set.run_config_for_partition(partition)
        tags = partition_set.tags_for_partition(partition)

    return core_execute_in_process(
        node=self._graph_def,
        ephemeral_pipeline=ephemeral_job,
        run_config=run_config,
        instance=instance,
        output_capturing_enabled=True,
        raise_on_error=raise_on_error,
        run_tags=tags,
        run_id=run_id,
    )
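# A brief usage sketch of the in-process execution path described above; the op and its config
# are illustrative, not taken from the source.
from dagster import job, op

@op(config_schema={"n": int})
def emit(context) -> int:
    return context.op_config["n"]

@job
def my_job():
    emit()

result = my_job.execute_in_process(run_config={"ops": {"emit": {"config": {"n": 3}}}})
assert result.success
assert result.output_for_node("emit") == 3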
def _from_storage(
    cls,
    pipeline_name=None,
    run_id=None,
    run_config=None,
    mode=None,
    solid_selection=None,
    solids_to_execute=None,
    step_keys_to_execute=None,
    status=None,
    tags=None,
    root_run_id=None,
    parent_run_id=None,
    pipeline_snapshot_id=None,
    execution_plan_snapshot_id=None,
    # backcompat
    environment_dict=None,
    previous_run_id=None,
    selector=None,
    solid_subset=None,
    reexecution_config=None,  # pylint: disable=unused-argument
    **kwargs
):
    # serdes log
    # * removed reexecution_config - serdes logic expected to strip unknown keys so no need to preserve
    # * added pipeline_snapshot_id
    # * renamed previous_run_id -> parent_run_id, added root_run_id
    # * added execution_plan_snapshot_id
    # * removed selector
    # * added solid_subset
    # * renamed solid_subset -> solid_selection, added solids_to_execute
    # * renamed environment_dict -> run_config

    # back compat for environment dict => run_config
    if environment_dict:
        check.invariant(
            not run_config,
            "Cannot set both run_config and environment_dict. Use run_config parameter.",
        )
        run_config = environment_dict

    # back compat for previous_run_id => parent_run_id, root_run_id
    if previous_run_id and not (parent_run_id and root_run_id):
        parent_run_id = previous_run_id
        root_run_id = previous_run_id

    # back compat for selector => pipeline_name, solids_to_execute
    selector = check.opt_inst_param(selector, "selector", ExecutionSelector)
    if selector:
        check.invariant(
            pipeline_name is None or selector.name == pipeline_name,
            (
                "Conflicting pipeline name {pipeline_name} in arguments to PipelineRun: "
                "selector was passed with pipeline {selector_pipeline}".format(
                    pipeline_name=pipeline_name, selector_pipeline=selector.name
                )
            ),
        )
        if pipeline_name is None:
            pipeline_name = selector.name

        check.invariant(
            solids_to_execute is None or set(selector.solid_subset) == solids_to_execute,
            (
                "Conflicting solids_to_execute {solids_to_execute} in arguments to PipelineRun: "
                "selector was passed with subset {selector_subset}".format(
                    solids_to_execute=solids_to_execute, selector_subset=selector.solid_subset
                )
            ),
        )
        # for old runs that only have selector but no solids_to_execute
        if solids_to_execute is None:
            solids_to_execute = frozenset(selector.solid_subset) if selector.solid_subset else None

    # back compat for solid_subset => solids_to_execute
    check.opt_list_param(solid_subset, "solid_subset", of_type=str)
    if solid_subset:
        solids_to_execute = frozenset(solid_subset)

    # warn about unused arguments
    if len(kwargs):
        warnings.warn(
            "Found unhandled arguments from stored PipelineRun: {args}".format(args=kwargs.keys())
        )

    return cls.__new__(  # pylint: disable=redundant-keyword-arg
        cls,
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=mode,
        solid_selection=solid_selection,
        solids_to_execute=solids_to_execute,
        step_keys_to_execute=step_keys_to_execute,
        status=status,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        pipeline_snapshot_id=pipeline_snapshot_id,
        execution_plan_snapshot_id=execution_plan_snapshot_id,
    )