def create_grpc_watch_thread(
    client,
    on_disconnect=None,
    on_reconnected=None,
    on_updated=None,
    on_error=None,
    watch_interval=None,
    max_reconnect_attempts=None,
):
    check.inst_param(client, "client", DagsterGrpcClient)

    noop = lambda *a: None
    on_disconnect = check.opt_callable_param(on_disconnect, "on_disconnect", noop)
    on_reconnected = check.opt_callable_param(on_reconnected, "on_reconnected", noop)
    on_updated = check.opt_callable_param(on_updated, "on_updated", noop)
    on_error = check.opt_callable_param(on_error, "on_error", noop)

    shutdown_event = threading.Event()

    thread = threading.Thread(
        target=watch_grpc_server_thread,
        args=[
            client,
            on_disconnect,
            on_reconnected,
            on_updated,
            on_error,
            shutdown_event,
            watch_interval,
            max_reconnect_attempts,
        ],
    )
    thread.daemon = True

    return shutdown_event, thread

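# Hedged usage sketch, not part of the source above: how a caller might wire up the watch
# thread. The callback signatures are assumptions for illustration (the real arguments depend
# on watch_grpc_server_thread), and `client` is an already-constructed DagsterGrpcClient.
def _example_watch_grpc_server(client):
    shutdown_event, watch_thread = create_grpc_watch_thread(
        client,
        on_disconnect=lambda *_args: print("gRPC server disconnected"),
        on_reconnected=lambda *_args: print("gRPC server reconnected"),
    )
    watch_thread.start()
    try:
        pass  # do other work while the daemon thread watches the server
    finally:
        shutdown_event.set()  # signal the watcher to stop
        watch_thread.join()
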
def execute_queries(
    self, queries, fetch_results=False, cursor_factory=None, error_callback=None
):
    '''Fake for execute_queries; returns [self.QUERY_RESULT] * 3

    Args:
        queries (List[str]): The queries to execute.
        fetch_results (Optional[bool]): Whether to return the results of executing the query.
            Defaults to False, in which case the query will be executed without retrieving the
            results.
        cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
            cursor_factory; defaults to None. Will be used when constructing the cursor.
        error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]): A
            callback function, invoked when an exception is encountered during query execution;
            this is intended to support executing additional queries to provide diagnostic
            information, e.g. by querying ``stl_load_errors`` using ``pg_last_copy_id()``. If no
            function is provided, exceptions during query execution will be raised directly.

    Returns:
        Optional[List[List[Tuple[Any, ...]]]]: Results of the query, as a list of list of
            tuples, when fetch_results is set. Otherwise return None.
    '''
    check.list_param(queries, 'queries', of_type=str)
    check.bool_param(fetch_results, 'fetch_results')
    check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)
    check.opt_callable_param(error_callback, 'error_callback')

    for query in queries:
        self.log.info('Executing query \'{query}\''.format(query=query))

    if fetch_results:
        return [self.QUERY_RESULT] * 3

def load_python_module(module_name, working_directory, remove_from_path_fn=None):
    check.str_param(module_name, "module_name")
    check.opt_str_param(working_directory, "working_directory")
    check.opt_callable_param(remove_from_path_fn, "remove_from_path_fn")

    # Use the passed in working directory for local imports (sys.path[0] isn't
    # consistently set in the different entry points that Dagster uses to import code)
    remove_paths = remove_from_path_fn() if remove_from_path_fn else []  # hook for tests
    remove_paths.insert(0, sys.path[0])  # remove the script path

    with alter_sys_path(
        to_add=([working_directory] if working_directory else []), to_remove=remove_paths
    ):
        try:
            return importlib.import_module(module_name)
        except ImportError as ie:
            msg = get_import_error_message(ie)
            if working_directory:
                abs_working_directory = os.path.abspath(os.path.expanduser(working_directory))
                raise DagsterImportError(
                    f"Encountered ImportError: `{msg}` while importing module {module_name}. "
                    f"Local modules were resolved using the working "
                    f"directory `{abs_working_directory}`. If another working directory should be "
                    "used, please explicitly specify the appropriate path using the `-d` or "
                    "`--working-directory` for CLI based targets or the `working_directory` "
                    "configuration option for workspace targets. "
                ) from ie
            else:
                raise DagsterImportError(
                    f"Encountered ImportError: `{msg}` while importing module {module_name}. "
                    f"If relying on the working directory to resolve modules, please "
                    "explicitly specify the appropriate path using the `-d` or "
                    "`--working-directory` for CLI based targets or the `working_directory` "
                    "configuration option for workspace targets. "
                ) from ie

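# Hedged usage sketch, not part of the source above; the module name and directory are
# illustrative: resolve a module relative to an explicit working directory rather than
# whatever sys.path[0] happens to be at the entry point.
def _example_load_module():
    return load_python_module("repos.my_repository", working_directory="/opt/dagster/app")
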
def __init__(
    self,
    resource_fn=None,
    config_schema=None,
    description=None,
    _configured_config_mapping_fn=None,
    version=None,
):
    EXPECTED_POSITIONALS = ["*"]
    fn_positionals, _ = split_function_parameters(resource_fn, EXPECTED_POSITIONALS)
    missing_positional = validate_decorated_fn_positionals(fn_positionals, EXPECTED_POSITIONALS)
    if missing_positional:
        raise DagsterInvalidDefinitionError(
            "@resource '{resource_name}' decorated function does not have required "
            "positional parameter '{missing_param}'. Resource functions should only have keyword "
            "arguments that match input names and a first positional parameter.".format(
                resource_name=resource_fn.__name__, missing_param=missing_positional
            )
        )

    self._resource_fn = check.opt_callable_param(resource_fn, "resource_fn")
    self._config_schema = check_user_facing_opt_config_param(config_schema, "config_schema")
    self._description = check.opt_str_param(description, "description")
    self.__configured_config_mapping_fn = check.opt_callable_param(
        _configured_config_mapping_fn, "config_mapping_fn"
    )
    self._version = check.opt_str_param(version, "version")
    if version:
        experimental_arg_warning("version", "ResourceDefinition.__init__")

def __init__(
    self,
    name,
    is_persistent,
    required_resource_keys,
    config_schema=None,
    intermediate_storage_creation_fn=None,
    _configured_config_mapping_fn=None,
):
    self._name = check_valid_name(name)
    self._is_persistent = check.bool_param(is_persistent, "is_persistent")
    self._config_schema = check_user_facing_opt_config_param(config_schema, "config_schema")
    self._intermediate_storage_creation_fn = check.opt_callable_param(
        intermediate_storage_creation_fn, "intermediate_storage_creation_fn"
    )
    self._required_resource_keys = frozenset(
        check.set_param(
            required_resource_keys if required_resource_keys else set(),
            "required_resource_keys",
            of_type=str,
        )
    )
    self.__configured_config_mapping_fn = check.opt_callable_param(
        _configured_config_mapping_fn, "config_mapping_fn"
    )

def __new__(
    cls,
    solid_handle: SolidHandle,
    name: str,
    dagster_type_key: str,
    is_required: bool,
    is_dynamic: bool,
    is_asset: bool = False,
    get_asset_key: Optional[Callable] = None,
    get_asset_partitions: Optional[Callable] = None,
    should_materialize: Optional[bool] = None,
):
    return super(StepOutput, cls).__new__(
        cls,
        solid_handle=check.inst_param(solid_handle, "solid_handle", SolidHandle),
        name=check.str_param(name, "name"),
        dagster_type_key=check.str_param(dagster_type_key, "dagster_type_key"),
        is_required=check.bool_param(is_required, "is_required"),
        is_dynamic=check.bool_param(is_dynamic, "is_dynamic"),
        is_asset=check.bool_param(is_asset, "is_asset"),
        get_asset_key=check.opt_callable_param(get_asset_key, "get_asset_key"),
        get_asset_partitions=check.opt_callable_param(
            get_asset_partitions, "get_asset_partitions"
        ),
        should_materialize=check.opt_bool_param(should_materialize, "should_materialize"),
    )

def canonicalize_backcompat_args(
    new_val, new_arg, old_val, old_arg, coerce_old_to_new=None, additional_warn_txt=None
):
    '''
    Utility for managing backwards compatibility of two related arguments.

    For example, if you had an existing function

        def is_new(old_flag):
            return not old_flag

    and you decided you wanted the new function to be:

        def is_new(new_flag):
            return new_flag

    but you want an in-between period where either flag is accepted, use
    canonicalize_backcompat_args to manage that:

        def is_new(old_flag=None, new_flag=None):
            return canonicalize_backcompat_args(
                new_val=new_flag,
                new_arg='new_flag',
                old_val=old_flag,
                old_arg='old_flag',
                coerce_old_to_new=lambda val: not val,
            )

    In this example, if the caller sets both new_flag and old_flag, it will fail by throwing
    a CheckError. If the caller sets old_flag, it will run it through the coercion function,
    warn, and then execute.

    canonicalize_backcompat_args returns the value as if *only* new_val were specified.
    '''
    check.str_param(new_arg, 'new_arg')
    check.str_param(old_arg, 'old_arg')
    check.opt_callable_param(coerce_old_to_new, 'coerce_old_to_new')
    check.opt_str_param(additional_warn_txt, 'additional_warn_txt')
    if new_val is not None:
        if old_val is not None:
            check.failed(
                'Do not use deprecated "{old_arg}" now that you are using "{new_arg}".'.format(
                    old_arg=old_arg, new_arg=new_arg
                )
            )
        return new_val
    if old_val is not None:
        warnings.warn(
            '"{old_arg}" is deprecated, use "{new_arg}" instead.'.format(
                old_arg=old_arg, new_arg=new_arg
            )
            # Parenthesize the conditional so a missing additional_warn_txt does not blank out
            # the whole message (string concatenation binds tighter than the ternary).
            + ((' ' + additional_warn_txt) if additional_warn_txt else ''),
            # This punches up to the caller of canonicalize_backcompat_args
            stacklevel=3,
        )
        return coerce_old_to_new(old_val) if coerce_old_to_new else old_val

    return new_val

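# Runnable sketch of the docstring example above: a transition-period function that accepts
# either the deprecated old_flag or the new new_flag (is_new itself is illustrative, taken
# from the docstring rather than from the library).
def is_new(old_flag=None, new_flag=None):
    return canonicalize_backcompat_args(
        new_val=new_flag,
        new_arg='new_flag',
        old_val=old_flag,
        old_arg='old_flag',
        coerce_old_to_new=lambda val: not val,
    )

# is_new(new_flag=True)                 -> True
# is_new(old_flag=True)                 -> False, after emitting a deprecation warning
# is_new(old_flag=True, new_flag=True)  -> raises a CheckError
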
def daily_schedule(
    pipeline_name,
    start_date,
    name=None,
    execution_time=datetime.time(0, 0),
    tags=None,
    tags_fn_for_date=None,
    solid_subset=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    from dagster.core.definitions.partition import PartitionSetDefinition

    check.opt_str_param(name, 'name')
    check.str_param(pipeline_name, 'pipeline_name')
    check.inst_param(start_date, 'start_date', datetime.datetime)
    check.inst_param(execution_time, 'execution_time', datetime.time)
    check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_callable_param(tags_fn_for_date, 'tags_fn_for_date')
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)

    cron_schedule = '{minute} {hour} * * *'.format(
        minute=execution_time.minute, hour=execution_time.hour
    )

    def inner(fn):
        check.callable_param(fn, 'fn')

        schedule_name = name or fn.__name__

        def _environment_dict_fn_for_partition(partition):
            return fn(partition.value)

        partition_set_name = '{}_daily'.format(pipeline_name)
        partition_set = PartitionSetDefinition(
            name=partition_set_name,
            pipeline_name=pipeline_name,
            partition_fn=date_partition_range(start_date),
            environment_dict_fn_for_partition=_environment_dict_fn_for_partition,
            mode=mode,
        )

        return partition_set.create_schedule_definition(
            schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )

    return inner

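# Hedged usage sketch, not part of the source above: the decorated function receives each
# partition's date and returns the environment dict for that day's run. The pipeline name,
# solid name, and config shape are illustrative.
@daily_schedule(
    pipeline_name='my_pipeline',
    start_date=datetime.datetime(2020, 1, 1),
    execution_time=datetime.time(6, 30),
)
def my_daily_schedule(date):
    return {'solids': {'process_date': {'config': {'date': date.strftime('%Y-%m-%d')}}}}
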
def execute_query(self, query, fetch_results=False, cursor_factory=None, error_callback=None):
    '''Synchronously execute a single query against Redshift. Will return a list of rows, where
    each row is a tuple of values, e.g. SELECT 1 will return [(1,)].

    Args:
        query (str): The query to execute.
        fetch_results (Optional[bool]): Whether to return the results of executing the query.
            Defaults to False, in which case the query will be executed without retrieving the
            results.
        cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
            cursor_factory; defaults to None. Will be used when constructing the cursor.
        error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]): A
            callback function, invoked when an exception is encountered during query execution;
            this is intended to support executing additional queries to provide diagnostic
            information, e.g. by querying ``stl_load_errors`` using ``pg_last_copy_id()``. If no
            function is provided, exceptions during query execution will be raised directly.

    Returns:
        Optional[List[Tuple[Any, ...]]]: Results of the query, as a list of tuples, when
            fetch_results is set. Otherwise return None.
    '''
    check.str_param(query, 'query')
    check.bool_param(fetch_results, 'fetch_results')
    check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)
    check.opt_callable_param(error_callback, 'error_callback')

    with self._get_conn() as conn:
        with self._get_cursor(conn, cursor_factory=cursor_factory) as cursor:
            try:
                six.ensure_str(query)

                self.log.info('Executing query \'{query}\''.format(query=query))
                cursor.execute(query)

                if fetch_results and cursor.rowcount > 0:
                    return cursor.fetchall()
                else:
                    self.log.info('Empty result from query')

            except Exception as e:  # pylint: disable=broad-except
                # If autocommit is disabled or not set (it is disabled by default), Redshift
                # will be in the middle of a transaction at exception time, and because of
                # the failure the current transaction will not accept any further queries.
                #
                # This conn.commit() call closes the open transaction before handing off
                # control to the error callback, so that the user can issue additional
                # queries. Notably, for e.g. pg_last_copy_id() to work, it requires you to
                # use the same conn/cursor, so you have to do this conn.commit() to ensure
                # things are in a usable state in the error callback.
                if not self.autocommit:
                    conn.commit()

                if error_callback is not None:
                    error_callback(e, cursor, self.log)
                else:
                    raise

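# Hedged usage sketch, not part of the source above; `redshift` stands in for a configured
# resource exposing execute_query, and the diagnostic query mirrors the docstring's
# stl_load_errors / pg_last_copy_id() suggestion.
def _example_execute_query(redshift):
    def _diagnose(error, cursor, log):
        log.info('Query failed: {error}'.format(error=error))
        cursor.execute('SELECT * FROM stl_load_errors WHERE query = pg_last_copy_id()')

    return redshift.execute_query('SELECT 1', fetch_results=True, error_callback=_diagnose)
    # -> [(1,)]
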
def triggered_execution(
    pipeline_name,
    name=None,
    mode="default",
    solid_selection=None,
    tags_fn=None,
    should_execute_fn=None,
):
    """
    The decorated function will be called as the ``run_config_fn`` of the underlying
    :py:class:`~dagster.TriggeredDefinition` and should take a
    :py:class:`~dagster.TriggeredExecutionContext` as its only argument, returning the
    environment dict for the triggered execution.

    Args:
        pipeline_name (str): The name of the pipeline to execute when the trigger fires.
        name (Optional[str]): The name of this triggered execution.
        solid_selection (Optional[List[str]]): A list of solid subselection (including single
            solid names) to execute when the trigger fires. e.g. ``['*some_solid+', 'other_solid']``
        mode (Optional[str]): The pipeline mode to apply for the triggered execution
            (Default: 'default')
        tags_fn (Optional[Callable[[TriggeredExecutionContext], Optional[Dict[str, str]]]]): A
            function that generates tags to attach to the triggered execution. Takes a
            :py:class:`~dagster.TriggeredExecutionContext` and returns a dictionary of tags
            (string key-value pairs).
        should_execute_fn (Optional[Callable[[TriggeredExecutionContext], bool]]): A function
            that runs at trigger time to determine whether a pipeline execution should be
            initiated or skipped. Takes a :py:class:`~dagster.TriggeredExecutionContext` and
            returns a boolean (``True`` if a pipeline run should be executed). Defaults to a
            function that always returns ``True``.
    """
    check.str_param(pipeline_name, "pipeline_name")
    check.opt_str_param(name, "name")
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_callable_param(tags_fn, "tags_fn")
    check.opt_callable_param(should_execute_fn, "should_execute_fn")

    def inner(fn):
        check.callable_param(fn, "fn")

        trigger_name = name or fn.__name__

        return TriggeredExecutionDefinition(
            name=trigger_name,
            pipeline_name=pipeline_name,
            run_config_fn=fn,
            tags_fn=tags_fn,
            should_execute_fn=should_execute_fn,
            mode=mode,
            solid_selection=solid_selection,
        )

    return inner

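# Hedged usage sketch, not part of the source above: the decorated function is the
# run_config_fn and receives a TriggeredExecutionContext. The pipeline name and config keys
# are illustrative.
@triggered_execution(pipeline_name="my_pipeline", mode="default")
def my_trigger(_context):
    return {"solids": {"ingest": {"config": {"full_refresh": True}}}}
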
def execute_run_host_mode(
    pipeline: ReconstructablePipeline,
    pipeline_run: PipelineRun,
    instance: DagsterInstance,
    get_executor_def_fn: Optional[Callable[[Optional[str]], ExecutorDefinition]] = None,
    raise_on_error: bool = False,
):
    check.inst_param(pipeline, "pipeline", ReconstructablePipeline)
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.inst_param(instance, "instance", DagsterInstance)
    check.opt_callable_param(get_executor_def_fn, "get_executor_def_fn")

    if pipeline_run.status == PipelineRunStatus.CANCELED:
        message = "Not starting execution since the run was canceled before execution could start"
        instance.report_engine_event(
            message,
            pipeline_run,
        )
        raise DagsterInvariantViolationError(message)

    check.invariant(
        pipeline_run.status == PipelineRunStatus.NOT_STARTED
        or pipeline_run.status == PipelineRunStatus.STARTING,
        desc="Pipeline run {} ({}) in state {}, expected NOT_STARTED or STARTING".format(
            pipeline_run.pipeline_name, pipeline_run.run_id, pipeline_run.status
        ),
    )

    if pipeline_run.solids_to_execute:
        pipeline = pipeline.subset_for_execution_from_existing_pipeline(
            pipeline_run.solids_to_execute
        )

    execution_plan_snapshot = instance.get_execution_plan_snapshot(
        pipeline_run.execution_plan_snapshot_id
    )
    execution_plan = ExecutionPlan.rebuild_from_snapshot(
        pipeline_run.pipeline_name,
        execution_plan_snapshot,
    )

    _execute_run_iterable = ExecuteRunWithPlanIterable(
        execution_plan=execution_plan,
        iterator=pipeline_execution_iterator,
        execution_context_manager=PlanOrchestrationContextManager(
            context_event_generator=host_mode_execution_context_event_generator,
            pipeline=pipeline,
            execution_plan=execution_plan,
            run_config=pipeline_run.run_config,
            pipeline_run=pipeline_run,
            instance=instance,
            raise_on_error=raise_on_error,
            get_executor_def_fn=get_executor_def_fn,
            output_capture=None,
        ),
    )
    event_list = list(_execute_run_iterable)
    return event_list

def __init__(
    self,
    name,
    cron_schedule,
    pipeline_name,
    run_config=None,
    run_config_fn=None,
    tags=None,
    tags_fn=None,
    solid_selection=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    self._name = check.str_param(name, 'name')
    self._cron_schedule = check.str_param(cron_schedule, 'cron_schedule')
    self._pipeline_name = check.str_param(pipeline_name, 'pipeline_name')
    self._run_config = check.opt_dict_param(run_config, 'run_config')
    self._tags = check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_callable_param(tags_fn, 'tags_fn')
    self._solid_selection = check.opt_nullable_list_param(
        solid_selection, 'solid_selection', of_type=str
    )
    self._mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, 'should_execute')
    self._environment_vars = check.opt_dict_param(
        environment_vars, 'environment_vars', key_type=str, value_type=str
    )

    if run_config_fn and run_config:
        raise DagsterInvalidDefinitionError(
            'Attempted to provide both run_config_fn and run_config as arguments'
            ' to ScheduleDefinition. Must provide only one of the two.'
        )

    if not run_config and not run_config_fn:
        run_config_fn = lambda _context: {}
    self._run_config_fn = run_config_fn

    if tags_fn and tags:
        raise DagsterInvalidDefinitionError(
            'Attempted to provide both tags_fn and tags as arguments'
            ' to ScheduleDefinition. Must provide only one of the two.'
        )

    if not tags and not tags_fn:
        tags_fn = lambda _context: {}
    self._tags_fn = tags_fn

    if not should_execute:
        should_execute = lambda _context: True
    self._should_execute = should_execute

def __init__(
    self,
    name,
    cron_schedule,
    pipeline_name,
    run_config=None,
    run_config_fn=None,
    tags=None,
    tags_fn=None,
    solid_selection=None,
    mode="default",
    should_execute=None,
    environment_vars=None,
):
    self._name = check_for_invalid_name_and_warn(name)
    self._cron_schedule = check.str_param(cron_schedule, "cron_schedule")
    self._pipeline_name = check.str_param(pipeline_name, "pipeline_name")
    self._run_config = check.opt_dict_param(run_config, "run_config")
    self._tags = check.opt_dict_param(tags, "tags", key_type=str, value_type=str)
    check.opt_callable_param(tags_fn, "tags_fn")
    self._solid_selection = check.opt_nullable_list_param(
        solid_selection, "solid_selection", of_type=str
    )
    self._mode = check.opt_str_param(mode, "mode", DEFAULT_MODE_NAME)
    check.opt_callable_param(should_execute, "should_execute")
    self._environment_vars = check.opt_dict_param(
        environment_vars, "environment_vars", key_type=str, value_type=str
    )

    if run_config_fn and run_config:
        raise DagsterInvalidDefinitionError(
            "Attempted to provide both run_config_fn and run_config as arguments"
            " to ScheduleDefinition. Must provide only one of the two."
        )

    if not run_config and not run_config_fn:
        run_config_fn = lambda _context: {}
    self._run_config_fn = run_config_fn

    if tags_fn and tags:
        raise DagsterInvalidDefinitionError(
            "Attempted to provide both tags_fn and tags as arguments"
            " to ScheduleDefinition. Must provide only one of the two."
        )

    if not tags and not tags_fn:
        tags_fn = lambda _context: {}
    self._tags_fn = tags_fn

    if not should_execute:
        should_execute = lambda _context: True
    self._should_execute = should_execute

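# Hedged usage sketch, not part of the source above: constructing a schedule directly with a
# static run config. The names and config shape are illustrative.
hourly_schedule = ScheduleDefinition(
    name="hourly_run",
    cron_schedule="0 * * * *",
    pipeline_name="my_pipeline",
    run_config={"solids": {"process": {"config": {"param": "value"}}}},
    mode="default",
)
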
def load_python_module(module_name, warn_only=False, remove_from_path_fn=None):
    check.str_param(module_name, "module_name")
    check.bool_param(warn_only, "warn_only")
    check.opt_callable_param(remove_from_path_fn, "remove_from_path_fn")

    error = None
    remove_paths = remove_from_path_fn() if remove_from_path_fn else []  # hook for tests
    remove_paths.insert(0, sys.path[0])  # remove the working directory

    with alter_sys_path(to_add=[], to_remove=remove_paths):
        try:
            module = importlib.import_module(module_name)
        except ImportError as ie:
            error = ie

    if error:
        try:
            module = importlib.import_module(module_name)
            # if here, we were able to resolve the module with the working directory on the
            # path, but should error because we may not always invoke from the same directory
            # (e.g. from cron)
            if warn_only:
                warnings.warn(
                    (
                        "Module {module} was resolved using the working directory. The ability to "
                        "load uninstalled modules from the working directory is deprecated and "
                        "will be removed in a future release. Please use the python-file based "
                        "load arguments or install {module} to your python environment."
                    ).format(module=module_name)
                )
            else:
                six.raise_from(
                    DagsterInvariantViolationError(
                        (
                            "Module {module} not found. Packages must be installed rather than "
                            "relying on the working directory to resolve module loading."
                        ).format(module=module_name)
                    ),
                    error,
                )
        except RuntimeError:
            # We might be here because numpy throws run time errors at import time when being
            # imported multiple times, just raise the original import error
            raise error
        except ImportError as ie:
            raise error

    return module

def __init__(
    self,
    python_type=None,
    key=None,
    name=None,
    typecheck_metadata_fn=None,
    type_check=None,
    **kwargs
):
    name = check.opt_str_param(name, 'name', type(self).__name__)
    key = check.opt_str_param(key, 'key', name)
    super(PythonObjectType, self).__init__(key=key, name=name, **kwargs)
    self.python_type = check.type_param(python_type, 'python_type')
    self.typecheck_metadata_fn = check.opt_callable_param(
        typecheck_metadata_fn, 'typecheck_metadata_fn'
    )
    self._user_type_check = check.opt_callable_param(type_check, 'type_check')

def job(
    pipeline_name,
    name=None,
    mode="default",
    solid_selection=None,
    tags_fn=None,
):
    """
    The decorated function will be called as the ``run_config_fn`` of the underlying
    :py:class:`~dagster.JobDefinition` and should take a :py:class:`~dagster.JobContext`
    as its only argument, returning the run config dict for the pipeline execution.

    Args:
        pipeline_name (str): The name of the pipeline to execute.
        name (Optional[str]): The name of this job.
        solid_selection (Optional[List[str]]): A list of solid subselection (including single
            solid names) for the pipeline execution e.g. ``['*some_solid+', 'other_solid']``
        mode (Optional[str]): The pipeline mode to apply for the pipeline execution
            (Default: 'default')
        tags_fn (Optional[Callable[[JobContext], Optional[Dict[str, str]]]]): A function that
            generates tags to attach to the pipeline execution. Takes a
            :py:class:`~dagster.JobContext` and returns a dictionary of tags (string
            key-value pairs).
    """
    check.str_param(pipeline_name, "pipeline_name")
    check.opt_str_param(name, "name")
    check.str_param(mode, "mode")
    check.opt_nullable_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_callable_param(tags_fn, "tags_fn")

    def inner(fn):
        check.callable_param(fn, "fn")

        job_name = name or fn.__name__

        return JobDefinition(
            name=job_name,
            pipeline_name=pipeline_name,
            run_config_fn=fn,
            tags_fn=tags_fn,
            mode=mode,
            solid_selection=solid_selection,
        )

    return inner

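# Hedged usage sketch, not part of the source above: the decorated function is the
# run_config_fn for the JobDefinition and receives a JobContext. The names and config keys
# are illustrative.
@job(pipeline_name="my_pipeline", tags_fn=lambda _context: {"source": "external_trigger"})
def my_job(_context):
    return {"solids": {"process": {"config": {"param": "value"}}}}
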
def __new__(
    cls,
    run_id=None,
    tags=None,
    event_callback=None,
    loggers=None,
    executor_config=None,
    reexecution_config=None,
    step_keys_to_execute=None,
    mode=None,
):
    check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str)

    tags = check.opt_dict_param(tags, 'tags', key_type=str)

    if EXECUTION_TIME_KEY in tags:
        tags[EXECUTION_TIME_KEY] = float(tags[EXECUTION_TIME_KEY])
    else:
        tags[EXECUTION_TIME_KEY] = time.time()

    return super(RunConfig, cls).__new__(
        cls,
        run_id=check.str_param(run_id, 'run_id') if run_id else make_new_run_id(),
        tags=tags,
        event_callback=check.opt_callable_param(event_callback, 'event_callback'),
        loggers=check.opt_list_param(loggers, 'loggers'),
        executor_config=check.inst_param(executor_config, 'executor_config', ExecutorConfig)
        if executor_config
        else InProcessExecutorConfig(),
        reexecution_config=check.opt_inst_param(
            reexecution_config, 'reexecution_config', ReexecutionConfig
        ),
        step_keys_to_execute=step_keys_to_execute,
        mode=check.opt_str_param(mode, 'mode'),
    )

def __init__(
    self,
    execution_plan,
    run_config,
    pipeline_run,
    instance,
    scoped_resources_builder_cm=None,
    system_storage_data=None,
    intermediate_storage=None,
    raise_on_error=False,
):
    scoped_resources_builder_cm = check.opt_callable_param(
        scoped_resources_builder_cm,
        "scoped_resources_builder_cm",
        default=resource_initialization_manager,
    )
    generator = self.event_generator(
        execution_plan,
        run_config,
        pipeline_run,
        instance,
        scoped_resources_builder_cm,
        system_storage_data,
        intermediate_storage,
        raise_on_error,
    )
    self._manager = EventGenerationManager(generator, self.context_type, raise_on_error)

def __init__(
    self,
    name=None,
    input_defs=None,
    output_defs=None,
    description=None,
    required_resource_keys=None,
    config=None,
    metadata=None,
    step_metadata_fn=None,
):
    self.name = check.opt_str_param(name, 'name')
    self.input_defs = check.opt_nullable_list_param(input_defs, 'input_defs', InputDefinition)
    self.output_defs = check.opt_nullable_list_param(
        output_defs, 'output_defs', OutputDefinition
    )
    self.description = check.opt_str_param(description, 'description')

    # resources will be checked within SolidDefinition
    self.required_resource_keys = required_resource_keys

    # config will be checked within SolidDefinition
    self.config = config

    # metadata will be checked within ISolidDefinition
    self.metadata = metadata

    self.step_metadata_fn = check.opt_callable_param(step_metadata_fn, 'step_metadata_fn')

def __new__(
    cls,
    name,
    is_persistent,
    required_resource_keys,
    config_schema=None,
    system_storage_creation_fn=None,
    config=None,
):
    return super(SystemStorageDefinition, cls).__new__(
        cls,
        name=check.str_param(name, 'name'),
        is_persistent=check.bool_param(is_persistent, 'is_persistent'),
        config_schema=canonicalize_backcompat_args(
            check_user_facing_opt_config_param(config_schema, 'config_schema'),
            'config_schema',
            check_user_facing_opt_config_param(config, 'config'),
            'config',
            '0.9.0',
        ),
        system_storage_creation_fn=check.opt_callable_param(
            system_storage_creation_fn, 'system_storage_creation_fn'
        ),
        required_resource_keys=frozenset(
            check.set_param(required_resource_keys, 'required_resource_keys', of_type=str)
        ),
    )

def __new__(
    cls,
    name,
    is_persistent,
    required_resource_keys,
    config_schema=None,
    intermediate_storage_creation_fn=None,
):
    return super(IntermediateStorageDefinition, cls).__new__(
        cls,
        name=check.str_param(name, 'name'),
        is_persistent=check.bool_param(is_persistent, 'is_persistent'),
        config_schema=check_user_facing_opt_config_param(config_schema, 'config_schema'),
        intermediate_storage_creation_fn=check.opt_callable_param(
            intermediate_storage_creation_fn, 'intermediate_storage_creation_fn'
        ),
        required_resource_keys=frozenset(
            check.set_param(
                required_resource_keys if required_resource_keys else set(),
                'required_resource_keys',
                of_type=str,
            )
        ),
    )

def __init__(
    self,
    name,
    input_defs,
    compute_fn,
    output_defs,
    config_schema=None,
    description=None,
    tags=None,
    required_resource_keys=None,
    positional_inputs=None,
    _configured_config_mapping_fn=None,
):
    self._compute_fn = check.callable_param(compute_fn, 'compute_fn')
    self._config_schema = check_user_facing_opt_config_param(config_schema, 'config_schema')
    self._required_resource_keys = frozenset(
        check.opt_set_param(required_resource_keys, 'required_resource_keys', of_type=str)
    )
    self.__configured_config_mapping_fn = check.opt_callable_param(
        _configured_config_mapping_fn, 'config_mapping_fn'
    )

    super(SolidDefinition, self).__init__(
        name=name,
        input_defs=check.list_param(input_defs, 'input_defs', InputDefinition),
        output_defs=check.list_param(output_defs, 'output_defs', OutputDefinition),
        description=description,
        tags=check.opt_dict_param(tags, 'tags', key_type=str),
        positional_inputs=positional_inputs,
    )

def __new__(cls, name, alias=None, resource_mapper_fn=None):
    name = check.str_param(name, 'name')
    alias = check.opt_str_param(alias, 'alias')
    resource_mapper_fn = check.opt_callable_param(
        resource_mapper_fn, 'resource_mapper_fn', SolidInvocation.default_resource_mapper_fn
    )
    return super(cls, SolidInvocation).__new__(cls, name, alias, resource_mapper_fn)

def __init__(
    self,
    run_id,
    loggers=None,
    resources=None,
    event_callback=None,
    environment_config=None,
    tags=None,
    persistence_policy=None,
):
    if loggers is None:
        loggers = [define_colored_console_logger('dagster')]

    self._logger = CompositeLogger(loggers=loggers)
    self.resources = resources
    self._run_id = check.str_param(run_id, 'run_id')
    self._tags = check.opt_dict_param(tags, 'tags')
    self.events = ExecutionEvents(self)

    # For re-construction purposes later on
    self._event_callback = check.opt_callable_param(event_callback, 'event_callback')
    self._environment_config = environment_config

    self.persistence_policy = check.opt_inst_param(
        persistence_policy, 'persistence_policy', PersistenceStrategy
    )

def __init__(
    self,
    name: Optional[str] = None,
    mode_defs: Optional[List[ModeDefinition]] = None,
    preset_defs: Optional[List[PresetDefinition]] = None,
    description: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    hook_defs: Optional[Set[HookDefinition]] = None,
    input_defs: Optional[List[InputDefinition]] = None,
    output_defs: Optional[List[OutputDefinition]] = None,
    config_schema: Optional[Dict[str, Any]] = None,
    config_fn: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
):
    self.name = check.opt_str_param(name, "name")
    self.mode_definitions = check.opt_list_param(mode_defs, "mode_defs", ModeDefinition)
    self.preset_definitions = check.opt_list_param(preset_defs, "preset_defs", PresetDefinition)
    self.description = check.opt_str_param(description, "description")
    self.tags = check.opt_dict_param(tags, "tags")
    self.hook_defs = check.opt_set_param(hook_defs, "hook_defs", of_type=HookDefinition)
    self.input_defs = check.opt_list_param(input_defs, "input_defs", of_type=InputDefinition)
    self.did_pass_outputs = output_defs is not None
    self.output_defs = check.opt_nullable_list_param(
        output_defs, "output_defs", of_type=OutputDefinition
    )
    self.config_schema = config_schema
    self.config_fn = check.opt_callable_param(config_fn, "config_fn")

def pipeline_initialization_manager(
    pipeline_def,
    environment_dict,
    pipeline_run,
    instance,
    execution_plan,
    scoped_resources_builder_cm=None,
    system_storage_data=None,
    raise_on_error=False,
):
    scoped_resources_builder_cm = check.opt_callable_param(
        scoped_resources_builder_cm,
        'scoped_resources_builder_cm',
        default=resource_initialization_manager,
    )
    generator = pipeline_initialization_event_generator(
        pipeline_def,
        environment_dict,
        pipeline_run,
        instance,
        execution_plan,
        scoped_resources_builder_cm,
        system_storage_data,
        raise_on_error,
    )
    return EventGenerationManager(generator, SystemPipelineExecutionContext, raise_on_error)

def __init__(self, execution_plan, retries, sort_key_fn=None):
    self._plan = check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)
    self._retries = check.inst_param(retries, 'retries', Retries)
    self._sort_key_fn = check.opt_callable_param(sort_key_fn, 'sort_key_fn', _default_sort_key)

    # All steps to be executed start out here in _pending
    self._pending = self._plan.execution_deps()

    # steps move in to these buckets as a result of _update calls
    self._executable = []
    self._pending_skip = []
    self._pending_retry = []
    self._waiting_to_retry = {}

    # then are considered _in_flight when vended via get_steps_to_*
    self._in_flight = set()

    # and finally their terminal state is tracked by these sets, via mark_*
    self._completed = set()
    self._success = set()
    self._failed = set()
    self._skipped = set()

    # Start the show by loading _executable with the set of _pending steps that have no deps
    self._update()

def __init__(
    self,
    name,
    input_defs,
    compute_fn,
    output_defs,
    config_field=None,
    description=None,
    metadata=None,
    required_resource_keys=None,
    step_metadata_fn=None,
):
    self._compute_fn = check.callable_param(compute_fn, 'compute_fn')
    self._config_field = check_user_facing_opt_field_param(
        config_field,
        'config_field',
        'of a SolidDefinition or @solid named "{name}"'.format(name=name),
    )
    self._required_resource_keys = check.opt_set_param(
        required_resource_keys, 'required_resource_keys', of_type=str
    )
    self._step_metadata_fn = check.opt_callable_param(step_metadata_fn, 'step_metadata_fn')

    super(SolidDefinition, self).__init__(
        name=name,
        input_defs=check.list_param(input_defs, 'input_defs', InputDefinition),
        output_defs=check.list_param(output_defs, 'output_defs', OutputDefinition),
        description=description,
        metadata=metadata,
    )

def __init__(
    self,
    name,
    is_persistent,
    required_resource_keys,
    config_schema=None,
    intermediate_storage_creation_fn=None,
    description=None,
):
    warnings.warn(
        "IntermediateStorageDefinition and @intermediate_storage are deprecated in 0.10.0 and "
        "will be removed in 0.11.0. Use ObjectManagerDefinition and @object_manager instead, "
        "which gives you better control over how inputs and outputs are handled and loaded."
    )
    self._name = check_valid_name(name)
    self._is_persistent = check.bool_param(is_persistent, "is_persistent")
    self._config_schema = convert_user_facing_definition_config_schema(config_schema)
    self._intermediate_storage_creation_fn = check.opt_callable_param(
        intermediate_storage_creation_fn, "intermediate_storage_creation_fn"
    )
    self._required_resource_keys = frozenset(
        check.set_param(
            required_resource_keys if required_resource_keys else set(),
            "required_resource_keys",
            of_type=str,
        )
    )
    self._description = check.opt_str_param(description, "description")

def __init__(self, execution_plan, retries, sort_key_fn=None):
    self._plan = check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    self._retries = check.inst_param(retries, "retries", Retries)
    self._sort_key_fn = check.opt_callable_param(sort_key_fn, "sort_key_fn", _default_sort_key)

    self._context_guard = False  # Prevent accidental direct use

    # All steps to be executed start out here in _pending
    self._pending = self._plan.execution_deps()

    # steps move in to these buckets as a result of _update calls
    self._executable = []
    self._pending_skip = []
    self._pending_retry = []
    self._pending_abandon = []
    self._waiting_to_retry = {}

    # then are considered _in_flight when vended via get_steps_to_*
    self._in_flight = set()

    # and finally their terminal state is tracked by these sets, via mark_*
    self._success = set()
    self._failed = set()
    self._skipped = set()
    self._abandoned = set()

    # see verify_complete
    self._unknown_state = set()

    self._interrupted = set()

    # Start the show by loading _executable with the set of _pending steps that have no deps
    self._update()
