def define_resource_dictionary_cls(resource_defs):
    '''Build the Shape holding one entry per resource that declares config.

    Args:
        resource_defs: Dict of resource name (str) to ResourceDefinition.

    Returns:
        A Shape keyed by resource name. Each entry is a Field wrapping a Shape
        with a single 'config' key; resources whose ``config_field`` is falsy
        are left out.
    '''
    check.dict_param(
        resource_defs, 'resource_defs', key_type=str, value_type=ResourceDefinition
    )
    configurable = {
        name: Field(Shape({'config': definition.config_field}))
        for name, definition in resource_defs.items()
        if definition.config_field
    }
    return Shape(fields=configurable)
def solid_config_field(fields, ignored):
    """Wrap the surviving entries of ``fields`` in a Field.

    Args:
        fields: Dict of candidate entries; it is passed through
            ``remove_none_entries`` before use.
        ignored: When truthy, the resulting Field is marked not required and
            carries a description saying the config is accepted but ignored.

    Returns:
        A Field over a Shape of the remaining entries, or None when nothing
        remains.
    """
    present = remove_none_entries(fields)
    if not present:
        return None
    if not ignored:
        return Field(Shape(present))
    return Field(
        Shape(present),
        is_required=False,
        description="This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
def define_logger_dictionary_cls(creation_data):
    '''Build the Shape with one optional entry per logger definition.

    Args:
        creation_data: EnvironmentClassCreationData whose ``logger_defs``
            dict is iterated.

    Returns:
        A Shape keyed by logger name. Each entry is a Field (created with
        ``is_optional=True``) over a Shape built from the logger's
        ``config_field`` after ``remove_none_entries``.
    '''
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)
    return Shape(
        {
            name: Field(
                Shape(remove_none_entries({'config': logger_def.config_field})),
                is_optional=True,
            )
            for name, logger_def in creation_data.logger_defs.items()
        }
    )
def define_logger_dictionary_cls(creation_data):
    """Build the Shape with one non-required entry per logger definition.

    Args:
        creation_data: EnvironmentClassCreationData whose ``logger_defs``
            dict is iterated.

    Returns:
        A Shape keyed by logger name. Each entry is a Field with
        ``is_required=False`` over a Shape built from the logger's
        ``config_schema`` after ``remove_none_entries``.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)
    return Shape(
        {
            name: Field(
                Shape(remove_none_entries({"config": logger_def.config_schema})),
                is_required=False,
            )
            for name, logger_def in creation_data.logger_defs.items()
        }
    )
def get_inputs_field(solid, handle, dependency_structure):
    '''Build the config Field for the inputs of ``solid`` that must come from config.

    Args:
        solid: The Solid whose input definitions are inspected.
        handle: SolidHandle for the solid (only type-checked here).
        dependency_structure: DependencyStructure used to test whether an
            input already has dependencies.

    Returns:
        A Field over a Shape keyed by input name, or None when the solid has
        no configurable inputs or no input qualifies.
    '''
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)

    if not solid.definition.has_configurable_inputs:
        return None

    config_inputs = {}
    for input_name, input_def in solid.definition.input_dict.items():
        if not input_def.runtime_type.input_hydration_config:
            continue
        input_handle = SolidInputHandle(solid, input_def)
        # Only an input that is not satisfied by a dependency (and for which
        # solid.container_maps_input is false) has to be provided via config.
        if dependency_structure.has_deps(input_handle) or solid.container_maps_input(
            input_name
        ):
            continue
        config_inputs[input_name] = Field(
            input_def.runtime_type.input_hydration_config.schema_type
        )

    return Field(Shape(config_inputs)) if config_inputs else None
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    parent_handle=None,
):
    """Build the Shape with one entry per solid that has a config entry.

    Args:
        solids: List of Solid instances in the current selection.
        ignored_solids: Optional list of Solid instances; their fields are
            built with ``ignored=True``.
        dependency_structure: DependencyStructure forwarded to
            ``define_isolid_field``.
        parent_handle: Optional SolidHandle used as the parent of every handle
            created here.

    Returns:
        A Shape keyed by solid name. Ignored solids are processed after the
        selected ones, so an ignored solid replaces an entry of the same name.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # Selected solids first, then ignored ones, each tagged with its flag.
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for solid in group:
            if not solid.definition.has_config_entry:
                continue
            fields[solid.name] = define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
                ignored=is_ignored,
            )
    return Shape(fields)
def define_run_config_schema_type(
    creation_data: RunConfigSchemaCreationData,
) -> ConfigType:
    """Assemble the top-level run config Shape from ``creation_data``.

    The Shape has "execution", "loggers", "resources" and "inputs" entries
    plus one entry for the nodes, keyed "ops" when
    ``creation_data.is_using_graph_job_op_apis`` is truthy and "solids"
    otherwise; the other spelling is registered as a field alias. Entries are
    passed through ``remove_none_entries`` before the Shape is built.

    Args:
        creation_data: RunConfigSchemaCreationData describing the graph, mode,
            solids and dependency structure.

    Returns:
        The Shape built from those fields.
    """
    mode_def = creation_data.mode_definition
    graph_def = creation_data.graph_def
    uses_op_apis = creation_data.is_using_graph_job_op_apis

    if uses_op_apis:
        # Only the first executor definition is used on this path.
        execution_field = define_single_execution_field(mode_def.executor_defs[0])
    else:
        execution_field = define_execution_field(mode_def.executor_defs)

    top_level_node = Node(
        name=graph_def.name,
        definition=graph_def,
        graph_definition=graph_def,
    )

    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(
            mode_def.resource_defs,
            creation_data.required_resources,
        )
    )
    inputs_field = get_inputs_field(
        solid=top_level_node,
        dependency_structure=creation_data.dependency_structure,
        resource_defs=mode_def.resource_defs,
        solid_ignored=False,
    )

    if graph_def.has_config_mapping:
        # With a config mapping, the nodes entry exposes only the graph's
        # own config schema.
        config_schema = cast(IDefinitionConfigSchema, graph_def.config_schema)
        nodes_field = Field({"config": config_schema.as_field()})
    else:
        nodes_field = Field(
            define_solid_dictionary_cls(
                solids=creation_data.solids,
                ignored_solids=creation_data.ignored_solids,
                dependency_structure=creation_data.dependency_structure,
                resource_defs=mode_def.resource_defs,
                is_using_graph_job_op_apis=uses_op_apis,
            )
        )

    nodes_key, alias_key = ("ops", "solids") if uses_op_apis else ("solids", "ops")
    fields = {
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
        "inputs": inputs_field,
        nodes_key: nodes_field,
    }

    return Shape(
        fields=remove_none_entries(fields),
        field_aliases={nodes_key: alias_key},
    )
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    resource_defs,
    parent_handle=None,
):
    """Build the Shape with one entry per solid that yields a config field.

    Args:
        solids: List of Solid instances in the current selection.
        ignored_solids: Optional list of Solid instances; their fields are
            built with ``ignored=True``.
        dependency_structure: DependencyStructure forwarded to
            ``define_isolid_field``.
        resource_defs: Resource definitions forwarded to
            ``define_isolid_field``.
        parent_handle: Optional SolidHandle used as the parent of every handle
            created here.

    Returns:
        A Shape keyed by solid name. Solids for which ``define_isolid_field``
        returns a falsy value are omitted; ignored solids are processed after
        the selected ones.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    fields = {}
    # Selected solids first, then ignored ones, each tagged with its flag.
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for solid in group:
            solid_field = define_isolid_field(
                solid,
                SolidHandle(solid.name, parent_handle),
                dependency_structure,
                resource_defs,
                ignored=is_ignored,
            )
            if solid_field:
                fields[solid.name] = solid_field
    return Shape(fields)
def def_config_field(
    configurable_def: ConfigurableDefinition,
    is_required: Optional[bool] = None,
) -> Field:
    """Wrap a definition's config in a Field over a one-key Shape.

    Args:
        configurable_def: Definition whose ``has_config_field`` and
            ``config_field`` attributes are read.
        is_required: Forwarded unchanged to ``Field``. Defaults to None, so
            the annotation is ``Optional[bool]`` rather than ``bool``.

    Returns:
        A Field over ``Shape({"config": ...})`` when the definition has a
        config field, otherwise a Field over an empty Shape.
    """
    if configurable_def.has_config_field:
        shape_fields = {"config": configurable_def.config_field}
    else:
        shape_fields = {}
    return Field(Shape(shape_fields), is_required=is_required)
def get_inputs_field(solid, handle, dependency_structure, resource_defs):
    """Build the config Field for inputs of ``solid`` that have no upstream.

    For each input definition, a field is requested from
    ``get_input_manager_input_field`` when the input has a
    ``root_manager_key``, or else from ``get_type_loader_input_field`` when
    its dagster type has a loader; in both cases only when
    ``input_has_upstream`` reports no upstream.

    Args:
        solid: The Solid whose input definitions are inspected.
        handle: SolidHandle for the solid (only type-checked here).
        dependency_structure: DependencyStructure passed to
            ``input_has_upstream``.
        resource_defs: Forwarded to ``get_input_manager_input_field``.

    Returns:
        A Field over a Shape keyed by input name, or None when no input
        produced a truthy field.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)

    configurable_inputs = {}
    for input_name, input_def in solid.definition.input_dict.items():
        input_handle = SolidInputHandle(solid, input_def)
        has_upstream = input_has_upstream(
            dependency_structure, input_handle, solid, input_name
        )

        candidate = None
        if input_def.root_manager_key and not has_upstream:
            candidate = get_input_manager_input_field(solid, input_def, resource_defs)
        elif input_def.dagster_type.loader and not has_upstream:
            candidate = get_type_loader_input_field(solid, input_name, input_def)

        if candidate:
            configurable_inputs[input_name] = candidate

    return Field(Shape(configurable_inputs)) if configurable_inputs else None
def get_inputs_field(solid, handle, dependency_structure):
    """Build the config Field for the inputs of ``solid`` that must come from config.

    Args:
        solid: The Solid whose input definitions are inspected.
        handle: SolidHandle for the solid (only type-checked here).
        dependency_structure: DependencyStructure used to test whether an
            input already has dependencies.

    Returns:
        A Field over a Shape keyed by input name, or None when the solid has
        no configurable inputs or no input qualifies. Each input's Field is
        required unless ``solid.definition.input_has_default`` is true for it.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)

    if not solid.definition.has_configurable_inputs:
        return None

    config_inputs = {}
    for input_name, input_def in solid.definition.input_dict.items():
        if not input_def.dagster_type.loader:
            continue
        input_handle = SolidInputHandle(solid, input_def)
        # Only an input that is not satisfied by a dependency (and for which
        # solid.container_maps_input is false) has to be provided via config.
        if dependency_structure.has_deps(input_handle) or solid.container_maps_input(
            input_name
        ):
            continue
        config_inputs[input_name] = Field(
            input_def.dagster_type.loader.schema_type,
            is_required=(not solid.definition.input_has_default(input_name)),
        )

    return Field(Shape(config_inputs)) if config_inputs else None
def define_environment_cls(creation_data):
    '''Assemble the top-level environment config Shape.

    Args:
        creation_data: EnvironmentClassCreationData providing the solids,
            dependency structure, mode definition and logger definitions.

    Returns:
        A Shape with 'solids', 'storage', 'intermediate_storage', 'execution',
        'loggers' and 'resources' entries (after ``remove_none_entries``).
        The storage, intermediate storage and execution Fields are created
        with ``is_required=False``.
    '''
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)
    mode_definition = creation_data.mode_definition

    solids_field = Field(
        define_solid_dictionary_cls(
            creation_data.solids,
            creation_data.dependency_structure,
        )
    )
    storage_field = Field(
        define_storage_config_cls(mode_definition),
        is_required=False,
    )
    intermediate_storage_field = Field(
        define_intermediate_storage_config_cls(mode_definition),
        is_required=False,
    )
    execution_field = Field(
        define_executor_config_cls(mode_definition),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(
        define_resource_dictionary_cls(mode_definition.resource_defs)
    )

    top_level = {
        'solids': solids_field,
        'storage': storage_field,
        'intermediate_storage': intermediate_storage_field,
        'execution': execution_field,
        'loggers': loggers_field,
        'resources': resources_field,
    }
    return Shape(fields=remove_none_entries(top_level))
def define_logger_dictionary_cls(
    creation_data: RunConfigSchemaCreationData,
) -> Shape:
    """Build the Shape with one non-required entry per logger definition.

    Args:
        creation_data: Source of the ``logger_defs`` dict.

    Returns:
        A Shape keyed by logger name whose values come from
        ``def_config_field(..., is_required=False)``.
    """
    logger_fields = {}
    for name, definition in creation_data.logger_defs.items():
        logger_fields[name] = def_config_field(definition, is_required=False)
    return Shape(logger_fields)
def config_type(cls):
    """Return the config schema dict for this class.

    The dict has three entries, all with ``is_required=False``:
    ``max_concurrent_runs``, ``tag_concurrency_limits`` and
    ``dequeue_interval_seconds``.
    """
    schema = {}

    schema["max_concurrent_runs"] = Field(
        config=IntSource,
        is_required=False,
        description=(
            "The maximum number of runs that are allowed to be in progress at once. "
            "Defaults to 10. Set to -1 to disable the limit. Set to 0 to stop any runs from launching. "
            "Any other negative values are disallowed."
        ),
    )

    # A tag limit's "value" is either a plain string or a dict carrying
    # applyLimitPerUniqueValue.
    tag_value_type = ScalarUnion(
        scalar_type=String,
        non_scalar_schema=Shape({"applyLimitPerUniqueValue": Bool}),
    )
    tag_limit_shape = Shape(
        {
            "key": String,
            "value": Field(tag_value_type, is_required=False),
            "limit": Field(int),
        }
    )
    schema["tag_concurrency_limits"] = Field(
        config=Noneable(Array(tag_limit_shape)),
        is_required=False,
        description=(
            "A set of limits that are applied to runs with particular tags. "
            "If a value is set, the limit is applied to only that key-value pair. "
            "If no value is set, the limit is applied across all values of that key. "
            "If the value is set to a dict with `applyLimitPerUniqueValue: true`, the limit "
            "will apply to the number of unique values for that key."
        ),
    )

    schema["dequeue_interval_seconds"] = Field(
        config=IntSource,
        is_required=False,
        description=(
            "The interval in seconds at which the Dagster Daemon "
            "should periodically check the run queue for new runs to launch."
        ),
    )

    return schema
def def_config_field(configurable_def, is_required=None):
    """Wrap a definition's config in a Field over a one-key Shape.

    Args:
        configurable_def: ConfigurableDefinition whose ``has_config_field``
            and ``config_field`` attributes are read.
        is_required: Forwarded unchanged to ``Field``.

    Returns:
        A Field over ``Shape({"config": ...})`` when the definition has a
        config field, otherwise a Field over an empty Shape.
    """
    check.inst_param(configurable_def, "configurable_def", ConfigurableDefinition)
    if configurable_def.has_config_field:
        shape_fields = {"config": configurable_def.config_field}
    else:
        shape_fields = {}
    return Field(Shape(shape_fields), is_required=is_required)
def solid_config_field(
    fields: Dict[str, Optional[Field]], ignored: bool
) -> Optional[Field]:
    """Wrap the surviving entries of ``fields`` in a Field.

    Args:
        fields: Candidate entries; passed through ``remove_none_entries``
            before use.
        ignored: When true, the resulting Field is marked not required and
            carries a description saying the config is accepted but ignored.

    Returns:
        A Field over a Shape of the remaining entries (with "solids" aliased
        to "ops"), or None when nothing remains.
    """
    present = remove_none_entries(fields)
    if not present:
        return None

    shape = Shape(present, field_aliases={"solids": "ops"})
    if not ignored:
        return Field(shape)
    return Field(
        shape,
        is_required=False,
        description="This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
def define_resource_dictionary_cls(
    resource_defs: Dict[str, ResourceDefinition],
) -> Shape:
    """Build the Shape holding one entry per resource that declares config.

    Args:
        resource_defs: Mapping of resource name to ResourceDefinition.

    Returns:
        A Shape keyed by resource name with values from ``def_config_field``;
        resources whose ``config_schema`` is falsy are left out.
    """
    configurable = {
        name: def_config_field(definition)
        for name, definition in resource_defs.items()
        if definition.config_schema
    }
    return Shape(fields=configurable)
def config_type_pipeline_run(cls):
    """Configuration intended to be set at pipeline execution time.

    Returns:
        A dict mapping each config key ("job_image", "image_pull_policy",
        "image_pull_secrets", "service_account_name", "env_config_maps",
        "env_secrets") to its Field. Every Field is optional;
        "image_pull_policy" defaults to "IfNotPresent".
    """
    # The description fragments are joined by implicit string concatenation,
    # so each fragment that precedes more text must end in a space.
    return {
        "job_image": Field(
            Noneable(StringSource),
            is_required=False,
            description="Docker image to use for launched task Jobs. If the repository is not "
            "loaded from a GRPC server, then this field is required. If the repository is "
            "loaded from a GRPC server, then leave this field empty. "
            '(Ex: "mycompany.com/dagster-k8s-image:latest").',
        ),
        "image_pull_policy": Field(
            StringSource,
            is_required=False,
            default_value="IfNotPresent",
            description="Image pull policy to set on the launched task Job Pods. Defaults to "
            '"IfNotPresent".',
        ),
        "image_pull_secrets": Field(
            Noneable(Array(Shape({"name": StringSource}))),
            is_required=False,
            description="(Advanced) Specifies that Kubernetes should get the credentials from "
            "the Secrets named in this list.",
        ),
        "service_account_name": Field(
            Noneable(StringSource),
            is_required=False,
            description="(Advanced) Override the name of the Kubernetes service account under "
            "which to run the Job.",
        ),
        "env_config_maps": Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description="A list of custom ConfigMapEnvSource names from which to draw "
            "environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
            "https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container",
        ),
        "env_secrets": Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description="A list of custom Secret names from which to draw environment "
            "variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
            "https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables",
        ),
    }
def define_logger_dictionary_cls(creation_data):
    """Build the Shape with one non-required entry per logger definition.

    Args:
        creation_data: EnvironmentClassCreationData whose ``logger_defs``
            dict is iterated.

    Returns:
        A Shape keyed by logger name whose values come from
        ``def_config_field(..., is_required=False)``.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)
    logger_fields = {}
    for name, definition in creation_data.logger_defs.items():
        logger_fields[name] = def_config_field(definition, is_required=False)
    return Shape(logger_fields)
def define_resource_dictionary_cls(resource_defs):
    """Build the Shape holding one entry per resource that declares config.

    Args:
        resource_defs: Dict of resource name (str) to ResourceDefinition.

    Returns:
        A Shape keyed by resource name with values from ``def_config_field``;
        resources whose ``config_schema`` is falsy are left out.
    """
    check.dict_param(
        resource_defs, "resource_defs", key_type=str, value_type=ResourceDefinition
    )
    configurable = {
        name: def_config_field(definition)
        for name, definition in resource_defs.items()
        if definition.config_schema
    }
    return Shape(fields=configurable)
def define_solid_config_cls(config_schema, inputs_field, outputs_field):
    '''Build the per-solid config Shape from its optional parts.

    Args:
        config_schema: Optional field stored under 'config'.
        inputs_field: Optional field stored under 'inputs'.
        outputs_field: Optional field stored under 'outputs'.

    Returns:
        A Shape over the three entries after ``remove_none_entries``.
    '''
    for value, label in (
        (config_schema, 'config_schema'),
        (inputs_field, 'inputs_field'),
        (outputs_field, 'outputs_field'),
    ):
        check_opt_field_param(value, label)

    candidates = {
        'config': config_schema,
        'inputs': inputs_field,
        'outputs': outputs_field,
    }
    return Shape(remove_none_entries(candidates))
def config_type_pipeline_run(cls):
    '''Configuration intended to be set at pipeline execution time.

    Returns:
        A dict mapping each config key ('job_image', 'image_pull_policy',
        'image_pull_secrets', 'service_account_name', 'env_config_maps',
        'env_secrets') to its Field. 'job_image' is required; the others are
        optional and 'image_pull_policy' defaults to 'IfNotPresent'.
    '''
    # The description fragments are joined by implicit string concatenation,
    # so each fragment that precedes more text must end in a space.
    return {
        'job_image': Field(
            StringSource,
            is_required=True,
            description='Docker image to use for launched task Jobs '
            '(e.g. "mycompany.com/dagster-k8s-image:latest").',
        ),
        'image_pull_policy': Field(
            StringSource,
            is_required=False,
            default_value='IfNotPresent',
            description='Image pull policy to set on the launched task Job Pods. Defaults to '
            '"IfNotPresent".',
        ),
        'image_pull_secrets': Field(
            Noneable(Array(Shape({'name': StringSource}))),
            is_required=False,
            description='(Advanced) Specifies that Kubernetes should get the credentials from '
            'the Secrets named in this list.',
        ),
        'service_account_name': Field(
            Noneable(StringSource),
            is_required=False,
            description='(Advanced) Override the name of the Kubernetes service account under '
            'which to run the Job.',
        ),
        'env_config_maps': Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description='A list of custom ConfigMapEnvSource names from which to draw '
            'environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
            'https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container',
        ),
        'env_secrets': Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description='A list of custom Secret names from which to draw environment '
            'variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
            'https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables',
        ),
    }
def define_executor_config_cls(mode_definition):
    """Build a Selector with one entry per executor definition of the mode.

    Args:
        mode_definition: ModeDefinition whose ``executor_defs`` are iterated.

    Returns:
        A Selector keyed by executor name. Each entry is a Field over a Shape
        holding the executor's ``config_schema`` under "config", or an empty
        Shape when that schema is falsy.
    """
    check.inst_param(mode_definition, "mode_definition", ModeDefinition)

    selector_fields = {}
    for executor in mode_definition.executor_defs:
        if executor.config_schema:
            inner = {"config": executor.config_schema}
        else:
            inner = {}
        selector_fields[executor.name] = Field(Shape(fields=inner))
    return Selector(selector_fields)
def define_storage_config_cls(mode_definition):
    '''Build a Selector with one entry per system storage definition of the mode.

    Args:
        mode_definition: ModeDefinition whose ``system_storage_defs`` are
            iterated.

    Returns:
        A Selector keyed by storage name. Each entry is a Field over a Shape
        holding the storage's ``config_field`` under 'config', or an empty
        Shape when that field is falsy.
    '''
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    selector_fields = {}
    for storage in mode_definition.system_storage_defs:
        if storage.config_field:
            inner = {'config': storage.config_field}
        else:
            inner = {}
        selector_fields[storage.name] = Field(Shape(fields=inner))
    return Selector(selector_fields)
def define_solid_config_cls(config_schema, inputs_field, outputs_field):
    """Build the per-solid config Shape from its optional parts.

    Args:
        config_schema: Optional field stored under "config".
        inputs_field: Optional field stored under "inputs".
        outputs_field: Optional field stored under "outputs".

    Returns:
        A Shape over the three entries after ``remove_none_entries``.
    """
    for value, label in (
        (config_schema, "config_schema"),
        (inputs_field, "inputs_field"),
        (outputs_field, "outputs_field"),
    ):
        check_opt_field_param(value, label)

    candidates = {
        "config": config_schema,
        "inputs": inputs_field,
        "outputs": outputs_field,
    }
    return Shape(remove_none_entries(candidates))
def define_environment_cls(creation_data):
    """Assemble the top-level environment config Shape.

    The "storage" entry is only built when the intermediate storage field is
    neither required nor has a default provided; otherwise it is None and is
    handed to ``remove_none_entries`` along with the other entries.

    Args:
        creation_data: EnvironmentClassCreationData providing the solids,
            ignored solids, dependency structure, mode definition and logger
            definitions.

    Returns:
        A Shape built from the "solids", "storage", "intermediate_storage",
        "execution", "loggers" and "resources" entries.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)
    mode_def = creation_data.mode_definition

    intermediate_defs = mode_def.intermediate_storage_defs
    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(intermediate_defs),
        storage_names=[definition.name for definition in intermediate_defs],
        defaults={storage.name for storage in default_intermediate_storage_defs},
    )

    if (
        intermediate_storage_field.is_required
        or intermediate_storage_field.default_provided
    ):
        storage_field = None
    else:
        system_defs = mode_def.system_storage_defs
        storage_field = define_storage_field(
            selector_for_named_defs(system_defs),
            storage_names=[definition.name for definition in system_defs],
            defaults={storage.name for storage in default_system_storage_defs},
        )

    solids_field = Field(
        define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
        )
    )
    execution_field = Field(
        selector_for_named_defs(mode_def.executor_defs),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(define_resource_dictionary_cls(mode_def.resource_defs))

    top_level = {
        "solids": solids_field,
        "storage": storage_field,
        "intermediate_storage": intermediate_storage_field,
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
    }
    return Shape(fields=remove_none_entries(top_level))
def define_intermediate_storage_config_cls(mode_definition):
    """Build a Selector with one entry per intermediate storage definition.

    Args:
        mode_definition: ModeDefinition whose ``intermediate_storage_defs``
            are iterated.

    Returns:
        A Selector keyed by storage name. Each entry is a Field over a Shape
        holding the storage's ``config_schema`` under "config", or an empty
        Shape when that schema is falsy.
    """
    check.inst_param(mode_definition, "mode_definition", ModeDefinition)

    selector_fields = {}
    for storage in mode_definition.intermediate_storage_defs:
        if storage.config_schema:
            inner = {"config": storage.config_schema}
        else:
            inner = {}
        selector_fields[storage.name] = Field(Shape(fields=inner))
    return Selector(selector_fields)
def define_executor_config_cls(mode_definition):
    '''Build a Selector with one entry per executor definition of the mode.

    Args:
        mode_definition: ModeDefinition whose ``executor_defs`` are iterated.

    Returns:
        A Selector keyed by executor name. Each entry is a Field over a Shape
        holding the executor's ``config_field`` under 'config', or an empty
        Shape when that field is falsy.
    '''
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    selector_fields = {}
    for executor in mode_definition.executor_defs:
        if executor.config_field:
            inner = {'config': executor.config_field}
        else:
            inner = {}
        selector_fields[executor.name] = Field(Shape(fields=inner))
    return Selector(selector_fields)
def define_solid_dictionary_cls(solids, dependency_structure, parent_handle=None):
    '''Build the Shape with one entry per solid that has a config entry.

    Args:
        solids: List of Solid instances.
        dependency_structure: DependencyStructure forwarded to
            ``define_isolid_field``.
        parent_handle: Optional SolidHandle used as the parent of every handle
            created here.

    Returns:
        A Shape keyed by solid name; solids whose definition has no config
        entry are skipped.
    '''
    check.list_param(solids, 'solids', of_type=Solid)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.opt_inst_param(parent_handle, 'parent_handle', SolidHandle)

    fields = {}
    for solid in solids:
        if not solid.definition.has_config_entry:
            continue
        fields[solid.name] = define_isolid_field(
            solid,
            SolidHandle(solid.name, parent_handle),
            dependency_structure,
        )
    return Shape(fields)
def define_environment_cls(creation_data):
    """Assemble the top-level environment config Shape.

    Args:
        creation_data: EnvironmentClassCreationData providing the solids,
            ignored solids, dependency structure, mode definition and logger
            definitions.

    Returns:
        A Shape built from the "solids", "storage", "intermediate_storage",
        "execution", "loggers" and "resources" entries after
        ``remove_none_entries``.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)
    mode_def = creation_data.mode_definition
    intermediate_defs = mode_def.intermediate_storage_defs

    intermediate_storage_field = define_storage_field(
        selector_for_named_defs(intermediate_defs),
        storage_names=[definition.name for definition in intermediate_defs],
        defaults={storage.name for storage in default_intermediate_storage_defs},
    )

    # TODO: remove "storage" entry in run_config as part of system storage removal
    # currently we treat "storage" as an alias to "intermediate_storage" and storage field is optional
    # tracking https://github.com/dagster-io/dagster/issues/3280
    storage_field = Field(
        selector_for_named_defs(intermediate_defs),
        is_required=False,
    )

    solids_field = Field(
        define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
            resource_defs=mode_def.resource_defs,
        )
    )
    execution_field = Field(
        selector_for_named_defs(mode_def.executor_defs),
        is_required=False,
    )
    loggers_field = Field(define_logger_dictionary_cls(creation_data))
    resources_field = Field(define_resource_dictionary_cls(mode_def.resource_defs))

    top_level = {
        "solids": solids_field,
        "storage": storage_field,
        "intermediate_storage": intermediate_storage_field,
        "execution": execution_field,
        "loggers": loggers_field,
        "resources": resources_field,
    }
    return Shape(fields=remove_none_entries(top_level))