def get_inputs_field(
    solid: Node,
    dependency_structure: DependencyStructure,
    resource_defs: Dict[str, ResourceDefinition],
    solid_ignored: bool,
):
    """Build the ``inputs`` config field for ``solid``.

    An input contributes an entry only when ``input_has_upstream`` reports no
    upstream for it and either it declares a ``root_manager_key`` or its
    dagster type has a loader, and the corresponding helper returns a truthy
    field.

    Returns:
        ``None`` when no input contributes a field; otherwise a ``Field``
        wrapping a ``Shape`` of the collected entries. When ``solid_ignored``
        is true the field is optional and carries an explanatory description.
    """
    collected = {}
    for input_name, input_def in solid.definition.input_dict.items():
        input_handle = SolidInputHandle(solid, input_def)
        has_upstream = input_has_upstream(
            dependency_structure, input_handle, solid, input_name
        )

        # Root input managers take precedence over type loaders.
        if input_def.root_manager_key and not has_upstream:
            candidate = get_input_manager_input_field(solid, input_def, resource_defs)
        elif input_def.dagster_type.loader and not has_upstream:
            candidate = get_type_loader_input_field(solid, input_name, input_def)
        else:
            continue

        if candidate:
            collected[input_name] = candidate

    if not collected:
        return None

    inputs_shape = Shape(collected)
    if not solid_ignored:
        return Field(inputs_shape)

    return Field(
        inputs_shape,
        is_required=False,
        description="This solid is not present in the current solid selection, "
        "the input config values are allowed but ignored.",
    )
def get_outputs_field(solid, handle, resource_defs):
    """Build the ``outputs`` config field for ``solid``.

    ``handle`` is only type-checked here. Configurable output managers win:
    if any output yields an output-manager field, only those are used.
    Otherwise fields from type materializers are used, wrapped in an optional
    ``Array``. Returns ``None`` when neither source yields a field.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.dict_param(
        resource_defs, "resource_defs", key_type=str, value_type=ResourceDefinition
    )

    # Prefer configurable output managers and ignore all type materializers
    # when at least one output has one.
    manager_candidates = (
        (output_name, get_output_manager_output_field(solid, output_def, resource_defs))
        for output_name, output_def in solid.definition.output_dict.items()
    )
    manager_fields = {
        output_name: field for output_name, field in manager_candidates if field
    }
    if manager_fields:
        return Field(Shape(manager_fields))

    # Otherwise fall back to type materializers for the schema.
    materializer_candidates = (
        (output_name, get_type_output_field(output_def))
        for output_name, output_def in solid.definition.output_dict.items()
    )
    materializer_fields = {
        output_name: field for output_name, field in materializer_candidates if field
    }
    if materializer_fields:
        return Field(Array(Shape(materializer_fields)), is_required=False)

    return None
def get_outputs_field(
    solid: Node,
    resource_defs: Dict[str, ResourceDefinition],
) -> Optional[Field]:
    """Build the ``outputs`` config field for ``solid``.

    Configurable output managers win: if any output yields an output-manager
    field, only those are used. Otherwise fields from type materializers are
    used, wrapped in an optional ``Array``. Returns ``None`` when neither
    source yields a field.
    """
    # Prefer configurable output managers and ignore all type materializers
    # when at least one output has one.
    manager_candidates = (
        (output_name, get_output_manager_output_field(solid, output_def, resource_defs))
        for output_name, output_def in solid.definition.output_dict.items()
    )
    manager_fields = {
        output_name: field for output_name, field in manager_candidates if field
    }
    if manager_fields:
        return Field(Shape(manager_fields))

    # Otherwise fall back to type materializers for the schema.
    materializer_candidates = (
        (output_name, get_type_output_field(output_def))
        for output_name, output_def in solid.definition.output_dict.items()
    )
    materializer_fields = {
        output_name: field for output_name, field in materializer_candidates if field
    }
    if materializer_fields:
        return Field(Array(Shape(materializer_fields)), is_required=False)

    return None
def config_type(cls):
    """Return the config schema as a dict of key name to ``Field``.

    Keys: ``max_concurrent_runs``, ``tag_concurrency_limits`` and
    ``dequeue_interval_seconds``; all three are optional.
    """
    schema = {}
    schema["max_concurrent_runs"] = Field(config=IntSource, is_required=False)

    # Each tag limit has a key, an optional value (a plain string or a
    # ``{"applyLimitPerUniqueValue": bool}`` mapping) and an integer limit.
    per_unique_value = Shape({"applyLimitPerUniqueValue": Bool})
    tag_value = ScalarUnion(scalar_type=String, non_scalar_schema=per_unique_value)
    tag_limit = Shape(
        {
            "key": String,
            "value": Field(tag_value, is_required=False),
            "limit": Field(int),
        }
    )
    schema["tag_concurrency_limits"] = Field(
        config=Noneable(Array(tag_limit)),
        is_required=False,
    )

    schema["dequeue_interval_seconds"] = Field(config=IntSource, is_required=False)
    return schema
def define_resource_dictionary_cls(resource_defs):
    """Return a ``Shape`` with one entry per resource that has a config schema.

    Each entry is a ``Field`` over a ``Shape`` with a single ``'config'`` key
    holding that resource's ``config_schema``. Resources whose
    ``config_schema`` is falsy are omitted.
    """
    check.dict_param(
        resource_defs, 'resource_defs', key_type=str, value_type=ResourceDefinition
    )

    return Shape(
        fields={
            name: Field(Shape({'config': definition.config_schema}))
            for name, definition in resource_defs.items()
            if definition.config_schema
        }
    )
def solid_config_field(fields, ignored):
    """Wrap ``fields`` (with ``None`` entries removed) in a ``Field`` over a ``Shape``.

    When ``ignored`` is truthy the field is optional and carries a description
    saying the config values are allowed but ignored.
    """
    solid_shape = Shape(remove_none_entries(fields))
    if not ignored:
        return Field(solid_shape)

    return Field(
        solid_shape,
        is_required=False,
        description="This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
def define_logger_dictionary_cls(creation_data):
    """Return a ``Shape`` with one optional entry per logger in ``creation_data.logger_defs``.

    Each entry is a ``Shape`` whose ``'config'`` key holds the logger's
    ``config_schema``; that key is dropped by ``remove_none_entries`` when the
    schema is ``None``.
    """
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    return Shape(
        {
            name: Field(
                Shape(remove_none_entries({'config': definition.config_schema})),
                is_required=False,
            )
            for name, definition in creation_data.logger_defs.items()
        }
    )
def get_inputs_field(solid, handle, dependency_structure):
    """Build the ``inputs`` config field for ``solid``.

    ``handle`` is only type-checked here. An input gets an entry when its
    runtime type has an ``input_hydration_config``, it has no dependency in
    ``dependency_structure``, and the solid's container does not map it.

    Returns ``None`` when the solid definition has no configurable inputs or
    when no input qualifies.
    """
    check.inst_param(solid, 'solid', Solid)
    check.inst_param(handle, 'handle', SolidHandle)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)

    if not solid.definition.has_configurable_inputs:
        return None

    hydratable = {}
    for input_name, input_def in solid.definition.input_dict.items():
        if not input_def.runtime_type.input_hydration_config:
            continue

        input_handle = SolidInputHandle(solid, input_def)
        # Inputs satisfied by a dependency or mapped by the container are not
        # provided via config.
        if dependency_structure.has_deps(input_handle) or solid.container_maps_input(
            input_name
        ):
            continue

        hydratable[input_name] = Field(
            input_def.runtime_type.input_hydration_config.schema_type
        )

    return Field(Shape(hydratable)) if hydratable else None
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    parent_handle=None,
):
    """Return a ``Shape`` keyed by solid name for solids that have a config entry.

    Selected ``solids`` are processed first with ``ignored=False``, then
    ``ignored_solids`` with ``ignored=True``; a later entry with the same name
    replaces an earlier one.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    solid_fields = {}
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for member in group:
            if not member.definition.has_config_entry:
                continue
            solid_fields[member.name] = define_isolid_field(
                member,
                SolidHandle(member.name, parent_handle),
                dependency_structure,
                ignored=is_ignored,
            )

    return Shape(solid_fields)
def define_run_config_schema_type(
    creation_data: RunConfigSchemaCreationData,
) -> ConfigType:
    """Assemble the top-level run config ``Shape`` from ``creation_data``.

    The shape has ``execution``, ``loggers``, ``resources`` and ``inputs``
    entries plus one node entry: ``ops`` (aliased to ``solids``) when the
    graph/job/op APIs are in use, otherwise ``solids`` (aliased to ``ops``).
    ``None`` entries are removed before the shape is built.
    """
    # With the graph/job/op APIs only the first executor definition is used.
    if creation_data.is_using_graph_job_op_apis:
        execution_field = define_single_execution_field(
            creation_data.mode_definition.executor_defs[0]
        )
    else:
        execution_field = define_execution_field(
            creation_data.mode_definition.executor_defs
        )

    # Wrap the graph in a Node so its top-level inputs go through the same
    # input-field logic as any other node.
    top_level_node = Node(
        name=creation_data.graph_def.name,
        definition=creation_data.graph_def,
        graph_definition=creation_data.graph_def,
    )

    fields = {"execution": execution_field}
    fields["loggers"] = Field(define_logger_dictionary_cls(creation_data))
    fields["resources"] = Field(
        define_resource_dictionary_cls(
            creation_data.mode_definition.resource_defs,
            creation_data.required_resources,
        )
    )
    fields["inputs"] = get_inputs_field(
        solid=top_level_node,
        dependency_structure=creation_data.dependency_structure,
        resource_defs=creation_data.mode_definition.resource_defs,
        solid_ignored=False,
    )

    if creation_data.graph_def.has_config_mapping:
        # A config-mapped graph exposes only its own config schema.
        mapped_schema = cast(IDefinitionConfigSchema, creation_data.graph_def.config_schema)
        nodes_field = Field({"config": mapped_schema.as_field()})
    else:
        node_shape = define_solid_dictionary_cls(
            solids=creation_data.solids,
            ignored_solids=creation_data.ignored_solids,
            dependency_structure=creation_data.dependency_structure,
            resource_defs=creation_data.mode_definition.resource_defs,
            is_using_graph_job_op_apis=creation_data.is_using_graph_job_op_apis,
        )
        nodes_field = Field(node_shape)

    if creation_data.is_using_graph_job_op_apis:
        fields["ops"] = nodes_field
        field_aliases = {"ops": "solids"}
    else:
        fields["solids"] = nodes_field
        field_aliases = {"solids": "ops"}

    return Shape(
        fields=remove_none_entries(fields),
        field_aliases=field_aliases,
    )
def define_solid_dictionary_cls(
    solids,
    ignored_solids,
    dependency_structure,
    resource_defs,
    parent_handle=None,
):
    """Return a ``Shape`` keyed by solid name for solids that yield a config field.

    Selected ``solids`` are processed first with ``ignored=False``, then
    ``ignored_solids`` with ``ignored=True``; a later entry with the same name
    replaces an earlier one. Solids for which ``define_isolid_field`` returns
    a falsy value are omitted.
    """
    check.list_param(solids, "solids", of_type=Solid)
    ignored_solids = check.opt_list_param(ignored_solids, "ignored_solids", of_type=Solid)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)
    check.opt_inst_param(parent_handle, "parent_handle", SolidHandle)

    solid_fields = {}
    for group, is_ignored in ((solids, False), (ignored_solids, True)):
        for member in group:
            member_field = define_isolid_field(
                member,
                SolidHandle(member.name, parent_handle),
                dependency_structure,
                resource_defs,
                ignored=is_ignored,
            )
            if member_field:
                solid_fields[member.name] = member_field

    return Shape(solid_fields)
def get_inputs_field(solid, handle, dependency_structure):
    """Build the ``inputs`` config field for ``solid``.

    ``handle`` is only type-checked here. An input gets an entry when its
    dagster type has a loader, it has no dependency in
    ``dependency_structure``, and the solid's container does not map it. The
    entry is required unless the solid definition reports a default for that
    input.

    Returns ``None`` when the solid definition has no configurable inputs or
    when no input qualifies.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)

    if not solid.definition.has_configurable_inputs:
        return None

    loadable = {}
    for input_name, input_def in solid.definition.input_dict.items():
        if not input_def.dagster_type.loader:
            continue

        input_handle = SolidInputHandle(solid, input_def)
        # Inputs satisfied by a dependency or mapped by the container are not
        # provided via config.
        if dependency_structure.has_deps(input_handle) or solid.container_maps_input(
            input_name
        ):
            continue

        loadable[input_name] = Field(
            input_def.dagster_type.loader.schema_type,
            is_required=(not solid.definition.input_has_default(input_name)),
        )

    return Field(Shape(loadable)) if loadable else None
def define_environment_cls(creation_data):
    """Assemble the top-level environment config ``Shape`` from ``creation_data``.

    The shape has ``solids``, ``storage``, ``intermediate_storage``,
    ``execution``, ``loggers`` and ``resources`` entries; the storage,
    intermediate storage and execution entries are optional. ``None`` entries
    are removed before the shape is built.
    """
    check.inst_param(creation_data, 'creation_data', EnvironmentClassCreationData)

    mode_definition = creation_data.mode_definition

    top_level = {}
    top_level['solids'] = Field(
        define_solid_dictionary_cls(
            creation_data.solids,
            creation_data.dependency_structure,
        )
    )
    top_level['storage'] = Field(
        define_storage_config_cls(mode_definition),
        is_required=False,
    )
    top_level['intermediate_storage'] = Field(
        define_intermediate_storage_config_cls(mode_definition),
        is_required=False,
    )
    top_level['execution'] = Field(
        define_executor_config_cls(mode_definition),
        is_required=False,
    )
    top_level['loggers'] = Field(define_logger_dictionary_cls(creation_data))
    top_level['resources'] = Field(
        define_resource_dictionary_cls(mode_definition.resource_defs)
    )

    return Shape(fields=remove_none_entries(top_level))
def get_inputs_field(solid, handle, dependency_structure, resource_defs):
    """Build the ``inputs`` config field for ``solid``.

    ``handle`` is only type-checked here. An input contributes an entry only
    when ``input_has_upstream`` reports no upstream for it and either it
    declares a ``root_manager_key`` or its dagster type has a loader, and the
    corresponding helper returns a truthy field.

    Returns ``None`` when no input contributes a field.
    """
    check.inst_param(solid, "solid", Solid)
    check.inst_param(handle, "handle", SolidHandle)
    check.inst_param(dependency_structure, "dependency_structure", DependencyStructure)

    collected = {}
    for input_name, input_def in solid.definition.input_dict.items():
        input_handle = SolidInputHandle(solid, input_def)
        has_upstream = input_has_upstream(
            dependency_structure, input_handle, solid, input_name
        )

        # Root input managers take precedence over type loaders.
        if input_def.root_manager_key and not has_upstream:
            candidate = get_input_manager_input_field(solid, input_def, resource_defs)
        elif input_def.dagster_type.loader and not has_upstream:
            candidate = get_type_loader_input_field(solid, input_name, input_def)
        else:
            continue

        if candidate:
            collected[input_name] = candidate

    return Field(Shape(collected)) if collected else None
def def_config_field(
    configurable_def: ConfigurableDefinition, is_required: Optional[bool] = None
) -> Field:
    """Wrap a definition's config field in a ``Field`` over a one-key ``Shape``.

    Args:
        configurable_def: Definition whose ``config_field`` is placed under
            the ``"config"`` key when ``has_config_field`` is truthy;
            otherwise the shape is empty.
        is_required: Passed through to ``Field`` unchanged. ``None`` (the
            default) leaves the decision to ``Field``.

    Returns:
        The wrapping ``Field``.
    """
    # The annotation is Optional[bool] because the default is None; a bare
    # ``bool`` annotation contradicts the default under PEP 484.
    if configurable_def.has_config_field:
        shape_fields = {"config": configurable_def.config_field}
    else:
        shape_fields = {}

    return Field(Shape(shape_fields), is_required=is_required)
def define_logger_dictionary_cls(
    creation_data: RunConfigSchemaCreationData,
) -> Shape:
    """Return a ``Shape`` with one optional config entry per logger.

    Entries are keyed by logger name and built by ``def_config_field`` with
    ``is_required=False``.
    """
    logger_fields = {}
    for name, definition in creation_data.logger_defs.items():
        logger_fields[name] = def_config_field(definition, is_required=False)
    return Shape(logger_fields)
def solid_config_field(
    fields: Dict[str, Optional[Field]], ignored: bool
) -> Optional[Field]:
    """Wrap the non-``None`` entries of ``fields`` in a ``Field`` over a ``Shape``.

    Returns ``None`` when nothing remains after ``None`` entries are removed.
    When ``ignored`` is true the field is optional and carries a description
    saying the config values are allowed but ignored.
    """
    present = remove_none_entries(fields)
    if not present:
        return None

    if not ignored:
        return Field(Shape(present))

    return Field(
        Shape(present),
        is_required=False,
        description="This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
def config_type(cls):
    """Return the config schema as a dict of key name to ``Field``.

    Keys: ``max_concurrent_runs``, ``tag_concurrency_limits`` and
    ``dequeue_interval_seconds``; all three are optional and each carries a
    user-facing description.
    """
    schema = {}

    schema["max_concurrent_runs"] = Field(
        config=IntSource,
        is_required=False,
        description="The maximum number of runs that are allowed to be in progress at once. "
        "Defaults to 10. Set to -1 to disable the limit. Set to 0 to stop any runs from launching. "
        "Any other negative values are disallowed.",
    )

    # Each tag limit has a key, an optional value (a plain string or a
    # ``{"applyLimitPerUniqueValue": bool}`` mapping) and an integer limit.
    per_unique_value = Shape({"applyLimitPerUniqueValue": Bool})
    tag_value = ScalarUnion(scalar_type=String, non_scalar_schema=per_unique_value)
    tag_limit = Shape(
        {
            "key": String,
            "value": Field(tag_value, is_required=False),
            "limit": Field(int),
        }
    )
    schema["tag_concurrency_limits"] = Field(
        config=Noneable(Array(tag_limit)),
        is_required=False,
        description="A set of limits that are applied to runs with particular tags. "
        "If a value is set, the limit is applied to only that key-value pair. "
        "If no value is set, the limit is applied across all values of that key. "
        "If the value is set to a dict with `applyLimitPerUniqueValue: true`, the limit "
        "will apply to the number of unique values for that key.",
    )

    schema["dequeue_interval_seconds"] = Field(
        config=IntSource,
        is_required=False,
        description="The interval in seconds at which the Dagster Daemon "
        "should periodically check the run queue for new runs to launch.",
    )

    return schema
def def_config_field(configurable_def, is_required=None):
    """Wrap a definition's config field in a ``Field`` over a one-key ``Shape``.

    The shape holds ``configurable_def.config_field`` under ``"config"`` when
    ``has_config_field`` is truthy and is empty otherwise. ``is_required`` is
    passed through to ``Field`` unchanged.
    """
    check.inst_param(configurable_def, "configurable_def", ConfigurableDefinition)

    if configurable_def.has_config_field:
        shape_fields = {"config": configurable_def.config_field}
    else:
        shape_fields = {}

    return Field(Shape(shape_fields), is_required=is_required)
def solid_config_field(
    fields: Dict[str, Optional[Field]], ignored: bool, is_using_graph_job_op_apis: bool
) -> Optional[Field]:
    """Wrap the non-``None`` entries of ``fields`` in a ``Field`` over an aliased ``Shape``.

    The shape aliases ``ops`` to ``solids`` when ``is_using_graph_job_op_apis``
    is true, and ``solids`` to ``ops`` otherwise. Returns ``None`` when nothing
    remains after ``None`` entries are removed. When ``ignored`` is true the
    field is optional and carries a description saying the config values are
    allowed but ignored.
    """
    if is_using_graph_job_op_apis:
        field_aliases = {"ops": "solids"}
    else:
        field_aliases = {"solids": "ops"}

    present = remove_none_entries(fields)
    if not present:
        return None

    aliased_shape = Shape(present, field_aliases=field_aliases)
    if not ignored:
        return Field(aliased_shape)

    return Field(
        aliased_shape,
        is_required=False,
        description="This solid is not present in the current solid selection, "
        "the config values are allowed but ignored.",
    )
def define_resource_dictionary_cls(
    resource_defs: Dict[str, ResourceDefinition],
) -> Shape:
    """Return a ``Shape`` with one entry per resource that has a config schema.

    Entries are built by ``def_config_field``; resources whose
    ``config_schema`` is falsy are omitted.
    """
    return Shape(
        fields={
            name: def_config_field(definition)
            for name, definition in resource_defs.items()
            if definition.config_schema
        }
    )
def config_type_pipeline_run(cls):
    """Return the config schema intended to be set at pipeline execution time.

    The result is a dict of key name to ``Field`` with the keys ``job_image``,
    ``image_pull_policy``, ``image_pull_secrets``, ``service_account_name``,
    ``env_config_maps`` and ``env_secrets``. All fields are optional.
    """
    # Adjacent string literals are concatenated with no separator, so every
    # fragment that continues a sentence must end with an explicit space.
    return {
        "job_image": Field(
            Noneable(StringSource),
            is_required=False,
            description="Docker image to use for launched task Jobs. If the repository is not "
            "loaded from a GRPC server, then this field is required. If the repository is "
            "loaded from a GRPC server, then leave this field empty. "
            '(Ex: "mycompany.com/dagster-k8s-image:latest").',
        ),
        "image_pull_policy": Field(
            StringSource,
            is_required=False,
            default_value="IfNotPresent",
            description="Image pull policy to set on the launched task Job Pods. Defaults to "
            '"IfNotPresent".',
        ),
        "image_pull_secrets": Field(
            Noneable(Array(Shape({"name": StringSource}))),
            is_required=False,
            description="(Advanced) Specifies that Kubernetes should get the credentials from "
            "the Secrets named in this list.",
        ),
        "service_account_name": Field(
            Noneable(StringSource),
            is_required=False,
            description="(Advanced) Override the name of the Kubernetes service account under "
            "which to run the Job.",
        ),
        "env_config_maps": Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description="A list of custom ConfigMapEnvSource names from which to draw "
            "environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
            "https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container",
        ),
        "env_secrets": Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description="A list of custom Secret names from which to draw environment "
            "variables (using ``envFrom``) for the Job. Default: ``[]``. See: "
            "https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables",
        ),
    }
def define_logger_dictionary_cls(creation_data):
    """Return a ``Shape`` with one optional config entry per logger.

    Entries are keyed by logger name and built by ``def_config_field`` with
    ``is_required=False``.
    """
    check.inst_param(creation_data, "creation_data", EnvironmentClassCreationData)

    logger_fields = {}
    for name, definition in creation_data.logger_defs.items():
        logger_fields[name] = def_config_field(definition, is_required=False)
    return Shape(logger_fields)
def define_solid_config_cls(config_schema, inputs_field, outputs_field):
    """Return a ``Shape`` over a solid's ``config``, ``inputs`` and ``outputs`` fields.

    Each argument is validated with ``check_opt_field_param``; entries that
    are ``None`` are removed before the shape is built.
    """
    check_opt_field_param(config_schema, 'config_schema')
    check_opt_field_param(inputs_field, 'inputs_field')
    check_opt_field_param(outputs_field, 'outputs_field')

    candidate_fields = {
        'config': config_schema,
        'inputs': inputs_field,
        'outputs': outputs_field,
    }
    return Shape(remove_none_entries(candidate_fields))
def config_type_pipeline_run(cls):
    '''Return the config schema intended to be set at pipeline execution time.

    The result is a dict of key name to ``Field`` with the keys ``job_image``
    (required), ``image_pull_policy``, ``image_pull_secrets``,
    ``service_account_name``, ``env_config_maps`` and ``env_secrets``
    (all optional).
    '''
    # Adjacent string literals are concatenated with no separator, so every
    # fragment that continues a sentence must end with an explicit space.
    return {
        'job_image': Field(
            StringSource,
            is_required=True,
            description='Docker image to use for launched task Jobs '
            '(e.g. "mycompany.com/dagster-k8s-image:latest").',
        ),
        'image_pull_policy': Field(
            StringSource,
            is_required=False,
            default_value='IfNotPresent',
            description='Image pull policy to set on the launched task Job Pods. Defaults to '
            '"IfNotPresent".',
        ),
        'image_pull_secrets': Field(
            Noneable(Array(Shape({'name': StringSource}))),
            is_required=False,
            description='(Advanced) Specifies that Kubernetes should get the credentials from '
            'the Secrets named in this list.',
        ),
        'service_account_name': Field(
            Noneable(StringSource),
            is_required=False,
            description='(Advanced) Override the name of the Kubernetes service account under '
            'which to run the Job.',
        ),
        'env_config_maps': Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description='A list of custom ConfigMapEnvSource names from which to draw '
            'environment variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
            'https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#define-an-environment-variable-for-a-container',
        ),
        'env_secrets': Field(
            Noneable(Array(StringSource)),
            is_required=False,
            description='A list of custom Secret names from which to draw environment '
            'variables (using ``envFrom``) for the Job. Default: ``[]``. See: '
            'https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#configure-all-key-value-pairs-in-a-secret-as-container-environment-variables',
        ),
    }
def define_intermediate_storage_config_cls(mode_definition):
    """Return a ``Selector`` over the mode's intermediate storage definitions.

    Each option is keyed by the definition's name and holds a ``Shape`` with a
    ``"config"`` key when the definition's ``config_schema`` is truthy, or an
    empty ``Shape`` otherwise.
    """
    check.inst_param(mode_definition, "mode_definition", ModeDefinition)

    options = {}
    for storage_def in mode_definition.intermediate_storage_defs:
        if storage_def.config_schema:
            shape_fields = {"config": storage_def.config_schema}
        else:
            shape_fields = {}
        options[storage_def.name] = Field(Shape(fields=shape_fields))

    return Selector(options)
def define_executor_config_cls(mode_definition):
    """Return a ``Selector`` over the mode's executor definitions.

    Each option is keyed by the executor's name and holds a ``Shape`` with a
    ``'config'`` key when the executor's ``config_field`` is truthy, or an
    empty ``Shape`` otherwise.
    """
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    options = {}
    for definition in mode_definition.executor_defs:
        if definition.config_field:
            shape_fields = {'config': definition.config_field}
        else:
            shape_fields = {}
        options[definition.name] = Field(Shape(fields=shape_fields))

    return Selector(options)
def define_solid_config_cls(config_schema, inputs_field, outputs_field):
    """Return a ``Shape`` over a solid's ``config``, ``inputs`` and ``outputs`` fields.

    Each argument is validated with ``check_opt_field_param``; entries that
    are ``None`` are removed before the shape is built.
    """
    check_opt_field_param(config_schema, "config_schema")
    check_opt_field_param(inputs_field, "inputs_field")
    check_opt_field_param(outputs_field, "outputs_field")

    candidate_fields = {
        "config": config_schema,
        "inputs": inputs_field,
        "outputs": outputs_field,
    }
    return Shape(remove_none_entries(candidate_fields))
def define_storage_config_cls(mode_definition):
    """Return a ``Selector`` over the mode's system storage definitions.

    Each option is keyed by the definition's name and holds a ``Shape`` with a
    ``'config'`` key when the definition's ``config_field`` is truthy, or an
    empty ``Shape`` otherwise.
    """
    check.inst_param(mode_definition, 'mode_definition', ModeDefinition)

    options = {}
    for definition in mode_definition.system_storage_defs:
        if definition.config_field:
            shape_fields = {'config': definition.config_field}
        else:
            shape_fields = {}
        options[definition.name] = Field(Shape(fields=shape_fields))

    return Selector(options)
def define_executor_config_cls(mode_definition):
    """Return a ``Selector`` over the mode's executor definitions.

    Each option is keyed by the executor's name and holds a ``Shape`` with a
    ``"config"`` key when the executor's ``config_schema`` is truthy, or an
    empty ``Shape`` otherwise.
    """
    check.inst_param(mode_definition, "mode_definition", ModeDefinition)

    options = {}
    for definition in mode_definition.executor_defs:
        if definition.config_schema:
            shape_fields = {"config": definition.config_schema}
        else:
            shape_fields = {}
        options[definition.name] = Field(Shape(fields=shape_fields))

    return Selector(options)