Example #1
 def __new__(cls, hook_name, is_skipped=None):
     return super(HookExecutionResult, cls).__new__(
         cls,
         hook_name=check.str_param(hook_name, "hook_name"),
         is_skipped=check.opt_bool_param(is_skipped,
                                         "is_skipped",
                                         default=False),
     )
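All of the examples below funnel optional boolean parameters through `check.opt_bool_param`. As a rough sketch of the contract these call sites rely on (the real implementation lives in Dagster's `check` module, and `CheckError` is its failure type; the exact message format is an assumption here), it behaves roughly like:

    def opt_bool_param(obj, param_name, default=None):
        # None means "not provided": fall back to the default.
        if obj is None:
            return default
        # Anything non-bool is rejected with a descriptive error.
        if not isinstance(obj, bool):
            raise CheckError(f'Param "{param_name}" is not a bool: {obj!r}')
        return obj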
Example #2
 def __init__(self, name, constraints=None, is_required=None):
     self.name = check.str_param(name, "name")
     self.is_required = check.opt_bool_param(is_required,
                                             "is_required",
                                             default=True)
     self.constraints = check.opt_list_param(constraints,
                                             "constraints",
                                             of_type=Constraint)
Example #3
    def __init__(
        self, dagster_type=None, name=None, description=None, is_optional=None, is_required=None
    ):
        self._name = check_valid_name(check.opt_str_param(name, 'name', DEFAULT_OUTPUT))
        self._runtime_type = resolve_dagster_type(dagster_type)
        self._description = check.opt_str_param(description, 'description')
        check.opt_bool_param(is_optional, 'is_optional')
        check.opt_bool_param(is_required, 'is_required')

        canonical_is_required = canonicalize_backcompat_args(
            new_val=is_required,
            new_arg='is_required',
            old_val=is_optional,
            old_arg='is_optional',
            coerce_old_to_new=lambda val: not val,
            additional_warn_txt='"is_optional" deprecated in 0.7.0 and will be removed in 0.8.0. Users should use "is_required" instead.',
        )
        self._optional = False if (canonical_is_required is None) else not canonical_is_required
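The `canonicalize_backcompat_args` helper reconciles the deprecated `is_optional` flag with its replacement `is_required`. A minimal sketch consistent with how it is called in this example (the actual implementation may differ in details such as the warning text):

    import warnings

    def canonicalize_backcompat_args(new_val, new_arg, old_val, old_arg,
                                     coerce_old_to_new=None, additional_warn_txt=None):
        # Supplying both the new and the deprecated argument is ambiguous.
        if new_val is not None and old_val is not None:
            raise CheckError(f'Cannot use both "{new_arg}" and deprecated "{old_arg}".')
        if old_val is not None:
            warnings.warn(f'"{old_arg}" is deprecated. {additional_warn_txt or ""}')
            # e.g. is_optional=True is coerced to is_required=False via `not val`.
            return coerce_old_to_new(old_val) if coerce_old_to_new else old_val
        return new_val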
Example #4
 def __init__(
     self,
     config_schema: Optional[Any] = None,
     receive_processed_config_values: Optional[bool] = None,
 ):
     self.config_schema = config_schema
     self.receive_processed_config_values = check.opt_bool_param(
         receive_processed_config_values, "receive_processed_config_values"
     )
Example #5
 def __new__(cls, daemon_type, required, healthy, last_heartbeat):
     return super(DaemonStatus, cls).__new__(
         cls,
         daemon_type=check.str_param(daemon_type, "daemon_type"),
         required=check.bool_param(required, "required"),
         healthy=check.opt_bool_param(healthy, "healthy"),
         last_heartbeat=check.opt_inst_param(last_heartbeat,
                                             "last_heartbeat",
                                             DaemonHeartbeat),
     )
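A hypothetical construction (field values invented for illustration), showing why `healthy` goes through `opt_bool_param` with no default: `None` models "health unknown", e.g. before any heartbeat arrives, which an explicit `False` could not express:

    unknown = DaemonStatus(
        daemon_type="SCHEDULER",   # hypothetical daemon type string
        required=True,
        healthy=None,              # stays None: health not yet determined
        last_heartbeat=None,
    )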
Example #6
    def __init__(
        self,
        dagster_type=None,
        name=None,
        description=None,
        is_required=None,
        io_manager_key=None,
        metadata=None,
        asset_key=None,
        asset_partitions=None,
        # make sure new parameters are updated in combine_with_inferred below
    ):
        self._name = check_valid_name(
            check.opt_str_param(name, "name", DEFAULT_OUTPUT))
        self._type_not_set = dagster_type is None
        self._dagster_type = resolve_dagster_type(dagster_type)
        self._description = check.opt_str_param(description, "description")
        self._is_required = check.opt_bool_param(is_required,
                                                 "is_required",
                                                 default=True)
        self._manager_key = check.opt_str_param(io_manager_key,
                                                "io_manager_key",
                                                default="io_manager")
        if metadata:
            experimental_arg_warning("metadata", "OutputDefinition.__init__")
        self._metadata = metadata

        if asset_key:
            experimental_arg_warning("asset_key", "OutputDefinition.__init__")

        if callable(asset_key):
            self._asset_key_fn = asset_key
        elif asset_key is not None:
            asset_key = check.opt_inst_param(asset_key, "asset_key", AssetKey)
            self._asset_key_fn = lambda _: asset_key
        else:
            self._asset_key_fn = None

        if asset_partitions:
            experimental_arg_warning("asset_partitions",
                                     "OutputDefinition.__init__")
            check.param_invariant(
                asset_key is not None,
                "asset_partitions",
                'Cannot specify "asset_partitions" argument without also specifying "asset_key"',
            )

        if callable(asset_partitions):
            self._asset_partitions_fn = asset_partitions
        elif asset_partitions is not None:
            asset_partitions = check.opt_set_param(asset_partitions,
                                                   "asset_partitions", str)
            self._asset_partitions_fn = lambda _: asset_partitions
        else:
            self._asset_partitions_fn = None
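Note how both `asset_key` and `asset_partitions` are normalized: a static value is wrapped in a lambda and a callable is kept as-is, so downstream code can always invoke `self._asset_key_fn(...)` uniformly. Two hypothetical call sites exercising both branches (the `context.partition` attribute access is illustrative only; the callable receives whatever context object Dagster passes at resolution time):

    static_out = OutputDefinition(asset_key=AssetKey(["warehouse", "events"]))
    dynamic_out = OutputDefinition(
        asset_key=lambda context: AssetKey(["events", context.partition])
    )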
Example #7
 def __new__(cls, host, port=None, socket=None, location_name=None, use_ssl=None):
     return super(GrpcServerRepositoryLocationOrigin, cls).__new__(
         cls,
         check.str_param(host, "host"),
         check.opt_int_param(port, "port"),
         check.opt_str_param(socket, "socket"),
         check.str_param(location_name, "location_name")
         if location_name
         else _assign_grpc_location_name(port, socket, host),
         use_ssl if check.opt_bool_param(use_ssl, "use_ssl") else None,
     )
Example #8
 def __new__(cls, run_config=None, tags=None, should_execute=None):
     return super(ExternalExecutionParamsData, cls).__new__(
         cls,
         run_config=check.opt_dict_param(run_config, "run_config"),
         tags=check.opt_dict_param(tags,
                                   "tags",
                                   key_type=str,
                                   value_type=str),
         should_execute=check.opt_bool_param(should_execute,
                                             "should_execute"),
     )
Example #9
 def __init__(self,
              dagster_type=None,
              name=None,
              description=None,
              is_required=None):
     self._name = check_valid_name(
         check.opt_str_param(name, "name", DEFAULT_OUTPUT))
     self._dagster_type = resolve_dagster_type(dagster_type)
     self._description = check.opt_str_param(description, "description")
     self._is_required = check.opt_bool_param(is_required,
                                              "is_required",
                                              default=True)
Example #10
    def _runs_query(
        self,
        filters: Optional[PipelineRunsFilter] = None,
        cursor: Optional[str] = None,
        limit: Optional[int] = None,
        columns: Optional[List[str]] = None,
        order_by: Optional[str] = None,
        ascending: bool = False,
    ):

        filters = check.opt_inst_param(filters,
                                       "filters",
                                       PipelineRunsFilter,
                                       default=PipelineRunsFilter())
        check.opt_str_param(cursor, "cursor")
        check.opt_int_param(limit, "limit")
        check.opt_list_param(columns, "columns")
        check.opt_str_param(order_by, "order_by")
        check.opt_bool_param(ascending, "ascending")

        if columns is None:
            columns = ["run_body"]

        base_query_columns = [
            getattr(RunsTable.c, column) for column in columns
        ]

        # If we have a tags filter, then we need to select from a joined table
        if filters.tags:
            base_query = db.select(base_query_columns).select_from(
                RunsTable.join(RunTagsTable,
                               RunsTable.c.run_id == RunTagsTable.c.run_id))
        else:
            base_query = db.select(base_query_columns).select_from(RunsTable)

        query = self._add_filters_to_query(base_query, filters)
        query = self._add_cursor_limit_to_query(query, cursor, limit, order_by,
                                                ascending)

        return query
Example #11
def get_schedule_states_or_error(
    graphene_info, repository_selector, with_no_schedule_definition_filter=None
):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.opt_inst_param(repository_selector, "repository_selector", RepositorySelector)
    check.opt_bool_param(
        with_no_schedule_definition_filter, "with_no_schedule_definition_filter", default=False
    )

    instance = graphene_info.context.instance
    if not repository_selector:
        stored_schedule_states = instance.all_stored_job_state(job_type=JobType.SCHEDULE)
        external_schedules = [
            schedule
            for repository_location in graphene_info.context.repository_locations
            for repository in repository_location.get_repositories().values()
            for schedule in repository.get_external_schedules()
        ]
        return _get_schedule_states(
            graphene_info,
            stored_schedule_states,
            external_schedules,
            with_no_schedule_definition_filter,
        )

    location = graphene_info.context.get_repository_location(repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    repository_origin_id = repository.get_external_origin().get_id()

    schedule_states = instance.all_stored_job_state(
        repository_origin_id=repository_origin_id, job_type=JobType.SCHEDULE
    )

    return _get_schedule_states(
        graphene_info,
        schedule_states,
        repository.get_external_schedules(),
        with_no_schedule_definition_filter,
    )
Example #12
def get_schedule_states_or_error(graphene_info,
                                 repository_selector,
                                 with_no_schedule_definition_filter=None):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(repository_selector, 'repository_selector',
                     RepositorySelector)
    check.opt_bool_param(with_no_schedule_definition_filter,
                         'with_no_schedule_definition_filter',
                         default=False)

    location = graphene_info.context.get_repository_location(
        repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    repository_origin_id = repository.get_origin().get_id()
    instance = graphene_info.context.instance

    results = [
        graphene_info.schema.type_named('ScheduleState')(
            graphene_info, schedule_state=schedule_state)
        for schedule_state in instance.all_stored_schedule_state(
            repository_origin_id=repository_origin_id)
    ]

    if with_no_schedule_definition_filter:
        external_schedules = repository.get_external_schedules()
        external_schedule_origin_ids = set(
            external_schedule.get_origin_id()
            for external_schedule in external_schedules)

        # Filter for all schedule states for which there are no matching external schedules with the
        # same origin id
        results = [
            schedule_state for schedule_state in results
            if schedule_state.schedule_origin_id not in external_schedule_origin_ids
        ]

    return graphene_info.schema.type_named('ScheduleStates')(results=results)
Example #13
 def __new__(
     cls,
     config_fn: Callable[[Any], Any],
     config_schema: Optional[Any] = None,
     receive_processed_config_values: Optional[bool] = None,
 ):
     return super(ConfigMapping, cls).__new__(
         cls,
         config_fn=check.callable_param(config_fn, "config_fn"),
         config_schema=convert_user_facing_definition_config_schema(
             config_schema),
         receive_processed_config_values=check.opt_bool_param(
             receive_processed_config_values,
             "receive_processed_config_values"),
     )
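A hypothetical mapping built with this constructor (op name "my_op" and the schema are invented for illustration): the outer `config_schema` is what users write, and `config_fn` translates it into the wrapped definition's full config:

    mapping = ConfigMapping(
        config_fn=lambda cfg: {"ops": {"my_op": {"config": {"n": cfg["simplified"]}}}},
        config_schema={"simplified": int},
    )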
Example #14
 def __new__(
     cls,
     address=None,
     timeout=None,
     scheduler_file=None,
     direct_to_workers=False,
     heartbeat_interval=None,
 ):
     return super(DaskConfig, cls).__new__(
         cls,
         address=check.opt_str_param(address, 'address'),
         timeout=check.opt_int_param(timeout, 'timeout'),
         scheduler_file=check.opt_str_param(scheduler_file, 'scheduler_file'),
         direct_to_workers=check.opt_bool_param(direct_to_workers, 'direct_to_workers'),
         heartbeat_interval=check.opt_int_param(heartbeat_interval, 'heartbeat_interval'),
     )
Example #15
    def __init__(
        self,
        dagster_type=None,
        name=None,
        description=None,
        is_required=None,
        asset_store_key=None,
        asset_metadata=None,
    ):
        from dagster.core.storage.asset_store import AssetStoreHandle

        self._name = check_valid_name(check.opt_str_param(name, "name", DEFAULT_OUTPUT))
        self._dagster_type = resolve_dagster_type(dagster_type)
        self._description = check.opt_str_param(description, "description")
        self._is_required = check.opt_bool_param(is_required, "is_required", default=True)
        self._asset_store_handle = (
            AssetStoreHandle(asset_store_key, asset_metadata) if asset_store_key else None
        )
Example #16
 def __init__(
     self,
     dagster_type=None,
     name=None,
     description=None,
     is_required=None,
     asset_metadata=None,
     manager_key=None,
     metadata=None,
 ):
     self._name = check_valid_name(check.opt_str_param(name, "name", DEFAULT_OUTPUT))
     self._dagster_type = resolve_dagster_type(dagster_type)
     self._description = check.opt_str_param(description, "description")
     self._is_required = check.opt_bool_param(is_required, "is_required", default=True)
     self._manager_key = check.opt_str_param(
         manager_key, "manager_key", default="object_manager"
     )
     self._metadata = asset_metadata or metadata
Example #17
 def __new__(
     cls,
     solid_handle: SolidHandle,
     name: str,
     dagster_type_key: str,
     is_required: bool,
     should_materialize: Optional[bool] = None,
 ):
     return super(StepOutput, cls).__new__(
         cls,
         solid_handle=check.inst_param(solid_handle, "solid_handle",
                                       SolidHandle),
         name=check.str_param(name, "name"),
         dagster_type_key=check.str_param(dagster_type_key,
                                          "dagster_type_key"),
         is_required=check.bool_param(is_required, "is_required"),
         should_materialize=check.opt_bool_param(should_materialize,
                                                 "should_materialize"),
     )
Example #18
 def __new__(cls,
             pipeline_origin,
             pipeline_run_id,
             instance_ref,
             set_exit_code_on_failure=None):
     return super(ExecuteRunArgs, cls).__new__(
         cls,
         pipeline_origin=check.inst_param(
             pipeline_origin,
             "pipeline_origin",
             PipelinePythonOrigin,
         ),
         pipeline_run_id=check.str_param(pipeline_run_id,
                                         "pipeline_run_id"),
         instance_ref=check.opt_inst_param(instance_ref, "instance_ref",
                                           InstanceRef),
         set_exit_code_on_failure=(
             True if check.opt_bool_param(set_exit_code_on_failure,
                                          "set_exit_code_on_failure")
             else None  # for back-compat: only True is preserved
         ),
     )
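Note the asymmetry: the flag is validated as an optional bool but then collapsed so that only `True` survives; both `False` and `None` are stored as `None`, which the trailing comment attributes to backwards compatibility. A sketch of the resulting behavior (assuming a `pipeline_origin` built elsewhere and a hypothetical run id):

    args = ExecuteRunArgs(pipeline_origin=pipeline_origin,
                          pipeline_run_id="run_123",
                          instance_ref=None,
                          set_exit_code_on_failure=False)
    assert args.set_exit_code_on_failure is None   # False collapses to None
    args = ExecuteRunArgs(pipeline_origin=pipeline_origin,
                          pipeline_run_id="run_123",
                          instance_ref=None,
                          set_exit_code_on_failure=True)
    assert args.set_exit_code_on_failure is True   # only True is preserved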
Example #19
 def __init__(
     self,
     dagster_type=None,
     name=None,
     description=None,
     is_required=None,
     io_manager_key=None,
     metadata=None,
 ):
     self._name = check_valid_name(
         check.opt_str_param(name, "name", DEFAULT_OUTPUT))
     self._dagster_type = resolve_dagster_type(dagster_type)
     self._description = check.opt_str_param(description, "description")
     self._is_required = check.opt_bool_param(is_required,
                                              "is_required",
                                              default=True)
     self._manager_key = check.opt_str_param(io_manager_key,
                                             "io_manager_key",
                                             default="io_manager")
     if metadata:
         experimental_arg_warning("metadata", "OutputDefinition")
     self._metadata = metadata
Example #20
    def __init__(
        self,
        dagster_type,
        default_value=FIELD_NO_DEFAULT_PROVIDED,
        is_optional=None,
        is_required=None,
        description=None,
    ):
        if not isinstance(dagster_type, ConfigType):
            config_type = resolve_to_config_type(dagster_type)
            if not config_type:
                raise DagsterInvalidDefinitionError(
                    (
                        'Attempted to pass {value_repr} to a Field that expects a valid '
                        'dagster type usable in config (e.g. Dict, Int, String et al).'
                    ).format(value_repr=repr(dagster_type))
                )
        else:
            config_type = dagster_type

        self.config_type = check.inst_param(config_type, 'config_type', ConfigType)

        self.description = check.opt_str_param(description, 'description')

        check.opt_bool_param(is_optional, 'is_optional')
        check.opt_bool_param(is_required, 'is_required')

        canonical_is_required = canonicalize_backcompat_args(
            new_val=is_required,
            new_arg='is_required',
            old_val=is_optional,
            old_arg='is_optional',
            coerce_old_to_new=lambda val: not val,
            additional_warn_txt='"is_optional" deprecated in 0.7.0 and will be removed in 0.8.0.',
        )

        if canonical_is_required is True:
            check.param_invariant(
                default_value == FIELD_NO_DEFAULT_PROVIDED,
                'default_value',
                'required arguments should not specify default values',
            )

        if canonical_is_required is None:
            # neither is_required nor is_optional were specified
            canonical_is_required = not all_optional_type(self.config_type)

            self._default_value = (
                default_value
                if canonical_is_required
                else post_process_config(self.config_type, None)
            )
        else:
            self._default_value = default_value

        if (
            config_type.kind == ConfigTypeKind.SCALAR
            and self._default_value != FIELD_NO_DEFAULT_PROVIDED
        ):
            check.param_invariant(
                config_type.is_config_scalar_valid(self._default_value),
                'default_value',
                (
                    'default value not valid for config type {name}, '
                    'got value {val} of type {type}'
                ).format(
                    name=config_type.given_name,
                    val=self._default_value,
                    type=type(self._default_value),
                ),
            )

        self.is_optional = not canonical_is_required
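Hypothetical call sites showing the two spellings this back-compat shim accepts; both end up with the same canonical flag:

    f_old = Field(int, is_optional=True)    # deprecated spelling, warns, coerced via `not val`
    f_new = Field(int, is_required=False)   # preferred spelling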
Example #21
def schedule_partition_range(
    start,
    end,
    cron_schedule,
    fmt,
    timezone,
    execution_time_to_partition_fn,
    inclusive=False,
):
    check.inst_param(start, "start", datetime.datetime)
    check.opt_inst_param(end, "end", datetime.datetime)
    check.str_param(cron_schedule, "cron_schedule")
    check.str_param(fmt, "fmt")
    check.opt_str_param(timezone, "timezone")
    check.callable_param(execution_time_to_partition_fn, "execution_time_to_partition_fn")
    check.opt_bool_param(inclusive, "inclusive")

    if end and start > end:
        raise DagsterInvariantViolationError(
            'Selected date range start "{start}" is after date range end "{end}"'.format(
                start=start.strftime(fmt),
                end=end.strftime(fmt),
            )
        )

    def get_schedule_range_partitions(current_time=None):
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone if timezone else "UTC"
        _start = (
            to_timezone(start, tz)
            if isinstance(start, PendulumDateTime)
            else pendulum.instance(start, tz=tz)
        )

        if end:
            _end = end
        elif current_time:
            _end = current_time
        else:
            _end = pendulum.now(tz)

        # coerce to the definition timezone
        if isinstance(_end, PendulumDateTime):
            _end = to_timezone(_end, tz)
        else:
            _end = pendulum.instance(_end, tz=tz)

        end_timestamp = _end.timestamp()

        partitions = []
        for next_time in schedule_execution_time_iterator(_start.timestamp(), cron_schedule, tz):

            partition_time = execution_time_to_partition_fn(next_time)

            if partition_time.timestamp() > end_timestamp:
                break

            if partition_time.timestamp() < _start.timestamp():
                continue

            partitions.append(Partition(value=partition_time, name=partition_time.strftime(fmt)))

        return partitions if inclusive else partitions[:-1]

    return get_schedule_range_partitions
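A hypothetical daily partition set built with this factory (the cron string, format, and `execution_time_to_partition_fn` are invented for illustration): each midnight run materializes the previous day, and with `inclusive=False` (the default) `partitions[:-1]` drops the final partition, whose window may still be in progress:

    import datetime

    get_partitions = schedule_partition_range(
        start=datetime.datetime(2021, 1, 1),
        end=None,
        cron_schedule="0 0 * * *",
        fmt="%Y-%m-%d",
        timezone="UTC",
        execution_time_to_partition_fn=lambda t: t.subtract(days=1),
        inclusive=False,
    )
    partitions = get_partitions()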
Example #22
    def __init__(
        self,
        service_account_name,
        instance_config_map,
        postgres_password_secret=None,
        dagster_home=None,
        job_image=None,
        image_pull_policy=None,
        image_pull_secrets=None,
        load_incluster_config=True,
        kubeconfig_file=None,
        inst_data=None,
        job_namespace="default",
        env_config_maps=None,
        env_secrets=None,
        env_vars=None,
        k8s_client_batch_api=None,
        volume_mounts=None,
        volumes=None,
        labels=None,
        fail_pod_on_run_failure=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data",
                                               ConfigurableClassData)
        self.job_namespace = check.str_param(job_namespace, "job_namespace")

        self.load_incluster_config = load_incluster_config
        self.kubeconfig_file = kubeconfig_file
        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                "`kubeconfig_file` is set but `load_incluster_config` is True.",
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, "kubeconfig_file")
            kubernetes.config.load_kube_config(kubeconfig_file)

        self._fixed_batch_api = k8s_client_batch_api

        self._job_config = None
        self._job_image = check.opt_str_param(job_image, "job_image")
        self.dagster_home = check.str_param(dagster_home, "dagster_home")
        self._image_pull_policy = check.opt_str_param(image_pull_policy,
                                                      "image_pull_policy",
                                                      "IfNotPresent")
        self._image_pull_secrets = check.opt_list_param(image_pull_secrets,
                                                        "image_pull_secrets",
                                                        of_type=dict)
        self._service_account_name = check.str_param(service_account_name,
                                                     "service_account_name")
        self.instance_config_map = check.str_param(instance_config_map,
                                                   "instance_config_map")
        self.postgres_password_secret = check.opt_str_param(
            postgres_password_secret, "postgres_password_secret")
        self._env_config_maps = check.opt_list_param(env_config_maps,
                                                     "env_config_maps",
                                                     of_type=str)
        self._env_secrets = check.opt_list_param(env_secrets,
                                                 "env_secrets",
                                                 of_type=str)
        self._env_vars = check.opt_list_param(env_vars,
                                              "env_vars",
                                              of_type=str)
        self._volume_mounts = check.opt_list_param(volume_mounts,
                                                   "volume_mounts")
        self._volumes = check.opt_list_param(volumes, "volumes")
        self._labels = check.opt_dict_param(labels,
                                            "labels",
                                            key_type=str,
                                            value_type=str)
        self._fail_pod_on_run_failure = check.opt_bool_param(
            fail_pod_on_run_failure, "fail_pod_on_run_failure")

        super().__init__()
Example #23
 def __init__(self, fail_on_setup=False, fail_on_teardown=False):
     self._fail_on_setup = check.opt_bool_param(fail_on_setup, "fail_on_setup")
     self._fail_on_teardown = check.opt_bool_param(fail_on_teardown, "fail_on_teardown")
Example #24
    def __init__(
        self,
        config,
        default_value=FIELD_NO_DEFAULT_PROVIDED,
        is_optional=None,
        is_required=None,
        description=None,
    ):
        from .validate import validate_config
        from .post_process import post_process_config

        self.config_type = check.inst(self._resolve_config_arg(config),
                                      ConfigType)

        self.description = check.opt_str_param(description, 'description')

        check.opt_bool_param(is_optional, 'is_optional')
        check.opt_bool_param(is_required, 'is_required')

        canonical_is_required = canonicalize_backcompat_args(
            new_val=is_required,
            new_arg='is_required',
            old_val=is_optional,
            old_arg='is_optional',
            coerce_old_to_new=lambda val: not val,
            additional_warn_txt='"is_optional" deprecated in 0.7.0 and will be removed in 0.8.0.',
        )

        if canonical_is_required is True:
            check.param_invariant(
                default_value == FIELD_NO_DEFAULT_PROVIDED,
                'default_value',
                'required arguments should not specify default values',
            )
        self._default_value = default_value

        # check explicit default value
        if self.default_provided:
            # invoke through property in case it is callable
            value = self.default_value
            evr = validate_config(self.config_type, value)
            if not evr.success:
                raise DagsterInvalidConfigError(
                    'Invalid default_value for Field.',
                    evr.errors,
                    default_value,
                )

        if canonical_is_required is None:
            # neither is_required nor is_optional were specified
            canonical_is_required = not all_optional_type(self.config_type)

            # on implicitly optional - set the default value
            # by resolving the defaults of the type
            if not canonical_is_required and self._default_value == FIELD_NO_DEFAULT_PROVIDED:
                evr = post_process_config(self.config_type, None)
                if not evr.success:
                    raise DagsterInvalidConfigError(
                        'Unable to resolve implicit default_value for Field.',
                        evr.errors,
                        None,
                    )
                self._default_value = evr.value

        self.is_optional = not canonical_is_required
Example #25
 def __init__(self, name, constraints=None, is_optional=False):
     self.name = check.str_param(name, 'name')
     self.is_optional = check.opt_bool_param(is_optional, 'is_optional')
     self.constraints = check.opt_list_param(constraints,
                                             'constraints',
                                             of_type=Constraint)
Example #26
 def __init__(self, raise_on_error=True):
     self._raise_on_error = check.opt_bool_param(raise_on_error,
                                                 'raise_on_error',
                                                 default=True)
Example #27
    def _runs_query(
        self,
        filters: Optional[PipelineRunsFilter] = None,
        cursor: Optional[str] = None,
        limit: Optional[int] = None,
        columns: Optional[List[str]] = None,
        order_by: Optional[str] = None,
        ascending: bool = False,
        bucket_by: Optional[Union[JobBucket, TagBucket]] = None,
    ):
        filters = check.opt_inst_param(filters,
                                       "filters",
                                       PipelineRunsFilter,
                                       default=PipelineRunsFilter())
        check.opt_str_param(cursor, "cursor")
        check.opt_int_param(limit, "limit")
        check.opt_list_param(columns, "columns")
        check.opt_str_param(order_by, "order_by")
        check.opt_bool_param(ascending, "ascending")

        if columns is None:
            columns = ["run_body"]

        query_columns = [getattr(RunsTable.c, column) for column in columns]

        if bucket_by:
            if limit or cursor:
                check.failed(
                    "cannot specify bucket_by and limit/cursor at the same time"
                )

            # this is a bucketed query, so we need to calculate rank to apply bucket-based limits
            # and ordering
            query_columns.append(
                self._bucket_rank_column(bucket_by, order_by, ascending))

            if isinstance(bucket_by, JobBucket):
                base_query = (
                    db.select(query_columns)
                    .select_from(
                        RunsTable.join(RunTagsTable,
                                       RunsTable.c.run_id == RunTagsTable.c.run_id)
                        if filters.tags
                        else RunsTable
                    )
                    .where(RunsTable.c.pipeline_name.in_(bucket_by.job_names))
                )
            else:
                check.invariant(isinstance(bucket_by, TagBucket))
                base_query = (
                    db.select(query_columns)
                    .select_from(
                        RunsTable.join(RunTagsTable,
                                       RunsTable.c.run_id == RunTagsTable.c.run_id)
                    )
                    .where(RunTagsTable.c.key == bucket_by.tag_key)
                    .where(RunTagsTable.c.value.in_(bucket_by.tag_values))
                )

            base_query = self._add_filters_to_query(base_query, filters)
            subquery = base_query.subquery()
            query = db.select(subquery).order_by(subquery.c.rank.asc())
            if bucket_by.bucket_limit:
                query = query.where(subquery.c.rank <= bucket_by.bucket_limit)
        else:
            if filters.tags:
                base_query = db.select(query_columns).select_from(
                    RunsTable.join(
                        RunTagsTable,
                        RunsTable.c.run_id == RunTagsTable.c.run_id))
            else:
                base_query = db.select(query_columns).select_from(RunsTable)

            query = self._add_filters_to_query(base_query, filters)
            query = self._add_cursor_limit_to_query(query, cursor, limit,
                                                    order_by, ascending)

        return query
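A hypothetical bucketed call (`storage` stands in for an instance of the surrounding class, the `JobBucket` keyword arguments mirror the fields the snippet reads, `job_names` and `bucket_limit`, and `"create_timestamp"` is an assumed column name). Note that `limit` and `cursor` are rejected alongside `bucket_by`:

    query = storage._runs_query(
        bucket_by=JobBucket(job_names=["etl", "reporting"], bucket_limit=5),
        order_by="create_timestamp",
        ascending=False,
    )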
Example #28
    def __init__(
        self,
        dagster_type=None,
        name=None,
        description=None,
        is_required=None,
        io_manager_key=None,
        metadata=None,
        asset_key=None,
        asset_partitions=None,
        asset_partitions_def=None
        # make sure new parameters are updated in combine_with_inferred below
    ):
        from dagster.core.definitions.partition import PartitionsDefinition

        self._name = (check_valid_name(
            check.opt_str_param(name, "name", DEFAULT_OUTPUT))
                      if name is not NoNameSentinel else None)
        self._type_not_set = dagster_type is None
        self._dagster_type = resolve_dagster_type(dagster_type)
        self._description = check.opt_str_param(description, "description")
        self._is_required = check.opt_bool_param(is_required,
                                                 "is_required",
                                                 default=True)
        self._manager_key = check.opt_str_param(io_manager_key,
                                                "io_manager_key",
                                                default="io_manager")
        self._metadata = check.opt_dict_param(metadata,
                                              "metadata",
                                              key_type=str)
        self._metadata_entries = check.is_list(
            normalize_metadata(self._metadata, [], allow_invalid=True),
            MetadataEntry)

        if asset_key:
            experimental_arg_warning("asset_key", "OutputDefinition.__init__")

        if callable(asset_key):
            warnings.warn(
                "Passing a function as the `asset_key` argument to `Out` or `OutputDefinition` is "
                "deprecated behavior and will be removed in version 0.15.0.")
        else:
            check.opt_inst_param(asset_key, "asset_key", AssetKey)

        self._asset_key = asset_key

        if asset_partitions:
            experimental_arg_warning("asset_partitions",
                                     "OutputDefinition.__init__")
            check.param_invariant(
                asset_key is not None,
                "asset_partitions",
                'Cannot specify "asset_partitions" argument without also specifying "asset_key"',
            )

        if callable(asset_partitions):
            self._asset_partitions_fn = asset_partitions
        elif asset_partitions is not None:
            asset_partitions = check.opt_set_param(asset_partitions,
                                                   "asset_partitions", str)
            self._asset_partitions_fn = lambda _: asset_partitions
        else:
            self._asset_partitions_fn = None

        if asset_partitions_def:
            experimental_arg_warning("asset_partitions_def",
                                     "OutputDefinition.__init__")
        self._asset_partitions_def = check.opt_inst_param(
            asset_partitions_def, "asset_partitions_def", PartitionsDefinition)
Example #29
    def __init__(
        self,
        config,
        default_value=FIELD_NO_DEFAULT_PROVIDED,
        is_required=None,
        description=None,
    ):
        from .validate import validate_config
        from .post_process import resolve_defaults

        self.config_type = check.inst(self._resolve_config_arg(config),
                                      ConfigType)

        self.description = check.opt_str_param(description, 'description')

        check.opt_bool_param(is_required, 'is_required')

        if default_value != FIELD_NO_DEFAULT_PROVIDED:
            check.param_invariant(not (callable(default_value)),
                                  'default_value',
                                  'default_value cannot be a callable')

        if is_required is True:
            check.param_invariant(
                default_value == FIELD_NO_DEFAULT_PROVIDED,
                'default_value',
                'required arguments should not specify default values',
            )

        self._default_value = default_value

        # check explicit default value
        if self.default_provided:
            if self.config_type.kind == ConfigTypeKind.ENUM and is_enum_value(
                    default_value):
                raise DagsterInvalidDefinitionError((
                    'You have passed a python enum value as the default value '
                    'of the config enum type {name}. You must pass in the '
                    'underlying string representation as the default value. '
                    'One of {value_set}.'
                ).format(
                    value_set=[
                        ev.config_value for ev in self.config_type.enum_values
                    ],
                    name=self.config_type.given_name,
                ))

            evr = validate_config(self.config_type, default_value)
            if not evr.success:
                raise DagsterInvalidConfigError(
                    'Invalid default_value for Field.',
                    evr.errors,
                    default_value,
                )

        if is_required is None:
            is_required = not all_optional_type(self.config_type)

            # on implicitly optional - set the default value
            # by resolving the defaults of the type
            if not is_required and not self.default_provided:
                evr = resolve_defaults(self.config_type, None)
                if not evr.success:
                    raise DagsterInvalidConfigError(
                        'Unable to resolve implicit default_value for Field.',
                        evr.errors,
                        None,
                    )
                self._default_value = evr.value
        self._is_required = is_required
Example #30
    def __init__(
        self,
        instance_config_map,
        dagster_home,
        postgres_password_secret,
        load_incluster_config=True,
        kubeconfig_file=None,
        broker=None,
        backend=None,
        include=None,
        config_source=None,
        retries=None,
        inst_data=None,
        k8s_client_batch_api=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=None,
        volumes=None,
        service_account_name=None,
        image_pull_policy=None,
        image_pull_secrets=None,
        labels=None,
        fail_pod_on_run_failure=None,
    ):
        self._inst_data = check.opt_inst_param(inst_data, "inst_data", ConfigurableClassData)

        if load_incluster_config:
            check.invariant(
                kubeconfig_file is None,
                "`kubeconfig_file` is set but `load_incluster_config` is True.",
            )
            kubernetes.config.load_incluster_config()
        else:
            check.opt_str_param(kubeconfig_file, "kubeconfig_file")
            kubernetes.config.load_kube_config(kubeconfig_file)

        self._fixed_batch_api = k8s_client_batch_api

        self.instance_config_map = check.str_param(instance_config_map, "instance_config_map")
        self.dagster_home = check.str_param(dagster_home, "dagster_home")
        self.postgres_password_secret = check.str_param(
            postgres_password_secret, "postgres_password_secret"
        )
        self.broker = check.opt_str_param(broker, "broker")
        self.backend = check.opt_str_param(backend, "backend")
        self.include = check.opt_list_param(include, "include")
        self.config_source = check.opt_dict_param(config_source, "config_source")

        retries = check.opt_dict_param(retries, "retries") or {"enabled": {}}
        self.retries = RetryMode.from_config(retries)

        self._env_config_maps = check.opt_list_param(
            env_config_maps, "env_config_maps", of_type=str
        )
        self._env_secrets = check.opt_list_param(env_secrets, "env_secrets", of_type=str)

        self._volume_mounts = check.opt_list_param(volume_mounts, "volume_mounts")
        self._volumes = check.opt_list_param(volumes, "volumes")

        self._service_account_name = check.opt_str_param(
            service_account_name, "service_account_name"
        )
        self._image_pull_policy = check.opt_str_param(
            image_pull_policy, "image_pull_policy", "IfNotPresent"
        )
        self._image_pull_secrets = check.opt_list_param(
            image_pull_secrets, "image_pull_secrets", of_type=dict
        )
        self._labels = check.opt_dict_param(labels, "labels", key_type=str, value_type=str)
        self._fail_pod_on_run_failure = check.opt_bool_param(
            fail_pod_on_run_failure, "fail_pod_on_run_failure"
        )

        super().__init__()