Exemplo n.º 1
0
class DynamicTaskConfig(Config):
    """Settings section (task family ``dynamic_task``) for dynamically generated tasks."""

    _conf__task_family = "dynamic_task"

    # Boolean switch, on by default.
    enabled = parameter(
        description="Allow tasks calls at runtime",
        default=True,
    )[bool]

    # Boolean switch, off by default.
    in_memory_outputs = parameter(
        description="Store outputs for inline tasks in memory",
        default=False,
    )[bool]
Exemplo n.º 2
0
class HistogramConfig(Config):
    """
    Configuration section for histogram calculation (task family ``histogram``).

    Every option declared here toggles or locates a cache used for Spark
    data, as stated in the individual parameter descriptions.
    """

    _conf__task_family = "histogram"

    # Optional string value; unset (None) by default.
    # NOTE(review): the description mentions `spark_temp_dir`, which is not the
    # name of this parameter - confirm which setting the text should refer to.
    spark_parquet_cache_dir = parameter(
        default=None,
        description="Enables pre-cache DF using .parquet store at `spark_temp_dir`",
    )[str]

    spark_cache_dataframe = parameter(
        default=False, description="Enables caching of the whole frame"
    )[bool]

    # Declared as bool: this is an on/off switch with a boolean default, like
    # `spark_cache_dataframe` above. It was previously typed as str, which
    # contradicted the default; presumably a configured "False" would then have
    # been kept as a non-empty (truthy) string - confirm against the parameter
    # parsing rules.
    spark_cache_dataframe_column = parameter(
        default=True,
        description="Enables caching of the numerical df during histogram calculation",
    )[bool]
Exemplo n.º 3
0
class TrackingConfig(Config):
    """
    Configuration section for tracking (task family ``tracking``).

    Declares the switches that control what is calculated and logged for
    tracked values (size, schema, stats, preview, meta, histograms) and
    exposes them as a ``ValueMetaConf`` through ``get_value_meta_conf``.
    """

    _conf__task_family = "tracking"

    project = parameter(
        default=None,
        description="Project to which run should be assigned. "
        "If not set default project is used. Tracking server will select project with is_default == True.",
    )[str]

    databand_external_url = parameter(
        default=None,
        description="Tracker URL to be used for tracking from external systems",
    )[str]

    log_value_size = parameter(
        default=True,
        description="Calculate and log value size (can cause a full scan on not-indexable distributed memory objects) ",
    )[bool]

    log_value_schema = parameter(
        default=True, description="Calculate and log value schema "
    )[bool]

    log_value_stats = parameter(
        default=True,
        description="Calculate and log value stats(expensive to calculate, better use log_stats on parameter level)",
    )[bool]

    log_value_preview = parameter(
        default=True,
        description="Calculate and log value preview. Can be expensive on Spark.",
    )[bool]

    # The default comes from the module-level _DEFAULT_VALUE_PREVIEW_MAX_LEN.
    log_value_preview_max_len = parameter(
        description="Max size of value preview to be saved at DB, max value=50000"
    ).value(_DEFAULT_VALUE_PREVIEW_MAX_LEN)

    log_value_meta = parameter(
        default=True, description="Calculate and log value meta "
    )[bool]

    log_histograms = parameter(
        default=True,
        description="Enable calculation and tracking of histograms. Can be expensive",
    )[bool]

    # The help text marks SMART as the default so that it agrees with the
    # `default=` argument, and each sentence ends with a separator because
    # adjacent string literals are concatenated verbatim.
    value_reporting_strategy = parameter(
        default=ValueTrackingLevel.SMART,
        description="Multiple strategies with different limitations on potentially expensive calculation for value_meta. "
        "ALL => no limitations. "
        "SMART (default) => restrictions on lazy evaluation types. "
        "NONE => limit everything.",
    ).enum(ValueTrackingLevel)

    track_source_code = parameter(
        default=False,
        description="Enable tracking of function, module and file source code",
    )[bool]

    # NOTE(review): the name refers to "size" while the description refers to
    # "preview" - confirm which behavior this switch actually controls.
    auto_disable_slow_size = parameter(
        default=True,
        description="Auto disable slow preview for Spark DF with text formats",
    )[bool]

    # NOTE(review): the default is a dict literal evaluated once at class
    # creation; avoid mutating it in place unless the parameter machinery is
    # confirmed to copy defaults.
    flatten_operator_fields = parameter(
        default={},
        description="Control which of the operator's fields would be flatten when tracked",
    )[Dict[str, str]]

    capture_tracking_log = parameter(
        default=False, description="Enable log capturing for tracking tasks"
    )[bool]

    def get_value_meta_conf(self, meta_conf, value_type, target=None):
        # type: (ValueMetaConf, ValueType, Optional[Target]) -> ValueMetaConf
        """
        Combine an explicit meta conf with type-based and config-based ones.

        :param meta_conf: meta conf supplied by the caller; it is merged first.
        :param value_type: value type passed to ``calc_meta_conf_for_value_type``
            together with ``self.value_reporting_strategy``.
        :param target: optional target forwarded to the same helper.
        :return: ``meta_conf`` merged with the type-based conf and then with the
            conf built from this configuration (in that order).
        """
        meta_conf_by_type = calc_meta_conf_for_value_type(
            self.value_reporting_strategy, value_type, target
        )
        # translating TrackingConfig to meta_conf
        meta_conf_by_config = self._build_meta_conf()
        # Order matters: the type-based conf is merged before the config-based
        # one. Presumably merge_if_none only fills fields that are still None -
        # confirm against ValueMetaConf.merge_if_none.
        return meta_conf.merge_if_none(meta_conf_by_type).merge_if_none(
            meta_conf_by_config
        )

    def _build_meta_conf(self):
        # type: () -> ValueMetaConf
        """
        Translate this configuration into a value meta conf.

        The resulting object is expected to have every inner value set,
        with no ``None`` entries.
        """
        return ValueMetaConf(
            log_schema=self.log_value_schema,
            log_size=self.log_value_size,
            log_preview_size=self.log_value_preview_max_len,
            log_preview=self.log_value_preview,
            log_stats=self.log_value_stats,
            log_histograms=self.log_histograms,
        )