class DynamicTaskConfig(Config):
    """
    Configuration section for dynamically generated tasks.
    """

    _conf__task_family = "dynamic_task"

    enabled = parameter(
        default=True, description="Allow task calls at runtime"
    )[bool]

    in_memory_outputs = parameter(
        default=False, description="Store outputs for inline tasks in memory"
    )[bool]
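
# A minimal configuration sketch, assuming the standard dbnd mapping of
# `_conf__task_family` to config-file sections: these parameters can be set
# from a dbnd .cfg file, e.g.
#
#   [dynamic_task]
#   enabled = True
#   in_memory_outputs = True
#
# or per-process via environment variables shaped like
# DBND__DYNAMIC_TASK__IN_MEMORY_OUTPUTS=True (naming convention assumed).
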
class HistogramConfig(Config):
    _conf__task_family = "histogram"

    spark_parquet_cache_dir = parameter(
        default=None,
        description="Enables pre-caching of the DataFrame using a .parquet store at `spark_temp_dir`",
    )[str]

    spark_cache_dataframe = parameter(
        default=False, description="Enables caching of the whole DataFrame"
    )[bool]

    spark_cache_dataframe_column = parameter(
        default=True,
        description="Enables caching of the numerical DataFrame during histogram calculation",
    )[bool]
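
# A minimal tuning sketch for large Spark DataFrames, assuming the same
# section-name mapping (`[histogram]` comes from `_conf__task_family` above);
# the cache path below is hypothetical and shown only for illustration:
#
#   [histogram]
#   spark_parquet_cache_dir = hdfs:///tmp/dbnd_histogram_cache
#   spark_cache_dataframe = False
#   spark_cache_dataframe_column = True
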
class TrackingConfig(Config):
    _conf__task_family = "tracking"

    project = parameter(
        default=None,
        description="Project to which the run should be assigned. "
        "If not set, the default project is used (the tracking server selects the project with is_default == True).",
    )[str]

    databand_external_url = parameter(
        default=None,
        description="Tracker URL to be used for tracking from external systems",
    )[str]

    log_value_size = parameter(
        default=True,
        description="Calculate and log value size "
        "(can cause a full scan of non-indexable distributed memory objects)",
    )[bool]

    log_value_schema = parameter(
        default=True, description="Calculate and log value schema"
    )[bool]

    log_value_stats = parameter(
        default=True,
        description="Calculate and log value stats "
        "(expensive to calculate; prefer log_stats at the parameter level)",
    )[bool]

    log_value_preview = parameter(
        default=True,
        description="Calculate and log value preview. Can be expensive on Spark.",
    )[bool]

    log_value_preview_max_len = parameter(
        description="Max size of a value preview to be saved in the DB (max value: 50000)"
    ).value(_DEFAULT_VALUE_PREVIEW_MAX_LEN)

    log_value_meta = parameter(
        default=True, description="Calculate and log value meta"
    )[bool]

    log_histograms = parameter(
        default=True,
        description="Enable calculation and tracking of histograms. Can be expensive.",
    )[bool]

    value_reporting_strategy = parameter(
        default=ValueTrackingLevel.SMART,
        description="Strategy that limits potentially expensive value_meta calculations. "
        "ALL => no limitations. "
        "SMART (default) => restrictions on lazy evaluation types. "
        "NONE => limit everything.",
    ).enum(ValueTrackingLevel)

    track_source_code = parameter(
        default=False,
        description="Enable tracking of function, module and file source code",
    )[bool]

    auto_disable_slow_size = parameter(
        default=True,
        description="Auto-disable slow preview for Spark DataFrames with text formats",
    )[bool]

    flatten_operator_fields = parameter(
        default={},
        description="Control which of the operator's fields are flattened when tracked",
    )[Dict[str, str]]

    capture_tracking_log = parameter(
        default=False, description="Enable log capturing for tracking tasks"
    )[bool]

    def get_value_meta_conf(self, meta_conf, value_type, target=None):
        # type: (ValueMetaConf, ValueType, Optional[Target]) -> ValueMetaConf
        meta_conf_by_type = calc_meta_conf_for_value_type(
            self.value_reporting_strategy, value_type, target
        )
        # translate this TrackingConfig into a ValueMetaConf
        meta_conf_by_config = self._build_meta_conf()
        # merge_if_none fills only fields that are still None, so precedence is:
        # the caller's meta_conf, then type/strategy restrictions, then config defaults
        return meta_conf.merge_if_none(meta_conf_by_type).merge_if_none(
            meta_conf_by_config
        )

    def _build_meta_conf(self):
        # type: () -> ValueMetaConf
        """
        Translate this configuration into a ValueMetaConf.
        All inner values are expected to be set (no Nones).
        """
        return ValueMetaConf(
            log_schema=self.log_value_schema,
            log_size=self.log_value_size,
            log_preview_size=self.log_value_preview_max_len,
            log_preview=self.log_value_preview,
            log_stats=self.log_value_stats,
            log_histograms=self.log_histograms,
        )
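
# Usage sketch for get_value_meta_conf: because merge_if_none only fills
# fields that are still None, each logging flag is resolved with three-level
# precedence: (1) the caller's ValueMetaConf (e.g. per-parameter settings),
# (2) restrictions derived from the value type and reporting strategy,
# (3) this TrackingConfig's defaults. Assuming a plainly constructed config
# (the exact factory used in production is an assumption):
#
#   tracking_conf = TrackingConfig()
#   per_param = ValueMetaConf(log_preview=False)  # caller forces preview off
#   resolved = tracking_conf.get_value_meta_conf(per_param, value_type)
#   # resolved.log_preview is False (level 1 wins); fields the caller left as
#   # None fall through to levels 2 and 3.
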