def get_value_meta(value, meta_conf, tracking_config, value_type=None, target=None):
    # type: ( Any, ValueMetaConf, TrackingConfig, Optional[ValueType], Optional[Target]) -> Optional[ValueMeta]
    """
    Build the value meta that is reported for tracking.

    The effective meta configuration is resolved by ``tracking_config`` from
    the user supplied ``meta_conf``, the value type and the target; the value
    type then produces the value meta for ``value``.

    @param value: the value to calculate the value meta for
    @param meta_conf: meta configuration supplied by the user
    @param tracking_config: TrackingConfig used to resolve the effective meta conf
    @param value_type: optional value type, if it is already known
    @param target: optional target that holds the value; it is forwarded to
        the tracking config when the meta conf is resolved
    @return: the calculated value meta, or None when ``value`` is None
    """
    # Nothing to describe for a missing value.
    if value is None:
        return None

    # A default value type, or a TargetValueType, says nothing about the
    # concrete object, so the real type is looked up from the value itself
    # (falling back to ObjectValueType) to report it the right way.
    needs_type_lookup = _is_default_value_type(value_type) or isinstance(
        value_type, TargetValueType
    )
    if needs_type_lookup:
        value_type = get_value_type_of_obj(
            value, default_value_type=ObjectValueType()
        )

    effective_meta_conf = tracking_config.get_value_meta_conf(
        meta_conf, value_type, target
    )
    return value_type.get_value_meta(value, meta_conf=effective_meta_conf)
def test_log_schema(
    self, tracking_config, param_log_schema, config_log_schema, expected_log_schema
):
    # type: (Callable[[], TrackingConfig], bool, bool, bool) -> None
    """Check how ``log_schema`` is resolved from the parameter conf and the tracking config."""
    # Arrange
    tc = tracking_config()
    param_mc = ValueMetaConf(log_schema=param_log_schema)
    # None means "leave the tracking config at its own setting".
    if config_log_schema is not None:
        tc.log_value_schema = config_log_schema

    # Act
    resolved_conf = tc.get_value_meta_conf(param_mc, ObjectValueType())

    # Assert
    assert resolved_conf.log_schema == expected_log_schema
def dump(self, value, value_type=None, **kwargs):
    """
    Write ``value`` to this target and remember it in the target cache.

    @param value: the object to write
    @param value_type: optional type hint; when falsy the value type is
        looked up from ``value`` itself with ObjectValueType as the fallback
    @param kwargs: forwarded unchanged to the marshaller's ``dump``
    @raise: the error built by ``friendly_error.failed_to_write_task_output``
        when resolving the marshaller or dumping raises any Exception
    """
    # Imported locally, as in the original implementation.
    from targets.values import (
        ObjectValueType,
        get_value_type_of_obj,
        get_value_type_of_type,
    )

    value_type = (
        get_value_type_of_type(value_type)
        if value_type
        else get_value_type_of_obj(value, ObjectValueType())
    )

    try:
        marshaller = get_marshaller_ctrl(self, value_type=value_type)
        marshaller.dump(value, **kwargs)
    except Exception as ex:
        raise friendly_error.failed_to_write_task_output(
            ex, self, value_type=value_type
        )

    # Only reached after a successful write: cache the value per (target, type).
    TARGET_CACHE[TargetCacheKey(target=self, value_type=value_type)] = value
def test_log_preview_size(
    self,
    tracking_config,
    param_log_preview_size,
    config_log_preview_size,
    expected_log_preview_size,
):
    # type: (Callable[[], TrackingConfig], int, int, int) -> None
    """Check how ``log_preview_size`` is resolved from the parameter conf and the tracking config."""
    # Arrange
    config = tracking_config()
    param_mc = ValueMetaConf(log_preview_size=param_log_preview_size)
    # None means "leave the tracking config at its own setting".
    if config_log_preview_size is not None:
        config.log_value_preview_max_len = config_log_preview_size

    # Act
    resolved_conf = config.get_value_meta_conf(param_mc, ObjectValueType())

    # Assert
    assert resolved_conf.log_preview_size == expected_log_preview_size
def test_log_preview(
    self,
    tracking_config,  # type: Callable[[], TrackingConfig]
    param_log_preview,  # type: bool
    config_log_preview,  # type: bool
    expected_log_preview,  # type: bool
):
    """Check how ``log_preview`` is resolved from the parameter conf and the tracking config."""
    # Arrange
    config = tracking_config()
    param_mc = ValueMetaConf(log_preview=param_log_preview)
    # None means "leave the tracking config at its own setting".
    if config_log_preview is not None:
        config.log_value_preview = config_log_preview

    # Act
    resolved_conf = config.get_value_meta_conf(param_mc, ObjectValueType())

    # Assert
    assert resolved_conf.log_preview == expected_log_preview
def test_log_stats(
    self, tracking_config, param_log_stats, config_log_stats, expected_log_stats
):
    # type: (Callable[[], TrackingConfig], bool, bool, bool) -> None
    """Check how ``log_stats`` is resolved from the parameter conf and the tracking config."""
    # Arrange
    config = tracking_config()
    param_mc = ValueMetaConf(log_stats=param_log_stats)
    # None means "leave the tracking config at its own setting".
    if config_log_stats is not None:
        config.log_value_stats = config_log_stats
    # The expectation is compared as a LogDataRequest carrying the same flag
    # for boolean, numeric and string data.
    flag = expected_log_stats
    expected_log_stats = LogDataRequest(
        include_all_boolean=flag,
        include_all_numeric=flag,
        include_all_string=flag,
    )

    # Act
    resolved_conf = config.get_value_meta_conf(param_mc, ObjectValueType())

    # Assert
    assert resolved_conf.log_stats == expected_log_stats
def test_log_histograms(
    self,
    tracking_config,  # type: Callable[[], TrackingConfig]
    param_log_histograms,  # type: bool
    config_log_histograms,  # type: bool
    expected_log_histograms,  # type: bool
):
    """Check how ``log_histograms`` is resolved from the parameter conf and the tracking config."""
    # Arrange
    config = tracking_config()
    param_mc = ValueMetaConf(log_histograms=param_log_histograms)
    # None means "leave the tracking config at its own setting".
    if config_log_histograms is not None:
        config.log_histograms = config_log_histograms
    # The expectation is compared as a LogDataRequest carrying the same flag
    # for boolean, numeric and string data.
    flag = expected_log_histograms
    expected_log_histograms = LogDataRequest(
        include_all_boolean=flag,
        include_all_numeric=flag,
        include_all_string=flag,
    )

    # Act
    resolved_conf = config.get_value_meta_conf(param_mc, ObjectValueType())

    # Assert
    assert resolved_conf.log_histograms == expected_log_histograms
def dump(self, value, value_type=None, **kwargs):
    """
    Keep ``value`` on this object and record its value type.

    @param value: the object to store; it is assigned to ``self._obj``
    @param value_type: optional type hint; when falsy the value type is
        looked up from ``value`` itself with ObjectValueType as the fallback
    @param kwargs: accepted but not used by this implementation
    """
    self._obj = value
    self.value_type = (
        get_value_type_of_type(value_type)
        if value_type
        else get_value_type_of_obj(value, ObjectValueType())
    )
class TestTrackingConfig:
    """Tests for ``get_value_meta``: reported previews and data schemas."""

    # Argument names shared by the parametrized tests below.
    _SCHEMA_ARGNAMES = (
        "value, value_type, target, expected_value_preview, expected_data_schema"
    )
    _PREVIEW_ARGNAMES = "value, value_type, target, expected_value_preview"

    @staticmethod
    def _tracking_config_reporting_all():
        """Return the context tracking config with value reporting set to ALL."""
        config = TrackingConfig.from_databand_context()
        config.value_reporting_strategy = ValueTrackingLevel.ALL
        return config

    @pytest.mark.parametrize(
        _SCHEMA_ARGNAMES,
        [
            (10, None, None, "10", {"type": "int"}),
            (10, ObjectValueType(), None, "10", {"type": "int"}),
            (10, TargetValueType(), target_factory("/path"), "10", {"type": "int"}),
            (10, StrValueType(), None, "10", {"type": "str"}),
            (
                [10],
                ListValueType(),
                None,
                "[10]",
                {
                    "columns": [],
                    "dtypes": {},
                    "shape": (1, 0),
                    "size.bytes": 48,
                    "type": "List",
                },
            ),
        ],
    )
    def test_get_value_meta(
        self, value, value_type, target, expected_value_preview, expected_data_schema
    ):
        """Value meta exposes the expected preview and data schema."""
        value_meta = get_value_meta(
            value,
            ValueMetaConf(),
            self._tracking_config_reporting_all(),
            value_type=value_type,
            target=target,
        )

        assert value_meta.value_preview == expected_value_preview
        assert value_meta.data_schema.as_dict() == expected_data_schema

    @pytest.mark.parametrize(
        _SCHEMA_ARGNAMES,
        [
            (
                [
                    {"test": "test", "num": 10, "bool": True},
                    {"test": "test_2", "num": 20, "bool": False},
                ],
                ListValueType(),
                None,
                '[{"test": "test", "num": 10, "bool": true}, {"test": "test_2", "num": 20, '
                '"bool": false}]',
                {
                    "columns": ["bool", "num", "test"],
                    "dtypes": {
                        "bool": "<class 'bool'>",
                        "num": "<class 'int'>",
                        "test": "<class 'str'>",
                    },
                    "shape": (2, 3),
                    "size.bytes": 56,
                    "type": "List",
                },
            )
        ],
    )
    def test_get_value_meta_list_of_flat_dict(
        self, value, value_type, target, expected_value_preview, expected_data_schema
    ):
        """A list of flat dicts reports one column per key."""
        self.test_get_value_meta(
            value, value_type, target, expected_value_preview, expected_data_schema
        )

    @pytest.mark.parametrize(
        _SCHEMA_ARGNAMES,
        [
            (
                [
                    {
                        "test": "test",
                        "num": 10,
                        "bool": True,
                        "test_object": {
                            "test_object_name": "some_name",
                            "test_id": 12345,
                            "inner": {"foo": "bar"},
                        },
                    },
                    {
                        "test": "test_2",
                        "num": 20,
                        "bool": False,
                        "test_object": {
                            "test_object_name": "some_other_name",
                            "test_id": 56789,
                            "inner": {"foo": "bar_2"},
                        },
                    },
                ],
                ListValueType(),
                None,
                '[{"test": "test", "num": 10, "bool": true, "test_object": '
                '{"test_object_name": "some_name", "test_id": 12345, "inner": {"foo": '
                '"bar"}}}, {"test": "test_2", "num": 20, "bool": false, "test_object": '
                '{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
                '"bar_2"}}}]',
                {
                    "columns": [
                        "bool",
                        "num",
                        "test",
                        "test_object",
                        "test_object.inner.foo",
                        "test_object.test_id",
                        "test_object.test_object_name",
                    ],
                    "dtypes": {
                        "bool": "<class 'bool'>",
                        "num": "<class 'int'>",
                        "test": "<class 'str'>",
                        "test_object": "<class 'dict'>",
                        "test_object.inner.foo": "<class 'str'>",
                        "test_object.test_id": "<class 'int'>",
                        "test_object.test_object_name": "<class 'str'>",
                    },
                    "shape": (2, 7),
                    "size.bytes": 56,
                    "type": "List",
                },
            )
        ],
    )
    def test_get_value_meta_list_of_nested_dict(
        self, value, value_type, target, expected_value_preview, expected_data_schema
    ):
        """Nested dicts are expected to show up as dotted column names."""
        self.test_get_value_meta(
            value, value_type, target, expected_value_preview, expected_data_schema
        )

    @pytest.mark.parametrize(
        _SCHEMA_ARGNAMES,
        [
            (
                [
                    {
                        "test": "test",
                        "num": 10,
                        "bool": True,
                        "test_objects": [
                            {"test_object_name": "some_name", "test_id": 12345}
                        ],
                    },
                    {
                        "test": "test_2",
                        "num": 20,
                        "bool": False,
                        "test_objects": [
                            {"test_object_name": "some_other_name", "test_id": 56789}
                        ],
                    },
                ],
                ListValueType(),
                None,
                '[{"test": "test", "num": 10, "bool": true, "test_objects": '
                '[{"test_object_name": "some_name", "test_id": 12345}]}, {"test": "test_2", '
                '"num": 20, "bool": false, "test_objects": [{"test_object_name": '
                '"some_other_name", "test_id": 56789}]}]',
                {
                    "columns": ["bool", "num", "test", "test_objects"],
                    "dtypes": {
                        "bool": "<class 'bool'>",
                        "num": "<class 'int'>",
                        "test": "<class 'str'>",
                        "test_objects": "<class 'list'>",
                    },
                    "shape": (2, 4),
                    "size.bytes": 56,
                    "type": "List",
                },
            )
        ],
    )
    def test_get_value_meta_list_of_dict_with_list_of_flat_dict(
        self, value, value_type, target, expected_value_preview, expected_data_schema
    ):
        """A list-valued key is expected as a single column of type list."""
        self.test_get_value_meta(
            value, value_type, target, expected_value_preview, expected_data_schema
        )

    @pytest.mark.parametrize(
        _SCHEMA_ARGNAMES,
        [
            (
                [
                    {
                        "test": "test",
                        "num": 10,
                        "bool": True,
                        "test_object": {
                            "test_object_name": "some_other_name",
                            "test_id": 56789,
                            "inner": {"foo": "bar_3"},
                        },
                        "test_objects": [
                            {
                                "test_object_name": "some_name",
                                "test_id": 12345,
                                "inner": {"foo": "bar"},
                            },
                            {
                                "test_object_name": "some_name_1",
                                "test_id": 54321,
                                "inner": {"foo": "bar_2"},
                            },
                        ],
                    },
                    {
                        "test": "test_2",
                        "num": 20,
                        "bool": False,
                        "test_object": {
                            "test_object_name": "some_name",
                            "test_id": 11111,
                            "inner": {"foo": "bar_6"},
                        },
                        "test_objects": [
                            {
                                "test_object_name": "some_other_name",
                                "test_id": 56789,
                                "inner": {"foo": "bar_3"},
                            },
                            {
                                "test_object_name": "some_other_name",
                                "test_id": 98765,
                                "inner": {"foo": "bar_4"},
                            },
                        ],
                    },
                ],
                ListValueType(),
                None,
                '[{"test": "test", "num": 10, "bool": true, "test_object": '
                '{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
                '"bar_3"}}, "test_objects": [{"test_object_name": "some_name", "test_id": '
                '12345, "inner": {"foo": "bar"}}, {"test_object_name": "some_name_1", '
                '"test_id": 54321, "inner": {"foo": "bar_2"}}]}, {"test": "test_2", "num": '
                '20, "bool": false, "test_object": {"test_object_name": "some_name", '
                '"test_id": 11111, "inner": {"foo": "bar_6"}}, "test_objects": '
                '[{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
                '"bar_3"}}, {"test_object_name": "some_other_name", "test_id": 98765, '
                '"inner": {"foo": "bar_4"}}]}]',
                {
                    "columns": [
                        "bool",
                        "num",
                        "test",
                        "test_object",
                        "test_object.inner.foo",
                        "test_object.test_id",
                        "test_object.test_object_name",
                        "test_objects",
                    ],
                    "dtypes": {
                        "bool": "<class 'bool'>",
                        "num": "<class 'int'>",
                        "test": "<class 'str'>",
                        "test_object": "<class 'dict'>",
                        "test_object.inner.foo": "<class 'str'>",
                        "test_object.test_id": "<class 'int'>",
                        "test_object.test_object_name": "<class 'str'>",
                        "test_objects": "<class 'list'>",
                    },
                    "shape": (2, 8),
                    "size.bytes": 56,
                    "type": "List",
                },
            )
        ],
    )
    def test_get_value_meta_list_of_dict_with_list_of_nested_dicts(
        self, value, value_type, target, expected_value_preview, expected_data_schema
    ):
        """Nested dict keys and list-valued keys combined in one schema."""
        self.test_get_value_meta(
            value, value_type, target, expected_value_preview, expected_data_schema
        )

    @pytest.mark.parametrize(
        _SCHEMA_ARGNAMES,
        [
            (
                [
                    {
                        "test": "test",
                        "num": 10,
                        "bool": True,
                        "test_object": {
                            "test_object_name": "some_other_name",
                            "test_id": 56789,
                            "inner": {"foo": "bar_3"},
                        },
                        "test_objects": [
                            {
                                "test_object_name": "some_name",
                                "test_id": 12345,
                                "inner": {"foo": "bar"},
                            },
                            {
                                "test_object_name": "some_name_1",
                                "test_id": 54321,
                                "inner": {"foo": "bar_2"},
                            },
                        ],
                    },
                    {
                        "test_1": "test_2",
                        "num": 20,
                        "bool": False,
                        "test_object": {
                            "test_object_name": "some_name",
                            "test_id": 11111,
                            "inner": {"foo": "bar_6", "bar": "foo"},
                        },
                        "test_objects": [
                            {
                                "test_object_name": "some_other_name",
                                "test_id": 56789,
                                "inner": {"foo": "bar_3"},
                            },
                            {
                                "test_object_name": "some_other_name",
                                "test_id": 98765,
                                "inner": {"foo": "bar_4"},
                            },
                        ],
                    },
                ],
                ListValueType(),
                None,
                '[{"test": "test", "num": 10, "bool": true, "test_object": '
                '{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
                '"bar_3"}}, "test_objects": [{"test_object_name": "some_name", "test_id": '
                '12345, "inner": {"foo": "bar"}}, {"test_object_name": "some_name_1", '
                '"test_id": 54321, "inner": {"foo": "bar_2"}}]}, {"test_1": "test_2", "num": '
                '20, "bool": false, "test_object": {"test_object_name": "some_name", '
                '"test_id": 11111, "inner": {"foo": "bar_6", "bar": "foo"}}, "test_objects": '
                '[{"test_object_name": "some_other_name", "test_id": 56789, "inner": {"foo": '
                '"bar_3"}}, {"test_object_name": "some_other_name", "test_id": 98765, '
                '"inner": {"foo": "bar_4"}}]}]',
                {
                    "columns": [
                        "bool",
                        "num",
                        "test",
                        "test_1",
                        "test_object",
                        "test_object.inner.bar",
                        "test_object.inner.foo",
                        "test_object.test_id",
                        "test_object.test_object_name",
                        "test_objects",
                    ],
                    "dtypes": {
                        "bool": "<class 'bool'>",
                        "num": "<class 'int'>",
                        "test": "<class 'str'>",
                        "test_1": "<class 'str'>",
                        "test_object": "<class 'dict'>",
                        "test_object.inner.bar": "<class 'str'>",
                        "test_object.inner.foo": "<class 'str'>",
                        "test_object.test_id": "<class 'int'>",
                        "test_object.test_object_name": "<class 'str'>",
                        "test_objects": "<class 'list'>",
                    },
                    "shape": (2, 10),
                    "size.bytes": 56,
                    "type": "List",
                },
            )
        ],
    )
    def test_get_value_meta_list_of_dict_with_list_of_nested_dicts_with_different_keys(
        self, value, value_type, target, expected_value_preview, expected_data_schema
    ):
        """Keys that appear in only some elements are still expected as columns."""
        self.test_get_value_meta(
            value, value_type, target, expected_value_preview, expected_data_schema
        )

    @pytest.mark.parametrize(
        _PREVIEW_ARGNAMES,
        [
            (
                [
                    {"a": "a", "b": "b"},
                    {"a": "c", "b": "d"},
                    {"a": "e", "b": "f"},
                    {"a": "g", "b": "h"},
                    {"a": "i", "b": "j"},
                    {"a": "k", "b": "l"},
                    {"a": "m", "b": "n"},
                    {"a": "o", "b": "p"},
                    {"a": "q", "b": "r"},
                    {"a": "s", "b": "t"},
                    {"a": "u", "b": "v"},
                    {"a": "w", "b": "x"},
                    {"a": "y", "b": "z"},
                ],
                ListValueType(),
                None,
                '[{"a": "a", "b": "b"}, {"a": "c", "b": "d"}, {"a": "e", "b": "f"}, {"a": '
                '"g", "b": "h"}, {"a": "i", "b": "j"}, {"a": "k", "b": "l"}, {"a": "m", "b": '
                '"n"}, {"a": "o", "b": "p"}, {"a": "q", "b": "r"}, {"a": "s", "b": "t"}]',
            )
        ],
    )
    def test_get_value_meta_preview_size_default_max_elements(
        self, value, value_type, target, expected_value_preview
    ):
        """With a default ValueMetaConf only the first ten of thirteen elements are expected."""
        value_meta = get_value_meta(
            value,
            ValueMetaConf(),
            self._tracking_config_reporting_all(),
            value_type=value_type,
            target=target,
        )

        assert value_meta.value_preview == expected_value_preview

    @pytest.mark.parametrize(
        _PREVIEW_ARGNAMES,
        [
            (
                [
                    {"a": "a", "b": "b"},
                    {"a": "c", "b": "d"},
                    {"a": "e", "b": "f"},
                    {"a": "g", "b": "h"},
                    {"a": "i", "b": "j"},
                    {"a": "k", "b": "l"},
                    {"a": "m", "b": "n"},
                    {"a": "o", "b": "p"},
                    {"a": "q", "b": "r"},
                    {"a": "s", "b": "t"},
                    {"a": "u", "b": "v"},
                    {"a": "w", "b": "x"},
                    {"a": "y", "b": "z"},
                ],
                ListValueType(),
                None,
                '[{"a": "a", "b": "b"}, {"a": "c", "b": "d"}, {"a": "e", "b": "f"}, {"a": '
                '"g", "b": "h"}, {"a": "i", "b": "j"}]',
            )
        ],
    )
    def test_get_value_meta_preview_size_config_max_elements(
        self, value, value_type, target, expected_value_preview
    ):
        """With ``log_preview_size=5`` only the first five elements are expected."""
        value_meta = get_value_meta(
            value,
            ValueMetaConf(log_preview_size=5),
            self._tracking_config_reporting_all(),
            value_type=value_type,
            target=target,
        )

        assert value_meta.value_preview == expected_value_preview

    @pytest.mark.parametrize(
        _PREVIEW_ARGNAMES,
        [([{"a": "a", "b": "b"}], ListValueType(), None, '[{"a": "a", "b": "b"}]')],
    )
    def test_get_value_meta_preview_small_size(
        self, value, value_type, target, expected_value_preview
    ):
        """A single-element list is expected to be previewed in full."""
        value_meta = get_value_meta(
            value,
            ValueMetaConf(),
            self._tracking_config_reporting_all(),
            value_type=value_type,
            target=target,
        )

        assert value_meta.value_preview == expected_value_preview
log_histograms=False, ), ), ], ) def test_summing(self, meta_conf_list, expected): assert reduce(lambda x, y: x.merge_if_none(y), meta_conf_list) == expected @pytest.mark.parametrize( "level, value_type, target, expected", [ # ValueTrackingLevel.NONE ( ValueTrackingLevel.NONE, ObjectValueType(), None, ValueMetaConf( log_preview=False, log_histograms=False, log_stats=False, log_size=False ), ), ( ValueTrackingLevel.NONE, LazyValueType(), None, ValueMetaConf( log_preview=False, log_histograms=False, log_stats=False, log_size=False ), ), ( ValueTrackingLevel.NONE,