def test_option_timing(self):
    """Check the per-statistic timings a FloatColumn reports with `min` off.

    `time.time` is patched with a fake clock that returns 1.0, 2.0, 3.0, ...
    on successive calls, so the values accumulated in `profile['times']`
    are deterministic. The expected dictionaries contain no `min` entry
    because that statistic is disabled through the options.
    """
    values = [2.0, 12.5, 'not a float', 6.0, 'not a float']
    series = pd.Series(values).apply(str)

    opts = FloatOptions()
    opts.set({"min.is_enabled": False})
    column = FloatColumn(series.name, options=opts)

    # Descending list popped from the end -> ascending ticks 1.0, 2.0, ...
    fake_clock = [float(tick) for tick in range(100, 0, -1)]
    with mock.patch('time.time', side_effect=lambda: fake_clock.pop()):
        # Before any update the timing table must still be empty.
        self.assertEqual(defaultdict(float), column.profile['times'])

        column.update(series)

        # Timings recorded by the float column after the first update.
        profile = column.profile
        expected = defaultdict(
            float,
            max=1.0,
            sum=1.0,
            variance=1.0,
            precision=1.0,
            histogram_and_quantiles=15.0,
        )
        self.assertEqual(expected, profile['times'])

        # A second update on the same data is expected to double every
        # accumulated timing.
        column.update(series)
        expected = defaultdict(
            float,
            max=2.0,
            sum=2.0,
            variance=2.0,
            precision=2.0,
            histogram_and_quantiles=30.0,
        )
        self.assertEqual(expected, column.profile['times'])
def test_setting_options(self, *mocks):
    """Verify that options can be changed via `set()` and via attributes.

    Three paths are exercised: `ProfilerOptions.set` with a dictionary of
    dotted keys, direct attribute assignment on nested option objects, and
    `FloatOptions.set` followed by direct attribute reads.
    """
    options = ProfilerOptions()

    # --- dictionary-based configuration through set() ---
    options.set(
        {
            "data_labeler.is_enabled": False,
            "min.is_enabled": False,
            "structured_options.data_labeler.data_labeler_dirpath": "test",
            "data_labeler.max_sample_size": 15,
        }
    )

    structured = options.structured_options
    text_props = structured.text.properties
    float_props = structured.float.properties
    int_props = structured.int.properties
    labeler_props = structured.data_labeler.properties

    self.assertFalse(options.structured_options.data_labeler.is_enabled)
    # The un-prefixed "min.is_enabled" key must have reached text, float
    # and int options alike.
    for column_props in (text_props, float_props, int_props):
        self.assertFalse(column_props["min"].is_enabled)
    self.assertEqual(labeler_props["data_labeler_dirpath"], "test")
    self.assertEqual(labeler_props["max_sample_size"], 15)

    # --- direct attribute assignment on nested options ---
    options.structured_options.data_labeler.max_sample_size = 12
    options.structured_options.text.histogram_and_quantiles.is_enabled = True
    options.structured_options.text.is_enabled = False

    # Re-read the properties so the assertions see the new values.
    text_props = options.structured_options.text.properties
    labeler_props = options.structured_options.data_labeler.properties
    self.assertEqual(labeler_props["max_sample_size"], 12)
    self.assertTrue(text_props["histogram_and_quantiles"].is_enabled)
    self.assertFalse(text_props["is_enabled"])

    # --- attribute access on a standalone FloatOptions after set() ---
    standalone_float = FloatOptions()
    standalone_float.set(
        {
            "precision.is_enabled": False,
            "min.is_enabled": False,
            "*.is_enabled": False,
        }
    )
    self.assertFalse(standalone_float.precision.is_enabled)
    self.assertFalse(standalone_float.min.is_enabled)
    self.assertFalse(standalone_float.is_enabled)
def test_option_precision(self):
    """Check `precision['sample_size']` under different precision options.

    Each scenario builds a fresh FloatOptions/FloatColumn pair, profiles
    the same four-value series, and compares the reported sample size.
    """
    series = pd.Series([1.1, 2.2, 3.3, 4.4]).apply(str)

    # (settings passed to FloatOptions.set, expected precision sample_size)
    scenarios = [
        # Precision turned off: no sample size is reported.
        ({"precision.is_enabled": False}, None),
        # Precision turned on: all four values are sampled.
        ({"precision.is_enabled": True}, 4),
        # Sample ratio of 0.5: half of the four values are sampled.
        ({"precision.sample_ratio": 0.5}, 2),
    ]

    for settings, expected_size in scenarios:
        opts = FloatOptions()
        opts.set(settings)
        column = FloatColumn(series.name, options=opts)
        column.update(series)
        self.assertEqual(expected_size, column.precision['sample_size'])