def test_validate_numeric_stats(self, *mocks):
    """Check validation of the numeric-stats toggles.

    First expects ``validate()`` to raise a ``ValueError`` when variance
    is enabled while sum is disabled, then expects a ``UserWarning`` once
    ``is_numeric_stats_enabled`` is set to ``False``.
    """
    profiler_options = ProfilerOptions()

    # Variance is left on while sum (and the other listed stats) are off.
    profiler_options.set({
        "histogram_and_quantiles.is_enabled": False,
        "min.is_enabled": False,
        "max.is_enabled": False,
        "sum.is_enabled": False,
        "variance.is_enabled": True,
    })

    # The expected message repeats one line per column type, joined by
    # newlines, in the order int, float, text.
    message_template = (
        "ProfilerOptions.structured_options.{}: The numeric stats must "
        "toggle on the sum if the variance is toggled on.")
    expected_error = "\n".join(
        message_template.format(column_type)
        for column_type in ("int", "float", "text"))
    with self.assertRaisesRegex(ValueError, expected_error):
        profiler_options.validate()

    # Disabling numeric stats is expected to produce a warning.
    profiler_options.set({"is_numeric_stats_enabled": False})
    disabled_warning = (
        "ProfilerOptions.structured_options.int.numeric_stats: "
        "The numeric stats are completely disabled.")
    with self.assertWarnsRegex(UserWarning, disabled_warning):
        profiler_options.validate()
def test_data_labeler(self, *mocks):
    """Check that data labeler options are applied when profiling.

    Covers two paths: options given as a directory path plus sample size,
    and a data labeler object supplied directly through the options.
    """

    def labeler_column_profile(profiler):
        # Walk from the profiler's first column profile down to the
        # "data_labeler" entry of its 'data_label_profile' profile.
        return (profiler._profile[0]
                .profiles['data_label_profile']
                ._profiles["data_labeler"])

    options = ProfilerOptions()
    labeler_options = options.structured_options.data_labeler
    labeler_options.data_labeler_dirpath = "Test_Dirpath"
    labeler_options.max_sample_size = 50
    options.structured_options.multiprocess.is_enabled = False

    profile = Profiler(self.data, profiler_options=options)

    # mocks[0] is the DataLabeler object mock.
    labeler_mock = mocks[0]
    labeler_mock.assert_called_with(
        dirpath='Test_Dirpath',
        labeler_type='structured',
        load_options=None,
    )
    self.assertEqual(labeler_column_profile(profile)._max_sample_size, 50)

    # Now hand a labeler object straight to the options.
    data_labeler = mock.Mock(spec=BaseDataLabeler)
    data_labeler.reverse_label_mapping = {}
    data_labeler.model.num_labels = 0
    options.set({'data_labeler.data_labeler_object': data_labeler})

    override_warning = ("The data labeler passed in will be used,"
                        " not through the directory of the default"
                        " model")
    with self.assertWarnsRegex(UserWarning, override_warning):
        options.validate()

    profile = Profiler(self.data, profiler_options=options)
    self.assertEqual(data_labeler,
                     labeler_column_profile(profile).data_labeler)
def test_improper_profile_options(self, *mocks):
    """Check that improperly typed options are rejected with ValueError.

    Covers a non-options object passed to ``Profiler`` and a non-boolean
    value assigned to an ``is_enabled`` flag before ``validate()``.
    """
    # NOTE(review): test_data_labeler passes `profiler_options=` while
    # this call passes `options=`; confirm the keyword Profiler accepts.
    with self.assertRaisesRegex(
            ValueError,
            "The profile options must be passed as a "
            "ProfileOptions object."):
        # The result is discarded: construction itself is expected to raise.
        Profiler(self.data, options="Strings are not accepted")

    # Setup stays outside the assertion context so that only validate()
    # can satisfy the expected ValueError.
    profile_options = ProfilerOptions()
    profile_options.structured_options.text.max.is_enabled = "String"
    with self.assertRaisesRegex(
            ValueError,
            "ProfilerOptions.structured_options.text.max."
            "is_enabled must be a Boolean."):
        profile_options.validate()
def test_validate(self, *mocks):
    """Check that validate() reports several invalid option values at once.

    Three options are given invalid values and the raised ``ValueError``
    is expected to contain one message line for each of them.
    """
    options = ProfilerOptions()
    structured = options.structured_options
    structured.data_labeler.is_enabled = "Invalid"
    structured.data_labeler.data_labeler_dirpath = 5
    structured.int.max = "Invalid"

    # Expected message lines, newline separated, in this order.
    expected_error = "\n".join([
        "ProfilerOptions.structured_options.int.max must be a "
        "BooleanOption.",
        "ProfilerOptions.structured_options.data_labeler.is_enabled must be"
        " a Boolean.",
        "ProfilerOptions.structured_options.data_labeler."
        "data_labeler_dirpath must be a string.",
    ])
    with self.assertRaisesRegex(ValueError, expected_error):
        options.validate()