示例#1
0
    def test_option_timing(self):
        data = [2.0, 12.5, 'not a float', 6.0, 'not a float']
        df = pd.Series(data).apply(str)

        options = FloatOptions()
        options.set({"min.is_enabled": False})

        profiler = FloatColumn(df.name, options=options)

        time_array = [float(i) for i in range(100, 0, -1)]
        with mock.patch('time.time', side_effect=lambda: time_array.pop()):
            # Validate that the times dictionary is empty
            self.assertEqual(defaultdict(float), profiler.profile['times'])
            profiler.update(df)

            # Validate the time in the datetime class has the expected time.
            profile = profiler.profile

            expected = defaultdict(float, {'max': 1.0, 'sum': 1.0,\
                                           'variance': 1.0, 'precision': 1.0,\
                                           'histogram_and_quantiles': 15.0})
            self.assertEqual(expected, profile['times'])

            # Validate time in datetime class has expected time after second update
            profiler.update(df)
            expected = defaultdict(float, {'max': 2.0, 'sum': 2.0,\
                                           'variance': 2.0, 'precision': 2.0,\
                                           'histogram_and_quantiles': 30.0})
            self.assertEqual(expected, profiler.profile['times'])
    def test_setting_options(self, *mocks):
        options = ProfilerOptions()

        # Ensure set works appropriately
        options.set(
            {
                "data_labeler.is_enabled": False,
                "min.is_enabled": False,
                "structured_options.data_labeler.data_labeler_dirpath": "test",
                "data_labeler.max_sample_size": 15,
            }
        )

        text_options = options.structured_options.text.properties
        float_options = options.structured_options.float.properties
        int_options = options.structured_options.int.properties
        data_labeler_options = options.structured_options.data_labeler.properties

        self.assertFalse(options.structured_options.data_labeler.is_enabled)
        self.assertFalse(text_options["min"].is_enabled)
        self.assertFalse(float_options["min"].is_enabled)
        self.assertFalse(int_options["min"].is_enabled)
        self.assertEqual(data_labeler_options["data_labeler_dirpath"], "test")
        self.assertEqual(data_labeler_options["max_sample_size"], 15)

        # Ensure direct attribute setting works appropriately
        options.structured_options.data_labeler.max_sample_size = 12
        options.structured_options.text.histogram_and_quantiles.is_enabled = True
        options.structured_options.text.is_enabled = False

        text_options = options.structured_options.text.properties
        data_labeler_options = options.structured_options.data_labeler.properties
        self.assertEqual(data_labeler_options["max_sample_size"], 12)
        self.assertTrue(text_options["histogram_and_quantiles"].is_enabled)
        self.assertFalse(text_options["is_enabled"])

        # check direct attribute access after set
        float_options = FloatOptions()
        float_options.set(
            {
                "precision.is_enabled": False,
                "min.is_enabled": False,
                "*.is_enabled": False,
            }
        )

        self.assertFalse(float_options.precision.is_enabled)
        self.assertFalse(float_options.min.is_enabled)
        self.assertFalse(float_options.is_enabled)
示例#3
0
    def test_option_precision(self):
        data = [1.1, 2.2, 3.3, 4.4]
        df = pd.Series(data).apply(str)

        # Turn off precision
        options = FloatOptions()
        options.set({"precision.is_enabled": False})
        profiler = FloatColumn(df.name, options=options)
        profiler.update(df)
        self.assertEqual(None, profiler.precision['sample_size'])

        # Turn on precision, check sample_size
        options = FloatOptions()
        options.set({"precision.is_enabled": True})
        profiler = FloatColumn(df.name, options=options)
        profiler.update(df)
        self.assertEqual(4, profiler.precision['sample_size'])

        # Trun on precision, set 0.5 sample_size
        options = FloatOptions()
        options.set({"precision.sample_ratio": 0.5})
        profiler = FloatColumn(df.name, options=options)
        profiler.update(df)
        self.assertEqual(2, profiler.precision['sample_size'])