Пример #1
0
    def test_fit_with_analyze(self):
        """Check that ``fit`` fits each transformer returned by ``_analyze``.

        ``HyperTransformer.fit`` is called unbound on a ``Mock`` instance whose
        ``transformers`` attribute is ``None`` and whose ``_analyze`` returns one
        mocked transformer per column of the input data.
        """
        # Setup
        data = pd.DataFrame({
            'integers': [1, 2, 3, 4],
            'floats': [1.1, 2.2, 3.3, 4.4],
            'booleans': [True, False, False, True]
        })

        # One mocked transformer per column, keyed by column name.
        column_mocks = {
            'integers': Mock(),
            'floats': Mock(),
            'booleans': Mock(),
        }

        instance = Mock()
        instance.transformers = None
        instance._analyze.return_value = column_mocks

        # Run
        HyperTransformer.fit(instance, data)

        # Asserts
        expected_fit_calls = 1
        for column_mock in column_mocks.values():
            self.assertEqual(column_mock.fit.call_count, expected_fit_calls)
Пример #2
0
def test_hypertransformer_with_transformers(faker_mock):
    """Round-trip the input data through a HyperTransformer with explicit transformers.

    The test checks three things:

    * the transformed values match ``get_transformed_data()`` (column order
      is ignored by sorting the columns on both sides);
    * after ``reverse_transform``, every column except ``names`` equals the
      original data;
    * none of the original ``names`` values appear in the reversed ``names``
      column.

    Args:
        faker_mock: Mock whose ``return_value.first_name`` yields the
            replacement names. Presumably injected by a ``patch`` decorator
            outside this view -- confirm against the surrounding file.
    """
    faker_mock.return_value.first_name.side_effect = [
        'Jaime', 'Cersei', 'Tywin', 'Tyrion'
    ]
    data = get_input_data()
    transformers = get_transformers()

    ht = HyperTransformer(transformers)
    ht.fit(data)
    transformed = ht.transform(data)

    expected = get_transformed_data()

    np.testing.assert_allclose(
        transformed.sort_index(axis=1).values,
        expected.sort_index(axis=1).values)

    reversed_data = ht.reverse_transform(transformed)

    # ``names`` is compared separately, so drop it from both frames first.
    original_names = data.pop('names')
    reversed_names = reversed_data.pop('names')

    pd.testing.assert_frame_equal(data.sort_index(axis=1),
                                  reversed_data.sort_index(axis=1))

    # ``name in series`` tests the Series *index labels*, not its values, so
    # the membership check has to be done on the values explicitly. NaN
    # entries are dropped because they carry no name to compare.
    leaked_names = set(original_names.dropna()) & set(reversed_names.dropna())
    assert not leaked_names, (
        'original names found in reversed data: {}'.format(sorted(leaked_names))
    )
Пример #3
0
def test_single_category():
    """Round-trip a column with one repeated category through one-hot encoding.

    The frame recovered by ``reverse_transform`` must equal the input frame.
    """
    one_hot_by_column = {'a': OneHotEncodingTransformer()}
    hyper = HyperTransformer(transformers=one_hot_by_column)
    frame = pd.DataFrame({'a': ['a', 'a', 'a']})

    hyper.fit(frame)
    encoded = hyper.transform(frame)
    recovered = hyper.reverse_transform(encoded)

    pd.testing.assert_frame_equal(frame, recovered)
Пример #4
0
def test_dtype_category():
    """Round-trip a ``category``-dtype frame through a default HyperTransformer.

    The frame recovered by ``reverse_transform`` must equal the input frame.
    """
    frame = pd.DataFrame({'a': ['a', 'b', 'c']}, dtype='category')

    hyper = HyperTransformer()
    hyper.fit(frame)

    encoded = hyper.transform(frame)
    recovered = hyper.reverse_transform(encoded)

    pd.testing.assert_frame_equal(frame, recovered)
Пример #5
0
def test_empty_transformers_nan_data():
    """With an empty ``transformers`` dict the data must pass through unchanged.

    Both the transformed frame and the reverse-transformed frame are compared
    against the original input.
    """
    original = get_input_data_with_nan()

    hyper = HyperTransformer(transformers={})
    hyper.fit(original)

    forward = hyper.transform(original)
    backward = hyper.reverse_transform(forward)

    # Check the forward result first, then the round trip.
    for produced in (forward, backward):
        pd.testing.assert_frame_equal(original, produced)
Пример #6
0
def test_subset_of_columns_nan_data():
    """A fitted HyperTransformer must handle a subset of the training columns.

    The transformer is fitted on the full frame, then only the first column
    is transformed and reversed; the result must equal that one-column frame.

    See https://github.com/sdv-dev/RDT/issues/152
    """
    full_frame = get_input_data_with_nan()

    hyper = HyperTransformer()
    hyper.fit(full_frame)

    # Select with a list so the result stays a one-column DataFrame.
    first_column = full_frame.columns[0]
    single_column_frame = full_frame[[first_column]]

    encoded = hyper.transform(single_column_frame)
    recovered = hyper.reverse_transform(encoded)

    pd.testing.assert_frame_equal(single_column_frame, recovered)
Пример #7
0
def test_hypertransformer_without_transformers_nan_data():
    """Round-trip NaN-containing data through a default HyperTransformer.

    The transformed values must match ``get_transformed_nan_data()`` (column
    order is ignored), and after ``reverse_transform`` every column except
    ``names`` must equal the original data.
    """
    original = get_input_data_with_nan()

    hyper = HyperTransformer()
    hyper.fit(original)
    encoded = hyper.transform(original)

    reference = get_transformed_nan_data()

    # Sort the columns on both sides so column order does not matter.
    encoded_values = encoded.sort_index(axis=1).values
    reference_values = reference.sort_index(axis=1).values
    np.testing.assert_allclose(encoded_values, reference_values)

    recovered = hyper.reverse_transform(encoded)

    # ``names`` is handled separately, so drop it from both frames first.
    original_names = original.pop('names')
    reversed_names = recovered.pop('names')

    pd.testing.assert_frame_equal(
        original.sort_index(axis=1),
        recovered.sort_index(axis=1),
    )

    # NOTE(review): ``in`` on a pandas Series tests the index labels, not the
    # values, so this compares each name against the index of
    # ``reversed_names``. Confirm whether a value-based check was intended.
    assert all(name not in reversed_names for name in original_names)