Пример #1
0
    def test_fit_array(self):
        """Check the fill value after fitting on a ``numpy.array`` with ``nan=0``."""
        # Setup
        values = np.array([False, True, True, False, True])
        instance = BooleanTransformer(nan=0)

        # Run
        instance.fit(values)

        # Asserts: the null transformer built during fit must use 0 as fill value.
        self.assertEqual(
            instance.null_transformer.fill_value,
            0,
            "Unexpected fill value",
        )
Пример #2
0
    def test_fit_nan_not_ignore(self):
        """Check the fill value after fitting on a ``pandas.Series`` with ``nan=0``."""
        # Setup
        series = pd.Series([False, True, True, False, True])
        instance = BooleanTransformer(nan=0)

        # Run
        instance.fit(series)

        # Asserts: the null transformer built during fit must use 0 as fill value.
        self.assertEqual(
            instance.null_transformer.fill_value,
            0,
            "Unexpected fill value",
        )
Пример #3
0
    def test___init__(self):
        """Check the attributes of an instance created without arguments."""
        # Run
        instance = BooleanTransformer()

        # Asserts
        observed_nan = instance.nan
        self.assertEqual(observed_nan, -1, "Unexpected nan")

        observed_null_column = instance.null_column
        self.assertIsNone(observed_null_column, "null_column is None by default")
Пример #4
0
    def _analyze(self, data):
        """Build a ``dict`` with column names and transformers from a given ``pandas.DataFrame``.

        When ``self.dtypes`` is ``None``, use the dtypes from the input data.

        When ``dtype`` is:
            - ``int``: a ``NumericalTransformer`` is created with ``dtype=int``.
            - ``float``: a ``NumericalTransformer`` is created with ``dtype=float``.
            - ``object`` or ``category``: a ``CategoricalTransformer`` is created.
            - ``bool``: a ``BooleanTransformer`` is created.
            - ``datetime``: a ``DatetimeTransformer`` is created.

        Any other ``dtype`` is not supported and raises a ``ValueError``.

        Args:
            data (pandas.DataFrame):
                Data used to analyze the ``pandas.DataFrame`` dtypes.

        Returns:
            dict:
                Mapping of column names and transformer instances.

        Raises:
            ValueError:
                if a ``dtype`` is not supported by the `HyperTransformer``.
        """
        transformers = dict()
        dtypes = self.dtypes or data.dtypes
        if self.dtypes:
            dtypes = self.dtypes
        else:
            dtypes = [
                data[column].dropna().infer_objects()
                for column in data.columns
            ]

        for name, dtype in zip(data.columns, dtypes):
            dtype = np.dtype(dtype)
            if dtype.kind == 'i':
                transformer = NumericalTransformer(dtype=int)
            elif dtype.kind == 'f':
                transformer = NumericalTransformer(dtype=float)
            elif dtype.kind == 'O':
                anonymize = self.anonymize.get(name)
                transformer = CategoricalTransformer(anonymize=anonymize)
            elif dtype.kind == 'b':
                transformer = BooleanTransformer()
            elif dtype.kind == 'M':
                transformer = DatetimeTransformer()
            else:
                raise ValueError('Unsupported dtype: {}'.format(dtype))

            transformers[name] = transformer

        return transformers
Пример #5
0
    def test_transform_array(self):
        """Check what ``transform`` hands to the null transformer for a ``numpy.array``."""
        # Setup
        values = np.array([False, True, None, True, False])
        mocked_self = Mock()

        # Run: call the unbound method so only ``transform`` itself is exercised.
        BooleanTransformer.transform(mocked_self, values)

        # Asserts
        self.assertEqual(
            mocked_self.null_transformer.transform.call_count,
            1,
            "NullTransformer.transform must be called one time",
        )

        # First positional argument received by the mocked null transformer.
        forwarded = mocked_self.null_transformer.transform.call_args[0][0]
        expected = pd.Series([0, 1, None, 1, 0], dtype=object)
        pd.testing.assert_series_equal(forwarded, expected)
Пример #6
0
    def test_transform_series(self):
        """Check what ``transform`` hands to the null transformer for a ``pandas.Series``."""
        # Setup
        series = pd.Series([False, True, None, True, False])
        mocked_self = Mock()

        # Run: call the unbound method so only ``transform`` itself is exercised.
        BooleanTransformer.transform(mocked_self, series)

        # Asserts
        self.assertEqual(
            mocked_self.null_transformer.transform.call_count,
            1,
            "NullTransformer.transform must be called one time",
        )

        # First positional argument received by the mocked null transformer.
        forwarded = mocked_self.null_transformer.transform.call_args[0][0]
        expected = pd.Series([0., 1., None, 1., 0.], dtype=float)
        pd.testing.assert_series_equal(forwarded, expected)
Пример #7
0
    def test_reverse_transform_not_null_values(self):
        """Test that ``reverse_transform`` maps 1/0 floats back to booleans.

        The mocked instance has ``nan`` set to ``None``. The result is expected
        to be a ``pandas.Series`` holding ``True``/``False`` values.
        """
        # Setup
        data = np.array([1., 0., 1.])

        # Run
        transformer = Mock()
        transformer.nan = None

        result = BooleanTransformer.reverse_transform(transformer, data)

        # Asserts
        expected = np.array([True, False, True])

        # isinstance is the idiomatic type check and matches the sibling
        # 2-D ndarray test, instead of comparing ``type(result)`` for equality.
        assert isinstance(result, pd.Series)
        np.testing.assert_equal(result.to_numpy(), expected)
Пример #8
0
    def test_reverse_transform_2d_ndarray(self):
        """Check ``reverse_transform`` on a 2-D ``numpy.ndarray`` with one column."""
        # Setup
        column_matrix = np.array([[1.], [0.], [1.]])
        mocked_self = Mock()
        mocked_self.nan = None

        # Run
        output = BooleanTransformer.reverse_transform(mocked_self, column_matrix)

        # Asserts: a Series of booleans is expected back.
        assert isinstance(output, pd.Series)
        np.testing.assert_equal(output.values, np.array([True, False, True]))
Пример #9
0
    def test_reverse_transform_nan_ignore(self):
        """Check ``reverse_transform`` when the instance ``nan`` is ``None``."""
        # Setup
        values = np.array([0.0, 1.0, 0.0, 1.0, 0.0])
        mocked_self = Mock()
        mocked_self.nan = None

        # Run
        output = BooleanTransformer.reverse_transform(mocked_self, values)

        # Asserts: values are converted to booleans ...
        np.testing.assert_equal(
            output,
            np.array([False, True, False, True, False]),
        )

        # ... and the null transformer is never asked to reverse anything.
        self.assertEqual(
            mocked_self.null_transformer.reverse_transform.call_count,
            0,
            "NullTransformer.reverse_transform should not be called when nan is ignore"
        )
Пример #10
0
def test_load_transformers():
    """Check ``load_transformers`` with an instance, a dict spec and a class spec."""
    # Spec given as a class name plus keyword arguments.
    int_spec = {
        'class': 'NumericalTransformer',
        'kwargs': {'dtype': 'int'},
    }
    # Spec given as the class object itself, without keyword arguments.
    datetime_spec = {'class': DatetimeTransformer}

    specs = {
        'bool': BooleanTransformer(),
        'int': int_spec,
        'datetime': datetime_spec,
    }

    loaded = load_transformers(specs)

    assert isinstance(loaded, dict)
    assert set(loaded.keys()) == {'bool', 'int', 'datetime'}

    bool_transformer = loaded['bool']
    assert isinstance(bool_transformer, BooleanTransformer)

    int_transformer = loaded['int']
    assert isinstance(int_transformer, NumericalTransformer)
    assert int_transformer.dtype == 'int'

    datetime_transformer = loaded['datetime']
    assert isinstance(datetime_transformer, DatetimeTransformer)
Пример #11
0
def test_load_transformer_instance():
    """Check that ``load_transformer`` returns the very instance it was given."""
    original = BooleanTransformer()

    loaded = load_transformer(original)

    # Identity, not equality: no copy or new instance is expected.
    assert loaded is original