示例#1
0
    def test_transform_series(self):
        """Check that ``transform`` hands a datetime series over to ``_transform``.

        A ``Mock`` stands in for the transformer instance, so only the calls
        made on it are verified, not the values produced.
        """
        # Setup
        datetimes = pd.to_datetime(pd.Series([None, '1996-10-17', '1965-05-23']))
        as_numbers = pd.Series([np.nan, 845510400000000000, -145497600000000000])

        instance = Mock()
        instance._transform.return_value = as_numbers

        # Run
        DatetimeTransformer.transform(instance, datetimes)

        # Asserts
        expected_input = pd.to_datetime(pd.Series([None, '1996-10-17', '1965-05-23']))
        forwarded = instance._transform.call_args[0][0]
        pd.testing.assert_series_equal(forwarded, expected_input)

        self.assertEqual(
            instance.null_transformer.transform.call_count,
            1,
            "NullTransformer.transform must be called only once.")
示例#2
0
    def test_reverse_transform_all_none(self):
        """Reverse transforming an array that only holds ``None`` must give ``NaT``."""
        # Setup
        fitted = DatetimeTransformer(strip_constant=True)
        fitted.fit(pd.to_datetime(['2020-01-01']))

        # Run
        reverted = fitted.reverse_transform(np.array([None]))

        # Asserts
        nat_index = pd.to_datetime(['NaT'])
        pd.testing.assert_series_equal(reverted.to_series(), nat_index.to_series())
示例#3
0
    def test_reverse_transform_2d_ndarray(self):
        """Reverse transform a 2d ``numpy.ndarray`` that holds one value per row."""
        # Setup
        dates = ['2020-01-01', '2020-02-01', '2020-03-01']
        fitted = DatetimeTransformer(strip_constant=True)
        fitted.fit(pd.to_datetime(dates))

        # Run
        column = np.array([[18262.], [18293.], [18322.]])
        reverted = fitted.reverse_transform(column)

        # Asserts
        pd.testing.assert_series_equal(
            reverted.to_series(),
            pd.to_datetime(dates).to_series(),
        )
示例#4
0
    def test_no_strip(self):
        """Round-trip a datetime array with a null using ``strip_constant=False``."""
        # Setup
        data = pd.to_datetime(pd.Series([None, '1996-10-17', '1965-05-23']))
        transformer = DatetimeTransformer(strip_constant=False)

        # Run
        values = data.copy().to_numpy()
        transformed = transformer.fit_transform(values)
        reverted = transformer.reverse_transform(transformed)

        # Asserts
        # Each expected row pairs the numeric value with a second column that
        # is 1.0 only on the row whose input was null.
        expected = np.array([
            [350006400000000000, 1.0],
            [845510400000000000, 0.0],
            [-145497600000000000, 0.0]
        ])
        np.testing.assert_almost_equal(expected, transformed)
        pd.testing.assert_series_equal(reverted, data)
示例#5
0
    def test_reverse_transform_nan_ignore(self):
        """Reverse transform with ``nan=None`` must not use the null transformer."""
        # Setup
        numbers = pd.Series([np.nan, 845510400000000000, -145497600000000000])

        instance = Mock()
        instance.nan = None

        # Run
        reverted = DatetimeTransformer.reverse_transform(instance, numbers)

        # Asserts
        first = pd.to_datetime(845510400000000000)
        second = pd.to_datetime(-145497600000000000)
        expected = pd.Series([np.nan, first, second])
        pd.testing.assert_series_equal(reverted, expected)

        self.assertEqual(
            instance.null_transformer.reverse_transform.call_count,
            0,
            "NullTransformer.reverse_transform won't be called when nan is ignore"
        )
示例#6
0
    def test_fit_nan_mode_series(self):
        """Test fit with ``nan='mode'`` on the output of ``pandas.to_datetime``.

        The input is built from a ``numpy.array`` that contains one null and
        two distinct dates; the fill value is expected to be the numeric
        value of the earlier date.
        """
        # Setup
        data = np.array([None, '1996-10-17', '1965-05-23'])
        data = pd.to_datetime(data)

        # Run
        transformer = DatetimeTransformer(nan='mode')
        transformer.fit(data)

        # Asserts
        expect_nan = 'mode'
        expect_fill_value = -145497600000000000

        self.assertEqual(transformer.nan, expect_nan, 'Unexpected nan')
        # The failure message names the statistic under test (mode, not mean).
        self.assertEqual(transformer.null_transformer.fill_value,
                         expect_fill_value, "Data mode is wrong")
示例#7
0
    def test_fit_nan_mean_array(self):
        """Fit with ``nan='mean'`` on a ``numpy.ndarray`` of datetimes."""
        # Setup
        raw = np.array([None, '1996-10-17', '1965-05-23'])
        datetimes = pd.to_datetime(raw).to_numpy()

        # Run
        instance = DatetimeTransformer(nan='mean')
        instance.fit(datetimes)

        # Asserts
        self.assertEqual(instance.nan, 'mean', 'Unexpected nan')
        self.assertEqual(
            instance.null_transformer.fill_value,
            350006400000000000,
            "Data mean is wrong",
        )
示例#8
0
    def _analyze(self, data):
        """Build a ``dict`` with column names and transformers from a given ``pandas.DataFrame``.

        When ``self.dtypes`` is empty or ``None``, the dtypes are inferred from
        the non-null values of each column of the input data.

        When ``dtype`` is:
            - ``int``: a ``NumericalTransformer`` is created with ``dtype=int``.
            - ``float``: a ``NumericalTransformer`` is created with ``dtype=float``.
            - ``object`` or ``category``: a ``CategoricalTransformer`` is created.
            - ``bool``: a ``BooleanTransformer`` is created.
            - ``datetime``: a ``DatetimeTransformer`` is created.

        Any other ``dtype`` is not supported and raises a ``ValueError``.

        Args:
            data (pandas.DataFrame):
                Data used to analyze the ``pandas.DataFrame`` dtypes.

        Returns:
            dict:
                Mapping of column names and transformer instances.

        Raises:
            ValueError:
                if a ``dtype`` is not supported by the ``HyperTransformer``.
        """
        if self.dtypes:
            dtypes = self.dtypes
        else:
            # Drop the nulls before inferring, so that the dtype reflects only
            # the values that are actually present in each column.
            dtypes = [
                data[column].dropna().infer_objects().dtype
                for column in data.columns
            ]

        transformers = {}
        for name, dtype in zip(data.columns, dtypes):
            # Normalize to a numpy dtype so its one-letter ``kind`` can be used.
            dtype = np.dtype(dtype)
            kind = dtype.kind
            if kind == 'i':
                transformer = NumericalTransformer(dtype=int)
            elif kind == 'f':
                transformer = NumericalTransformer(dtype=float)
            elif kind == 'O':
                anonymize = self.anonymize.get(name)
                transformer = CategoricalTransformer(anonymize=anonymize)
            elif kind == 'b':
                transformer = BooleanTransformer()
            elif kind == 'M':
                transformer = DatetimeTransformer()
            else:
                raise ValueError('Unsupported dtype: {}'.format(dtype))

            transformers[name] = transformer

        return transformers
示例#9
0
    def test___init__(self):
        """A default instance uses ``nan='mean'`` and has no null attributes set."""
        # Run
        instance = DatetimeTransformer()

        # Asserts
        self.assertEqual(instance.nan, 'mean', "Unexpected nan")
        self.assertIsNone(
            instance.null_column,
            "null_column is None by default",
        )
        self.assertIsNone(
            instance.null_transformer,
            "null_transformer is None by default",
        )
示例#10
0
    def test__transform(self):
        """Check the numeric values that ``_transform`` gives for a datetime series."""
        # Setup
        datetimes = pd.to_datetime(pd.Series([None, '1996-10-17', '1965-05-23']))

        # Run
        numbers = DatetimeTransformer._transform(datetimes)

        # Asserts
        pd.testing.assert_series_equal(
            numbers,
            pd.Series([np.nan, 845510400000000000, -145497600000000000]),
        )
示例#11
0
    def test_reverse_transform_nan_not_ignore(self):
        """Reverse transform with ``nan='mean'`` must call the null transformer once."""
        # Setup
        values = [np.nan, 845510400000000000, -145497600000000000]
        transformed = pd.Series(values)

        instance = Mock()
        instance.nan = 'mean'
        instance.null_transformer.reverse_transform.return_value = pd.Series(values)

        # Run
        DatetimeTransformer.reverse_transform(instance, transformed)

        # Asserts
        self.assertEqual(
            instance.null_transformer.reverse_transform.call_count,
            1,
            "NullTransformer.reverse_transform must be called when nan is not ignore"
        )