Exemplo n.º 1
0
    def transform(self, col, col_meta=None, missing=None):
        """Run the column through the datetime helpers and return it as a table.

        The column is first passed to ``safe_datetime_cast`` and the result is
        then passed to ``to_timestamp``; the output of the latter is what ends
        up in the returned table under ``self.col_name``.

        Args:
            col(pandas.DataFrame): Data to transform.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        result = pd.DataFrame()

        # The cast values are stored first because to_timestamp receives the
        # whole table, not the bare column.
        casted = self.safe_datetime_cast(col, col_meta)
        result[self.col_name] = casted

        timestamps = self.to_timestamp(result)
        result[self.col_name] = timestamps

        if not missing:
            return result

        # Missing values requested: delegate the final shape to NullTransformer.
        return NullTransformer().fit_transform(result, col_meta)
Exemplo n.º 2
0
    def transform(self, col, col_meta=None, missing=None):
        """Map every value of the column through ``get_val`` and return a table.

        Args:
            col(pandas.DataFrame): Data to transform. The column named
                ``self.col_name`` is the one that gets processed.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        result = pd.DataFrame()

        # Replace every nan with None so that all missing entries look the
        # same by the time they reach get_val.
        normalised = col[self.col_name].replace({np.nan: None})
        result[self.col_name] = normalised.apply(self.get_val)

        if not missing:
            return result

        # Missing values requested: delegate the final shape to NullTransformer.
        return NullTransformer().fit_transform(result, col_meta)
Exemplo n.º 3
0
    def transform(self, col, col_meta=None, missing=None):
        """Apply ``get_val`` to every row of the column and return a table.

        When missing-value handling is off and ``self.subtype`` is ``'int'``,
        the resulting column is additionally cast to ``int``. The cast is not
        performed when missing-value handling is on.

        Args:
            col(pandas.DataFrame): Data to transform. The column named
                ``self.col_name`` is the one that gets processed.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        # Start from a fresh table that only carries the target column.
        table = pd.DataFrame()
        table[self.col_name] = col[self.col_name]

        if missing:
            # NullTransformer's output replaces the table before get_val runs,
            # so get_val is applied to the rows it produced.
            table = NullTransformer().fit_transform(table, col_meta)

        # get_val receives one whole row at a time (axis=1).
        table[self.col_name] = table.apply(self.get_val, axis=1)

        # The integer cast only applies on the path without missing handling.
        if not missing and self.subtype == 'int':
            table[self.col_name] = table[self.col_name].astype(int)

        return table
Exemplo n.º 4
0
    def test_fit_transform_notnull(self):
        """Check fit_transform on a column with a single missing value.

        The expected table holds 53.25 (the mean of the four values that are
        present) in place of the nan, plus a '?age' column that is 1 for the
        rows that had a value and 0 for the row that did not.
        """

        # Setup
        metadata = {'name': 'age', 'type': 'number'}
        ages = pd.Series([62, 53, 53, 45, np.nan])

        expected = pd.DataFrame({
            'age': [62.0, 53.0, 53.0, 45.0, 53.25],
            '?age': [1, 1, 1, 1, 0]
        })

        # Run
        actual = NullTransformer().fit_transform(ages, metadata)

        # Check
        assert actual.equals(expected)
Exemplo n.º 5
0
    def test_fit_transform_isnull(self):
        """Check fit_transform on a column that is mostly missing values.

        The expected table holds 62.0 (the only value that is present) in
        place of every nan, plus a '?age' column that is 1 for the row that
        had a value and 0 for the rows that did not.
        """

        # Setup
        metadata = {'name': 'age', 'type': 'number'}
        ages = pd.Series([62, np.nan, np.nan, np.nan, np.nan], name='age')

        expected = pd.DataFrame({
            'age': [62.0, 62.0, 62.0, 62.0, 62.0],
            '?age': [1, 0, 0, 0, 0]
        })

        # Run
        actual = NullTransformer().fit_transform(ages, metadata)

        # Check
        assert actual.equals(expected)