Пример #1
0
 def _transform(self, data, on_cols):
     """Return a cleaned copy of ``data``; the input itself is not modified.

     Rows whose index labels are stored in ``self._drop_indices_row`` are
     removed. When both ``self.on_cols`` and ``on_cols`` are truthy,
     duplicated columns are removed as well.

     Args:
         :param data: a Pandas Dataframe to clean
         :param on_cols: whether column de-duplication applies to this data
     Returns:
         :return: the cleaned copy
     """
     cleaned = data.copy()
     row_labels = self._drop_indices_row
     if len(row_labels) > 0:
         cleaned.drop(row_labels, inplace=True)
     if self.on_cols and on_cols:
         # drop_duplicates() compares rows, so transpose to make it compare
         # columns and transpose back afterwards.
         cleaned = cleaned.T.drop_duplicates().T
     message = '  shape: {}'.format(_shape(cleaned))
     self.logging(message, level=logging.DEBUG)
     return cleaned
Пример #2
0
    def transform_y(self, y):
        """Restrict y to the columns listed in ``self.y_cols``.

        When ``self.y_cols`` is empty, y is handed back as-is (the very same
        object, no copy and no logging).

        Args:
            :param y: a Pandas Dataframe holding the target
        Returns:
            :return y_new: a copy of y containing only the columns in self.y_cols
        """
        if len(self.y_cols) == 0:
            return y
        shape_before = 'y shape: {}'.format(_shape(y))
        self.logging(shape_before, level=logging.DEBUG)
        y_new = y.copy()
        # Every column present in the frame but absent from y_cols is dropped.
        present = set(y_new.columns.values.tolist())
        unwanted = list(present - set(self.y_cols))
        y_new.drop(unwanted, axis=1, inplace=True)
        shape_after = '  shape: {}'.format(_shape(y_new))
        self.logging(shape_after, level=logging.DEBUG)
        return y_new
Пример #3
0
    def transform_x(self, x):
        """Restrict x to the columns listed in ``self.x_cols``.

        When ``self.x_cols`` is empty, x is handed back as-is (the very same
        object, no copy and no logging).

        Args:
            :param x: a Pandas Dataframe holding the dataset
        Returns:
            :return x_new: a copy of x containing only the columns in self.x_cols
        """
        if len(self.x_cols) == 0:
            return x
        shape_before = 'x shape: {}'.format(_shape(x))
        self.logging(shape_before, level=logging.DEBUG)
        x_new = x.copy()
        # Every column present in the frame but absent from x_cols is dropped.
        present = set(x_new.columns.values.tolist())
        unwanted = list(present - set(self.x_cols))
        x_new.drop(unwanted, axis=1, inplace=True)
        shape_after = ' shape: {}'.format(_shape(x_new))
        self.logging(shape_after, level=logging.DEBUG)
        return x_new
Пример #4
0
    def _fit(self, data, cols, is_categorical, cat2num):
        """Fit one nearest-neighbours imputation model per column in ``cols``.

        Rows of ``data`` holding at least one missing value are left out of the
        training set. The remaining rows go through ``cat2num.fit_transform``
        and, for every column in ``cols``, a model is trained to predict that
        column from all the other columns.

        Args:
            :param data: a Pandas Dataframe with at least 2 columns
            :param cols: the columns to build an imputation model for
            :param is_categorical: flags parallel to cols; a truthy flag selects
                a KNeighborsClassifier, a falsy one a KNeighborsRegressor
            :param cat2num: object whose fit_transform(data, None) returns a
                pair, the first element being the data used for training
        Returns:
            :return models: dict mapping each column to its fitted model; when
                cols is empty, data itself is returned unchanged instead
        Raises:
            AssertionError: if data has fewer than 2 columns, or fewer than
                self.k rows remain once incomplete rows are removed
        """
        if len(cols) == 0:
            return data
        assert _shape(data)[1] >= 2, 'at least 2 columns are needed'

        # Keep complete rows via a positional boolean mask. Converting the mask
        # to positions with Series.nonzero() (removed in pandas 1.0) and then
        # passing them to drop() was wrong for any non-default index, because
        # drop() matches index *labels*, not positions.
        has_nan = pandas.isnull(data).any(axis=1).values
        n_removed = int(has_nan.sum())
        _data = data.loc[~has_nan].copy()
        self.logging('current shape: {}'.format(_shape(_data)))
        assert _shape(
            _data)[0] >= self.k, 'not enough rows: {} removed {} rows'.format(
                _shape(_data)[0], n_removed)

        _data, _ = cat2num.fit_transform(_data, None)

        models = {}
        for col, cat in zip(cols, is_categorical):
            model_cls = KNeighborsClassifier if cat else KNeighborsRegressor
            model = model_cls(n_neighbors=self.k)
            # NOTE(review): the feature order comes from a set and is therefore
            # not tied to the frame's column order; left as-is on the
            # assumption that the prediction side builds it the same way --
            # confirm before changing.
            x_train = _data[list(set(_data.columns.tolist()) - {col})]
            y_train = _data[col]
            self.logging(
                'fitting imputing_model for column: {} x_train.shape={} y_train.shape={}'
                .format(col, _shape(x_train), _shape(y_train)),
                level=logging.DEBUG)
            model.fit(x_train, y_train)
            models[col] = model
        return models
Пример #5
0
 def transform_y(self, y):
     """Log the shape of y, then return ``self._transform(y, self.on_y)``."""
     message = 'y shape: {}'.format(_shape(y))
     self.logging(message, level=logging.DEBUG)
     transformed = self._transform(y, self.on_y)
     return transformed
Пример #6
0
 def transform_x(self, x):
     """Log the shape of x, then return ``self._transform(x, self.on_x)``."""
     message = 'x shape: {}'.format(_shape(x))
     self.logging(message, level=logging.DEBUG)
     transformed = self._transform(x, self.on_x)
     return transformed
Пример #7
0
 def _transform(self, data):
     """Return a copy of ``data`` without the rows in ``self._drop_indices``.

     The input is left untouched; the labels in ``self._drop_indices`` are
     removed from the copy's index, and the resulting shape is logged at
     DEBUG level.
     """
     remaining = data.copy()
     labels = self._drop_indices
     remaining.drop(labels, inplace=True)
     message = '  shape: {}'.format(_shape(remaining))
     self.logging(message, level=logging.DEBUG)
     return remaining
Пример #8
0
 def _transform(self, data):
     """Return a copy of ``data`` with columns holding missing values dropped.

     ``self.how`` is forwarded as the ``how`` argument of ``dropna`` and so
     decides when a column counts as missing. The input is left untouched and
     the resulting shape is logged at DEBUG level.
     """
     remaining = data.copy()
     criterion = self.how
     remaining.dropna(axis=1, how=criterion, inplace=True)
     message = '  shape: {}'.format(_shape(remaining))
     self.logging(message, level=logging.DEBUG)
     return remaining