示例#1
0
    def transform(self, X, y=None):
        """
        Transform X, segments time-series in each column into random
        intervals using interval indices generated
        during `fit`.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells.
        y : optional
            Not used by this method.

        Returns
        -------
        Xt : pandas DataFrame
          Transformed pandas DataFrame with same number of rows and one
          column for each generated interval.

        Raises
        ------
        ValueError
            If the number of columns of ``X`` differs from
            ``self.input_shape_[1]``.
        """

        # Check inputs.
        self.check_is_fitted()
        X = check_X(X)

        # Check that the input is of the same shape as the one passed
        # during fit.
        if X.shape[1] != self.input_shape_[1]:
            raise ValueError(
                'Number of columns of input is different from what was seen '
                'in `fit`')
        # # Input validation
        # if not all([np.array_equal(fit_idx, trans_idx)
        #             for trans_idx, fit_idx in zip(check_equal_index(X),
        #             self._time_index)]):
        #     raise ValueError('Indexes of input time-series are different
        #     from what was seen in `fit`')

        # Segment into intervals.
        # TODO generalise to non-equal-index cases
        intervals = []
        colnames = []
        # Only the first column's name is used as the prefix of the
        # generated interval column names.
        colname = X.columns[0]
        # Tabularise assuming series have equal indexes in any given column.
        arr = tabularize(X, return_array=True)
        for start, end in self.intervals_:
            # Slice the same [start, end) window out of every row at once.
            intervals.append(arr[:, start:end])
            colnames.append(f"{colname}_{start}_{end}")

        # Return nested pandas DataFrame.
        Xt = pd.DataFrame(concat_nested_arrays(intervals, return_arrays=True))
        Xt.columns = colnames
        return Xt
示例#2
0
    def _apply_rowwise(self, func, X, y=None):
        """Apply ``func`` to each row of every column of the data container.

        Helper shared by transform and inverse_transform: ``func`` is the
        function to be applied on each row.

        Parameters
        ----------
        func : callable
            Function applied to each cell of ``X``.
        X : data container
            Input data; passed through ``check_X`` before use.
        y : optional
            Not used by this method.

        Returns
        -------
        Xt
            Column-wise concatenation of the transformed rows.
        """
        self.check_is_fitted()
        X = check_X(X)

        # A plain ``X.apply(...)`` over the whole frame was tried first and
        # abandoned: it only works on multiple columns when the series have
        # equal length, and it fails in ways other than the expected
        # ValueError.

        try:
            # Fast path: per-column ``apply``. Not very robust, since each
            # cell is not 2d and transformers expecting 2d input break here.
            per_column = [
                pd.Series(series.apply(func)) for _, series in X.items()
            ]
            Xt = pd.concat(per_column, axis=1)

        except Exception:
            # Slow path: explicit loops over columns and rows, handing each
            # cell to ``func`` as a 2d dataframe. Most robust but very slow.
            transformed_columns = [
                [func(pd.DataFrame(cell)).ravel()
                 for cell in X.iloc[:, position]]
                for position in range(X.shape[1])
            ]

            # If series-to-series transform, flatten transformed series by
            # concatenating the transformed columns.
            Xt = concat_nested_arrays(transformed_columns)

            # Tabularise/unnest series-to-primitive transforms, detected by
            # a length-1 container in the first cell.
            first_cell = Xt.iloc[0, 0]
            holds_sequence = isinstance(first_cell, (pd.Series, np.ndarray))
            if holds_sequence and len(first_cell) == 1:
                Xt = tabularize(Xt)
        return Xt
示例#3
0
    def transform(self, X):
        """
        Apply the `fit_transform()` method of the per-row transformer repeatedly
        on each row.

        Parameters
        ----------
        X : pandas DataFrame, shape (n_samples, n_columns)
            The input samples.
            NOTE(review): earlier documentation described X as a 1D
            array-like / pandas Series that should not be a DataFrame, but
            the fallback path indexes columns via ``X.shape[1]`` and
            ``X.iloc[:, c]`` - confirm the intended input type.

        Returns
        -------
        T : pandas container, shape (n_samples, ...)
            The transformed data
        """
        # check the validity of input

        validate_X(X)
        check_is_fitted(self, 'is_fitted_')

        # 1st attempt: apply, relatively fast but not robust
        # try and except, but sometimes breaks in other cases than excepted ValueError
        # Works on single column, but on multiple columns only if columns have equal-length series.
        # try:
        #     Xt = X.apply(self.transformer.fit_transform)
        #
        # # Otherwise call apply on each column separately.
        # except ValueError as e:
        #     if str(e) == 'arrays must all be same length':
        #         Xt = pd.concat([pd.Series(col.apply(self.transformer.fit_transform)) for _, col in X.items()], axis=1)
        #     else:
        #         raise

        # 2nd attempt: apply but iterate over columns, still relatively fast but still not very robust
        # but column is not 2d and thus breaks if transformer expects 2d input
        try:
            Xt = pd.concat([
                pd.Series(col.apply(self.transformer.fit_transform))
                for _, col in X.items()
            ],
                           axis=1)

        # 3rd attempt: explicit for-loops, most robust but very slow
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate instead of silently
        # triggering the slow fallback.
        except Exception:
            cols_t = []
            for c in range(X.shape[1]):  # loop over columns
                col = X.iloc[:, c]
                rows_t = []
                for row in col:  # loop over rows in each column
                    row_2d = pd.DataFrame(row)  # convert into 2d dataframe
                    row_t = self.transformer.fit_transform(
                        row_2d).ravel()  # apply transform
                    rows_t.append(row_t)  # append transformed rows
                cols_t.append(rows_t)  # append transformed columns

            # if series-to-series transform, flatten transformed series
            Xt = concat_nested_arrays(
                cols_t)  # concatenate transformed columns

            # tabularise/unnest series-to-primitive transforms
            xt = Xt.iloc[0, 0]
            if isinstance(xt, (pd.Series, np.ndarray)) and len(xt) == 1:
                Xt = tabularize(Xt)
        return Xt