Пример #1
0
    def apply_moving_entropy(self,
                             input_column,
                             dest_column=None,
                             row_range=(0, None),
                             window=10,
                             no_of_bins=5):
        '''
        Apply moving entropy to a column and store the result as another column

        The values of ``input_column`` are read from ``self._pd_frame``, the
        rows selected by ``row_range`` are passed to
        ``Features.moving_entropy`` and its output is written back over the
        same rows. Rows outside ``row_range`` keep their original input
        values. The resulting series is handed to ``self.add_column``.

        :param input_column: Name of the column to read the source values from
        :param dest_column: Destination column name; when None it defaults to
            ``<input_column>_mentr_<window>_<no_of_bins>``
        :param row_range: ``(start, stop)`` pair used as slice bounds for the
            rows to process; the default ``(0, None)`` selects every row
        :param window: Window size of the calculation, forwarded to
            ``Features.moving_entropy``
        :param no_of_bins: Number of discrete levels, forwarded to
            ``Features.moving_entropy``
        :return: None
        '''
        # None is a singleton, so test identity rather than equality.
        if dest_column is None:
            dest_column = (input_column + '_mentr_' + str(window) + '_' +
                           str(no_of_bins))

        full_series = list(self._pd_frame[input_column])

        # Build the slice once so the read and the write-back always agree.
        rows = slice(row_range[0], row_range[1])
        result = Features.moving_entropy(series=full_series[rows],
                                         window=window,
                                         no_of_bins=no_of_bins,
                                         default=True)

        # NOTE(review): assumes the result has as many items as the selected
        # rows; otherwise this slice assignment changes the series length.
        full_series[rows] = result
        self.add_column(column_name=dest_column, series=full_series)
Пример #2
0
    def apply_moving_median(self,
                            input_column,
                            dest_column=None,
                            row_range=(0, None),
                            window=5):
        '''
        Apply moving median to a column and store the result as another column

        The values of ``input_column`` are read from ``self._pd_frame``, the
        rows selected by ``row_range`` are passed to
        ``Features.moving_median`` and its output is written back over the
        same rows. Rows outside ``row_range`` keep their original input
        values. The resulting series is handed to ``self.add_column``.

        :param input_column: Name of the column to read the source values from
        :param dest_column: Destination column name; when None it defaults to
            ``<input_column>_mm_<window>``
        :param row_range: ``(start, stop)`` pair used as slice bounds for the
            rows to process; the default ``(0, None)`` selects every row
        :param window: Window size of the calculation, forwarded to
            ``Features.moving_median``
        :return: None
        '''
        # None is a singleton, so test identity rather than equality.
        if dest_column is None:
            dest_column = input_column + '_mm_' + str(window)

        full_series = list(self._pd_frame[input_column])

        # Build the slice once so the read and the write-back always agree.
        rows = slice(row_range[0], row_range[1])
        result = Features.moving_median(series=full_series[rows],
                                        window=window,
                                        default=True)

        # NOTE(review): assumes the result has as many items as the selected
        # rows; otherwise this slice assignment changes the series length.
        full_series[rows] = result
        self.add_column(column_name=dest_column, series=full_series)
Пример #3
0
    def apply_moving_median_centered_average(self,
                                             input_column,
                                             dest_column=None,
                                             row_range=(0, None),
                                             window=5,
                                             boundary=1):
        '''
        Apply moving median centered average and store it as another column

        The values of ``input_column`` are read from ``self._pd_frame``, the
        rows selected by ``row_range`` are passed to
        ``Features.moving_median_centered_average`` and its output is written
        back over the same rows. Rows outside ``row_range`` keep their
        original input values. The resulting series is handed to
        ``self.add_column``.

        :param input_column: Name of the column to read the source values from
        :param dest_column: Destination column name; when None it defaults to
            ``<input_column>_mmca_<window>`` (``boundary`` is not part of the
            generated name)
        :param row_range: ``(start, stop)`` pair used as slice bounds for the
            rows to process; the default ``(0, None)`` selects every row
        :param window: Window size of the calculation, forwarded to
            ``Features.moving_median_centered_average``
        :param boundary: Number of values that need to be removed from both
            ends of the sorted window
        :return: None
        '''
        # None is a singleton, so test identity rather than equality.
        if dest_column is None:
            dest_column = input_column + '_mmca_' + str(window)

        full_series = list(self._pd_frame[input_column])

        # Build the slice once so the read and the write-back always agree.
        rows = slice(row_range[0], row_range[1])
        result = Features.moving_median_centered_average(
            series=full_series[rows],
            window=window,
            boundary=boundary,
            default=True)

        # NOTE(review): assumes the result has as many items as the selected
        # rows; otherwise this slice assignment changes the series length.
        full_series[rows] = result
        self.add_column(column_name=dest_column, series=full_series)
Пример #4
0
    def apply_moving_weighted_average(self,
                                      input_column,
                                      dest_column=None,
                                      row_range=(0, None),
                                      window=5,
                                      weights=None):
        '''
        Apply moving weighted average and store it as another column

        The values of ``input_column`` are read from ``self._pd_frame``, the
        rows selected by ``row_range`` are passed to
        ``Features.moving_weighted_average`` and its output is written back
        over the same rows. Rows outside ``row_range`` keep their original
        input values. The resulting series is handed to ``self.add_column``.

        :param input_column: Name of the column to read the source values from
        :param dest_column: Destination column name; when None it defaults to
            ``<input_column>_mwa_<window>`` (``weights`` is not part of the
            generated name)
        :param row_range: ``(start, stop)`` pair used as slice bounds for the
            rows to process; the default ``(0, None)`` selects every row
        :param window: Window size of the calculation, forwarded to
            ``Features.moving_weighted_average``
        :param weights: List of integers forwarded to
            ``Features.moving_weighted_average``; when None a fresh
            ``[1, 2, 3, 4, 5]`` is used
        :return: None
        '''
        # None is a singleton, so test identity rather than equality.
        if dest_column is None:
            dest_column = input_column + '_mwa_' + str(window)

        # A list literal in the signature would be created once and shared by
        # every call; build the default per call so it can never be mutated
        # across invocations.
        if weights is None:
            weights = [1, 2, 3, 4, 5]

        full_series = list(self._pd_frame[input_column])

        # Build the slice once so the read and the write-back always agree.
        rows = slice(row_range[0], row_range[1])
        result = Features.moving_weighted_average(series=full_series[rows],
                                                  window=window,
                                                  weights=weights,
                                                  default=True)

        # NOTE(review): assumes the result has as many items as the selected
        # rows; otherwise this slice assignment changes the series length.
        full_series[rows] = result
        self.add_column(column_name=dest_column, series=full_series)
Пример #5
0
    def apply_moving_k_closest_average(self,
                                       input_column,
                                       dest_column=None,
                                       row_range=(0, None),
                                       window=5,
                                       kclosest=3):
        '''
        Apply moving k closest average and store it as another column

        The values of ``input_column`` are read from ``self._pd_frame``, the
        rows selected by ``row_range`` are passed to
        ``Features.moving_k_closest_average`` and its output is written back
        over the same rows. Rows outside ``row_range`` keep their original
        input values. The resulting series is handed to ``self.add_column``.

        :param input_column: Name of the column to read the source values from
        :param dest_column: Destination column name; when None it defaults to
            ``<input_column>_kca_<window>`` (``kclosest`` is not part of the
            generated name)
        :param row_range: ``(start, stop)`` pair used as slice bounds for the
            rows to process; the default ``(0, None)`` selects every row
        :param window: Window size of the calculation, forwarded to
            ``Features.moving_k_closest_average``
        :param kclosest: k number of closest values to the recent occurrence
            including itself
        :return: None
        '''
        # None is a singleton, so test identity rather than equality.
        if dest_column is None:
            dest_column = input_column + '_kca_' + str(window)

        full_series = list(self._pd_frame[input_column])

        # Build the slice once so the read and the write-back always agree.
        rows = slice(row_range[0], row_range[1])
        result = Features.moving_k_closest_average(series=full_series[rows],
                                                   window=window,
                                                   kclosest=kclosest,
                                                   default=True)

        # NOTE(review): assumes the result has as many items as the selected
        # rows; otherwise this slice assignment changes the series length.
        full_series[rows] = result
        self.add_column(column_name=dest_column, series=full_series)