Пример #1
0
    def transform(self, data, fit_config):
        """Apply a previously fitted scaling to ``data``.

        The scaler is selected by ``self.scale_param.method``:

        * ``consts.MINMAXSCALE``: ``MinMaxScaler().transform`` is called
          with ``fit_config[0]``.
        * ``consts.STANDARDSCALE``: ``StandardScaler().transform`` is called
          with ``mean=fit_config[0]`` and ``scale=fit_config[1]``.
        * any other method: ``data`` is returned untouched.

        Args:
            data: The data to scale; passed straight to the scaler.
            fit_config: Sequence of fitted scale values, indexed as described
                above (presumably the second value returned by ``fit`` --
                confirm against callers).

        Returns:
            The transformed data, or ``data`` unchanged when ``fit_config``
            is empty or the method is not a known scale method.
        """
        LOGGER.info("Start scale data transform ...")

        if len(fit_config) == 0:
            LOGGER.warning(
                "length fit_config is 0, can not do transform, do nothing and return"
            )
            # Actually return here: without it the branches below index
            # fit_config[0] on an empty sequence and raise IndexError,
            # contradicting the warning that was just logged.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config[0])
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data,
                                             mean=fit_config[0],
                                             scale=fit_config[1])
        else:
            LOGGER.info(
                "DataTransform method is {}, do nothing and return!".format(
                    self.scale_param.method))

        LOGGER.info("End transform data ...")

        return data
Пример #2
0
    def fit(self, data):
        """Fit the configured scaler on ``data`` and return the scaled data.

        The header found in ``data.schema`` before fitting is stored on
        ``self.header`` and written back into the schema of the returned
        data afterwards.

        Depending on ``self.scale_param.method``:

        * ``consts.MINMAXSCALE``: fits a ``MinMaxScaler`` built from the
          mode/area/feature-bound/output-bound parameters; the per-column
          scale values are stored on ``self.cols_scale_value``.
        * ``consts.STANDARDSCALE``: fits a ``StandardScaler`` built from
          ``with_mean``/``with_std``; the results are stored on ``self.mean``
          and ``self.std``.
        * anything else: only logs, and ``data`` is left as it was.

        Args:
            data: Object exposing a dict-like ``schema`` attribute; it is
                handed to the scaler's ``fit``.

        Returns:
            A ``(data, scale_value_results)`` tuple, where
            ``scale_value_results`` is ``[cols_scale_value]`` for min-max
            scaling, ``[mean, std]`` for standard scaling, and ``[]``
            otherwise.
        """
        LOGGER.info("Start scale data fit ...")
        fitted_values = []

        # Remember the header so it can be restored on the fitted output.
        self.header = data.schema.get('header')

        params = self.scale_param
        chosen_method = params.method

        if chosen_method == consts.MINMAXSCALE:
            scaler = MinMaxScaler(mode=params.mode,
                                  area=params.area,
                                  feat_upper=params.feat_upper,
                                  feat_lower=params.feat_lower,
                                  out_upper=params.out_upper,
                                  out_lower=params.out_lower)
            data, column_values = scaler.fit(data)
            fitted_values.append(column_values)
            self.cols_scale_value = column_values
        elif chosen_method == consts.STANDARDSCALE:
            scaler = StandardScaler(with_mean=params.with_mean,
                                    with_std=params.with_std)
            data, fitted_mean, fitted_std = scaler.fit(data)
            fitted_values.extend((fitted_mean, fitted_std))
            self.mean = fitted_mean
            self.std = fitted_std
        else:
            message = "Scale method is {}, do nothing and return!"
            LOGGER.info(message.format(chosen_method))

        data.schema['header'] = self.header
        LOGGER.info("End fit data ...")
        return data, fitted_values