Exemplo n.º 1
0
    def test_cols_select_fit_and_transform_repeat(self):
        """Column-wise scaling with repeated column indices.

        Fits ``MinMaxScaler`` in ``area='col'`` mode with a
        ``scale_column_idx`` list that contains duplicates, then checks that
        both ``fit`` and ``transform`` produce the values of the reference
        scaler ``MMS`` (rounded to 4 decimals) on the selected columns and
        leave every other column equal to ``self.test_data``.
        """
        # Duplicated indices are the point of this test.
        scale_column_idx = [1, 1, 2, 2, 4, 5, 5]
        scaler_kwargs = {
            'mode': 'normal',
            'area': 'col',
            'scale_column_idx': scale_column_idx,
            'feat_upper': None,
            'feat_lower': None,
            'out_upper': None,
            'out_lower': None,
        }
        min_max_scaler = MinMaxScaler(**scaler_kwargs)
        fit_data, cols_transform_value = min_max_scaler.fit(self.table_instance)

        # Build the expected table from the reference scaler.
        reference = MMS()
        reference.fit(self.test_data)
        scaled = reference.transform(self.test_data)
        expected = np.around(scaled, 4).tolist()

        # Columns that were not selected must keep their raw values.
        selected = set(scale_column_idx)
        for row_no, expected_row in enumerate(expected):
            for col_no in range(len(expected_row)):
                if col_no in selected:
                    continue
                expected_row[col_no] = self.test_data[row_no][col_no]

        fitted_features = self.get_table_instance_feature(fit_data)
        self.assertListEqual(fitted_features, expected)

        transform_data = min_max_scaler.transform(self.table_instance,
                                                  cols_transform_value)
        transformed_features = self.get_table_instance_feature(transform_data)
        self.assertListEqual(transformed_features, expected)
Exemplo n.º 2
0
    def transform(self, data, fit_config):
        """
        Transform input data using scale with fit results

        Parameters
        ----------
        data: data_instance, input data
        fit_config: list, the fit results information of scale.
            For the min-max method it is passed to the scaler as a whole;
            for the standard method it is read as
            [mean, scale, scale_column_idx].

        Returns
        ----------
        transform_data:data_instance, data after transform.
            The input data is returned untouched when fit_config is empty.
        """
        LOGGER.info("Start scale data transform ...")
        self.header = data.schema.get('header')
        if len(fit_config) == 0:
            LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
            # Actually return, as the message promises: falling through would
            # index into an empty fit_config in the standard-scale branch.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config)
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1],
                                             scale_column_idx=fit_config[2])
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

        # Re-attach the header captured before scaling to the returned data.
        data.schema['header'] = self.header
        LOGGER.info("End transform data ...")

        return data
Exemplo n.º 3
0
    def test_transform_all(self):
        """Fit and transform must agree in ``area='all'`` mode.

        Uses explicit feature bounds (3..8) and output bounds (-1..2), fits
        the scaler on the table instance, re-applies the fitted values with
        ``transform`` and asserts that both results hold the same features.
        """
        scaler_kwargs = {
            'mode': 'normal',
            'area': 'all',
            'feat_upper': 8,
            'feat_lower': 3,
            'out_upper': 2,
            'out_lower': -1,
        }
        min_max_scaler = MinMaxScaler(**scaler_kwargs)

        fitted, fit_values = min_max_scaler.fit(self.table_instance)
        transformed = min_max_scaler.transform(self.table_instance, fit_values)

        fitted_features = self.get_table_instance_feature(fitted)
        transformed_features = self.get_table_instance_feature(transformed)
        self.assertListEqual(fitted_features, transformed_features)
Exemplo n.º 4
0
    def transform(self, data, fit_config):
        """
        Transform input data using scale with fit results

        Parameters
        ----------
        data: input data to be scaled
        fit_config: list, the fit results information of scale.
            For the min-max method only fit_config[0] is used;
            for the standard method it is read as [mean, scale].

        Returns
        ----------
        The data after transform. The input data is returned untouched when
        fit_config is empty or the configured method is not recognised.
        """
        LOGGER.info("Start scale data transform ...")

        if len(fit_config) == 0:
            LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
            # Actually return, as the message promises: both scaler branches
            # below index fit_config[0] and would raise IndexError.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config[0])
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1])
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

        LOGGER.info("End transform data ...")

        return data