Exemplo n.º 1
0
    def test_fit_col_feat(self):
        """Check column-wise min-max fitting with per-column feature bounds.

        The expected values are produced by clipping every entry of
        ``self.test_data`` to the ``[feat_lower, feat_upper]`` pair of its
        column and then scaling the clipped rows with the reference scaler
        ``MMS`` over ``(out_lower, out_upper)``, rounded to 4 decimals.
        """
        out_lower, out_upper = -1.0, 2.0
        feat_lower = [0.5, 2, 3, 3, 4, 4.0]
        feat_upper = [0.8, 5, 6, 7, 8, 9]

        min_max_scaler = MinMaxScaler(mode='normal',
                                      area='col',
                                      feat_upper=feat_upper,
                                      feat_lower=feat_lower,
                                      out_upper=out_upper,
                                      out_lower=out_lower)
        fit_data, _ = min_max_scaler.fit(self.table_instance)

        def clip(value, col):
            # Limit a single value to the bounds configured for its column.
            if value > feat_upper[col]:
                return feat_upper[col]
            if value < feat_lower[col]:
                return feat_lower[col]
            return value

        clipped_rows = [[clip(value, col) for col, value in enumerate(row)]
                        for row in self.test_data]

        scaler = MMS((out_lower, out_upper))
        scaler.fit(clipped_rows)

        actual = self.get_table_instance_feature(fit_data)
        expected = np.around(scaler.transform(clipped_rows), 4).tolist()
        self.assertListEqual(actual, expected)
Exemplo n.º 2
0
    def fit(self, data):
        """
        Fit the scaler selected by ``self.scale_param.method`` on the input data
        Parameters
        ----------
        data: data_instance, input data; its ``schema`` 'header' entry is read
              before fitting and written back afterwards

        Returns
        ----------
        data: the data returned by the selected scaler's ``fit`` (the input
              object itself when the method is not recognised)
        scale_value_results: list, ``[cols_scale_value]`` for min-max scaling,
              ``[mean, std]`` for standard scaling, empty otherwise
        """
        LOGGER.info("Start scale data fit ...")
        self.header = data.schema.get('header')

        params = self.scale_param
        method = params.method
        scale_value_results = []

        if method == consts.MINMAXSCALE:
            scaler = MinMaxScaler(mode=params.mode,
                                  area=params.area,
                                  feat_upper=params.feat_upper,
                                  feat_lower=params.feat_lower,
                                  out_upper=params.out_upper,
                                  out_lower=params.out_lower)
            data, fitted_cols = scaler.fit(data)
            scale_value_results = [fitted_cols]
            self.cols_scale_value = fitted_cols

        elif method == consts.STANDARDSCALE:
            scaler = StandardScaler(with_mean=params.with_mean,
                                    with_std=params.with_std)
            data, mean, std = scaler.fit(data)
            scale_value_results = [mean, std]
            self.mean = mean
            self.std = std

        else:
            LOGGER.info("Scale method is {}, do nothing and return!".format(method))

        # Re-attach the header captured before fitting to the returned data.
        data.schema['header'] = self.header
        LOGGER.info("End fit data ...")
        return data, scale_value_results
Exemplo n.º 3
0
    def test_cols_select_fit_and_transform_repeat(self):
        """Check fit and transform when ``scale_column_idx`` has duplicates.

        The reference result scales every column of ``self.test_data`` with
        ``MMS`` and then restores the original values for all columns whose
        index is not listed in ``scale_column_idx``. Both the fit output and
        the output of ``transform`` with the fit results must equal it.
        """
        scale_column_idx = [1, 1, 2, 2, 4, 5, 5]
        min_max_scaler = MinMaxScaler(mode='normal', area='col',
                                      scale_column_idx=scale_column_idx,
                                      feat_upper=None, feat_lower=None,
                                      out_upper=None, out_lower=None)
        fit_data, cols_transform_value = min_max_scaler.fit(self.table_instance)

        reference = MMS()
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()

        # Columns that were not selected must keep their raw values.
        selected = set(scale_column_idx)
        for row_idx, row in enumerate(expected):
            for col_idx in range(len(row)):
                if col_idx not in selected:
                    row[col_idx] = self.test_data[row_idx][col_idx]

        fitted_features = self.get_table_instance_feature(fit_data)
        self.assertListEqual(fitted_features, expected)

        transform_data = min_max_scaler.transform(self.table_instance,
                                                  cols_transform_value)
        transformed_features = self.get_table_instance_feature(transform_data)
        self.assertListEqual(transformed_features, expected)
Exemplo n.º 4
0
    def transform(self, data, fit_config):
        """
        Transform input data using scale with fit results
        Parameters
        ----------
        data: data_instance, input data
        fit_config: list, the fit results information of scale. For min-max
                    scaling it is handed to ``MinMaxScaler.transform`` as is;
                    for standard scaling items 0, 1 and 2 are used as mean,
                    scale and scale_column_idx. When it is empty the input is
                    returned untouched.

        Returns
        ----------
        transform_data:data_instance, data after transform
        """
        LOGGER.info("Start scale data transform ...")
        self.header = data.schema.get('header')
        if len(fit_config) == 0:
            LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
            # Actually return, as the warning states: falling through would
            # index an empty fit_config in the standard-scale branch.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config)
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1],
                                             scale_column_idx=fit_config[2])
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

        # Re-attach the header captured before transforming to the result.
        data.schema['header'] = self.header
        LOGGER.info("End transform data ...")

        return data
Exemplo n.º 5
0
    def test_fit_instance_default(self):
        """Check fitting with ``area='all'`` and no explicit bounds.

        The scaled features must equal the output of a default ``MMS`` fitted
        on ``self.test_data`` (rounded to 4 decimals), and the returned fit
        values must equal ``sklearn_attribute_format(scaler, [0, 1])``.
        """
        min_max_scaler = MinMaxScaler(mode='normal',
                                      area='all',
                                      feat_upper=None,
                                      feat_lower=None,
                                      out_upper=None,
                                      out_lower=None)
        fit_instance, cols_transform_value = min_max_scaler.fit(self.table_instance)

        scaler = MMS()
        scaler.fit(self.test_data)

        actual_features = self.get_table_instance_feature(fit_instance)
        expected_features = np.around(scaler.transform(self.test_data), 4).tolist()
        self.assertListEqual(actual_features, expected_features)

        expected_fit_values = self.sklearn_attribute_format(scaler, [0, 1])
        self.assertListEqual(cols_transform_value, expected_fit_values)
Exemplo n.º 6
0
    def test_fit_col_out(self):
        """Check column-wise fitting with a custom output range of (-1, 2).

        The scaled features must equal the output of ``MMS`` configured with
        the same feature range, and the returned fit values must equal
        ``sklearn_attribute_format(scaler, feature_range)``.
        """
        out_lower, out_upper = -1, 2
        feature_range = (out_lower, out_upper)

        min_max_scaler = MinMaxScaler(mode='normal',
                                      area='col',
                                      feat_upper=None,
                                      feat_lower=None,
                                      out_upper=out_upper,
                                      out_lower=out_lower)
        fit_data, cols_transform_value = min_max_scaler.fit(self.table_instance)

        scaler = MMS(feature_range)
        scaler.fit(self.test_data)

        actual_features = self.get_table_instance_feature(fit_data)
        expected_features = np.around(scaler.transform(self.test_data), 4).tolist()
        self.assertListEqual(actual_features, expected_features)

        expected_fit_values = self.sklearn_attribute_format(scaler, feature_range)
        self.assertListEqual(cols_transform_value, expected_fit_values)
Exemplo n.º 7
0
    def test_transform_all(self):
        """Check that transform reproduces the fit output for ``area='all'``.

        The scaler is fitted on ``self.table_instance`` with scalar feature
        and output bounds; transforming the same table with the returned fit
        values must yield the same features as the fit itself.
        """
        scaler_kwargs = dict(mode='normal', area='all',
                             feat_upper=8, feat_lower=3,
                             out_upper=2, out_lower=-1)
        min_max_scaler = MinMaxScaler(**scaler_kwargs)

        fit_data, cols_transform_value = min_max_scaler.fit(self.table_instance)
        transform_data = min_max_scaler.transform(self.table_instance, cols_transform_value)

        fitted_features = self.get_table_instance_feature(fit_data)
        transformed_features = self.get_table_instance_feature(transform_data)
        self.assertListEqual(fitted_features, transformed_features)
Exemplo n.º 8
0
    def test_fit_feat(self):
        """Check fitting with scalar feature bounds and a custom output range.

        Every value of ``self.test_data`` is clipped to
        ``[feat_lower, feat_upper]`` and scaled with ``MMS`` over
        ``(out_lower, out_upper)`` to build the expected features. The
        expected fit values come from ``sklearn_attribute_format`` with the
        first two items of each entry forced to ``feat_lower`` and
        ``feat_upper``; they are compared with ``cols_transform_value[0]``.
        """
        feat_lower, feat_upper = 3, 8
        out_lower, out_upper = -1, 2
        min_max_scaler = MinMaxScaler(mode='normal', area='all',
                                      feat_upper=feat_upper, feat_lower=feat_lower,
                                      out_upper=out_upper, out_lower=out_lower)
        fit_data, cols_transform_value = min_max_scaler.fit(self.table_instance)

        def clip(value):
            # Limit a single value to the configured feature bounds.
            if value > feat_upper:
                return feat_upper
            if value < feat_lower:
                return feat_lower
            return value

        clipped_rows = [[clip(row[col]) for col in range(len(row))]
                        for row in self.test_data]

        scaler = MMS((out_lower, out_upper))
        scaler.fit(clipped_rows)

        actual_features = self.get_table_instance_feature(fit_data)
        expected_features = np.around(scaler.transform(clipped_rows), 4).tolist()
        self.assertListEqual(actual_features, expected_features)

        sklearn_res = self.sklearn_attribute_format(scaler, [out_lower, out_upper])
        # NOTE(review): presumably the reference scaler reports the min/max
        # observed in the clipped data, while the scaler under test reports
        # the configured bounds - confirm against sklearn_attribute_format.
        for idx, entry in enumerate(sklearn_res):
            if entry[0] != feat_lower or entry[1] != feat_upper:
                patched = list(entry)
                if patched[0] != feat_lower:
                    patched[0] = feat_lower
                if patched[1] != feat_upper:
                    patched[1] = feat_upper
                sklearn_res[idx] = tuple(patched)

        self.assertListEqual(cols_transform_value[0], sklearn_res)
Exemplo n.º 9
0
    def transform(self, data, fit_config):
        """
        Transform input data using scale with fit results
        Parameters
        ----------
        data: data_instance, input data
        fit_config: list, the fit results information of scale. For min-max
                    scaling item 0 is handed to ``MinMaxScaler.transform``;
                    for standard scaling items 0 and 1 are used as mean and
                    scale. When it is empty the input is returned untouched.

        Returns
        ----------
        data: data_instance, data after transform
        """
        LOGGER.info("Start scale data transform ...")

        if len(fit_config) == 0:
            LOGGER.warning("length fit_config is 0, can not do transform, do nothing and return")
            # Actually return, as the warning states: falling through would
            # index an empty fit_config in the branches below.
            return data

        if self.scale_param.method == consts.MINMAXSCALE:
            min_max_scaler = MinMaxScaler()
            data = min_max_scaler.transform(data, fit_config[0])
        elif self.scale_param.method == consts.STANDARDSCALE:
            standard_scaler = StandardScaler()
            data = standard_scaler.transform(data, mean=fit_config[0], scale=fit_config[1])
        else:
            LOGGER.info("DataTransform method is {}, do nothing and return!".format(self.scale_param.method))

        LOGGER.info("End transform data ...")

        return data
Exemplo n.º 10
0
    def fit(self, data):
        """
        Fit the scaler selected by ``self.scale_param.method`` on the input data
        Parameters
        ----------
        data: data_instance, input data; its ``schema`` 'header' entry is read
              before fitting and written back afterwards

        Returns
        ----------
        data: data_instance, data returned by the selected scaler's ``fit``
              (the input object itself when the method is not recognised)
        cols_scale_res: the fit results stored on ``self.cols_scale_res``
        """
        LOGGER.info("Start scale data fit ...")
        self.header = data.schema.get('header')

        params = self.scale_param
        method = params.method

        if method == consts.MINMAXSCALE:
            scaler = MinMaxScaler(mode=params.mode,
                                  area=params.area,
                                  scale_column_idx=params.scale_column_idx,
                                  feat_upper=params.feat_upper,
                                  feat_lower=params.feat_lower,
                                  out_upper=params.out_upper,
                                  out_lower=params.out_lower)
            data, fit_result = scaler.fit(data)
            self.cols_scale_res = fit_result

        elif method == consts.STANDARDSCALE:
            scaler = StandardScaler(area=params.area,
                                    scale_column_idx=params.scale_column_idx,
                                    with_mean=params.with_mean,
                                    with_std=params.with_std)
            data, fit_result = scaler.fit(data)
            # The fit result is indexed as (mean, std, scale_column_idx).
            self.mean = fit_result[0]
            self.std = fit_result[1]
            self.std_scale_column_idx = fit_result[2]
            self.cols_scale_res = fit_result

        else:
            # NOTE(review): self.cols_scale_res is not assigned on this path;
            # the return below relies on it having been set elsewhere.
            LOGGER.info("Scale method is {}, do nothing and return!".format(method))

        # Re-attach the header captured before fitting to the returned data.
        data.schema['header'] = self.header
        LOGGER.info("End fit data ...")
        return data, self.cols_scale_res