Пример #1
0
    def fit(self, data):
        """
        Fit the scaler selected by ``self.scale_param.method`` on the input data.

        Parameters
        ----------
        data: data_instance, input data; its ``schema`` is read for the header

        Returns
        ----------
        data: data_instance, whatever the selected scaler's ``fit`` returned, or the
            input object when the method is neither min-max nor standard scale
        scale_value_results: list, ``[cols_scale_value]`` for min-max scale,
            ``[mean, std]`` for standard scale, empty otherwise
        """
        LOGGER.info("Start scale data fit ...")

        # Keep the header so it can be re-attached to the scaler's output below.
        self.header = data.schema.get('header')

        params = self.scale_param
        method = params.method
        scale_value_results = []

        if method == consts.MINMAXSCALE:
            scaler = MinMaxScaler(mode=params.mode,
                                  area=params.area,
                                  feat_upper=params.feat_upper,
                                  feat_lower=params.feat_lower,
                                  out_upper=params.out_upper,
                                  out_lower=params.out_lower)
            data, col_values = scaler.fit(data)
            scale_value_results.append(col_values)
            self.cols_scale_value = col_values
        elif method == consts.STANDARDSCALE:
            scaler = StandardScaler(with_mean=params.with_mean,
                                    with_std=params.with_std)
            data, fitted_mean, fitted_std = scaler.fit(data)
            scale_value_results.extend((fitted_mean, fitted_std))
            self.mean = fitted_mean
            self.std = fitted_std
        else:
            # Unknown method: nothing is fitted, only the header is written back.
            LOGGER.info("Scale method is {}, do nothing and return!".format(method))

        data.schema['header'] = self.header
        LOGGER.info("End fit data ...")
        return data, scale_value_results
Пример #2
0
 def test_transform1(self):
     """Transforming with the fitted mean/std must give the same features as fit (mean and std both on)."""
     scaler = StandardScaler(with_mean=True, with_std=True)
     fitted, mean, std = scaler.fit(self.table_instance)
     transformed = scaler.transform(self.table_instance, mean, std)

     transformed_features = self.get_table_instance_feature(transformed)
     fitted_features = self.get_table_instance_feature(fitted)
     self.assertListEqual(transformed_features, fitted_features)
Пример #3
0
    def test_cols_select_fit_and_transform_repeat(self):
        """
        Column-wise standard scaling with repeated indices in ``scale_column_idx``.

        The expected table is built from the ``SSL`` reference scaler
        (presumably sklearn's StandardScaler - confirm against the imports),
        with every column that is not in the fitted column index list restored
        to its raw value. Both ``fit`` and ``transform`` must reproduce it.
        """
        requested_idx = [1, 1, 2, 2, 4, 5, 5]
        standard_scaler = StandardScaler(area='col',
                                         scale_column_idx=requested_idx,
                                         with_mean=True,
                                         with_std=True)
        fit_data, scale_conf = standard_scaler.fit(self.table_instance)
        # From here on the column indices reported by fit are used, not the requested ones.
        mean, std, fitted_idx = scale_conf[0], scale_conf[1], scale_conf[2]

        reference = SSL(with_mean=True, with_std=True)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()

        # Columns left out of scaling keep their original values.
        for row_no, expected_row in enumerate(expected):
            for col_no, _ in enumerate(expected_row):
                if col_no not in fitted_idx:
                    expected_row[col_no] = self.test_data[row_no][col_no]

        self.assertListEqual(self.get_table_instance_feature(fit_data),
                             expected)

        transformed = standard_scaler.transform(self.table_instance, mean, std,
                                                fitted_idx)
        self.assertListEqual(self.get_table_instance_feature(transformed),
                             expected)
Пример #4
0
    def test_transform4(self):
        """Transforming with the fitted configuration must match fit when mean and std are both off."""
        scaler = StandardScaler(with_mean=False, with_std=False)
        fitted, scale_conf = scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]
        column_idx = scale_conf[2]

        transformed = scaler.transform(self.table_instance, mean, std,
                                       column_idx)

        transformed_features = self.get_table_instance_feature(transformed)
        fitted_features = self.get_table_instance_feature(fitted)
        self.assertListEqual(transformed_features, fitted_features)
Пример #5
0
    def test_fit4(self):
        """
        Fit with mean and std both disabled.

        The fitted features must equal the ``SSL`` reference output rounded to
        4 decimals, the returned mean must be all zeros and the returned std
        all ones, one entry per column of ``self.test_data``.
        """
        standard_scaler = StandardScaler(with_mean=False, with_std=False)
        fitted, mean, std = standard_scaler.fit(self.table_instance)

        reference = SSL(with_mean=False, with_std=False)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted), expected)

        n_features = len(self.test_data[0])
        self.assertEqual(mean, [0] * n_features)
        self.assertEqual(std, [1] * n_features)
Пример #6
0
    def test_fit3(self):
        """
        Fit with mean centring on and std scaling off.

        Checks the fitted features and the mean against the ``SSL`` reference
        (rounded to 4 decimals) and that the returned std is all ones.
        """
        standard_scaler = StandardScaler(with_mean=True, with_std=False)
        fitted, mean, std = standard_scaler.fit(self.table_instance)

        reference = SSL(with_std=False)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted), expected)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean, list(np.around(reference.mean_, 4)))

        # With std scaling disabled every entry of std is expected to be 1.
        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std, [1] * len(rounded_std))
Пример #7
0
    def test_fit2(self):
        """
        Fit with mean centring off and std scaling on.

        Checks the fitted features and the std against the ``SSL`` reference
        (rounded to 4 decimals) and that the returned mean is all zeros.
        """
        standard_scaler = StandardScaler(with_mean=False, with_std=True)
        fitted, scale_conf = standard_scaler.fit(self.table_instance)
        mean = scale_conf[0]
        std = scale_conf[1]

        reference = SSL(with_mean=False)
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted), expected)

        # With mean centring disabled every entry of mean is expected to be 0.
        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean, [0] * len(rounded_mean))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std, list(np.around(reference.scale_, 4)))
Пример #8
0
    def test_fit6(self):
        """
        Fit with ``area='col'`` and both mean and std enabled.

        Checks the fitted features, mean and std against a default-constructed
        ``SSL`` reference, all rounded to 4 decimals.
        """
        standard_scaler = StandardScaler(area='col',
                                         with_mean=True,
                                         with_std=True)
        fitted, scale_conf = standard_scaler.fit(self.table_instance)
        # scale_conf[2] (the column index list) is read as in the other tests but not asserted on.
        mean, std, _column_idx = scale_conf[0], scale_conf[1], scale_conf[2]

        reference = SSL()
        reference.fit(self.test_data)
        expected = np.around(reference.transform(self.test_data), 4).tolist()
        self.assertListEqual(self.get_table_instance_feature(fitted), expected)

        rounded_mean = list(np.around(mean, 4))
        self.assertListEqual(rounded_mean, list(np.around(reference.mean_, 4)))

        rounded_std = list(np.around(std, 4))
        self.assertListEqual(rounded_std, list(np.around(reference.scale_, 4)))
Пример #9
0
    def fit(self, data):
        """
        Fit the scaler selected by ``self.scale_param.method`` and scale the input.

        Parameters
        ----------
        data: data_instance, input data

        Returns
        ----------
        data: data_instance, data after scale
        scale_value_results: list, the fit results information of scale
            (the value stored in ``self.cols_scale_res``)
        """
        LOGGER.info("Start scale data fit ...")

        # Keep the header so it can be re-attached to the scaler's output below.
        self.header = data.schema.get('header')

        params = self.scale_param
        method = params.method

        if method == consts.MINMAXSCALE:
            scaler = MinMaxScaler(mode=params.mode,
                                  area=params.area,
                                  scale_column_idx=params.scale_column_idx,
                                  feat_upper=params.feat_upper,
                                  feat_lower=params.feat_lower,
                                  out_upper=params.out_upper,
                                  out_lower=params.out_lower)
            data, fit_conf = scaler.fit(data)
            self.cols_scale_res = fit_conf
        elif method == consts.STANDARDSCALE:
            scaler = StandardScaler(area=params.area,
                                    scale_column_idx=params.scale_column_idx,
                                    with_mean=params.with_mean,
                                    with_std=params.with_std)
            data, fit_conf = scaler.fit(data)
            # The standard scaler reports (mean, std, scaled column indices) in that order.
            self.mean = fit_conf[0]
            self.std = fit_conf[1]
            self.std_scale_column_idx = fit_conf[2]
            self.cols_scale_res = fit_conf
        else:
            # NOTE(review): cols_scale_res is not assigned on this path; the return
            # below relies on it having been set elsewhere (e.g. __init__) - confirm.
            LOGGER.info("Scale method is {}, do nothing and return!".format(method))

        data.schema['header'] = self.header
        LOGGER.info("End fit data ...")
        return data, self.cols_scale_res