Exemplo n.º 1
0
    def test_get_impute_rate(self):
        """Check the impute rates reported by ``Imputer`` after fit and transform.

        The imputer is fitted on ``self.table_instance`` with the "median"
        method and the rate returned by ``get_impute_rate(mode="fit")`` is
        compared with the expected per-column rates.  The same table is then
        transformed and the rate returned by
        ``get_impute_rate(mode="transform")`` is compared with the same
        expected list.
        """
        imputer_value = ['', 'none', 'na', 'null', "10000", "-10000"]
        imputer = Imputer(missing_value_list=imputer_value)
        _, _ = imputer.fit(self.table_instance, "median", output_format='str')
        cols_impute_rate_ground_true = [0, 0.3, 0.1, 0.1, 0.1, 0.1, 0, 0.1, 0, 0]
        cols_fit_impute_rate = imputer.get_impute_rate(mode="fit")
        self.assertListEqual(cols_fit_impute_rate, cols_impute_rate_ground_true)

        cols_transform_value_ground_true = [-0.606584, -0.193332, -0.620475, -0.591332, -0.327392, -0.519504, -0.610669,
                                            -0.768581, -0.28757, -0.247477]
        _ = imputer.transform(self.table_instance, cols_transform_value_ground_true)
        # Query the transform-mode rate: asking for "fit" again here would only
        # re-check the value already asserted above and leave transform untested.
        # NOTE(review): assumes transforming the same table yields the same
        # per-column rates as fitting it -- confirm against Imputer.transform.
        cols_transform_impute_rate = imputer.get_impute_rate(mode="transform")
        self.assertListEqual(cols_transform_impute_rate, cols_impute_rate_ground_true)
Exemplo n.º 2
0
    def replace_outlier_value(self, input_data_features, mode="fit"):
        """Run outlier replacement on the features when it is enabled.

        When ``self.outlier_replace`` is falsy the input is returned as is.
        Otherwise an ``Imputer`` built from ``self.outlier_impute`` is used:

        * ``mode == "fit"``: the imputer is fitted; the processed data and the
          returned replace value are stored back (the latter on
          ``self.outlier_replace_value``), and ``self.outlier_impute`` is
          filled from the imputer when it was ``None``.
        * any other mode: the data is transformed with the stored
          ``self.outlier_replace_value``.

        In both cases ``self.outlier_replace_rate`` is updated from the
        imputer for the given ``mode``.

        Parameters
        ----------
        input_data_features : data object accepted by ``Imputer.fit`` / ``Imputer.transform``
        mode : str, default "fit"

        Returns
        -------
        The processed (or untouched) feature data.
        """
        if not self.outlier_replace:
            return input_data_features

        from federatedml.feature.imputer import Imputer

        outlier_imputer = Imputer(self.outlier_impute)
        if mode != "fit":
            input_data_features = outlier_imputer.transform(
                input_data_features,
                transform_value=self.outlier_replace_value)
        else:
            fit_result = outlier_imputer.fit(
                input_data_features,
                replace_method=self.outlier_replace_method,
                replace_value=self.outlier_replace_value)
            input_data_features, self.outlier_replace_value = fit_result

            if self.outlier_impute is None:
                self.outlier_impute = outlier_imputer.get_missing_value_list()

        self.outlier_replace_rate = outlier_imputer.get_impute_rate(mode)
        # Tracker callbacks for "outlier_value_ratio" / "outlier_value_list"
        # are currently disabled.

        return input_data_features
Exemplo n.º 3
0
    def transform(self, data):
        """Impute ``data`` with the stored replacement values.

        Builds an ``Imputer`` from ``self.missing_impute`` and transforms
        ``data`` using ``self.default_value`` and ``self.skip_cols``.
        ``self.missing_impute`` is filled from the imputer when it was
        ``None``, and ``self.missing_impute_rate`` is updated with the
        imputer's "transform" rate.

        Returns
        -------
        The data returned by ``Imputer.transform``.
        """
        LOGGER.info(f"Enter Feature Imputation transform")
        imputer = Imputer(self.missing_impute)
        result = imputer.transform(
            data,
            transform_value=self.default_value,
            skip_cols=self.skip_cols,
        )

        if self.missing_impute is None:
            self.missing_impute = imputer.get_missing_value_list()

        transform_rate = imputer.get_impute_rate("transform")
        self.missing_impute_rate = transform_rate
        return result
Exemplo n.º 4
0
    def fill_missing_value(self, input_data_features, mode="fit"):
        """Fill missing values in the features when missing-fill is enabled.

        When ``self.missing_fill`` is falsy the input is returned unchanged.
        Otherwise an ``Imputer`` built from ``self.missing_impute`` is used:

        * ``mode == "fit"``: the imputer is fitted with
          ``self.missing_fill_method`` / ``self.default_value``; the processed
          data and the returned replace value (stored on
          ``self.default_value``) are kept.
        * any other mode: the data is transformed with ``self.default_value``.

        Afterwards ``self.missing_impute`` is filled from the imputer when it
        was ``None`` and ``self.missing_impute_rate`` is updated for ``mode``.

        Parameters
        ----------
        input_data_features : data object accepted by ``Imputer.fit`` / ``Imputer.transform``
        mode : str, default "fit"

        Returns
        -------
        The processed (or untouched) feature data.
        """
        if not self.missing_fill:
            return input_data_features

        from federatedml.feature.imputer import Imputer

        imputer_processor = Imputer(self.missing_impute)
        if mode == "fit":
            input_data_features, self.default_value = imputer_processor.fit(
                input_data_features,
                replace_method=self.missing_fill_method,
                replace_value=self.default_value)
        else:
            input_data_features = imputer_processor.transform(
                input_data_features,
                transform_value=self.default_value)

        # One check covers both modes; the fit branch used to repeat it.
        if self.missing_impute is None:
            self.missing_impute = imputer_processor.get_missing_value_list()

        self.missing_impute_rate = imputer_processor.get_impute_rate(mode)

        return input_data_features
Exemplo n.º 5
0
    def fill_missing_value(self, input_data, tags_dict, mode="fit"):
        """Impute tag-formatted data by round-tripping it through a string form.

        Every value of ``input_data`` is first mapped through
        ``self.change_tag_to_str`` and a schema built from ``self.header``,
        ``self.sid_name`` and ``self.label_name`` is attached.  A default
        ``Imputer`` then either fits (``mode == "fit"``, which also updates
        ``self.default_value``) or transforms the data with
        ``self.default_value``.  ``self.missing_impute`` is filled from the
        imputer when it was ``None`` and ``self.missing_impute_rate`` is
        updated for ``mode``.  Finally every value is mapped back through
        ``self.change_str_to_tag``.

        Parameters
        ----------
        input_data : table-like object exposing ``mapValues``
        tags_dict : forwarded to the two conversion helpers
        mode : str, default "fit"

        Returns
        -------
        The imputed data after conversion back with ``change_str_to_tag``.
        """
        to_str = functools.partial(
            self.change_tag_to_str,
            tags_dict=tags_dict,
            delimitor=self.delimitor,
            with_label=self.with_label,
            tag_value_delimitor=self.tag_value_delimitor)
        input_data = input_data.mapValues(to_str)

        set_schema(input_data,
                   make_schema(self.header, self.sid_name, self.label_name))

        from federatedml.feature.imputer import Imputer

        imputer = Imputer()
        if mode != "fit":
            data = imputer.transform(input_data,
                                     transform_value=self.default_value)
        else:
            data, self.default_value = imputer.fit(
                input_data,
                replace_method=self.missing_fill_method,
                replace_value=self.default_value)
            LOGGER.debug("self.default_value is {}".format(self.default_value))

        if self.missing_impute is None:
            self.missing_impute = imputer.get_missing_value_list()
        LOGGER.debug("self.missing_impute is {}".format(self.missing_impute))

        self.missing_impute_rate = imputer.get_impute_rate(mode)

        # Convert the imputed string form back to the tag representation.
        to_tag = functools.partial(
            self.change_str_to_tag,
            tags_dict=tags_dict,
            delimitor=self.delimitor,
            tag_value_delimitor=self.tag_value_delimitor)
        return data.mapValues(to_tag)
Exemplo n.º 6
0
    def fit(self, data):
        """Fit the feature imputer on ``data`` and return the imputed data.

        Stores the header of ``data`` on ``self.header`` and checks that every
        key of ``self.col_missing_fill_method`` (when set) appears in it.
        An ``Imputer`` built from ``self.missing_impute`` is then fitted, and
        the following attributes are refreshed from it: ``default_value``,
        ``missing_impute`` (only when it was ``None``),
        ``missing_impute_rate``, ``cols_replace_method`` and ``skip_cols``.
        The component summary is set last.

        Raises
        ------
        ValueError
            If a key of ``self.col_missing_fill_method`` is not in the header.

        Returns
        -------
        The imputed data returned by ``Imputer.fit``.
        """
        LOGGER.info(f"Enter Feature Imputation fit")
        imputer = Imputer(self.missing_impute)
        self.header = get_header(data)

        # Validate per-column method keys before doing any fitting work.
        per_col_methods = self.col_missing_fill_method
        if per_col_methods:
            for k in per_col_methods.keys():
                if k in self.header:
                    continue
                raise ValueError(
                    f"{k} not found in data header. Please check col_missing_fill_method keys."
                )

        fit_output = imputer.fit(
            data,
            replace_method=self.missing_fill_method,
            replace_value=self.default_value,
            col_replace_method=self.col_missing_fill_method)
        imputed_data, self.default_value = fit_output

        if self.missing_impute is None:
            self.missing_impute = imputer.get_missing_value_list()
        self.missing_impute_rate = imputer.get_impute_rate("fit")
        self.cols_replace_method = imputer.cols_replace_method
        self.skip_cols = imputer.get_skip_cols()

        summary = self.get_summary()
        self.set_summary(summary)

        return imputed_data