def _filter(self, data_instances, method, suffix):
    """Run a single feature-selection filter and record what it selected.

    The filter named ``method`` is obtained from ``filter_factory``, given the
    current selection properties together with the statistics, binning and
    transfer-variable objects held on ``self``, and then fitted. The
    selection properties produced by the fit replace
    ``self.curt_select_properties`` and are registered in
    ``self.completed_selection_result`` under ``method``.

    Args:
        data_instances: Data forwarded unchanged to the filter's ``fit``.
        method: Name of the filter to build; also the key under which the
            results are stored.
        suffix: Second positional argument forwarded to the filter's ``fit``.
    """
    selector = filter_factory.get_filter(
        filter_name=method, model_param=self.model_param, role=self.role)

    # Hand the filter everything it is configured with before fitting.
    selector.set_selection_properties(self.curt_select_properties)
    selector.set_statics_obj(self.static_obj)
    selector.set_binning_obj(self.binning_model)
    selector.set_transfer_variable(self.transfer_variable)

    fitted = selector.fit(data_instances, suffix)
    self.curt_select_properties = fitted.selection_properties

    # The attribute may be missing on the filter; fall back to None then.
    host_props = getattr(selector, 'host_selection_properties', None)
    LOGGER.debug(
        "method: {}, host_select_properties: {}".format(method, host_props))

    self.completed_selection_result.add_filter_results(
        filter_name=method,
        select_properties=self.curt_select_properties,
        host_select_properties=host_props)

    LOGGER.debug(
        "method: {}, selection_cols: {}, left_cols: {}".format(
            method,
            self.curt_select_properties.select_col_names,
            self.curt_select_properties.left_col_names))

    self.update_curt_select_param()

    # Re-read from self: update_curt_select_param() may have replaced it.
    LOGGER.debug(
        "After updated, method: {}, selection_cols: {}, left_cols: {}".format(
            method,
            self.curt_select_properties.select_col_names,
            self.curt_select_properties.left_col_names))

    self.meta_dicts = selector.get_meta_obj(self.meta_dicts)
Exemplo n.º 2
0
 def _run_filter(self, data_table):
     """Fit a percentage-value filter on ``data_table`` and verify the result.

     The filter is configured with ``upper_pct = 0.2`` and starts with every
     column in ``self.header`` selected. After fitting, the remaining column
     names must be exactly the header entries at indexes 3 and 4.
     """
     param = FeatureSelectionParam()
     param.percentage_value_param.upper_pct = 0.2
     pct_filter = get_filter(consts.PERCENTAGE_VALUE, param)

     # Start with all header columns as candidates.
     props = SelectionProperties()
     props.set_header(self.header)
     props.set_last_left_col_indexes(list(range(len(self.header))))
     props.set_select_all_cols()
     pct_filter.set_selection_properties(props)

     fitted = pct_filter.fit(data_table, suffix='')
     outcome = fitted.selection_properties

     expected_names = [self.header[i] for i in [3, 4]]
     self.assertEqual(outcome.all_left_col_names, expected_names)
Exemplo n.º 3
0
 def test_unique_logic(self):
     """Check that the unique-value filter keeps only ``self.header[1]``.

     A table is generated with ``self.gen_data(1000, 48)``, every header
     column is offered to a ``consts.UNIQUE_VALUE`` filter built from default
     parameters, and the surviving column names are compared with
     ``[self.header[1]]``.
     """
     data_table = self.gen_data(1000, 48)
     # Destroy the table even when fitting or the assertion fails, so a
     # failing run does not leave the generated table behind.
     try:
         select_param = FeatureSelectionParam()
         filter_obj = get_filter(consts.UNIQUE_VALUE, select_param)

         select_properties = SelectionProperties()
         select_properties.set_header(self.header)
         select_properties.set_last_left_col_indexes(
             list(range(len(self.header))))
         select_properties.set_select_all_cols()
         filter_obj.set_selection_properties(select_properties)

         res_select_properties = filter_obj.fit(
             data_table, suffix='').selection_properties
         self.assertEqual(res_select_properties.all_left_col_names,
                          [self.header[1]])
     finally:
         data_table.destroy()
Exemplo n.º 4
0
 def test_left_logic(self):
     """Check the manual filter when columns are given by index and by name.

     The manual parameters list indexes ``[0, 1]`` and names ``['3', '2']``
     as columns to keep; after fitting, the remaining column names must be
     ``['0', '1', '2', '3']``.
     """
     table = self.gen_data(1000, 10, 48)

     param = FeatureSelectionParam()
     manual = param.manually_param
     manual.left_col_indexes = [0, 1]
     manual.left_col_names = ['3', '2']
     manual_filter = get_filter(consts.MANUALLY_FILTER, param)

     # Offer every header column to the filter.
     props = SelectionProperties()
     props.set_header(self.header)
     props.set_last_left_col_indexes(list(range(len(self.header))))
     props.set_select_all_cols()
     manual_filter.set_selection_properties(props)

     fitted = manual_filter.fit(table, suffix='')
     expected = ['0', '1', '2', '3']
     self.assertEqual(fitted.selection_properties.all_left_col_names,
                      expected)
Exemplo n.º 5
0
    def _filter(self, data_instances, method, suffix, idx=0):
        """Run a single feature-selection filter and record what it selected.

        The filter is built by ``filter_factory`` from ``method`` and ``idx``,
        fitted on ``data_instances``, and its selection properties replace
        ``self.curt_select_properties``. The outcome is registered in
        ``self.completed_selection_result``, summarised through
        ``self.add_summary`` and the filter's meta object is appended to
        ``self.meta_list``.

        Args:
            data_instances: Data forwarded unchanged to the filter's ``fit``.
            method: Filter name used to build the filter. For the statistic,
                IV and PSI filters it is then replaced by a more specific
                name derived from the ``idx``-th configured metric (and
                filter type), which is the name results are stored under.
            suffix: Second positional argument forwarded to the filter's
                ``fit``.
            idx: Position in the per-filter ``metrics`` / ``filter_type``
                lists; also passed to the factory. Defaults to 0.
        """
        this_filter = filter_factory.get_filter(filter_name=method,
                                                model_param=self.model_param,
                                                role=self.role,
                                                model=self,
                                                idx=idx)

        # Derive the name under which this filter's results are recorded.
        if method == consts.STATISTIC_FILTER:
            method = self.model_param.statistic_param.metrics[idx]
        elif method == consts.IV_FILTER:
            metric = self.model_param.iv_param.metrics[idx]
            f_type = self.model_param.iv_param.filter_type[idx]
            method = f"{metric}_{f_type}"
        elif method == consts.PSI_FILTER:
            metric = self.model_param.psi_param.metrics[idx]
            f_type = self.model_param.psi_param.filter_type[idx]
            method = f"{metric}_{f_type}"

        this_filter.set_selection_properties(self.curt_select_properties)
        this_filter.set_transfer_variable(self.transfer_variable)
        self.curt_select_properties = this_filter.fit(
            data_instances, suffix).selection_properties

        # The attribute may be missing on the filter; fall back to None then.
        host_select_properties = getattr(this_filter,
                                         'host_selection_properties', None)
        # Truthiness check (rather than ``is not None``) so that an empty
        # sequence does not make this debug line raise IndexError.
        if host_select_properties:
            LOGGER.debug("method: {}, host_select_properties: {}".format(
                method, host_select_properties[0].all_left_col_names))

        self.completed_selection_result.add_filter_results(
            filter_name=method,
            select_properties=self.curt_select_properties,
            host_select_properties=host_select_properties)

        last_col_nums = len(self.curt_select_properties.last_left_col_names)
        left_col_names = self.curt_select_properties.left_col_names
        self.add_summary(
            method, {
                "last_col_nums": last_col_nums,
                "left_col_nums": len(left_col_names),
                "left_col_names": left_col_names
            })
        LOGGER.debug("method: {}, selection_cols: {}, left_cols: {}".format(
            method, self.curt_select_properties.select_col_names,
            self.curt_select_properties.left_col_names))

        self.update_curt_select_param()
        LOGGER.debug("After updated, method: {}, selection_cols: {}".format(
            method, self.curt_select_properties.select_col_names))
        self.meta_list.append(this_filter.get_meta_obj())
Exemplo n.º 6
0
    def test_unique_logic(self):
        """Check the coefficient-of-variation threshold filter.

        With ``value_threshold = 0.1`` the surviving columns must be the
        header entries whose value in ``self.coe_list`` is at least the
        threshold, and there must be exactly 9 of them.
        """
        data_table = self.gen_data(1000, 10, 48)
        # Destroy the table even when fitting or an assertion fails, so a
        # failing run does not leave the generated table behind.
        try:
            select_param = FeatureSelectionParam()
            select_param.variance_coe_param.value_threshold = 0.1
            filter_obj = get_filter(
                consts.COEFFICIENT_OF_VARIATION_VALUE_THRES, select_param)

            select_properties = SelectionProperties()
            select_properties.set_header(self.header)
            select_properties.set_last_left_col_indexes(
                list(range(len(self.header))))
            select_properties.set_select_all_cols()
            filter_obj.set_selection_properties(select_properties)

            res_select_properties = filter_obj.fit(
                data_table, suffix='').selection_properties

            threshold = select_param.variance_coe_param.value_threshold
            result = [self.header[idx]
                      for idx, x in enumerate(self.coe_list)
                      if x >= threshold]

            self.assertEqual(res_select_properties.all_left_col_names, result)
            self.assertEqual(
                len(res_select_properties.all_left_col_names), 9)
        finally:
            data_table.destroy()
Exemplo n.º 7
0
    def test_filter_logic(self):
        """Check the outlier-columns filter.

        With ``percentile = 0.9`` and ``upper_threshold = 99`` the surviving
        columns must be the first 9 header entries.
        """
        data_table = self.gen_data(1000, 10, 48)
        # Destroy the table even when fitting or an assertion fails, so a
        # failing run does not leave the generated table behind.
        try:
            select_param = FeatureSelectionParam()
            select_param.outlier_param.percentile = 0.9
            select_param.outlier_param.upper_threshold = 99
            filter_obj = get_filter(consts.OUTLIER_COLS, select_param)

            select_properties = SelectionProperties()
            select_properties.set_header(self.header)
            select_properties.set_last_left_col_indexes(
                list(range(len(self.header))))
            select_properties.set_select_all_cols()
            filter_obj.set_selection_properties(select_properties)

            res_select_properties = filter_obj.fit(
                data_table, suffix='').selection_properties

            self.assertEqual(res_select_properties.all_left_col_names,
                             [self.header[x] for x in range(9)])
            self.assertEqual(
                len(res_select_properties.all_left_col_names), 9)
        finally:
            data_table.destroy()