def _filter(self, data_instances, method, suffix):
    """Run one selection filter and record its outcome on this object.

    A filter is requested from ``filter_factory`` for ``method``, handed the
    current selection properties, the statistics object, the binning model
    and the transfer variable, and then fitted on ``data_instances``. The
    fitted filter's selection properties replace
    ``self.curt_select_properties`` and are registered in
    ``self.completed_selection_result``. Finally
    ``update_curt_select_param`` is called and ``self.meta_dicts`` is
    replaced by the value returned from the filter's ``get_meta_obj``.

    Args:
        data_instances: Data passed unchanged to the filter's ``fit``.
        method: Filter name given to the factory; also used as the key under
            which the results are registered and logged.
        suffix: Passed unchanged to the filter's ``fit``.
    """
    selector = filter_factory.get_filter(
        filter_name=method,
        model_param=self.model_param,
        role=self.role,
    )
    selector.set_selection_properties(self.curt_select_properties)
    selector.set_statics_obj(self.static_obj)
    selector.set_binning_obj(self.binning_model)
    selector.set_transfer_variable(self.transfer_variable)

    fitted = selector.fit(data_instances, suffix)
    self.curt_select_properties = fitted.selection_properties

    # The attribute is optional on the filter, so default to None.
    host_props = getattr(selector, 'host_selection_properties', None)
    host_msg = "method: {}, host_select_properties: {}".format(
        method, host_props)
    LOGGER.debug(host_msg)

    self.completed_selection_result.add_filter_results(
        filter_name=method,
        select_properties=self.curt_select_properties,
        host_select_properties=host_props,
    )

    before_msg = "method: {}, selection_cols: {}, left_cols: {}".format(
        method,
        self.curt_select_properties.select_col_names,
        self.curt_select_properties.left_col_names,
    )
    LOGGER.debug(before_msg)

    self.update_curt_select_param()

    # Re-read the properties: the update call above may have changed them.
    after_msg = (
        "After updated, method: {}, selection_cols: {}, left_cols: {}".format(
            method,
            self.curt_select_properties.select_col_names,
            self.curt_select_properties.left_col_names,
        )
    )
    LOGGER.debug(after_msg)

    self.meta_dicts = selector.get_meta_obj(self.meta_dicts)
def _run_filter(self, data_table):
    """Fit a percentage-value filter on ``data_table`` and check the result.

    The filter is built with ``upper_pct`` set to 0.2 and starts from
    selection properties that cover every column of ``self.header``. After
    fitting, the remaining column names must equal the header entries at
    positions 3 and 4.

    Args:
        data_table: Table passed unchanged to the filter's ``fit``.
    """
    param = FeatureSelectionParam()
    param.percentage_value_param.upper_pct = 0.2
    percentage_filter = get_filter(consts.PERCENTAGE_VALUE, param)

    # Start with every header column both "left" and selected.
    properties = SelectionProperties()
    properties.set_header(self.header)
    properties.set_last_left_col_indexes(list(range(len(self.header))))
    properties.set_select_all_cols()
    percentage_filter.set_selection_properties(properties)

    fitted = percentage_filter.fit(data_table, suffix='')
    result_properties = fitted.selection_properties

    expected_names = [self.header[position] for position in (3, 4)]
    self.assertEqual(result_properties.all_left_col_names, expected_names)
def test_unique_logic(self):
    """Check that the unique-value filter keeps only ``self.header[1]``.

    A table is generated with ``self.gen_data(1000, 48)``, a unique-value
    filter with default parameters is fitted on it starting from selection
    properties that cover every header column, and the remaining column
    names are compared against ``[self.header[1]]``.
    """
    data_table = self.gen_data(1000, 48)
    try:
        select_param = FeatureSelectionParam()
        filter_obj = get_filter(consts.UNIQUE_VALUE, select_param)

        # Start with every header column both "left" and selected.
        select_properties = SelectionProperties()
        select_properties.set_header(self.header)
        select_properties.set_last_left_col_indexes(
            list(range(len(self.header))))
        select_properties.set_select_all_cols()
        filter_obj.set_selection_properties(select_properties)

        res_select_properties = filter_obj.fit(
            data_table, suffix='').selection_properties
        self.assertEqual(res_select_properties.all_left_col_names,
                         [self.header[1]])
    finally:
        # Release the table even when the fit or the assertion fails;
        # previously a failing assertion skipped the destroy call.
        data_table.destroy()
def test_left_logic(self):
    """Check the manual filter when columns to keep are given explicitly.

    The manual parameters request left column indexes ``[0, 1]`` and left
    column names ``['3', '2']``. After fitting on a generated table, the
    remaining column names must equal ``['0', '1', '2', '3']``.
    """
    table = self.gen_data(1000, 10, 48)

    param = FeatureSelectionParam()
    manual = param.manually_param
    manual.left_col_indexes = [0, 1]
    manual.left_col_names = ['3', '2']
    manual_filter = get_filter(consts.MANUALLY_FILTER, param)

    # Start with every header column both "left" and selected.
    properties = SelectionProperties()
    properties.set_header(self.header)
    properties.set_last_left_col_indexes(list(range(len(self.header))))
    properties.set_select_all_cols()
    manual_filter.set_selection_properties(properties)

    fitted = manual_filter.fit(table, suffix='')
    result_properties = fitted.selection_properties

    expected_names = ['0', '1', '2', '3']
    self.assertEqual(result_properties.all_left_col_names, expected_names)
def _filter(self, data_instances, method, suffix, idx=0):
    """Run one selection filter and record its outcome on this object.

    A filter is requested from ``filter_factory`` for ``method`` and
    ``idx``, handed the current selection properties and the transfer
    variable, and fitted on ``data_instances``. The fitted filter's
    selection properties replace ``self.curt_select_properties``, are
    registered in ``self.completed_selection_result`` and summarised via
    ``self.add_summary``. Finally ``update_curt_select_param`` is called and
    the filter's meta object is appended to ``self.meta_list``.

    Args:
        data_instances: Data passed unchanged to the filter's ``fit``.
        method: Filter name given to the factory. For the statistic, IV and
            PSI filters the name used afterwards for logging and result
            bookkeeping is derived from the configured metric instead.
        suffix: Passed unchanged to the filter's ``fit``.
        idx: Position into the per-filter ``metrics`` / ``filter_type``
            lists; also forwarded to the factory. Defaults to 0.
    """
    this_filter = filter_factory.get_filter(filter_name=method,
                                            model_param=self.model_param,
                                            role=self.role,
                                            model=self,
                                            idx=idx)

    # The factory needs the generic filter name; everything below reports
    # under the metric configured at position ``idx``.
    if method == consts.STATISTIC_FILTER:
        method = self.model_param.statistic_param.metrics[idx]
    elif method == consts.IV_FILTER:
        metric = self.model_param.iv_param.metrics[idx]
        f_type = self.model_param.iv_param.filter_type[idx]
        method = f"{metric}_{f_type}"
    elif method == consts.PSI_FILTER:
        metric = self.model_param.psi_param.metrics[idx]
        f_type = self.model_param.psi_param.filter_type[idx]
        method = f"{metric}_{f_type}"

    this_filter.set_selection_properties(self.curt_select_properties)
    this_filter.set_transfer_variable(self.transfer_variable)
    self.curt_select_properties = this_filter.fit(
        data_instances, suffix).selection_properties

    # The attribute is optional on the filter, so default to None.
    host_select_properties = getattr(this_filter,
                                     'host_selection_properties', None)
    # Truthiness guard: an empty sequence must not make a debug log raise
    # IndexError on the ``[0]`` lookup.
    if host_select_properties:
        LOGGER.debug("method: {}, host_select_properties: {}".format(
            method, host_select_properties[0].all_left_col_names))

    self.completed_selection_result.add_filter_results(
        filter_name=method,
        select_properties=self.curt_select_properties,
        host_select_properties=host_select_properties)

    last_col_nums = len(self.curt_select_properties.last_left_col_names)
    left_col_names = self.curt_select_properties.left_col_names
    self.add_summary(
        method, {
            "last_col_nums": last_col_nums,
            "left_col_nums": len(left_col_names),
            "left_col_names": left_col_names
        })
    LOGGER.debug("method: {}, selection_cols: {}, left_cols: {}".format(
        method, self.curt_select_properties.select_col_names,
        self.curt_select_properties.left_col_names))

    self.update_curt_select_param()
    LOGGER.debug("After updated, method: {}, selection_cols: {}".format(
        method, self.curt_select_properties.select_col_names))
    self.meta_list.append(this_filter.get_meta_obj())
def test_unique_logic(self):
    """Check the coefficient-of-variation threshold filter.

    The filter is configured with ``value_threshold`` 0.1 and fitted on a
    generated table, starting from selection properties that cover every
    header column. The remaining column names must equal the header entries
    whose value in ``self.coe_list`` is at least the threshold, and there
    must be exactly 9 of them.
    """
    data_table = self.gen_data(1000, 10, 48)
    try:
        select_param = FeatureSelectionParam()
        select_param.variance_coe_param.value_threshold = 0.1
        filter_obj = get_filter(consts.COEFFICIENT_OF_VARIATION_VALUE_THRES,
                                select_param)

        # Start with every header column both "left" and selected.
        select_properties = SelectionProperties()
        select_properties.set_header(self.header)
        select_properties.set_last_left_col_indexes(
            list(range(len(self.header))))
        select_properties.set_select_all_cols()
        filter_obj.set_selection_properties(select_properties)

        res_select_properties = filter_obj.fit(
            data_table, suffix='').selection_properties

        threshold = select_param.variance_coe_param.value_threshold
        result = [self.header[idx]
                  for idx, coe in enumerate(self.coe_list)
                  if coe >= threshold]
        self.assertEqual(res_select_properties.all_left_col_names, result)
        self.assertEqual(len(res_select_properties.all_left_col_names), 9)
    finally:
        # Release the table even when the fit or an assertion fails;
        # previously a failing assertion skipped the destroy call.
        data_table.destroy()
def test_filter_logic(self):
    """Check the outlier-column filter.

    The filter is configured with ``percentile`` 0.9 and
    ``upper_threshold`` 99 and fitted on a generated table, starting from
    selection properties that cover every header column. The remaining
    column names must equal the first 9 header entries.
    """
    data_table = self.gen_data(1000, 10, 48)
    try:
        select_param = FeatureSelectionParam()
        select_param.outlier_param.percentile = 0.9
        select_param.outlier_param.upper_threshold = 99
        filter_obj = get_filter(consts.OUTLIER_COLS, select_param)

        # Start with every header column both "left" and selected.
        select_properties = SelectionProperties()
        select_properties.set_header(self.header)
        select_properties.set_last_left_col_indexes(
            list(range(len(self.header))))
        select_properties.set_select_all_cols()
        filter_obj.set_selection_properties(select_properties)

        res_select_properties = filter_obj.fit(
            data_table, suffix='').selection_properties

        self.assertEqual(res_select_properties.all_left_col_names,
                         [self.header[x] for x in range(9)])
        self.assertEqual(len(res_select_properties.all_left_col_names), 9)
    finally:
        # Release the table even when the fit or an assertion fails;
        # previously a failing assertion skipped the destroy call.
        data_table.destroy()