def feature_selection_transform(self, data_instance, flow_id='sample_flowid'):
    """Apply a previously saved feature-selection model to ``data_instance``.

    The selection parameters are parsed from ``self.config_path`` and
    validated, a role-specific selector is built, the saved model is loaded
    from ``self.workflow_param.model_table`` / ``model_namespace``, and the
    selector's ``transform`` is applied.

    Args:
        data_instance: Data to transform. Returned as-is when it is ``None``.
        flow_id: Identifier handed to the selector through ``set_flowid``.

    Returns:
        The selector's ``transform`` output, or ``data_instance`` unchanged
        when the workflow runs in homo mode, the input is ``None``, feature
        selection is switched off, or the role is arbiter.

    Raises:
        ValueError: If ``self.role`` is not host, guest or arbiter.
    """
    # Homo mode has no selector implementation here: pass the data through.
    # (The guard must compare against the HOMO constant; the previous
    # ``consts.H**O`` parsed as ``consts.H ** O`` and raised on every call.)
    if self.mode == consts.HOMO:
        # NOTE(review): the message text below looks masked ("H**o"); it is
        # kept byte-identical because it is runtime output.
        LOGGER.info(
            "H**o feature selection is not supporting yet. Coming soon")
        return data_instance

    if data_instance is None:
        return data_instance

    if not self.workflow_param.need_feature_selection:
        LOGGER.info("No need to do feature selection")
        return data_instance

    LOGGER.info("Start feature selection transform")
    feature_select_param = ParamExtract.parse_param_from_config(
        param_generator.FeatureSelectionParam(), self.config_path)
    param_checker.FeatureSelectionParamChecker.check_param(
        feature_select_param)

    # Parameters are parsed and checked before the role dispatch, so an
    # invalid configuration is reported for every role, arbiter included.
    if self.role == consts.HOST:
        feature_selector = HeteroFeatureSelectionHost(feature_select_param)
    elif self.role == consts.GUEST:
        feature_selector = HeteroFeatureSelectionGuest(feature_select_param)
    elif self.role == consts.ARBITER:
        # The arbiter gets no selector; its data is returned untouched.
        return data_instance
    else:
        raise ValueError("Unknown role of workflow")

    feature_selector.set_flowid(flow_id)
    feature_selector.load_model(self.workflow_param.model_table,
                                self.workflow_param.model_namespace)
    LOGGER.debug(
        "Role: {}, in transform feature selector left_cols: {}".format(
            self.role, feature_selector.left_cols))

    data_instance = feature_selector.transform(data_instance)
    LOGGER.info("Finish feature selection")
    return data_instance
class HeteroFeatureSelectHostWorkflow(WorkFlow):
    """Workflow that runs hetero feature selection for the host role.

    ``run`` dispatches on ``self.feature_param.method`` (``'fit'``,
    ``'fit_transform'`` or ``'transform'``) once the workflow method is
    confirmed to be ``"feature_select"``.
    """

    def _initialize(self, config_path):
        """Set role/mode, build the model, then load the workflow parameters."""
        self._initialize_role_and_mode()
        self._initialize_model(config_path)
        self._initialize_workflow_param(config_path)

    def _initialize_role_and_mode(self):
        """Mark this workflow as the HOST party of a HETERO job."""
        self.role = consts.HOST
        self.mode = consts.HETERO

    def _initialize_intersect(self, config):
        """Do nothing; this workflow sets up no intersection step."""
        pass

    def _initialize_model(self, runtime_conf_path):
        """Parse and validate the selection parameters and build the host model.

        Args:
            runtime_conf_path: Path of the runtime configuration the
                feature-selection parameters are parsed from.
        """
        feature_param = FeatureSelectionParam()
        self.feature_param = ParamExtract.parse_param_from_config(
            feature_param, runtime_conf_path)
        FeatureSelectionParamChecker.check_param(self.feature_param)
        self.model = HeteroFeatureSelectionHost(self.feature_param)
        # This is the host workflow; the message previously said "Guest".
        LOGGER.debug("Host model started")

    def _load_feature_select_input(self, **gen_kwargs):
        """Build the data instance from the configured train input table."""
        return self.gen_data_instance(
            self.workflow_param.train_input_table,
            self.workflow_param.train_input_namespace,
            **gen_kwargs)

    def _persist_transformed_table(self, result_table):
        """Save a transformed table as the predict result and log where it went."""
        self.save_predict_result(result_table)
        LOGGER.info("Predict result saved, table: {},"
                    " namespace: {}".format(
                        self.workflow_param.predict_output_table,
                        self.workflow_param.predict_output_namespace))

    @status_tracer_decorator.status_trace
    def run(self):
        """Execute the configured feature-selection step.

        Raises:
            TypeError: If ``self.workflow_param.method`` is not
                ``"feature_select"``.
        """
        self._init_argument()

        if self.workflow_param.method != "feature_select":
            raise TypeError("method %s is not support yet" %
                            (self.workflow_param.method))

        select_method = self.feature_param.method
        if select_method == 'fit':
            train_data_instance = self._load_feature_select_input()
            if self.feature_param.local_only:
                self.model.fit_local(train_data_instance)
            else:
                self.model.fit(train_data_instance)
            self.model.save_model(self.workflow_param.model_table,
                                  self.workflow_param.model_namespace)

        elif select_method == 'fit_transform':
            train_data_instance = self._load_feature_select_input()
            if self.feature_param.local_only:
                result_table = self.model.fit_local_transform(
                    train_data_instance)
            else:
                result_table = self.model.fit_transform(train_data_instance)
            # The model is saved before the transformed table is written out.
            self.model.save_model(self.workflow_param.model_table,
                                  self.workflow_param.model_namespace)
            self._persist_transformed_table(result_table)

        elif select_method == 'transform':
            train_data_instance = self._load_feature_select_input(
                mode='transform')
            # Transform relies on a model loaded after the data is read.
            self.load_model()
            result_table = self.model.transform(train_data_instance)
            self._persist_transformed_table(result_table)

        # Any other ``feature_param.method`` value falls through without
        # doing work, as in the original control flow.
        LOGGER.info("Finish host party feature selection")