Exemplo n.º 1
0
    def feature_selection_transform(self,
                                    data_instance,
                                    flow_id='sample_flowid'):
        """Apply a previously saved feature-selection model to ``data_instance``.

        The input is returned untouched when any of the following holds:
        the workflow runs in homo mode, ``data_instance`` is ``None``,
        ``workflow_param.need_feature_selection`` is falsy, or the role is
        the arbiter.

        Otherwise the feature-selection parameters are parsed from
        ``self.config_path`` and checked, a host or guest selector is built
        according to ``self.role``, its model is loaded from
        ``workflow_param.model_table`` / ``workflow_param.model_namespace``
        and the selector's ``transform`` result is returned.

        Args:
            data_instance: Data to transform; may be ``None``.
            flow_id: Identifier passed to the selector via ``set_flowid``.

        Returns:
            The transformed data, or ``data_instance`` unchanged in the
            pass-through cases listed above.

        Raises:
            ValueError: If ``self.role`` is not host, guest or arbiter.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection transform")
        # Parameters are parsed and validated before the role dispatch, so a
        # bad configuration is reported for every role, arbiter included.
        feature_select_param = ParamExtract.parse_param_from_config(
            param_generator.FeatureSelectionParam(), self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter has no selector to apply; pass the data through.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)
        feature_selector.load_model(self.workflow_param.model_table,
                                    self.workflow_param.model_namespace)

        LOGGER.debug(
            "Role: {}, in transform feature selector left_cols: {}".format(
                self.role, feature_selector.left_cols))

        data_instance = feature_selector.transform(data_instance)

        LOGGER.info("Finish feature selection")
        return data_instance
Exemplo n.º 2
0
class HeteroFeatureSelectHostWorkflow(WorkFlow):
    """Workflow that drives hetero feature selection for the host party.

    ``run`` dispatches on ``feature_param.method`` (``'fit'``,
    ``'fit_transform'`` or ``'transform'``) and delegates the actual work to
    a ``HeteroFeatureSelectionHost`` model.
    """

    def _initialize(self, config_path):
        """Set role/mode, build the model, then initialise workflow params.

        Args:
            config_path: Path of the runtime configuration passed on to the
                model and workflow-parameter initialisers.
        """
        self._initialize_role_and_mode()
        self._initialize_model(config_path)
        # Not defined in this class; presumably inherited from WorkFlow.
        self._initialize_workflow_param(config_path)

    def _initialize_role_and_mode(self):
        """Mark this workflow as the host side of a hetero task."""
        self.role = consts.HOST
        self.mode = consts.HETERO

    def _initialize_intersect(self, config):
        """Do nothing; this workflow performs no intersect initialisation."""
        pass

    def _initialize_model(self, runtime_conf_path):
        """Parse and check feature-selection params and build the host model.

        Args:
            runtime_conf_path: Path of the runtime configuration the
                feature-selection parameters are parsed from.
        """
        self.feature_param = ParamExtract.parse_param_from_config(
            FeatureSelectionParam(), runtime_conf_path)
        FeatureSelectionParamChecker.check_param(self.feature_param)
        self.model = HeteroFeatureSelectionHost(self.feature_param)
        LOGGER.debug("Host model started")

    def _read_feature_select_input(self, **kwargs):
        """Generate the data instance from the configured train input table.

        Extra keyword arguments are forwarded to ``gen_data_instance``.
        """
        return self.gen_data_instance(
            self.workflow_param.train_input_table,
            self.workflow_param.train_input_namespace,
            **kwargs)

    def _save_feature_select_model(self):
        """Save the model under the configured model table and namespace."""
        self.model.save_model(self.workflow_param.model_table,
                              self.workflow_param.model_namespace)

    def _save_feature_select_result(self, result_table):
        """Save ``result_table`` as the predict result and log its location."""
        self.save_predict_result(result_table)
        LOGGER.info("Predict result saved, table: {},"
                    " namespace: {}".format(
                        self.workflow_param.predict_output_table,
                        self.workflow_param.predict_output_namespace))

    @status_tracer_decorator.status_trace
    def run(self):
        """Run the host-side feature selection selected by the configuration.

        Raises:
            TypeError: If ``workflow_param.method`` is not
                ``"feature_select"``.
        """
        self._init_argument()

        if self.workflow_param.method != "feature_select":
            raise TypeError("method %s is not support yet" %
                            (self.workflow_param.method))

        method = self.feature_param.method
        # ``local_only`` picks the ``*_local*`` variants of the model calls.
        local_only = self.feature_param.local_only

        if method == 'fit':
            train_data_instance = self._read_feature_select_input()
            if local_only:
                self.model.fit_local(train_data_instance)
            else:
                self.model.fit(train_data_instance)
            self._save_feature_select_model()

        elif method == 'fit_transform':
            train_data_instance = self._read_feature_select_input()
            if local_only:
                result_table = self.model.fit_local_transform(
                    train_data_instance)
            else:
                result_table = self.model.fit_transform(
                    train_data_instance)
            self._save_feature_select_model()
            self._save_feature_select_result(result_table)

        elif method == 'transform':
            train_data_instance = self._read_feature_select_input(
                mode='transform')
            # The model must be loaded before it can transform the input.
            self.load_model()
            result_table = self.model.transform(train_data_instance)
            self._save_feature_select_result(result_table)

        # Any other ``feature_param.method`` value falls through here with
        # no work done.
        LOGGER.info("Finish host party feature selection")