示例#1
0
    def feature_selection_fit(self,
                              data_instance,
                              flow_id='sample_flowid',
                              without_transform=False):
        """Fit the hetero feature selector for this role and save its model.

        ``data_instance`` is returned unchanged when the workflow runs in
        homo mode, when ``data_instance`` is ``None``, when
        ``workflow_param.need_feature_selection`` is falsy, or when this
        party is the arbiter.

        :param data_instance: data handed to the selector's ``fit`` /
            ``fit_transform``.
        :param flow_id: identifier passed to the selector via
            ``set_flowid``.
        :param without_transform: when true, call ``fit`` only; otherwise
            call ``fit_transform``.
        :return: whatever the selector call returns, or the input itself on
            the skip paths listed above.
        :raises ValueError: if ``self.role`` is not host, guest or arbiter.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection")
        feature_select_param = param_generator.FeatureSelectionParam()
        feature_select_param = ParamExtract.parse_param_from_config(
            feature_select_param, self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter has no selector to fit; hand the data back as is.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # IV-based filters get a reference to a binning model, addressed by
        # the same table/namespace this workflow saves its models under.
        # NOTE(review): presumably the selector reads binning results from
        # there -- confirm against init_previous_model.
        filter_methods = feature_select_param.filter_method
        previous_model = {}
        if ('iv_value_thres' in filter_methods
                or 'iv_percentile' in filter_methods):
            previous_model['binning_model'] = {
                'name': self.workflow_param.model_table,
                'namespace': self.workflow_param.model_namespace
            }
        feature_selector.init_previous_model(**previous_model)

        if without_transform:
            data_instance = feature_selector.fit(data_instance)
        else:
            data_instance = feature_selector.fit_transform(data_instance)

        save_result = feature_selector.save_model(
            self.workflow_param.model_table,
            self.workflow_param.model_namespace)
        # Record every saved (meta, param) buffer type in the pipeline.
        for meta_buffer_type, param_buffer_type in save_result:
            self.pipeline.node_meta.append(meta_buffer_type)
            self.pipeline.node_param.append(param_buffer_type)

        LOGGER.info("Finish feature selection")
        return data_instance
示例#2
0
    def feature_selection_fit(self, data_instance, flow_id='sample_flowid'):
        """Fit the hetero feature selector for this role and save its model.

        ``data_instance`` is returned unchanged when the workflow runs in
        homo mode, when ``data_instance`` is ``None``, when
        ``workflow_param.need_feature_selection`` is falsy, or when this
        party is the arbiter.

        :param data_instance: data handed to the selector.
        :param flow_id: identifier passed to the selector via
            ``set_flowid``.
        :return: the result of ``fit_local_transform`` (when the parsed
            parameter ``local_only`` is truthy) or ``fit_transform``, or
            the input itself on the skip paths listed above.
        :raises ValueError: if ``self.role`` is not host, guest or arbiter.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection")
        feature_select_param = param_generator.FeatureSelectionParam()
        feature_select_param = ParamExtract.parse_param_from_config(
            feature_select_param, self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter has no selector to fit; hand the data back as is.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # local_only decides between fit_local_transform and fit_transform.
        if feature_select_param.local_only:
            data_instance = feature_selector.fit_local_transform(
                data_instance)
        else:
            data_instance = feature_selector.fit_transform(data_instance)

        # Saving is identical for both fitting paths, so it is done once.
        save_result = feature_selector.save_model(
            self.workflow_param.model_table,
            self.workflow_param.model_namespace)
        # Record every saved (meta, param) buffer type in the pipeline.
        for meta_buffer_type, param_buffer_type in save_result:
            self.pipeline.node_meta.append(meta_buffer_type)
            self.pipeline.node_param.append(param_buffer_type)

        LOGGER.info("Finish feature selection")
        return data_instance
示例#3
0
    def feature_selection_transform(self,
                                    data_instance,
                                    flow_id='sample_flowid'):
        """Apply a previously saved hetero feature selection model.

        ``data_instance`` is returned unchanged when the workflow runs in
        homo mode, when ``data_instance`` is ``None``, when
        ``workflow_param.need_feature_selection`` is falsy, or when this
        party is the arbiter.

        :param data_instance: data handed to the selector's ``transform``.
        :param flow_id: identifier passed to the selector via
            ``set_flowid``.
        :return: the result of the selector's ``transform``, or the input
            itself on the skip paths listed above.
        :raises ValueError: if ``self.role`` is not host, guest or arbiter.
        """
        if self.mode == consts.HOMO:
            LOGGER.info(
                "Homo feature selection is not supporting yet. Coming soon")
            return data_instance

        if data_instance is None:
            return data_instance

        if not self.workflow_param.need_feature_selection:
            LOGGER.info("No need to do feature selection")
            return data_instance

        LOGGER.info("Start feature selection transform")
        feature_select_param = param_generator.FeatureSelectionParam()
        feature_select_param = ParamExtract.parse_param_from_config(
            feature_select_param, self.config_path)
        param_checker.FeatureSelectionParamChecker.check_param(
            feature_select_param)

        if self.role == consts.HOST:
            feature_selector = HeteroFeatureSelectionHost(
                feature_select_param)
        elif self.role == consts.GUEST:
            feature_selector = HeteroFeatureSelectionGuest(
                feature_select_param)
        elif self.role == consts.ARBITER:
            # The arbiter has no selector to apply; hand the data back as is.
            return data_instance
        else:
            raise ValueError("Unknown role of workflow")

        feature_selector.set_flowid(flow_id)

        # Load the selector state from the table/namespace configured in
        # the workflow parameters before transforming.
        feature_selector.load_model(self.workflow_param.model_table,
                                    self.workflow_param.model_namespace)

        LOGGER.debug(
            "Role: {}, in transform feature selector left_cols: {}".format(
                self.role, feature_selector.left_cols))

        data_instance = feature_selector.transform(data_instance)

        LOGGER.info("Finish feature selection")
        return data_instance