Exemplo n.º 1
0
    def predict(self, data_inst):
        """Host side of FTL prediction: forward batches through the local
        NN to get u_B and send the stacked result to the guest party."""
        LOGGER.debug('host start to predict')

        # keep the transferred variable alive until the guest has read it
        self.transfer_variable.predict_host_u.disable_auto_clean()

        cache_key = self.get_dataset_key(data_inst)

        aligned_inst = data_overview.header_alignment(data_inst,
                                                      self.store_header)

        # reuse a previously built data loader for the same dataset
        data_loader = self.cache_dataloader.get(cache_key)
        if data_loader is None:
            data_loader, _, _, _ = self.prepare_data(self.init_intersect_obj(),
                                                     aligned_inst,
                                                     guest_side=False)
            self.cache_dataloader[cache_key] = data_loader

        # forward every batch through the local NN and stack the outputs
        predicts = np.concatenate(
            [self.nn.predict(data_loader[idx])
             for idx in range(len(data_loader))],
            axis=0)

        self.transfer_variable.predict_host_u.remote(predicts,
                                                     suffix=(0, 'host_u'))

        LOGGER.debug('ftl host prediction done')

        return None
Exemplo n.º 2
0
    def predict(self, data_inst):
        """Guest side of FTL prediction: fetch u from the host, combine it
        with phi into raw scores, squash with sigmoid, and format output."""
        LOGGER.debug('guest start to predict')

        cache_key = self.get_dataset_key(data_inst)

        aligned_inst = data_overview.header_alignment(data_inst, self.store_header)

        # reuse a previously built data loader for the same dataset
        data_loader = self.cache_dataloader.get(cache_key)
        if data_loader is None:
            data_loader, _, _, _ = self.prepare_data(
                self.init_intersect_obj(), aligned_inst, guest_side=True)
            self.cache_dataloader[cache_key] = data_loader

        LOGGER.debug('try to get predict u from host, suffix is {}'.format((0, 'host_u')))
        host_predicts = self.transfer_variable.predict_host_u.get(idx=0, suffix=(0, 'host_u'))

        # score = sigmoid(u_host . phi^T), one value per overlapping sample
        raw_scores = np.matmul(host_predicts, self.phi.transpose())
        scores = [float(s) for s in self.sigmoid(raw_scores)]

        predict_tb = session.parallelize(zip(data_loader.get_overlap_keys(), scores,),
                                         include_key=True,
                                         partition=data_inst.partitions)

        predict_result = self.predict_score_to_output(
            aligned_inst, predict_tb, classes=[0, 1],
            threshold=self.predict_param.threshold)

        LOGGER.debug('ftl guest prediction done')

        return predict_result
    def predict(self, data_inst, predict_param=None):
        """Host side of hetero boosting prediction: align the input table
        to the model header (cached per dataset), then replay every stored
        decision tree so the guest can accumulate scores."""
        LOGGER.info("start predict")

        cache_dataset_key = self.predict_data_cache.get_data_key(data_inst)
        cached = self.data_alignment_map.get(cache_dataset_key)
        if cached is not None:
            data_inst = cached
        else:
            data_inst = self.data_alignment(data_inst)
            # rebuild the model header from the fid -> feature-name mapping
            header = [None] * len(self.feature_name_fid_mapping)
            for fid, name in self.feature_name_fid_mapping.items():
                header[fid] = name
            data_inst = data_overview.header_alignment(data_inst, header)
            self.data_alignment_map[cache_dataset_key] = data_inst

        rounds = len(self.trees_) // self.tree_dim
        predict_start_round = self.sync_predict_start_round()
        for boost_round in range(predict_start_round, rounds):
            for tidx in range(self.tree_dim):
                LOGGER.info(
                    "start to predict, boost round: {}, tree index: {}".format(
                        boost_round, tidx))
                tree_inst = HeteroDecisionTreeHost(self.tree_param)
                tree_inst.load_model(self.tree_meta,
                                     self.trees_[boost_round * self.tree_dim + tidx])
                tree_inst.set_flowid(self.generate_flowid(boost_round, tidx))
                tree_inst.set_runtime_idx(
                    self.component_properties.local_partyid)
                tree_inst.predict(data_inst)

        LOGGER.info("end predict")
Exemplo n.º 4
0
 def align_data_header(self, data_instances, pre_header):
     """
     Align features of given data to the model header, memoizing the
     result per input table.

     NOTE(review): the cache is keyed on id(data_instances); if a table
     is garbage-collected its id may be reused by a new object and
     return a stale alignment — confirm callers keep the table alive
     for the cache's lifetime.

     :param data_instances: data table
     :param pre_header: list, header of model
     :return: dtable, aligned data
     """
     cache_key = id(data_instances)
     aligned = self._align_cache.get(cache_key)
     if aligned is None:
         aligned = header_alignment(data_instances=data_instances,
                                    pre_header=pre_header)
         self._align_cache[cache_key] = aligned
     return aligned
Exemplo n.º 5
0
    def data_and_header_alignment(self, data_inst):
        """
        Turn data into sparse format and align the data table header,
        caching the processed table per dataset key.
        """
        cache_dataset_key = self.predict_data_cache.get_data_key(data_inst)

        cached = self.data_alignment_map.get(cache_dataset_key)
        if cached is not None:
            return cached

        sparse_data = self.data_alignment(data_inst)
        # rebuild the model header from the fid -> feature-name mapping
        header = [None] * len(self.feature_name_fid_mapping)
        for fid, name in self.feature_name_fid_mapping.items():
            header[fid] = name
        processed_data = data_overview.header_alignment(sparse_data, header)
        self.data_alignment_map[cache_dataset_key] = processed_data

        return processed_data
Exemplo n.º 6
0
    def read_data(self, input_data, mode="fit"):
        """Parse a dense delimited text table into data instances.

        :param input_data: dtable of raw delimited strings, keyed by sample id
        :param mode: "fit" learns header/statistics from the data;
                     "transform" reuses the header learned during fit
        :return: dtable of parsed data instances
        :raises ValueError: when a label index is set but rows are empty
                            or too short to contain the label column
        """
        LOGGER.info("start to read dense data and change data to instance")

        # fail fast on an empty input table
        abnormal_detection.empty_table_detection(input_data)

        input_data_labels = None

        # remember the fit-time header so transformed output can be re-aligned
        fit_header = None
        if mode == "transform":
            fit_header = self.header

        self.generate_header(input_data, mode=mode)

        if self.label_idx is not None:
            data_shape = data_overview.get_data_shape(input_data)
            if not data_shape or self.label_idx >= data_shape:
                raise ValueError(
                    "input data's value is empty, it does not contain a label")

            # features = all columns except the label column; a 1-column row
            # is label-only, so it yields an empty feature list.
            # NOTE(review): each lambda splits the value string twice —
            # presumably kept self-contained so it serializes cleanly to the
            # distributed runtime; confirm before refactoring.
            input_data_features = input_data.mapValues(
                lambda value: [] if data_shape == 1 else value.split(
                    self.delimitor, -1)[:self.label_idx] + value.split(
                        self.delimitor, -1)[self.label_idx + 1:])

            input_data_labels = input_data.mapValues(
                lambda value: value.split(self.delimitor, -1)[self.label_idx])

        else:
            # no label column: every field is a feature (or nothing at all
            # when no header was generated)
            input_data_features = input_data.mapValues(lambda value: [
            ] if not self.header else value.split(self.delimitor, -1))

        if mode == "fit":
            data_instance = self.fit(input_data, input_data_features,
                                     input_data_labels)
        else:
            data_instance = self.transform(input_data_features,
                                           input_data_labels)
            # data_instance = ModelBase.align_data_header(data_instance, fit_header)
            # re-align transformed output to the header learned at fit time
            data_instance = data_overview.header_alignment(
                data_instance, fit_header)

        return data_instance
Exemplo n.º 7
0
    def predict(self, data_inst):
        """Guest-side GBDT prediction.

        Aligns the input table to the model header (cached per dataset key),
        accumulates tree outputs into ``self.predict_F``, then converts the
        raw boosting scores into labelled prediction records.

        Fix: removed the dead ``predict_label`` computation in the
        multi-class branch — its result was never used, yet it launched an
        extra distributed mapValues job on every predict call.

        :param data_inst: dtable of data instances to score
        :return: dtable of [label, predicted label/value, score, score-detail dict]
        :raises NotImplementedError: for unsupported objectives / task types
        """
        LOGGER.info("start predict")
        cache_dataset_key = self.predict_data_cache.get_data_key(data_inst)
        if cache_dataset_key in self.data_alignment_map:
            data_inst = self.data_alignment_map[cache_dataset_key]
        else:
            data_inst = self.data_alignment(data_inst)
            # rebuild the model header from the fid -> feature-name mapping
            header = [None] * len(self.feature_name_fid_mapping)
            for idx, col in self.feature_name_fid_mapping.items():
                header[idx] = col
            data_inst = data_overview.header_alignment(data_inst, header)
            self.data_alignment_map[cache_dataset_key] = data_inst

        self.predict_f_value(data_inst, cache_dataset_key)

        # Convert raw boosting scores (predict_F) into per-sample scores.
        if self.task_type == consts.CLASSIFICATION:
            loss_method = self.loss
            if self.num_classes == 2:
                predicts = self.predict_F.mapValues(
                    lambda f: float(loss_method.predict(f)))
            else:
                predicts = self.predict_F.mapValues(
                    lambda f: loss_method.predict(f).tolist())

        elif self.task_type == consts.REGRESSION:
            if self.objective_param.objective in [
                    "lse", "lae", "huber", "log_cosh", "fair", "tweedie"
            ]:
                predicts = self.predict_F
            else:
                raise NotImplementedError(
                    "objective {} not supported yet".format(
                        self.objective_param.objective))

        # Join scores back to the instances to build the output records.
        if self.task_type == consts.CLASSIFICATION:
            classes_ = self.classes_
            if self.num_classes == 2:
                threshold = self.predict_param.threshold
                predict_result = data_inst.join(
                    predicts, lambda inst, pred: [
                        inst.label, classes_[1]
                        if pred > threshold else classes_[0], pred, {
                            "0": 1 - pred,
                            "1": pred
                        }
                    ])
            else:
                # multi-class: arg-max class wins; report all class scores
                predict_result = data_inst.join(
                    predicts, lambda inst, preds: [
                        inst.label, classes_[np.argmax(preds)],
                        np.max(preds),
                        dict(zip(map(str, classes_), preds))
                    ])

        elif self.task_type == consts.REGRESSION:
            predict_result = data_inst.join(
                predicts, lambda inst, pred:
                [inst.label,
                 float(pred),
                 float(pred), {
                     "label": float(pred)
                 }])

        else:
            raise NotImplementedError("task type {} not supported yet".format(
                self.task_type))

        LOGGER.info("end predict")

        return predict_result