Пример #1
0
    def fit(self, data_instance):
        """Compute the local correlation matrix and, if enabled, the cross-party one.

        The selected columns are standardized, their self dot product is
        divided by ``n`` and stored in ``self.local_corr``.  When
        ``self.model_param.cross_parties`` is set, the local standardized
        table and the other party's data are loaded as ``FixedPointTensor``
        objects inside an SPDZ context and their dot product divided by ``n``
        is stored in ``self.corr``.  ``self.shapes`` receives the tensor
        widths, and ``self._callback()`` is invoked last.

        Args:
            data_instance: input handed to ``self._select_columns``.
        """
        selected = self._select_columns(data_instance)
        n, normed = self._standardized(selected)
        self.local_corr = table_dot(normed, normed)
        self.local_corr /= n

        if not self.model_param.cross_parties:
            # Local-only run: one shape entry, and this party is the only one.
            self.shapes.append(self.local_corr.shape[0])
            self.parties = [self.local_party]
        else:
            spdz_context = SPDZ("pearson",
                                local_party=self.local_party,
                                all_parties=self.parties,
                                use_mix_rand=self.model_param.use_mix_rand)
            with spdz_context as spdz:
                remote = self.other_party
                is_guest = self.local_party.role == "guest"
                # The guest's data is tensor "x"; any other role's data is "y".
                if is_guest:
                    local_name, remote_name = "x", "y"
                else:
                    local_name, remote_name = "y", "x"

                # The local tensor is always created before the remote one.
                local_tensor = FixedPointTensor.from_source(local_name, normed)
                remote_tensor = FixedPointTensor.from_source(
                    remote_name, remote)
                if is_guest:
                    x, y = local_tensor, remote_tensor
                else:
                    x, y = remote_tensor, local_tensor

                # Width of each tensor, read from the length of its first row.
                x_width = len(x.value.first()[1])
                y_width = len(y.value.first()[1])
                self.shapes.append(x_width)
                self.shapes.append(y_width)

                self.corr = spdz.dot(x, y, "corr").get() / n

        self._callback()
Пример #2
0
    def fit(self, data_instance):
        """Compute Pearson matrices and record them in the component summary.

        The selected columns are standardized, their self dot product is
        divided by ``n`` and stored in ``self.local_corr``.  If
        ``self.model_param.calc_local_vif`` is set, ``self.local_vif`` is
        derived from that matrix.  When ``self.model_param.cross_parties`` is
        set, the local standardized table and the other party's data are
        loaded as ``FixedPointTensor`` objects inside an SPDZ context, and
        their dot product divided by ``n`` is stored in ``self.corr``.
        Finally ``self._callback()`` runs and the summary is published via
        ``self.set_summary``.

        Summary keys written: ``local_corr``, ``num_local_features`` and, in
        the cross-party case, ``corr`` and ``num_remote_features``.

        Args:
            data_instance: input handed to ``self._select_columns``.
        """
        # local
        data = self._select_columns(data_instance)
        n, normed = self._standardized(data)
        self.local_corr = table_dot(normed, normed)
        self.local_corr /= n
        if self.model_param.calc_local_vif:
            self.local_vif = self._vif_from_pearson_matrix(self.local_corr)
        self._summary["local_corr"] = self.local_corr.tolist()
        # `n` is the divisor that normalises the dot product (presumably the
        # sample count -- confirm against `_standardized`), not a feature
        # count.  Report the matrix dimension instead, which is also what the
        # non-cross-party branch below records as the local shape.
        self._summary["num_local_features"] = self.local_corr.shape[0]

        if self.model_param.cross_parties:
            with SPDZ(
                    "pearson",
                    local_party=self.local_party,
                    all_parties=self.parties,
                    use_mix_rand=self.model_param.use_mix_rand,
            ) as spdz:
                source = [normed, self.other_party]
                # The guest's data is tensor "x"; any other role's data is
                # "y".  The local tensor is always created first.
                if self.local_party.role == "guest":
                    x, y = (
                        FixedPointTensor.from_source("x", source[0]),
                        FixedPointTensor.from_source("y", source[1]),
                    )
                else:
                    y, x = (
                        FixedPointTensor.from_source("y", source[0]),
                        FixedPointTensor.from_source("x", source[1]),
                    )
                # Width of each tensor, read from the length of its first row.
                m1 = len(x.value.first()[1])
                m2 = len(y.value.first()[1])
                self.shapes.append(m1)
                self.shapes.append(m2)

                self.corr = spdz.dot(x, y, "corr").get() / n
                self._summary["corr"] = self.corr.tolist()
                # The remote side is "y" for the guest and "x" otherwise.
                self._summary["num_remote_features"] = (
                    m2 if self.local_party.role == "guest" else m1)

        else:
            self.shapes.append(self.local_corr.shape[0])
            self.parties = [self.local_party]

        self._callback()
        self.set_summary(self._summary)
Пример #3
0
    def fit(self, data_instance):
        """Compute the local and the cross-party correlation matrices.

        The selected columns are standardized and their self dot product is
        stored in ``self.local_corr``.  Inside an SPDZ context the local
        standardized table and the other party's data are loaded as
        ``FixedPointTensor`` objects; their dot product divided by ``n``
        becomes ``self.corr``, after which ``self.local_corr`` is divided by
        ``n`` as well.  ``self.shapes`` receives the two tensor widths and
        ``self._callback()`` is invoked last.

        Args:
            data_instance: input handed to ``self._select_columns``.
        """
        selected = self._select_columns(data_instance)
        n, normed = self._standardized(selected)
        self.local_corr = table_dot(normed, normed)

        with SPDZ("pearson") as spdz:
            remote = self._other_party
            acts_as_guest = self._local_party.role == "guest"
            # The guest's data is tensor "x"; any other role's data is "y".
            local_name, remote_name = ("x", "y") if acts_as_guest else ("y", "x")

            # The local tensor is always created before the remote one.
            local_tensor = FixedPointTensor.from_source(local_name, normed)
            remote_tensor = FixedPointTensor.from_source(remote_name, remote)
            if acts_as_guest:
                x, y = local_tensor, remote_tensor
            else:
                x, y = remote_tensor, local_tensor

            # Width of each tensor, read from the length of its first row.
            x_width = len(x.value.first()[1])
            y_width = len(y.value.first()[1])
            self.shapes.append(x_width)
            self.shapes.append(y_width)

            self.corr = spdz.dot(x, y, "corr").get() / n
            self.local_corr /= n
        self._callback()
Пример #4
0
    def _test_spdz(self):
        """Time every configured SPDZ operation for numpy and table tensors.

        For each tensor type ("numpy", then "table") a fresh ``PrettyTable``
        is prepared and an SPDZ context is opened.  Each operation in
        ``self.op_test_list`` is run ``self.test_round`` times: per round the
        local tensor is built from this party's own data, the remote tensor
        from ``self.other_party``, and both are passed to
        ``self.calculate_ret``.  The elapsed time and the last result go to
        ``self.output_table``.  Finally the summary is logged through
        ``self.tracker`` and every table is written to the logger.
        """

        def as_row_table(values):
            # Distribute the values, then wrap each one in a one-element array.
            distributed = session.parallelize(values,
                                              include_key=False,
                                              partition=self.data_partition)
            return distributed.mapValues(lambda item: np.array([item]))

        is_guest = self.local_party.role == "guest"
        # The guest owns the "x" operands; any other role owns the "y" ones.
        if is_guest:
            local_axis, remote_axis = "x", "y"
            int_values = self.int_data_x
            int_table = as_row_table(int_values)
            float_values = self.float_data_x
            float_table = as_row_table(float_values)
        else:
            local_axis, remote_axis = "y", "x"
            int_values = self.int_data_y
            int_table = as_row_table(int_values)
            float_values = self.float_data_y
            float_table = as_row_table(float_values)

        # Local input per (tensor type, numeric kind) and the matching class.
        local_inputs = {
            "numpy": {"int": int_values, "float": float_values},
            "table": {"int": int_table, "float": float_table},
        }
        tensor_classes = {"numpy": NumpyTensor, "table": TableTensor}

        finished_tables = []
        for tensor_type in ("numpy", "table"):
            table = PrettyTable()
            table.set_style(ORGMODE)
            field_name = [
                "DataType",
                "One time consumption",
                f"{self.data_num} times consumption",
                "relative acc",
                "log2 acc",
                "operations per second",
            ]
            self._summary["field_name"] = field_name
            table.field_names = field_name

            tensor_cls = tensor_classes[tensor_type]
            with SPDZ(local_party=self.local_party,
                      all_parties=self.parties) as spdz:
                for op_type in self.op_test_list:
                    start_time = time.time()
                    for epoch in range(self.test_round):
                        LOGGER.info(
                            f"test spdz, tensor_type: {tensor_type}, op_type: {op_type}, epoch: {epoch}"
                        )
                        tag = "_".join([tensor_type, op_type, str(epoch)])
                        spdz.set_flowid(tag)

                        # Operations not prefixed with "int" use float data.
                        kind = "int" if op_type.startswith("int") else "float"
                        # The local tensor is always created before the
                        # remote one.
                        local_tensor = tensor_cls.from_source(
                            f"{kind}_{local_axis}_{tag}",
                            local_inputs[tensor_type][kind])
                        remote_tensor = tensor_cls.from_source(
                            f"{kind}_{remote_axis}_{tag}", self.other_party)
                        if is_guest:
                            fixed_point_x = local_tensor
                            fixed_point_y = remote_tensor
                        else:
                            fixed_point_x = remote_tensor
                            fixed_point_y = local_tensor

                        ret = self.calculate_ret(op_type, tensor_type,
                                                 fixed_point_x, fixed_point_y)

                    # Only the result of the last round is reported.
                    total_time = time.time() - start_time
                    self.output_table(op_type, table, tensor_type, total_time,
                                      ret)

            finished_tables.append(table)

        self.tracker.log_component_summary(self._summary)
        for finished in finished_tables:
            LOGGER.info(finished)