def test():
    """Run one argument-less OTP_SA_FT exchange with the coordinator config."""
    key_settings = {"key_exchange_size": 2048}
    trainer = make_protocol(OTP_SA_FT, fed_conf_coordinator, key_settings)
    # The value returned by exchange() is not inspected by this test.
    trainer.exchange()
def test():
    """Issue one OT_INV query for id ``'50'`` and print the result.

    Sets ``fed_conf_guest['session']['identity']`` to ``'client'`` (mutating
    the shared config dict) before the protocol is built.
    """
    import random

    fed_conf_guest['session']['identity'] = 'client'
    federal_info = fed_conf_guest
    sec_param = {"symmetric_algo": "aes"}
    algo_param = {'n': 10, 'k': 1}
    protocol = make_protocol(OT_INV, federal_info, sec_param, algo_param)

    def obfuscator(in_list, n):
        """Pad ``in_list`` with random decoy ids.

        Args:
            in_list: list of real ids to hide.
            n: target length of the padded list.

        Returns:
            ``(joint_list, index)`` where ``index`` is the offset at which
            ``in_list`` starts inside ``joint_list``.
        """
        fake_list = [random.randint(0, 100) for _ in range(n - len(in_list))]
        # Only len(fake_list) + 1 insertion offsets exist. Drawing from
        # [0, n - 1] would report an offset past the real position whenever
        # in_list holds more than one element (identical range when k == 1).
        index = random.randint(0, len(fake_list))
        joint_list = fake_list[:index] + in_list + fake_list[index:]
        return joint_list, index

    result = protocol.exchange('50', obfuscator)
    print(result)
def test_invisible_inquiry():
    """Check OT_INV query results against a local computation.

    Runs ten queries (ids ``'0'`` through ``'9'``) with the client identity
    and asserts that each result equals what ``query_fun`` yields locally.
    Mutates ``fed_conf_guest['session']['identity']``.
    """
    import random

    # Federation settings (input); adjust to the deployment environment.
    fed_conf_guest['session']['identity'] = 'client'
    federal_info = fed_conf_guest

    # Security parameters (input).
    sec_param = {"symmetric_algo": "aes"}

    # Algorithm parameters (input).
    algo_param = {'n': 10, 'k': 1}

    protocol = make_protocol(OT_INV, federal_info, sec_param, algo_param)

    def obfuscator(in_list, n):
        """Mock obfuscation function: pad ``in_list`` with random decoy ids.

        Returns ``(joint_list, index)`` where ``index`` is the offset at
        which ``in_list`` starts inside ``joint_list``.
        """
        fake_list = [random.randint(0, 100) for _ in range(n - len(in_list))]
        # Only len(fake_list) + 1 insertion offsets exist. Drawing from
        # [0, n - 1] would report an offset past the real position whenever
        # in_list holds more than one element (identical range when k == 1).
        index = random.randint(0, len(fake_list))
        joint_list = fake_list[:index] + in_list + fake_list[index:]
        return joint_list, index

    def query_fun(in_list):
        """Mock query function: map every id to ``str(int(id) * 100)``."""
        return [str(int(i) * 100) for i in in_list]

    # Query ids 0 through 9 and compare the federated answer with the value
    # computed locally by the same query function.
    for i in range(10):
        federal_result = protocol.exchange(str(i), obfuscator)
        local_result = query_fun([str(i)])[0]
        assert federal_result == local_result
def test_train():
    """Train a six-parameter linear model with the guest config via HE_LINEAR_FT.

    Features are the first six columns of ``load_boston()`` after min-max
    scaling. For every batch the local linear output ``u`` and the labels are
    handed to ``protocol.exchange``; the returned values weight the features
    to form the gradient that SGD applies to ``theta``.
    """
    # Seed torch so the randomly initialised theta is reproducible.
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    torch.backends.cudnn.deterministic = True

    boston = load_boston()
    raw_features = boston.data[:, :6]
    col_min = np.min(raw_features, axis=0)
    col_max = np.max(raw_features, axis=0)
    x_train = (raw_features - col_min) / (col_max - col_min)
    y_train = boston.target

    train_param = {
        'lr': 0.1,
        'num_epochs': 10,
        'iter_per_epoch': 8,
        'batch_size': 64,
    }

    dataset = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train))
    loader = DataLoader(dataset, train_param['batch_size'], drop_last=False)

    class LinearRegression(nn.Module):
        """Holds a single learnable weight vector ``theta``."""

        def __init__(self, in_dim):
            super().__init__()
            self.theta = nn.Parameter(torch.randn(in_dim))

    model = LinearRegression(6)
    print(model.state_dict())
    optimizer = torch.optim.SGD(model.parameters(), train_param['lr'])

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LINEAR_FT, fed_conf_guest, he_settings, None)

    for epoch in range(train_param['num_epochs']):
        print(f"epoch: {epoch}")
        for feature, label in loader:
            u = (feature * model.theta).sum(dim=1)
            # NOTE(review): the exchanged value is presumably the per-sample
            # residual; confirm against the protocol implementation.
            exchanged = torch.as_tensor(
                protocol.exchange(u.detach().numpy(), label.detach().numpy()))
            gradient = torch.mean(feature * exchanged.unsqueeze(-1), dim=0)

            optimizer.zero_grad()
            model.theta.grad = torch.as_tensor(gradient).float()
            optimizer.step()

        print('theta:', model.theta)
def test_he_linear_ft():
    """Run one argument-less HE_LINEAR_FT exchange with the coordinator config."""
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LINEAR_FT, fed_conf_coordinator, he_settings,
                             algo_param=None)
    protocol.exchange()
def test():
    """Run one SAL align() followed by one verify() with the coordinator config."""
    sal = make_protocol(SAL, fed_conf_coordinator, {"key_exchange_size": 2048})
    # Alignment step.
    sal.align()
    # Verification step.
    sal.verify()
def test():
    """Run one argument-less HE_LR_FP exchange with the coordinator config."""
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    make_protocol(HE_LR_FP, fed_conf_coordinator, he_settings).exchange()
def test():
    """Send a random 32-element vector through HE_LR_FP with the host config.

    Prints both the generated input and the value returned by ``exchange``.
    """
    local_u = np.random.uniform(-1, 1, (32, ))
    print(local_u)

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LR_FP, fed_conf_host, he_settings)
    print(protocol.exchange(local_u))
def test():
    """Run one argument-less HE_SA_FT exchange with the coordinator config."""
    security = {
        "he_algo": "paillier",
        "he_key_length": 1024,
        "key_exchange_size": 2048,
    }
    trainer = make_protocol(HE_SA_FT, fed_conf_coordinator, security)
    # The value returned by exchange() is not inspected by this test.
    trainer.exchange()
def test_secure_alignment():
    """Run two SAL align/verify rounds with the coordinator config."""
    rounds = 2
    sal = make_protocol(SAL, fed_conf_coordinator, {"key_exchange_size": 2048})

    for _ in range(rounds):
        # Alignment step.
        sal.align()
        # Verification step.
        sal.verify()
def test_train():
    """Train the host-side logistic-regression weights through HE_OTP_LR_FT2.

    Uses feature columns 20 onward of ``load_breast_cancer()`` (min-max
    scaled) and the module-level ``train_param`` settings. Every batch sends
    the current ``theta`` and the features to ``protocol.exchange`` and applies
    the returned gradient with SGD.
    """
    # Fix the random seeds so identical inputs give identical outputs.
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    torch.backends.cudnn.deterministic = True

    cancer = load_breast_cancer()
    # Columns 20+ are used here (the original note states the data has 30
    # feature columns in total).
    features = cancer.data[:, 20:]

    # Min-max normalisation; the original note says guest and host must use
    # the same normalisation method.
    lower = np.min(features, axis=0)
    upper = np.max(features, axis=0)
    features = (features - lower) / (upper - lower)

    # Wrap the training data for PyTorch.
    loader = DataLoader(TensorDataset(torch.Tensor(features)),
                        train_param['batch_size'],
                        drop_last=False)

    class LogisticRegression(nn.Module):
        """Holds a single learnable weight vector ``theta``."""

        def __init__(self, in_dim):
            super().__init__()
            self.theta = nn.Parameter(torch.randn(in_dim))

    model = LogisticRegression(10)
    # SGD with the configured learning rate.
    optimizer = torch.optim.SGD(model.parameters(), train_param['lr'])

    # Federation settings come from outside; Paillier with a 1024-bit key is
    # used (the original note says this must match the guest).
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_OTP_LR_FT2, fed_conf_host, he_settings, None)

    for _ in range(train_param['num_epochs']):
        for (feature,) in loader:
            gradient = protocol.exchange(model.theta.detach().numpy(),
                                         feature.numpy())
            optimizer.zero_grad()
            model.theta.grad = torch.Tensor(gradient)
            optimizer.step()
def test():
    """Feed the ``Label`` column into an IV_FFS exchange with the guest config.

    Reads the first 300 rows of ``shap_finance_c.csv`` located next to this
    file.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    iv_settings = {'adjust_value': 0.5}
    iv_ffs = make_protocol(IV_FFS, fed_conf_guest, he_settings, iv_settings)

    csv_path = os.path.join(os.path.dirname(__file__), 'shap_finance_c.csv')
    frame = pd.read_csv(csv_path, nrows=300)
    iv_ffs.exchange(label=pd.Series(frame['Label']))
def test():
    """Align ids ``'0'``..``'159'`` via SAL with the host config, then verify.

    Prints the alignment result and the value returned by ``verify``.
    """
    key_settings = {"key_exchange_size": 2048}
    local_ids = [str(number) for number in range(160)]

    sal = make_protocol(SAL, fed_conf_host, key_settings)

    # Alignment step.
    aligned = sal.align(local_ids)
    print(aligned)

    # Verification step.
    print(sal.verify(aligned))
def test_predict():
    """Run two argument-less HE_LR_FP exchanges with the coordinator config."""
    exchange_count = 2
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LR_FP, fed_conf_coordinator, he_settings, None)

    for _ in range(exchange_count):
        protocol.exchange()
def test_train():
    """Run the coordinator's HE_OTP_LR_FT2 exchanges for a whole training run.

    Calls ``exchange()`` ``train_param['train_rounds']`` times per epoch for
    ``train_param['num_epochs']`` epochs (``train_param`` is module-level).
    """
    # Federation settings come from outside; Paillier with a 1024-bit key.
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    trainer = make_protocol(HE_OTP_LR_FT2, fed_conf_coordinator, he_settings)

    # Training loop.
    for _epoch in range(train_param['num_epochs']):
        for _round in range(train_param['train_rounds']):
            trainer.exchange()
def test():
    """Exchange a nested list of random float32 arrays via OTP_SA_FT (host config).

    Prints the generated input and the value returned by ``exchange``.
    """
    def random_block(shape):
        # Uniform samples in [-1, 1), stored as float32.
        return np.random.uniform(-1, 1, shape).astype(np.float32)

    theta = [[random_block((2, 4)), random_block((2, 6))],
             [random_block((2, 8))]]
    print(theta)

    trainer = make_protocol(OTP_SA_FT, fed_conf_host, {"key_exchange_size": 2048})
    print(trainer.exchange(theta))
def test_predict():
    """Send two fixed float32 vectors through HE_LR_FP with the host config.

    One ``exchange`` call is made per vector; the returned values are not
    checked here.
    """
    host_vectors = [
        np.array([0.3, -14, -2.5, 1.7], dtype=np.float32),
        np.array([0.2, 1.2, -5.6], dtype=np.float32),
    ]

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_LR_FP, fed_conf_host, he_settings, None)

    for vector in host_vectors:
        protocol.exchange(vector)
def test():
    """Exchange random arrays via OTP_SA_FT (guest config), then send them on.

    After the exchange, the locally generated ``theta`` is sent with tag
    ``'theta'`` over a variable channel named ``'test_otp_sa_ft'``.
    """
    def random_block(shape):
        # Uniform samples in [-1, 1), stored as float32.
        return np.random.uniform(-1, 1, shape).astype(np.float32)

    theta = [[random_block((2, 4)), random_block((2, 6))],
             [random_block((2, 8))]]
    print(theta)

    trainer = make_protocol(OTP_SA_FT, fed_conf_guest, {"key_exchange_size": 2048})
    # The value returned by exchange() is not inspected on this side.
    trainer.exchange(theta)

    federation = fed_conf_guest["federation"]
    channel = make_variable_channel('test_otp_sa_ft',
                                    federation["host"][0],
                                    federation["guest"][0])
    channel.send(theta, tag='theta')
def test():
    """Exchange a nested list of random float32 arrays via HE_SA_FT (guest config).

    Prints the generated input and the value returned by ``exchange``.
    """
    def random_block(shape):
        # Uniform samples in [-1, 1), stored as float32.
        return np.random.uniform(-1, 1, shape).astype(np.float32)

    theta = [[random_block((2, 4)), random_block((2, 6))],
             [random_block((2, 8))]]
    print(theta)

    security = {
        "he_algo": "paillier",
        "he_key_length": 1024,
        "key_exchange_size": 2048,
    }
    trainer = make_protocol(HE_SA_FT, fed_conf_guest, security)
    print(trainer.exchange(theta))
def test_secure_alignment():
    """Check SAL alignment (guest config) against a local intersection.

    Two id sets are aligned in turn (one of strings, one of ints). Each
    result must match the intersection computed locally with
    ``np.intersect1d`` against a reference set, and ``verify`` must return
    ``True``.
    """
    own_sets = [list(map(str, range(1000, 2000))), list(range(3000, 5000))]
    reference_sets = [list(map(str, range(1600))), list(range(4200))]

    # Alignment protocol.
    sal = make_protocol(SAL, fed_conf_guest, {"key_exchange_size": 2048})

    for own_ids, reference_ids in zip(own_sets, reference_sets):
        result = sal.align(own_ids)

        # Positions in own_ids of the elements shared with the reference set.
        _, own_positions, _ = np.intersect1d(own_ids, reference_ids,
                                             return_indices=True)
        expected = [own_ids[position] for position in own_positions]
        assert sorted(result) == sorted(expected)

        # Verification step.
        is_align = sal.verify(result)
        assert is_align is True
def test():
    """Print the value IV_FFS returns for ``Occupation`` with the host config.

    Reads the first 300 rows of ``shap_finance_c.csv`` located next to this
    file and passes the column as a continuous feature with fixed split
    points.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    iv_settings = {'adjust_value': 0.5}
    iv_ffs = make_protocol(IV_FFS, fed_conf_host, he_settings, iv_settings)

    csv_path = os.path.join(os.path.dirname(__file__), 'shap_finance_c.csv')
    occupation = pd.Series(pd.read_csv(csv_path, nrows=300)['Occupation'])

    boundaries = np.array([0.0, 1.5, 3.01, 4.15, 6.02, 7.04, 8.28, 10.1])
    print(iv_ffs.exchange(feature=occupation,
                          is_continuous=True,
                          split_info={'split_points': boundaries}))
def test():
    """Serve one OT_INV exchange with the server identity.

    Sets ``fed_conf_host['session']['identity']`` to ``'server'`` (mutating
    the shared config dict) and hands ``query_fun`` to the protocol.
    """
    fed_conf_host['session']['identity'] = 'server'

    cipher_settings = {"symmetric_algo": "aes"}
    ot_settings = {'n': 10, 'k': 1}
    protocol = make_protocol(OT_INV, fed_conf_host, cipher_settings, ot_settings)

    def query_fun(in_list):
        """Map every id to ``str(int(id) * 100)``."""
        answers = []
        for item in in_list:
            answers.append(str(int(item) * 100))
        return answers

    protocol.exchange(query_fun)
def test_he_linear_ft():
    """Check the host-side HE_LINEAR_FT result against a local computation.

    Draws ``guest_u``, ``host_u`` and ``guest_labels`` from
    ``RandomState(0)``; the original note says host and guest start from the
    same random state. Asserts that ``exchange(host_u)`` is almost equal to
    ``guest_u + host_u - guest_labels``.
    """
    prng = RandomState(0)
    # The draw order (guest_u, host_u, labels) fixes the values; keep it.
    guest_u, host_u = (np.array(prng.uniform(-1, 1, (8, ))) for _ in range(2))
    guest_labels = np.array(prng.randint(0, 2, (8, )))

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    trainer = make_protocol(HE_LINEAR_FT, fed_conf_host, he_settings,
                            algo_param=None)

    federated = trainer.exchange(host_u)
    expected = guest_u + host_u - guest_labels
    assert almost_equal(federated, expected)
def test_invisible_inquiry():
    """Serve ten consecutive OT_INV exchanges with the server identity.

    Sets ``fed_conf_host['session']['identity']`` to ``'server'`` (mutating
    the shared config dict) and hands ``query_fun`` to every exchange.
    """
    fed_conf_host['session']['identity'] = 'server'

    cipher_settings = {"symmetric_algo": "aes"}
    ot_settings = {'n': 10, 'k': 1}
    protocol = make_protocol(OT_INV, fed_conf_host, cipher_settings, ot_settings)

    def query_fun(in_list):
        """Mock query function: map every id to ``str(int(id) * 100)``."""
        answers = []
        for item in in_list:
            answers.append(str(int(item) * 100))
        return answers

    # Ten exchanges in a row (the original note says each result is returned
    # to the client side).
    for _ in range(10):
        protocol.exchange(query_fun)
def test_he_otp_lr_ft1():
    """Check guest-side HE_OTP_LR_FT1 output against a local computation.

    All guest and host inputs are drawn from ``RandomState(0)``. The
    federated ``(h_x, grads)`` pair must be almost equal to the values
    computed locally by ``calu_grad``.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}

    prng = RandomState(0)
    # The draw order fixes the values; keep it unchanged.
    guest_theta = prng.uniform(-1, 1, (6, ))
    guest_features = prng.uniform(-1, 1, (32, 6))
    guest_labels = prng.randint(0, 2, (32, ))
    host_theta = prng.uniform(-1, 1, (6, ))
    host_features = prng.uniform(-1, 1, (32, 6))

    def calu_grad(guest_theta, guest_features, guest_labels, host_theta,
                  host_features):
        """Return the sigmoid output and the guest gradient computed locally."""
        host_part = host_theta.dot(host_features.T)
        guest_part = guest_theta.dot(guest_features.T)
        h_x = 1 / (1 + np.exp(-(guest_part + host_part)))
        batch_size = guest_features.shape[0]
        residual = guest_labels - h_x
        grads = (-1 / batch_size) * (residual.dot(guest_features))
        return h_x, grads

    trainer = make_protocol(HE_OTP_LR_FT1, fed_conf_guest, he_settings,
                            algo_param=None)

    # Federated result.
    fed_h_x, fed_grads = trainer.exchange(guest_theta, guest_features,
                                          guest_labels)
    # Local result.
    local_h_x, local_grads = calu_grad(guest_theta, guest_features,
                                       guest_labels, host_theta, host_features)

    assert almost_equal(fed_h_x, local_h_x)
    assert almost_equal(fed_grads, local_grads)
def test():
    """Check the host-side OTP_SA_FT result against a locally computed average.

    After the exchange, receives the ``'theta'``-tagged value over the
    ``'test_otp_sa_ft'`` variable channel. Asserts that the exchange result
    is almost equal to the element-wise mean of the local and received
    thetas.
    """
    def random_block(shape):
        # Uniform samples in [-1, 1), stored as float32.
        return np.random.uniform(-1, 1, shape).astype(np.float32)

    theta = [[random_block((2, 4)), random_block((2, 6))],
             [random_block((2, 8))]]
    print(theta)

    trainer = make_protocol(OTP_SA_FT, fed_conf_host, {"key_exchange_size": 2048})
    result = trainer.exchange(theta)

    federation = fed_conf_host["federation"]
    channel = make_variable_channel('test_otp_sa_ft',
                                    federation["host"][0],
                                    federation["guest"][0])
    guest_theta = channel.recv(tag='theta')

    # Average computed locally from both thetas.
    expected = iterative_divide(iterative_add(theta, guest_theta), 2.0)
    assert almost_equal(result, expected)
def test_train():
    """Run the coordinator's HE_LINEAR_FT exchanges for a whole training run.

    Performs ``iter_per_epoch * num_epochs`` argument-less exchanges.
    """
    # Seed torch exactly as the other training tests do.
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    torch.backends.cudnn.deterministic = True

    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    train_param = {
        'lr': 0.1,
        'num_epochs': 10,
        'iter_per_epoch': 8,
        'batch_size': 64,
    }

    protocol = make_protocol(HE_LINEAR_FT, fed_conf_coordinator, he_settings, None)

    total_rounds = train_param['iter_per_epoch'] * train_param['num_epochs']
    for _ in range(total_rounds):
        protocol.exchange()
def test_train():
    """Train guest-side logistic-regression weights through HE_OTP_LR_FT2.

    Uses the first 20 feature columns of ``load_breast_cancer()`` (min-max
    scaled) together with its targets, and the module-level ``train_param``.
    Every batch's BCE loss is recorded. The loss curve is saved as
    ``guest_train_results_loss.png`` next to this file.
    """
    # Fix the random seeds (CPU and all GPUs) and force deterministic cuDNN
    # so identical inputs give identical outputs.
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    torch.backends.cudnn.deterministic = True

    cancer = load_breast_cancer()
    features = cancer.data[:, :20]

    # Min-max normalisation.
    lower = np.min(features, axis=0)
    upper = np.max(features, axis=0)
    features = (features - lower) / (upper - lower)

    # The labels are taken from the same dataset.
    labels = cancer.target

    # Wrap the training data for PyTorch.
    dataset = TensorDataset(torch.Tensor(features), torch.Tensor(labels))
    loader = DataLoader(dataset, train_param['batch_size'], drop_last=False)

    class LogisticRegression(nn.Module):
        """Holds a single learnable weight vector ``theta``."""

        def __init__(self, in_dim):
            super().__init__()
            self.theta = nn.Parameter(torch.randn(in_dim))

    model = LogisticRegression(20)
    criterion = nn.BCELoss()
    # SGD with the configured learning rate.
    optimizer = torch.optim.SGD(model.parameters(), train_param['lr'])

    # Federation settings come from outside; Paillier with a 1024-bit key.
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    protocol = make_protocol(HE_OTP_LR_FT2, fed_conf_guest, he_settings, None)

    # Record the loss of every training step.
    loss_list = []
    for _ in range(train_param['num_epochs']):
        for feature, label in loader:
            predict, gradient = protocol.exchange(
                model.theta.detach().numpy(), feature.numpy(), label.numpy())
            batch_loss = criterion(torch.Tensor(predict), label)
            loss_list.append(batch_loss.item())

            optimizer.zero_grad()
            model.theta.grad = torch.Tensor(gradient)
            optimizer.step()

    # Plot loss against the iteration number and save it beside this file.
    iterations = list(range(1, len(loss_list) + 1))
    pl.plot(iterations, loss_list, 'r-', label='loss value')
    pl.legend()
    pl.xlabel('iters')
    pl.ylabel('loss')
    pl.title('logistic regression loss in training')
    figure_path = os.path.join(os.path.dirname(__file__),
                               "guest_train_results_loss.png")
    pl.savefig(figure_path)
def test_iv_ffs():
    """Check the host-side IV_FFS value against a locally computed IV.

    Reads the first 300 rows of ``shap_finance_c.csv`` located next to this
    file and bins ``Occupation`` by fixed split points. Asserts that the
    federated value equals the one computed by ``iv_calu`` from the same
    data.
    """
    he_settings = {"he_algo": 'paillier', "he_key_length": 1024}
    algo_param = {'adjust_value': 0.5}

    def iv_calu(label, data, split_info):
        """Compute the information value of ``data`` against ``label`` locally.

        Bins are ``(-inf, p0]``, ``(p0, p1]``, ..., ``(p_last, +inf)`` for the
        split points ``p``. A bin with no good or no bad samples uses
        ``algo_param['adjust_value']`` as an additive smoothing term.
        """
        points = split_info['split_points']

        # One membership predicate per bin; bounds are bound as defaults so
        # each lambda keeps its own limits.
        predicates = [lambda x, hi=points[0]: x <= hi]
        predicates.extend(lambda x, lo=lo, hi=hi: lo < x <= hi
                          for lo, hi in zip(points[:-1], points[1:]))
        predicates.append(lambda x, lo=points[-1]: x > lo)

        bin_indices = [
            np.array(data[data.apply(predicate)].index)
            for predicate in predicates
        ]

        good_num = np.array([sum(label[rows]) for rows in bin_indices])
        bad_num = np.array(
            [len(rows) - sum(label[rows]) for rows in bin_indices])

        good_all_count = sum(label)
        bad_all_count = len(label) - sum(label)
        adjust = algo_param['adjust_value']

        iv = 0
        for good, bad in zip(good_num, bad_num):
            bad_rate = bad / bad_all_count
            good_rate = good / good_all_count
            if good == 0 or bad == 0:
                woe = math.log((bad_rate + adjust) / (good_rate + adjust))
            else:
                woe = math.log(bad_rate / good_rate)
            iv += (bad_rate - good_rate) * woe
        return iv

    iv_ffs = make_protocol(IV_FFS, fed_conf_host, he_settings, algo_param)

    csv_path = os.path.join(os.path.dirname(__file__), 'shap_finance_c.csv')
    table = pd.read_csv(csv_path, nrows=300)
    data = pd.Series(table['Occupation'])
    label = pd.Series(table['Label'])
    split_info = {
        'split_points':
        np.array([0.0, 1.5, 3.01, 4.15, 6.02, 7.04, 8.28, 10.1])
    }

    # IV value computed through the federation.
    iv_value = iv_ffs.exchange(feature=data,
                               is_continuous=True,
                               split_info=split_info)
    # IV value computed locally.
    local_iv_value = iv_calu(label, data, split_info)
    # NOTE(review): exact float equality; confirm the protocol is expected to
    # reproduce the local value bit-for-bit.
    assert iv_value == local_iv_value