def main():
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    # Branch here when using virtual (simulated) workers
    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    # Branch here when using websocket workers
    else:
        a_kwargs_websocket = {"host": "192.168.0.57", "hook": hook}
        b_kwargs_websocket = {"host": "192.168.0.58", "hook": hook}
        c_kwargs_websocket = {"host": "192.168.0.59", "hook": hook}
        baseport = 10002
        alice = WebsocketClientWorker(id="alice", port=baseport, **a_kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=baseport, **b_kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie", port=baseport, **c_kwargs_websocket)

    # Collect the worker objects in a list
    workers = [alice, bob, charlie]

    # CUDA usage
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Random seed
    torch.manual_seed(args.seed)

    labels_resampled_factorized, obs_resampled_with_noise_2 = process_data()

    # Fraction of the data held out for testing
    test_size = 0.1

    # Obtain the training and test indices
    num_train = len(obs_resampled_with_noise_2)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(test_size * num_train))
    train_idx, test_idx = indices[split:], indices[:split]

    print(type(obs_resampled_with_noise_2[train_idx]), type(labels_resampled_factorized[train_idx]))
    print(obs_resampled_with_noise_2[train_idx].shape, labels_resampled_factorized[train_idx].shape)
    print(labels_resampled_factorized[train_idx])

    federated_train_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[train_idx]),
        torch.tensor(labels_resampled_factorized[train_idx]))

    federated_train_loader = sy.FederatedDataLoader(
        federated_train_dataset.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[test_idx]),
        torch.tensor(labels_resampled_factorized[test_idx]))

    test_loader = D.DataLoader(test_dataset,
                               shuffle=True,
                               batch_size=args.batch_size,
                               num_workers=0,
                               drop_last=True)

    model = Net(input_features=1, output_dim=5).to(device)
    criterion = nn.NLLLoss()

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches, criterion=criterion)
        test(model, test_loader, args.batch_size, criterion=criterion,
             train_on_gpu=use_cuda)

    if args.save_model:
        torch.save(model.state_dict(), "./Model/mnist_cnn.pt")
async def main():
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    if args.localworkers:
        # ----------------------------- This is for localhost workers --------------------------------
        kwargs_websocket = {"hook": hook, "verbose": args.verbose, "host": "0.0.0.0"}
        alice = websocket_client.WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
        bob = websocket_client.WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)
        charlie = websocket_client.WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket)
        testing = websocket_client.WebsocketClientWorker(id="testing", port=8780, **kwargs_websocket)
    else:
        # ----------------------------- This is for remote workers ------------------------------------
        kwargs_websocket_alice = {"host": "128.226.78.195", "hook": hook}
        alice = websocket_client.WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket_alice)
        kwargs_websocket_bob = {"host": "128.226.77.222", "hook": hook}
        bob = websocket_client.WebsocketClientWorker(id="bob", port=8777, **kwargs_websocket_bob)
        kwargs_websocket_charlie = {"host": "128.226.88.120", "hook": hook}
        charlie = websocket_client.WebsocketClientWorker(id="charlie", port=8777, **kwargs_websocket_charlie)
        # kwargs_websocket_testing = {"host": "128.226.77.111", "hook": hook}
        kwargs_websocket_testing = {"host": "128.226.88.210", "hook": hook}
        testing = websocket_client.WebsocketClientWorker(id="testing", port=8777, **kwargs_websocket_testing)

    for wcw in [alice, bob, charlie, testing]:
        wcw.clear_objects_remote()

    worker_instances = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    model = Net().to(device)
    if os.path.isfile("mnist_cnn_asyn.pt"):
        model.load_state_dict(torch.load("mnist_cnn_asyn.pt"))
        model.eval()

    traced_model = torch.jit.trace(
        model, torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    learning_rate = args.lr

    # Run the training and evaluation rounds
    for curr_round in range(1, args.training_rounds + 1):
        logger.info("Training round %s/%s", curr_round, args.training_rounds)

        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                traced_model=traced_model,
                batch_size=args.batch_size,
                curr_round=curr_round,
                max_nr_batches=args.federate_after_n_batches,
                lr=learning_rate,
            ) for worker in worker_instances
        ])
        models = {}
        loss_values = {}

        # Evaluate the models every tenth round (rounds 1, 11, ...) and in the final round
        test_models = curr_round % 10 == 1 or curr_round == args.training_rounds
        if test_models:
            logger.info("Evaluating models")
            np.set_printoptions(formatter={"float": "{: .0f}".format})
            for worker_id, worker_model, _ in results:
                evaluate_model_on_worker(
                    model_identifier="Model update " + worker_id,
                    worker=testing,
                    dataset_key="mnist_testing",
                    model=worker_model,
                    nr_bins=10,
                    batch_size=128,
                    device=device,
                    print_target_hist=False,
                )

        # Federate models (note that this will also change the model in models[0])
        for worker_id, worker_model, worker_loss in results:
            if worker_model is not None:
                models[worker_id] = worker_model
                loss_values[worker_id] = worker_loss

        traced_model = utils.federated_avg(models)

        if test_models:
            evaluate_model_on_worker(
                model_identifier="Federated model",
                worker=testing,
                dataset_key="mnist_testing",
                model=traced_model,
                nr_bins=10,
                batch_size=128,
                device=device,
                print_target_hist=False,
            )

        # Save the intermediate model for this round
        model_dir = "models_asyn"
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        model_name = "{}/mnist_cnn_{}.pt".format(model_dir, curr_round)
        torch.save(traced_model.state_dict(), model_name)

        # Decay the learning rate
        learning_rate = max(0.98 * learning_rate, args.lr * 0.01)

    if args.save_model:
        torch.save(traced_model.state_dict(), "mnist_cnn_asyn.pt")
import torch
import syft
from syft.workers.websocket_server import WebsocketServerWorker


class WebsocketServerWorkerGood(WebsocketServerWorker):
    def set_obj(self, obj: object):
        self._objects[obj.id] = obj
        self._objects[obj.id].owner = self


# Hook and start server
hook = syft.TorchHook(torch)
server_worker = WebsocketServerWorkerGood(id="good", host="localhost", port=8778, hook=hook)
test_data = torch.tensor([1, 2, 3]).tag("test")
server_worker.set_obj(test_data)
print("Good server started.")
server_worker.start()
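# A minimal client-side sketch (an addition, assuming PySyft's standard client API)
# showing how one might connect to the "good" server above from a separate process
# and look up the tensor tagged "test".
import torch
import syft
from syft.workers.websocket_client import WebsocketClientWorker

hook = syft.TorchHook(torch)
good = WebsocketClientWorker(id="good", host="localhost", port=8778, hook=hook)
results = good.search("test")  # returns pointers to remote tensors carrying the tag
print(results)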
def main():
    args = define_and_get_arguments()
    print(args)
    hook = sy.TorchHook(torch)
    host = "localhost"

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {"host": host, "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8771, **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8772, **kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie", port=8773, **kwargs_websocket)

    workers = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Search multiple times should still work
    tr_alice = alice.search("#mnist", "#alice", "#train_tag")
    tr_bob = bob.search("#mnist", "#bob", "#train_tag")
    tr_charlie = charlie.search("#mnist", "#charlie", "#train_tag")

    base_data = []
    base_data.append(BaseDataset(tr_alice[0], tr_alice[1]))
    base_data.append(BaseDataset(tr_bob[0], tr_bob[1]))
    base_data.append(BaseDataset(tr_charlie[0], tr_charlie[1]))

    federated_train_loader = sy.FederatedDataLoader(
        FederatedDataset(base_data),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
from grid import GridNetwork
from model import Model
import time
import sys
import syft as sy
import torch as th
import asyncio

hook = sy.TorchHook(th)

if __name__ == "__main__":
    node_id = sys.argv[1]
    connect = int(sys.argv[2])
    destination = sys.argv[3]

    # args = {"max_size": None, "timeout": 444, "url": "ws://openmined-grid.herokuapp.com"}
    args = {"max_size": None, "timeout": 444, "url": "ws://34.89.48.186"}
    grid = GridNetwork(node_id, **args)
    grid.start()

    if connect:
        node = grid.connect(destination)
    else:
        time.sleep(10)
        node = grid._connection_handler.get("bill")
    # asyncio.run(node.send(b'Hello!'))

    x = th.tensor([1, 2, 3, 4, 5, 6, 7]).tag("#X", "#test").describe("My Little obj")
def hook():
    hook = syft.TorchHook(torch)
    return hook
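# Hypothetical usage of the helper above, assuming it is registered as a pytest
# fixture: tests receive a ready TorchHook and build virtual workers from it.
def test_worker_creation(hook):
    worker = syft.VirtualWorker(hook, id="test_worker")
    assert worker.id == "test_worker"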
def main():
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    # Branch here when using virtual (simulated) workers
    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
        list_of_object = [alice, bob, charlie]
    # Branch here when using websocket workers
    else:
        base_port = 10002
        list_of_id = ["alice", "bob", "charlie"]
        list_of_ip = ["192.168.0.52", "192.168.0.53", "192.168.0.54"]
        list_of_object = []
        for index in range(len(list_of_id)):
            kwargs_websocket = {"id": list_of_id[index], "hook": hook}
            list_of_object.append(
                WebsocketClientWorker(host=list_of_ip[index],
                                      port=base_port,
                                      **kwargs_websocket))

    workers = list_of_object

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST(
            "../data",
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]),
        ).federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
'''
Part 6: Federated Learning on MNIST using a CNN

- Federated Learning aims to build systems that learn on decentralized data,
  improving data privacy and ownership.
'''

# Setup
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import syft as sy  # import PySyft library

hook = sy.TorchHook(torch)  # hook PyTorch
bob = sy.VirtualWorker(hook, id='bob')
alice = sy.VirtualWorker(hook, id='alice')


class Arguments():
    def __init__(self):
        self.batch_size = 64
        self.test_batch_size = 1000
        self.epochs = 10
        self.lr = 0.01
        self.momentum = 0.5
        self.no_cuda = False
        self.seed = 1
        self.log_interval = 30
        self.save_model = False


args = Arguments()
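# Sketch of the tutorial's next step (reconstructed here, not verbatim): wrap MNIST
# in a FederatedDataLoader so that batches live on bob's and alice's virtual machines.
federated_train_loader = sy.FederatedDataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])).federate((bob, alice)),
    batch_size=args.batch_size, shuffle=True)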
def experiment(num_workers, no_cuda):
    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    federated_train_loader = sy.FederatedDataLoader(  # <-- this is now a FederatedDataLoader
        datasets.CIFAR10('../data', train=True, download=True,
                         transform=transform).federate(clients),
        # <-- NEW: we distribute the dataset across all the workers; it's now a FederatedDataset
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=False, transform=transform),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    # Creating the models for each client
    models, optimizers = [], []
    for i in range(num_workers):
        # print(i)
        models.append(vgg11_SL()[0].to(device))
        models[i] = models[i].send(clients[i])
        optimizers.append(
            optim.SGD(params=models[i].parameters(), lr=args.lr, momentum=0.9))

    start = time.time()
    model = vgg11_SL()[1].to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=0.9)  # TODO momentum is not supported at the moment

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch,
              models, optimizers, clients_mem)
        test(args, model, device, test_loader, models)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
model_params = list(model.parameters())
bobs_model = Net()
alices_model = Net()

import syft
import syft as sy
from syft.core import utils
import torch
import torch.nn.functional as F
import json
import random
from syft.core.frameworks.torch import utils as torch_utils
from torch.autograd import Variable

hook = sy.TorchHook(verbose=False)
me = hook.local_worker
bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
me.is_client_worker = False

compute_nodes = [bob, alice]

me.add_workers([bob, alice])
bob.add_workers([me, alice])
alice.add_workers([me, bob])

remote_dataset = (list(), list())

for batch_idx, (data, target) in enumerate(train_loader):
    data = Variable(data)
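    # The snippet breaks off here; a plausible continuation, following the usual
    # PySyft tutorial pattern of alternating batches between the two workers:
    target = Variable(target)
    data = data.send(compute_nodes[batch_idx % len(compute_nodes)])
    target = target.send(compute_nodes[batch_idx % len(compute_nodes)])
    remote_dataset[batch_idx % len(compute_nodes)].append((data, target))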
def __init__(self):
    with open('data/dcrnn_la.yaml') as f_la, open('data/dcrnn_bay.yaml') as f_bay:
        config_la = yaml.load(f_la, Loader=yaml.FullLoader)
        config_bay = yaml.load(f_bay, Loader=yaml.FullLoader)
    sensor_ids1, sensor_id_to_ind1, adj_mx_la = load_graph_data(
        config_la['data'].get('graph_pkl_filename'))
    sensor_ids2, sensor_id_to_ind2, adj_mx_bay = load_graph_data(
        config_bay['data'].get('graph_pkl_filename'))

    self._kwargs = config_la
    self._data_kwargs = config_la.get('data')
    self._model_kwargs = config_la.get('model')
    self._data_kwargs2 = config_bay.get('data')
    self._model_kwargs2 = config_bay.get('model')
    self._train_kwargs = config_la.get('train')
    self.max_grad_norm = self._train_kwargs.get('max_grad_norm', 1.)

    # logging
    self._log_dir = self._get_log_dir(config_la)
    self._writer = SummaryWriter('runs/' + self._log_dir)
    log_level = self._kwargs.get('log_level', 'INFO')
    self._logger = utils.get_logger(self._log_dir, __name__, 'info.log', level=log_level)

    # data sets
    self._data = utils.load_dataset(**self._data_kwargs)
    self._data2 = utils.load_dataset(**self._data_kwargs2)
    self.standard_scaler = self._data['scaler']
    self.standard_scaler2 = self._data2['scaler']

    self._logger.info('Setting: {}'.format(args.setting))
    self._logger.info("Party A trn samples: {}".format(self._data['train_loader'].size))
    self._logger.info("Party A vld samples: {}".format(self._data['val_loader'].size))
    self._logger.info("Party A tst samples: {}".format(self._data['test_loader'].size))
    self._logger.info("Party B trn samples: {}".format(self._data2['train_loader'].size))
    self._logger.info("Party B vld samples: {}".format(self._data2['val_loader'].size))
    self._logger.info("Party B tst samples: {}".format(self._data2['test_loader'].size))

    self.num_nodes = int(self._model_kwargs.get('num_nodes', 1))
    self.num_nodes2 = int(self._model_kwargs2.get('num_nodes', 1))
    self._logger.info("num_nodes: {}".format(self.num_nodes))
    self._logger.info("num_nodes2: {}".format(self.num_nodes2))
    self.input_dim = int(self._model_kwargs.get('input_dim', 1))
    self.seq_len = int(self._model_kwargs.get('seq_len'))  # for the encoder
    self.output_dim = int(self._model_kwargs.get('output_dim', 1))
    self.use_curriculum_learning = bool(
        self._model_kwargs.get('use_curriculum_learning', False))
    self.horizon = int(self._model_kwargs.get('horizon', 1))  # for the decoder

    # set up the models
    dcrnn_model = DCRNNModel(adj_mx_la, self._logger, **self._model_kwargs)
    dcrnn_model2 = DCRNNModel(adj_mx_bay, self._logger, **self._model_kwargs2)
    if torch.cuda.is_available():
        # dcrnn_model = nn.DataParallel(dcrnn_model)
        # dcrnn_model2 = nn.DataParallel(dcrnn_model2)
        self.dcrnn_model = dcrnn_model.cuda()
        self.dcrnn_model2 = dcrnn_model2.cuda()
    else:
        self.dcrnn_model = dcrnn_model
        self.dcrnn_model2 = dcrnn_model2
    self._logger.info("Models created")
    self._logger.info('Local epochs:' + str(args.local_epochs))

    self._epoch_num = self._train_kwargs.get('epoch', 0)
    if self._epoch_num > 0:
        self.load_model(self._epoch_num)

    # use PySyft for SPDZ
    if args.setting == 'fedavg' and args.spdz:
        import syft as sy
        self._logger.info('Using SPDZ for FedAvg')
        hook = sy.TorchHook(torch)
        self.party_workers = [
            sy.VirtualWorker(hook, id="party{:d}".format(i)) for i in range(2)
        ]
        self.crypto = sy.VirtualWorker(hook, id="crypto")

    # DP
    if args.dp:

        class HiddenPrints:
            def __enter__(self):
                self._original_stdout = sys.stdout
                sys.stdout = open(os.devnull, 'w')

            def __exit__(self, exc_type, exc_val, exc_tb):
                sys.stdout.close()
                sys.stdout = self._original_stdout

        def find_sigma(eps, batches_per_lot, dataset_size):
            lotSize = batches_per_lot * args.batch_size  # L
            N = dataset_size
            delta = min(10**(-5), 1 / N)
            lotsPerEpoch = N / lotSize
            q = lotSize / N  # sampling ratio
            T = args.epochs * lotsPerEpoch  # total number of lots

            def compute_dp_sgd_wrapper(_sigma):
                with HiddenPrints():
                    return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                        n=N,
                        batch_size=lotSize,
                        noise_multiplier=_sigma,
                        epochs=args.epochs,
                        delta=delta)[0] - args.epsilon

            sigma = newton(compute_dp_sgd_wrapper, x0=0.5, tol=1e-4)  # adjust x0 to avoid error
            with HiddenPrints():
                actual_eps = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                    n=N,
                    batch_size=lotSize,
                    noise_multiplier=sigma,
                    epochs=args.epochs,
                    delta=delta)[0]
            # print('Batches_per_lot={}, q={}, T={}, sigma={}'.format(batches_per_lot, q, T, sigma))
            # print('actual epsilon = {}'.format(actual_eps))
            return sigma

        self._logger.info('Epsilon: ' + str(args.epsilon))
        self._logger.info('Lotsize_scaler: ' + str(args.lotsize_scaler))
        lotsizes = [
            N**.5 * args.lotsize_scaler for N in
            [self._data['train_loader'].size, self._data2['train_loader'].size]
        ]
        batches_per_lot_list = list(
            map(lambda lotsize: max(round(lotsize / args.batch_size), 1), lotsizes))
        batches_per_lot_list = [
            min(bpl, loader_len) for bpl, loader_len in zip(
                batches_per_lot_list,
                [self._data['train_loader'].num_batch, self._data2['train_loader'].num_batch])
        ]
        self._logger.info('Batches per lot: ' + str(batches_per_lot_list))
        sigma_list = [
            find_sigma(args.epsilon, bpl, N) for bpl, N in zip(
                batches_per_lot_list,
                [self._data['train_loader'].size, self._data2['train_loader'].size])
        ]
        self._logger.info('Sigma: ' + str(sigma_list))

        # Attach the lot size and noise level to each model
        for mod, bpl, sig in zip([self.dcrnn_model, self.dcrnn_model2],
                                 batches_per_lot_list, sigma_list):
            mod.batch_per_lot = bpl
            mod.sigma = sig

        self._lastNoiseShape = None
        self._noiseToAdd = None
def experiment(num_workers, no_cuda):
    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader
    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])).federate(clients),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    # Creating the models and optimizers for each client
    models, optimizers = [], []
    for i in range(num_workers):
        models.append(Net1().to(device))
        models[i] = models[i].send(clients[i])
        optimizers.append(optim.SGD(params=models[i].parameters(), lr=0.1))

    # Measuring training time
    start = time.time()
    model = Net2().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch,
              models, optimizers, clients_mem)
        test(args, model, device, test_loader, models)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)

    # Printing the memory exchanged by each client
    print("Memory exchanged : ", clients_mem)
    return clients_mem
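# Example invocation (a sketch, not in the original): run the experiment for a few
# client counts and compare the traffic each run records in clients_mem.
if __name__ == "__main__":
    for n in [2, 4, 8]:
        mem = experiment(num_workers=n, no_cuda=True)
        print(n, "clients ->", mem.sum().item(), "exchanged")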
import torch
import torch.nn as nn
import os
import syft
import torchvision
import models  # the project's model package, referenced by getattr() below
from utils import cf_calc
from torchvision import transforms as T
from torchnet import meter
from config import DefaultConfig
from torch.utils.data import DataLoader
from visdom import Visdom

opt = DefaultConfig()
vis = Visdom(env=opt.visname, log_to_filename='./logs/logging.txt')

hook = syft.TorchHook(torch)  # Hook PyTorch with PySyft, extending its methods to support federated learning
os.environ['CUDA_VISIBLE_DEVICES'] = opt.Devices_ID  # Select which GPU(s) to use
device = torch.device("cuda" if opt.use_gpu else "cpu")  # Device on which the model is optimized
torch.manual_seed(opt.random_seed)


def train(**kwargs):
    # Override the default configuration with command-line arguments
    opt.parse(kwargs)

    # Define the model.
    # Only the single-GPU definition is shown here;
    # the multi-GPU data-parallel definition differs slightly.
    model = getattr(models, opt.model)(**opt.model_setting[opt.model])  # pass the model's settings in as a dict
    print(model)
# In[5]:

import syft
import syft as sy
from syft.core import utils
import torch
import torch.nn.functional as F
import json
import random
from syft.core.frameworks.torch import utils as torch_utils
from torch.autograd import Variable

# local_worker = sy.SocketWorker(id="local", port=2009, hook=None, is_client_worker=False)
local_worker = sy.SocketWorker(id="local", port=2009, hook=None)
hook = sy.TorchHook(local_worker=local_worker, verbose=False)
me = hook.local_worker
me.hook = hook

# In[6]:

alice = sy.SocketWorker(id="alice",
                        hostname="100.65.100.179",
                        port=2006,
                        hook=hook,
                        is_pointer=True,
                        is_client_worker=False)
# alice = sy.SocketWorker(id="alice", hostname="172.31.33.80", port=2006, hook=hook, is_pointer=True, is_client_worker=False)
bob = sy.SocketWorker(id="bob",
                      hostname="100.65.100.179",
                      port=2007,  # port assumed; the original snippet breaks off mid-call
                      hook=hook,
                      is_pointer=True,
                      is_client_worker=False)
# remote.py
import syft as sy
import sys
import torch
import asyncio
from sklearn.preprocessing import StandardScaler

hook = sy.TorchHook(torch, verbose=True)
torch.manual_seed(1)

configs = {
    "id": "hospital_2",
    "host": "localhost",
    "hook": hook,
    "verbose": False,
    "port": 8084
}


async def show_all(worker):
    await asyncio.sleep(0)
    while True:
        print("Objects:", worker._objects)
        await asyncio.sleep(2.0)


from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()

# Standardise data for input into Neural Net
scaler = StandardScaler()
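# A sketch of how this remote script plausibly continues (the original breaks off
# here): standardise the features, tag the tensors for discovery, and start a
# WebsocketServerWorker with the configs above. Tag names are assumptions.
from syft.workers.websocket_server import WebsocketServerWorker

X = torch.tensor(scaler.fit_transform(data.data), dtype=torch.float32).tag("#breast_cancer", "#data")
y = torch.tensor(data.target, dtype=torch.long).tag("#breast_cancer", "#target")

server = WebsocketServerWorker(**configs)
server.load_data([X, y])  # register the tensors with the worker
server.start()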
def main():
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket)

    workers = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST(
            "../data",
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]),
        ).federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
async def main():
    hook = sy.TorchHook(torch)
    device = torch.device("cpu")

    model = Net()
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))

    @sy.func2plan()
    def loss_fn(pred, target):
        return nll_loss(input=pred, target=target)

    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)

    epoch_num = 21
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}

    # alice = NodeClient(hook, "ws://172.16.179.20:6666", id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.21:6667", id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
    # testing = NodeClient(hook, "ws://localhost:6669", id="testing")
    # worker_list = [alice, bob, charlie]

    worker_list = []
    for i in range(2, 8):
        worker = NodeClient(hook, "ws://" + flvm_ip[i] + ":6666", id="flvm-" + str(i))
        worker_list.append(worker)

    grid = sy.PrivateGridNetwork(*worker_list)

    for epoch in range(epoch_num):
        logger.info("Training round %s/%s", epoch, epoch_num)
        round_start_time = time.time()

        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                built_model=model,
                built_loss_fn=loss_fn,
                encrypters=worker_list,
                batch_size=batch_size,
                curr_round=epoch,
                max_nr_batches=-1,
                lr=0.1,
            ) for worker in worker_list
        ])
        local_train_end_time = time.time()
        print("[trace]", "AllWorkersTrainingTime", "duration", "COORD",
              local_train_end_time - round_start_time)

        enc_models = {}
        loss_values = {}
        data_amounts = {}
        total_data_amount = 0
        for worker_id, enc_params, worker_loss, num_of_training_data in results:
            if enc_params is not None:
                enc_models[worker_id] = enc_params
                loss_values[worker_id] = worker_loss
                data_amounts[worker_id] = num_of_training_data
                total_data_amount += num_of_training_data

        # Aggregation: sum the encrypted parameters in place on the first model
        nr_enc_models = len(enc_models)
        enc_models_list = list(enc_models.values())
        data_amounts_list = list(data_amounts.values())
        dst_enc_model = enc_models_list[0]
        aggregation_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                for j in range(1, nr_enc_models):
                    dst_enc_model[i] += enc_models_list[j][i]
        aggregation_end_time = time.time()
        print("[trace]", "AggregationTime", "duration", "COORD",
              aggregation_end_time - aggregation_start_time)

        # Decryption: fetch, decode, and average the summed parameters
        new_params = []
        decryption_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                decrypt_para = dst_enc_model[i].get()
                new_para = decrypt_para.float_precision()
                new_para = new_para / int(total_data_amount)
                model.parameters()[i].set_(new_para)
        round_end_time = time.time()
        print("[trace]", "DecryptionTime", "duration", "COORD",
              round_end_time - decryption_start_time)
        print("[trace]", "RoundTime", "duration", "COORD",
              round_end_time - round_start_time)

        ## FedAvg
        # nr_models = len(models)
        # model_list = list(models.values())
        # dst_model = model_list[0]
        # nr_params = len(dst_model.parameters())
        # with torch.no_grad():
        #     for i in range(1, nr_models):
        #         src_model = model_list[i]
        #         src_params = src_model.parameters()
        #         dst_params = dst_model.parameters()
        #         for i in range(nr_params):
        #             dst_params[i].set_(src_params[i].data + dst_params[i].data)
        #     for i in range(nr_params):
        #         dst_params[i].set_(dst_params[i].data * 1 / total_data_amount)

        # if epoch % 5 == 0 or epoch == 49:
        #     evaluate_model_on_worker(
        #         model_identifier="Federated model",
        #         worker=testing,
        #         dataset_key="mnist_testing",
        #         model=model,
        #         built_loss_fn=loss_fn,
        #         nr_bins=10,
        #         batch_size=64,
        #         device=device,
        #         print_target_hist=False,
        #     )

        model.pointers = {}
        loss_fn.pointers = {}

        # Decay the learning rate
        learning_rate = max(0.98 * learning_rate, lr * 0.01)
async def main():
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    kwargs_websocket = {"hook": hook, "verbose": args.verbose, "host": "0.0.0.0"}
    alice = websocket_client.WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
    bob = websocket_client.WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)
    charlie = websocket_client.WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket)
    testing = websocket_client.WebsocketClientWorker(id="testing", port=8780, **kwargs_websocket)

    for wcw in [alice, bob, charlie, testing]:
        wcw.clear_objects_remote()

    worker_instances = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    model = Net().to(device)
    traced_model = torch.jit.trace(
        model, torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    learning_rate = args.lr

    for curr_round in range(1, args.training_rounds + 1):
        logger.info("Training round %s/%s", curr_round, args.training_rounds)

        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                traced_model=traced_model,
                batch_size=args.batch_size,
                curr_round=curr_round,
                max_nr_batches=args.federate_after_n_batches,
                lr=learning_rate,
            ) for worker in worker_instances
        ])
        models = {}
        loss_values = {}

        test_models = curr_round % 10 == 1 or curr_round == args.training_rounds
        if test_models:
            logger.info("Evaluating models")
            np.set_printoptions(formatter={"float": "{: .0f}".format})
            for worker_id, worker_model, _ in results:
                evaluate_model_on_worker(
                    model_identifier="Model update " + worker_id,
                    worker=testing,
                    dataset_key="mnist_testing",
                    model=worker_model,
                    nr_bins=10,
                    batch_size=128,
                    device=device,
                    print_target_hist=False,
                )

        # Federate models (note that this will also change the model in models[0])
        for worker_id, worker_model, worker_loss in results:
            if worker_model is not None:
                models[worker_id] = worker_model
                loss_values[worker_id] = worker_loss

        traced_model = utils.federated_avg(models)

        if test_models:
            evaluate_model_on_worker(
                model_identifier="Federated model",
                worker=testing,
                dataset_key="mnist_testing",
                model=traced_model,
                nr_bins=10,
                batch_size=128,
                device=device,
                print_target_hist=False,
            )

        # Decay the learning rate
        learning_rate = max(0.98 * learning_rate, args.lr * 0.01)

    if args.save_model:
        # Save the federated (traced) model, which holds the averaged parameters
        torch.save(traced_model.state_dict(), "mnist_cnn.pt")
def main():
    # Check for and use a GPU if available, otherwise use the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device is {}".format(device))

    # Arguments
    args = setup_utils.setup_and_load()
    print("Arguments are")
    print(args)

    # Set the seed
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    time_int = int(time.time())
    seed = time_int % 10000
    set_global_seeds(seed * 100 + rank)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    if Config.NUM_ENVS > 1:
        print("To do: add multi env support")
    nenvs = 1  # Config.NUM_ENVS
    env = utils.make_general_env(nenvs, seed=rank)
    # wrap env (not needed with Coinrun options)
    # env = dqn_utils.wrap_deepmind(env, clip_rewards=False, frame_stack=True, scale=False)
    action_size = env.action_space.n

    # Set up the PySyft workers
    num_workers = 2
    hook = sy.TorchHook(torch)
    worker_1 = sy.VirtualWorker(hook, id='worker_1')
    worker_2 = sy.VirtualWorker(hook, id='worker_2')
    secure_worker = sy.VirtualWorker(hook, id='secure_worker')
    worker_list = []
    worker_list.append(worker_1)
    worker_list.append(worker_2)

    # Training hyperparameters
    timesteps = 250000  # run the env for this many time steps
    hidden_size = 512  # size of the hidden layer of the FFNN that connects the CNN to the outputs
    is_dueling = True
    is_impala_net = False
    learning_rate = 0.0001  # learning rate of the optimizer
    batch_size = 32  # size of the batch trained on
    start_training_after = 10000  # start training the NN after this many timesteps
    discount = 0.99  # discount future states by this factor
    epsilon_start = 1.0  # epsilon-greedy start value
    epsilon_min = 0.02  # epsilon-greedy end value
    epsilon_decay_steps = timesteps * .5  # decay epsilon over this many timesteps
    epsilon_step = (epsilon_start - epsilon_min) / epsilon_decay_steps  # decrement epsilon by this amount every timestep
    update_target_every = 1  # update the target network every this many steps
    tau = 0.001  # soft target-update amount
    frame_skip = 4  # hold each action for this many frames
    save_every = 10000  # save the model after this many timesteps
    train_every = 1  # number of times to train

    # Create the replay buffers
    replay_size = 50000  # size of each replay buffer
    replay_buffer_list = []
    for i in range(num_workers):
        replay_buffer = dqn_utils.ReplayBuffer(max_size=replay_size)
        replay_buffer_list.append(replay_buffer)

    # Create the DQN agent
    dqn_agent = dqn_utils.DQNAgent(action_size, hidden_size, learning_rate,
                                   is_dueling, is_impala_net)

    # Create stats for every env
    stats_every = 10  # print stats every this many episodes
    stats_list = []  # store the stats for each env here
    for i in range(num_workers):
        temp_dict = {}
        temp_dict['episode'] = 0
        temp_dict['mean_reward_total'] = 0.
        temp_dict['mean_ep_length_total'] = 0.
        temp_dict['mean_reward_recent'] = 0.
        temp_dict['mean_ep_length_recent'] = 0.
        temp_dict['episode_loss'] = 0.
        temp_dict['episode_reward'] = 0.
        temp_dict['episode_length'] = 0.
        stats_list.append(temp_dict)

    # Training loop
    epsilon = epsilon_start

    # Take no_action on the first step to get a state, and use the state to tell which level;
    # env.reset() does not produce an observation in CoinRun until an action is taken
    no_action = np.zeros((nenvs, ), dtype=np.int32)
    state_list, _, _, _ = env.step(no_action)

    # Assign each level to a worker.
    # CoinRun doesn't have a way to tell the current level, so take the mean of the first
    # screen of the level and use a dictionary to assign levels.
    # worker_level tells which replay buffer to put data into (i.e. which worker is training).
    level_worker_dict = {}
    levels_assigned = 0

    def get_worker_level(state, lw_dict, la, nw):
        temp_key = int(1000 * np.mean(state))
        if temp_key not in lw_dict:
            la += 1
            lw_dict[temp_key] = la % nw
            print("Adding new key to level_worker_dict. current size is: {}".format(len(lw_dict)))
            print(lw_dict)
        return lw_dict[temp_key], lw_dict, la

    worker_level, level_worker_dict, levels_assigned = get_worker_level(
        state_list[0], level_worker_dict, levels_assigned, num_workers)

    for ts in range(timesteps):
        # Decay epsilon
        epsilon -= epsilon_step
        if epsilon < epsilon_min:
            epsilon = epsilon_min

        # Select an action from the agent's policy
        action = dqn_agent.select_action(state_list[0].squeeze(axis=-1), epsilon, env, batch_size)

        # Enter the action into the env
        reward_frame_skip = 0.
        for _ in range(frame_skip):
            next_state_list, reward_list, done_list, _ = env.step(action)
            stats_list[worker_level]['episode_reward'] += reward_list[0]
            reward_frame_skip += reward_list[0]
            if done_list[0]:
                break
        done = done_list[0]
        stats_list[worker_level]['episode_length'] += 1

        # Add the experience to the replay buffer
        replay_buffer_list[worker_level].add(
            (state_list[0].squeeze(axis=-1), next_state_list[0].squeeze(axis=-1),
             action, reward_frame_skip, float(done)))

        if done:
            # env.reset doesn't reset the CoinRun env but does produce an image of
            # the first frame, which we can use to get the worker_level
            state_list = env.reset()
            worker_level, level_worker_dict, levels_assigned = get_worker_level(
                state_list[0], level_worker_dict, levels_assigned, num_workers)

            # Update stats: overall averages
            stats_list[worker_level]['episode'] += 1
            stats_list[worker_level]['mean_reward_total'] = (
                stats_list[worker_level]['mean_reward_total'] *
                (stats_list[worker_level]['episode'] - 1) +
                stats_list[worker_level]['episode_reward']) / stats_list[worker_level]['episode']
            stats_list[worker_level]['mean_ep_length_total'] = (
                stats_list[worker_level]['mean_ep_length_total'] *
                (stats_list[worker_level]['episode'] - 1) +
                stats_list[worker_level]['episode_length']) / stats_list[worker_level]['episode']

            # Keep a running average of the last stats_every episodes
            if stats_list[worker_level]['episode'] >= stats_every:
                temp_episodes_num = stats_every
            else:
                temp_episodes_num = stats_list[worker_level]['episode']
            stats_list[worker_level]['mean_reward_recent'] = (
                stats_list[worker_level]['mean_reward_recent'] * (temp_episodes_num - 1) +
                stats_list[worker_level]['episode_reward']) / temp_episodes_num
            stats_list[worker_level]['mean_ep_length_recent'] = (
                stats_list[worker_level]['mean_ep_length_recent'] * (temp_episodes_num - 1) +
                stats_list[worker_level]['episode_length']) / temp_episodes_num

            # Reset the episode stats
            stats_list[worker_level]['episode_reward'] = 0.
            stats_list[worker_level]['episode_length'] = 0

            # Print stats
            if stats_list[worker_level]['episode'] % stats_every == 0:
                print('w: {}'.format(worker_level),
                      'epi: {}'.format(stats_list[worker_level]['episode']),
                      't: {}'.format(ts),
                      'r: {:.1f}'.format(stats_list[worker_level]['mean_reward_total']),
                      'l: {:.1f}'.format(stats_list[worker_level]['mean_ep_length_total']),
                      'r r: {:.1f}'.format(stats_list[worker_level]['mean_reward_recent']),
                      'r l: {:.1f}'.format(stats_list[worker_level]['mean_ep_length_recent']),
                      'eps: {:.2f}'.format(epsilon),
                      'loss: {:.1f}'.format(stats_list[worker_level]['episode_loss']))
                stats_list[worker_level]['episode_loss'] = 0.
        else:
            state_list = next_state_list

        if ts > start_training_after:
            # Train the agent.
            # A typical DQN gathers experiences and trains once every iteration;
            # train_every modifies that to train 'train_every' times every train_every-th iteration.
            # Example: if train_every=10, then train 10 times every 10th iteration.
            if ts % train_every == 0:
                # PySyft federated learning training:
                # copy the model to each worker;
                # each worker trains on its own data from its own replay buffer;
                # the updated models from each worker are sent to a secure worker
                # who updates the new model
                worker_dqn_list = []
                worker_dqn_target_list = []
                worker_opt_list = []
                for i in range(num_workers):
                    worker_dqn_list.append(dqn_agent.train_net.copy().send(worker_list[i]))
                    worker_dqn_target_list.append(dqn_agent.target_net.copy().send(worker_list[i]))
                    worker_opt_list.append(
                        optim.Adam(params=worker_dqn_list[i].parameters(), lr=learning_rate))

                for i in range(num_workers):
                    for _ in range(train_every):
                        # Sample a batch from the replay buffer
                        x0, x1, a, r, d = replay_buffer_list[i].sample(batch_size)

                        # Turn the batches into tensors and attach them to the GPU if available
                        state_batch = torch.FloatTensor(x0).to(device)
                        state_batch = torch.unsqueeze(state_batch, dim=1)
                        next_state_batch = torch.FloatTensor(x1).to(device)
                        next_state_batch = torch.unsqueeze(next_state_batch, dim=1)
                        action_batch = torch.LongTensor(a).to(device)
                        reward_batch = torch.FloatTensor(r).to(device)
                        done_batch = torch.FloatTensor(1. - d).to(device)

                        # Send the data to the worker
                        worker_state_batch = state_batch.send(worker_list[i])
                        worker_next_state_batch = next_state_batch.send(worker_list[i])
                        worker_action_batch = action_batch.send(worker_list[i])
                        worker_reward_batch = reward_batch.send(worker_list[i])
                        worker_done_batch = done_batch.send(worker_list[i])

                        train_q = worker_dqn_list[i](worker_state_batch).gather(1, worker_action_batch)
                        with torch.no_grad():
                            # Double DQN: get argmax values from the train network,
                            # use the argmax in the target network
                            train_argmax = worker_dqn_list[i](worker_next_state_batch).max(1)[1].view(batch_size, 1)
                            target_net_q = worker_reward_batch + worker_done_batch * discount * \
                                worker_dqn_target_list[i](worker_next_state_batch).gather(1, train_argmax)

                        # Get the loss between the train q values and target q values.
                        # DQN implementations typically use MSE loss or Huber loss
                        # (smooth_l1_loss is similar to Huber).
                        # loss_fn = nn.MSELoss()
                        # loss = loss_fn(train_q, target_net_q)
                        loss = F.smooth_l1_loss(train_q, target_net_q)

                        # Optimize the parameters with the loss
                        worker_opt_list[i].zero_grad()
                        loss.backward()
                        for param in worker_dqn_list[i].parameters():
                            param.grad.data.clamp_(-1, 1)
                        worker_opt_list[i].step()

                        # Get the loss stats
                        # print("loss is {}".format(loss))
                        temp_loss = loss.get()
                        # print("loss get is {}".format(temp_loss))
                        stats_list[i]['episode_loss'] += temp_loss.detach().cpu().numpy()

                    # Move the worker-trained model to the secure worker for updating the centralized DQN
                    worker_dqn_list[i].move(secure_worker)
                    with torch.no_grad():
                        # The first worker replaces the centralized DQN parameters,
                        # then keep a running average as each new worker's params arrive
                        if i == 0:
                            dqn_agent.train_net.load_state_dict(worker_dqn_list[i].get().state_dict())
                        else:
                            # use a local name so the soft-update tau above is not clobbered
                            avg_tau = 1. / (1 + i)
                            temp_net = worker_dqn_list[i].get()
                            for dqn_var, temp_var in zip(dqn_agent.train_net.parameters(),
                                                         temp_net.parameters()):
                                dqn_var.data.copy_((1. - avg_tau) * dqn_var.data + avg_tau * temp_var.data)

        # Save the network
        if ts % save_every == 0:
            save_string = "saved_models/dqn_model_{}_{}.pt".format(time_int, ts)
            torch.save(dqn_agent.train_net.state_dict(), save_string)
            stats_save_string = "saved_models/stats_{}_{}.pickle".format(time_int, ts)
            with open(stats_save_string, 'wb') as handle:
                pickle.dump(stats_list, handle)

        # Update the target network
        dqn_agent.update_target_network_soft(ts, update_target_every, tau)

    print("save final model")
    save_string = "saved_models/dqn_model_{}_FINAL.pt".format(time_int)
    torch.save(dqn_agent.train_net.state_dict(), save_string)
    stats_save_string = "saved_models/stats_{}_FINAL.pickle".format(time_int)
    with open(stats_save_string, 'wb') as handle:
        pickle.dump(stats_list, handle)
def test_init():
    hook = syft.TorchHook(torch, verbose=True)
    tensor_extension = torch.Tensor()
    assert tensor_extension.id is not None
    assert tensor_extension.owner is not None
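# A companion check (a sketch, not from the original test suite): after hooking,
# tensors can be sent to a VirtualWorker, and the pointer records its location.
def test_send_and_get():
    hook = syft.TorchHook(torch)
    bob = syft.VirtualWorker(hook, id="bob")
    ptr = torch.tensor([1, 2, 3]).send(bob)
    assert ptr.location == bob
    assert (ptr.get() == torch.tensor([1, 2, 3])).all()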
def experiment(no_cuda):
    # Creating the clients
    hook = sy.TorchHook(torch)

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    torch.manual_seed(args.seed)

    clients = [alice, bob, charlie]
    clients_mem = torch.zeros(len(clients))
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader
    federated_train_loader = sy.FederatedDataLoader(  # <-- this is now a FederatedDataLoader
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])).federate(clients),
        # <-- NEW: we distribute the dataset across all the workers; it's now a FederatedDataset
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    # Creating the models for each client
    models, optimizers = [], []
    for i in range(len(clients)):
        models.append(Net1().to(device))
        models[i] = models[i].send(clients[i]))
        optimizers.append(optim.SGD(params=models[i].parameters(), lr=0.1))

    start = time.time()
    model = Net2().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)  # TODO momentum is not supported at the moment

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch,
              models, optimizers, clients_mem)
        test(args, model, device, test_loader, models)
        t = time.time()
        print(t - start)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
import torch
import syft as sy  # <-- NEW: import the PySyft library
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from visdom import Visdom
from datetime import datetime

hook = sy.TorchHook(torch)  # <-- NEW: hook PyTorch, i.e. add extra functionalities to support federated learning

import ComputePrivacy as Privacy  # self-defined module to compute the privacy loss
import logging
import Datasets  # self-defined module to load the federated datasets
import os

logger = logging.getLogger(__name__)
date = datetime.now().strftime('%Y-%m-%d %H:%M')
vis = Visdom(env='CELEBA_FixDP_C_Asyn_08_flat')


# Define parameters
class Arguments():
    def __init__(self):
        self.batch_size = 5  # Number of samples used by each user/device at each iteration.
        # If this value is less than 1 it means the sampling ratio, otherwise the mini-batch size
        self.lr = 0.01  # Learning rate
        self.grad_upper_bound = torch.tensor([1.])  # Clipping bound S
        self.z = 0.8  # Noise parameter z in Gaussian noise N(0, (zS)^2), where S is the sensitivity
        self.users_total = 800  # Total number of users/devices
        self.user_sel_prob = 0.02  # Probability of sampling each user/device at each iteration
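# To make the parameters above concrete, a minimal sketch (an addition, not this
# script's implementation) of the Gaussian mechanism they describe: each user's
# update is clipped to norm S = grad_upper_bound, and the average of n clipped
# updates is perturbed with noise of standard deviation z * S / n.
def clip_and_noise(updates, S, z):
    # updates: list of flattened per-user update tensors
    clipped = [u * min(1.0, S / (u.norm().item() + 1e-12)) for u in updates]
    avg = torch.stack(clipped).mean(dim=0)
    # the clipped average has sensitivity S / n, hence noise std z * S / n
    return avg + torch.randn_like(avg) * (z * S / len(updates))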
optimizers = [bobs_optimizer, alice_optimizer]

# from syft.core.hooks import TorchHook
# from syft.core.workers import VirtualWorker
import torch
import torch.nn as nn
from torch.autograd import Variable as Var
import torch.optim as optim
import syft as sy

# this is our hook
hook = sy.TorchHook()
me = hook.local_worker
me.is_client_worker = False

bob = sy.VirtualWorker(id='bob', hook=hook, is_client_worker=False)
alice = sy.VirtualWorker(id='alice', hook=hook, is_client_worker=False)

# me.add_workers([bob, alice])
bob.add_workers([alice])
alice.add_workers([bob])

compute_nodes = [bob, alice]

train_distributed_dataset = []

i = 0
for batch_idx, (data, target) in enumerate(train_loader):
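    # The snippet ends mid-loop; a plausible completion, alternating batches
    # between bob and alice as the companion tutorials do:
    data = data.send(compute_nodes[batch_idx % len(compute_nodes)])
    target = target.send(compute_nodes[batch_idx % len(compute_nodes)])
    train_distributed_dataset.append((data, target))
    i += 1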
parser.add_argument(
    "--id", type=str,
    help="name (id) of the websocket server worker, e.g. --id alice")
parser.add_argument(
    "--testing",
    action="store_true",
    help="if set, the websocket server worker will load the test dataset instead of the training dataset",
)
parser.add_argument(
    "--verbose",
    "-v",
    action="store_true",
    help="if set, the websocket server worker will be started in verbose mode",
)

args = parser.parse_args()

# Hook and start server
hook = sy.TorchHook(torch)
server = start_websocket_server_worker(
    id=args.id,
    host=args.host,
    port=args.port,
    hook=hook,
    verbose=args.verbose,
    keep_labels=KEEP_LABELS_DICT[args.id],
    training=not args.testing,  # --testing selects the test dataset, so training is its negation
)
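# A hypothetical launcher (not part of the original) that starts several of these
# server workers as subprocesses; the script name and flag spellings are assumptions.
import subprocess

WORKERS = [("alice", 8777), ("bob", 8778), ("charlie", 8779), ("testing", 8780)]
processes = [
    subprocess.Popen(["python", "run_websocket_server.py",
                      "--host", "0.0.0.0", "--port", str(port), "--id", worker_id])
    for worker_id, port in WORKERS
]
for p in processes:
    p.wait()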
async def main():
    """ Main """
    hook = sy.TorchHook(torch)

    parser = argparse.ArgumentParser(description='Train and validate a Federated model')
    parser.add_argument('config', type=str, help='Configuration file')
    args = parser.parse_args()

    config = configparser.ConfigParser()
    config.read(args.config)

    # Train configuration
    config_rounds = config.getint('TRAIN', 'rounds')
    config_epochs = config.getint('TRAIN', 'epochs')
    config_batch = config.getint('TRAIN', 'batch')
    config_optimizer = config.get('TRAIN', 'optimizer')
    config_lr = config.getfloat('TRAIN', 'lr')
    config_shuffle = config.getboolean('TRAIN', 'shuffle')

    clients = {}
    clients_results = {}
    for section in config.sections():
        if section.startswith('WORKER'):
            kwargs_websocket = {'hook': hook,
                                'id': config.get(section, 'id'),
                                'host': config.get(section, 'host'),
                                'port': config.getint(section, 'port'),
                                'verbose': config.getboolean(section, 'verbose')}
            federation_participant = config.getboolean(section, 'federation_participant')
            client = CustomWebsocketClientWorker(**kwargs_websocket)
            client.federation_participant = federation_participant
            client.clear_objects_remote()
            clients[kwargs_websocket['id']] = client
            clients_results[kwargs_websocket['id']] = []

    model = Classifier()
    traced_model = trace(model, torch.zeros([1, 10], dtype=torch.float))

    for curr_round in range(config_rounds):
        print('Round %s/%s ¡Ding Ding!:' % (curr_round + 1, config_rounds))
        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=clients[client],
                traced_model=traced_model,
                optimizer=config_optimizer,
                batch_size=config_batch,
                epochs=config_epochs,
                lr=config_lr,
                dataset_key='test',
                shuffle=config_shuffle
            ) for client in clients if clients[client].federation_participant
        ])
        print('Training done!')

        print('Federating model ... ', end='')
        models = {}
        for worker_id, worker_model in results:
            if worker_model is not None:
                models[worker_id] = worker_model
        traced_model = utils.federated_avg(models)
        print('Done!')

        for client in clients:
            # Evaluate on the train set
            train_loss, train_confusion_matrix = evaluate_model_on_worker(
                worker=clients[client],
                dataset_key='train',
                model=traced_model,
                batch_size=config_batch,
            )
            # Evaluate on the test set
            test_loss, test_confusion_matrix = evaluate_model_on_worker(
                worker=clients[client],
                dataset_key='test',
                model=traced_model,
                batch_size=config_batch,
            )
            clients_results[client].append((train_loss, test_loss, test_confusion_matrix))
            print('"%s" => Train loss: %.4f. Test loss: %.4f' % (client, train_loss, test_loss))

    print('Confusion matrices:')
    for client in clients_results:
        print('Model "%s" stats:' % client)
        train_losses = [cr[0] for cr in clients_results[client]]
        test_losses = [cr[1] for cr in clients_results[client]]
        conf_matrices = [cr[2] for cr in clients_results[client]]
        show_results(conf_matrices, train_losses, test_losses, label=client, loss_xlabel='Round')