def __init__(self, root, train=True, transform=None, target_transform=None, download=False):
    """Load the FEMNIST federated split and build per-user index maps.

    Args:
        root: dataset root forwarded to the parent dataset class.
        train: select the train split when it equals True, else the test split.
        transform / target_transform: forwarded to the parent class.
        download: if True, fetch the raw data first via self.download().

    Raises:
        RuntimeError: if the dataset files are not present locally.
    """
    super(FEMNIST, self).__init__(root, transform=transform, target_transform=target_transform)
    self.train = train  # training set or test set
    if download:
        self.download()
    if not self._check_exists():
        raise RuntimeError('Dataset not found.' + ' You can use download=True to download it')
    train_data_dir = os.path.join('..', 'data', 'femnist', 'FEMNIST', 'train')
    test_data_dir = os.path.join('..', 'data', 'femnist', 'FEMNIST', 'test')
    # Single read_data call replaces the duplicated if/else branches; the
    # original tested `self.train == True`, so that exact predicate is kept.
    self.users, groups, self.data = read_data(
        train_data_dir, test_data_dir, train_flag=(self.train == True))
    # The disabled 62->47 class remapping (`if False:` dead code plus its
    # lookup table) has been removed; recover it from history if merged-class
    # labels are ever needed.  TODO: automate this
    # dict_users[i] = set of global sample indices belonging to user i.
    self.dict_users = {}
    counter = 0
    for i, u in enumerate(self.users):
        n_samples = len(self.data[u]['y'])
        self.dict_users[i] = set(range(counter, counter + n_samples))
        # Fix: the original advanced via `lst[-1] + 1`, which raised
        # IndexError for any user with zero samples.
        counter += n_samples
    # dict_index[k] = [user position, sample position within that user]
    # for the k-th global sample index.
    self.dict_index = {}
    length_data = 0
    for i, u in enumerate(self.users):
        for j in range(len(self.data[u]['y'])):
            self.dict_index[length_data] = [i, j]
            length_data += 1
    self.length_data = length_data
def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, users_per_round, similarity, noise, times):
    """Create the SCAFFOLD server: one UserSCAFFOLD per client in the
    dataset plus zero-initialised server control variates."""
    super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, L,
                     num_glob_iters, local_epochs, users_per_round, similarity, noise, times)
    self.control_norms = []
    # Instantiate every user found in the dataset.
    data = read_data(dataset)
    total_users = len(data[0])
    for idx in range(total_users):
        user_id, train_set, test_set = read_user_data(idx, data, dataset)
        new_user = UserSCAFFOLD(user_id, train_set, test_set, model,
                                batch_size, learning_rate, L, local_epochs)
        self.users.append(new_user)
        self.total_train_samples += new_user.train_samples
    if self.noise:
        self.communication_thresh = rayleigh.ppf(1 - users_per_round / total_users)  # h_min
    print("Number of users / total users:", users_per_round, " / ", total_users)
    # One zero control variate per trainable model parameter.
    self.server_controls = [torch.zeros_like(p.data)
                            for p in self.model.parameters() if p.requires_grad]
    print("Finished creating SCAFFOLD server.")
def setup_clients(dataset, model=None, max_clients=None):
    """Instantiates clients based on given train and test data directories.

    At most `max_clients` clients are created (defaults to the module-level
    NumbOfUsers constant, preserving the original behavior).

    Return:
        all_clients: list of Client objects.
    """
    train_data_dir = os.path.join('..', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', dataset, 'data', 'test')
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    if not groups:
        groups = [[] for _ in users]
    # Generalized: the cap is now a parameter instead of only the global.
    limit = NumbOfUsers if max_clients is None else max_clients
    all_clients = []
    for n, (u, g) in enumerate(zip(users, groups)):
        all_clients.append(Client(u, g, train_data[u], test_data[u], model))
        print("User", n, " Client:", u, " -> Train data size:", len(train_data[u]['y']), "samples")
        if n + 1 == limit:
            break
    return all_clients
def setup_client_servers(dataset, seed, params, sketcher, model_cls, use_val_set=False, num_client_servers=1):
    """Instantiate client servers from the dataset's train/eval splits.

    Return:
        client_servers: the objects produced by create_client_servers.
    """
    eval_set = 'val' if use_val_set else 'test'
    train_data_dir = os.path.join(*(DATA_PATH + ['data', dataset, 'train']))
    test_data_dir = os.path.join(*(DATA_PATH + ['data', dataset, eval_set]))
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    return create_client_servers(seed, params, users, groups, train_data,
                                 test_data, sketcher, model_cls, num_client_servers)
def setup_clients(self, num_setup):
    """Instantiate clients for roughly a fraction `num_setup` of all users.

    Return:
        self.clients after appending the new Client objects.
    """
    data_root = os.path.join('..', 'data', self.dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    # Keep the leading portion of users (cutoff + 1 of them, exactly as the
    # original slicing did).
    cutoff = int(len(users) * num_setup)
    users = users[:cutoff + 1]
    if not groups:
        groups = [[] for _ in users]
    # Resolve model hyper-parameters, overriding the learning rate if set.
    # NOTE(review): model_params is not used below in this block — presumably
    # consumed elsewhere in the original code base; kept for fidelity.
    model_params = MODEL_PARAMS[self.model_path]
    if self.args.lr != -1:
        overridden = list(model_params)
        overridden[0] = self.args.lr
        model_params = tuple(overridden)
    for offset, user_id in enumerate(users):
        self.clients.append(
            Client(user_id, train_data[user_id], test_data[user_id], self.seed + offset))
    return self.clients
def __init__(self, experiment, device, dataset, algorithm, model, batch_size, learning_rate, alpha, eta, L, num_glob_iters, local_epochs, optimizer, num_edges, times):
    """Create the server for the chosen edge algorithm: load the dataset,
    record the optimal weights/losses for synthetic data, and create one
    edge object per client.

    Raises:
        ValueError: if `algorithm` is not one of the supported names (the
        original if-chain left `edge` unbound and failed with NameError).
    """
    super().__init__(experiment, device, dataset, algorithm, model[0], batch_size,
                     learning_rate, alpha, eta, L, num_glob_iters, local_epochs,
                     optimizer, num_edges, times)
    # Initialize data for all edges
    data = read_data(dataset, read_optimal_weights=False)
    self.optimal_weights = None
    self.optimal_loss_unreg = None  # Unregularized loss
    self.optimal_loss_reg = None  # Regularized loss with parameter L
    if data[-1] is not None:
        # Synthetic dataset: save the optimal weights for comparison later
        self.optimal_weights = data[-2]
        self.optimal_loss_unreg = data[-1]
        # NOTE(review): the norm here is taken over data[-1] (the unregularized
        # loss scalar), not data[-2] (the weights) — looks suspicious; kept
        # as-is, confirm intent before changing.
        self.optimal_loss_reg = (self.L / 2) * (np.linalg.norm(data[-1]) ** 2)
    # Dispatch table replaces the original repetitive if-chain.
    edge_classes = {
        "DONE": edgeDONE,
        "FirstOrder": edgeFiOrder,
        "DANE": edgeDANE,
        "New": edgeNew,
        "GD": edgeGD,
        "FedAvg": edgeAvg,
        "FEDL": edgeFEDL,
        "Newton": edgeNewton,
        "GT": edgeGT,
        "PGT": edgePGT,
    }
    if algorithm not in edge_classes:
        raise ValueError("Unknown algorithm: " + str(algorithm))
    edge_cls = edge_classes[algorithm]
    total_edges = len(data[0])
    for i in range(total_edges):
        edge_id, train, test = read_edge_data(i, data, dataset)
        edge = edge_cls(device, edge_id, train, test, model, batch_size,
                        learning_rate, alpha, eta, L, local_epochs, optimizer)
        self.edges.append(edge)
        self.total_train_samples += edge.train_samples
    print("Number of edges / total edges:", num_edges, " / ", total_edges)
def setup_clients(dataset, model=None, use_val_set=None):
    """Create one client per user from the dataset's train/eval splits."""
    eval_set = "test" if not use_val_set else "val"
    train_data_dir = os.path.join("data", dataset, "data", "train")
    # (renamed from the original's 'test_date_dir' typo — local only)
    test_data_dir = os.path.join("data", dataset, "data", eval_set)
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    return create_clients(users, groups, train_data, test_data, model)
def main():
    """Entry point: read the dataset, run the Mlhead trainer, and finish."""
    args = parse_args()
    data_root = os.path.join('..', 'data', args.dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    trainer = MlheadTrainer(args, users, groups, train_data, test_data)
    trainer.train(args)
    trainer.finish(args)
def create_clients(model, args):
    """Instantiate one Client per user from the dataset's train/test splits."""
    data_root = os.path.join('..', 'data', args.dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    if not groups:
        groups = [[] for _ in users]
    clients = []
    for user_id, group in zip(users, groups):
        clients.append(Client(user_id, group, train_data[user_id], test_data[user_id], model))
    return clients
def main():
    """Grid-search learning rates for a federated trainer.

    Runs every (learning-rate, seed) combination via parallel_execute,
    reduces per-combination results into one loss per learning rate, prints
    the best one, and writes the (lr, loss) table to a CSV under results/.
    """
    # Read in the command line options
    options = read_options()
    # Generate the search space for learning rates
    search = get_search_space(**options)
    # Generate the random seeds to test
    seeds = range(options['seed'], options['seed'] + options['num_seeds'])
    # Read in the data
    train_path = os.path.join('..', 'data', options['dataset'], options['dataloc'], 'train.json')
    test_path = os.path.join('..', 'data', options['dataset'], options['dataloc'], 'test.json')
    print('Reading data...')
    dataset = read_data(train_path, test_path)
    print('Reading data done.')
    # Import model (all synthetic variants share the models.synthetic package)
    if options['dataset'].startswith('synthetic'):
        model_path = '%s.%s.%s' % ('models', 'synthetic', options['model'])
    else:
        model_path = '%s.%s.%s' % ('models', options['dataset'], options['model'])
    # Import server
    server_path = 'trainers.{}'.format(options['optimizer'])
    # Try all seeds and learning rates: one job per (lr, seed) pair
    args = list(itertools.product(search, seeds))
    results = parallel_execute(args, dataset, options, model_path, server_path)
    # Collapse the per-seed results into one loss per learning rate.
    losses = reduce_results(args, results)
    results_reduced = list(zip(search, losses))
    print(results_reduced)
    print('Best result: {} (index {})'.format(
        results_reduced[np.argmin(losses)], np.argmin(losses)))
    # Make output directory
    if not os.path.exists('results'):
        os.mkdir('results')
    note = '_' + options['note'] if len(options['note']) > 0 else ''

    def get_aux_param(options):
        # Optimizer-specific hyper-parameter recorded in the output filename.
        optim = options['optimizer']
        if optim == 'fedio':
            return options['rho']
        elif optim == 'fedprox':
            return options['mu']
        return 0

    out_file = 'results/tune_{}_{}_{}_{}_{:.3f}_{}_{}_{}_{}_{}{}.csv'.format(
        options['optimizer'], options['dataset'], options['dataloc'],
        options['model'], get_aux_param(options), options['num_rounds'],
        options['clients_per_round'], options['num_iters'], options['seed'],
        options['num_seeds'], note)
    pd.DataFrame(results_reduced, columns=['lr', 'loss']).set_index('lr').to_csv(out_file)
    return
def setup_clients(dataset, model=None, use_val_set=False, poison_fraction=0, poison_type=PoisonType.NONE):
    """Instantiates clients based on given train and test data directories.

    A fraction `poison_fraction` of the clients (taken from the front of the
    list) is marked malicious.  For PoisonType.LABELFLIP their training sets
    are rewritten so every feature vector comes from class FLIP_FROM_CLASS
    and every label becomes FLIP_TO_CLASS, preserving the dataset size.

    Return:
        (clients, malicious_clients): all Client objects and the ids of the
        poisoned ones.
    """
    eval_set = 'test' if not use_val_set else 'val'
    train_data_dir = os.path.join('leaf', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('leaf', 'data', dataset, 'data', eval_set)
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    clients = create_clients(users, groups, train_data, test_data, model)
    # The first poison_fraction of clients become the malicious set.
    num_malicious_clients = math.floor(len(clients) * poison_fraction)
    malicious_clients = [
        client.id for client in clients[:num_malicious_clients]
    ]
    if poison_type == PoisonType.LABELFLIP:
        # get reference flipping data in case a client possesses no data of
        # class FLIP_FROM_CLASS: the first client holding more than 10 such
        # samples supplies the fallback.
        reference_data = []
        for j in range(len(clients)):
            if len([
                clients[j].train_data['x'][i]
                for i in range(len(clients[j].train_data['y']))
                if clients[j].train_data['y'][i] == FLIP_FROM_CLASS
            ]) > 10:
                reference_data = [
                    clients[j].train_data['x'][i]
                    for i in range(len(clients[j].train_data['y']))
                    if clients[j].train_data['y'][i] == FLIP_FROM_CLASS
                ]
                break
        for client in clients[:num_malicious_clients]:
            # flip labels: keep the original number of samples, tiling the
            # FLIP_FROM_CLASS features to fill the whole set.
            client_label_counter = len(client.train_data['y'])
            flip_data = [
                client.train_data['x'][i] for i in range(client_label_counter)
                if client.train_data['y'][i] == FLIP_FROM_CLASS
            ]
            if len(flip_data) == 0:
                flip_data = reference_data
            client.train_data['x'] = (flip_data * math.ceil(
                client_label_counter / len(flip_data)))[:client_label_counter]
            client.train_data['y'] = [FLIP_TO_CLASS] * client_label_counter
    return clients, malicious_clients
def setup_clients(dataset, model=None):
    """Build one Client per user found in the dataset's train/test splits.

    Return:
        all_clients: list of Client objects.
    """
    data_root = os.path.join('..', 'data', dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    if not groups:
        groups = [[] for _ in users]
    all_clients = []
    for user_id, group in zip(users, groups):
        all_clients.append(Client(user_id, group, train_data[user_id], test_data[user_id], model))
    return all_clients
def setup_clients(dataset, model=None):
    """Read the dataset's train/test splits and delegate client creation.

    Return:
        list of Client objects from create_clients.
    """
    data_root = os.path.join('..', 'data', dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, 'test'))
    return create_clients(users, groups, train_data, test_data, model)
def __init__(self, dataset, algorithm, model, batch_size, learning_rate, beta, lamda, num_glob_iters, local_epochs, optimizer, num_users, times):
    """Create the Per-Avg server: one UserPerAvg per client in the dataset."""
    super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, beta,
                     lamda, num_glob_iters, local_epochs, optimizer, num_users, times)
    # Instantiate every user found in the dataset.
    data = read_data(dataset)
    total_users = len(data[0])
    for idx in range(total_users):
        user_id, train_set, test_set = read_user_data(idx, data, dataset)
        new_user = UserPerAvg(user_id, train_set, test_set, model, batch_size,
                              learning_rate, beta, lamda, local_epochs, optimizer,
                              total_users, num_users)
        self.users.append(new_user)
        self.total_train_samples += new_user.train_samples
    print("Number of users / total users:", num_users, " / ", total_users)
    print("Finished creating Local Per-Avg.")
def main():
    """Entry point: load options/model/server, read the data, run and save."""
    options, model, server = read_options()
    data_dir = os.path.join('data', options['dataset'], 'orig')
    print('Reading data...')
    dataset = read_data(os.path.join(data_dir, 'train.json'),
                        os.path.join(data_dir, 'test.json'))
    print('Reading data done.')
    optim = server(options, model, dataset)
    optim.run()
    optim.save()
def setup_clients(cfg, model=None, use_val_set=False):
    """Instantiate clients from cfg.dataset's train and eval directories.

    Return:
        list of Client objects from create_clients.
    """
    eval_set = 'val' if use_val_set else 'test'
    data_root = os.path.join('..', 'data', cfg.dataset, 'data')
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train'), os.path.join(data_root, eval_set))
    return create_clients(users, groups, train_data, test_data, model, cfg)
def main():
    """Run the configured federated experiment once per random seed and
    report per-seed metrics (scaled by 100) plus their spread and median.
    """
    args = parse_job_args()
    config = read_yamlconfig(args)
    train_data_dir = os.path.join('..', 'data', args.dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', args.dataset, 'data', 'test')
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    # Dispatch table replaces the repetitive if/elif chain.
    trainer_classes = {
        'fedavg': Fedavg_Trainer,
        'fedprox': Fedprox_Trainer,
        'fedsgd': Fedsgd_Trainer,
        'fedbayes': Fedbayes_Sing_Trainer,
        'modelsaver': Model_Saver,
        'fedsem': Fedsem_Trainer,
    }
    exp_seeds = config["exp-seeds"]
    book_keep = [0.] * len(exp_seeds)
    for j, rnd_sed in enumerate(exp_seeds):
        config["seed"] = rnd_sed
        if args.experiment == 'fedcluster':
            # Not implemented; the original fell through and read an
            # undefined `metric` here (NameError) — skip bookkeeping instead.
            continue
        trainer_cls = trainer_classes.get(args.experiment)
        if trainer_cls is None:
            print("Applications not defined. Please check configs directory if the name is right.")
            break
        trainer = trainer_cls(users, groups, train_data, test_data)
        metric = trainer.begins(config, args)
        trainer.ends()
        book_keep[j] = metric
    finals = np.array(book_keep) * 100
    print(finals)
    # NOTE(review): the label says "std" but np.var computes the variance —
    # output preserved as-is; confirm which was intended.
    print("{} runs - std: {}, med: {}".format(len(exp_seeds), np.var(finals), np.median(finals)))
def setup_clients(aggregation, e, env, dataset, model=None):
    """Instantiates clients based on given train and test data directories.

    Return:
        all_clients: list of Client objects.
    """
    train_data_dir = os.path.join('..', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', dataset, 'data', 'test')
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    # Replaced the leftover numeric debug print with a descriptive message.
    print('setup_clients: read', len(users), 'users')
    clients = create_clients(aggregation, e, env, users, groups, train_data, test_data, model)
    return clients
def setup_clients(dataset, model=None, use_val_set=False):
    """Load train and eval data, then instantiate one client per user.

    Args:
        dataset: dataset directory name under data/.
        model: shared ClientModel object passed to every client.
        use_val_set: evaluate on the 'val' split instead of 'test'.

    Returns:
        List of all client objects from create_clients.
    """
    eval_set = "val" if use_val_set else "test"
    data_root = os.path.join("data", dataset, "data")
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, "train"), os.path.join(data_root, eval_set))
    return create_clients(users, groups, train_data, test_data, model)
def main():
    """Load the leaf dataset splits and train one specific client."""
    args = parse_args()
    # NOTE(review): client id and tangle name are hard-coded (the sys.argv
    # versions were disabled in the original) — looks like a debugging
    # shortcut; confirm before shipping.
    client_id = 'f0044_12'
    tangle_name = 120
    data_root = os.path.join('leaf', 'data', args.dataset, 'data')
    print("Loading data...")
    users, groups, train_data, test_data = read_data(
        os.path.join(data_root, 'train_sm'), os.path.join(data_root, 'test_sm'))
    print("Loading data... complete")
    result = train_single(client_id, None, 1, 0, train_data[client_id],
                          test_data[client_id], tangle_name)
    print(result)
def setup_clients(dataset, data_dir, model=None, validation=False, corruption=None, fraction_corrupt=0.1, seed=-1, subsample=True):
    """Instantiates clients based on given train and test data directories.

    If validation is True, use part of training set as validation set.
    For femnist with subsample=True, a fixed 1000-user subset is used so
    experiments are reproducible.  Corruption is applied to a fraction of
    clients via apply_corruption_all.

    Return:
        (all_clients, corrupted_clients): list of Client objects and the
        result of apply_corruption_all.
    """
    train_data_dir = os.path.join(data_dir, dataset, 'data', 'train')
    test_data_dir = os.path.join(data_dir, dataset, 'data', 'test')
    users, groups, train_data, test_data = read_data(train_data_dir, test_data_dir)
    # subsample
    if subsample and dataset == 'femnist':
        # Pick 1000 fixed users for experiment (fixed RNG seed 25 keeps the
        # subset identical across runs)
        rng_sub = random.Random(25)
        users = rng_sub.sample(users, 1000)
        train_data = {u: p for (u, p) in train_data.items() if u in users}
        test_data = {u: p for (u, p) in test_data.items() if u in users}
        # note: groups are empty for femnist
    if validation:
        # split training set into train and val in the ratio 80:20
        # (per-user shuffle seeded by the user's index, so splits are stable;
        # the val portion replaces that user's test_data)
        print('Validation mode, splitting train data into train and val sets...')
        for idx, u in enumerate(users):
            data = list(zip(train_data[u]['x'], train_data[u]['y']))
            rng = random.Random(idx)
            rng.shuffle(data)
            split_point = int(0.8 * len(data))
            x, y = zip(*data[:split_point])
            x1, y1 = zip(*data[split_point:])
            train_data[u] = {'x': list(x), 'y': list(y)}
            test_data[u] = {'x': list(x1), 'y': list(y1)}
    if len(groups) == 0:
        groups = [[] for _ in users]
    all_clients = [Client(u, g, train_data[u], test_data[u], model) for u, g in zip(users, groups)]
    corrupted_clients = apply_corruption_all(all_clients, dataset, corruption, fraction_corrupt, seed)
    return all_clients, corrupted_clients
def __init__(self, dataset, algorithm, model, batch_size, learning_rate, L, num_glob_iters, local_epochs, users_per_round, similarity, noise, times):
    """Create the FedAvg server: one UserAVG per client in the dataset."""
    super().__init__(dataset, algorithm, model[0], batch_size, learning_rate, L,
                     num_glob_iters, local_epochs, users_per_round, similarity, noise, times)
    # Instantiate every user found in the dataset.
    data = read_data(dataset)
    total_users = len(data[0])
    for idx in range(total_users):
        user_id, train_set, test_set = read_user_data(idx, data, dataset)
        new_user = UserAVG(user_id, train_set, test_set, model, batch_size,
                           learning_rate, L, local_epochs)
        self.users.append(new_user)
        self.total_train_samples += new_user.train_samples
    if self.noise:
        self.communication_thresh = rayleigh.ppf(1 - users_per_round / total_users)  # h_min
    print("Number of users / total users:", users_per_round, " / ", total_users)
    print("Finished creating FedAvg server.")
def main(dataset, num_users, user_labels, niid):
    """Trigger the data split for the given dataset configuration.

    The return value of read_data is discarded — presumably it is called
    here for its side effects (generating/writing the partitioned user
    data); verify against read_data's implementation.
    """
    read_data(dataset, niid, num_users, user_labels)
def __init__(self, dataset, algorithm, model, async_process, batch_size, learning_rate, lamda, beta, num_glob_iters, local_epochs, optimizer, num_users, user_labels, niid, times, data_load, extra):
    """Build the federated experiment: read the split data, create one user
    object per client for the chosen algorithm, then create the central
    server and register every user with it.

    The first `self.times` users receive an extra trailing constructor
    argument (i + 9) — presumably a per-user label/staleness knob; verify
    against the User* constructors.  The remaining users are built without it.
    """
    self.dataset = dataset
    self.model = copy.deepcopy(model)
    self.algorithm = algorithm
    self.optimizer = optimizer
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.async_process = async_process
    self.lamda = lamda
    self.beta = beta
    # NOTE(review): hard-codes 8 and ignores the `times` parameter — looks
    # like a leftover debugging override; confirm before relying on `times`.
    self.times = 8
    self.data_load = data_load
    self.extra = extra
    self.num_users = num_users
    self.num_glob_iters = num_glob_iters
    self.local_epochs = local_epochs
    self.user_labels = user_labels
    self.niid = niid
    self.users = []  # all user objects, in creation order
    self.local_acc = []
    self.avg_local_acc = []
    self.avg_local_train_acc = []
    self.avg_local_train_loss = []
    self.server_acc = []
    # old data split
    data = read_data(dataset, niid, num_users, user_labels)
    self.num_users = num_users
    test_data = []  # pooled test samples of all users, handed to the server
    # First group: users 0 .. self.times-1, constructed with the extra
    # `i + 9` argument.
    for i in range(self.times):
        id, train, test = read_user_data(i, data, dataset)
        if algorithm == 'FedAvg':
            user = UserFedAvg(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load, i + 9)
        if algorithm == 'ASO':
            user = UserASO(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load, i + 9)
        if algorithm == 'LGP':
            user = UserLGP(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load, i + 9)
        if algorithm == 'PerFed':
            user = UserPerFed(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load, i + 9)
        if algorithm == 'FedAsync':
            user = UserFedAsync(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load, i + 9)
        # NOTE(review): if `algorithm` matches none of the names above,
        # `user` is unbound here and this raises NameError.
        self.users.append(user)
        test_data.extend(test)
    # Second group: remaining users, constructed without the extra argument.
    for i in range(self.times, self.num_users):
        id, train, test = read_user_data(i, data, dataset)
        if algorithm == 'PerFed':
            user = UserPerFed(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load)
        if algorithm == 'FedAvg':
            user = UserFedAvg(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load)
        if algorithm == 'ASO':
            user = UserASO(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load)
        if algorithm == 'LGP':
            user = UserLGP(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load)
        if algorithm == 'FedAsync':
            user = UserFedAsync(id, train, test, model, async_process, batch_size, learning_rate, lamda, beta, local_epochs, optimizer, data_load)
        self.users.append(user)
        test_data.extend(test)
    # Central server for the chosen algorithm, evaluated on the pooled
    # test data.
    if algorithm == 'FedAvg':
        self.server = ServerFedAvg(algorithm, model, async_process, test_data, batch_size)
    if algorithm == 'PerFed':
        self.server = ServerPerFed(algorithm, model, async_process, test_data, batch_size)
    if algorithm == 'ASO':
        self.server = ServerASO(algorithm, model, async_process, test_data, batch_size)
    if algorithm == 'LGP':
        self.server = ServerLGP(algorithm, model, async_process, test_data, batch_size)
    if algorithm == 'FedAsync':
        self.server = serverFedAsync(algorithm, model, async_process, test_data, batch_size)
    # Register every user with the server.
    for user in self.users:
        self.server.append_user(user.id, user.train_data_samples)
def main(experiment, dataset, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, K, personal_learning_rate, times, commet, gpu, cutoff):
    """Run `times` repetitions of the chosen federated algorithm on the
    chosen dataset/model, then aggregate the logged results.

    `model` enters as a string name and is rebound to a (torch module, name)
    tuple; `data` is likewise a (read_data(...), dataset-name) tuple —
    presumably the servers expect these pairings; verify against their
    constructors.
    """
    # Get device status: Check GPU or CPU
    device = torch.device("cuda:{}".format(gpu) if torch.cuda.is_available() and gpu != -1 else "cpu")
    data = read_data(dataset), dataset
    for i in range(times):
        print("---------------Running time:------------", i)
        # Generate model: pick the architecture/dimensions for the dataset.
        if (model == "mclr"):
            if (dataset == "human_activity"):
                # NOTE(review): 6 output classes here vs 12 in the dnn branch
                # for the same dataset — confirm which is right.
                model = Mclr_Logistic(561, 6).to(device), model
            elif (dataset == "gleam"):
                model = Mclr_Logistic(561, 6).to(device), model
            elif (dataset == "vehicle_sensor"):
                model = Mclr_Logistic(100, 2).to(device), model
            elif (dataset == "Synthetic"):
                model = Mclr_Logistic(60, 10).to(device), model
            elif (dataset == "EMNIST"):
                model = Mclr_Logistic(784, 62).to(device), model
            else:  # (dataset == "Mnist"):
                model = Mclr_Logistic().to(device), model
        elif (model == "dnn"):
            if (dataset == "human_activity"):
                model = DNN(561, 100, 12).to(device), model
            elif (dataset == "gleam"):
                model = DNN(561, 20, 6).to(device), model
            elif (dataset == "vehicle_sensor"):
                model = DNN(100, 20, 2).to(device), model
            elif (dataset == "Synthetic"):
                model = DNN(60, 20, 10).to(device), model
            elif (dataset == "EMNIST"):
                model = DNN(784, 200, 62).to(device), model
            else:  # (dataset == "Mnist"):
                model = DNN2().to(device), model
        elif (model == "cnn"):
            if (dataset == "Cifar10"):
                model = CNNCifar(10).to(device), model
            else:
                # Unsupported cnn/dataset combination: abort the whole run.
                return
        # select algorithm: build the matching server, naming the Comet
        # experiment from the hyper-parameters when commet is enabled.
        if (algorithm == "FedAvg"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" + model[1] + "_" + str(batch_size) + "_" + str(learning_rate) + "_" + str(num_glob_iters) + "_" + str(local_epochs) + "_" + str(numusers))
            server = FedAvg(experiment, device, data, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, i, cutoff)
        elif (algorithm == "PerAvg"):
            if (commet):
                # NOTE(review): str(learning_rate) appears twice in this name
                # — possibly one occurrence was meant to be another parameter.
                experiment.set_name(dataset + "_" + algorithm + "_" + model[1] + "_" + str(batch_size) + "_" + str(learning_rate) + "_" + str(personal_learning_rate) + "_" + str(learning_rate) + "_" + str(num_glob_iters) + "_" + str(local_epochs) + "_" + str(numusers))
            server = PerAvg(experiment, device, data, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, i, cutoff)
        elif (algorithm == "FedU"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" + model[1] + "_" + str(batch_size) + "_" + str(learning_rate) + "_" + str(L_k) + "L_K" + "_" + str(num_glob_iters) + "_" + str(local_epochs) + "_" + str(numusers))
            server = FedU(experiment, device, data, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, K, i, cutoff)
        elif (algorithm == "pFedMe"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" + model[1] + "_" + str(batch_size) + "_" + str(learning_rate) + "_" + str(personal_learning_rate) + "_" + str(num_glob_iters) + "_" + str(local_epochs) + "_" + str(numusers))
            server = pFedMe(experiment, device, data, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, K, personal_learning_rate, i, cutoff)
        elif (algorithm == "Local"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" + model[1] + "_" + str(batch_size) + "_" + str(learning_rate) + "_" + str(L_k) + "_" + str(num_glob_iters) + "_" + str(local_epochs) + "_" + str(numusers))
            server = FedLocal(experiment, device, data, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, i, cutoff)
        elif (algorithm == "Global"):
            if (commet):
                experiment.set_name(dataset + "_" + algorithm + "_" + model[1] + "_" + str(batch_size) + "_" + str(learning_rate) + "_" + str(L_k) + "_" + str(num_glob_iters) + "_" + str(local_epochs) + "_" + str(numusers))
            server = FedGlobal(experiment, device, data, algorithm, model, batch_size, learning_rate, beta, L_k, num_glob_iters, local_epochs, optimizer, numusers, i, cutoff)
        else:
            print("Algorithm is invalid")
            return
        server.train()
        server.test()
    # Aggregate the logged metrics across the repetitions.
    average_data(num_users=numusers, loc_ep1=local_epochs, Numb_Glob_Iters=num_glob_iters, lamb=L_k, learning_rate=learning_rate, beta=beta, algorithms=algorithm, batch_size=batch_size, dataset=dataset, k=K, personal_learning_rate=personal_learning_rate, times=times, cutoff=cutoff)
def setup_clients(dataset, model_name=None, model=None, validation=False, seed=-1, split_by_user=True, subsample_fraction=0.5):
    """Instantiates clients based on given train and test data directories.

    If validation is True, use part of training set as validation set.

    Two layouts are supported:
      * split_by_user=True: read_data returns separate train/test user lists;
        returns a dict with 'train_clients', 'test_clients' and, when
        validation is on, 'validation_clients'.
      * split_by_user=False: every user has both train and test data;
        returns a dict with only 'train_clients'.

    Return:
        all_clients: dict of lists of Client objects (see above).
    """
    train_data_dir = os.path.join('..', 'data', dataset, 'data', 'train')
    test_data_dir = os.path.join('..', 'data', dataset, 'data', 'test')
    clients, groups, train_data, test_data = read_data(train_data_dir, test_data_dir, split_by_user=split_by_user, dataset=dataset)
    # Seed numpy globally so the femnist subsampling below is reproducible.
    if seed != -1:
        np.random.seed(seed)
    else:
        np.random.seed(42)
    if split_by_user:
        train_users = clients['train_users']
        test_users = clients['test_users']
        if dataset == 'femnist':
            # Only subsample femnist: keep subsample_fraction of the train
            # users and the same *count* of test users.
            indices_train = np.arange(len(train_users))
            indices_test = np.arange(len(test_users))
            fraction_used = int(subsample_fraction * len(train_users))
            subindices_train = np.random.choice(indices_train, fraction_used, replace=False)
            subindices_test = np.random.choice(indices_test, fraction_used, replace=False)
            train_users = [train_users[i] for i in subindices_train]
            test_users = [test_users[i] for i in subindices_test]
        train_groups = [[] for _ in train_users]
        test_groups = [[] for _ in test_users]
        if validation:
            # Carve the tail of the train users off as validation users,
            # moving their data out of train_data.
            fraction = 0.5 if (dataset == 'sent140') else 0.8
            split_point = int(fraction * len(train_users))
            validation_users = train_users[split_point:]
            validation_data = {}
            validation_groups = train_groups[split_point:]
            for u in validation_users:
                validation_data[u] = dict(train_data[u])
                del train_data[u]
            train_users = train_users[:split_point]
            train_groups = train_groups[:split_point]
    else:
        # split by sample
        users = clients['train_users']
        # subsample
        if dataset == 'femnist':
            # Fixed RNG seed 25 keeps the 100-user subset stable across runs.
            rng_sub = random.Random(25)
            users = rng_sub.sample(users, 100)
            train_data = {u: p for (u, p) in train_data.items() if u in users}
            test_data = {u: p for (u, p) in test_data.items() if u in users}
            # note: groups are empty
        elif dataset == 'shakespeare':
            pass
        else:
            # Only femnist/shakespeare are supported in this mode.
            print('Not subsampling data')
            sys.exit(-1)
        if validation:
            # split training set into train and val in the ratio 80:20
            # (per-user shuffle seeded by user index; the val part replaces
            # that user's test_data)
            print('Validation mode, splitting train data into train and val sets...')
            for idx, u in enumerate(users):
                data = list(zip(train_data[u]['x'], train_data[u]['y']))
                rng = random.Random(idx)
                rng.shuffle(data)
                split_point = int(0.8 * len(data))
                x, y = zip(*data[:split_point])
                x1, y1 = zip(*data[split_point:])
                train_data[u] = {'x': list(x), 'y': list(y)}
                test_data[u] = {'x': list(x1), 'y': list(y1)}
        if len(groups) == 0:
            groups = [[] for _ in users]
    if split_by_user:
        print('------>', len(train_users))
        train_clients = []
        test_clients = []
        # if True, do not preprocess data (certain sent140 LSTM models take
        # the raw text instead)
        do_preprocess = not (
            dataset == 'sent140' and model_name in ['erm_lstm_log_reg', 'rsm_lstm_log_reg']
        )
        print('do_preprocess = ', do_preprocess)
        for u, g in zip(train_users, train_groups):
            train_data_u_x = (
                preprocess_data_x(train_data[u]['x'], dataset=dataset, model_name=model_name)
                if do_preprocess else train_data[u]['x']
            )
            train_data_u_y = (
                preprocess_data_y(train_data[u]['y'], dataset=dataset, model_name=model_name)
                if do_preprocess else train_data[u]['y']
            )
            train_data_u = {'x': train_data_u_x, 'y': train_data_u_y}
            train_clients.append(Client(u, g, train_data=train_data_u, model=model, dataset=dataset))
        for u, g in zip(test_users, test_groups):
            test_data_u_x = (
                preprocess_data_x(test_data[u]['x'], dataset=dataset, model_name=model_name)
                if do_preprocess else test_data[u]['x']
            )
            test_data_u_y = (
                preprocess_data_y(test_data[u]['y'], dataset=dataset, model_name=model_name)
                if do_preprocess else test_data[u]['y']
            )
            test_data_u = {'x': test_data_u_x, 'y': test_data_u_y}
            # Test users carry eval_data only (no training data).
            test_clients.append(Client(u, g, eval_data=test_data_u, model=model, dataset=dataset))
        all_clients = {
            'train_clients': train_clients,
            'test_clients': test_clients
        }
        if validation:
            validation_clients = []
            for u, g in zip(validation_users, validation_groups):
                validation_data_u_x = (
                    preprocess_data_x(validation_data[u]['x'], dataset=dataset, model_name=model_name)
                    if do_preprocess else validation_data[u]['x']
                )
                validation_data_u_y = (
                    preprocess_data_y(validation_data[u]['y'], dataset=dataset, model_name=model_name)
                    if do_preprocess else validation_data[u]['y']
                )
                validation_data_u = {'x': validation_data_u_x, 'y': validation_data_u_y}
                validation_clients.append(Client(u, g, train_data=validation_data_u, model=model, dataset=dataset))
            all_clients['validation_clients'] = validation_clients
        return all_clients
    else:
        # every client is used for training when split_by_user is False
        train_clients = []
        for u, g in zip(users, groups):
            # TODO: skip preprocess if necessary
            train_data_u_x = preprocess_data_x(train_data[u]['x'], dataset=dataset)
            train_data_u_y = preprocess_data_y(train_data[u]['y'], dataset=dataset)
            test_data_u_x = preprocess_data_x(test_data[u]['x'], dataset=dataset)
            test_data_u_y = preprocess_data_y(test_data[u]['y'], dataset=dataset)
            train_data_u = {'x': train_data_u_x, 'y': train_data_u_y}
            test_data_u = {'x': test_data_u_x, 'y': test_data_u_y}
            train_clients.append(Client(u, g, train_data_u, test_data_u, model, dataset=dataset))
        all_clients = {
            'train_clients': train_clients
        }
        return all_clients