def __init__(self, train_data, test_data):
    """Build the CIFAR CNN baseline: model, SGD optimizer, loss, and loaders.

    Args:
        train_data: dataset yielding (image, label) pairs for training.
        test_data: dataset yielding (image, label) pairs for evaluation.
    """
    # Fixed hyper-parameters for the centralized baseline run.
    self.batch_size = 64
    self.num_epoch = 100
    self.criterion = CrossEntropyLoss()

    # 3x32x32 inputs, 10 output classes.
    self.model = CifarCnn((3, 32, 32), 10)
    self.optimizer = GD(self.model.parameters(), lr=0.1, weight_decay=0.001)

    # Shuffle only the training stream; keep evaluation order deterministic.
    self.train_dataloader = DataLoader(train_data,
                                       batch_size=self.batch_size,
                                       shuffle=True)
    self.test_dataloader = DataLoader(test_data,
                                      batch_size=self.batch_size,
                                      shuffle=False)
def __init__(self, options, dataset):
    """Build the model/optimizer pair and hand an LrdWorker to the base trainer.

    Args:
        options: dict with at least 'lr', 'wd' and 'num_epoch'.
        dataset: federated dataset forwarded to the base trainer.
    """
    net = choose_model(options)
    self.move_model_to_gpu(net, options)
    self.optimizer = GD(net.parameters(),
                        lr=options['lr'], weight_decay=options['wd'])
    self.num_epoch = options['num_epoch']
    # The worker owns the local-update rule (learning-rate-decay variant).
    super(FedAvg5Trainer, self).__init__(
        options, dataset, worker=LrdWorker(net, self.optimizer, options))
class FedAvg5Trainer(BaseTrainer):
    """
    Original Scheme

    FedAvg variant that weights client solutions by sample count and keeps a
    (1 - n_k/N) share of the previous global model in the aggregate.
    """

    def __init__(self, options, dataset):
        """Construct model, optimizer and an LrdWorker, then init the base trainer.

        Args:
            options: dict with at least 'lr', 'wd' and 'num_epoch'.
            dataset: federated dataset forwarded to the base trainer.
        """
        model = choose_model(options)
        self.move_model_to_gpu(model, options)
        self.optimizer = GD(model.parameters(),
                            lr=options['lr'], weight_decay=options['wd'])
        self.num_epoch = options['num_epoch']
        worker = LrdWorker(model, self.optimizer, options)
        super(FedAvg5Trainer, self).__init__(options, dataset, worker=worker)

    def train(self):
        """Run the federated rounds: evaluate, sample clients, train, aggregate."""
        print('>>> Select {} clients per round \n'.format(
            self.clients_per_round))

        # Fetch latest flat model parameter.
        # FIX: store under `self.latest_model` — the attribute every sibling
        # trainer uses and that the BaseTrainer evaluation helpers read; the
        # previous name `latest_model_params` was local to this class only.
        self.latest_model = self.worker.get_flat_model_params().detach()

        for round_i in range(self.num_round):
            # Test latest model on train data
            self.test_latest_model_on_traindata(round_i)
            self.test_latest_model_on_evaldata(round_i)

            # Choose K clients prop to data size
            selected_clients = self.select_clients(seed=round_i)

            # Solve minimization locally
            solns, stats = self.local_train(round_i, selected_clients)

            # Track communication cost
            self.metrics.extend_commu_stats(round_i, stats)

            # Update latest model
            self.latest_model = self.aggregate(solns)
            self.optimizer.inverse_prop_decay_learning_rate(round_i)

        # Test final model on train data
        self.test_latest_model_on_traindata(self.num_round)
        self.test_latest_model_on_evaldata(self.num_round)

        # Save tracked information
        self.metrics.write()

    def aggregate(self, solns):
        """Sample-size-weighted average that retains part of the old model.

        Args:
            solns: list of (num_sample, flat_solution_tensor) pairs.

        Returns:
            Detached flat tensor:
            sum(n_k * w_k) / N  +  (1 - sum(n_k) / N) * w_old.
        """
        averaged_solution = torch.zeros_like(self.latest_model)
        accum_sample_num = 0
        for num_sample, local_solution in solns:
            accum_sample_num += num_sample
            averaged_solution += num_sample * local_solution
        averaged_solution /= self.all_train_data_num
        # Unselected data mass keeps its share of the previous global model.
        averaged_solution += (1 - accum_sample_num / self.all_train_data_num
                              ) * self.latest_model
        return averaged_solution.detach()
def __init__(self, options, dataset):
    """Create the model and its GD optimizer, then delegate to the base trainer.

    Args:
        options: dict with at least 'lr' and 'wd'.
        dataset: federated dataset forwarded to the base trainer.
    """
    net = choose_model(options)
    self.move_model_to_gpu(net, options)
    self.optimizer = GD(net.parameters(),
                        lr=options['lr'], weight_decay=options['wd'])
    super(FedAvgTrainer, self).__init__(options, dataset, net, self.optimizer)
def __init__(self, model, options):
    """Capture the model and the training hyper-parameters pulled from options.

    Args:
        model: torch module trained locally by this worker.
        options: dict with 'lr', 'wd', 'num_epoch', 'meta_lr' and optional 'gpu'.
    """
    # Basic parameters.
    self.model = model
    self.optimizer = GD(model.parameters(),
                        lr=options['lr'], weight_decay=options['wd'])
    self.num_epoch = options['num_epoch']
    self.lr = options['lr']
    self.meta_lr = options['meta_lr']
    # Default to CPU when no 'gpu' flag is present.
    self.gpu = options.get('gpu', False)
class BatchCNN:
    """Centralized (non-federated) CIFAR-10 CNN training baseline."""

    def __init__(self, train_data, test_data):
        """Build model, optimizer, loss and train/test loaders.

        Args:
            train_data: dataset yielding (image, label) pairs for training.
            test_data: dataset yielding (image, label) pairs for evaluation.
        """
        self.model = CifarCnn((3, 32, 32), 10)
        self.optimizer = GD(self.model.parameters(), lr=0.1, weight_decay=0.001)
        self.batch_size = 64
        self.num_epoch = 100
        self.train_dataloader = DataLoader(train_data,
                                           batch_size=self.batch_size,
                                           shuffle=True)
        self.test_dataloader = DataLoader(test_data,
                                          batch_size=self.batch_size,
                                          shuffle=False)
        self.criterion = CrossEntropyLoss()

    def train(self):
        """Run the full schedule; print per-epoch train stats, test every 5 epochs."""
        self.model.train()
        for epoch in range(self.num_epoch):
            # FIX: reset running stats each epoch so the printed averages are
            # per-epoch (previously they accumulated over all epochs so far).
            train_loss = train_acc = train_total = 0
            for batch_idx, (x, y) in enumerate(self.train_dataloader):
                self.optimizer.zero_grad()
                pred = self.model(x)
                loss = self.criterion(pred, y)
                loss.backward()
                # Clip gradients to keep plain SGD stable on noisy batches.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 60)
                self.optimizer.step()

                _, predicted = torch.max(pred, 1)
                correct = predicted.eq(y).sum().item()
                target_size = y.size(0)
                train_loss += loss.item() * y.size(0)
                train_acc += correct
                train_total += target_size

            print("Epoch: {:>2d} | train loss {:>.4f} | train acc {:>5.2f}%".format(
                epoch, train_loss/train_total, train_acc/train_total*100))

            if epoch % 5 == 0:
                test_loss = test_acc = test_total = 0.
                # FIX: switch to eval mode during testing (dropout/batch-norm
                # were previously left in training mode), restore afterwards.
                self.model.eval()
                with torch.no_grad():
                    # FIX: both loaders are plain DataLoaders built in __init__,
                    # so batches are (x, y) pairs — the old `for x, y, _ in ...`
                    # unpacking assumed a 3-tuple and would raise ValueError.
                    for x, y in self.test_dataloader:
                        pred = self.model(x)
                        loss = self.criterion(pred, y)
                        _, predicted = torch.max(pred, 1)
                        correct = predicted.eq(y).sum()
                        test_acc += correct.item()
                        test_loss += loss.item() * y.size(0)
                        test_total += y.size(0)
                self.model.train()
                print("Epoch: {:>2d} | test loss {:>.4f} | test acc {:>5.2f}%".format(
                    epoch, test_loss / test_total, test_acc / test_total * 100))
class FedAvgTrainer(BaseTrainer):
    """Vanilla FedAvg driver; aggregation itself lives in BaseTrainer."""

    def __init__(self, options, dataset):
        """Create the model and optimizer, then delegate to the base trainer."""
        net = choose_model(options)
        self.move_model_to_gpu(net, options)
        self.optimizer = GD(net.parameters(),
                            lr=options['lr'], weight_decay=options['wd'])
        super(FedAvgTrainer, self).__init__(options, dataset, net, self.optimizer)

    def train(self):
        """Drive the federated rounds: evaluate, sample clients, train, aggregate."""
        print('>>> Select {} clients per round \n'.format(
            self.clients_per_round))

        # Start from the worker's current flat parameter vector.
        self.latest_model = self.worker.get_flat_model_params().detach()

        for rnd in range(self.num_round):
            # Evaluate the current global model before updating it.
            self.test_latest_model_on_traindata(rnd)
            self.test_latest_model_on_evaldata(rnd)

            # Sample K clients with probability proportional to data size.
            chosen = self.select_clients(seed=rnd)

            # Run local optimization on each chosen client.
            solns, stats = self.local_train(rnd, chosen)

            # Record communication cost for this round.
            self.metrics.extend_commu_stats(rnd, stats)

            # Merge client solutions into the new global model, decay the lr.
            self.latest_model = self.aggregate(solns)
            self.optimizer.inverse_prop_decay_learning_rate(rnd)

        # Final evaluation of the trained model.
        self.test_latest_model_on_traindata(self.num_round)
        self.test_latest_model_on_evaldata(self.num_round)

        # Persist all tracked metrics.
        self.metrics.write()
class Worker(object):
    """
    Base worker for all algorithm. Only need to rewrite `self.local_train` method.

    All solution, parameter or grad are Tensor type.
    """
    def __init__(self, model, options):
        # Basic parameters
        self.model = model
        self.optimizer = GD(model.parameters(), lr=options['lr'], weight_decay=options['wd'])
        self.num_epoch = options['num_epoch']
        self.lr = options['lr']
        self.meta_lr = options['meta_lr']
        # 'gpu' is optional in options; default to CPU.
        self.gpu = options['gpu'] if 'gpu' in options else False

        # Setup local model and evaluate its statics
        # NOTE(review): self.flops / self.params_num / self.model_bytes are only
        # assigned by this commented-out block, yet local_train reads self.flops
        # and model_bits reads self.model_bytes — confirm a subclass or caller
        # sets them, otherwise those paths raise AttributeError.
        # self.flops, self.params_num, self.model_bytes = \
        #     get_model_complexity_info(self.model, options['input_shape'], gpu=options['gpu'])

    @property
    def model_bits(self):
        # Model size in bits (requires self.model_bytes — see NOTE in __init__).
        return self.model_bytes * 8

    def get_model_params(self):
        # Return the model's state_dict (a live reference, not a deep copy).
        state_dict = self.model.state_dict()
        return state_dict

    def set_model_params(self, model_params_dict: dict):
        # Overwrite every parameter/buffer present in the model's state_dict
        # with the matching entry from model_params_dict, then reload.
        state_dict = self.model.state_dict()
        for key, value in state_dict.items():
            state_dict[key] = model_params_dict[key]
        self.model.load_state_dict(state_dict)

    def load_model_params(self, file):
        # Load a state_dict from file (via project helper) and apply it.
        model_params_dict = get_state_dict(file)
        self.set_model_params(model_params_dict)

    def get_flat_model_params(self):
        # Flatten all model parameters into a single detached 1-D tensor.
        flat_params = get_flat_params_from(self.model)
        return flat_params.detach()

    def set_flat_model_params(self, flat_params):
        # Inverse of get_flat_model_params: scatter a flat vector into the model.
        set_flat_params_to(self.model, flat_params)

    def get_flat_grads(self, dataloader):
        # Compute the flat gradient of the sample-weighted mean loss over the
        # whole dataloader (batches are (x, y, _) triples here).
        self.optimizer.zero_grad()
        loss, total_num = 0., 0
        for x, y, _ in dataloader:
            if self.gpu:
                x, y = x.cuda(), y.cuda()
            # NOTE(review): the model is called with (x, y) here while
            # local_train calls self.model(x) — confirm this model's forward
            # really accepts the targets as a second argument.
            pred = self.model(x, y)
            # `criterion` is a module-level name defined elsewhere in the file.
            loss += criterion(pred, y) * y.size(0)
            total_num += y.size(0)
        loss /= total_num
        # create_graph=True keeps the graph for higher-order derivatives.
        flat_grads = get_flat_grad(loss, self.model.parameters(), create_graph=True)
        return flat_grads

    def local_train(self, train_dataloader, **kwargs):
        """Train model locally and return new parameter and computation cost

        Args:
            train_dataloader: DataLoader class in Pytorch

        Returns
            1. local_solution: updated new parameter
            2. stat: Dict, contain stats
                2.1 comp: total FLOPS, computed by (# epoch) * (# data) * (# one-shot FLOPS)
                2.2 loss
        """
        self.model.train()
        y_total = []
        pred_total = []
        prob_total = []
        train_loss = 0
        for epoch in range(self.num_epoch):
            for batch_idx, (x, y, _) in enumerate(train_dataloader):
                if self.gpu:
                    x, y = x.cuda(), y.cuda()

                self.optimizer.zero_grad()
                prob = self.model(x)
                loss = criterion(prob, y)
                loss.backward()
                # Gradient clipping at norm 60, matching the other trainers.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 60)
                self.optimizer.step()

                _, predicted = torch.max(prob, 1)
                train_loss += loss.item() * y.size(0)
                prob_total.append(prob.cpu().detach().numpy())
                pred_total.extend(predicted.cpu().numpy())
                y_total.extend(y.cpu().numpy())

        # y_total accumulates over ALL epochs, so train_total already contains
        # a factor of num_epoch; train_loss/train_total is therefore the mean
        # loss per visited sample across every epoch.
        train_total = len(y_total)
        local_solution = self.get_flat_model_params()
        param_dict = {"norm": torch.norm(local_solution).item(),
                      "max": local_solution.max().item(),
                      "min": local_solution.min().item()}
        # NOTE(review): train_total already includes num_epoch (see above), so
        # this multiplies the epoch count in twice versus the docstring formula;
        # also self.flops is only set by commented-out code in __init__ — verify.
        comp = self.num_epoch * train_total * self.flops
        return_dict = {"comp": comp, "loss": train_loss / train_total}
        return_dict.update(param_dict)

        multiclass_eval_dict = evaluate_multiclass(y_total, pred_total, prob_total)
        return_dict.update(multiclass_eval_dict)
        return local_solution, return_dict

    def local_test(self, test_dataloader):
        # Evaluate the current model on a (x, y, _) dataloader without grads;
        # returns (multiclass metrics dict, summed sample-weighted loss).
        self.model.eval()
        test_loss = 0
        y_total = []
        pred_total = []
        prob_total = []
        with torch.no_grad():
            for x, y, _ in test_dataloader:
                if self.gpu:
                    x, y = x.cuda(), y.cuda()
                # NOTE(review): as in get_flat_grads, the label is passed into
                # the forward call (the plain variant is kept commented below)
                # — confirm this is intentional for this model family.
                # prob = self.model(x)
                # loss = criterion(prob, y)
                prob = self.model(x, y)
                loss = criterion(prob, y)

                _, predicted = torch.max(prob, 1)
                prob_total.append(prob.cpu().detach().numpy())
                pred_total.extend(predicted.cpu().numpy())
                y_total.extend(y.cpu().numpy())
                test_loss += loss.item() * y.size(0)

        multiclass_eval_dict = evaluate_multiclass(y_total, pred_total, prob_total)
        return multiclass_eval_dict, test_loss
class FedAvg9Trainer(BaseTrainer):
    """
    Only Transformed II

    FedAvg variant using an LrAdjustWorker; clients receive a multiplier
    proportional to their data share and solutions are averaged uniformly.
    """

    def __init__(self, options, dataset):
        """Create model, optimizer and LrAdjustWorker, then init the base trainer."""
        net = choose_model(options)
        self.move_model_to_gpu(net, options)
        self.optimizer = GD(net.parameters(),
                            lr=options['lr'], weight_decay=options['wd'])
        self.num_epoch = options['num_epoch']
        super(FedAvg9Trainer, self).__init__(
            options, dataset, worker=LrAdjustWorker(net, self.optimizer, options))

    def train(self):
        """Drive the federated rounds: evaluate, sample, train locally, aggregate."""
        print('>>> Select {} clients per round \n'.format(
            self.clients_per_round))

        # Start from the worker's current flat parameter vector.
        self.latest_model = self.worker.get_flat_model_params().detach()

        for rnd in range(self.num_round):
            # Evaluate the current global model before updating it.
            self.test_latest_model_on_traindata(rnd)
            self.test_latest_model_on_evaldata(rnd)

            # Sample K clients with probability proportional to data size.
            chosen = self.select_clients(seed=rnd)

            # Local optimization on each chosen client.
            solns, stats = self.local_train(rnd, chosen)

            # Record communication cost for this round.
            self.metrics.extend_commu_stats(rnd, stats)

            # Uniformly average client solutions; decay the learning rate.
            self.latest_model = self.aggregate(solns)
            self.optimizer.inverse_prop_decay_learning_rate(rnd)

        # Final evaluation of the trained model.
        self.test_latest_model_on_traindata(self.num_round)
        self.test_latest_model_on_evaldata(self.num_round)

        # Persist all tracked metrics.
        self.metrics.write()

    def aggregate(self, solns, **kwargs):
        """Uniform average of client solutions (requires simple_average mode).

        Args:
            solns: list of (num_sample, flat_solution_tensor) pairs; the
                sample counts are deliberately ignored.

        Returns:
            Detached flat tensor: sum(w_k) / clients_per_round.
        """
        # This scheme only makes sense with uniform weighting.
        assert self.simple_average

        acc = torch.zeros_like(self.latest_model)
        for _, sol in solns:
            acc = acc + sol
        return (acc / self.clients_per_round).detach()

    def local_train(self, round_i, selected_clients, **kwargs):
        """Push the global model to each client, train locally, collect results.

        Args:
            round_i: current round index (used only for logging).
            selected_clients: clients chosen for this round.

        Returns:
            (solns, stats): per-client solutions and per-client stat dicts.
        """
        solns = []  # Buffer for receiving client solutions
        stats = []  # Buffer for receiving client communication costs

        for idx, client in enumerate(selected_clients, start=1):
            # Communicate the latest model to the client.
            client.set_flat_model_params(self.latest_model)

            # Multiplier = the client's share of all training data, in percent.
            m = len(client.train_data) / self.all_train_data_num * 100
            soln, stat = client.local_train(multiplier=m)

            if self.print_result:
                print("Round: {:>2d} | CID: {: >3d} ({:>2d}/{:>2d})| "
                      "Param: norm {:>.4f} ({:>.4f}->{:>.4f})| "
                      "Loss {:>.4f} | Acc {:>5.2f}% | Time: {:>.2f}s".format(
                       round_i, client.cid, idx, self.clients_per_round,
                       stat['norm'], stat['min'], stat['max'],
                       stat['loss'], stat['acc'] * 100, stat['time']))

            solns.append(soln)
            stats.append(stat)

        return solns, stats
class FedAvg4Trainer(BaseTrainer):
    """
    Scheme I and Scheme II, based on the flag of self.simple_average
    """

    def __init__(self, options, dataset):
        """Create model/optimizer/LrdWorker and precompute sampling probabilities."""
        net = choose_model(options)
        self.move_model_to_gpu(net, options)
        self.optimizer = GD(net.parameters(),
                            lr=options['lr'], weight_decay=options['wd'])
        self.num_epoch = options['num_epoch']
        super(FedAvg4Trainer, self).__init__(
            options, dataset, worker=LrdWorker(net, self.optimizer, options))
        # Per-client selection probability, proportional to local data size.
        self.prob = self.compute_prob()

    def train(self):
        """Drive the federated rounds; sampling scheme depends on simple_average."""
        print('>>> Select {} clients per round \n'.format(
            self.clients_per_round))

        # Start from the worker's current flat parameter vector.
        self.latest_model = self.worker.get_flat_model_params().detach()

        for rnd in range(self.num_round):
            # Evaluate the current global model before updating it.
            self.test_latest_model_on_traindata(rnd)
            self.test_latest_model_on_evaldata(rnd)

            # Scheme I: sample with probability prop. to data size (with
            # replacement, tracking multiplicity); Scheme II: base sampling.
            if self.simple_average:
                chosen, repeated_times = self.select_clients_with_prob(seed=rnd)
            else:
                chosen = self.select_clients(seed=rnd)
                repeated_times = None

            # Local optimization on each chosen client.
            solns, stats = self.local_train(rnd, chosen)

            # Record communication cost for this round.
            self.metrics.extend_commu_stats(rnd, stats)

            # Merge solutions; decay the learning rate.
            self.latest_model = self.aggregate(solns, repeated_times=repeated_times)
            self.optimizer.inverse_prop_decay_learning_rate(rnd)

        # Final evaluation of the trained model.
        self.test_latest_model_on_traindata(self.num_round)
        self.test_latest_model_on_evaldata(self.num_round)

        # Persist all tracked metrics.
        self.metrics.write()

    def compute_prob(self):
        """Return each client's share of the total training data as a prob. vector."""
        sizes = [len(client.train_data) for client in self.clients]
        return np.array(sizes) / sum(sizes)

    def select_clients_with_prob(self, seed=1):
        """Sample clients (with replacement) proportionally to data size.

        Args:
            seed: RNG seed so each round's draw is reproducible.

        Returns:
            (select_clients, repeated_times): unique clients in ascending index
            order, and how many times each was drawn.
        """
        num_clients = min(self.clients_per_round, len(self.clients))
        np.random.seed(seed)
        drawn = np.random.choice(len(self.clients), num_clients, p=self.prob)

        # Count multiplicities; sorting first makes the dict's insertion order
        # (and therefore both output lists) ascending by client index.
        counts = {}
        for i in sorted(drawn.tolist()):
            counts[i] = counts.get(i, 0) + 1

        select_clients = [self.clients[i] for i in counts]
        repeated_times = list(counts.values())
        return select_clients, repeated_times

    def aggregate(self, solns, **kwargs):
        """Combine client solutions according to the active scheme.

        Args:
            solns: list of (num_sample, flat_solution_tensor) pairs.
            **kwargs: 'repeated_times' (multiplicities) when simple_average.

        Returns:
            Detached flat tensor holding the new global model.
        """
        merged = torch.zeros_like(self.latest_model)

        if self.simple_average:
            # Scheme I: weight each unique solution by how often its client
            # was drawn, then average over the round size.
            repeated_times = kwargs['repeated_times']
            assert len(solns) == len(repeated_times)
            for (_, sol), times in zip(solns, repeated_times):
                merged += sol * times
            merged /= self.clients_per_round
        else:
            # Scheme II: sample-size weighting, rescaled by 100/K.
            for num_sample, sol in solns:
                merged += num_sample * sol
            merged /= self.all_train_data_num
            merged *= (100 / self.clients_per_round)

        return merged.detach()