class BatchCNN:
    """Plain (non-federated) CNN training loop on CIFAR-shaped data.

    Trains a `CifarCnn` with SGD-style optimizer `GD`, printing per-epoch
    train statistics and running an evaluation pass every 5 epochs.
    """

    def __init__(self, train_data, test_data):
        """Build model, optimizer, dataloaders and loss.

        Args:
            train_data: Dataset yielding (x, y) pairs for training.
            test_data: Dataset yielding (x, y) pairs for evaluation.
        """
        self.model = CifarCnn((3, 32, 32), 10)
        self.optimizer = GD(self.model.parameters(), lr=0.1, weight_decay=0.001)
        self.batch_size = 64
        self.num_epoch = 100
        self.train_dataloader = DataLoader(train_data, batch_size=self.batch_size, shuffle=True)
        self.test_dataloader = DataLoader(test_data, batch_size=self.batch_size, shuffle=False)
        self.criterion = CrossEntropyLoss()

    def train(self):
        """Run the full training loop, evaluating every 5 epochs.

        Fixes over the previous version:
        - train statistics are reset each epoch, so the printed numbers
          describe that epoch instead of a running average over all epochs;
        - the test loop unpacks (x, y) like the train loop does (the old
          3-tuple unpack would fail on the same 2-tuple batches);
        - the model is switched to eval mode for the test pass and back to
          train mode afterwards, so dropout/batch-norm behave correctly.
        """
        self.model.train()
        for epoch in range(self.num_epoch):
            # Per-epoch accumulators (reset each epoch on purpose).
            train_loss = train_acc = train_total = 0
            for x, y in self.train_dataloader:
                self.optimizer.zero_grad()
                pred = self.model(x)
                loss = self.criterion(pred, y)
                loss.backward()
                # Clip to keep early updates from exploding.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 60)
                self.optimizer.step()

                _, predicted = torch.max(pred, 1)
                correct = predicted.eq(y).sum().item()
                target_size = y.size(0)
                # Weight the batch loss by batch size so the epoch average
                # is a true per-sample mean.
                train_loss += loss.item() * target_size
                train_acc += correct
                train_total += target_size

            print("Epoch: {:>2d} | train loss {:>.4f} | train acc {:>5.2f}%".format(
                epoch, train_loss / train_total, train_acc / train_total * 100))

            if epoch % 5 == 0:
                self._evaluate(epoch)

    def _evaluate(self, epoch):
        """Run one evaluation pass over the test set and print the metrics."""
        test_loss = test_acc = test_total = 0.
        self.model.eval()  # disable dropout / freeze batch-norm stats
        with torch.no_grad():
            for x, y in self.test_dataloader:
                pred = self.model(x)
                loss = self.criterion(pred, y)
                _, predicted = torch.max(pred, 1)
                test_acc += predicted.eq(y).sum().item()
                test_loss += loss.item() * y.size(0)
                test_total += y.size(0)
        self.model.train()  # restore training mode for subsequent epochs
        print("Epoch: {:>2d} | test loss {:>.4f} | test acc {:>5.2f}%".format(
            epoch, test_loss / test_total, test_acc / test_total * 100))
class Worker(object):
    """Base worker for all algorithms.

    Subclasses only need to rewrite `self.local_train`. All solutions,
    parameters and gradients are Tensor-typed (flattened where noted).
    Relies on module-level helpers defined elsewhere in this project:
    `GD`, `criterion`, `get_state_dict`, `get_flat_params_from`,
    `set_flat_params_to`, `get_flat_grad`, `evaluate_multiclass`.
    """

    def __init__(self, model, options):
        """Store the model and build the local optimizer from `options`.

        Args:
            model: torch.nn.Module trained locally by this worker.
            options: dict with keys 'lr', 'wd', 'num_epoch', 'meta_lr'
                and optionally 'gpu'.
        """
        # Basic parameters
        self.model = model
        self.optimizer = GD(model.parameters(), lr=options['lr'], weight_decay=options['wd'])
        self.num_epoch = options['num_epoch']
        self.lr = options['lr']
        self.meta_lr = options['meta_lr']
        self.gpu = options['gpu'] if 'gpu' in options else False
        # Setup local model and evaluate its statics
        # NOTE(review): the complexity-info call below is commented out, so
        # self.flops / self.params_num / self.model_bytes are never set here;
        # `model_bits` and `local_train` reference them and will raise
        # AttributeError unless a subclass or caller assigns them — confirm.
        # self.flops, self.params_num, self.model_bytes = \
        #     get_model_complexity_info(self.model, options['input_shape'], gpu=options['gpu'])

    @property
    def model_bits(self):
        # Model size in bits; requires self.model_bytes (see note in __init__).
        return self.model_bytes * 8

    def get_model_params(self):
        """Return the model's state_dict (a reference, not a deep copy)."""
        state_dict = self.model.state_dict()
        return state_dict

    def set_model_params(self, model_params_dict: dict):
        """Overwrite model parameters from `model_params_dict`.

        Copies values key-by-key through the current state_dict, so a
        missing key in `model_params_dict` raises KeyError.
        """
        state_dict = self.model.state_dict()
        for key, value in state_dict.items():
            state_dict[key] = model_params_dict[key]
        self.model.load_state_dict(state_dict)

    def load_model_params(self, file):
        """Load parameters from `file` (via get_state_dict) into the model."""
        model_params_dict = get_state_dict(file)
        self.set_model_params(model_params_dict)

    def get_flat_model_params(self):
        """Return all model parameters flattened into one detached 1-D tensor."""
        flat_params = get_flat_params_from(self.model)
        return flat_params.detach()

    def set_flat_model_params(self, flat_params):
        """Write a flat 1-D parameter tensor back into the model's layers."""
        set_flat_params_to(self.model, flat_params)

    def get_flat_grads(self, dataloader):
        """Compute the flat gradient of the mean loss over `dataloader`.

        Accumulates per-sample-weighted loss over the whole loader, then
        differentiates once (create_graph=True, so higher-order grads are
        possible).

        Returns:
            A flat 1-D gradient tensor over all model parameters.
        """
        self.optimizer.zero_grad()
        loss, total_num = 0., 0
        # NOTE(review): batches are unpacked as (x, y, _) — the dataloader is
        # assumed to yield 3-tuples (e.g. with sample indices); confirm.
        for x, y, _ in dataloader:
            if self.gpu:
                x, y = x.cuda(), y.cuda()
            # NOTE(review): forward is called as model(x, y) here but as
            # model(x) in local_train — one of the two is likely stale;
            # confirm the model's forward signature.
            pred = self.model(x, y)
            loss += criterion(pred, y) * y.size(0)
            total_num += y.size(0)
        loss /= total_num
        flat_grads = get_flat_grad(loss, self.model.parameters(), create_graph=True)
        return flat_grads

    def local_train(self, train_dataloader, **kwargs):
        """Train the model locally; return new parameters and stats.

        Args:
            train_dataloader: DataLoader yielding (x, y, _) 3-tuple batches.

        Returns:
            1. local_solution: updated flat parameter tensor.
            2. stat: dict containing
                2.1 comp: total FLOPS, computed by
                    (# epoch) * (# data) * (# one-shot FLOPS)
                2.2 loss, parameter norm/max/min, and multiclass metrics.
        """
        self.model.train()
        # Accumulated across ALL epochs: labels, argmax predictions, and raw
        # per-batch probability arrays (for multiclass evaluation).
        y_total = []
        pred_total = []
        prob_total = []
        train_loss = 0
        for epoch in range(self.num_epoch):
            for batch_idx, (x, y, _) in enumerate(train_dataloader):
                if self.gpu:
                    x, y = x.cuda(), y.cuda()
                self.optimizer.zero_grad()
                prob = self.model(x)
                loss = criterion(prob, y)
                loss.backward()
                # Clip gradients to bound the update magnitude.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 60)
                self.optimizer.step()
                _, predicted = torch.max(prob, 1)
                # Batch-size-weighted loss so the final average is per-sample.
                train_loss += loss.item() * y.size(0)
                prob_total.append(prob.cpu().detach().numpy())
                pred_total.extend(predicted.cpu().numpy())
                y_total.extend(y.cpu().numpy())
        # NOTE(review): train_total counts samples over ALL epochs
        # (num_epoch * dataset size), so `comp` below multiplies num_epoch in
        # twice relative to the docstring's formula — confirm intent.
        train_total = len(y_total)
        local_solution = self.get_flat_model_params()
        # Summary statistics of the flattened solution vector.
        param_dict = {
            "norm": torch.norm(local_solution).item(),
            "max": local_solution.max().item(),
            "min": local_solution.min().item()
        }
        # Requires self.flops, whose assignment is commented out in __init__.
        comp = self.num_epoch * train_total * self.flops
        return_dict = {"comp": comp, "loss": train_loss / train_total}
        return_dict.update(param_dict)
        multiclass_eval_dict = evaluate_multiclass(y_total, pred_total, prob_total)
        return_dict.update(multiclass_eval_dict)
        return local_solution, return_dict

    def local_test(self, test_dataloader):
        """Evaluate the model on `test_dataloader` without gradient tracking.

        Returns:
            (multiclass_eval_dict, test_loss) — metric dict from
            evaluate_multiclass and the batch-size-weighted total loss.
        """
        self.model.eval()
        test_loss = 0
        y_total = []
        pred_total = []
        prob_total = []
        with torch.no_grad():
            for x, y, _ in test_dataloader:
                if self.gpu:
                    x, y = x.cuda(), y.cuda()
                # prob = self.model(x)
                # loss = criterion(prob, y)
                # NOTE(review): same model(x, y) vs model(x) discrepancy as in
                # get_flat_grads — the commented lines above suggest the
                # signature was toggled; confirm which is current.
                prob = self.model(x, y)
                loss = criterion(prob, y)
                _, predicted = torch.max(prob, 1)
                prob_total.append(prob.cpu().detach().numpy())
                pred_total.extend(predicted.cpu().numpy())
                y_total.extend(y.cpu().numpy())
                test_loss += loss.item() * y.size(0)
        multiclass_eval_dict = evaluate_multiclass(y_total, pred_total, prob_total)
        return multiclass_eval_dict, test_loss