def test_multihead(self, task_id, testloader):
    self.net.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            outputs, _ = self.net(inputs)
            loss = self.criterion(outputs, targets)
            test_loss += loss.item()
            # Restrict the argmax to the logits owned by this task's head.
            _, predicted = outputs[:, args.classes_per_task * task_id:
                                   args.classes_per_task * (task_id + 1)].max(1)
            total += targets.size(0)
            # Shift the within-head prediction back into the global label space.
            correct += (predicted +
                        args.classes_per_task * task_id).eq(targets).sum().item()
            progress_bar(batch_idx, len(testloader),
                         'Loss:%.3f|Acc:%.3f%% (%d/%d)--Test'
                         % (test_loss / (batch_idx + 1),
                            100. * correct / total, correct, total))
    return correct / total
def test(self, testloader):
    self.net.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            outputs = self.net(inputs)
            loss = self.criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            progress_bar(batch_idx, len(testloader),
                         'Loss:%.3f|Acc:%.3f%% (%d/%d)--Test'
                         % (test_loss / (batch_idx + 1),
                            100. * correct / total, correct, total))
    return correct / total
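# A minimal usage sketch for the two evaluation paths above. `trainer` is an
# instance of this class and `test_loaders` is an assumed list with one
# DataLoader per task; neither name appears in the original code.
def evaluate_all_heads(trainer, test_loaders):
    """Run test_multihead over every task head and return the mean accuracy."""
    per_task_acc = [trainer.test_multihead(task_id, loader)
                    for task_id, loader in enumerate(test_loaders)]
    return sum(per_task_acc) / len(per_task_acc)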
def train(self, epoch, trainloader):
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('./checkpoint/ckpt.t7')
        self.net.load_state_dict(checkpoint['net'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
    print('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
    self.scheduler.step()
    self.net.train()
    train_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(self.device), targets.to(self.device)
        self.optimizer.zero_grad()
        outputs, _ = self.net(inputs)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)  # outputs.shape: (batch, classes)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        self.loss = train_loss
        progress_bar(batch_idx, len(trainloader),
                     'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train'
                     % (train_loss / (batch_idx + 1),
                        100. * correct / total, correct, total))
    return correct / total
def train_fc(self, epoch, trainloader):
    # Freeze all conv/bn parameters; only the linear (classifier) layers train.
    for name, param in self.net.named_parameters():
        if re.search('conv', name) or re.search('bn', name):
            param.requires_grad = False
        elif re.search('linear', name):
            param.requires_grad = True
    self.optimizer = optim.SGD(
        filter(lambda p: p.requires_grad, self.net.parameters()),
        lr=args.lr, momentum=0.9, weight_decay=args.weight_decay)
    logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
    # optimizer.step() before scheduler.step() keeps the PyTorch >=1.1
    # step-ordering check quiet.
    self.optimizer.step()
    self.scheduler.step()
    self.net.train()
    train_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(self.device), targets.to(self.device)
        self.optimizer.zero_grad()
        outputs = self.net(inputs)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)  # outputs.shape: (batch, classes)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        self.loss = train_loss
        progress_bar(batch_idx, len(trainloader),
                     'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train'
                     % (train_loss / (batch_idx + 1),
                        100. * correct / total, correct, total))
    return correct / total
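# Sanity-check sketch for train_fc's freezing logic (hypothetical helper, not
# part of the original code): after the regex-based freezing, only 'linear'
# parameters should still require gradients.
def assert_only_linear_trainable(net):
    for name, param in net.named_parameters():
        if param.requires_grad:
            assert 'linear' in name, 'unexpected trainable parameter: %s' % name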
def train_with_mask_with_KD(self, epoch, trainloader, KD_target_list, len_onehot):
    mask_dict = pickle.load(open(save_mask_file, "rb"))  # not used in this function
    mask_reverse_dict = pickle.load(open(save_mask_fileR, "rb"))  # not used in this function
    logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
    self.scheduler.step()
    self.net.train()
    train_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Build the distillation targets: one-hot labels for the new classes,
        # with the old-task logit positions overwritten by the recorded
        # teacher outputs for this batch.
        targets_KD = self.make_one_hot(targets, len_onehot)
        w = KD_target_list[batch_idx].shape[1]
        targets_KD[:, 0:w] = KD_target_list[batch_idx][:, 0:w]
        inputs = inputs.to(self.device)
        targets_KD = targets_KD.to(self.device)
        targets = targets.to(self.device)
        self.optimizer.zero_grad()
        outputs = self.net(inputs) / args.temperature  # temperature-scaled logits
        loss = self.xentropy_cost(outputs, targets_KD)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        self.loss = train_loss
        progress_bar(batch_idx, len(trainloader),
                     'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train'
                     % (train_loss / (batch_idx + 1),
                        100. * correct / total, correct, total))
    return correct / total
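# `self.xentropy_cost` above is defined elsewhere in this class. As a point of
# reference, a soft-target cross-entropy commonly used for distillation looks
# like the sketch below (an assumption about its behavior, not the original
# implementation):
def soft_xentropy(logits, soft_targets):
    """Cross-entropy between logits and soft target probabilities."""
    import torch.nn.functional as F
    log_probs = F.log_softmax(logits, dim=1)
    return -(soft_targets * log_probs).sum(dim=1).mean()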
def train(self, epoch, trainloader):
    logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
    self.net.train()
    train_loss = 0.0
    correct = 0
    total = 0
    # optimizer.step() before scheduler.step() keeps the PyTorch >=1.1
    # step-ordering check quiet.
    self.optimizer.step()
    self.scheduler.step()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(self.device), targets.to(self.device)
        self.optimizer.zero_grad()
        outputs = self.net(inputs)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)  # outputs.shape: (batch, classes)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        self.loss = train_loss
        acc = 100. * correct / total
        progress_bar(batch_idx, len(trainloader),
                     'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train'
                     % (train_loss / (batch_idx + 1), acc, correct, total))
    if epoch == args.epoch - 1:
        self.save_checkpoint_t7(epoch, acc, train_loss)
    return correct / total
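# `save_checkpoint_t7` is called above but defined elsewhere. Judging from the
# resume path in train() (keys 'net', 'acc', 'epoch'), it plausibly looks like
# this sketch; the exact keys and path are assumptions:
def save_checkpoint_t7_sketch(net, epoch, acc, loss, path='./checkpoint/ckpt.t7'):
    import os
    import torch
    os.makedirs(os.path.dirname(path), exist_ok=True)
    torch.save({'net': net.state_dict(), 'acc': acc, 'epoch': epoch,
                'loss': loss}, path)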
def train_with_mask_with_EWC(self, current_trainloader, previous_trainloader,
                             fisher_estimation_sample_size=256):
    # Replay the previous task first, then train on the current task.
    all_loader = [previous_trainloader, current_trainloader]
    for task_id, trainloader in enumerate(all_loader):
        self.optimizer = optim.SGD(self.net.parameters(), lr=args.lr_mutant,
                                   momentum=0.9,
                                   weight_decay=args.weight_decay_2)
        self.scheduler = optim.lr_scheduler.StepLR(
            self.optimizer, step_size=args.lr_mutant_step_size,
            gamma=args.lr_gamma)
        train_acc = np.zeros([1, args.num_epoch])
        test_acc = np.zeros([1, args.num_epoch])
        for epoch in range(args.num_epoch):
            logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
            self.scheduler.step()
            self.net.train()
            train_loss = 0.0
            correct = 0
            total = 0
            for batch_idx, (inputs, targets) in enumerate(trainloader):
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.net(inputs)
                loss = self.criterion(outputs, targets)
                # Add the EWC quadratic penalty once a previous task has
                # been consolidated.
                if args.ewc and task_id > 0:
                    ewc_loss = self.net.ewc_loss(cuda=self.device)
                else:
                    ewc_loss = 0.0
                loss = loss + ewc_loss
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
                self.loss = train_loss
                progress_bar(batch_idx, len(trainloader),
                             ' | Loss:%.3f| ewc_loss:%.3f | Acc:%.3f%% (%d/%d) -- Train Task(%d/%d)'
                             % (loss.item(), float(ewc_loss),
                                100. * correct / total, correct, total,
                                task_id, len(all_loader)))
            train_acc[0, epoch] = correct / total
            # test_acc[0, epoch] = self.test(trainloader)
        if args.consolidate and task_id < len(all_loader):
            # Estimate the diagonal of the Fisher information matrix and
            # consolidate it in the network for the next task's EWC penalty.
            print('=> Estimating diagonals of the fisher information matrix...',
                  end='', flush=True)
            self.net.consolidate(self.net.estimate_fisher(
                trainloader, fisher_estimation_sample_size))
            print(' Done!')
        else:
            logging.info('No consolidate/EWC loss available')
    return test_acc
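# `self.net.ewc_loss`, `estimate_fisher`, and `consolidate` live on the network
# class. For reference, the standard EWC penalty is
#     L_ewc = (lambda / 2) * sum_i F_i * (theta_i - theta_star_i)^2
# where F_i is the Fisher diagonal and theta_star the consolidated parameters.
# A minimal sketch under that assumption (dict names are hypothetical):
def ewc_penalty_sketch(net, fisher, old_params, lamda=1.0):
    penalty = 0.0
    for name, param in net.named_parameters():
        penalty = penalty + (fisher[name] * (param - old_params[name]) ** 2).sum()
    return (lamda / 2.0) * penalty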
def train_with_frozen_filter(self, epoch, trainloader, mask_dict, mask_dict_R):
    # Snapshot the current weights; masked positions are restored from this
    # copy after every optimizer step.
    param_old_dict = OrderedDict(
        (k, v.clone()) for k, v in self.net.state_dict().items())
    print('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
    self.scheduler.step()
    self.net.train()
    train_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(self.device), targets.to(self.device)
        self.optimizer.zero_grad()
        outputs, _ = self.net(inputs)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Re-apply the masks: keep the old value where mask_dict is 1 and
        # accept the updated value where mask_dict_R is 1.
        param_processed = OrderedDict()
        for layer_name, param_new in self.net.state_dict().items():
            param_new = param_new.float()
            param_old = param_old_dict[layer_name].float()
            if re.search('conv', layer_name):
                param_processed[layer_name] = (
                    torch.mul(param_old, mask_dict[layer_name]) +
                    torch.mul(param_new, mask_dict_R[layer_name]))
            elif re.search('shortcut', layer_name) and len(param_new.shape) == 4:
                # conv in shortcut
                param_processed[layer_name] = (
                    torch.mul(param_old, mask_dict[layer_name]) +
                    torch.mul(param_new, mask_dict_R[layer_name]))
            elif re.search('linear', layer_name):
                param_processed[layer_name] = (
                    torch.mul(param_old, mask_dict[layer_name]) +
                    torch.mul(param_new, mask_dict_R[layer_name]))
            else:
                # bn stats, biases, num_batches_tracked, etc. stay unmasked
                param_processed[layer_name] = param_new
        self.net.load_state_dict(param_processed)
        progress_bar(batch_idx, len(trainloader),
                     'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train'
                     % (train_loss / (batch_idx + 1),
                        100. * correct / total, correct, total))
    return correct / total
def train_with_frozen_filter(self, epoch, trainloader, mask_dict, mask_dict_R,
                             path_postfix=''):
    # Snapshot the current weights; masked positions are restored from this
    # copy after every optimizer step.
    param_old_dict = OrderedDict(
        (k, v.clone()) for k, v in self.net.state_dict().items())
    self.net.train()
    logging.info('\nEpoch: %d lr: %s' % (epoch, self.scheduler.get_lr()))
    train_loss = 0.0
    correct = 0
    total = 0
    # optimizer.step() before scheduler.step() keeps the PyTorch >=1.1
    # step-ordering check quiet.
    self.optimizer.step()
    self.scheduler.step()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(self.device), targets.to(self.device)
        self.optimizer.zero_grad()
        outputs = self.net(inputs)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        acc = 100. * correct / total

        # Re-apply the masks: keep the old value where mask_dict is 1 and
        # accept the updated value where mask_dict_R is 1.
        param_processed = OrderedDict()
        for layer_name, param_new in self.net.state_dict().items():
            param_new = param_new.float()
            param_old = param_old_dict[layer_name].float()
            if re.search('conv', layer_name):
                param_processed[layer_name] = (
                    torch.mul(param_old, mask_dict[layer_name]) +
                    torch.mul(param_new, mask_dict_R[layer_name]))
            elif re.search('shortcut', layer_name) and len(param_new.shape) == 4:
                # conv in shortcut
                param_processed[layer_name] = (
                    torch.mul(param_old, mask_dict[layer_name]) +
                    torch.mul(param_new, mask_dict_R[layer_name]))
            elif re.search('linear', layer_name):
                param_processed[layer_name] = (
                    torch.mul(param_old, mask_dict[layer_name]) +
                    torch.mul(param_new, mask_dict_R[layer_name]))
            else:
                # bn stats, biases, num_batches_tracked, etc. stay unmasked
                param_processed[layer_name] = param_new
        self.net.load_state_dict(param_processed)
        progress_bar(batch_idx, len(trainloader),
                     'Loss:%.3f|Acc:%.3f%% (%d/%d)--Train'
                     % (train_loss / (batch_idx + 1), acc, correct, total))
    # Save the edge model at the first, middle, and last epoch.
    if epoch == 0 or epoch == args.epoch_edge - 1 or epoch == args.epoch_edge // 2:
        self.save_checkpoint_t7(epoch, acc, train_loss, '_edge_model',
                                path_postfix)
    return correct / total
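# The pickled mask_dict / mask_dict_R consumed by the frozen-filter training
# are produced elsewhere: mask_dict marks frozen weights with 1 and
# mask_dict_R is its complement. A sketch of one plausible way to build them
# (magnitude-based freezing is an assumption, not the original criterion):
def build_masks_sketch(net, keep_ratio=0.5):
    import re
    mask_dict, mask_dict_R = {}, {}
    for name, param in net.state_dict().items():
        if re.search('conv', name) or re.search('linear', name):
            n = param.numel()
            k = int(n * keep_ratio)  # number of weights to freeze
            thresh = param.abs().flatten().kthvalue(max(n - k, 1)).values
            mask = (param.abs() > thresh).float()
            mask_dict[name] = mask           # 1 = frozen (keep old weight)
            mask_dict_R[name] = 1.0 - mask   # 1 = trainable (take new weight)
    return mask_dict, mask_dict_R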