def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.float().cuda(), targets.long().cuda()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets).cpu().sum().item()
        progress_bar(batch_idx, len(trainloader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    # Average over the number of batches, not the last index (off-by-one fix).
    return train_loss / len(trainloader), 100. * correct / total
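# `progress_bar` is called throughout this section but never defined here; it is
# conventionally imported from a local utils module (as in the popular
# pytorch-cifar codebase). The sketch below is a minimal stand-in with the same
# (batch_idx, total, msg) call signature, not the original implementation.
import sys

def progress_bar(current, total, msg=None):
    """Minimal in-place textual progress bar: prints `[current/total] msg`."""
    sys.stdout.write('\r [%d/%d] %s' % (current + 1, total, msg or ''))
    if current + 1 == total:
        sys.stdout.write('\n')
    sys.stdout.flush()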
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():  # replaces the deprecated volatile=True flag
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.float().cuda(), targets.long().cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).cpu().sum().item()
            progress_bar(batch_idx, len(testloader),
                         'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        best_acc = acc
        checkpoint(acc, epoch)
    return test_loss / len(testloader), 100. * correct / total
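# `checkpoint(acc, epoch)` is called above but not defined in this section. A
# minimal sketch, assuming the usual pattern of saving the global `net`'s state
# plus metadata under ./checkpoint/; the filename and dict keys are
# illustrative, not necessarily the original ones.
import os

def checkpoint(acc, epoch):
    print('Saving checkpoint...')
    state = {
        'net': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
    }
    os.makedirs('checkpoint', exist_ok=True)
    torch.save(state, './checkpoint/ckpt.pth')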
def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
        for batch_idx, batch in enumerate(iterator):
            src = batch.src
            trg = batch.trg
            output = model(src, trg, 0)  # turn off teacher forcing
            # Drop the <sos> position and flatten for the loss.
            output = output[1:].view(-1, output.shape[-1])
            trg = trg[1:].view(-1)
            loss = criterion(output, trg)
            epoch_loss += loss.item()
            progress_bar(batch_idx, len(iterator), 'Testing...')
    return epoch_loss / len(iterator)
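# `evaluate` above passes 0 as the model's third argument to disable teacher
# forcing. For contrast, a sketch of the matching training step, assuming that
# argument is the teacher-forcing probability (as in common encoder-decoder
# tutorials); the 0.5 ratio and clip value are illustrative, not the original.
def train_seq2seq(model, iterator, optimizer, criterion, clip=1.0):
    model.train()
    epoch_loss = 0
    for batch in iterator:
        src, trg = batch.src, batch.trg
        optimizer.zero_grad()
        output = model(src, trg, 0.5)  # teacher forcing on roughly half the steps
        output = output[1:].view(-1, output.shape[-1])
        loss = criterion(output, trg[1:].view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)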
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.float().cuda(), targets.long().cuda()
        # Generate mixed inputs, two label vectors, and the mixing coefficient.
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, opt.alpha, use_cuda)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss_func = mixup_criterion(targets_a, targets_b, lam)
        loss = loss_func(criterion, outputs)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        # Accuracy under mixup: lam-weighted agreement with both label sets.
        correct += (lam * predicted.eq(targets_a).cpu().sum().item()
                    + (1 - lam) * predicted.eq(targets_b).cpu().sum().item())
        progress_bar(batch_idx, len(trainloader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    # Average over the number of batches, not the last index (off-by-one fix).
    return train_loss / len(trainloader), 100. * correct / total
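# `mixup_data` and `mixup_criterion` are not defined in this section. A sketch
# following the reference mixup implementation (Zhang et al., "mixup: Beyond
# Empirical Risk Minimization"); it matches the call sites above, but it is a
# reconstruction, not necessarily the original helpers.
import numpy as np

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Return mixed inputs, the two target vectors, and the mixing coefficient."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0))
    if use_cuda:
        index = index.cuda()
    mixed_x = lam * x + (1 - lam) * x[index, :]
    return mixed_x, y, y[index], lam

def mixup_criterion(y_a, y_b, lam):
    """Wrap a base criterion into the lam-weighted mixup loss."""
    return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)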
def train(self, epochs=10):
    criterion = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        total = 0
        correct = 0
        loss = 0
        print('\nEpoch: %d' % epoch)
        for batch_idx, (inputs, targets) in enumerate(self.train_data_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = self.model(inputs)
            losses = criterion(outputs, targets)
            self.optimizer.zero_grad()
            losses.backward()
            loss += losses.item()
            self.optimizer.step()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).cpu().sum().item()
            progress_bar(batch_idx, len(self.train_data_loader),
                         'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (loss / (batch_idx + 1), 100. * correct / total, correct, total))
        self.test()
    print("Finished fine tuning.")
def test(net, quantized_type, test_loader, use_cuda=True):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs, quantized_type)
            _, predicted = torch.max(outputs.data, dim=1)
            correct += predicted.eq(targets).cpu().sum().item()
            total += targets.size(0)
            progress_bar(batch_idx, len(test_loader),
                         "Test Acc: %.3f%%" % (100.0 * correct / total))
    return 100.0 * correct / total
def print_training_result(self, batch_idx, n_batch, monitor_freq=100, append=''):
    if self.dataset_type == 'small':
        progress_bar(batch_idx, n_batch,
                     "Loss: %.3f, Acc: %.3f%% | %s"
                     % (self.train_loss / (batch_idx + 1), self.top1.avg, append))
    else:
        if batch_idx % monitor_freq == 0:
            print('Training: [%d / %d] \t Time %.3f (%.3f) \t Loss %.4f (%.4f)\n'
                  'Prec@1 %.4f (%.4f) \t Prec@5 %.4f (%.4f)\n'
                  '%s\n'
                  % (batch_idx, n_batch,
                     self.batch_time.val, self.batch_time.sum,
                     self.loss_ImageNet.val, self.loss_ImageNet.avg,
                     self.top1.val, self.top1.avg,
                     self.top5.val, self.top5.avg, append))
def test(epoch):
    global best_acc
    global best_prediction
    global best_target
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    pred_all = []
    target_all = []
    with torch.no_grad():  # replaces the deprecated volatile=True flag
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.float().cuda(), targets.long().cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).cpu().sum().item()
            pred_all = merge_ndarray(pred_all, predicted.cpu())
            target_all = merge_ndarray(target_all, targets.cpu())
            progress_bar(batch_idx, len(testloader),
                         'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        best_acc = acc
        best_prediction = pred_all
        best_target = target_all
        # checkpoint(acc, epoch)
    # Confusion matrix: sklearn expects (y_true, y_pred), so pass targets first.
    print(confusion_matrix(target_all, pred_all))
    return test_loss / len(testloader), 100. * correct / total
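# `merge_ndarray` accumulates per-batch predictions and targets into a single
# array but is not defined in this section. A minimal sketch, assuming it
# appends a tensor's values onto a (possibly empty) NumPy array; the original
# helper may differ.
def merge_ndarray(acc, batch_tensor):
    batch = batch_tensor.numpy()
    if len(acc) == 0:
        return batch
    return np.concatenate([acc, batch], axis=0)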
def evaluate(task_name, model, eval_dataloader, model_type,
             output_mode='classification', device='cuda'):
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()
    for batch_idx, batch in enumerate(eval_dataloader):
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {"input_ids": batch[0], "attention_mask": batch[1], "labels": batch[3]}
            if model_type != "distilbert":
                # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids.
                inputs["token_type_ids"] = (
                    batch[2] if model_type in ["bert", "xlnet", "albert"] else None
                )
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)
        progress_bar(batch_idx, len(eval_dataloader), 'Evaluating...')
    eval_loss = eval_loss / nb_eval_steps
    if output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif output_mode == "regression":
        preds = np.squeeze(preds)
    result = glue_compute_metrics(task_name, preds, out_label_ids)
    return result
def test(net, target_path, test_loader, use_cuda=True):
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            _, output = net(inputs, target_path)
            _, predicted = torch.max(output, dim=1)
            correct += predicted.eq(targets).cpu().sum().item()
            total += targets.size(0)
            progress_bar(batch_idx, len(test_loader),
                         "Acc: %.3f%%" % (100.0 * correct / total))
    return 100.0 * correct / total
def print_training_result(self, batch_idx, n_batch, monitor_freq=100, append=None):
    if self.dataset_type == 'small':
        progress_bar(batch_idx, n_batch,
                     "Loss: %.3f (%.3f), Acc: %.3f%% (%.3f%%) | %s"
                     % (self.loss.val, self.loss.avg,
                        100.0 * self.top1.val, 100.0 * self.top1.avg,
                        '' if append is None else append))
    else:
        if batch_idx % monitor_freq == 0:
            print('Training: [%d / %d] \t Time %.3f (%.3f) \t Loss %.4f (%.4f)\n'
                  'Prec@1 %.4f (%.4f) \t Prec@5 %.4f (%.4f)\n'
                  % (batch_idx, n_batch,
                     self.batch_time.val, self.batch_time.sum,
                     self.loss_ImageNet.val, self.loss_ImageNet.avg,
                     self.top1.val, self.top1.avg,
                     self.top5.val, self.top5.avg))
            if append is not None:
                print(append + '\n')
def test(net, quantized_type, test_loader, use_cuda=True, dataset_name='CIFAR10', n_batches_used=None):
    """
    Test method for the baseline quantization method.
    :param net: network to evaluate
    :param quantized_type: quantization scheme passed through to the forward pass
    :param test_loader: DataLoader over the test set
    :param use_cuda: move batches to the GPU when True
    :param dataset_name: 'ImageNet' uses top-1/top-5 meters; anything else uses plain accuracy
    :param n_batches_used: optional cap on the number of ImageNet batches evaluated
    :return: top-1 accuracy (and top-5 accuracy for ImageNet)
    """
    net.eval()
    if dataset_name != 'ImageNet':
        correct = 0
        total = 0
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            with torch.no_grad():
                outputs = net(inputs, quantized_type)
            _, predicted = torch.max(outputs.data, dim=1)
            correct += predicted.eq(targets.data).cpu().sum().item()
            total += targets.size(0)
            progress_bar(batch_idx, len(test_loader),
                         "Test Acc: %.3f%%" % (100.0 * correct / total))
        return 100.0 * correct / total
    else:
        batch_time = AverageMeter()
        test_loss = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        with torch.no_grad():
            end = time.time()
            for batch_idx, (inputs, targets) in enumerate(test_loader):
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs, quantized_type)
                losses = nn.CrossEntropyLoss()(outputs, targets)
                prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
                test_loss.update(losses.item(), inputs.size(0))
                top1.update(prec1.item(), inputs.size(0))
                top5.update(prec5.item(), inputs.size(0))
                # Measure elapsed time.
                batch_time.update(time.time() - end)
                end = time.time()
                if batch_idx % 100 == 0:
                    print('Test: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              batch_idx, len(test_loader), batch_time=batch_time,
                              loss=test_loss, top1=top1, top5=top5))
                if n_batches_used is not None and batch_idx >= n_batches_used:
                    break
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
        return top1.avg, top5.avg
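# `AverageMeter` and `accuracy` are used above but not defined in this section.
# Both follow the standard helpers from the official PyTorch ImageNet example,
# sketched here for completeness.
class AverageMeter(object):
    """Track the current value, running sum, count, and average of a metric."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) for each requested value of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res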
def print_training_result(self, batch_idx, n_batch, append=None):
    progress_bar(batch_idx, n_batch,
                 "Loss: %.3f (%.3f) %s"
                 % (self.loss.val, self.loss.avg,
                    '' if append is None else "| %s" % append))
            model.zero_grad()

            # ------
            # Record
            # ------
            preds = np.argmax(logits.data.cpu().numpy(), axis=1)
            out_label_ids = inputs["labels"].data.cpu().numpy()
            # glue_compute_metrics returns e.g. 'acc', 'f1', 'acc_and_f1'.
            result = glue_compute_metrics(task_name, preds, out_label_ids)
            if recorder is not None:
                recorder.update(losses.item(), acc=[result['acc_and_f1']],
                                batch_size=args.train_batch_size, is_train=True)
                recorder.print_training_result(batch_idx=step, n_batch=len(train_dataloader))
            else:
                train_loss += losses.item()
                progress_bar(step, len(train_dataloader),
                             "Loss: %.3f" % (train_loss / (step + 1)))

        # Evaluate once per epoch.
        result = evaluate(task_name, model, eval_dataloader, model_type)
        print(result)
        if recorder is not None:
            recorder.update(acc=result['acc_and_f1'], is_train=False)

    if recorder is not None:
        recorder.close()
            losses = criterion(output, trg)
            losses.backward()
            # Clip gradients to stabilize recurrent training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()

            # ------
            # Record
            # ------
            if recoder is not None:
                recoder.update(losses.item(), batch_size=args.batch_size,
                               cur_lr=optimizer.param_groups[0]['lr'])
                recoder.print_training_result(batch_idx, len(train_loader))
            else:
                train_loss += losses.item()
                progress_bar(batch_idx, len(train_loader),
                             "Loss: %.3f" % (train_loss / (batch_idx + 1)))

        # -----
        # Test
        # -----
        eval_loss = evaluate(model, test_loader, criterion)
        if recoder is not None:
            recoder.update(eval_loss, is_train=False)
        print('[%2d] Test loss: %.3f' % (epoch_idx, eval_loss))

    if recoder is not None:
        recoder.close()