def train(args, model, device, train_loader, optimizer, loss_func, epoch):
    model.train()
    train_loss = 0
    running_loss = 0
    running_datasize = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device)
        targets = [target.to(device) for target in targets]
        optimizer.zero_grad()
        predictions = model(data)
        location_loss, confidence_loss = loss_func(predictions, targets)
        loss = location_loss + confidence_loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        running_loss += loss.item()
        running_datasize += 1
        if (batch_idx + 1) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.4e}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader),
                running_loss / running_datasize))
            running_loss = 0
            running_datasize = 0
    return train_loss
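# A minimal sketch of how `train` above might be driven. Everything here
# (the argparse namespace, the dummy two-head loss, the toy model and loader)
# is an assumption for illustration, not the original project's setup.
import argparse
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

if __name__ == '__main__':
    args = argparse.Namespace(log_interval=10)  # hypothetical args
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = nn.Linear(4, 2).to(device)

    def loss_func(predictions, targets):
        # Dummy stand-in returning (location_loss, confidence_loss).
        mse = nn.functional.mse_loss(predictions, targets[0])
        return mse, mse * 0.5

    dataset = TensorDataset(torch.randn(64, 4), torch.randn(64, 2))
    # `train` expects targets as a list of tensors, so wrap them in collate_fn.
    loader = DataLoader(dataset, batch_size=8,
                        collate_fn=lambda b: (torch.stack([x for x, _ in b]),
                                              [torch.stack([y for _, y in b])]))
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    total = train(args, model, device, loader, optimizer, loss_func, epoch=1)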
def train_model(model, criterion, optimizer, dataload, valdataloader, num_epochs=2):
    setDir(args.logdir)
    writer = SummaryWriter(args.logdir)
    max_val_acc = 0
    lr = args.lr
    for epoch in range(num_epochs):
        # Halve the learning rate every 5 epochs by rebuilding the optimizer.
        if epoch % 5 == 0:
            lr = lr / 2
            optimizer = optim.Adam(model.parameters(), lr=lr)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        dataset_size = len(dataload.dataset)
        epoch_loss = 0
        step = 0  # number of minibatches seen so far
        val_loss = 0
        val_epoch_acc = 0
        val_step = 0
        model.train()
        for x, y in dataload:  # iterate over the dataset one minibatch at a time
            optimizer.zero_grad()  # zero the gradients (dw, db, ...) before every minibatch
            inputs = x.to(device)
            labels = y.to(device)
            outputs = model(inputs)  # forward pass
            loss = cross_entropy2d(outputs, torch.squeeze(labels, dim=1).long())  # compute the loss
            loss.backward()  # backward pass: compute the gradients
            optimizer.step()  # apply one parameter update; every Optimizer implements step()
            epoch_loss += loss.item()
            step += 1
            print("EPOCH:%d,%d/%d,train_loss:%0.3f" %
                  (epoch, step, dataset_size // dataload.batch_size, loss.item()))
        model.eval()
        with torch.no_grad():  # validation needs no gradient bookkeeping
            for x, y in valdataloader:
                inputs = x.to(device)
                labels = y.to(device)
                outputs = model(inputs)  # forward pass
                val_acc = accuracy(outputs, torch.squeeze(labels, dim=1).long())
                loss = cross_entropy2d(outputs, torch.squeeze(labels, dim=1).long())  # compute the loss
                val_epoch_acc += val_acc.item()  # .item() is safe on CUDA tensors, unlike .numpy()
                val_loss += loss.item()
                val_step += 1
        val_epoch_acc = val_epoch_acc / val_step
        # if val_epoch_acc > max_val_acc:
        #     # save the full state dict of the best model so far
        #     torch.save(model.state_dict(), os.path.join(args.weight, 'weights_%d.pth' % epoch))
        #     max_val_acc = val_epoch_acc
        writer.add_scalars('train_epoch_loss', {'epoch_loss': epoch_loss}, epoch)
        writer.add_scalars('val_epoch_loss', {'val_loss': val_loss}, epoch)
        writer.add_scalars('val_epoch_acc', {'val_epoch_acc': val_epoch_acc}, epoch)
        print("epoch %d loss:%0.3f val_loss:%0.3f, val_acc:%0.3f" %
              (epoch, epoch_loss, val_loss, val_epoch_acc))
        torch.save(model.state_dict(), os.path.join(args.weight, 'weights_%d.pth' % epoch))
        torch.cuda.empty_cache()
    writer.close()
    return model
def __test_epoch(self):
    self.model.eval()
    losses = []
    accuracies = []
    TN = FN = TP = FP = 0
    progress = tqdm(enumerate(self.test_loader),
                    total=len(self.test_loader),
                    desc='Test',
                    file=sys.stdout)
    predictions = np.array([])
    all_targets = np.array([])
    for batch_idx, data in progress:
        with torch.no_grad():
            samples, targets = data
            if self.cuda:
                samples = samples.cuda()
                targets = targets.cuda()
            outputs = self.model(samples)
            loss = self.criterion(outputs, targets)
            losses.append(loss.item())
            targets = targets.data.cpu()
            _, predicted = torch.max(outputs.data, 1)
            predicted = predicted.data.cpu()
            all_targets = np.concatenate((all_targets, targets))
            predictions = np.concatenate((predictions, predicted))
            perf = self.__perf_measure(targets, predicted)
            TN += perf[2]
            FN += perf[3]
            TP += perf[0]
            FP += perf[1]
            acc = (TP + TN) / (FP + FN + TP + TN) if FP + FN + TP + TN > 0 else 0
            accuracies.append(acc)
            precision = TP / (TP + FP) if TP + FP > 0 else 0
            recall = TP / (TP + FN) if TP + FN > 0 else 0
            f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
            progress.set_description(
                'Test Loss: {:.4f} | Accuracy: {:.4f} | F1: {:.4f} | Precision: {:.4f} | Recall: {:.4f} | TP: {} | TN: {} | FP: {} | FN: {}'
                .format(loss.item(), acc, f1, precision, recall, TP, TN, FP, FN))
    # classification_report = sklearn.metrics.classification_report(all_targets, predictions, target_names=['Open', 'Partially Closed', 'Closed'])
    classification_report = sklearn.metrics.classification_report(
        all_targets, predictions, target_names=['Open', 'Closed'])
    classification_metrics = sklearn.metrics.precision_recall_fscore_support(
        all_targets, predictions, average='macro')
    confusion_matrix = sklearn.metrics.confusion_matrix(all_targets, predictions)
    return classification_report, classification_metrics, confusion_matrix
def train_loop(dataloader, model, loss_fn, optimizer, scheduler=None):
    size = len(dataloader.dataset)
    for batch, dic in enumerate(dataloader):
        x = dic['x'].to(device)
        z = dic['z'].to(device)
        label = dic['label'].to(device)
        pred = model(x, z)
        # print(x); print(z); print(label); print(pred)  # debug
        loss = loss_fn(pred, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(x)
            print("loss : {:>7f} [{:>5d}/{:>5d}]".format(loss, current, size))
            # break
    if scheduler:
        scheduler.step()
def run_epoch(self, phase, data_loader, criterion):
    if phase == 'train':
        self.model.train()
    else:
        self.model.eval()
    running_loss = 0.
    for data_dict in data_loader:
        for name in data_dict:
            data_dict[name] = data_dict[name].to(device=self.device, non_blocking=True)
        if phase == 'train':
            self.optimizer.zero_grad()
            with torch.enable_grad():
                pr_decs = self.model(data_dict['input'])
                loss = criterion(pr_decs, data_dict)
                loss.backward()
                self.optimizer.step()
        else:
            with torch.no_grad():
                pr_decs = self.model(data_dict['input'])
                loss = criterion(pr_decs, data_dict)
        running_loss += loss.item()
    epoch_loss = running_loss / len(data_loader)
    print('{} loss: {}'.format(phase, epoch_loss))
    return epoch_loss
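# A hedged sketch of a driver around `run_epoch` above. The trainer fields
# (model, optimizer, device), the dict-style batches, and the criterion are
# all assumptions chosen to make the example self-contained and runnable.
import torch
import torch.nn as nn

class _Trainer:
    def __init__(self):
        self.device = torch.device('cpu')
        self.model = nn.Linear(3, 1)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
    run_epoch = run_epoch  # reuse the module-level function above as a method

def _criterion(pred, data_dict):
    # Hypothetical criterion taking (predictions, batch dict), as run_epoch expects.
    return nn.functional.mse_loss(pred, data_dict['target'])

batches = [{'input': torch.randn(4, 3), 'target': torch.randn(4, 1)}
           for _ in range(5)]
trainer = _Trainer()
for epoch in range(2):
    trainer.run_epoch('train', batches, _criterion)
    trainer.run_epoch('val', batches, _criterion)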
def evaluate(self, step, epoch):
    self.model.eval()
    running_loss = 0
    running_psnr = 0
    with torch.no_grad():  # evaluation does not need the autograd graph
        for idx, (noisy, clean) in enumerate(self.valid_loader):
            noisy = noisy.to(self.device, dtype=torch.float)
            clean = clean.to(self.device, dtype=torch.float)
            output = self.model(noisy)
            if idx == 1:
                save_image(output, 'step_%d_output.png' % step, nrow=4)
                save_image(clean, 'step_%d_clean.png' % step, nrow=4)
            loss = self.criterion(output, clean)
            running_loss += loss.item()
            clean = clean.cpu().numpy()
            output = np.clip(output.cpu().numpy(), 0., 1.)
            # ------------ PSNR ------------
            for m in range(output.shape[0]):  # the last batch may be smaller than cfg.batch_size
                running_psnr += PSNR(clean[m], output[m])
    epoch_loss = running_loss / (len(self.valid_loader) * self.cfg.batch_size)
    epoch_psnr = running_psnr / (len(self.valid_loader) * self.cfg.batch_size)
    ckpt_name = f"step_{step}_epoch_{epoch}_val_loss_{epoch_loss:.3}_psnr_{epoch_psnr:.3}.pth"
    self._save_ckpt(epoch, ckpt_name)
    self.model.train()
    return epoch_loss, epoch_psnr
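# `PSNR` is not defined in this snippet. A minimal sketch of a compatible
# helper for numpy images scaled to [0, 1], as they are used above; the
# `data_range` default is an assumption based on the np.clip(..., 0., 1.).
import numpy as np

def PSNR(clean, output, data_range=1.0):
    """Peak signal-to-noise ratio between two images given as numpy arrays."""
    mse = np.mean((clean.astype(np.float64) - output.astype(np.float64)) ** 2)
    if mse == 0:
        return float('inf')  # identical images
    return 10.0 * np.log10((data_range ** 2) / mse)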
def train(train_loader, model, criterion, optimizer, args):
    # switch to train mode
    model.train()
    if args.freeze_BN:
        # Keep BatchNorm layers in eval mode so their running stats stay frozen.
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
    run_loss = 0
    for i, (input, target) in enumerate(train_loader):
        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
        # compute output
        output = model(input)
        loss = criterion(output, target)
        run_loss += loss.item()
        if i % num_avg_iter == 0:  # num_avg_iter: module-level logging interval
            print('Training loss running avg', run_loss / float(num_avg_iter))
            run_loss = 0
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def run_epoch(self, phase, data_loader, criterion): """封装一个epoch中的forward、loss、backward过程""" if phase == 'train': self.model.train() else: self.model.eval() running_loss = 0. # visualize the training process data_loader = iter(data_loader) for i in tqdm(range(len(data_loader))): data_dict = next(data_loader) for name in data_dict: data_dict[name] = data_dict[name].to(device=self.device, non_blocking=True) if phase == 'train': self.optimizer.zero_grad() with torch.enable_grad(): # 前向传播只调用了model,需要调查一下decoder调用的位置 pr_decs = self.model(data_dict['input']) loss = criterion(pr_decs, data_dict) loss.backward() self.optimizer.step() else: with torch.no_grad(): pr_decs = self.model(data_dict['input']) loss = criterion(pr_decs, data_dict) running_loss += loss.item() epoch_loss = running_loss / len(data_loader) print('{} loss: {}'.format(phase, epoch_loss)) return epoch_loss
def __train_epoch(self):
    self.model.train()
    losses = []
    progress = tqdm(enumerate(self.train_loader),
                    total=len(self.train_loader),
                    desc='Training',
                    file=sys.stdout)
    for batch_idx, data in progress:
        samples, targets = data
        samples1, samples2 = samples
        if self.cuda:
            samples1 = samples1.cuda()
            samples2 = samples2.cuda()
            targets = targets.cuda()
        self.optimizer.zero_grad()
        outputs = self.model((samples1, samples2))
        # output1, output2 = self.model((samples1, samples2))
        loss = self.criterion(outputs, targets.float())
        loss.backward()
        self.optimizer.step()
        losses.append(loss.item())
        progress.set_description('Mean Training Loss: {:.4f}'.format(np.mean(losses)))
    return np.mean(losses)
def train(epoch_idx, mAP):
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label, neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b, p, dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, _, _ = posv.shape
        vfeat = torch.cat((posv, negv), 0)
        afeat = torch.cat((posa, nega), 0)
        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        label = torch.cat((pos_label, neg_label), 0).long().view(-1)
        # pdb.set_trace()
        ins_scores, bag_predict = f(vfeat, afeat)
        # print(ins_scores)
        celoss = CELoss(bag_predict, label)
        ahloss = AHLoss(ins_scores[:b1, :], ins_scores[b1:, :])
        # pdb.set_trace()
        loss = celoss + ahloss
        # loss = ahloss
        if args.dataset != "youtube":
            print("Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max mAP_5: {:.4f}, current mAP_5: {:.4f}"
                  .format(args.domain, epoch_idx + 1, batch_idx, len(train_loader),
                          loss.item(), max_mAP_5, mAP_5))
        else:
            print("Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max test mAP: {:.4f}, current test mAP: {:.4f}"
                  .format(args.domain, epoch_idx + 1, batch_idx, len(train_loader),
                          loss.item(), max_mAP, mAP))
        # recoder.update('loss', loss.data, epoch_idx * len(train_loader) + batch_idx)
        loss.backward()
        opt.step()
        # loss = attloss + att_visual_i_loss + att_audio_i_loss + att_visual_j_loss + att_audio_j_loss  # 0.6543
    recoder.update('celoss', celoss.item(), epoch_idx)
    recoder.update('ahloss', ahloss.item(), epoch_idx)
    recoder.save()
def train(trainloader, t_model, s_model, criterion, optimizer, epoch, use_cuda, args):
    # switch to train mode
    global kd_loss_fun, cmclloss_v1, indeploss, mclloss
    t_model.eval()
    s_model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_kl = AverageMeter()
    losses_ce = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for batch_idx, batch_data in enumerate(trainloader):
        # measure data loading time
        if len(batch_data) == 2:
            inputs, targets = batch_data
        else:
            inputs, targets, indexes = batch_data
        data_time.update(time.time() - end)
        if use_cuda:
            # `async=True` is a syntax error in Python 3; use non_blocking instead.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)
        # compute output
        t_outputs = t_model(inputs)
        s_outputs = s_model(inputs)
        t_prec1, t_prec5 = accuracy(t_outputs.data, targets.data, topk=(1, 5))
        # measure accuracy and record loss
        prec1, prec5 = accuracy(s_outputs.data, targets.data, topk=(1, 5))
        loss_kl = kd_loss_fun(s_outputs, t_outputs.detach(), targets)
        loss_ce = criterion(s_outputs, targets)
        loss = loss_kl
        losses.update(loss.item(), inputs.size(0))
        losses_kl.update(loss_kl.item(), inputs.size(0))
        losses_ce.update(loss_ce.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.2f | KLloss: %.2f | ce_loss: %.2f | Top1: %.2f | Top5: %.2f | t_top1: %.2f | t_top5: %.2f'
            % (losses.avg, loss_kl.item(), loss_ce.item(), top1.avg, top5.avg,
               t_prec1.item(), t_prec5.item()))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
    return (losses.avg, losses_kl.avg, losses_ce.avg, top1.avg, top5.avg)
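# `kd_loss_fun` is defined elsewhere in the project. A common distillation
# loss it could correspond to is temperature-scaled KL divergence between
# student and teacher logits (Hinton et al.). This is a hedged sketch, not
# the project's actual implementation; `targets` is accepted but unused here.
import torch.nn.functional as F

def kd_loss_sketch(s_outputs, t_outputs, targets, T=4.0):
    log_p_s = F.log_softmax(s_outputs / T, dim=1)
    p_t = F.softmax(t_outputs / T, dim=1)
    # 'batchmean' matches the mathematical KL definition; the T^2 factor keeps
    # gradient magnitudes comparable across temperatures.
    return F.kl_div(log_p_s, p_t, reduction='batchmean') * (T * T)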
def train(self):
    self.model.train()
    val_loss, val_psnr = self.evaluate(self.step - 1, self.start_epoch)
    print("[*] Preliminary check: Epoch: {} Step: {} Validation Loss: {:.5f} PSNR: {:.3f}"
          .format(self.start_epoch, (self.step - 1), val_loss, val_psnr))
    print('-' * 40)
    # Resume training from the stopped epoch
    for epoch in range(self.start_epoch, self.cfg.num_epochs):
        step_loss = 0
        start_time = time.time()
        for idx, (noisy, clean) in enumerate(self.train_loader, start=1):
            # Input/Target
            noisy = noisy.to(self.device, dtype=torch.float)
            clean = clean.to(self.device, dtype=torch.float)
            # BackProp
            self.optimizer.zero_grad()
            output = self.model(noisy)
            loss = self.criterion(output, clean)
            loss.backward()
            self.optimizer.step()
            # Stats
            step_loss += loss.item()
            if idx % self.cfg.verbose_step == 0:
                val_loss, val_psnr = self.evaluate(self.step, epoch)
                self.writer.add_scalar("Loss/Train", step_loss / self.cfg.verbose_step, self.step)
                self.writer.add_scalar("Loss/Validation", val_loss, self.step)
                self.writer.add_scalar("Stats/LR", self.optimizer.param_groups[0]['lr'], self.step)
                self.writer.add_scalar("Stats/PSNR", val_psnr, self.step)
                print("[{}/{}/{}] Loss [T/V]: [{:.5f}/{:.5f}] PSNR: {:.3f} LR: {} Time: {:.1f} Output: [{}-{}]"
                      .format(epoch, self.step, idx,
                              (step_loss / self.cfg.verbose_step), val_loss, val_psnr,
                              self.optimizer.param_groups[0]['lr'],
                              (time.time() - start_time),
                              torch.min(output).item(), torch.max(output).item()))
                self.step += 1
                if self.cfg.scheduler == "step":
                    self.lr_sch.step()
                elif self.cfg.scheduler == "plateau":
                    self.lr_sch.step(metrics=val_loss)
                step_loss, start_time = 0, time.time()
                self.model.train()
def __train_epoch(self):
    self.model.train()
    losses = []
    accuracies = []
    TN = FN = TP = FP = 0
    progress = tqdm(enumerate(self.train_loader),
                    total=len(self.train_loader),
                    desc='Training',
                    file=sys.stdout)
    for batch_idx, data in progress:
        samples, targets = data
        if self.cuda:
            samples = samples.cuda()
            targets = targets.cuda()
        self.optimizer.zero_grad()
        outputs = self.model(samples)
        loss = self.criterion(outputs, targets)
        loss.backward()
        self.optimizer.step()
        losses.append(loss.item())
        targets = targets.data.cpu()
        _, predicted = torch.max(outputs.data, 1)
        predicted = predicted.data.cpu()
        perf = self.__perf_measure(targets, predicted)
        TN += perf[2]
        FN += perf[3]
        TP += perf[0]
        FP += perf[1]
        acc = (TP + TN) / (FP + FN + TP + TN) if FP + FN + TP + TN > 0 else 0
        precision = TP / (TP + FP) if TP + FP > 0 else 0
        recall = TP / (TP + FN) if TP + FN > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
        accuracies.append(acc)
        progress.set_description(
            'Training Loss: {:.4f} | Accuracy: {:.4f} | F1: {:.4f} | Precision: {:.4f} | Recall: {:.4f} | TP: {} | TN: {} | FP: {} | FN: {}'
            .format(loss.item(), acc, f1, precision, recall, TP, TN, FP, FN))
    return np.mean(losses)
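# `__perf_measure` is private to the class and not shown. Judging by the
# unpacking above (perf[0]=TP, perf[1]=FP, perf[2]=TN, perf[3]=FN), it returns
# confusion counts in the order (TP, FP, TN, FN); that order is an inference,
# and this is a hedged sketch for binary labels, not the original helper.
import torch

def perf_measure_sketch(targets, predicted):
    """Confusion counts for binary 0/1 tensors; returns (TP, FP, TN, FN)."""
    TP = int(((predicted == 1) & (targets == 1)).sum())
    FP = int(((predicted == 1) & (targets == 0)).sum())
    TN = int(((predicted == 0) & (targets == 0)).sum())
    FN = int(((predicted == 0) & (targets == 1)).sum())
    return TP, FP, TN, FN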
def train_model(model, criterion, optimizer_ft, scheduler, epoch):
    scheduler.step()
    lambda1 = sigmoid_rampup(epoch, args.LabelWt)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    model.train()
    correct = 0
    total = 0
    end = time.time()
    for batch_idx, (inputs, targets, weights) in enumerate(dataloaders_train):
        if use_gpu:
            inputs = Variable(inputs.cuda())
            targets = Variable(targets.cuda())
            weights = Variable(weights.cuda())
        data_time.update(time.time() - end)
        optimizer_ft.zero_grad()
        outputs = model(inputs)
        if args.stage2:
            loss = criterion(outputs, targets, weights)
        else:
            loss = criterion(outputs, targets, lambda1)
        loss.backward()
        optimizer_ft.step()
        train_loss.update(loss.item(), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += inputs.size(0)
        if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}] '
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f}) '
                  'Accu: {:.2f}'.format(epoch, batch_idx, len(dataloaders_train),
                                        100. * correct / total,
                                        batch_time=batch_time,
                                        data_time=data_time,
                                        train_loss=train_loss))
    writer.add_scalar('training acc (train)', 100. * correct / total, epoch)
    writer.add_scalar('loss', train_loss.avg, epoch)
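# `sigmoid_rampup` is defined elsewhere. The name matches the exponential
# ramp-up schedule popularized by temporal-ensembling / mean-teacher training;
# a hedged sketch under that assumption (not necessarily the project's code):
import numpy as np

def sigmoid_rampup_sketch(current, rampup_length):
    """Ramp from 0 to 1 over `rampup_length` epochs via exp(-5 * (1 - x)^2)."""
    if rampup_length == 0:
        return 1.0
    current = np.clip(current, 0.0, rampup_length)
    phase = 1.0 - current / rampup_length
    return float(np.exp(-5.0 * phase * phase))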
def train(epoch_idx, mAP):
    # return
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label, neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b, p, dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, ds, _, _ = posv.shape
        vfeat = torch.cat((posv, negv), 0)
        afeat = torch.cat((posa, nega), 0)
        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        label = torch.cat((pos_label, neg_label), 0).long().squeeze(-1)
        # pdb.set_trace()
        ins_scores, bag_predicts = f(vfeat, afeat)
        # print(ins_scores)
        loss = 0
        ahs = []
        ces = []
        for i in range(ds):
            bag_predict = bag_predicts[i]
            ins_score = ins_scores[i]
            celoss = CELoss(bag_predict, label[:, i])
            ahloss = AHLoss(ins_score[:b1, :], ins_score[b1:, :])
            # ahs.append(ahloss)
            # ces.append(celoss)
            if loss == 0:
                loss = celoss + ahloss
            else:
                loss = loss + celoss + ahloss
        # ahs = torch.stack(ahs)
        # ces = torch.stack(ces)
        # loss = torch.mean(ahs) + torch.mean(celoss)
        print("In epoch {}, [{}/{}]: loss: {:.6f}, max avg_mAP: {:.4f}, current test mAP: {:.4f}"
              .format(epoch_idx + 1, batch_idx, len(train_loader), loss.item(), maxavgMap, avg_mAP))
        print("current mAP: {};".format(mAP))
        print("max mAP: {};".format(maxMAP))
        # recoder.update('loss', loss.data, epoch_idx * len(train_loader) + batch_idx)
        loss.backward()
        opt.step()
        # loss = attloss + att_visual_i_loss + att_audio_i_loss + att_visual_j_loss + att_audio_j_loss  # 0.6543
    recoder.update('celoss', celoss.item(), epoch_idx)
    recoder.update('ahloss', ahloss.item(), epoch_idx)
    recoder.save()
def iterate(self, epoch: int, phase: str):
    self.net.train(phase == "train")
    dataloader = self.dataloaders[phase]
    # self.meter.on_epoch_begin(epoch, phase)
    for itr, (images, targets) in tqdm(enumerate(dataloader), total=len(dataloader)):
        images = images.to(self.device).float()
        N = images.shape[0]
        np_logits, hv_logits, nc_logits = self.net(images)
        np_targets = utils.get_np_targets(targets[:, 0, :, :])
        hv_targets = utils.get_hv_targets(targets[:, 0, :, :])
        nc_targets = utils.get_nc_targets(targets[:, 1, :, :])
        np_targets = np_targets.to(self.device)
        hv_targets = hv_targets.to(self.device)
        nc_targets = nc_targets.to(self.device)
        assert np_targets.shape == (N, 256, 256) and hv_targets.shape == (N, 2, 256, 256) \
            and nc_targets.shape == (N, 256, 256)
        loss, loss_np, loss_hv, loss_nc = self.hoverloss(np_logits, np_targets,
                                                         hv_logits, hv_targets,
                                                         nc_logits, nc_targets)
        if phase == "train":
            loss.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()
            self.scheduler.step(epoch)
        # Update metrics for this batch
        with torch.no_grad():
            loss = loss.detach()
            loss_np = loss_np.detach()
            loss_nc = loss_nc.detach()
            loss_hv = loss_hv.detach()
            self.epoch_loss['loss'].append(loss.item())
            self.epoch_loss['loss_np'].append(2 * loss_np.item())
            self.epoch_loss['loss_nc'].append(loss_nc.item())
            self.epoch_loss['loss_hv'].append(40 * loss_hv.item())
    self.store[phase]['loss'].append(sum(self.epoch_loss['loss']) / len(self.epoch_loss['loss']))
    self.store[phase]['loss_np'].append(sum(self.epoch_loss['loss_np']) / len(self.epoch_loss['loss_np']))
    self.store[phase]['loss_nc'].append(sum(self.epoch_loss['loss_nc']) / len(self.epoch_loss['loss_nc']))
    self.store[phase]['loss_hv'].append(sum(self.epoch_loss['loss_hv']) / len(self.epoch_loss['loss_hv']))
    self.epoch_loss['loss'] = [0]
    self.epoch_loss['loss_np'] = [0]
    self.epoch_loss['loss_nc'] = [0]
    self.epoch_loss['loss_hv'] = [0]
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return self.store[phase]['loss'][-1]
def train(cfg, model, post_processor, criterion, device, train_loader, optimizer, epoch):
    model.train()
    post_processor.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        feature = model(data)
        if cfg.task == "classification":
            output = post_processor(feature)
        elif cfg.task == "semantic_segmentation" or cfg.task == "few_shot_semantic_segmentation_fine_tuning":
            ori_spatial_res = data.shape[-2:]
            output = post_processor(feature, ori_spatial_res)
        loss = criterion(output, target)
        optimizer.zero_grad()  # reset gradient
        loss.backward()
        optimizer.step()
        if cfg.task == "classification":
            if batch_idx % cfg.TRAIN.log_interval == 0:
                pred = output.argmax(dim=1, keepdim=True)
                correct_prediction = pred.eq(target.view_as(pred)).sum().item()
                batch_acc = correct_prediction / data.shape[0]
                print('Train Epoch: {0} [{1}/{2} ({3:.0f}%)]\tLoss: {4:.6f}\tBatch Acc: {5:.6f}'
                      .format(epoch, batch_idx * len(data), len(train_loader.dataset),
                              100. * batch_idx / len(train_loader), loss.item(), batch_acc))
        elif cfg.task == "semantic_segmentation" or cfg.task == "few_shot_semantic_segmentation_fine_tuning":
            if batch_idx % cfg.TRAIN.log_interval == 0:
                pred_map = output.max(dim=1)[1]
                batch_acc, _ = utils.compute_pixel_acc(pred_map, target,
                                                       fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                print('Train Epoch: {0} [{1}/{2} ({3:.0f}%)]\tLoss: {4:.6f}\tBatch Pixel Acc: {5:.6f}'
                      .format(epoch, batch_idx * len(data), len(train_loader.dataset),
                              100. * batch_idx / len(train_loader), loss.item(), batch_acc))
        else:
            raise NotImplementedError
def evaluate_model(model, data_loader, loss_function, logfile=None):
    """
    Evaluates the model performance on a dataset (validation or test).

    Parameters:
    -----------
    model : torch.nn.Module
        The classifier to evaluate.
    data_loader : torch.utils.data.DataLoader
        Loader for the dataset to evaluate on.
    loss_function : function
        The loss function that is optimized.
    logfile : file-like or None
        The file to put logs into.

    Returns:
    --------
    metrics : defaultdict(float)
        The statistics (metrics) for the model on the given dataset.
    """
    model.eval()
    metrics = defaultdict(float)
    number_classes = data_loader.dataset.get_number_classes()
    if number_classes == 2:
        y_pred = np.zeros(len(data_loader.dataset))
    else:
        y_pred = np.zeros((len(data_loader.dataset), number_classes))
    y_true = np.zeros(len(data_loader.dataset))
    total_loss = 0
    with torch.no_grad():
        for batch_idx, data in enumerate(data_loader):
            data = data.to('cuda')
            print(f'\rEvaluating {batch_idx + 1} / {len(data_loader)}', end='\r')
            y_pred_i = model(data)
            loss = loss_function(y_pred_i, data.y, data.weight)
            y_pred[batch_idx * data_loader.batch_size:(batch_idx + 1) * data_loader.batch_size] = \
                y_pred_i.data.cpu().numpy().squeeze()
            y_true[batch_idx * data_loader.batch_size:(batch_idx + 1) * data_loader.batch_size] = \
                data.y.data.cpu().numpy().squeeze()
            total_loss += loss.item()
    metrics = get_metrics(y_true, y_pred)
    metrics['loss'] = total_loss / len(data_loader)
    print(metrics)
    values = ' -- '.join(map(lambda metric: f'{metric} : {(metrics[metric]):.4f}', metrics))
    log(logfile, f'\nMetrics: {values}')
    return metrics
def test(testloader, model, criterion, epoch, use_cuda, args):
    global best_acc
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    wholedata_num = testloader.dataset.__len__()
    fine_accuracy = np.zeros(100, dtype=float)  # 100*1
    classes_confidence = np.zeros((100, 100), dtype=float)  # 100*100
    samples_confidence = np.zeros((wholedata_num, 100), dtype=float)  # n*100
    end = time.time()
    start_index = 0
    for batch_idx, batch_data in enumerate(testloader):
        if len(batch_data) == 2:
            inputs, targets = batch_data
        else:
            inputs, targets, indexes = batch_data
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():  # the forward pass belongs inside no_grad too
            inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)
            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        progress_bar(batch_idx, len(testloader),
                     'Loss: %.2f | Top1: %.2f | Top5: %.2f' % (losses.avg, top1.avg, top5.avg))
    return (losses.avg, top1.avg, top5.avg)
def eval(model, data_loader, i):
    model.eval()
    fin_loss = 0.0
    tk = tqdm(data_loader, desc="Epoch" + " [VALID] " + str(i + 1))
    with torch.no_grad():
        for t, data in enumerate(tk):
            for k, v in data.items():
                data[k] = v.to(Config.DEVICE)
            _, loss = model(**data)
            fin_loss += loss.item()
            tk.set_postfix({'loss': '%.6f' % float(fin_loss / (t + 1))})
    return fin_loss / len(data_loader)
def train(epoch):
    fcn_model.train()  # train mode
    total_loss = 0.
    st = time.time()
    for batch_idx, (imgs, labels, Image_Path) in enumerate(train_loader):
        # train_batch += 1
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()
        # batch_idx += 1
        imgs_tensor = Variable(imgs)   # torch.Size([2, 3, 320, 320])
        target = Variable(labels)      # torch.Size([2, 320, 320])
        out = fcn_model(imgs_tensor)   # torch.Size([2, 21, 320, 320])
        loss = criterion(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # update all parameters
        total_loss += loss.item()  # .item() returns a Python float
        if batch_idx % 20 == 0:
            ed = time.time()
            print('train epoch [%d/%d], iter[%d/%d], lr %.7f, aver_loss %.5f, time_use = %.1f'
                  % (epoch, epochs, batch_idx, len(train_loader), learning_rate,
                     total_loss / (batch_idx + 1), ed - st))
            st = ed
        # # visualize scalars
        # label_img = tools.labelToimg(labels[0])
        # net_out = out[0].data.max(1)[1].squeeze_(0)
        # out_img = tools.labelToimg(net_out)
        # writer.add_scalar("loss", loss, train_batch)
        # writer.add_scalar("total_loss", total_loss, train_batch)
        # writer.add_scalars('loss/scalar_group', {"loss": train_batch * loss,
        #                                          "total_loss": train_batch * total_loss})
        # writer.add_image('Image', imgs[0], epoch)
        # writer.add_image('label', label_img, epoch)
        # writer.add_image("out", out_img, epoch)
    # `x is not np.nan` is always True for a fresh float; test the value instead.
    assert not np.isnan(total_loss)
    assert not np.isinf(total_loss)
    torch.save(fcn_model.state_dict(), './models/temp.pth')  # checkpoint after every epoch
    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' % (epoch, epochs, total_loss))
    return total_loss
def training(self, epoch):
    pbar = tqdm(total=self.n_train,
                desc=f'Epoch {epoch + 1}/{self.num_epoch}',
                unit='img',
                bar_format='{l_bar}%s{bar:10}%s{r_bar}{bar:-10b}' % (Fore.RED, Fore.RESET))
    mean_loss, mean_score = 0, 0
    self.net.train()
    n_iter = len(self.loader_train)
    for k, btchs in enumerate(self.loader_train):
        imgs = btchs[0].to(device=self.dvc_main, dtype=self.dtype)
        labels = btchs[1].to(device=self.dvc_main, dtype=self.dtype)
        self.scheduler.step(epoch + k / n_iter)
        self.optim.zero_grad()
        preds = self.net(imgs)
        loss = self.criterion(preds, labels)
        loss.backward()
        self.optim.step()
        with torch.no_grad():
            img_dt = imgs.data
            label_dt = labels.data
            pred_dt = preds.data
            mean_score += F1Score(pred_dt, label_dt)
            mean_loss += loss.item()
            lrs = f"{self.scheduler.get_last_lr()[0]:.3f}"
            pbar.set_postfix(**{self.name_loss: mean_loss / (k + 1),
                                'F1Score': mean_score / (k + 1),
                                'LRs': lrs})
            pbar.update(imgs.shape[0])
            if k == 0:
                img_dict = {'Train/': img_dt,
                            'Train/true': label_dt,
                            'Train/pred': pred_dt}
                self.writing(epoch, self.writer_main, img_dict, opt='image')
    scalar_dict = {self.name_loss: mean_loss / (n_iter + 1),
                   'F1Score': mean_score / (n_iter + 1)}
    pbar.write(_term_move_up(), end='\r')
    self.writing(epoch, self.writer_main, scalar_dict, opt='scalar')
    pbar.close()
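# `F1Score` is used here and in `validation` below but not defined in this
# snippet. A hedged sketch of a batch F1 for binary segmentation maps,
# assuming predictions are sigmoid probabilities in [0, 1] and labels are 0/1:
import torch

def f1_score_sketch(preds, labels, eps=1e-7):
    preds = (preds > 0.5).float()          # binarize the probability maps
    tp = (preds * labels).sum()
    precision = tp / (preds.sum() + eps)
    recall = tp / (labels.sum() + eps)
    return (2 * precision * recall / (precision + recall + eps)).item()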
def train(epoch, model, loss_fn, train_loader, optimizer):
    model.train()
    # Horovod: set epoch to sampler for shuffling.
    train_loader.sampler.set_epoch(epoch)
    for batch_idx, (data, target) in tqdm(enumerate(train_loader),
                                          total=len(train_loader), ascii=True):
        for key in data:
            if type(data[key][0]) != np.str_:
                data[key] = data[key].cuda()
        target = target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # Horovod: use train_sampler to determine the number of examples in
            # this worker's partition.
            logging.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.sampler),
                100. * batch_idx / len(train_loader), loss.item()))
def train(epoch_idx, mAP):
    # return
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label, neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b, p, dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, ds, _, _ = posv.shape
        vfeats = torch.cat((posv, negv), 0)
        afeats = torch.cat((posa, nega), 0)
        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        labels = torch.cat((pos_label, neg_label), 0).long().squeeze(-1)
        # pdb.set_trace()
        for dm in range(ds):
            vfeat, afeat, label = vfeats[:, dm, :, :], afeats[:, dm, :, :], labels[:, dm]
            ins_score, bag_predict = networks[dm](vfeat, afeat)
            celoss = CELoss(bag_predict, label)
            ahloss = AHLoss(ins_score[:b1, :], ins_score[b1:, :])
            loss = celoss + ahloss
            print("Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max test mAP: {:.4f}, current test mAP: {:.4f}"
                  .format(domains[dm], epoch_idx + 1, batch_idx, len(train_loader),
                          loss.item(), MaxmAP[dm], mAP[dm]))
            # recoder.update('loss', loss.data, epoch_idx * len(train_loader) + batch_idx)
            loss.backward()
            opts[dm].step()
            # loss = attloss + att_visual_i_loss + att_audio_i_loss + att_visual_j_loss + att_audio_j_loss  # 0.6543
            recoder.update(domains[dm] + ' celoss', celoss.item(), epoch_idx)
            recoder.update(domains[dm] + ' ahloss', ahloss.item(), epoch_idx)
    recoder.save()
def train(model, data_loader, optimizer, scheduler, i):
    model.train()
    fin_loss = 0.0
    tk = tqdm(data_loader, desc="Epoch" + " [TRAIN] " + str(i + 1))
    for t, data in enumerate(tk):
        for k, v in data.items():
            data[k] = v.cuda()
        optimizer.zero_grad()
        _, loss = model(**data)
        loss.backward()
        optimizer.step()
        fin_loss += loss.item()
        tk.set_postfix({
            'loss': '%.6f' % float(fin_loss / (t + 1)),
            'LR': optimizer.param_groups[0]['lr']
        })
    scheduler.step()
    return fin_loss / len(data_loader)
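# A hedged sketch tying the `train` just above and the `eval` earlier (the one
# using Config.DEVICE) together. The model interface (forward(**batch) returns
# (predictions, loss)), Config, TinyModel, and the loader are all assumptions;
# a CUDA device is required because `train` above hard-codes `.cuda()`.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

class Config:
    DEVICE = torch.device('cuda')  # hypothetical stand-in for the real Config

class TinyModel(nn.Module):
    # Assumed interface: forward(**batch) -> (predictions, loss).
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)
    def forward(self, images, targets):
        logits = self.fc(images)
        return logits, nn.functional.cross_entropy(logits, targets)

def _collate(batch):
    xs, ys = zip(*batch)
    return {'images': torch.stack(xs), 'targets': torch.stack(ys)}

samples = [(torch.randn(4), torch.tensor(i % 2)) for i in range(32)]
loader = DataLoader(samples, batch_size=8, collate_fn=_collate)
model = TinyModel().to(Config.DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5)
for i in range(2):
    train_loss = train(model, loader, optimizer, scheduler, i)
    val_loss = eval(model, loader, i)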
def train(model, config, epoch):
    model.class_classifier.train()
    model.feature.train()
    iter_source = iter(config['source_train_loader'])
    iter_target = iter(config['target_train_loader'])
    len_source_loader = len(config['source_train_loader'])
    len_target_loader = len(config['target_train_loader'])
    num_iter = len_source_loader
    for i in range(1, num_iter):
        # `.next()` is Python 2 only; use the built-in next() instead.
        data_source, label_source = next(iter_source)
        data_target, _ = next(iter_target)
        if i % len_target_loader == 0:
            # The target loader is shorter than the source loader; restart it.
            iter_target = iter(config['target_train_loader'])
        if torch.cuda.is_available():
            data_source, label_source = data_source.cuda(), label_source.cuda()
            data_target = data_target.cuda()
        optimizer.zero_grad()
        preds = model.class_classify(data_source)
        loss_cls = criterion(preds, label_source)
        source = model.feature(data_source)
        source = source.view(source.size(0), -1)
        target = model.feature(data_target)
        target = target.view(target.size(0), -1)
        loss_mmd = mmd.mmd_linear(source, target)
        loss = loss_cls + config['mmd_gamma'] * loss_mmd
        if i % 50 == 0:
            print('loss_cls {}, loss_mmd {}, gamma {}, total loss {}'.format(
                loss_cls.item(), loss_mmd.item(), config['mmd_gamma'], loss.item()))
        loss.backward()
        optimizer.step()
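# `mmd.mmd_linear` is imported from elsewhere. One common linear-kernel MMD
# estimator it may correspond to is the squared distance between the two batch
# means in feature space; this is a hedged sketch of that variant, not
# necessarily the project's exact implementation.
import torch

def mmd_linear_sketch(source, target):
    """Linear-kernel MMD^2 between two (batch, features) activation matrices."""
    delta = source.mean(dim=0) - target.mean(dim=0)  # difference of batch means
    return torch.dot(delta, delta)                   # squared Euclidean norm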
def train(train_loader, model, criterion, optimizer, args):
    # switch to train mode
    loss_collect = []
    loss_samples_collect = []
    model.train()
    if args.freeze_BN:
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
    trainloader = tqdm(train_loader, desc='Epoch {} Training...'.format(args.cur_epoch))
    for i, (input, target) in enumerate(trainloader):
        if args.gpu is not None:
            # input = input.cuda(args.gpu, non_blocking=True)
            input = input.to(args.device)
            # target = target.cuda(args.gpu, non_blocking=True)
            target = target.to(args.device)
        # compute output
        output = model(input)
        loss, loss_samples = criterion(output, target)
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_collect.append(loss.item())
        loss_samples_collect.append(loss_samples.item())
        if i == len(train_loader) - 1:
            trainloader.set_description(
                'Epoch (Train) {0:}: Mean Loss [{1:.4f}]: Mean Samples Loss [{2:.4f}]'.format(
                    args.cur_epoch, np.mean(loss_collect), np.mean(loss_samples_collect)))
            print("mem={:.3f}MiB, max_mem={:.0f}MiB\n".format(
                torch.cuda.memory_allocated() / 1e6,
                torch.cuda.max_memory_allocated() / 1e6))
    return np.mean(loss_collect)
def validation(self, epoch):
    pbar = tqdm(total=self.n_valid,
                desc='Validation',
                unit='img',
                leave=True,
                bar_format='{l_bar}%s{bar:10}%s{r_bar}{bar:-10b}' % (Fore.BLUE, Fore.RESET))
    # Initialize the running score as a scalar, matching `training` above;
    # starting from a list would break the divisions in set_postfix below.
    mean_loss, mean_score = 0, 0
    self.net.eval()
    n_iter = len(self.loader_valid)
    with torch.no_grad():
        for k, btchs in enumerate(self.loader_valid):
            imgs = btchs[0].to(device=self.dvc_main, dtype=self.dtype)
            labels = btchs[1].to(device=self.dvc_main, dtype=self.dtype)
            preds = self.net(imgs)
            loss = self.criterion(preds, labels)
            img_dt = imgs.data
            label_dt = labels.data
            pred_dt = preds.data
            mean_score += F1Score(pred_dt, label_dt)
            mean_loss += loss.item()
            pbar.set_postfix(**{self.name_loss: mean_loss / (k + 1),
                                'F1Score': mean_score / (k + 1)})
            pbar.update(imgs.shape[0])
            if k == 0:
                init_dict = {f'Test{k}/': img_dt, f'Test{k}/true': label_dt}
                self.writing(epoch, self.writer_test, init_dict, opt='image')
                img_dict = {f'Test{k}/pred': pred_dt}
                self.writing(epoch, self.writer_test, img_dict, opt='image')
    self.scalar_dict = {self.name_loss: mean_loss / (n_iter + 1),
                        'F1Score': mean_score / (n_iter + 1)}
    pbar.close()
    self.writing(epoch, self.writer_test, self.scalar_dict, opt='scalar')
def __test_epoch(self):
    self.model.eval()
    losses = []
    progress = tqdm(enumerate(self.test_loader),
                    total=len(self.test_loader),
                    desc='Test',
                    file=sys.stdout)
    for batch_idx, data in progress:
        with torch.no_grad():
            samples, targets = data
            samples1, samples2 = samples
            if self.cuda:
                samples1 = samples1.cuda()
                samples2 = samples2.cuda()
                targets = targets.cuda()
            outputs = self.model((samples1, samples2))
            loss = self.criterion(outputs, targets.float())
            losses.append(loss.item())
            progress.set_description('Mean Test Loss: {:.4f}'.format(np.mean(losses)))
    return np.mean(losses)
# Fragment of a training-loop body (inputs/labels, ii, epoch, and the timers
# come from the enclosing loop, which is not part of this snippet).
if gpu_id >= 0:
    inputs, labels = inputs.cuda(), labels.cuda()
segmentation_outputs = net.forward(inputs)
mask_loss = criterion_mask(labels, segmentation_outputs)
# edge_loss = criterion_mask(labels, edge_outputs)
# viz.plot(name='mask_loss', y=mask_loss.data[0])
# viz.plot(name='edge_loss', y=edge_loss.data[0])
# loss = mask_loss + edge_loss
loss = mask_loss
running_loss_tr += loss.item()

# Print stuff
if ii % num_img_tr == (num_img_tr - 1):
    running_loss_tr = running_loss_tr / num_img_tr
    # writer.add_scalar('data/total_loss_epoch', running_loss_tr, epoch)
    print('[Epoch: %d, numImages: %5d]' % (epoch, ii * p['trainBatch'] + inputs.data.shape[0]))
    print('Loss: %f' % running_loss_tr)
    running_loss_tr = 0
    stop_time = timeit.default_timer()
    print("Execution time: " + str(stop_time - start_time) + "\n")

# Backward the averaged gradient
loss /= p['nAveGrad']
loss.backward()
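# The `loss /= p['nAveGrad']` above implements gradient accumulation: the
# gradients of nAveGrad minibatches are averaged before a single optimizer
# step. A self-contained sketch of the same pattern; all names here are
# illustrative, not taken from the original project.
import torch
import torch.nn as nn

model = nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
n_ave_grad = 4  # plays the role of p['nAveGrad']
optimizer.zero_grad()
for step in range(16):
    x, y = torch.randn(2, 8), torch.randn(2, 1)
    loss = nn.functional.mse_loss(model(x), y) / n_ave_grad
    loss.backward()  # gradients accumulate in the .grad buffers
    if (step + 1) % n_ave_grad == 0:
        optimizer.step()       # apply the averaged gradient
        optimizer.zero_grad()  # reset for the next accumulation window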