def valid_trainer(model, valid_loader, criterion):
    model.eval()
    loss_meter = AverageMeter()
    preds_probs = []
    gt_list = []
    with torch.no_grad():
        for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            valid_logits = model(imgs)
            valid_loss = criterion(valid_logits, gt_label)
            # for MCC, drop the sigmoid and use the raw logits:
            # valid_probs = valid_logits
            valid_probs = torch.sigmoid(valid_logits)
            preds_probs.append(valid_probs.cpu().numpy())
            loss_meter.update(to_scalar(valid_loss))
    valid_loss = loss_meter.avg
    print(f'valid loss: {valid_loss}')
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
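# `AverageMeter` and `to_scalar` are used throughout these snippets but not
# defined in this file. A minimal sketch assuming the usual semantics; the
# actual project versions may differ (some snippets below pass a name/format
# string or a window size to AverageMeter instead).
class AverageMeter:
    """Tracks the latest value and the running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def to_scalar(t):
    """Detach a 0-dim tensor to a Python float (assumed behavior)."""
    return t.detach().cpu().item()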
def predict_set(nets, dataloader, runtime_params):
    run_type = runtime_params['run_type']
    progbar = Progbar(len(dataloader.dataset), stateful_metrics=['run-type'])
    batch_time = AverageMeter()
    names = []
    pred_landmarks = np.array([])
    gt_landmarks = np.array([])
    with torch.no_grad():
        for i, (landmarks, imgs, img_paths) in enumerate(dataloader):
            s_time = time.time()
            imgs = imgs.cuda()
            names.extend(img_paths)
            net = nets[0]
            if 'half' in runtime_params.values():
                output = net(imgs.half())
            else:
                output = net(imgs)
            # flatten so concatenation with the initially empty 1-D buffers works
            output = output.cpu().numpy().reshape(-1)
            pred_landmarks = np.concatenate((pred_landmarks, output), axis=0)
            gt_landmarks = np.concatenate(
                (gt_landmarks, landmarks.data.numpy().reshape(-1)), axis=0)
            progbar.add(imgs.size(0), values=[('run-type', run_type)])
            batch_time.update(time.time() - s_time)
            if runtime_params['debug'] and i:
                break
    pred_landmarks = pred_landmarks.reshape((-1, 28, 2))
    gt_landmarks = gt_landmarks.reshape((-1, 28, 2))
    assert gt_landmarks.shape == pred_landmarks.shape
    # return the predictions, not the ground truth twice
    return gt_landmarks, pred_landmarks, names
def train(train_source_iter: ForeverDataIterator, train_target_iter: ForeverDataIterator,
          model: ImageClassifier, domain_adv: DomainAdversarialLoss, optimizer: SGD,
          lr_scheduler: StepwiseLR, epoch: int, args: argparse.Namespace):
    batch_time = AverageMeter('Time', ':5.2f')
    data_time = AverageMeter('Data', ':5.2f')
    losses = AverageMeter('Loss', ':6.2f')
    cls_accs = AverageMeter('Cls Acc', ':3.1f')
    domain_accs = AverageMeter('Domain Acc', ':3.1f')
    progress = ProgressMeter(
        args.iters_per_epoch,
        [batch_time, data_time, losses, cls_accs, domain_accs],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    domain_adv.train()

    end = time.time()
    for i in range(args.iters_per_epoch):
        lr_scheduler.step()

        # measure data loading time
        data_time.update(time.time() - end)

        x_s, labels_s = next(train_source_iter)
        x_t, _ = next(train_target_iter)
        x_s = x_s.to(device)
        x_t = x_t.to(device)
        labels_s = labels_s.to(device)

        # compute output
        x = torch.cat((x_s, x_t), dim=0)
        y, f = model(x)
        y_s, y_t = y.chunk(2, dim=0)
        f_s, f_t = f.chunk(2, dim=0)

        cls_loss = F.cross_entropy(y_s, labels_s)
        transfer_loss = domain_adv(f_s, f_t)
        domain_acc = domain_adv.domain_discriminator_accuracy
        loss = cls_loss + transfer_loss * args.trade_off

        cls_acc = accuracy(y_s, labels_s)[0]
        losses.update(loss.item(), x_s.size(0))
        cls_accs.update(cls_acc.item(), x_s.size(0))
        domain_accs.update(domain_acc.item(), x_s.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
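# `ForeverDataIterator` above is assumed to wrap a DataLoader so that `next()`
# never raises StopIteration, which is what lets the loop run a fixed number
# of iterations per epoch. A minimal sketch:
class ForeverDataIterator:
    def __init__(self, data_loader):
        self.data_loader = data_loader
        self.iter = iter(data_loader)

    def __next__(self):
        try:
            data = next(self.iter)
        except StopIteration:
            # restart the underlying loader when it is exhausted
            self.iter = iter(self.data_loader)
            data = next(self.iter)
        return data

    def __len__(self):
        return len(self.data_loader)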
def train_model(model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu,
                optimizer_center, criterion_center_loss, criterion_osm_caa, beta_ratio):
    model.train()
    losses = AverageMeter()
    center_loss_weight = 0.0005
    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        if use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()
        imgs, pids = Variable(imgs), Variable(pids)
        outputs, features = model(imgs)
        ide_loss = criterion_xent(outputs, pids)
        triplet_loss = criterion_htri(features, features, features, pids, pids, pids)
        center_loss = criterion_center_loss(features, pids)
        # hosm_loss = criterion_osm_caa(features, pids, model.module.classifier.classifier.weight.t())
        hosm_loss = criterion_osm_caa(features, pids, criterion_center_loss.centers.t())
        loss = ide_loss + (1 - beta_ratio) * triplet_loss \
            + center_loss * center_loss_weight + beta_ratio * hosm_loss
        optimizer.zero_grad()
        optimizer_center.zero_grad()
        loss.backward()
        optimizer.step()
        # undo the center-loss weight on the center gradients so the center
        # optimizer updates the centers at its own learning rate
        for param in criterion_center_loss.parameters():
            param.grad.data *= (1. / center_loss_weight)
        optimizer_center.step()
        losses.update(loss.data.item(), pids.size(0))
    return (losses.avg, ide_loss.item(), triplet_loss.item(), hosm_loss.item())
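# `criterion_center_loss` above exposes a learnable `centers` parameter that is
# also fed to the OSM-CAA loss. A minimal sketch of such a center loss (after
# Wen et al.), assuming `num_classes` and `feat_dim` are known; the project's
# actual version may differ:
import torch
import torch.nn as nn

class CenterLoss(nn.Module):
    def __init__(self, num_classes, feat_dim):
        super().__init__()
        self.centers = nn.Parameter(torch.randn(num_classes, feat_dim))

    def forward(self, features, labels):
        # squared distance of each feature to its own class center
        centers_batch = self.centers[labels]  # (B, feat_dim)
        return ((features - centers_batch) ** 2).sum(dim=1).mean() / 2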
def valid_trainer(model, valid_loader, criterion):
    model.eval()
    loss_meter = AverageMeter()
    preds_probs = []
    gt_list = []
    with torch.no_grad():
        for step, (imgs, gt_label, gt_depth, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            gt_depth = gt_depth.cuda()
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            # second model output is the reconstructed depth, supervised by gt_depth
            valid_logits, depth_pred = model(imgs)
            valid_loss = criterion(valid_logits, gt_label) + loss_autoencoder(depth_pred, gt_depth)
            valid_probs = torch.sigmoid(valid_logits)
            preds_probs.append(valid_probs.cpu().numpy())
            loss_meter.update(to_scalar(valid_loss))
    valid_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
def valid_trainer(model, valid_loader, criterion):
    model.eval()
    loss_meter = AverageMeter()
    preds_probs = []
    gt_list = []
    with torch.no_grad():
        for step, (imgs, depth, gt_label, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            valid_logits = model(imgs, depth)
            valid_loss = criterion(valid_logits, gt_label)
            valid_probs = torch.sigmoid(valid_logits)
            preds_probs.append(valid_probs.cpu().numpy())
            loss_meter.update(to_scalar(valid_loss))
    valid_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
def batch_trainer(epoch, model, train_loader, criterion, optimizer, loss):
    model.train()
    epoch_time = time.time()
    loss_meter = AverageMeter()
    batch_num = len(train_loader)
    gt_list = []
    preds_probs = []
    lr = optimizer.param_groups[0]['lr']
    for step, (imgs, gt_label, imgname) in enumerate(train_loader):
        batch_time = time.time()
        imgs, gt_label = imgs.cuda(), gt_label.cuda()
        train_logit_1, train_logit_2, train_logit_3, train_logit_4 = model(imgs)
        if loss == 'Multi_Level_Loss':
            # deep supervision: weight the shallower heads less than the final head
            train_loss = 0.1 * criterion(train_logit_1, gt_label) \
                + 0.3 * criterion(train_logit_2, gt_label) \
                + 0.7 * criterion(train_logit_3, gt_label) \
                + criterion(train_logit_4, gt_label)
        else:
            raise ValueError(f'unsupported loss: {loss}')
        train_loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=10.0)  # make a larger learning rate work
        optimizer.step()
        optimizer.zero_grad()
        loss_meter.update(to_scalar(train_loss))
        gt_list.append(gt_label.cpu().numpy())
        train_probs = torch.sigmoid(train_logit_4)
        preds_probs.append(train_probs.detach().cpu().numpy())
        log_interval = 20
        if (step + 1) % log_interval == 0 or (step + 1) % len(train_loader) == 0:
            print(f'{time_str()}, Step {step}/{batch_num} in Ep {epoch}, '
                  f'{time.time() - batch_time:.2f}s ',
                  f'train_loss:{loss_meter.val:.4f}')
    train_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    print(f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, '
          f'Loss: {loss_meter.avg:.4f}')
    return train_loss, gt_label, preds_probs
def train_epoch(current_epoch, loss_function, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank, debug):
    # track running averages
    progbar = Progbar(len(train_data_loader.dataset),
                      stateful_metrics=['epoch', 'config', 'lr'])
    batch_time = AverageMeter()
    end = time.time()
    losses = AverageMeter()
    max_iters = conf['optimizer']['schedule']['params']['max_iter']
    print("training epoch {}".format(current_epoch))
    model.train()
    for i, (landmarks, imgs, img_path) in enumerate(train_data_loader):
        numm = imgs.shape[0]
        optimizer.zero_grad()
        imgs = imgs.reshape((-1, imgs.size(-3), imgs.size(-2), imgs.size(-1)))
        imgs = Variable(imgs, requires_grad=True).cuda()
        landmarks = landmarks.cuda().float()
        output = model(imgs)
        loss = loss_function(output, landmarks)
        losses.update(loss.item(), imgs.size(0))
        summary_writer.add_scalar('train/loss', loss.item(),
                                  global_step=i + current_epoch * max_iters)
        summary_writer.add_scalar('train/lr', float(scheduler.get_lr()[-1]),
                                  global_step=i + current_epoch * max_iters)
        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * max_iters)
        if (i == max_iters - 1) or debug:
            break
        progbar.add(numm, values=[('epoch', current_epoch), ('loss', losses.avg),
                                  ("lr", float(scheduler.get_lr()[-1]))])
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr),
                                      global_step=current_epoch)
def batch_trainer(epoch, model, train_loader, criterion, optimizer):
    model.train()
    epoch_time = time.time()
    loss_meter = AverageMeter()
    batch_num = len(train_loader)
    gt_list = []
    preds_probs = []
    lr = optimizer.param_groups[1]['lr']
    for step, (imgs, gt_label, imgname) in enumerate(train_loader):
        batch_time = time.time()
        imgs, gt_label = imgs.cuda(), gt_label.cuda()
        feat_map, output = model(imgs)
        loss_list = [criterion(out, gt_label) for out in output]
        loss = sum(loss_list)
        # maximum voting: element-wise max over the five prediction heads
        output = torch.max(torch.max(torch.max(torch.max(output[0], output[1]),
                                               output[2]), output[3]), output[4])
        train_loss = loss
        optimizer.zero_grad()
        train_loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=10.0)  # make a larger learning rate work
        optimizer.step()
        loss_meter.update(to_scalar(train_loss))
        gt_list.append(gt_label.cpu().numpy())
        train_probs = torch.sigmoid(output)
        preds_probs.append(train_probs.detach().cpu().numpy())
        log_interval = 20
        if (step + 1) % log_interval == 0 or (step + 1) % len(train_loader) == 0:
            print(f'{time_str()}, Step {step}/{batch_num} in Ep {epoch}, '
                  f'{time.time() - batch_time:.2f}s ',
                  f'train_loss:{loss_meter.val:.4f}')
    train_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    print(f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, '
          f'Loss: {loss_meter.avg:.4f}')
    return train_loss, gt_label, preds_probs
def train(train_loader, model, criterion, optimizer):
    objs = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    for step, data in enumerate(train_loader):
        data_time.update(time.time() - end)
        x = data[0].cuda(non_blocking=True)
        target = data[1].cuda(non_blocking=True)
        # forward
        batch_start = time.time()
        logits = model(x)
        loss = criterion(logits, target)
        # backward
        optimizer.zero_grad()
        if args.opt_level is not None:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        if args.grad_clip > 0:
            torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.grad_clip)
        optimizer.step()
        batch_time.update(time.time() - batch_start)
        if step % args.print_freq == 0:
            # For better performance, don't accumulate these metrics every iteration,
            # since they may incur an allreduce and some host<->device syncs.
            prec1, prec5 = accuracy(logits, target, topk=(1, 5))
            if args.distributed:
                reduced_loss = reduce_tensor(loss.data)
                prec1 = reduce_tensor(prec1)
                prec5 = reduce_tensor(prec5)
            else:
                reduced_loss = loss.data
            objs.update(reduced_loss.item(), x.size(0))
            top1.update(prec1.item(), x.size(0))
            top5.update(prec5.item(), x.size(0))
            torch.cuda.synchronize()
            duration = 0 if step == 0 else time.time() - duration_start
            duration_start = time.time()
            if args.local_rank == 0:
                logging.info(
                    'TRAIN Step: %03d Objs: %e R1: %f R5: %f Duration: %ds BTime: %.3fs DTime: %.4fs',
                    step, objs.avg, top1.avg, top5.avg, duration, batch_time.avg, data_time.avg)
        end = time.time()
    return top1.avg, objs.avg
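# `accuracy(output, target, topk)` is assumed to be the usual top-k precision
# helper from the PyTorch examples, returning percentage tensors (hence the
# `.item()` calls above); a minimal sketch:
def accuracy(output, target, topk=(1,)):
    """Compute the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)  # (B, maxk) class indices
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res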
def train(self, epoch):
    cls_loss_ = AverageMeter()
    box_offset_loss_ = AverageMeter()
    landmark_loss_ = AverageMeter()
    total_loss_ = AverageMeter()
    accuracy_ = AverageMeter()
    self.scheduler.step()
    self.model.train()
    for batch_idx, (data, target) in enumerate(self.train_loader):
        gt_label = target['label']
        gt_bbox = target['bbox_target']
        gt_landmark = target['landmark_target']
        data, gt_label, gt_bbox, gt_landmark = (
            data.to(self.device), gt_label.to(self.device),
            gt_bbox.to(self.device).float(), gt_landmark.to(self.device).float())
        cls_pred, box_offset_pred, landmark_offset_pred = self.model(data)
        # compute the loss
        cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
        box_offset_loss = self.lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
        landmark_loss = self.lossfn.landmark_loss(gt_label, gt_landmark, landmark_offset_pred)
        total_loss = cls_loss + box_offset_loss * 0.5 + landmark_loss
        accuracy = self.compute_accuracy(cls_pred, gt_label)
        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        # store plain floats so the meters do not keep the autograd graph alive
        cls_loss_.update(cls_loss.item(), data.size(0))
        box_offset_loss_.update(box_offset_loss.item(), data.size(0))
        landmark_loss_.update(landmark_loss.item(), data.size(0))
        total_loss_.update(total_loss.item(), data.size(0))
        accuracy_.update(accuracy.item(), data.size(0))
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(
            epoch, batch_idx * len(data), len(self.train_loader.dataset),
            100. * batch_idx / len(self.train_loader), total_loss.item(), accuracy.item()))
    self.scalar_info['cls_loss'] = cls_loss_.avg
    self.scalar_info['box_offset_loss'] = box_offset_loss_.avg
    self.scalar_info['landmark_loss'] = landmark_loss_.avg
    self.scalar_info['total_loss'] = total_loss_.avg
    self.scalar_info['accuracy'] = accuracy_.avg
    self.scalar_info['lr'] = self.scheduler.get_lr()[0]
    if self.logger is not None:
        for tag, value in list(self.scalar_info.items()):
            self.logger.scalar_summary(tag, value, self.run_count)
        self.scalar_info = {}
    self.run_count += 1
    print("|===>Loss: {:.4f}".format(total_loss_.avg))
    return cls_loss_.avg, box_offset_loss_.avg, landmark_loss_.avg, total_loss_.avg, accuracy_.avg
def train_val(model, optimizer, train_loader, test_loader, epoch, margin=1.0,
              use_ohem=False, log_interval=100, test_interval=2000, is_cuda=True):
    loss = AverageMeter()
    batch_num = len(train_loader)
    for batch_idx, (data_a, data_p, data_n, target) in enumerate(train_loader):
        model.train()
        if is_cuda:
            data_a = data_a.cuda()
            data_p = data_p.cuda()
            data_n = data_n.cuda()
        data_a = Variable(data_a)
        data_p = Variable(data_p)
        data_n = Variable(data_n)
        target = Variable(target)
        optimizer.zero_grad()
        out_a = model(data_a)
        out_p = model(data_p)
        out_n = model(data_n)
        triploss_layer = TripletMarginLoss(margin, use_ohem=use_ohem)
        trip_loss = triploss_layer(out_a, out_p, out_n)
        trip_loss.backward()
        optimizer.step()
        loss.update(trip_loss.item())
        if (batch_idx + 1) % log_interval == 0:
            logging('Train-Epoch:{:04d}\tbatch:{:06d}/{:06d}\tloss:{:.04f}'
                    .format(epoch, batch_idx + 1, batch_num, trip_loss.item()))
        if (batch_idx + 1) % test_interval == 0:
            threshold, accuracy, mean_d_a_p, mean_d_a_n = best_test(model, test_loader)
            logging('Test-T-A Epoch {:04d}-{:06d} accuracy: {:.04f} threshold: {:.05} '
                    'ap_mean: {:.04f} an_mean: {:.04f}'
                    .format(epoch, batch_idx + 1, accuracy, threshold, mean_d_a_p, mean_d_a_n))
            cutoff = len(model.module.feat_model._modules)
            model_name = 'models/epoch_{:04d}-{:06d}_feat.weights'.format(epoch, batch_idx + 1)
            save_weights(model.module.feat_model, model_name, cutoff)
            logging('save model: {:s}'.format(model_name))
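# `TripletMarginLoss(margin, use_ohem=...)` above is project-specific. A
# minimal sketch of the non-OHEM case (the `use_ohem` branch is assumed and
# omitted here); the project's version may differ:
import torch.nn as nn
import torch.nn.functional as F

class TripletMarginLoss(nn.Module):
    def __init__(self, margin=1.0, use_ohem=False):
        super().__init__()
        self.margin = margin
        self.use_ohem = use_ohem

    def forward(self, anchor, positive, negative):
        d_ap = F.pairwise_distance(anchor, positive)
        d_an = F.pairwise_distance(anchor, negative)
        # hinge: push the negative at least `margin` further away than the positive
        return F.relu(d_ap - d_an + self.margin).mean()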
def valid_trainer(model, valid_loader, criterion):
    model.eval()
    loss_meter = AverageMeter()
    preds_probs = []
    gt_list = []
    with torch.no_grad():
        for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            valid_logit_1, valid_logit_2, valid_logit_3, valid_logit_4 = model(imgs)
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            # evaluate on the deepest head only
            valid_loss = criterion(valid_logit_4, gt_label)
            valid_probs = torch.sigmoid(valid_logit_4)
            preds_probs.append(valid_probs.detach().cpu().numpy())
            loss_meter.update(to_scalar(valid_loss))
    valid_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler,
                train_data_loader, summary_writer, conf, local_rank):
    losses = AverageMeter()
    mious = AverageMeter()
    iterator = tqdm(train_data_loader)
    model.train()
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in enumerate(iterator):
        imgs = sample["image"].cuda()
        masks = sample["mask"].cuda().float()
        masks_orig = sample["mask_orig"].cuda().float()
        out_mask = model(imgs)
        with torch.no_grad():
            pred = torch.softmax(out_mask, dim=1)
            argmax = torch.argmax(pred, dim=1)
            ious = miou_round(argmax, masks_orig).item()
            mious.update(ious, imgs.size(0))
        mask_loss = loss_functions["mask_loss"](out_mask, masks.contiguous())
        loss = mask_loss
        losses.update(loss.item(), imgs.size(0))
        iterator.set_description(
            "epoch: {}; lr {:.7f}; Loss ({loss.avg:.4f}); miou ({miou.avg:.4f}); "
            .format(current_epoch, scheduler.get_lr()[-1], loss=losses, miou=mious))
        optimizer.zero_grad()
        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * len(train_data_loader))
    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr),
                                      global_step=current_epoch)
        summary_writer.add_scalar('train/loss', float(losses.avg),
                                  global_step=current_epoch)
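# `miou_round` above is assumed to compute a mean IoU between the (already
# discretized) predicted masks and the ground truth. A minimal sketch for the
# binary case; the project's version likely handles its own class layout:
def miou_round(pred, target, eps=1e-7):
    pred = pred.float().view(pred.size(0), -1)
    target = target.float().view(target.size(0), -1)
    inter = (pred * target).sum(dim=1)
    union = pred.sum(dim=1) + target.sum(dim=1) - inter
    return ((inter + eps) / (union + eps)).mean()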
def main():
    args.n_resgroups = 5
    args.n_resblocks = 3
    args.n_feats = 64
    args.n_reduction = 16
    data_path = './data/valid/lr3'
    gt_path = './data/valid/hr'
    result_path = './track1_valid_data/'
    var_name = 'data'
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    model_path = './model/track1_model.pkl'
    save_point = torch.load(model_path)
    model_param = save_point['state_dict']
    model = make_model(args)
    model.load_state_dict(model_param)
    model = model.cuda()
    model.eval()
    mrae = AverageMeter()
    for mat_name in sorted(os.listdir(data_path)):
        mat_path_name = os.path.join(data_path, mat_name)
        f = h5py.File(mat_path_name, 'r')
        input_data = np.array(f.get(var_name))
        mat_name = mat_name[:-8] + '_tr1.mat'
        mat_path_name = os.path.join(gt_path, mat_name)
        f = h5py.File(mat_path_name, 'r')
        target = np.array(f.get(var_name))
        target = np.transpose(target, [2, 1, 0])
        input_data = input_data / 65535  # normalize 16-bit input to [0, 1]
        img_res = self_ensemble(model, input_data, target)
        MRAEs = cal_mrae(target, img_res)
        mat_name = mat_name[:-8] + '_tr1.mat'
        mat_dir = os.path.join(result_path, mat_name)
        save_matv73(mat_dir, var_name, img_res)
        mrae.update(MRAEs)
        print(mat_name)
        print(img_res.shape)
        print(MRAEs)
    print(mrae.avg)
def test(epoch, test_loader, save=True):
    global best_acc
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))
            # timing
            batch_time.update(time.time() - end)
            end = time.time()
            progress_bar(batch_idx, len(test_loader),
                         'Loss: {:.3f} | Acc1: {:.3f}% | Acc5: {:.3f}%'.format(
                             losses.avg, top1.avg, top5.avg))
    if save:
        writer.add_scalar('loss/test', losses.avg, epoch)
        writer.add_scalar('acc/test_top1', top1.avg, epoch)
        writer.add_scalar('acc/test_top5', top5.avg, epoch)
        is_best = False
        if top1.avg > best_acc:
            best_acc = top1.avg
            is_best = True
        print('Current best acc: {}'.format(best_acc))
        save_checkpoint(
            {
                'epoch': epoch,
                'model': args.model,
                'dataset': args.dataset,
                'state_dict': net.module.state_dict() if isinstance(net, nn.DataParallel)
                else net.state_dict(),
                'acc': top1.avg,
                'optimizer': optimizer.state_dict(),
            }, is_best, checkpoint_dir=log_dir)
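# `save_checkpoint` above is assumed to follow the common PyTorch pattern:
# write the latest state and copy it aside when it is the best so far.
# A minimal sketch:
import os
import shutil
import torch

def save_checkpoint(state, is_best, checkpoint_dir='.'):
    path = os.path.join(checkpoint_dir, 'checkpoint.pth.tar')
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(checkpoint_dir, 'model_best.pth.tar'))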
def batch_trainer(epoch, model, train_loader, criterion, optimizer, loss):
    model.train()
    epoch_time = time.time()
    loss_meter = AverageMeter()
    batch_num = len(train_loader)
    gt_list = []
    preds_probs = []
    lr = optimizer.param_groups[0]['lr']
    for step, (imgs, gt_label, gt_depth, imgname) in enumerate(train_loader):
        batch_time = time.time()
        imgs, gt_label, gt_depth = imgs.cuda(), gt_label.cuda(), gt_depth.cuda()
        # second model output is the reconstructed depth, supervised by gt_depth
        train_logits, depth_pred = model(imgs)
        if loss == 'BCE_LOSS':
            train_loss = criterion(train_logits, gt_label) + loss_autoencoder(depth_pred, gt_depth)
        else:
            raise ValueError(f'unsupported loss: {loss}')
        train_loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=10.0)  # make a larger learning rate work
        optimizer.step()
        optimizer.zero_grad()
        loss_meter.update(to_scalar(train_loss))
        gt_list.append(gt_label.cpu().numpy())
        train_probs = torch.sigmoid(train_logits)
        preds_probs.append(train_probs.detach().cpu().numpy())
        log_interval = 20
        if (step + 1) % log_interval == 0 or (step + 1) % len(train_loader) == 0:
            print(f'{time_str()}, Step {step}/{batch_num} in Ep {epoch}, '
                  f'{time.time() - batch_time:.2f}s ',
                  f'train_loss:{loss_meter.val:.4f}')
    train_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    print(f'Epoch {epoch}, LR {lr}, Train_Time {time.time() - epoch_time:.2f}s, '
          f'Loss: {loss_meter.avg:.4f}')
    return train_loss, gt_label, preds_probs
def train(epoch, model, classifier, criterion_cla, criterion_pair, optimizer, trainloader):
    batch_cla_loss = AverageMeter()
    batch_pair_loss = AverageMeter()
    corrects = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    classifier.train()
    end = time.time()
    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        imgs, pids = imgs.cuda(), pids.cuda()
        # Measure data loading time
        data_time.update(time.time() - end)
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward
        features = model(imgs)
        outputs = classifier(features)
        _, preds = torch.max(outputs.data, 1)
        # Compute loss
        cla_loss = criterion_cla(outputs, pids)
        pair_loss = criterion_pair(features, pids)
        loss = cla_loss + pair_loss
        # Backward + optimize
        loss.backward()
        optimizer.step()
        # Statistics
        corrects.update(torch.sum(preds == pids.data).float() / pids.size(0), pids.size(0))
        batch_cla_loss.update(cla_loss.item(), pids.size(0))
        batch_pair_loss.update(pair_loss.item(), pids.size(0))
        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
    print('Epoch{0} '
          'Time:{batch_time.sum:.1f}s '
          'Data:{data_time.sum:.1f}s '
          'ClaLoss:{cla_loss.avg:.4f} '
          'PairLoss:{pair_loss.avg:.4f} '
          'Acc:{acc.avg:.2%} '.format(epoch + 1, batch_time=batch_time,
                                      data_time=data_time, cla_loss=batch_cla_loss,
                                      pair_loss=batch_pair_loss, acc=corrects))
def extract(test_loader, model):
    batch_time = AverageMeter(10)
    model.eval()
    features = []
    with torch.no_grad():
        end = time.time()
        for i, input in enumerate(test_loader):
            # compute output
            output = model(input)
            features.append(output.data.cpu().numpy())
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
    return np.vstack(features)
def validate(val_queue, model):
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    for data in tqdm.tqdm(val_queue):
        x = data[0].cuda(non_blocking=True)
        target = data[1].cuda(non_blocking=True)
        with torch.no_grad():
            logits = model(x)
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        n = x.size(0)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)
    return top1.avg, top5.avg
def train(self, epoch):
    cls_loss_ = AverageMeter()
    accuracy_ = AverageMeter()
    self.model.train()
    for batch_idx, (data, gt_label) in enumerate(self.train_loader):
        data, gt_label = data.to(self.device), gt_label.to(self.device)
        cls_pred = self.model(data)
        # compute the loss
        cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
        accuracy = self.compute_accuracy(cls_pred, gt_label)
        self.optimizer.zero_grad()
        cls_loss.backward()
        self.optimizer.step()
        # store a float so the meter does not keep the autograd graph alive
        cls_loss_.update(cls_loss.item(), data.size(0))
        accuracy_.update(accuracy, data.size(0))
        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}\tTrain Accuracy: {:.6f}'
                  .format(epoch, batch_idx * len(data), len(self.train_loader.dataset),
                          100. * batch_idx / len(self.train_loader), cls_loss.item(), accuracy))
    self.scalar_info['cls_loss'] = cls_loss_.avg
    self.scalar_info['accuracy'] = accuracy_.avg
    self.scalar_info['lr'] = self.lr
    # if self.logger is not None:
    #     for tag, value in list(self.scalar_info.items()):
    #         self.logger.scalar_summary(tag, value, self.run_count)
    #     self.scalar_info = {}
    #     self.run_count += 1
    print("|===>Loss: {:.4f} Train Accuracy: {:.6f} ".format(cls_loss_.avg, accuracy_.avg))
    return cls_loss_.avg, accuracy_.avg
def train(train_queue, model, criterion, optimizer):
    objs = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    for step, data in enumerate(train_queue):
        data_time.update(time.time() - end)
        x = data[0].cuda(non_blocking=True)
        target = data[1].cuda(non_blocking=True)
        # forward
        batch_start = time.time()
        logits = model(x)
        loss = criterion(logits, target)
        # backward
        optimizer.zero_grad()
        loss.backward()
        if args.grad_clip > 0:
            nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        batch_time.update(time.time() - batch_start)
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        n = x.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)
        if step % args.print_freq == 0:
            duration = 0 if step == 0 else time.time() - duration_start
            duration_start = time.time()
            logging.info(
                'TRAIN Step: %03d Objs: %e R1: %f R5: %f Duration: %ds BTime: %.3fs DTime: %.4fs',
                step, objs.avg, top1.avg, top5.avg, duration, batch_time.avg, data_time.avg)
        end = time.time()
    return top1.avg, objs.avg
def valid_trainer(model, valid_loader, criterion):
    model.eval()
    loss_meter = AverageMeter()
    preds_probs = []
    gt_list = []
    with torch.no_grad():
        for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
            imgs = imgs.cuda()
            gt_label = gt_label.cuda()
            gt_list.append(gt_label.cpu().numpy())
            gt_label[gt_label == -1] = 0
            valid_logits, valid_logits_2 = model(imgs)
            # evaluate on the second head only
            valid_loss = criterion(valid_logits_2, gt_label)
            valid_probs = torch.sigmoid(valid_logits_2)
            preds_probs.append(valid_probs.cpu().numpy())
            loss_meter.update(to_scalar(valid_loss))
    valid_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
def train_one_epoch(model, data_queue, opt, gm, epoch, args):
    def train_func(image, im_info, gt_boxes):
        with gm:
            loss_dict = model(image=image, im_info=im_info, gt_boxes=gt_boxes)
            gm.backward(loss_dict["total_loss"])
            loss_list = list(loss_dict.values())
        opt.step().clear_grad()
        return loss_list

    meter = AverageMeter(record_len=model.cfg.num_losses)
    time_meter = AverageMeter(record_len=2)
    log_interval = model.cfg.log_interval
    tot_step = model.cfg.nr_images_epoch // (args.batch_size * dist.get_world_size())
    for step in range(tot_step):
        adjust_learning_rate(opt, epoch, step, model.cfg, args)
        data_tik = time.time()
        mini_batch = next(data_queue)
        data_tok = time.time()
        tik = time.time()
        loss_list = train_func(image=mge.tensor(mini_batch["data"]),
                               im_info=mge.tensor(mini_batch["im_info"]),
                               gt_boxes=mge.tensor(mini_batch["gt_boxes"]))
        tok = time.time()
        time_meter.update([tok - tik, data_tok - data_tik])
        if dist.get_rank() == 0:
            info_str = "e%d, %d/%d, lr:%f, "
            loss_str = ", ".join(["{}:%f".format(loss) for loss in model.cfg.losses_keys])
            time_str = ", train_time:%.3fs, data_time:%.3fs"
            log_info_str = info_str + loss_str + time_str
            meter.update([loss.numpy() for loss in loss_list])
            if step % log_interval == 0:
                logger.info(log_info_str, epoch, step, tot_step,
                            opt.param_groups[0]["lr"], *meter.average(), *time_meter.average())
                meter.reset()
                time_meter.reset()
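# The MegEngine snippet above uses a vector-valued AverageMeter with a
# `record_len` argument; a minimal sketch of what it is assumed to do:
class VectorAverageMeter:
    def __init__(self, record_len=1):
        self.record_len = record_len
        self.reset()

    def reset(self):
        self.sum = [0.0] * self.record_len
        self.cnt = 0

    def update(self, vals):
        # `vals` is a sequence of length `record_len`
        for i, v in enumerate(vals):
            self.sum[i] += v
        self.cnt += 1

    def average(self):
        return [s / max(1, self.cnt) for s in self.sum]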
def validate(val_loader, model, criterion):
    objs = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    for step, data in enumerate(val_loader):
        x = data[0].cuda(non_blocking=True)
        target = data[1].cuda(non_blocking=True)
        with torch.no_grad():
            logits = model(x)
            loss = criterion(logits, target)
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        if args.distributed:
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data
        objs.update(reduced_loss.item(), x.size(0))
        top1.update(prec1.item(), x.size(0))
        top5.update(prec5.item(), x.size(0))
        if args.local_rank == 0 and step % args.print_freq == 0:
            duration = 0 if step == 0 else time.time() - duration_start
            duration_start = time.time()
            logging.info('VALIDATE Step: %03d Objs: %e R1: %f R5: %f Duration: %ds',
                         step, objs.avg, top1.avg, top5.avg, duration)
    return top1.avg, top5.avg, objs.avg
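# `reduce_tensor` in the distributed snippets is assumed to average a metric
# across all workers; a minimal sketch using torch.distributed:
import torch.distributed as dist

def reduce_tensor(tensor):
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= dist.get_world_size()
    return rt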
def valid_trainer(model, valid_loader, criterion):
    model.eval()
    grad_cam = GradCam(model=model, target_layer_names=["layer4"], use_cuda=True)
    loss_meter = AverageMeter()
    preds_probs = []
    gt_list = []
    # gradients are needed for Grad-CAM, so no torch.no_grad() here
    for step, (imgs, gt_label, imgname) in enumerate(tqdm(valid_loader)):
        imgs = imgs.cuda()
        gt_label = gt_label.cuda()
        gt_list.append(gt_label.cpu().numpy())
        gt_label[gt_label == -1] = 0
        valid_logits = model(imgs)
        # mask_cam = grad_cam(imgs, 22)  # mask: bs, 256, 192
        valid_loss = criterion(valid_logits, gt_label)
        valid_probs = torch.sigmoid(valid_logits)
        preds_probs.append(valid_probs.detach().cpu().numpy())
        loss_meter.update(to_scalar(valid_loss))
        # show_on_image(imgname, mask_cam, 22, gt_label)
    valid_loss = loss_meter.avg
    gt_label = np.concatenate(gt_list, axis=0)
    preds_probs = np.concatenate(preds_probs, axis=0)
    return valid_loss, gt_label, preds_probs
def validate(val_queue, model, criterion):
    objs = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # model.eval()  # disabled: keep BN in train mode so moving averages are not used
    model.train()
    for step, (x, target) in enumerate(val_queue):
        x = x.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        with torch.no_grad():
            logits, _ = model(x, sampling=True, mode='gumbel')
            loss = criterion(logits, target)
        # reset switches of log_alphas
        model.module.reset_switches()
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        n = x.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        if step % args.print_freq == 0:
            logging.info('VALIDATE Step: %04d Objs: %f R1: %f R5: %f',
                         step, objs.avg, top1.avg, top5.avg)
    return top1.avg
def validate(val_queue, model, criterion):
    objs = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    for step, data in enumerate(val_queue):
        x = data[0].cuda(non_blocking=True)
        target = data[1].cuda(non_blocking=True)
        with torch.no_grad():
            logits = model(x)
            loss = criterion(logits, target)
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        n = x.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)
        if step % args.print_freq == 0:
            duration = 0 if step == 0 else time.time() - duration_start
            duration_start = time.time()
            logging.info('VALID Step: %03d Objs: %e R1: %f R5: %f Duration: %ds',
                         step, objs.avg, top1.avg, top5.avg, duration)
    return top1.avg, top5.avg, objs.avg
def train(self, epoch):
    self.scheduler.step()
    self.model.train()
    landmark_loss_ = AverageMeter()
    for batch_idx, sample in enumerate(self.train_loader):
        image = sample['image']
        gt_landmarks = sample['landmarks']
        image, gt_landmarks = image.to(self.device), gt_landmarks.to(self.device)
        pred_landmarks = self.model(image)
        loss = self.lossfn(pred_landmarks, gt_landmarks)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # store a float so the meter does not keep the autograd graph alive
        landmark_loss_.update(loss.item(), image.size(0))
        if batch_idx % 20 == 0:
            print("Train Epoch: {:03} [{:05}/{:05} ({:03.0f}%)]\tLoss:{:.6f} LR: {:.7f}"
                  .format(epoch, batch_idx * len(sample['image']),
                          len(self.train_loader.dataset),
                          100. * batch_idx / len(self.train_loader),
                          loss.item(), self.optimizer.param_groups[0]['lr']))
    self.scalar_info['loss'] = landmark_loss_.avg
    self.scalar_info['lr'] = self.scheduler.get_lr()[0]
    if self.logger is not None:
        for tag, value in list(self.scalar_info.items()):
            self.logger.scalar_summary(tag, value, self.run_count)
        self.scalar_info = {}
    self.run_count += 1
    print("|===>Loss: {:.4f}".format(landmark_loss_.avg))
    self.evaluate(epoch, image, gt_landmarks, pred_landmarks)
def train(self, epoch):
    cls_loss_ = AverageMeter()
    accuracy_ = AverageMeter()
    accuracy_valid_ = AverageMeter()
    # feed the training set through the model
    self.scheduler.step()
    self.model.train()
    for batch_idx, (data, gt_label) in enumerate(self.train_loader):
        data, gt_label = data.to(self.device), gt_label.to(self.device)
        cls_pred, feature = self.model(data)
        # compute the loss
        cls_loss = self.lossfn.cls_loss(gt_label, cls_pred)
        accuracy = self.compute_accuracy(cls_pred, gt_label)
        self.optimizer.zero_grad()
        cls_loss.backward()
        self.optimizer.step()
        cls_loss_.update(cls_loss.item(), data.size(0))
        accuracy_.update(accuracy, data.size(0))
        if batch_idx % 20 == 10:
            print(batch_idx)
            print(cls_loss.item())
    # feed the validation set through the model
    with torch.no_grad():
        self.model.eval()
        for batch_idx, (data, gt_label) in enumerate(self.valid_loader):
            data, gt_label = data.to(self.device), gt_label.to(self.device)
            cls_pred, feature = self.model(data)
            accuracy_valid = self.compute_accuracy(cls_pred, gt_label)
            accuracy_valid_.update(accuracy_valid, data.size(0))
    # record the metrics
    self.scalar_info['cls_loss'] = cls_loss_.avg
    self.scalar_info['accuracy'] = accuracy_.avg
    self.scalar_info['lr'] = self.scheduler.get_lr()[0]
    # if self.logger is not None:
    #     for tag, value in list(self.scalar_info.items()):
    #         self.logger.scalar_summary(tag, value, self.run_count)
    #     self.scalar_info = {}
    #     self.run_count += 1
    print("\r\nEpoch: {}|===>Train Loss: {:.8f} Train Accuracy: {:.6f} valid Accuracy: {:.6f}\r\n"
          .format(epoch, cls_loss_.avg, accuracy_.avg, accuracy_valid_.avg))
    return cls_loss_.avg, accuracy_.avg, accuracy_valid_.avg