def validate(val_loader, model, criterion, epoch, log=None, tf_writer=None):
    """Run one full evaluation pass over `val_loader`.

    Records loss and top-1/top-5 precision, prints progress every
    `args.print_freq` batches, optionally mirrors output to a `log` file
    and a TensorBoard-style `tf_writer`, and returns the average top-1.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()
            # compute output; `input` stays on CPU here — presumably the
            # model/DataParallel moves it, TODO confirm against caller
            output = model(input)
            loss = criterion(output, target)
            # measure accuracy and record loss (weighted by batch size)
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                # NOTE: `output` is reused here as the log string, shadowing
                # the model output (safe: output is not read again this iter)
                output = ('Test: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, top1=top1, top5=top5))
                print(output)
                if log is not None:
                    log.write(output + '\n')
                    log.flush()
    # epoch-level summary
    output = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    print(output)
    if log is not None:
        log.write(output + '\n')
        log.flush()
    if tf_writer is not None:
        tf_writer.add_scalar('loss/test', losses.avg, epoch)
        tf_writer.add_scalar('acc/test_top1', top1.avg, epoch)
        tf_writer.add_scalar('acc/test_top5', top5.avg, epoch)
    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Train `model` for one epoch.

    Toggles partial batch-norm via `model.module.partialBN` based on
    `args.no_partialbn`, clips gradients when `args.clip_gradient` is set,
    and prints running statistics every `args.print_freq` batches.
    Returns nothing; metrics are only printed.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)
    # switch to train mode
    model.train()
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.cuda()
        # Variable wrappers are legacy (no-ops on modern PyTorch)
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        # compute gradient and do SGD step; zero_grad after step keeps
        # gradients cleared for the next iteration
        loss.backward()
        if args.clip_gradient is not None:
            total_norm = clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        optimizer.zero_grad()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5,
                          # NOTE(review): displayed lr is scaled by 0.1 — the
                          # original TODO suggests this is a display hack;
                          # the optimizer itself uses the unscaled lr
                          lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output)
def validate(model, epoch):
    """Evaluate `model.net` on a loader built by `get_val_loader(model)`.

    Builds its own CrossEntropyLoss, prints progress every 100 batches,
    and returns the average top-1 precision.  `epoch` is currently unused.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    '''
    Make sure that the validation works so I can confirm that there is an improvement/loss in accuracy when running this model.
    Run model
    '''
    val_loader = get_val_loader(model)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()
            # compute output — note the forward goes through `model.net`,
            # not `model` itself (model is presumably a wrapper object)
            output = model.net(input)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % 100 == 0:
                output = (
                    'Test: [{0}/{1}]\t'
                    #'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        i, len(val_loader), batch_time=batch_time,
                        loss=losses, top1=top1, top5=top5))
                print(output)
    output = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    print(output)
    return top1.avg
def test(val_loader, model, epoch):
    """Multi-crop / multi-clip evaluation.

    Each sample arrives with `args.random_crops * args.temporal_clips`
    duplicated views folded into the batch dimension; logits are
    softmaxed and averaged across the views before scoring.  Returns
    mean AP when `args.multi_class`, otherwise average top-1.
    """
    batch_time = AverageMeter(args.print_freq)
    top1 = AverageMeter(args.print_freq)
    top5 = AverageMeter(args.print_freq)
    mAPs = AverageMeter(args.print_freq)
    # switch to evaluate mode
    model.eval()
    dup_samples = args.random_crops * args.temporal_clips
    end = time.time()
    total_num = 0
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            if i % 50 == 0:
                print('Test Complete: %d / %d' % (i, len(val_loader)))
            input = input.cuda()
            target = target.cuda()
            # unfold the duplicated views into the batch dimension
            sizes = input.shape
            input = input.view((sizes[0] * dup_samples, -1, sizes[2], sizes[3]))
            # compute output
            output = model(input)
            # fold views back and average their softmax scores
            sizes = output.shape
            output = output.view((sizes[0] // dup_samples, -1, sizes[1]))
            output = torch.nn.functional.softmax(output, 2)
            output = torch.mean(output, 1)
            # NOTE: `num` counts duplicated views (post-view batch size),
            # so total_num is samples * dup_samples
            num = input.size(0)
            total_num += num
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if args.multi_class:
                from ops.calculate_map import calculate_mAP
                mAP = calculate_mAP(output.data, target)
                mAPs.update(mAP, num)
            else:
                prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
                top1.update(prec1.item(), num)
                top5.update(prec5.item(), num)
    if args.multi_class:
        final_mAP = mAPs.avg
        output = (' * Map {:.3f}\t total_num={}'.format(final_mAP, total_num))
    else:
        output = (' * Prec@1 {:.3f}\t Prec@5 {:.3f}\ttotal_num={}'.format(
            top1.avg, top5.avg, total_num))
    print(output)
    if args.multi_class:
        return mAPs.avg
    else:
        return top1.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Train a two-stream (RGB `r`, flow `f`) model for one epoch.

    Prints running loss/top-1/top-5 every `args.print_freq` batches.
    Clips gradients when `args.clip_gradient` is set.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    for i, (r, f, path, label, video_index, seg_index, verify) in enumerate(train_loader):
        r = r.cuda()
        f = f.cuda()
        output, _ = model(r, f)
        # FIX: `.cuda(async=True)` is a SyntaxError on Python 3.7+ (`async`
        # became a keyword); `non_blocking` is the supported replacement.
        target_var = label.cuda(non_blocking=True).type(torch.cuda.LongTensor)
        loss = criterion(output, target_var)
        # measure accuracy and record loss (weighted by batch size)
        prec1, prec5 = accuracy(output, target_var, topk=(1, 5))
        losses.update(loss.item(), r.shape[0])
        top1.update(prec1.item(), r.shape[0])
        top5.update(prec5.item(), r.shape[0])
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        if args.clip_gradient is not None:
            # FIX: clip_grad_norm (no underscore) is deprecated/removed;
            # use the in-place variant.
            total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                        args.clip_gradient)
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            # NOTE(review): data_time is displayed but never updated in this
            # loop, so it always reads 0 — confirm whether that is intended.
            print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                   'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       epoch, i, len(train_loader), batch_time=batch_time,
                       data_time=data_time, loss=losses, top1=top1, top5=top5,
                       lr=optimizer.param_groups[-1]['lr'])))
def train(train_loader, model, criterion, optimizer, epoch, logger=None,
          scheduler=None):
    """Train for one epoch with an optional per-step LR scheduler.

    Returns (avg loss, avg top-1, avg top-5).

    Fixes: `scheduler` and `logger` default to None but were used
    unconditionally, so calling with the defaults crashed; both are now
    guarded (logging falls back to print).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)
    model.train()
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        target = target.cuda()
        input_var = input.cuda()
        target_var = target
        output = model(input_var)
        loss = criterion(output, target_var)
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        optimizer.zero_grad()
        loss.backward()
        if args.clip_gradient is not None:
            clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        # FIX: guard — scheduler defaults to None
        if scheduler is not None:
            scheduler.step()
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            msg = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                   'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       epoch, i, len(train_loader), batch_time=batch_time,
                       data_time=data_time, loss=losses, top1=top1, top5=top5,
                       lr=optimizer.param_groups[-1]['lr']))  # TODO
            # FIX: guard — logger defaults to None; fall back to stdout
            if logger is not None:
                logger.info(msg)
            else:
                print(msg)
    return losses.avg, top1.avg, top5.avg
def validate(val_loader, model, criterion, iter, logger=None):
    """Evaluate a two-stream (RGB `r`, flow `f`) model on up to 500 batches.

    Returns (avg top-1, avg top-5).  The 500-batch cap mirrors the
    original quick-evaluation behavior.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (r, f, path, label, video_index, seg_index, verify) in enumerate(val_loader):
        if i == 500:
            break
        with torch.no_grad():
            r = r.cuda()
            f = f.cuda()
            # FIX: `.cuda(async=True)` is a SyntaxError on Python 3.7+
            # (`async` became a keyword); use non_blocking instead.
            target_var = label.cuda(non_blocking=True).type(torch.cuda.LongTensor)
            output, _ = model(r, f)
            loss = criterion(output, target_var)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target_var, topk=(1, 5))
            losses.update(loss.item(), r.shape[0])
            top1.update(prec1.item(), r.shape[0])
            top5.update(prec5.item(), r.shape[0])
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print(('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.6f} ({loss.avg:.6f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                           i, len(val_loader), batch_time=batch_time,
                           loss=losses, top1=top1, top5=top5)))
    # if i == 99: break # Randomly Test 100 batch data (which is in the training data, so this is not a really evaluation).
    print((
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses)))
    return top1.avg, top5.avg
def validate(val_loader, model, criterion, logger=None):
    """Distributed evaluation: loss/prec tensors are all-reduced via
    `reduce_tensor` before being accumulated.

    Returns (avg top-1, avg top-5, avg loss).

    Fix: `logger` defaults to None but `logger.info` was called
    unconditionally; now falls back to print when no logger is given.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()
            output = model(input)
            loss = criterion(output, target)
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            # average metrics across distributed workers
            loss = reduce_tensor(loss)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                msg = ('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                       'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                           i, len(val_loader), batch_time=batch_time,
                           loss=losses, top1=top1, top5=top5))
                # FIX: guard — logger defaults to None
                if logger is not None:
                    logger.info(msg)
                else:
                    print(msg)
    msg = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    if logger is not None:
        logger.info(msg)
    else:
        print(msg)
    return top1.avg, top5.avg, losses.avg
def validate(val_loader, model, criterion, iter=None, logger=None):
    """Evaluate a fusion model fed with a [rgb, flow] list; top-1 only.

    Returns the average top-1 precision.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    accu = AverageMeter()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (r, f, path, label, video_index, seg_index, verify) in enumerate(val_loader):
        with torch.no_grad():
            r = r.cuda()
            f = f.cuda()
            # FIX: `.cuda(async=True)` is a SyntaxError on Python 3.7+
            # (`async` became a keyword); use non_blocking instead.
            target_var = label.cuda(non_blocking=True).type(torch.cuda.LongTensor)
            output = model([r, f])
            loss = criterion(output, target_var)
            # measure accuracy and record loss
            prec1, = accuracy(output, target_var, topk=(1,))
            losses.update(loss.item(), r.shape[0])
            accu.update(prec1.item(), r.shape[0])
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print(('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.6f} ({loss.avg:.6f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                           i, len(val_loader), batch_time=batch_time,
                           loss=losses, top1=accu)))
    print(('Testing Results: Prec@1 {top1.avg:.3f} Loss {loss.avg:.5f}'
           .format(top1=accu, loss=losses)))
    return accu.avg
def validate(val_loader, model, criterion, epoch, log, tf_writer):
    """Single-pass evaluation reporting only loss and top-1 accuracy.

    Writes the summary line to stdout and `log`, records scalars on
    `tf_writer`, and returns the average top-1 precision.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.eval()
    with torch.no_grad():
        for batch, labels in val_loader:
            batch = batch.cuda()
            labels = labels.cuda()
            logits = model(batch)
            batch_loss = criterion(logits, labels)
            # top-1 precision for this batch, weighted by batch size
            acc1, = accuracy(logits.data, labels, topk=(1, ))
            n = batch.size(0)
            loss_meter.update(batch_loss.item(), n)
            acc_meter.update(acc1.item(), n)
    summary = ('Validate: Prec@1 {top1.avg:.2f} Loss {loss.avg:.3f}'.format(
        top1=acc_meter, loss=loss_meter))
    print(summary)
    log.write(summary + '\n')
    log.flush()
    tf_writer.add_scalar('loss/val', loss_meter.avg, epoch)
    tf_writer.add_scalar('acc/val_top1', acc_meter.avg, epoch)
    return acc_meter.avg
def validate(val_loader, model, criterion, epoch, base_model_gflops,
             gflops_list, g_meta, exp_full_path, args):
    """Evaluate an adaptive-resolution/skip model, tracking per-layer gate
    masks and upper-bound vs. real GFLOPs alongside accuracy.

    Returns (mAP, mmAP, avg top-1, avg top-5, usage string).  When
    `args.ada_reso_skip` is off, behaves as a plain evaluation with
    usage_str == "Base Model".
    """
    batch_time, top1, top5 = get_average_meters(3)
    all_results = []
    all_targets = []
    tau = args.init_tau
    if args.ada_reso_skip:
        # batenet/AdaBNInc accumulate mask sums in-place (start from 0);
        # other arches (CGNet path) collect mask tensors in lists
        if "batenet" in args.arch or "AdaBNInc" in args.arch:
            mask_stack_list_list = [0 for _ in gflops_list]
        else:
            mask_stack_list_list = [[] for _ in gflops_list]
        upb_batch_gflops_list = []
        real_batch_gflops_list = []
    # lazily initialized from the first batch's loss dict (keys unknown here)
    losses_dict = {}
    # switch to evaluate mode
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, input_tuple in enumerate(val_loader):
            # input and target
            batchsize = input_tuple[0].size(0)
            input_data = input_tuple[0].cuda(non_blocking=True)
            target = input_tuple[-1].cuda(non_blocking=True)
            # model forward function
            output, mask_stack_list, _, gate_meta = \
                model(input=[input_data], tau=tau, is_training=False,
                      curr_step=0)
            # measure losses, accuracy and predictions
            if args.ada_reso_skip:
                upb_gflops_tensor, real_gflops_tensor = compute_gflops_by_mask(
                    mask_stack_list, base_model_gflops, gflops_list, g_meta,
                    args)
                loss_dict = compute_losses(criterion, output, target,
                                           mask_stack_list, upb_gflops_tensor,
                                           real_gflops_tensor, epoch, model,
                                           base_model_gflops, args)
                upb_batch_gflops_list.append(upb_gflops_tensor)
                real_batch_gflops_list.append(real_gflops_tensor)
            else:
                # target[:, 0] selects the primary (single) label
                loss_dict = {"loss": criterion(output, target[:, 0])}
            prec1, prec5 = accuracy(output.data, target[:, 0], topk=(1, 5))
            all_results.append(output)
            all_targets.append(target)
            # record loss and accuracy; create one meter per loss name on
            # the first batch
            if len(losses_dict) == 0:
                losses_dict = {
                    loss_name: get_average_meters(1)[0]
                    for loss_name in loss_dict
                }
            for loss_name in loss_dict:
                losses_dict[loss_name].update(loss_dict[loss_name].item(),
                                              batchsize)
            top1.update(prec1.item(), batchsize)
            top5.update(prec5.item(), batchsize)
            if args.ada_reso_skip:
                # gather masks
                for layer_i, mask_stack in enumerate(mask_stack_list):
                    if "batenet" in args.arch:
                        mask_stack_list_list[layer_i] += torch.sum(
                            mask_stack.detach(), dim=0)  # TODO removed .cpu()
                    elif "AdaBNInc" in args.arch:
                        mask_stack_list_list[layer_i] += torch.sum(
                            mask_stack.detach(), dim=0)  # TODO removed .cpu()
                    else:  # TODO CGNet
                        mask_stack_list_list[layer_i].append(
                            mask_stack.detach())  # TODO removed .cpu()
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print_output = (
                    'Test: [{0:03d}/{1:03d}] '
                    'Time {batch_time.val:.3f}({batch_time.avg:.3f})\t'
                    'Loss{loss.val:.4f}({loss.avg:.4f})'
                    'Prec@1 {top1.val:.3f}({top1.avg:.3f}) '
                    'Prec@5 {top5.val:.3f}({top5.avg:.3f})\t'.format(
                        i, len(val_loader), batch_time=batch_time,
                        loss=losses_dict["loss"], top1=top1, top5=top5))
                # append auxiliary losses (skip the main loss and mask terms)
                for loss_name in losses_dict:
                    if loss_name == "loss" or "mask" in loss_name:
                        continue
                    print_output += ' {header:s} {loss.val:.3f}({loss.avg:.3f})'. \
                        format(header=loss_name[0], loss=losses_dict[loss_name])
                print(print_output)
    if args.ada_reso_skip:
        if "cgnet" in args.arch:
            # concatenate the per-batch mask tensors collected above
            for layer_i in range(len(mask_stack_list_list)):
                mask_stack_list_list[layer_i] = torch.cat(
                    mask_stack_list_list[layer_i], dim=0)
        upb_batch_gflops = torch.mean(torch.stack(upb_batch_gflops_list))
        real_batch_gflops = torch.mean(torch.stack(real_batch_gflops_list))
    mAP, _ = cal_map(
        torch.cat(all_results, 0).cpu(),
        torch.cat(all_targets, 0)[:, 0:1].cpu())  # single-label mAP
    mmAP, _ = cal_map(
        torch.cat(all_results, 0).cpu(),
        torch.cat(all_targets, 0).cpu())  # multi-label mAP
    print(
        'Testing: mAP {mAP:.3f} mmAP {mmAP:.3f} Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(mAP=mAP, mmAP=mmAP, top1=top1, top5=top5,
                loss=losses_dict["loss"]))
    if args.ada_reso_skip:
        usage_str = get_policy_usage_str(upb_batch_gflops, real_batch_gflops)
        print(usage_str)
    else:
        usage_str = "Base Model"
    return mAP, mmAP, top1.avg, top5.avg, usage_str
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer,
          scaler, batch_size, use_amp, rank=None):
    """Train one epoch with optional CUDA AMP and rank-gated logging.

    Only rank 0 times batches, prints, writes `log`, and records
    TensorBoard scalars; gradient work runs on every rank.

    Fix: the meters and `end` were created only when `rank == 0`, but
    `losses`/`top1`/`top5` were updated unconditionally, raising a
    NameError on every other rank.  All meters are now created
    unconditionally (cheap); logging remains rank-0 only.
    """
    # FIX: create meters on every rank — they are updated on every rank
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)
    # switch to train mode
    model.train()
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time (timing stats only reported on rank 0)
        if rank == 0:
            data_time.update(time.time() - end)
        target = target.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        if use_amp:
            # compute output under autocast for mixed precision
            with torch.cuda.amp.autocast(enabled=use_amp):
                output = model(input_var)
                loss = criterion(output, target_var)
        else:
            output = model(input_var)
            loss = criterion(output, target_var)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        # compute gradient and do SGD step (scaled when using AMP)
        if use_amp:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        if args.clip_gradient is not None:
            if use_amp:
                # unscale before clipping so the threshold applies to
                # true gradient magnitudes
                scaler.unscale_(optimizer)
            total_norm = clip_grad_norm_(model.parameters(),
                                         args.clip_gradient)
        if use_amp:
            scaler.step(optimizer)
            scaler.update()
        else:
            optimizer.step()
        optimizer.zero_grad()
        # measure elapsed time
        if rank == 0:
            batch_time.update(time.time() - end)
            end = time.time()
        if i % args.print_freq == 0 and rank == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5,
                          # NOTE(review): displayed lr is scaled by 0.1 per
                          # the original TODO; the optimizer uses the raw lr
                          lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output)
            log.write(output + '\n')
            log.flush()
            tf_writer.add_scalar('loss/train', losses.avg, epoch)
            tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
            tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
            tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'],
                                 epoch)
def validate(val_loader, model, criterion, epoch, logger, exp_full_path,
             tf_writer=None):
    """Evaluate an adaptive (resolution/skip policy) model.

    Tracks accuracy/efficiency losses, policy decisions `r`, optional
    backbone outputs, and logs to stdout, wandb, and optionally a
    TensorBoard writer.  Returns
    (mAP, mmAP, avg top-1, usage_str or None, gflops).
    """
    batch_time, losses, top1, top5 = get_average_meters(4)
    tau = 0  # TODO(yue)
    all_results = []
    all_targets = []
    all_all_preds = []
    # hard-coded switch: backbone per-branch results are not collected
    i_dont_need_bb = True
    if use_ada_framework:
        tau = get_current_temperature(epoch)
        alosses, elosses = get_average_meters(2)
        iter_list = args.backbone_list
        if not i_dont_need_bb:
            all_bb_results = [[] for _ in range(len(iter_list))]
            if args.policy_also_backbone:
                all_bb_results.append([])
        each_terms = get_average_meters(NUM_LOSSES)
        r_list = []
        if args.save_meta:
            name_list = []
            indices_list = []
    # when saving meta, the last two tuple slots are (name, indices)
    meta_offset = -2 if args.save_meta else 0
    # switch to evaluate mode
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, input_tuple in enumerate(val_loader):
            target = input_tuple[-1].cuda()
            input = input_tuple[0]
            # compute output
            if args.ada_reso_skip:
                if args.real_scsampler:
                    output, r, real_pred, lite_pred = model(
                        input=input_tuple[:-1 + meta_offset], tau=tau)
                    if args.sal_rank_loss:
                        acc_loss = cal_sal_rank_loss(real_pred, lite_pred,
                                                     target)
                    else:
                        acc_loss = get_criterion_loss(
                            criterion, lite_pred.mean(dim=1), target)
                else:
                    if args.save_meta and args.save_all_preds:
                        output, r, all_preds = model(
                            input=input_tuple[:-1 + meta_offset], tau=tau)
                        acc_loss = get_criterion_loss(criterion, output,
                                                      target)
                    else:
                        if args.use_reinforce:
                            output, r, r_log_prob, base_outs = model(
                                input=input_tuple[:-1 + meta_offset], tau=tau)
                            acc_loss = get_criterion_loss(criterion, output,
                                                          target)
                        else:
                            output, r, feat_outs, base_outs = model(
                                input=input_tuple[:-1 + meta_offset], tau=tau)
                            acc_loss = get_criterion_loss(criterion, output,
                                                          target)
                if use_ada_framework:
                    acc_loss, eff_loss, each_losses = compute_every_losses(
                        r, acc_loss, epoch)
                    if args.use_reinforce and not args.freeze_policy:
                        if args.separated:
                            # REINFORCE with per-sample baseline:
                            # recompute acc/eff losses sample by sample
                            acc_loss_items = []
                            eff_loss_items = []
                            for b_i in range(output.shape[0]):
                                acc_loss_item = get_criterion_loss(
                                    criterion, output[b_i:b_i + 1],
                                    target[b_i:b_i + 1])
                                acc_loss_item, eff_loss_item, each_losses_item = compute_every_losses(
                                    r[b_i:b_i + 1], acc_loss_item, epoch)
                                acc_loss_items.append(acc_loss_item)
                                eff_loss_items.append(eff_loss_item)
                            if args.no_baseline:
                                b_acc = 0
                                b_eff = 0
                            else:
                                # batch-mean baselines for variance reduction
                                b_acc = sum(acc_loss_items) / len(acc_loss_items)
                                b_eff = sum(eff_loss_items) / len(eff_loss_items)
                            log_p = torch.mean(r_log_prob, dim=1)
                            acc_loss = 0
                            eff_loss = 0
                            for b_i in range(len(acc_loss_items)):
                                acc_loss += -log_p[b_i] * (acc_loss_items[b_i] - b_acc)
                                eff_loss += -log_p[b_i] * (eff_loss_items[b_i] - b_eff)
                            acc_loss = acc_loss / len(acc_loss_items)
                            eff_loss = eff_loss / len(eff_loss_items)
                            each_losses = [0 * each_l for each_l in each_losses]
                        else:
                            sum_log_prob = torch.sum(r_log_prob) / r_log_prob.shape[0] / r_log_prob.shape[1]
                            acc_loss = - sum_log_prob * acc_loss
                            eff_loss = - sum_log_prob * eff_loss
                            each_losses = [-sum_log_prob * each_l
                                           for each_l in each_losses]
                    alosses.update(acc_loss.item(), input.size(0))
                    elosses.update(eff_loss.item(), input.size(0))
                    for l_i, each_loss in enumerate(each_losses):
                        each_terms[l_i].update(each_loss, input.size(0))
                    loss = acc_loss + eff_loss
                else:
                    loss = acc_loss
            else:
                output = model(input=[input])
                loss = get_criterion_loss(criterion, output, target)
            # TODO(yue)
            all_results.append(output)
            all_targets.append(target)
            if not i_dont_need_bb:
                for bb_i in range(len(all_bb_results)):
                    all_bb_results[bb_i].append(base_outs[:, bb_i])
            if args.save_meta and args.save_all_preds:
                all_all_preds.append(all_preds)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target[:, 0], topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if use_ada_framework:
                r_list.append(r.cpu().numpy())
                if args.save_meta:
                    name_list += input_tuple[-3]
                    indices_list.append(input_tuple[-2])
            if i % args.print_freq == 0:
                print_output = ('Test: [{0:03d}/{1:03d}] '
                                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                                'Loss {loss.val:.4f} ({loss.avg:.4f})'
                                'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                                    i, len(val_loader),
                                    batch_time=batch_time, loss=losses,
                                    top1=top1, top5=top5))
                wandb.log({"Test Loss": losses.val,
                           "Test Prec@1": top1.val,
                           "Test Prec@5": top5.val})
                if use_ada_framework:
                    # last sample's policy decisions, decoded for display
                    roh_r = reverse_onehot(r[-1, :, :].cpu().numpy())
                    print_output += ' a {aloss.val:.4f} ({aloss.avg:.4f}) e {eloss.val:.4f} ({eloss.avg:.4f}) r {r}'.format(
                        aloss=alosses, eloss=elosses,
                        r=elastic_list_print(roh_r))
                    print_output += extra_each_loss_str(each_terms)
                print(print_output)
    mAP, _ = cal_map(torch.cat(all_results, 0).cpu(),
                     torch.cat(all_targets, 0)[:, 0:1].cpu())  # TODO(yue) single-label mAP
    mmAP, _ = cal_map(torch.cat(all_results, 0).cpu(),
                      torch.cat(all_targets, 0).cpu())  # TODO(yue) multi-label mAP
    print('Testing: mAP {mAP:.3f} mmAP {mmAP:.3f} Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
          .format(mAP=mAP, mmAP=mmAP, top1=top1, top5=top5, loss=losses))
    if not i_dont_need_bb:
        bbmmaps = []
        bbprec1s = []
        all_targets_cpu = torch.cat(all_targets, 0).cpu()
        for bb_i in range(len(all_bb_results)):
            bb_results_cpu = torch.mean(
                torch.cat(all_bb_results[bb_i], 0), dim=1).cpu()
            bb_i_mmAP, _ = cal_map(bb_results_cpu,
                                   all_targets_cpu)  # TODO(yue) multi-label mAP
            bbmmaps.append(bb_i_mmAP)
            bbprec1, = accuracy(bb_results_cpu, all_targets_cpu[:, 0],
                                topk=(1,))
            bbprec1s.append(bbprec1)
        print("bbmmAP: " + " ".join(["{0:.3f}".format(bb_i_mmAP)
                                     for bb_i_mmAP in bbmmaps]))
        print("bb_Acc: " + " ".join(["{0:.3f}".format(bbprec1)
                                     for bbprec1 in bbprec1s]))
    gflops = 0
    if use_ada_framework:
        usage_str, gflops = get_policy_usage_str(r_list,
                                                 model.module.reso_dim)
        print(usage_str)
        if args.save_meta:  # TODO save name, label, r, result
            npa = np.concatenate(r_list)
            npb = np.stack(name_list)
            npc = torch.cat(all_results).cpu().numpy()
            npd = torch.cat(all_targets).cpu().numpy()
            if args.save_all_preds:
                npe = torch.cat(all_all_preds).cpu().numpy()
            else:
                npe = np.zeros(1)
            npf = torch.cat(indices_list).cpu().numpy()
            np.savez("%s/meta-val-%s.npy" % (exp_full_path, logger._timestr),
                     rs=npa, names=npb, results=npc, targets=npd,
                     all_preds=npe, indices=npf)
    if tf_writer is not None:
        tf_writer.add_scalar('loss/test', losses.avg, epoch)
        tf_writer.add_scalar('acc/test_top1', top1.avg, epoch)
        tf_writer.add_scalar('acc/test_top5', top5.avg, epoch)
    return mAP, mmAP, top1.avg, usage_str if use_ada_framework else None, gflops
def train(train_loader, model, criterion, optimizer, epoch, logger,
          exp_full_path, tf_writer):
    """Train an adaptive (resolution/skip policy) model for one epoch.

    Supports the Gumbel-temperature (`tau`) schedule, optional REINFORCE
    policy training, gradient clipping, wandb/TensorBoard logging.
    Returns the policy usage string (or None outside the ada framework).
    """
    batch_time, data_time, losses, top1, top5 = get_average_meters(5)
    tau = 0
    if use_ada_framework:
        tau = get_current_temperature(epoch)
        alosses, elosses = get_average_meters(2)
        each_terms = get_average_meters(NUM_LOSSES)
        r_list = []
    # when saving meta, the last two tuple slots are (name, indices)
    meta_offset = -2 if args.save_meta else 0
    model.module.partialBN(not args.no_partialbn)
    # switch to train mode
    model.train()
    end = time.time()
    # NOTE(review): printed lr is scaled by 0.1 (display only, as elsewhere
    # in this file); the optimizer uses the unscaled value
    print("#%s# lr:%.4f\ttau:%.4f" % (
        args.exp_header, optimizer.param_groups[-1]['lr'] * 0.1,
        tau if use_ada_framework else 0))
    for i, input_tuple in enumerate(train_loader):
        data_time.update(time.time() - end)  # TODO(yue) measure data loading time
        target = input_tuple[-1].cuda()
        target_var = torch.autograd.Variable(target)
        input = input_tuple[0]
        if args.ada_reso_skip:
            input_var_list = [torch.autograd.Variable(input_item)
                              for input_item in input_tuple[:-1 + meta_offset]]
            if args.real_scsampler:
                output, r, real_pred, lite_pred = model(input=input_var_list,
                                                        tau=tau)
                if args.sal_rank_loss:
                    acc_loss = cal_sal_rank_loss(real_pred, lite_pred,
                                                 target_var)
                else:
                    acc_loss = get_criterion_loss(
                        criterion, lite_pred.mean(dim=1), target_var)
            else:
                if args.use_reinforce:
                    output, r, r_log_prob, base_outs = model(
                        input=input_var_list, tau=tau)
                    acc_loss = get_criterion_loss(criterion, output,
                                                  target_var)
                else:
                    output, r, feat_outs, base_outs = model(
                        input=input_var_list, tau=tau)
                    acc_loss = get_criterion_loss(criterion, output,
                                                  target_var)
            if use_ada_framework:
                acc_loss, eff_loss, each_losses = compute_every_losses(
                    r, acc_loss, epoch)
                if args.use_reinforce and not args.freeze_policy:
                    if args.separated:
                        # REINFORCE with per-sample rewards and an optional
                        # batch-mean baseline
                        acc_loss_items = []
                        eff_loss_items = []
                        for b_i in range(output.shape[0]):
                            acc_loss_item = get_criterion_loss(
                                criterion, output[b_i:b_i + 1],
                                target_var[b_i:b_i + 1])
                            acc_loss_item, eff_loss_item, each_losses_item = compute_every_losses(
                                r[b_i:b_i + 1], acc_loss_item, epoch)
                            acc_loss_items.append(acc_loss_item)
                            eff_loss_items.append(eff_loss_item)
                        if args.no_baseline:
                            b_acc = 0
                            b_eff = 0
                        else:
                            b_acc = sum(acc_loss_items) / len(acc_loss_items)
                            b_eff = sum(eff_loss_items) / len(eff_loss_items)
                        log_p = torch.mean(r_log_prob, dim=1)
                        # reported losses are the plain means; the REINFORCE
                        # surrogate goes into intended_*_loss below
                        acc_loss = sum(acc_loss_items) / len(acc_loss_items)
                        eff_loss = sum(eff_loss_items) / len(eff_loss_items)
                        if args.detach_reward:
                            acc_loss_vec = (torch.stack(acc_loss_items) - b_acc).detach()
                            eff_loss_vec = (torch.stack(eff_loss_items) - b_eff).detach()
                        else:
                            acc_loss_vec = (torch.stack(acc_loss_items) - b_acc)
                            eff_loss_vec = (torch.stack(eff_loss_items) - b_eff)
                        intended_acc_loss = torch.mean(log_p * acc_loss_vec)
                        intended_eff_loss = torch.mean(log_p * eff_loss_vec)
                        each_losses = [0 * each_l for each_l in each_losses]
                    else:
                        # NOTE(review): this branch never sets
                        # intended_acc/eff_loss, yet the line below reads
                        # them — looks like only `separated` mode is
                        # exercised with use_reinforce; confirm upstream
                        sum_log_prob = torch.sum(r_log_prob) / r_log_prob.shape[0] / r_log_prob.shape[1]
                        acc_loss = - sum_log_prob * acc_loss
                        eff_loss = - sum_log_prob * eff_loss
                        each_losses = [-sum_log_prob * each_l
                                       for each_l in each_losses]
                    intended_loss = intended_acc_loss + intended_eff_loss
                alosses.update(acc_loss.item(), input.size(0))
                elosses.update(eff_loss.item(), input.size(0))
                for l_i, each_loss in enumerate(each_losses):
                    each_terms[l_i].update(each_loss, input.size(0))
                loss = acc_loss + eff_loss
            else:
                loss = acc_loss
        else:
            input_var = torch.autograd.Variable(input)
            output = model(input=[input_var])
            loss = get_criterion_loss(criterion, output, target_var)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target[:, 0], topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        # compute gradient and do SGD step (REINFORCE uses the surrogate)
        if args.use_reinforce and not args.freeze_policy:
            intended_loss.backward()
        else:
            loss.backward()
        if args.clip_gradient is not None:
            clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        optimizer.zero_grad()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if use_ada_framework:
            r_list.append(r.detach().cpu().numpy())
        if i % args.print_freq == 0:
            print_output = ('Epoch:[{0:02d}][{1:03d}/{2:03d}] '
                            'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                            '{data_time.val:.3f} ({data_time.avg:.3f})\t'
                            'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                            'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                            'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                                epoch, i, len(train_loader),
                                batch_time=batch_time, data_time=data_time,
                                loss=losses, top1=top1, top5=top5))  # TODO
            wandb.log({"Train Loss val": losses.val,
                       "Train Prec@1 val": top1.val,
                       "Train Prec@5 val": top5.val})
            if use_ada_framework:
                # last sample's policy decisions, decoded for display
                roh_r = reverse_onehot(r[-1, :, :].detach().cpu().numpy())
                print_output += ' a {aloss.val:.4f} ({aloss.avg:.4f}) e {eloss.val:.4f} ({eloss.avg:.4f}) r {r}'.format(
                    aloss=alosses, eloss=elosses, r=elastic_list_print(roh_r))
                print_output += extra_each_loss_str(each_terms)
            if args.show_pred:
                print_output += elastic_list_print(
                    output[-1, :].detach().cpu().numpy())
            print(print_output)
    if use_ada_framework:
        usage_str, gflops = get_policy_usage_str(r_list,
                                                 model.module.reso_dim)
        print(usage_str)
    if tf_writer is not None:
        tf_writer.add_scalar('loss/train', losses.avg, epoch)
        tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
        tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
        tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
    return usage_str if use_ada_framework else None
def train(labeled_trainloader, unlabeled_trainloader, model, criterion,
          optimizer, epoch, log):
    """Run one epoch of semi-supervised training.

    Combines a supervised loss on labeled clips with, depending on the
    epoch/args, an instance contrastive loss, a group contrastive loss and a
    pseudo-label loss on unlabeled clips (two temporal views per clip —
    presumably fast/slow frame rates; confirm against the dataset code).

    Args:
        labeled_trainloader: yields (input, target) labeled batches.
        unlabeled_trainloader: yields (images_fast, images_slow) pairs.
        model: DataParallel-wrapped network (accessed via model.module).
        criterion: supervised loss.
        optimizer: optimizer over model parameters.
        epoch: current epoch; selects the training phase.
        log: open file-like object for text logging.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    supervised_losses = AverageMeter()
    contrastive_losses = AverageMeter()
    group_contrastive_losses = AverageMeter()
    pl_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    model = model.cuda()
    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    # Unlabeled data is consumed only after the supervised warm-up
    # (epoch >= sup_thresh) or in the fine-tuning phase.
    # NOTE: zip() stops at the shorter of the two loaders.
    if epoch >= args.sup_thresh or (args.use_finetuning
                                    and epoch >= args.finetune_start_epoch):
        data_loader = zip(labeled_trainloader, unlabeled_trainloader)
    else:
        data_loader = labeled_trainloader

    end = time.time()
    for i, data in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # reset per-batch auxiliary losses (stay zero in phases that
        # do not produce them)
        contrastive_loss = torch.tensor(0.0).cuda()
        pl_loss = torch.tensor(0.0).cuda()
        loss = torch.tensor(0.0).cuda()
        group_contrastive_loss = torch.tensor(0.0).cuda()

        if epoch >= args.sup_thresh or (args.use_finetuning
                                        and epoch >= args.finetune_start_epoch):
            (labeled_data, unlabeled_data) = data
            images_fast, images_slow = unlabeled_data
            images_slow = images_slow.cuda()
            images_fast = images_fast.cuda()
            images_slow = torch.autograd.Variable(images_slow)
            images_fast = torch.autograd.Variable(images_fast)

            # contrastive_loss
            output_fast = model(images_fast)
            # slow view is only needed before the fine-tuning phase
            if not args.use_finetuning or epoch < args.finetune_start_epoch:
                output_slow = model(images_slow, unlabeled=True)
            output_fast_detach = output_fast.detach()
            if epoch >= args.sup_thresh and epoch < args.finetune_start_epoch:
                # instance-level contrastive loss between the two views;
                # fast branch is detached (no gradient through it)
                contrastive_loss = simclr_loss(
                    torch.softmax(output_fast_detach, dim=1),
                    torch.softmax(output_slow, dim=1))
                if args.use_group_contrastive:
                    grp_unlabeled_8seg = get_group(output_fast_detach)
                    grp_unlabeled_4seg = get_group(output_slow)
                    group_contrastive_loss = compute_group_contrastive_loss(
                        grp_unlabeled_8seg, grp_unlabeled_4seg)
            elif args.use_finetuning and epoch >= args.finetune_start_epoch:
                # fine-tuning phase: confident predictions become
                # pseudo-labels, masked by args.threshold
                pseudo_label = torch.softmax(output_fast_detach, dim=-1)
                max_probs, targets_pl = torch.max(pseudo_label, dim=-1)
                mask = max_probs.ge(args.threshold).float()
                targets_pl = torch.autograd.Variable(targets_pl)
                pl_loss = (F.cross_entropy(
                    output_fast, targets_pl, reduction='none') * mask).mean()
        else:
            labeled_data = data

        # supervised pass on the labeled batch
        input, target = labeled_data
        target = target.cuda()
        input = input.cuda()
        input = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        output = model(input)
        loss = criterion(output, target_var)

        total_loss = loss + args.gamma * contrastive_loss + group_contrastive_loss + args.gamma_finetune * pl_loss

        # measure accuracy and record loss; unsupervised terms are weighted
        # by the effective (labeled + mu*unlabeled) batch size
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        if epoch >= args.sup_thresh:
            total_losses.update(total_loss.item(),
                                input.size(0) + args.mu * input.size(0))
        else:
            total_losses.update(total_loss.item(), input.size(0))
        supervised_losses.update(loss.item(), input.size(0))
        contrastive_losses.update(contrastive_loss.item(),
                                  input.size(0) + args.mu * input.size(0))
        group_contrastive_losses.update(
            group_contrastive_loss.item(),
            input.size(0) + args.mu * input.size(0))
        pl_losses.update(pl_loss.item(),
                         input.size(0) + args.mu * input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        total_loss.backward()

        if args.clip_gradient is not None:
            total_norm = clip_grad_norm_(model.parameters(),
                                         args.clip_gradient)

        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            output = (
                'Epoch: [{0}][{1}], lr: {lr:.5f}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'TotalLoss {total_loss.val:.4f} ({total_loss.avg:.4f})\t'
                'Supervised Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Contrastive_Loss {contrastive_loss.val:.4f} ({contrastive_loss.avg:.4f})\t'
                'Group_contrastive_Loss {group_contrastive_loss.val:.4f} ({group_contrastive_loss.avg:.4f})\t'
                'Pseudo_Loss {pl_loss.val:.4f} ({pl_loss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch, i, batch_time=batch_time, data_time=data_time,
                    total_loss=total_losses, loss=supervised_losses,
                    contrastive_loss=contrastive_losses,
                    group_contrastive_loss=group_contrastive_losses,
                    pl_loss=pl_losses, top1=top1, top5=top5,
                    lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output)
            log.write(output + '\n')
            log.flush()
def validate(val_loader, model, criterion, epoch, log=None, tf_writer=None):
    """Evaluate for one epoch and dump per-sample top-5 predictions.

    Besides the usual loss / Prec@1 / Prec@5 reporting, writes a TSV file
    ``log/val_results_epoch{epoch}.txt`` with one row per validation
    sample: running index, target label, and the five highest-scoring
    class ids.

    Args:
        val_loader: loader yielding (input, target) batches.
        model: network to evaluate (switched to eval mode here).
        criterion: loss used for reporting only.
        epoch: epoch index, used for TB logging and the dump filename.
        log: optional open file-like for text logging.
        tf_writer: optional TensorBoard SummaryWriter.

    Returns:
        Average top-1 precision over the validation set.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    all_preds = []   # per-batch (batch, 5) arrays of top-5 class indices
    all_target = []  # per-batch target arrays
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            all_target.append(target.cpu().numpy())
            all_preds.append(
                output.data.topk(5, 1, True, True)[1].cpu().numpy())
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                output = ('Test: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, top1=top1, top5=top5))
                print(output)
                if log is not None:
                    log.write(output + '\n')
                    log.flush()

    output = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    print(output)
    if log is not None:
        log.write(output + '\n')
        log.flush()

    # Dump per-sample predictions. Ensure the output directory exists —
    # previously open() raised FileNotFoundError on a fresh checkout
    # without a 'log/' directory.
    os.makedirs('log', exist_ok=True)
    columns = [
        'idx', 'target', 'pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5'
    ]
    lines = []
    # j (not i) so the outer loop index is not shadowed
    for b in range(len(all_target)):
        for j in range(len(all_target[b])):
            preds_formatted = [
                s.rjust(6) for s in all_preds[b][j].astype(str).tolist()
            ]
            lines.append(
                str(all_target[b][j]).rjust(6) + '\t' +
                '\t'.join(preds_formatted))
    with open('log/val_results_epoch{}.txt'.format(str(epoch)), 'w') as f:
        header = '\t'.join([s.rjust(6) for s in columns])
        f.write(header + '\n')
        for idx, line in enumerate(lines):
            f.write(str(idx).rjust(6) + '\t' + line + '\n')

    if tf_writer is not None:
        tf_writer.add_scalar('loss/test', losses.avg, epoch)
        tf_writer.add_scalar('acc/test_top1', top1.avg, epoch)
        tf_writer.add_scalar('acc/test_top5', top5.avg, epoch)

    return top1.avg
def train(train_loader, model, criterion, kl_loss, logsoftmax, softmax,
          optimizer, epoch, log, tf_writer):
    """One epoch of distributed multi-modality mutual-learning training.

    Each rank trains on one modality (args.modality[args.rank]). Rank 0
    owns the data loader; it broadcasts the non-rank-0 modality channels
    and the targets, and the other ranks reconstruct their inputs from
    that broadcast. When world_size > 1 a KL term pulls each rank's
    predictions toward the average of the other ranks' predictions.

    Args:
        kl_loss, logsoftmax, softmax: modules combined into the mutual
            KL-divergence term.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    loss_kl = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Per-segment channel counts per modality: RGB=3, Flow=10, RGBDiff=18.
    # total = channels of all modalities; shift = channels of modality 0
    # (rank 0's own modality, which is not part of the broadcast tensor).
    # start_ind/end_ind = this rank's channel range within its input.
    total = 0
    shift = 0
    for i, moda in enumerate(args.modality):
        tmp = total
        if moda == 'RGB':
            total += 3
        elif moda == 'Flow':
            total += 10
        elif moda == 'RGBDiff':
            total += 18
        if i == 0:
            shift = total
        if i == args.rank and i > 0:
            start_ind = tmp - shift
            end_ind = total - shift
        elif i == args.rank and i == 0:
            start_ind = 0
            end_ind = total

    # inds: per-segment channel indices this rank feeds to its model;
    # send_inds (rank 0 only): channels broadcast to the other ranks.
    if args.rank == 0:
        inds = []
        for x in range(args.num_segments):
            inds.extend(list(range(x * total + start_ind,
                                   x * total + end_ind)))
        send_inds = []
        for x in range(args.num_segments):
            send_inds.extend(
                list(range(x * total + end_ind, x * total + total)))
    else:
        inds = []
        for x in range(args.num_segments):
            inds.extend(
                list(
                    range(x * (total - shift) + start_ind,
                          x * (total - shift) + end_ind)))

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    # Non-zero ranks have no loader; they just iterate the same number of
    # batches as rank 0 (train-list length / batch size) and receive data
    # via broadcast.
    if args.rank == 0:
        iter_through = train_loader
    else:
        iter_through = range(
            int(len([x for x in open(args.train_list[0])]) / args.batch_size))

    end = time.time()
    for i, data in enumerate(iter_through):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.rank == 0:
            input, target = data
            target = target.cuda(args.gpus[-1])
            input = input.cuda(args.gpus[0])
            if args.world_size > 1:
                torch.distributed.broadcast(input[:, send_inds].contiguous(),
                                            0)
                torch.distributed.broadcast(target, 0)
        else:
            # placeholder buffers filled by the broadcast from rank 0
            # (spatial size hard-coded to 224x224)
            input = torch.zeros(
                (args.batch_size,
                 (total - shift) * args.num_segments, 224,
                 224)).cuda(args.gpus[0])
            target = torch.zeros((args.batch_size, ),
                                 dtype=torch.int64).cuda(args.gpus[-1])
            torch.distributed.broadcast(input, 0)
            torch.distributed.broadcast(target, 0)

        input_var = torch.autograd.Variable(input[:, inds].contiguous())
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var).cuda(args.gpus[-1])
        loss1 = criterion(output, target_var)

        if args.world_size > 1:
            # mutual-learning term: average the OTHER ranks' outputs
            # (all-reduce sum minus own output, divided by world_size-1)
            # NOTE(review): `distr` is presumably an alias of
            # torch.distributed imported at the top of the file — confirm.
            reduce_output = output.clone().detach()
            distr.all_reduce(reduce_output)
            reduce_output = (reduce_output -
                             output.detach()) / (args.world_size - 1)
            loss2 = kl_loss(logsoftmax(output),
                            softmax(reduce_output.detach()))
        else:
            loss2 = torch.tensor(0.)
        loss = loss1 + loss2

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss1.item(), input.size(0))
        loss_kl.update(loss2.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        loss.backward()

        if args.clip_gradient is not None:
            total_norm = clip_grad_norm_(model.parameters(),
                                         args.clip_gradient)

        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss1.val:.4f} ({loss1.avg:.4f})\t'
                      'LossKL {loss2.val:.4f} ({loss2.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(iter_through),
                          batch_time=batch_time, data_time=data_time,
                          loss1=losses, loss2=loss_kl, top1=top1, top5=top5,
                          lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('loss/mutual', loss_kl.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def v_train(train_loader, val_loader, model, num_class, vnet, criterion,
            valcriterion, optimizer, epoch, log, tf_writer):
    """Meta-learning ("learning to reweight") epoch with throwaway copies.

    For every training batch:
      1. build a temporary copy of the weighting net (vnet_temp) and a
         fresh virtual model (v_TSN) initialized from `model`, and take a
         virtual gradient step on the vnet_temp-weighted training loss;
      2. update vnet_temp so the virtually-updated model does well on a
         validation batch, then copy its weights back into `vnet`;
      3. take the real optimizer step on the vnet-weighted training loss.

    NOTE(review): constructing v_TSN + DataParallel + an Adam optimizer
    inside the batch loop is very expensive; consider hoisting them out.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    val_loader_iter = iter(val_loader)

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # temporary copy of the weighting net (sizes 1 -> 100 -> 1;
        # presumably an MLP mapping per-sample loss to a weight)
        vnet_temp = VNet(1, 100, 1).cuda()
        optimizer_vnet_temp = torch.optim.Adam(vnet_temp.params(),
                                               1e-3,
                                               weight_decay=1e-4)
        vnet_temp.load_state_dict(vnet.state_dict())

        # fresh virtual model mirroring the real model's configuration
        v_model = v_TSN(
            num_class,
            args.num_segments,
            args.modality,
            base_model=args.arch,
            consensus_type=args.consensus_type,
            dropout=args.dropout,
            img_feature_dim=args.img_feature_dim,
            partial_bn=not args.no_partialbn,
            pretrain=args.pretrain,
            is_shift=args.shift,
            shift_div=args.shift_div,
            shift_place=args.shift_place,
            fc_lr5=not (args.tune_from and args.dataset in args.tune_from),
            temporal_pool=args.temporal_pool,
            non_local=args.non_local,
            print_spec=False)
        v_model = torch.nn.DataParallel(v_model, device_ids=args.gpus).cuda()
        if args.temporal_pool and not args.resume:
            make_temporal_pool(v_model.module.base_model, args.num_segments)
        v_model.load_state_dict(model.state_dict())

        # compute output
        output = v_model(input_var)
        # loss = criterion(output, target_var)
        cost = criterion(output, target_var)   # per-sample losses
        cost_v = torch.reshape(cost, (-1, 1))
        v_lambda = vnet_temp(cost_v.data)      # per-sample weights
        l_f_v = torch.sum(cost_v * v_lambda) / len(cost_v)
        v_model.zero_grad()
        grads = torch.autograd.grad(l_f_v, (v_model.module.params()),
                                    create_graph=True)
        # to be modified
        v_lr = args.lr * ((0.1**int(epoch >= 80)) * (0.1**int(epoch >= 100)))
        v_model.module.update_params(lr_inner=v_lr, source_params=grads)
        del grads

        # phase 2. pixel weights step
        try:
            inputs_val, targets_val = next(val_loader_iter)  # one val batch
        except StopIteration:
            # restart the validation iterator once exhausted
            val_loader_iter = iter(val_loader)
            inputs_val, targets_val = next(val_loader_iter)
        # inputs_val, targets_val = sample_val['image'], sample_val['label']
        inputs_val, targets_val = inputs_val.cuda(), targets_val.cuda()
        y_g_hat = v_model(inputs_val)
        l_g_meta = valcriterion(y_g_hat, targets_val)  # val loss
        optimizer_vnet_temp.zero_grad()
        l_g_meta.backward()
        optimizer_vnet_temp.step()
        # persist the meta update into the long-lived weighting net
        vnet.load_state_dict(vnet_temp.state_dict())

        # phase 1. network weight step (w)
        output = model(input_var)
        cost = criterion(output, target)
        cost_v = torch.reshape(cost, (-1, 1))
        with torch.no_grad():
            v_new = vnet(cost_v)
        loss = torch.sum(cost_v * v_new) / len(cost_v)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        # loss.backward()
        # NOTE(review): clipping here happens AFTER optimizer.step() above,
        # so it does not affect the applied update — confirm intent.
        if args.clip_gradient is not None:
            total_norm = clip_grad_norm_(model.parameters(),
                                         args.clip_gradient)
        # optimizer.step()
        # optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5,
                          lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output, end=" ")
            # show the first parameter of the weighting net for debugging
            for n, p in vnet.named_params(vnet):
                print("vnet param: ", n, p[0].item())
                break
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def validate(val_loader, model, criterion, epoch, log=None, tf_writer=None,
             scheduler=None):
    """Evaluate the model for one epoch on `val_loader`.

    Note: despite the `top5` / `Prec@5` naming, accuracy is computed with
    topk=(1, 2), so the second metric is actually top-2 precision.

    Args:
        val_loader: loader yielding (input, target, path) triples.
        model: network to evaluate (switched to eval mode here).
        criterion: loss used for reporting.
        epoch: epoch index for TensorBoard logging.
        log: optional open file-like for text logging.
        tf_writer: optional TensorBoard SummaryWriter.
        scheduler: optional LR scheduler stepped with the mean validation
            loss (e.g. ReduceLROnPlateau).

    Returns:
        Average top-1 precision over the validation set.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        # accumulate logits/targets across batches for per-class analysis
        each_class_output_buffer = torch.Tensor()
        each_class_target_buffer = torch.Tensor()
        for i, (input, target, path) in enumerate(val_loader):
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy (top-1/top-2, see docstring) and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 2))
            each_class_output_buffer = torch.cat(
                (each_class_output_buffer, output.detach().cpu().float()), 0)
            each_class_target_buffer = torch.cat(
                (each_class_target_buffer, target.detach().cpu().float()), 0)
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                output = ('Test: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, top1=top1, top5=top5))
                print(output)
                if log is not None:
                    log.write(output + '\n')
                    log.flush()

    if scheduler:
        # BUGFIX: was scheduler.step(loss.avg) — `loss` is the last batch's
        # loss *tensor*, which has no .avg attribute (AttributeError).
        # Step with the epoch-average loss from the meter instead.
        scheduler.step(losses.avg)

    output = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    print(output)
    if log is not None:
        log.write(output + '\n')
        log.flush()

    if tf_writer is not None:
        tf_writer.add_scalar('loss/test', losses.avg, epoch)
        tf_writer.add_scalar('acc/test_top1', top1.avg, epoch)
        tf_writer.add_scalar('acc/test_top5', top5.avg, epoch)

    return top1.avg
def validate(val_loader, model, criterion, epoch, log=None, tf_writer=None):
    """Evaluate for one epoch; reports mAP in multi-label mode
    (args.multi_class) and top-1/top-5 precision otherwise.

    Args:
        val_loader: loader yielding (input, target) batches.
        model: network to evaluate (switched to eval mode here).
        criterion: loss used for reporting.
        epoch: epoch index for TensorBoard logging.
        log: optional open file-like for text logging.
        tf_writer: optional TensorBoard SummaryWriter.

    Returns:
        mAPs.avg when args.multi_class, otherwise top1.avg.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    mAPs = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            if args.multi_class:
                mAP = calculate_mAP(output.data, target)
                mAPs.update(mAP, input.size(0))
            else:
                prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
                top1.update(prec1.item(), input.size(0))
                top5.update(prec5.item(), input.size(0))
            losses.update(loss.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                if args.multi_class:
                    # BUGFIX: this branch used a training-style format that
                    # referenced train_loader / data_time / optimizer, none
                    # of which exist in this function (NameError on first
                    # print). Use a validation-style line instead.
                    output = ('Test: [{0}/{1}]\t'
                              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                              'mAP {mAPs.val:.3f} ({mAPs.avg:.3f})'.format(
                                  i, len(val_loader), batch_time=batch_time,
                                  loss=losses, mAPs=mAPs))
                else:
                    output = ('Test: [{0}/{1}]\t'
                              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                              'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                              'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                                  i, len(val_loader), batch_time=batch_time,
                                  loss=losses, top1=top1, top5=top5))
                print(output)
                if log is not None:
                    log.write(output + '\n')
                    log.flush()

    output = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    print(output)
    if log is not None:
        log.write(output + '\n')
        log.flush()

    if tf_writer is not None:
        if args.multi_class:
            # NOTE(review): tag says 'train' but this is the validation
            # mAP — kept as-is for dashboard compatibility; confirm
            # before renaming to 'acc/test_mAP'.
            tf_writer.add_scalar('acc/train_mAP', mAPs.avg, epoch)
        else:
            tf_writer.add_scalar('acc/test_top1', top1.avg, epoch)
            tf_writer.add_scalar('acc/test_top5', top5.avg, epoch)
        tf_writer.add_scalar('loss/test', losses.avg, epoch)

    if args.multi_class:
        return mAPs.avg
    else:
        return top1.avg
def v_train(train_loader, val_loader, model, v_model, vnet, criterion,
            valcriterion, optimizer, v_optimizer, optimizer_vnet, epoch, log,
            tf_writer):
    """Meta-learning ("learning to reweight") training epoch.

    For every training batch:
      1. copy the current weights into the auxiliary model `v_model` and
         take a virtual gradient step on the vnet-weighted training loss;
      2. update `vnet` so the virtually-updated model does well on a
         batch drawn from `val_loader` (meta objective);
      3. take the real optimizer step on the vnet-weighted training loss.

    NOTE(review): `v_optimizer` is accepted but never used here — the
    virtual step goes through v_model.module.update_params; confirm it
    can be dropped from the interface.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    val_loader_iter = iter(val_loader)

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # start the virtual model from the current real weights
        v_model.load_state_dict(model.state_dict())
        target = target.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = v_model(input_var)
        # loss = criterion(output, target_var)
        cost = criterion(output, target_var)   # per-sample losses
        cost_v = torch.reshape(cost, (-1, 1))
        v_lambda = vnet(cost_v.data)           # per-sample weights
        l_f_v = torch.sum(cost_v * v_lambda) / len(cost_v)
        v_model.zero_grad()
        grads = torch.autograd.grad(l_f_v, (v_model.module.params()),
                                    create_graph=True)
        # to be modified
        v_lr = args.lr * ((0.1**int(epoch >= 80)) * (0.1**int(epoch >= 100)))
        v_model.module.update_params(lr_inner=v_lr, source_params=grads)
        del grads

        # phase 2. pixel weights step
        try:
            inputs_val, targets_val = next(val_loader_iter)
        except StopIteration:
            # restart the validation iterator once exhausted
            val_loader_iter = iter(val_loader)
            inputs_val, targets_val = next(val_loader_iter)
        inputs_val, targets_val = inputs_val.cuda(), targets_val.cuda()
        y_g_hat = v_model(inputs_val)
        l_g_meta = valcriterion(y_g_hat, targets_val)  # val loss
        optimizer_vnet.zero_grad()
        l_g_meta.backward()
        optimizer_vnet.step()

        # phase 1. network weight step (w)
        output = model(input_var)
        cost = criterion(output, target)
        cost_v = torch.reshape(cost, (-1, 1))
        with torch.no_grad():
            v_new = vnet(cost_v)
        loss = torch.sum(cost_v * v_new) / len(cost_v)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        # loss.backward()
        # NOTE(review): clipping here happens AFTER optimizer.step() above,
        # so it does not affect the applied update — confirm intent.
        if args.clip_gradient is not None:
            total_norm = clip_grad_norm_(model.parameters(),
                                         args.clip_gradient)
        # optimizer.step()
        # optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5,
                          lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    """One training epoch with gradient accumulation over args.iter_size
    mini-batches: gradients are accumulated by backward() every batch and
    divided by iter_size just before each optimizer step.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # one-off GPU memory snapshot early in the epoch (debug aid)
        if i == 20:
            os.system("gpustat")

        target = target.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        loss.backward()

        no_grad_cnt = 0
        if i % args.iter_size == 0:
            # scale down gradients when iter size is functioning
            if args.iter_size != 1:
                for g in optimizer.param_groups:
                    for p in g['params']:
                        if isinstance(p.grad, torch.Tensor):
                            p.grad /= args.iter_size
                        else:
                            # parameter received no gradient this round
                            no_grad_cnt = no_grad_cnt + 1

            if args.clip_gradient is not None:
                total_norm = clip_grad_norm_(model.parameters(),
                                             args.clip_gradient)
            else:
                total_norm = 0

            optimizer.step()
            optimizer.zero_grad()

        #if i == 0:
        #    print("{}\nWARNING: There are {} params without gradient!!!!!\n{}".format("*"*50, no_grad_cnt, "*"*50))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5,
                          lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def train(train_loader, model, criterion, optimizer, epoch, base_model_gflops,
          gflops_list, g_meta, args):
    """One training epoch for the adaptive-resolution/skip framework.

    When args.ada_reso_skip, the model also returns per-layer policy
    masks; per-batch upper-bound and realized GFLOPs are derived from
    those masks and folded into the loss dictionary. Otherwise this is a
    plain supervised epoch.

    Note: `args` is an explicit parameter here (unlike the sibling
    functions that read the module-level `args`).

    Returns:
        A policy-usage summary string when ada_reso_skip, else "Base Model".
    """
    batch_time, data_time, top1, top5 = get_average_meters(4)
    # loss-name -> AverageMeter; populated lazily on the first batch
    losses_dict = {}
    if args.ada_reso_skip:
        # BATE/AdaBNInc nets accumulate mask sums in-place (start from 0);
        # other (CGNet-style) nets collect raw mask tensors per layer.
        if "batenet" in args.arch or "AdaBNInc" in args.arch:
            mask_stack_list_list = [0 for _ in gflops_list]
        else:
            mask_stack_list_list = [[] for _ in gflops_list]
        upb_batch_gflops_list = []
        real_batch_gflops_list = []
    tau = args.init_tau  # Gumbel-softmax-style temperature, presumably

    # switch to train mode
    model.module.partialBN(not args.no_partialbn)
    model.train()

    end = time.time()
    print("#%s# lr:%.6f\ttau:%.4f" %
          (args.exp_header, optimizer.param_groups[-1]['lr'] * 0.1, tau))

    for i, input_tuple in enumerate(train_loader):
        data_time.update(time.time() - end)

        if args.warmup_epochs > 0:
            adjust_learning_rate(optimizer, epoch, len(train_loader), i,
                                 "linear", None, args)

        # input and target (last tuple element is the target)
        batchsize = input_tuple[0].size(0)
        input_var_list = [
            torch.autograd.Variable(input_item).cuda(non_blocking=True)
            for input_item in input_tuple[:-1]
        ]
        target = input_tuple[-1].cuda(non_blocking=True)
        target_var = torch.autograd.Variable(target)

        # model forward function & measure losses and accuracy
        output, mask_stack_list, _, _ = \
            model(input=input_var_list, tau=tau, is_training=True,
                  curr_step=epoch * len(train_loader) + i)

        if args.ada_reso_skip:
            upb_gflops_tensor, real_gflops_tensor = compute_gflops_by_mask(
                mask_stack_list, base_model_gflops, gflops_list, g_meta,
                args)
            loss_dict = compute_losses(criterion, output, target_var,
                                       mask_stack_list, upb_gflops_tensor,
                                       real_gflops_tensor, epoch, model,
                                       base_model_gflops, args)
            upb_batch_gflops_list.append(upb_gflops_tensor.detach())
            real_batch_gflops_list.append(real_gflops_tensor.detach())
        else:
            loss_dict = {"loss": criterion(output, target_var[:, 0])}

        prec1, prec5 = accuracy(output.data, target[:, 0], topk=(1, 5))

        # record losses and accuracy (meters created on first batch)
        if len(losses_dict) == 0:
            losses_dict = {
                loss_name: get_average_meters(1)[0]
                for loss_name in loss_dict
            }
        for loss_name in loss_dict:
            losses_dict[loss_name].update(loss_dict[loss_name].item(),
                                          batchsize)
        top1.update(prec1.item(), batchsize)
        top5.update(prec5.item(), batchsize)

        # compute gradient and do SGD step
        loss_dict["loss"].backward()
        if args.clip_gradient is not None:
            clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        optimizer.zero_grad()

        # gather masks
        if args.ada_reso_skip:
            for layer_i, mask_stack in enumerate(mask_stack_list):
                if "batenet" in args.arch:
                    mask_stack_list_list[layer_i] += torch.sum(
                        mask_stack.detach(), dim=0)
                elif "AdaBNInc" in args.arch:
                    mask_stack_list_list[layer_i] += torch.sum(
                        mask_stack.detach(), dim=0)
                else:  # TODO CGNet
                    mask_stack_list_list[layer_i].append(
                        mask_stack.detach())  # TODO removed cpu()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # logging
        if i % args.print_freq == 0:
            print_output = ('Epoch:[{0:02d}][{1:03d}/{2:03d}] lr {3:.6f} '
                            'Time {batch_time.val:.3f}({batch_time.avg:.3f}) '
                            '{data_time.val:.3f} ({data_time.avg:.3f})\t'
                            'Loss{loss.val:.4f}({loss.avg:.4f}) '
                            'Prec@1 {top1.val:.3f}({top1.avg:.3f}) '
                            'Prec@5 {top5.val:.3f}({top5.avg:.3f})\t'.format(
                                epoch, i, len(train_loader),
                                optimizer.param_groups[-1]['lr'] * 0.1,
                                batch_time=batch_time, data_time=data_time,
                                loss=losses_dict["loss"], top1=top1,
                                top5=top5))  # TODO
            # append every auxiliary loss (skip the total and mask losses)
            for loss_name in losses_dict:
                if loss_name == "loss" or "mask" in loss_name:
                    continue
                print_output += ' {header:s} ({loss.avg:.3f})'. \
                    format(header=loss_name[0],
                           loss=losses_dict[loss_name])
            print(print_output)

    if args.ada_reso_skip:
        # CGNet-style masks were collected as lists — concatenate them
        if "cgnet" in args.arch:
            for layer_i in range(len(mask_stack_list_list)):
                mask_stack_list_list[layer_i] = torch.cat(
                    mask_stack_list_list[layer_i], dim=0)
        upb_batch_gflops = torch.mean(torch.stack(upb_batch_gflops_list))
        real_batch_gflops = torch.mean(torch.stack(real_batch_gflops_list))

    if args.ada_reso_skip:
        usage_str = get_policy_usage_str(upb_batch_gflops, real_batch_gflops)
        print(usage_str)
    else:
        usage_str = "Base Model"

    return usage_str
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    """One training epoch; per-step LR schedule and multi-label support.

    Reports mAP when args.multi_class (multi-label targets), otherwise
    top-1/top-5 precision. The learning rate is adjusted every batch via
    adjust_learning_rate with a fractional epoch.

    Args:
        train_loader: loader yielding (input, target) batches.
        model: DataParallel-wrapped network (accessed via model.module).
        criterion: training loss.
        optimizer: optimizer over model parameters.
        epoch: current epoch index.
        log: open file-like object for text logging.
        tf_writer: TensorBoard SummaryWriter.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    mAPs = AverageMeter()

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    # (removed unused `checkpoint_dir = os.path.join(...)` — it was never
    # read; checkpoint saving happens elsewhere)
    for i, (input, target) in enumerate(train_loader):
        # per-batch LR schedule with fractional epoch progress
        adjust_learning_rate(optimizer, epoch, args.lr_type, args.lr_steps,
                             epoch + float(i) / len(train_loader))

        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        if args.multi_class:
            mAP = calculate_mAP(output.data, target)
            mAPs.update(mAP, input.size(0))
        else:
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
        losses.update(loss.item(), input.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if args.clip_gradient is not None:
            clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            if args.multi_class:
                output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'mAP {mAPs.val:.3f} ({mAPs.avg:.3f})'.format(
                              epoch, i, len(train_loader),
                              batch_time=batch_time, data_time=data_time,
                              loss=losses, mAPs=mAPs,
                              lr=optimizer.param_groups[2]['lr']))
            else:
                output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              epoch, i, len(train_loader),
                              batch_time=batch_time, data_time=data_time,
                              loss=losses, top1=top1, top5=top5,
                              lr=optimizer.param_groups[2]['lr']))
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    if args.multi_class:
        tf_writer.add_scalar('acc/train_mAP', mAPs.avg, epoch)
    else:
        tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
        tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    """One training epoch with gradient accumulation.

    The optimizer steps only every ``args.update_weight`` mini-batches;
    gradients accumulate in between. Printed meters are reset after each
    report, so every log line reflects only the most recent window of
    batches. End-of-epoch TensorBoard scalars therefore cover just the
    final (possibly partial) window.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # Configure partial BN; fall back to the bare model when it is not
    # wrapped (no .module attribute). Bare except kept to preserve the
    # original behavior exactly.
    pbn_enabled = not args.no_partialbn
    try:
        model.module.partialBN(pbn_enabled)
    except:
        model.partialBN(pbn_enabled)

    model.train()

    tick = time.time()
    for step, (clips, labels) in enumerate(train_loader):
        data_time.update(time.time() - tick)

        clips, labels = clips.cuda(), labels.cuda()
        preds = model(clips)
        batch_loss = criterion(preds, labels)

        # accuracy and loss bookkeeping
        acc1, = accuracy(preds.data, labels, topk=(1, ))
        losses.update(batch_loss.item(), clips.size(0))
        top1.update(acc1.item(), clips.size(0))

        # accumulate gradients; apply them every args.update_weight batches
        batch_loss.backward()
        if (step + 1) % args.update_weight == 0:
            optimizer.step()
            optimizer.zero_grad()

        # timing
        batch_time.update(time.time() - tick)
        tick = time.time()

        if (step + 1) % args.print_freq == 0:
            msg = ('Train: epoch-{0} ({1}/{2})\t'
                   'batch_time {batch_time.avg:.2f}\t\t'
                   'data_time {data_time.avg:.2f}\t\t'
                   'loss {loss.avg:.3f}\t'
                   'prec@1 {top1.avg:.2f}\t'.format(epoch, step + 1,
                                                    len(train_loader),
                                                    batch_time=batch_time,
                                                    data_time=data_time,
                                                    loss=losses,
                                                    top1=top1))
            # restart the window so the next report is independent
            for meter in (batch_time, data_time, losses, top1):
                meter.reset()
            print(msg)
            log.write(msg + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    """Train one epoch on a model that returns (main_output, aux_output).

    The auxiliary head's loss is down-weighted by 0.1 while pruning is
    active (``args.prune != ''``), otherwise weighted 1.

    Args:
        train_loader: iterable of (input, target) batches.
        model: network returning a (main logits, auxiliary logits) pair.
        criterion: loss function applied to each head.
        optimizer: optimizer to step each batch.
        epoch: current epoch index (for logging only).
        log: open file-like object for text logging.
        tf_writer: TensorBoard SummaryWriter.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_extra = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    top1_extra = AverageMeter()
    top5_extra = AverageMeter()

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda()

        # compute output; Variable wrappers removed — they have been no-ops
        # since PyTorch 0.4
        wExtraLoss = 1 if args.prune == '' else 0.1
        output, extra = model(input)
        loss_main = criterion(output, target)
        extra_loss = criterion(extra, target) * wExtraLoss
        loss = loss_main + extra_loss

        # measure accuracy and record loss (main head)
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss_main.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # auxiliary-head statistics
        prec1_extra, prec5_extra = accuracy(extra.data, target, topk=(1, 5))
        losses_extra.update(extra_loss.item(), input.size(0))
        top1_extra.update(prec1_extra.item(), input.size(0))
        top5_extra.update(prec5_extra.item(), input.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if args.clip_gradient is not None:
            clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # Fixed: the original printed lr * 0.1 (flagged "# TODO"), which
            # under-reported the learning rate by a factor of ten.
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.7f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Loss_h {losses_extra.val:.4f} ({losses_extra.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                          epoch, i, len(train_loader),
                          batch_time=batch_time, loss=losses,
                          losses_extra=losses_extra, top1=top1,
                          lr=optimizer.param_groups[-1]['lr']))
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer,
          warmup, lr_type, lr_steps):
    """Train one epoch, adjusting the learning rate every iteration.

    ``adjust_learning_rate`` is called per batch with the fractional progress
    through the epoch so schedules (and warmup) resolve at iteration level.

    Args:
        train_loader: iterable of (input, target) batches.
        model: network to train.
        criterion: loss function.
        optimizer: optimizer to step each batch.
        epoch: current epoch index.
        log: open file-like object for text logging.
        tf_writer: TensorBoard SummaryWriter.
        warmup: warmup setting forwarded to adjust_learning_rate.
        lr_type: schedule type forwarded to adjust_learning_rate.
        lr_steps: schedule milestones forwarded to adjust_learning_rate.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # per-iteration LR schedule: pass fractional progress within the epoch
        ite_ratio = (i + 1.) / len(train_loader)
        adjust_learning_rate(optimizer, epoch, ite_ratio, lr_type, lr_steps,
                             warmup)

        target = target.cuda()

        # compute output; torch.autograd.Variable wrappers removed — they
        # have been no-ops since PyTorch 0.4
        output = model(input)
        forward_time.update(time.time() - end)  # includes data-loading time
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step (gradient clipping intentionally
        # disabled in the original — left out here as well)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.avg:.3f}\t'
                      'Data {data_time.avg:.3f}\t'
                      'Loss {loss.avg:.4f}\t'
                      'Prec@1 {top1.avg:.3f}\t'
                      'Prec@5 {top5.avg:.3f}'.format(
                          epoch, i, len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5,
                          lr=optimizer.param_groups[-1]['lr']))
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer, args, rank):
    # Distributed training epoch with gradient accumulation: the loss is
    # divided by args.batch_multiplier and optimizer.step() runs only every
    # args.batch_multiplier iterations, emulating a larger effective batch.
    # Only rank 0 writes text logs, TensorBoard scalars and histograms.
    torch.cuda.empty_cache()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    model.zero_grad()
    loss_tmp = []  # per-batch losses accumulated between 'loss/step' writes
    acc_tmp = []   # per-batch top-1 values accumulated between 'acc/step' writes
    for i, (input, target) in enumerate(train_loader):
        # per-iteration LR schedule using fractional epoch progress
        adjust_learning_rate(optimizer, epoch, args.lr_type, args.lr_steps, args, (epoch-1) + float(i) / len(train_loader))
        # shift to a 1-based index so the modulo checks below fire on the
        # batch_multiplier-th batch instead of the 0th
        i += 1
        # if (i+1) % args.batch_multiplier == 0:
        #     optimizer.step()
        #     optimizer.zero_grad()
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
        input_var = input
        target_var = target

        # compute output; loss is scaled down so the accumulated gradients
        # match a single large-batch step
        output = model(input_var)
        loss = criterion(output, target_var) / args.batch_multiplier  # divide batch_multiplier as grad accumulation

        # measure accuracy and record loss (rescaled back to the true loss)
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item()*args.batch_multiplier, input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        loss_tmp.append(loss.item()*args.batch_multiplier)
        acc_tmp.append(prec1.item())

        # compute gradient and do SGD step; on accumulation-only iterations
        # skip the DDP gradient all-reduce via no_sync() to save communication
        if i % args.batch_multiplier != 0:
            if args.multiprocessing_distributed:
                with model.no_sync():
                    loss.backward()
            else:
                loss.backward()
        else:
            loss.backward()
            if args.clip_gradient is not None:
                total_norm = clip_grad_norm_(model.parameters(), args.clip_gradient)
            optimizer.step()
            # optimizer.zero_grad()

        if rank == 0:
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % (args.print_freq*args.batch_multiplier*5) == 0:
                # step-level scalars, averaged since the previous write
                tf_writer.add_scalar('loss/step', np.mean(loss_tmp), ((epoch-1)*len(train_loader)+i)/args.batch_multiplier)
                tf_writer.add_scalar('acc/step', np.mean(acc_tmp), ((epoch-1)*len(train_loader)+i)/args.batch_multiplier)
                loss_tmp = []
                acc_tmp = []

            if i % (args.print_freq * args.batch_multiplier) == 0:
                # NOTE(review): `global_time` is not defined in this function —
                # presumably a module-level start timestamp; confirm.
                # NOTE(review): the printed lr is scaled by 0.1 (see the TODO
                # below) and may not be the true learning rate.
                output = ('Epoch: [{0:3d}][{1:4d}/{2:4d}], lr: {lr:.5f}\t'
                          'Time {time:.1f}\t'
                          'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              epoch, int(i/args.batch_multiplier), int(len(train_loader)/args.batch_multiplier),
                              time=(time.time()-global_time)/60., data_time=data_time, loss=losses,
                              top1=top1, top5=top5, lr=optimizer.param_groups[-1]['lr'] * 0.1))  # TODO
                print(output)
                log.write(output + '\n')
                log.flush()

            if i % (args.print_freq*args.batch_multiplier*10) == 0:
                # periodic weight and mean-absolute-gradient histograms
                for tag, value in model.named_parameters():
                    tag = tag.replace('.', '/')
                    tf_writer.add_histogram('weights/'+tag, value.detach(), (epoch*len(train_loader)+i)/args.batch_multiplier)
                    tf_writer.add_histogram('grads/'+tag, value.grad.detach().abs().mean(), (epoch*len(train_loader)+i)/args.batch_multiplier)

        # clear accumulated gradients after each effective (large) batch
        if i % args.batch_multiplier == 0:
            optimizer.zero_grad()

    if rank == 0:
        tf_writer.add_scalar('loss/train', losses.avg, epoch-1)
        tf_writer.add_scalar('acc/train_top1', top1.avg, epoch-1)
def validate(val_loader, model, criterion, epoch):
    """Evaluate on the validation set and return the average top-1 precision.

    Late in training (past 70% of ``args.epochs``) the names of
    misclassified samples are printed for inspection. The final
    (possibly short) batch is skipped.

    Args:
        val_loader: iterable of (input, target, name) batches.
        model: network to evaluate.
        criterion: loss function.
        epoch: current epoch (used only to gate the misclassification dump).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Removed `torch.set_grad_enabled(False)`: it was redundant with the
    # `torch.no_grad()` context below and, being a global switch that was
    # never restored, would have left autograd disabled for later training.

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target, name) in enumerate(val_loader):
            # discard final batch (possibly smaller than the others)
            if i == len(val_loader) - 1:
                break
            # Fixed: `target.cuda(async=True)` is a SyntaxError on
            # Python 3.7+; the keyword was renamed to `non_blocking`.
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # Late in training, report names of misclassified samples.
            if epoch > int(args.epochs * 0.7):
                batch_out = torch.argmax(output, dim=1)
                batch_result = (batch_out != target).tolist()
                # Fixed: the inner loop previously reused `i`, clobbering the
                # outer batch index that drives print_freq and the
                # final-batch check above.
                for j in range(len(batch_result)):
                    if batch_result[j] == 1:
                        print(name[j])

            if i % args.print_freq == 0:
                output = ('Test: [{0}/{1}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              i, len(val_loader), batch_time=batch_time,
                              loss=losses, top1=top1, top5=top5))
                print(output)

    output = (
        'Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
        .format(top1=top1, top5=top5, loss=losses))
    print(output)

    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    """Train one epoch, recording per-interval stats to a ``.npy`` file.

    Every ``args.print_freq`` batches the current loss / top-1 / top-5 values
    are appended to a list that is saved as a NumPy array when the epoch ends.

    Args:
        train_loader: iterable of (input, target) batches.
        model: network to train.
        criterion: loss function.
        optimizer: optimizer to step each batch.
        epoch: current epoch index (also used in the output filename).
        log: open file-like object for text logging.
        tf_writer: TensorBoard SummaryWriter.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if args.no_partialbn:
        model.module.partialBN(False)
    else:
        model.module.partialBN(True)

    # switch to train mode
    model.train()

    end = time.time()
    output_final = []
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda()

        # compute output; Variable wrappers removed — no-ops since PyTorch 0.4
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if args.clip_gradient is not None:
            clip_grad_norm_(model.parameters(), args.clip_gradient)
        optimizer.step()
        optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            # Fixed: the original printed lr * 0.1 (flagged "# TODO"), which
            # under-reported the learning rate by a factor of ten.
            output = ('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch, i, len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1, top5=top5,
                          lr=optimizer.param_groups[-1]['lr']))
            # Fixed: stats were previously recovered by formatting the meters
            # into strings and slicing off the first character
            # (e.g. float('{loss.val:.4f}'[1:6])), which silently corrupted
            # every recorded value; read the meter values directly instead.
            output_list = [epoch, i, round(losses.val, 4),
                           round(top1.val, 3), round(top5.val, 3)]
            print(output_list)
            output_final.append(output_list)
            log.write(output + '\n')
            log.flush()

    print(output_final)
    # Fixed: the filename was built from an undefined name `data`, which
    # raised NameError at runtime. TODO(review): confirm the intended prefix.
    file_name = str(epoch) + '.npy'
    np.save(file_name, np.asarray(output_final))

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('acc/train_top5', top5.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)