def validate_seq(val_loader,
                 tracking_module,
                 fusion_list=None,
                 fuse_prob=False):
    batch_time = AverageMeter(0)

    # switch to evaluate mode
    tracking_module.eval()

    logger = logging.getLogger('global_logger')
    end = time.time()

    # Create an accumulator that will be updated during each frame
    with torch.no_grad():
        for i, (input, det_info, dets, det_split) in enumerate(val_loader):
            input = input.cuda()
            if len(det_info) > 0:
                for k, v in det_info.items():
                    det_info[k] = det_info[k].cuda() if not isinstance(
                        det_info[k], list) else det_info[k]

            # compute output
            aligned_ids, aligned_dets, frame_start = tracking_module.predict(
                input[0], det_info, dets, det_split)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % config.print_freq == 0:
                logger.info(
                    'Test Frame: [{0}/{1}]\tTime'
                    ' {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                        i, len(val_loader), batch_time=batch_time))

    return
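# The AverageMeter used throughout these functions is defined elsewhere in the
# repository. Below is a minimal sketch of a compatible implementation, given
# under the assumption that the constructor argument is a window length
# (0 meaning "average over everything seen so far"), which matches how .val,
# .avg and .count are consumed above. The name AverageMeterSketch is
# hypothetical; it is an illustrative stand-in, not the project's own class.
class AverageMeterSketch(object):
    """Track the latest value and an (optionally windowed) running average."""

    def __init__(self, length=0):
        self.length = length
        self.reset()

    def reset(self):
        self.history = []
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, num=1):
        self.val = val
        if self.length > 0:
            # keep only the most recent `length` samples in the average
            self.history.append(val)
            if len(self.history) > self.length:
                self.history.pop(0)
            self.avg = sum(self.history) / len(self.history)
        else:
            self.sum += val * num
            self.count += num
            self.avg = self.sum / max(self.count, 1)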
def validate(val_loader,
             tracking_module,
             step,
             part='train',
             fusion_list=None,
             fuse_prob=False):
    prec = AverageMeter(0)
    rec = AverageMeter(0)
    mota = AverageMeter(0)
    motp = AverageMeter(0)

    logger = logging.getLogger('global_logger')
    for i, (sequence) in enumerate(val_loader):
        logger.info('Test: [{}/{}]\tSequence ID: KITTI-{}'.format(
            i, len(val_loader), sequence.name))
        seq_loader = DataLoader(
            sequence,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=config.workers,
            pin_memory=True)
        if len(seq_loader) == 0:
            tracking_module.eval()
            logger.info('Empty Sequence ID: KITTI-{}, skip'.format(
                sequence.name))
        else:
            if args.memory:
                seq_prec, seq_rec, seq_mota, seq_motp = validate_mem_seq(
                    seq_loader, tracking_module)
            else:
                seq_prec, seq_rec, seq_mota, seq_motp = validate_seq(
                    seq_loader, tracking_module)
            prec.update(seq_prec, 1)
            rec.update(seq_rec, 1)
            mota.update(seq_mota, 1)
            motp.update(seq_motp, 1)

        write_kitti_result(
            args.result_path,
            sequence.name,
            step,
            tracking_module.frames_id,
            tracking_module.frames_det,
            part=part)

    total_num = torch.Tensor([prec.count])
    logger.info(
        '* Prec: {:.3f}\tRec: {:.3f}\tMOTA: {:.3f}\tMOTP: {:.3f}\ttotal_num={}'
        .format(prec.avg, rec.avg, mota.avg, motp.avg, total_num.item()))
    MOTA, MOTP, recall, prec, F1, fp, fn, id_switches = evaluate(
        step, args.result_path, part=part)

    tracking_module.train()
    return MOTA, MOTP, recall, prec, F1, fp, fn, id_switches
def validate(val_loader, model, criterion):
    global args, rank, world_size, best_prec1

    # validation does not need to track the history
    batch_time = AverageMeter(args.print_freq)
    losses = AverageMeter(args.print_freq)
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    c1 = 0
    c5 = 0

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        if i == len(val_loader) / (args.batch_size * world_size):
            break
        input = input.cuda()
        target = target.cuda()
        if args.double:
            input = input.double()
        if args.half:
            input = input.half()

        # compute output
        with torch.no_grad():
            output = model(input, -1)

        # measure accuracy and record loss
        loss = criterion(output, target) / world_size
        prec1, prec5 = accuracy(output.float().data, target, topk=(1, 5))

        reduced_loss = loss.data.clone()
        reduced_prec1 = prec1.clone() / world_size
        reduced_prec5 = prec5.clone() / world_size

        if args.dist:
            dist.all_reduce(reduced_loss)
            dist.all_reduce(reduced_prec1)
            dist.all_reduce(reduced_prec5)

        losses.update(reduced_loss.item())
        top1.update(reduced_prec1.item())
        top5.update(reduced_prec5.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0 and rank == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      i,
                      len(val_loader),
                      batch_time=batch_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))

    if rank == 0:
        print(' * All Loss {loss.avg:.4f} Prec@1 {top1.avg:.3f}'
              ' Prec@5 {top5.avg:.3f}'.format(
                  loss=losses, top1=top1, top5=top5))

    model.train()
    return losses.avg, top1.avg, top5.avg
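# The accuracy(output, target, topk=(1, 5)) helper called in validate() is the
# usual top-k precision helper and is defined elsewhere. Below is a minimal
# sketch of a compatible version (the name topk_accuracy_sketch is
# hypothetical); it returns percentages as 0-dim tensors so that .clone(),
# division, and .item() work as used above.
import torch


def topk_accuracy_sketch(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) for the given logits and labels."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the maxk largest logits per sample, shape (batch, maxk)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()                                    # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res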
def train(train_loader, val_loader, model, criterion, optimizer, lr_scheduler,
          start_iter, tb_logger):

    global args, rank, world_size, best_prec1, emulate_node
    global grad_exp, grad_man, param_exp, param_man

    batch_time = AverageMeter(args.print_freq)
    data_time = AverageMeter(args.print_freq)
    losses = AverageMeter(args.print_freq)

    model.train()

    end = time.time()
    curr_step = start_iter
    emulate_step = 0

    for i, (input, target) in enumerate(train_loader):
        emulate_step += 1
        if emulate_step == emulate_node:
            curr_step += 1
        if curr_step > args.max_iter:
            break

        # lr_scheduler.step(curr_step)
        # current_lr = lr_scheduler.get_lr()[0]
        current_lr = adjust_learning_rate(optimizer, curr_step)

        target = target.cuda()
        input_var = input.cuda()
        if args.double:
            input_var = input_var.double()
        if args.half:
            input_var = input_var.half()

        # measure data loading time
        data_time.update(time.time() - end)

        output = model(input_var, rank)
        loss = criterion(output, target) / (world_size * emulate_node)

        reduced_loss = loss.data.clone()
        if args.dist:
            dist.all_reduce(reduced_loss)
        losses.update(float(reduced_loss.item()))

        loss = loss * args.loss_scale
        model.zero_grad()
        loss.backward()

        if args.dist:
            sum_gradients(model)

        for model_p, master_p in zip(model_params, master_params):
            if model_p.grad is not None:
                if args.double:
                    master_p.backward(model_p.grad.double() / args.loss_scale)
                else:
                    master_p.backward(model_p.grad.float() / args.loss_scale)

        if emulate_node == emulate_step:
            emulate_step = 0
            optimizer.step()
            param_list = []
            for model_p, master_p in zip(model_params, master_params):
                model_p.data.copy_(master_p.data)
            optimizer.zero_grad()

        batch_time.update(time.time() - end)
        end = time.time()

        if (curr_step == 1 or curr_step % args.print_freq == 0) and rank == 0:
            if tb_logger:
                tb_logger.add_scalar('loss_train', losses.avg, curr_step)
                tb_logger.add_scalar('lr', current_lr, curr_step)
            print('Iter: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'LR {lr:.4f}'.format(
                      curr_step,
                      args.max_iter,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      lr=current_lr))

        if curr_step % args.val_freq == 0 and curr_step != 0:
            val_loss, prec1, prec5 = validate(val_loader, model, criterion)

            if tb_logger:
                tb_logger.add_scalar('loss_val', val_loss, curr_step)
                tb_logger.add_scalar('acc1_val', prec1, curr_step)
                tb_logger.add_scalar('acc5_val', prec5, curr_step)

            if rank == 0:
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'step': curr_step,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args.save_path + '/ckpt_' + str(curr_step))

    val_loss, prec1, prec5 = validate(val_loader, model, criterion)
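# adjust_learning_rate(optimizer, curr_step) is defined elsewhere in the
# repository and its actual schedule is not shown here. The sketch below
# assumes a plain step decay with hypothetical base_lr / lr_steps / lr_gamma
# values, purely to illustrate the calling convention: it applies the rate to
# every parameter group and returns it, which train() logs as current_lr.
def adjust_learning_rate_sketch(optimizer, curr_step,
                                base_lr=0.1, lr_steps=(30000, 60000),
                                lr_gamma=0.1):
    """Apply a step-decayed learning rate and return the value used."""
    decays = sum(1 for s in lr_steps if curr_step >= s)
    lr = base_lr * (lr_gamma ** decays)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr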
def train(train_loader, val_loader, trainval_loader, tracking_module,
          lr_scheduler, start_iter, tb_logger):

    global best_mota

    batch_time = AverageMeter(config.print_freq)
    data_time = AverageMeter(config.print_freq)
    losses = AverageMeter(config.print_freq)

    # switch to train mode
    tracking_module.model.train()

    logger = logging.getLogger('global_logger')
    end = time.time()

    for i, (input, det_info, det_id, det_cls,
            det_split) in enumerate(train_loader):
        curr_step = start_iter + i

        if lr_scheduler is not None:
            lr_scheduler.step(curr_step)
            current_lr = lr_scheduler.get_lr()

        # measure data loading time
        data_time.update(time.time() - end)

        # transfer input to gpu
        input = input.cuda()
        if len(det_info) > 0:
            for k, v in det_info.items():
                det_info[k] = det_info[k].cuda() if not isinstance(
                    det_info[k], list) else det_info[k]

        # forward
        loss = tracking_module.step(
            input.squeeze(0), det_info, det_id, det_cls, det_split)

        # measure elapsed time
        batch_time.update(time.time() - end)
        losses.update(loss.item())

        if (curr_step + 1) % config.print_freq == 0:
            tb_logger.add_scalar('loss_train', losses.avg, curr_step)
            logger.info('Iter: [{0}/{1}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                            curr_step + 1,
                            len(train_loader),
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses))

        if curr_step > 0 and (curr_step + 1) % config.val_freq == 0:
            logger.info('Evaluation on validation set:')
            MOTA, MOTP, recall, prec, F1, fp, fn, id_switches = validate(
                val_loader,
                tracking_module,
                str(curr_step + 1),
                part=args.part)

            if tb_logger is not None:
                tb_logger.add_scalar('prec', prec, curr_step)
                tb_logger.add_scalar('recall', recall, curr_step)
                tb_logger.add_scalar('mota', MOTA, curr_step)
                tb_logger.add_scalar('motp', MOTP, curr_step)
                tb_logger.add_scalar('fp', fp, curr_step)
                tb_logger.add_scalar('fn', fn, curr_step)
                tb_logger.add_scalar('f1', F1, curr_step)
                tb_logger.add_scalar('id_switches', id_switches, curr_step)
                if lr_scheduler is not None:
                    tb_logger.add_scalar('lr', current_lr, curr_step)

            # remember best mota and save checkpoint
            is_best = MOTA > best_mota
            best_mota = max(MOTA, best_mota)

            save_checkpoint(
                {
                    'step': curr_step,
                    'score_arch': config.model.score_arch,
                    'appear_arch': config.model.appear_arch,
                    'best_mota': best_mota,
                    'state_dict': tracking_module.model.state_dict(),
                    'optimizer': tracking_module.optimizer.state_dict(),
                }, is_best, config.save_path + '/ckpt')

        end = time.time()
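# save_checkpoint(state, is_best, filename) is also defined outside this file.
# A common implementation, sketched below under that assumption, writes the
# checkpoint dict with torch.save and copies it to a "*_best.pth" file when
# the tracked metric improved; the exact file naming here is illustrative.
import shutil
import torch


def save_checkpoint_sketch(state, is_best, filename):
    """Persist a checkpoint dict; keep a separate copy of the best one."""
    path = filename + '.pth'
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, filename + '_best.pth')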
def train(train_loader, val_loader, model, criterion, optimizer, lr_scheduler,
          start_iter, tb_logger):

    global args, rank, world_size, best_prec1, emulate_node
    global grad_exp, grad_man, param_exp, param_man

    batch_time = AverageMeter(args.print_freq)
    data_time = AverageMeter(args.print_freq)
    losses = AverageMeter(args.print_freq)

    model.train()

    end = time.time()
    curr_step = start_iter
    emulate_step = 0

    momentum_buffer = []
    for master_p in master_params:
        momentum_buffer.append(torch.zeros_like(master_p))

    grad_buffer = []
    for param_g in model.parameters():
        grad_buffer.append([])

    for i, (input, target) in enumerate(train_loader):
        emulate_step += 1
        if emulate_step == emulate_node:
            curr_step += 1
        if curr_step > args.max_iter:
            break

        current_lr = adjust_learning_rate(optimizer, curr_step)

        target = target.cuda()
        input_var = input.cuda()

        data_time.update(time.time() - end)

        output = model(input_var, rank)
        loss = criterion(output, target) / (world_size * emulate_node)

        reduced_loss = loss.data.clone()
        if args.dist:
            dist.all_reduce(reduced_loss)
        losses.update(float(reduced_loss.item()))

        model.zero_grad()
        loss.backward()

        for idx, param in enumerate(model.parameters()):
            if param.grad is not None:
                grad_buffer[idx].append(param.grad.detach().clone().data)
        model.zero_grad()

        if emulate_node == emulate_step:
            emulate_step = 0

            # reduce all gradients with low precision
            for idx, param in enumerate(model.parameters()):
                if param.grad is not None:
                    if emulate_node == 1:
                        param.grad.data.copy_(grad_buffer[idx][0])
                        continue

                    # find maximum exponent
                    max_exp = -100
                    for val in grad_buffer[idx]:
                        t_exp = torch.log2(
                            torch.abs(val * args.emulate_node).max()).ceil(
                            ).detach().cpu().numpy()
                        if t_exp > max_exp:
                            max_exp = t_exp

                    upper_bound = 2**(args.grad_exp - 1) - 1
                    shift_factor = upper_bound - max_exp
                    if max_exp == -100 or not args.use_APS:
                        shift_factor = 0

                    for grad in grad_buffer[idx]:
                        grad.data.copy_(
                            float_quantize(grad * (2**shift_factor),
                                           args.grad_exp, args.grad_man))

                    # as we use a single node to emulate multi-node, we should
                    # first accumulate gradients within a single node and then
                    # communicate them in the distributed system
                    res = torch.zeros_like(grad_buffer[idx][0])
                    for val in grad_buffer[idx]:
                        res = float_quantize(res + val, args.grad_exp,
                                             args.grad_man)
                    param.grad.data.copy_(res.data / (2**shift_factor))

            grad_buffer = []
            for param_g in model.parameters():
                grad_buffer.append([])

            if args.dist:
                sum_gradients(model,
                              use_APS=args.use_APS,
                              use_kahan=args.use_kahan,
                              grad_exp=args.grad_exp,
                              grad_man=args.grad_man)

            for model_p, master_p in zip(model_params, master_params):
                if model_p.grad is not None:
                    master_p.backward(model_p.grad.float())

            # update parameters
            if args.use_lars:
                for idx, master_p in enumerate(master_params):
                    if master_p.grad is not None:
                        local_lr = master_p.norm(2) / \
                            (master_p.grad.data.norm(2) +
                             args.weight_decay * master_p.norm(2))
                        lars_coefficient = 0.001
                        local_lr = local_lr * lars_coefficient

                        momentum_buffer[idx] = \
                            args.momentum * momentum_buffer[idx].data \
                            + current_lr * local_lr \
                            * (master_p.grad.data +
                               args.weight_decay * master_p.data)
                        update = momentum_buffer[idx]
                        master_p.data.copy_(master_p - update)
            else:
                optimizer.step()

            for model_p, master_p in zip(model_params, master_params):
                model_p.data.copy_(master_p.data)
            optimizer.zero_grad()

        batch_time.update(time.time() - end)
        end = time.time()

        if (curr_step == 1 or curr_step % args.print_freq == 0) and rank == 0:
            if tb_logger:
                tb_logger.add_scalar('loss_train', losses.avg, curr_step)
                tb_logger.add_scalar('lr', current_lr, curr_step)
            print('Iter: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'LR {lr:.4f}'.format(
                      curr_step,
                      args.max_iter,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      lr=current_lr))

        if curr_step % args.val_freq == 0 and curr_step != 0:
            val_loss, prec1, prec5 = validate(val_loader, model, criterion)

            if tb_logger:
                tb_logger.add_scalar('loss_val', val_loss, curr_step)
                tb_logger.add_scalar('acc1_val', prec1, curr_step)
                tb_logger.add_scalar('acc5_val', prec5, curr_step)

            if rank == 0:
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'step': curr_step,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best, args.save_path + '/ckpt_' + str(curr_step))

    del momentum_buffer
    val_loss, prec1, prec5 = validate(val_loader, model, criterion)
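# The gradient-reduction code above shifts every emulated worker's gradient by
# a common power of two before quantizing, so that the largest magnitude sits
# near the top of the low-precision exponent range (the use_APS path), then
# accumulates in low precision and undoes the shift. The standalone sketch
# below isolates that idea for a single parameter. It assumes the same
# float_quantize(x, exp, man) primitive used in train(); the function name and
# the use_aps flag here are illustrative, not part of the original code.
import math

import torch


def aps_reduce_sketch(grads, grad_exp, grad_man, float_quantize, use_aps=True):
    """Sum per-worker gradients in emulated low-precision floating point."""
    num_workers = len(grads)

    shift_factor = 0
    if use_aps:
        # largest exponent the summed gradient is expected to need
        max_abs = max(float(g.abs().max()) for g in grads) * num_workers
        if max_abs > 0:
            max_exp = math.ceil(math.log2(max_abs))
            upper_bound = 2 ** (grad_exp - 1) - 1
            shift_factor = upper_bound - max_exp

    # quantize the shifted gradients, accumulate in low precision, then unshift
    acc = torch.zeros_like(grads[0])
    for g in grads:
        q = float_quantize(g * (2.0 ** shift_factor), grad_exp, grad_man)
        acc = float_quantize(acc + q, grad_exp, grad_man)
    return acc / (2.0 ** shift_factor)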