# Classification training entry point: DistributedDataParallel training with
# optional apex mixed precision, SGD + MultiStepLR.
def main():
    args = parse_args()
    global local_rank
    local_rank = args.local_rank
    if local_rank == 0:
        global logger
        logger = get_logger(__name__, args.log)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True

    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')
    global gpus_num
    gpus_num = torch.cuda.device_count()
    if local_rank == 0:
        logger.info(f'use {gpus_num} gpus')
        logger.info(f"args: {args}")

    cudnn.benchmark = True
    cudnn.enabled = True
    start_time = time.time()

    # dataset and dataloader
    if local_rank == 0:
        logger.info('start loading data')

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        Config.train_dataset, shuffle=True)
    train_loader = DataLoader(Config.train_dataset,
                              batch_size=args.per_node_batch_size,
                              shuffle=False,
                              pin_memory=True,
                              num_workers=args.num_workers,
                              sampler=train_sampler)
    val_loader = DataLoader(Config.val_dataset,
                            batch_size=args.per_node_batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=args.num_workers)
    if local_rank == 0:
        logger.info('finish loading data')
        logger.info(f"creating model '{args.network}'")

    model = models.__dict__[args.network](**{
        "pretrained": args.pretrained,
        "num_classes": args.num_classes,
    })

    flops_input = torch.randn(1, 3, args.input_image_size,
                              args.input_image_size)
    flops, params = profile(model, inputs=(flops_input, ))
    flops, params = clever_format([flops, params], "%.3f")
    if local_rank == 0:
        logger.info(
            f"model: '{args.network}', flops: {flops}, params: {params}")
        for name, param in model.named_parameters():
            logger.info(f"{name},{param.requires_grad}")

    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.milestones, gamma=0.1)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        model = apex.parallel.DistributedDataParallel(model,
                                                      delay_allreduce=True)
        if args.sync_bn:
            model = apex.parallel.convert_syncbn_model(model)
    else:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)

    if args.evaluate:
        # load the saved model and only evaluate it
        if not os.path.isfile(args.evaluate):
            if local_rank == 0:
                logger.exception(
                    '{} is not a file, please check it again'.format(
                        args.evaluate))
            sys.exit(-1)
        if local_rank == 0:
            logger.info('start only evaluating')
            logger.info(f"start resuming model from {args.evaluate}")
        checkpoint = torch.load(args.evaluate,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        acc1, acc5, throughput = validate(val_loader, model, args)
        if local_rank == 0:
            logger.info(
                f"epoch {checkpoint['epoch']:0>3d}, top1 acc: {acc1:.2f}%, "
                f"top5 acc: {acc5:.2f}%, throughput: {throughput:.2f}sample/s")
        return

    start_epoch = 1
    # resume training from the latest checkpoint if one exists
    if os.path.exists(args.resume):
        if local_rank == 0:
            logger.info(f"start resuming model from {args.resume}")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        start_epoch += checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        if local_rank == 0:
            logger.info(
                f"finish resuming model from {args.resume}, epoch {checkpoint['epoch']}, "
                f"loss: {checkpoint['loss']:.3f}, lr: {checkpoint['lr']:.6f}, "
                f"top1_acc: {checkpoint['acc1']}%")

    if local_rank == 0:
        if not os.path.exists(args.checkpoints):
            os.makedirs(args.checkpoints)
        logger.info('start training')

    for epoch in range(start_epoch, args.epochs + 1):
        train_sampler.set_epoch(epoch)
        acc1, acc5, losses = train(train_loader, model, criterion, optimizer,
                                   scheduler, epoch, args)
        if local_rank == 0:
            logger.info(
                f"train: epoch {epoch:0>3d}, top1 acc: {acc1:.2f}%, "
                f"top5 acc: {acc5:.2f}%, losses: {losses:.2f}")

        acc1, acc5, throughput = validate(val_loader, model, args)
        if local_rank == 0:
            logger.info(
                f"val: epoch {epoch:0>3d}, top1 acc: {acc1:.2f}%, "
                f"top5 acc: {acc5:.2f}%, throughput: {throughput:.2f}sample/s")

        # remember best prec@1 and save checkpoint
        if local_rank == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'acc1': acc1,
                    'loss': losses,
                    # get_last_lr() replaces the deprecated get_lr()
                    'lr': scheduler.get_last_lr()[0],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(args.checkpoints, 'latest.pth'))
            if epoch == args.epochs:
                torch.save(
                    model.module.state_dict(),
                    os.path.join(
                        args.checkpoints,
                        "{}-epoch{}-acc{}.pth".format(args.network, epoch,
                                                      acc1)))

    training_time = (time.time() - start_time) / 3600
    if local_rank == 0:
        logger.info(
            f"finish training, total training time: {training_time:.2f} hours")
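# get_logger() is used above but not defined in this file. Below is a minimal
# sketch of a compatible helper, assuming it logs to both the file passed via
# args.log and the console; the real implementation may differ.
import logging

def get_logger(name, log_file):
    # configure a named logger that emits INFO-level messages
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    # one handler for the log file, one mirroring to the console
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(formatter)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger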
# Detection training entry point for RetinaNet: DistributedDataParallel
# training with optional apex mixed precision, AdamW + ReduceLROnPlateau,
# COCO-style mAP/mAR evaluation every 5 epochs.
def main():
    args = parse_args()
    global local_rank
    local_rank = args.local_rank
    if local_rank == 0:
        global logger
        logger = get_logger(__name__, args.log)

    torch.cuda.empty_cache()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True

    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')
    global gpus_num
    gpus_num = torch.cuda.device_count()
    if local_rank == 0:
        logger.info(f'use {gpus_num} gpus')
        logger.info(f"args: {args}")

    cudnn.benchmark = True
    cudnn.enabled = True
    start_time = time.time()

    # dataset and dataloader
    if local_rank == 0:
        logger.info('start loading data')

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        Config.train_dataset, shuffle=True)
    train_loader = DataLoader(Config.train_dataset,
                              batch_size=args.per_node_batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=collater,
                              sampler=train_sampler)
    if local_rank == 0:
        logger.info('finish loading data')

    model = retinanet.__dict__[args.network](**{
        "pretrained": args.pretrained,
        "num_classes": args.num_classes,
    })

    if local_rank == 0:
        for name, param in model.named_parameters():
            logger.info(f"{name},{param.requires_grad}")

    flops_input = torch.randn(1, 3, args.input_image_size,
                              args.input_image_size)
    flops, params = profile(model, inputs=(flops_input, ))
    flops, params = clever_format([flops, params], "%.3f")
    if local_rank == 0:
        logger.info(
            f"model: '{args.network}', flops: {flops}, params: {params}")

    criterion = RetinaLoss(image_w=args.input_image_size,
                           image_h=args.input_image_size).cuda()
    decoder = RetinaDecoder(image_w=args.input_image_size,
                            image_h=args.input_image_size).cuda()
    model = model.cuda()
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.apex:
        # keep numerically sensitive ops in fp32 under apex O1
        amp.register_float_function(torch, 'sigmoid')
        amp.register_float_function(torch, 'softmax')
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        model = apex.parallel.DistributedDataParallel(model,
                                                      delay_allreduce=True)
        if args.sync_bn:
            model = apex.parallel.convert_syncbn_model(model)
    else:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)

    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            if local_rank == 0:
                logger.exception(
                    '{} is not a file, please check it again'.format(
                        args.evaluate))
            sys.exit(-1)
        if local_rank == 0:
            logger.info('start only evaluating')
            logger.info(f"start resuming model from {args.evaluate}")
        checkpoint = torch.load(args.evaluate,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        if local_rank == 0:
            logger.info('start eval.')
            all_eval_result = validate(Config.val_dataset, model, decoder)
            logger.info('eval done.')
            if all_eval_result is not None:
                logger.info(
                    f"val: epoch: {checkpoint['epoch']:0>5d}, "
                    f"IoU=0.5:0.95,area=all,maxDets=100,mAP:{all_eval_result[0]:.3f}, "
                    f"IoU=0.5,area=all,maxDets=100,mAP:{all_eval_result[1]:.3f}, "
                    f"IoU=0.75,area=all,maxDets=100,mAP:{all_eval_result[2]:.3f}, "
                    f"IoU=0.5:0.95,area=small,maxDets=100,mAP:{all_eval_result[3]:.3f}, "
                    f"IoU=0.5:0.95,area=medium,maxDets=100,mAP:{all_eval_result[4]:.3f}, "
                    f"IoU=0.5:0.95,area=large,maxDets=100,mAP:{all_eval_result[5]:.3f}, "
                    f"IoU=0.5:0.95,area=all,maxDets=1,mAR:{all_eval_result[6]:.3f}, "
                    f"IoU=0.5:0.95,area=all,maxDets=10,mAR:{all_eval_result[7]:.3f}, "
                    f"IoU=0.5:0.95,area=all,maxDets=100,mAR:{all_eval_result[8]:.3f}, "
                    f"IoU=0.5:0.95,area=small,maxDets=100,mAR:{all_eval_result[9]:.3f}, "
                    f"IoU=0.5:0.95,area=medium,maxDets=100,mAR:{all_eval_result[10]:.3f}, "
                    f"IoU=0.5:0.95,area=large,maxDets=100,mAR:{all_eval_result[11]:.3f}")
        return

    best_map = 0.0
    start_epoch = 1
    # resume training from the latest checkpoint if one exists
    if os.path.exists(args.resume):
        if local_rank == 0:
            logger.info(f"start resuming model from {args.resume}")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        start_epoch += checkpoint['epoch']
        best_map = checkpoint['best_map']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        if local_rank == 0:
            logger.info(
                f"finish resuming model from {args.resume}, epoch {checkpoint['epoch']}, "
                f"best_map: {checkpoint['best_map']}, loss: {checkpoint['loss']:.3f}, "
                f"cls_loss: {checkpoint['cls_loss']:.2f}, reg_loss: {checkpoint['reg_loss']:.2f}")

    if local_rank == 0:
        if not os.path.exists(args.checkpoints):
            os.makedirs(args.checkpoints)
        logger.info('start training')

    for epoch in range(start_epoch, args.epochs + 1):
        train_sampler.set_epoch(epoch)
        cls_losses, reg_losses, losses = train(train_loader, model, criterion,
                                               optimizer, scheduler, epoch,
                                               args)
        if local_rank == 0:
            logger.info(
                f"train: epoch {epoch:0>3d}, cls_loss: {cls_losses:.2f}, "
                f"reg_loss: {reg_losses:.2f}, loss: {losses:.2f}")

        if epoch % 5 == 0 or epoch == args.epochs:
            if local_rank == 0:
                logger.info('start eval.')
                all_eval_result = validate(Config.val_dataset, model, decoder)
                logger.info('eval done.')
                if all_eval_result is not None:
                    logger.info(
                        f"val: epoch: {epoch:0>5d}, "
                        f"IoU=0.5:0.95,area=all,maxDets=100,mAP:{all_eval_result[0]:.3f}, "
                        f"IoU=0.5,area=all,maxDets=100,mAP:{all_eval_result[1]:.3f}, "
                        f"IoU=0.75,area=all,maxDets=100,mAP:{all_eval_result[2]:.3f}, "
                        f"IoU=0.5:0.95,area=small,maxDets=100,mAP:{all_eval_result[3]:.3f}, "
                        f"IoU=0.5:0.95,area=medium,maxDets=100,mAP:{all_eval_result[4]:.3f}, "
                        f"IoU=0.5:0.95,area=large,maxDets=100,mAP:{all_eval_result[5]:.3f}, "
                        f"IoU=0.5:0.95,area=all,maxDets=1,mAR:{all_eval_result[6]:.3f}, "
                        f"IoU=0.5:0.95,area=all,maxDets=10,mAR:{all_eval_result[7]:.3f}, "
                        f"IoU=0.5:0.95,area=all,maxDets=100,mAR:{all_eval_result[8]:.3f}, "
                        f"IoU=0.5:0.95,area=small,maxDets=100,mAR:{all_eval_result[9]:.3f}, "
                        f"IoU=0.5:0.95,area=medium,maxDets=100,mAR:{all_eval_result[10]:.3f}, "
                        f"IoU=0.5:0.95,area=large,maxDets=100,mAR:{all_eval_result[11]:.3f}")
                    if all_eval_result[0] > best_map:
                        torch.save(model.module.state_dict(),
                                   os.path.join(args.checkpoints, "best.pth"))
                        best_map = all_eval_result[0]

        if local_rank == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'best_map': best_map,
                    'cls_loss': cls_losses,
                    'reg_loss': reg_losses,
                    'loss': losses,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(args.checkpoints, 'latest.pth'))

    if local_rank == 0:
        logger.info(f"finish training, best_map: {best_map:.3f}")
    training_time = (time.time() - start_time) / 3600
    if local_rank == 0:
        logger.info(
            f"finish training, total training time: {training_time:.2f} hours")
if __name__ == '__main__':
    # main() parses its own arguments and creates the logger internally
    main()
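# Note: init_method='env://' together with the args.local_rank argument means
# these scripts expect to be started by PyTorch's distributed launcher, which
# sets MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE and passes --local_rank to each
# process. An illustrative launch command (script name and flag values are
# assumptions, not taken from this file):
#   python -m torch.distributed.launch --nproc_per_node=8 train.py \
#       --per_node_batch_size 32 --lr 1e-4 --epochs 12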
# Detection training entry point for CenterNet: DistributedDataParallel
# training with optional apex mixed precision, AdamW + MultiStepLR, optional
# multi-head fine-tuning from a pretrained single-head model, COCO-style
# mAP/mAR evaluation every 10 epochs.
def main():
    args = parse_args()
    global local_rank
    local_rank = args.local_rank
    if local_rank == 0:
        global logger
        logger = get_logger(__name__, args.log)

    torch.cuda.empty_cache()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True

    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl', init_method='env://')
    global gpus_num
    gpus_num = torch.cuda.device_count()
    if local_rank == 0:
        logger.info(f'use {gpus_num} gpus')
        logger.info(f"args: {args}")

    cudnn.benchmark = True
    cudnn.enabled = True
    start_time = time.time()

    # dataset and dataloader
    if local_rank == 0:
        logger.info('start loading data')

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        Config.train_dataset, shuffle=True, rank=local_rank)
    train_loader = DataLoader(Config.train_dataset,
                              batch_size=args.per_node_batch_size,
                              shuffle=False,
                              pin_memory=True,
                              drop_last=True,
                              num_workers=args.num_workers,
                              sampler=train_sampler)
    if local_rank == 0:
        logger.info('finish loading data')

    model = centernet.__dict__[args.network](**{
        "pretrained": args.pretrained,
        "num_classes": args.num_classes,
        "multi_head": args.multi_head,
        "selayer": args.selayer,
        "use_ttf": args.use_ttf,
        "cls_mlp": args.cls_mlp
    })

    if args.multi_head:
        pre_model = torch.load(args.pre_model_dir, map_location='cpu')
        if local_rank == 0:
            logger.info(f"pretrained_model: {args.pre_model_dir}")
        if args.load_head:

            def copyStateDict(state_dict):
                # strip the 'module.' prefix left by DistributedDataParallel
                if list(state_dict.keys())[0].startswith('module'):
                    start_idx = 1
                else:
                    start_idx = 0
                new_state_dict = OrderedDict()
                for k, v in state_dict.items():
                    name = '.'.join(k.split('.')[start_idx:])
                    new_state_dict[name] = v
                return new_state_dict

            new_dict = copyStateDict(pre_model)
            keys = []
            keys2 = []
            for k, v in new_dict.items():
                keys.append(k)
                # if k.startswith('centernet_head.heatmap_head.0'):
                #     continue
                # else:
                #     keys2.append(k)
                keys2.append(k)
            # copy every pretrained weight, then duplicate the head weights
            # into the second head under the 'centernet_head_2' prefix
            final_dict = {k: new_dict[k] for k in keys}
            for item in keys2:
                temp_name = copy.deepcopy(item)
                final_dict[temp_name.replace(
                    'centernet_head', 'centernet_head_2')] = new_dict[item]
            model.load_state_dict(final_dict, strict=False)
        else:
            model.load_state_dict(pre_model, strict=False)
        # freeze the backbone and the first head; only the new head trains
        for p in model.backbone.parameters():
            p.requires_grad = False
        for p in model.centernet_head.parameters():
            p.requires_grad = False

    if local_rank == 0:
        for name, param in model.named_parameters():
            logger.info(f"{name},{param.requires_grad}")

    flops_input = torch.randn(1, 3, args.input_image_size,
                              args.input_image_size)
    flops, params = profile(model, inputs=(flops_input, ))
    flops, params = clever_format([flops, params], "%.3f")
    if local_rank == 0:
        logger.info(
            f"model: '{args.network}', flops: {flops}, params: {params}")

    criterion = CenterNetLoss(max_object_num=Config.max_object_num).cuda()
    decoder = CenterNetDecoder(image_w=args.input_image_size,
                               image_h=args.input_image_size).cuda()
    model = model.cuda()
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.milestones, gamma=0.1)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
    #                                                        patience=3,
    #                                                        verbose=True)

    if args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    if args.apex:
        # keep numerically sensitive ops in fp32 under apex O1
        amp.register_float_function(torch, 'sigmoid')
        amp.register_float_function(torch, 'softmax')
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        model = apex.parallel.DistributedDataParallel(model,
                                                      delay_allreduce=True)
        if args.sync_bn:
            model = apex.parallel.convert_syncbn_model(model)
    else:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)

    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            if local_rank == 0:
                logger.exception(
                    '{} is not a file, please check it again'.format(
                        args.evaluate))
            sys.exit(-1)
        if local_rank == 0:
            logger.info('start only evaluating')
            logger.info(f"start resuming model from {args.evaluate}")
        checkpoint = torch.load(args.evaluate,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        if local_rank == 0:
            logger.info('start eval.')
            all_eval_result = validate(Config.val_dataset, model, decoder,
                                       args)
            logger.info('eval done.')
            if all_eval_result is not None:
                logger.info(
                    f"val: epoch: {checkpoint['epoch']:0>5d}, "
                    f"IoU=0.5:0.95,area=all,maxDets=100,mAP:{all_eval_result[0]:.3f}, "
                    f"IoU=0.5,area=all,maxDets=100,mAP:{all_eval_result[1]:.3f}, "
                    f"IoU=0.75,area=all,maxDets=100,mAP:{all_eval_result[2]:.3f}, "
                    f"IoU=0.5:0.95,area=small,maxDets=100,mAP:{all_eval_result[3]:.3f}, "
                    f"IoU=0.5:0.95,area=medium,maxDets=100,mAP:{all_eval_result[4]:.3f}, "
                    f"IoU=0.5:0.95,area=large,maxDets=100,mAP:{all_eval_result[5]:.3f}, "
                    f"IoU=0.5:0.95,area=all,maxDets=1,mAR:{all_eval_result[6]:.3f}, "
                    f"IoU=0.5:0.95,area=all,maxDets=10,mAR:{all_eval_result[7]:.3f}, "
                    f"IoU=0.5:0.95,area=all,maxDets=100,mAR:{all_eval_result[8]:.3f}, "
                    f"IoU=0.5:0.95,area=small,maxDets=100,mAR:{all_eval_result[9]:.3f}, "
                    f"IoU=0.5:0.95,area=medium,maxDets=100,mAR:{all_eval_result[10]:.3f}, "
                    f"IoU=0.5:0.95,area=large,maxDets=100,mAR:{all_eval_result[11]:.3f}")
        return

    best_map = 0.0
    start_epoch = 1
    # resume training from the latest checkpoint if one exists
    if os.path.exists(args.resume):
        if local_rank == 0:
            logger.info(f"start resuming model from {args.resume}")
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        start_epoch += checkpoint['epoch']
        best_map = checkpoint['best_map']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        if local_rank == 0:
            logger.info(
                f"finish resuming model from {args.resume}, epoch {checkpoint['epoch']}, "
                f"best_map: {checkpoint['best_map']}, loss: {checkpoint['loss']:.3f}, "
                f"heatmap_loss: {checkpoint['heatmap_loss']:.2f}, "
                f"offset_loss: {checkpoint['offset_loss']:.2f}, "
                f"wh_loss: {checkpoint['wh_loss']:.2f}")

    if local_rank == 0:
        if not os.path.exists(args.checkpoints):
            os.makedirs(args.checkpoints)
        logger.info('start training')

    for epoch in range(start_epoch, args.epochs + 1):
        train_sampler.set_epoch(epoch)
        heatmap_losses, offset_losses, wh_losses, losses = train(
            train_loader, model, criterion, optimizer, scheduler, epoch, args)
        if local_rank == 0:
            logger.info(
                f"train: epoch {epoch:0>3d}, heatmap_loss: {heatmap_losses:.2f}, "
                f"offset_loss: {offset_losses:.2f}, wh_loss: {wh_losses:.2f}, "
                f"loss: {losses:.2f}")

        if epoch % 10 == 0 or epoch == args.epochs:
            if local_rank == 0:
                logger.info('start eval.')
                all_eval_result = validate(Config.val_dataset, model, decoder,
                                           args)
                logger.info('eval done.')
                if all_eval_result is not None:
                    logger.info(
                        f"val: epoch: {epoch:0>5d}, "
                        f"IoU=0.5:0.95,area=all,maxDets=100,mAP:{all_eval_result[0]:.3f}, "
                        f"IoU=0.5,area=all,maxDets=100,mAP:{all_eval_result[1]:.3f}, "
                        f"IoU=0.75,area=all,maxDets=100,mAP:{all_eval_result[2]:.3f}, "
                        f"IoU=0.5:0.95,area=small,maxDets=100,mAP:{all_eval_result[3]:.3f}, "
                        f"IoU=0.5:0.95,area=medium,maxDets=100,mAP:{all_eval_result[4]:.3f}, "
                        f"IoU=0.5:0.95,area=large,maxDets=100,mAP:{all_eval_result[5]:.3f}, "
                        f"IoU=0.5:0.95,area=all,maxDets=1,mAR:{all_eval_result[6]:.3f}, "
                        f"IoU=0.5:0.95,area=all,maxDets=10,mAR:{all_eval_result[7]:.3f}, "
                        f"IoU=0.5:0.95,area=all,maxDets=100,mAR:{all_eval_result[8]:.3f}, "
                        f"IoU=0.5:0.95,area=small,maxDets=100,mAR:{all_eval_result[9]:.3f}, "
                        f"IoU=0.5:0.95,area=medium,maxDets=100,mAR:{all_eval_result[10]:.3f}, "
                        f"IoU=0.5:0.95,area=large,maxDets=100,mAR:{all_eval_result[11]:.3f}")
                    if all_eval_result[0] > best_map:
                        torch.save(model.module.state_dict(),
                                   os.path.join(args.checkpoints, "best.pth"))
                        best_map = all_eval_result[0]

        if local_rank == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'best_map': best_map,
                    'heatmap_loss': heatmap_losses,
                    'offset_loss': offset_losses,
                    'wh_loss': wh_losses,
                    'loss': losses,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(args.checkpoints, 'latest.pth'))

    if local_rank == 0:
        logger.info(f"finish training, best_map: {best_map:.3f}")
    training_time = (time.time() - start_time) / 3600
    if local_rank == 0:
        logger.info(
            f"finish training, total training time: {training_time:.2f} hours")
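# Note: in the multi_head branch above, model.backbone and model.centernet_head
# are frozen, yet the optimizer is built over all model.parameters(). Frozen
# tensors never receive gradients, so AdamW skips them, but a common variant
# (an assumption, not part of the original script) is to hand the optimizer
# only the trainable subset:
#   trainable_params = [p for p in model.parameters() if p.requires_grad]
#   optimizer = torch.optim.AdamW(trainable_params, lr=args.lr)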