def test_mb2_ssd_coco_80(self):
    model = get_model_by_name(
        model_name="mb2_ssd",
        dataset_name="coco_80",
        pretrained=True,
        progress=False,
    )
    from deeplite_torch_zoo.src.objectdetection.datasets.coco_config import (
        DATA,
        MISSING_IDS,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/coco2017/",
        dataset_name="coco",
        model_name="mb2_ssd",
        batch_size=32,
        missing_ids=MISSING_IDS,
        classes=DATA["CLASSES"],
    )["test"]
    cocoGt = COCO(
        "/neutrino/datasets/coco2017/annotations/instances_val2017.json")
    eval_fn = get_eval_function("mb2_ssd", "coco_80")
    APs = eval_fn(
        model,
        test_loader,
        gt=cocoGt,
        _set="coco",
    )
    print(APs)
    self.assertEqual(abs(APs["mAP"] - 0.138) < 0.001, True)
def test_create_detection_model_output_shape(model_name, dataset_name, datasplit_kwargs, output_shapes):
    model = create_model(
        model_name=model_name,
        pretraining_dataset=dataset_name,
        num_classes=CUSTOM_NUM_CLASSES,
        progress=False,
        device="cpu",
    )
    if model_name in MODEL_NAME_DATASPLIT_FN_ARG_MAP:
        model_name = MODEL_NAME_DATASPLIT_FN_ARG_MAP[model_name]
    train_loader = get_data_splits_by_name(
        data_root=MOCK_VOC_PATH,
        dataset_name=dataset_name,
        model_name=model_name,
        batch_size=TEST_BATCH_SIZE,
        num_workers=0,
        device="cpu",
        **datasplit_kwargs,
    )["train"]
    if 'yolo' in model_name:
        dataset = train_loader.dataset
        img, _, _, _ = dataset[0]
        y = model(torch.unsqueeze(img, dim=0))
        assert y[0].shape == (1, *output_shapes[0])
        assert y[1].shape == (1, *output_shapes[1])
        assert y[2].shape == (1, *output_shapes[2])
    else:
        img, _, _ = next(iter(train_loader))
        model.eval()
        y1, y2 = model(img)
        assert y1.shape == (TEST_BATCH_SIZE, *output_shapes[0])
        assert y2.shape == (TEST_BATCH_SIZE, *output_shapes[1])
def test_create_segmentation_model_output_shape(model_name, dataset_name, datasplit_kwargs, output_shape):
    model = create_model(
        model_name=model_name,
        pretraining_dataset=dataset_name,
        num_classes=CUSTOM_NUM_CLASSES,
        progress=False,
        device="cpu",
    )
    if model_name in MODEL_NAME_DATASPLIT_FN_ARG_MAP:
        model_name = MODEL_NAME_DATASPLIT_FN_ARG_MAP[model_name]
    test_loader = get_data_splits_by_name(
        data_root=MOCK_DATASETS_PATH if 'voc' in dataset_name else MOCK_CARVANA_PATH,
        dataset_name=dataset_name,
        model_name=model_name,
        num_workers=0,
        device="cpu",
        **datasplit_kwargs,
    )["test"]
    dataset = test_loader.dataset
    if 'unet' in model_name:
        img, msk, _ = dataset[0]
    else:
        img, msk = dataset[0]
    model.eval()
    y = model(torch.unsqueeze(img, dim=0))
    assert y.shape == (*output_shape, *msk.shape)
def test_mb2_ssd_coco_6(self):
    model = get_model_by_name(
        model_name="mb2_ssd",
        dataset_name="coco_gm_6",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/home/ehsan/data/",
        dataset_name="coco_gm",
        model_name="mb2_ssd",
        batch_size=32,
        train_ann_file="train_data_COCO.json",
        train_dir="images/train",
        val_ann_file="test_data_COCO.json",
        val_dir="images/test",
        classes=["class1", "class2", "class3", "class4", "class5", "class6"],
    )["test"]
    cocoGt = COCO("/home/ehsan/data/test_data_COCO.json")
    eval_fn = get_eval_function("mb2_ssd", "coco_gm")
    APs = eval_fn(
        model,
        test_loader,
        gt=cocoGt,
        _set="coco",
    )
    self.assertEqual(abs(APs["mAP"] - 0.227) < 0.001, True)
def test_cifar100_dataset(self):
    BATCH_SIZE = 128
    datasplit = get_data_splits_by_name(dataset_name="cifar100",
                                        batch_size=BATCH_SIZE)
    train_len = len(datasplit["train"])
    test_len = len(datasplit["test"])
    self.assertEqual(train_len, 391)
    self.assertEqual(test_len, 79)
def test_mnist_dataset(self):
    BATCH_SIZE = 128
    datasplit = get_data_splits_by_name(dataset_name="mnist",
                                        batch_size=BATCH_SIZE)
    train_len = len(datasplit["train"])
    test_len = len(datasplit["test"])
    self.assertEqual(train_len, 469)
    self.assertEqual(test_len, 79)
def test_vww_dataset(self):
    BATCH_SIZE = 128
    datasplit = get_data_splits_by_name(
        dataset_name="vww",
        data_root=str(DATASETS_ROOT / "vww"),
        batch_size=BATCH_SIZE,
    )
    train_len = len(datasplit["train"])
    test_len = len(datasplit["test"])
    self.assertEqual(train_len, 901)
    self.assertEqual(test_len, 63)
def test_imagenet1000_dataset(self):
    BATCH_SIZE = 128
    datasplit = get_data_splits_by_name(
        data_root=str(DATASETS_ROOT / "imagenet"),
        dataset_name="imagenet",
        batch_size=BATCH_SIZE,
    )
    train_len = len(datasplit["train"])
    test_len = len(datasplit["test"])
    self.assertEqual(train_len, 10010)
    self.assertEqual(test_len, 391)
def test_coco_yolo_dataset(self):
    BATCH_SIZE = 10
    datasplit = get_data_splits_by_name(
        data_root=str(DATASETS_ROOT / "coco"),
        dataset_name="coco",
        model_name="yolo",
        batch_size=BATCH_SIZE,
    )
    train_len = len(datasplit["train"])
    test_len = len(datasplit["test"])
    self.assertEqual(train_len, 11829)
    self.assertEqual(test_len, 500)
def test_voc0712_dataset(self):
    BATCH_SIZE = 128
    datasplit = get_data_splits_by_name(
        data_root=str(DATASETS_ROOT / "VOCdevkit"),
        dataset_name="voc",
        model_name="vgg16_ssd",
        batch_size=BATCH_SIZE,
    )
    train_len = len(datasplit["train"])
    test_len = len(datasplit["test"])
    self.assertEqual(train_len, 130)
    self.assertEqual(test_len, 39)
def test_mb3_small_vww(self):
    model = get_model_by_name(
        model_name="mobilenetv3_small",
        dataset_name="vww",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/vww",
        dataset_name="vww",
        batch_size=128,
    )["test"]
    eval_fn = get_eval_function("mobilenetv3_small", "vww")
    ACC = eval_fn(model, test_loader)
    self.assertEqual(abs(ACC["acc"] - 0.892) < 0.001, True)
def test_mb2_ssd_voc_20(self):
    model = get_model_by_name(
        model_name="mb2_ssd",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/VOCdevkit",
        dataset_name="voc",
        model_name="mb2_ssd_lite",
        batch_size=32,
    )["test"]
    eval_fn = get_eval_function("mb2_ssd", "voc_20")
    APs = eval_fn(model, test_loader)
    self.assertEqual(abs(APs["mAP"] - 0.443) < 0.001, True)
def test_resnet50_tinyimagenet(self):
    model = get_model_by_name(
        model_name="resnet50",
        dataset_name="tinyimagenet",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/TinyImageNet/",
        dataset_name="tinyimagenet",
        batch_size=128,
        num_workers=0,
    )["val"]
    eval_fn = get_eval_function("resnet50", "tinyimagenet")
    ACC = eval_fn(model, test_loader)
    print(ACC)
    self.assertEqual(abs(ACC["acc"] - 0.730) < 0.001, True)
def test_vgg16_ssd_wider_face(self):
    model = get_model_by_name(
        model_name="vgg16_ssd",
        dataset_name="wider_face",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/wider_face",
        dataset_name="wider_face",
        model_name="vgg16_ssd",
        batch_size=8,
    )["test"]
    eval_fn = get_eval_function("vgg16_ssd", "wider_face")
    APs = eval_fn(model, test_loader)
    print(APs)
    self.assertEqual(abs(APs["mAP"] - 0.7071) < 0.001, True)
def test_unet_scse_resnet18_carvana(self):
    model = get_model_by_name(
        model_name="unet_scse_resnet18",
        dataset_name="carvana",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/carvana",
        dataset_name="carvana",
        model_name="unet",
        num_workers=1,
    )["test"]
    eval_fn = get_eval_function("unet_scse_resnet18", "carvana")
    acc = eval_fn(model, test_loader, net="unet_scse_resnet18")
    miou = acc["miou"]
    print(miou)
    self.assertEqual(abs(miou - 0.989) < 0.001, True)
def test_unet_carvana(self):
    model = get_model_by_name(
        model_name="unet",
        dataset_name="carvana",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/carvana",
        dataset_name="carvana",
        model_name="unet",
        num_workers=1,
    )["test"]
    eval_fn = get_eval_function("unet", "carvana")
    acc = eval_fn(model, test_loader, net="unet")
    dc = acc["dice_coeff"]
    print(dc)
    self.assertEqual(abs(dc - 0.983) < 0.001, True)
def test_fasterrcnn_resnet50_fpn_coco(self):
    model = get_model_by_name(
        model_name="fasterrcnn_resnet50_fpn",
        dataset_name="coco_80",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/coco2017/",
        dataset_name="coco",
        model_name="fasterrcnn_resnet50_fpn",
        batch_size=32,
    )["test"]
    cocoGt = COCO(
        "/neutrino/datasets/coco2017/annotations/instances_val2017.json")
    eval_fn = get_eval_function("fasterrcnn_resnet50_fpn", "coco_80")
    APs = eval_fn(model, test_loader, gt=cocoGt)
    self.assertEqual(abs(APs["mAP"] - 0.369) < 0.001, True)
def test_deeplab_mobilenet_voc_20(self):
    model = get_model_by_name(
        model_name="deeplab_mobilenet",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets",
        sbd_root=None,
        dataset_name="voc",
        model_name="deeplab_mobilenet",
        num_workers=2,
        backbone="vgg",
    )["test"]
    eval_fn = get_eval_function("deeplab_mobilenet", "voc_20")
    acc = eval_fn(model, test_loader, net="deeplab")
    miou = acc["miou"]
    print(miou)
    self.assertEqual(abs(miou - 0.571) < 0.001, True)
def test_fcn32_voc_20(self):
    model = get_model_by_name(
        model_name="fcn32",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets",
        dataset_name="voc",
        model_name="fcn32",
        num_workers=1,
        batch_size=1,
        backbone="vgg",
    )["test"]
    eval_fn = get_eval_function("fcn32", "voc_20")
    acc = eval_fn(model, test_loader, net="fcn32")
    miou = acc["miou"]
    print(miou)
    self.assertEqual(abs(miou - 0.713) < 0.001, True)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch training example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument("--dataset",
                        metavar="DATASET",
                        default="cifar100",
                        help="dataset to use")
    parser.add_argument(
        "-j",
        "--workers",
        type=int,
        metavar="N",
        default=4,
        help="number of data loading workers",
    )
    parser.add_argument("-r",
                        "--data_root",
                        metavar="PATH",
                        default="",
                        help="dataset data root path")
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1000,
        metavar="N",
        help="input batch size for testing (default: 1000)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=14,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.1,
        metavar="LR",
        help="learning rate (default: 0.1)",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="learning rate step gamma (default: 0.7)",
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument('-a',
                        '--arch',
                        metavar='ARCH',
                        default='vgg19',
                        help='model architecture')
    args = parser.parse_args()

    device = torch.device("cuda")
    data_splits = get_data_splits_by_name(
        dataset_name=args.dataset,
        data_root=args.data_root,
        batch_size=args.batch_size,
        num_torch_workers=args.workers,
    )
    model = get_model_by_name(model_name=args.arch,
                              dataset_name=args.dataset,
                              pretrained=True,
                              progress=True,
                              device=device)
    model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    criterion = CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, data_splits["train"], optimizer, criterion, epoch)
        test(model, device, data_splits["test"])
        scheduler.step()

    torch.save(model.state_dict(), "{}_checkpoint.pt".format(args.arch))
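For reference, a minimal sketch of how the script above might be invoked from the command line. The script filename is a placeholder, and the flag names and defaults come directly from the argparse definitions in main():

# Hypothetical invocation (script filename is a placeholder; flags mirror the
# argparse arguments defined in main() above):
#
#   python train_classifier.py --dataset cifar100 -r /path/to/cifar100 \
#       -a vgg19 --batch-size 128 --epochs 20 --lr 0.1 --gamma 0.7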
def train(opt, device):
    epochs, batch_size, noval, nosave, workers, freeze = \
        opt.epochs, opt.batch_size, opt.noval, opt.nosave, opt.workers, opt.freeze

    d = datetime.datetime.now()
    run_id = '{:%Y-%m-%d__%H-%M-%S}'.format(d)
    save_dir = Path(opt.save_dir) / run_id

    # Directories
    w = save_dir / 'weights'  # weights dir
    w.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Get hyperparameter dict
    hyp, hyp_loss = get_hyperparameter_dict(opt.dataset_name, opt.hp_config)

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)

    tb_writer = SummaryWriter(save_dir)
    opt.img_dir = Path(opt.img_dir)

    # Config
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)

    # Dataloaders
    dataset_kwargs = {}
    if opt.train_img_res:
        dataset_kwargs = {'img_size': opt.train_img_res}
    dataset_splits = get_data_splits_by_name(
        data_root=opt.img_dir,
        dataset_name=opt.dataset_name,
        model_name=opt.model_name,
        batch_size=batch_size,
        num_workers=workers,
        distributed=(cuda and RANK != -1),
        **dataset_kwargs
    )
    test_img_size = dataset_splits["test"].dataset._img_size
    train_img_size = dataset_splits["train"].dataset._img_size
    if opt.test_img_res:
        test_img_size = opt.test_img_res

    train_loader = dataset_splits["train"]
    dataset = train_loader.dataset
    nc = dataset.num_classes
    nb = len(train_loader)  # number of batches

    # Model
    model = create_model(
        model_name=opt.model_name,
        pretraining_dataset=opt.pretraining_source_dataset,
        pretrained=opt.pretrained,
        num_classes=nc,
        progress=True,
        device=device,
    )

    # Freeze
    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print(f'freezing {k}')
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g1.append(v.weight)

    if opt.adam:
        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                f"{len(g0)} weight, {len(g1)} weight (no decay), {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    start_epoch, best_fitness = 0, 0.0

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Process 0
    if RANK in [-1, 0]:
        # Anchors
        model.half().float()  # pre-reduce anchor precision

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model parameters
    hyp['giou'] *= 3. / nl  # scale to layers
    hyp['box'] = hyp['giou']
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (train_img_size / 640) ** 2 * 3. / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model

    eval_function = get_eval_function(dataset_name=opt.dataset_name, model_name=opt.model_name)

    criterion = YoloV5Loss(
        model=model,
        num_classes=nc,
        device=device,
        hyp_cfg=hyp_loss,
    )

    if opt.eval_before_train:
        ap_dict = evaluate(model, eval_function, opt.dataset_name, opt.img_dir, nc, test_img_size, device)
        LOGGER.info(f'Eval metrics: {ap_dict}')

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    last_opt_step = -1
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    stopper = EarlyStopping(patience=opt.patience)

    loss_giou_mean = AverageMeter()
    loss_conf_mean = AverageMeter()
    loss_cls_mean = AverageMeter()
    loss_mean = AverageMeter()

    LOGGER.info(f'Image sizes {train_img_size} train, {test_img_size} val\n'
                f'Using {train_loader.num_workers} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')

    for epoch in range(start_epoch, epochs):  # epoch
        model.train()

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()

        for i, (imgs, targets, labels_length, _) in pbar:  # batch
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float()

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0,
                                                 x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(train_img_size * 0.5, train_img_size * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_giou, loss_conf, loss_cls = criterion(
                    pred, targets, labels_length, imgs.shape[-1]
                )

                # Update running mean of tracked metrics
                loss_items = torch.tensor([loss_giou, loss_conf, loss_cls]).to(device)
                if RANK in (-1, 0):
                    loss_giou_mean.update(loss_giou, imgs.size(0))
                    loss_conf_mean.update(loss_conf, imgs.size(0))
                    loss_cls_mean.update(loss_cls, imgs.size(0))
                    loss_mean.update(loss, imgs.size(0))

                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
            # end batch

        # Scheduler
        scheduler.step()

        if RANK in [-1, 0]:
            for idx, param_group in enumerate(optimizer.param_groups):
                tb_writer.add_scalar(f'learning_rate/gr{idx}', param_group['lr'], epoch)
            tb_writer.add_scalar('train/giou_loss', loss_giou_mean.avg, epoch)
            tb_writer.add_scalar('train/conf_loss', loss_conf_mean.avg, epoch)
            tb_writer.add_scalar('train/cls_loss', loss_cls_mean.avg, epoch)
            tb_writer.add_scalar('train/loss', loss_mean.avg, epoch)

            # mAP
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            if (not noval or final_epoch) and epoch % opt.eval_freq == 0:
                # Calculate mAP
                ap_dict = evaluate(ema.ema, eval_function, opt.dataset_name, opt.img_dir, nc, test_img_size, device)
                LOGGER.info(f'Eval metrics: {ap_dict}')
                tb_writer.add_scalar('eval/mAP', ap_dict['mAP'], epoch)
                for eval_key, eval_value in ap_dict.items():
                    if eval_key != 'mAP':
                        tb_writer.add_scalar(f'ap_per_class/{eval_key}', eval_value, epoch)

            # Update best mAP
            fi = ap_dict['mAP']
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            if (not nosave) or final_epoch:  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'model': deepcopy(de_parallel(model)).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt

        # Stop Single-GPU
        if RANK == -1 and stopper(epoch=epoch, fitness=fi):
            break
        # end epoch
    # end training

    if RANK in [-1, 0]:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    ckpt = torch.load(f, map_location=device)
                    model = ckpt['ema' if ckpt.get('ema') else 'model']
                    model.float().eval()
                    ap_dict = evaluate(model, eval_function, opt.dataset_name, opt.img_dir, nc, test_img_size, device)
                    LOGGER.info(f'Eval metrics: {ap_dict}')
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    torch.cuda.empty_cache()
def main():
    setup_default_logging()
    args, args_text = _parse_args()

    if args.log_wandb:
        if has_wandb:
            wandb.init(project=args.experiment, config=args)
        else:
            _logger.warning(
                "You've requested to log metrics to wandb but package not found. "
                "Metrics not being logged to wandb, try `pip install wandb`")

    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
        _logger.info(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (args.rank, args.world_size))
    else:
        _logger.info('Training with a single process on 1 GPU.')
    assert args.rank >= 0

    # resolve AMP arguments based on PyTorch / Apex availability
    use_amp = None
    if args.amp:
        # `--amp` chooses native amp before apex (APEX ver not actively maintained)
        if has_native_amp:
            args.native_amp = True
        elif has_apex:
            args.apex_amp = True
    if args.apex_amp and has_apex:
        use_amp = 'apex'
    elif args.native_amp and has_native_amp:
        use_amp = 'native'
    elif args.apex_amp or args.native_amp:
        _logger.warning(
            "Neither APEX nor native Torch AMP is available, using float32. "
            "Install NVIDIA apex or upgrade to PyTorch 1.6")

    random_seed(args.seed, args.rank)

    if args.fuser:
        set_jit_fuser(args.fuser)

    data_splits = get_data_splits_by_name(
        dataset_name=args.dataset_name,
        data_root=args.data_dir,
        batch_size=args.batch_size,
    )
    loader_train, loader_eval = data_splits['train'], data_splits['test']

    model_wrapper_fn = MODEL_WRAPPER_REGISTRY.get(
        model_name=args.model.lower(),
        dataset_name=args.pretraining_original_dataset)
    model = model_wrapper_fn(pretrained=args.pretrained,
                             progress=True,
                             num_classes=len(loader_train.dataset.classes))

    if args.local_rank == 0:
        _logger.info(
            f'Model {safe_model_name(args.model)} created, param count:{sum([m.numel() for m in model.parameters()])}'
        )

    data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

    # setup augmentation batch splits for contrastive loss or split bn
    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    # enable split bn (separate bn stats per batch-portion)
    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    # move model to GPU, enable channels last layout if set
    model.cuda()
    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    # setup synchronized BatchNorm for distributed training
    if args.distributed and args.sync_bn:
        assert not args.split_bn
        if has_apex and use_amp == 'apex':
            # Apex SyncBN preferred unless native amp is activated
            model = convert_syncbn_model(model)
        else:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        if args.local_rank == 0:
            _logger.info(
                'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.'
            )

    optimizer = create_optimizer_v2(model, **optimizer_kwargs(cfg=args))

    # setup automatic mixed-precision (AMP) loss scaling and op casting
    amp_autocast = suppress  # do nothing
    loss_scaler = None
    if use_amp == 'apex':
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        loss_scaler = ApexScaler()
        if args.local_rank == 0:
            _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.')
    elif use_amp == 'native':
        amp_autocast = torch.cuda.amp.autocast
        loss_scaler = NativeScaler()
        if args.local_rank == 0:
            _logger.info('Using native Torch AMP. Training in mixed precision.')
    else:
        if args.local_rank == 0:
            _logger.info('AMP not enabled. Training in float32.')

    # optionally resume from a checkpoint
    resume_epoch = None
    if args.resume:
        resume_epoch = resume_checkpoint(
            model,
            args.resume,
            optimizer=None if args.no_resume_opt else optimizer,
            loss_scaler=None if args.no_resume_opt else loss_scaler,
            log_info=args.local_rank == 0)

    # setup exponential moving average of model weights, SWA could be used here too
    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEmaV2(
            model,
            decay=args.model_ema_decay,
            device='cpu' if args.model_ema_force_cpu else None)
        if args.resume:
            load_checkpoint(model_ema.module, args.resume, use_ema=True)

    # setup distributed training
    if args.distributed:
        if has_apex and use_amp == 'apex':
            # Apex DDP preferred unless native amp is activated
            if args.local_rank == 0:
                _logger.info("Using NVIDIA APEX DistributedDataParallel.")
            model = ApexDDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                _logger.info("Using native Torch DistributedDataParallel.")
            model = NativeDDP(model,
                              device_ids=[args.local_rank],
                              broadcast_buffers=not args.no_ddp_bb)
        # NOTE: EMA model does not need to be wrapped by DDP

    # setup learning rate schedule and starting epoch
    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        _logger.info('Scheduled epochs: {}'.format(num_epochs))

    # setup loss function
    if args.jsd_loss:
        assert num_aug_splits > 1  # JSD only valid with aug splits set
        train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing)
    elif args.smoothing:
        if args.bce_loss:
            train_loss_fn = BinaryCrossEntropy(
                smoothing=args.smoothing, target_threshold=args.bce_target_thresh)
        else:
            train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing)
    else:
        train_loss_fn = nn.CrossEntropyLoss()
    train_loss_fn = train_loss_fn.cuda()
    validate_loss_fn = nn.CrossEntropyLoss().cuda()

    # setup checkpoint saver and eval metric tracking
    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = None
    if args.rank == 0:
        if args.experiment:
            exp_name = args.experiment
        else:
            exp_name = '-'.join([
                datetime.now().strftime("%Y%m%d-%H%M%S"),
                safe_model_name(args.model),
                str(data_config['input_size'][-1])
            ])
        output_dir = get_outdir(
            args.output if args.output else './output/train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(model=model,
                                optimizer=optimizer,
                                args=args,
                                model_ema=model_ema,
                                amp_scaler=loss_scaler,
                                checkpoint_dir=output_dir,
                                recovery_dir=output_dir,
                                decreasing=decreasing,
                                max_history=args.checkpoint_hist)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed and hasattr(loader_train.sampler, 'set_epoch'):
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_one_epoch(epoch,
                                            model,
                                            loader_train,
                                            optimizer,
                                            train_loss_fn,
                                            args,
                                            lr_scheduler=lr_scheduler,
                                            saver=saver,
                                            output_dir=output_dir,
                                            amp_autocast=amp_autocast,
                                            loss_scaler=loss_scaler,
                                            model_ema=model_ema)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    _logger.info("Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model,
                                    loader_eval,
                                    validate_loss_fn,
                                    args,
                                    amp_autocast=amp_autocast)

            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                    distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
                ema_eval_metrics = validate(model_ema.module,
                                            loader_eval,
                                            validate_loss_fn,
                                            args,
                                            amp_autocast=amp_autocast,
                                            log_suffix=' (EMA)')
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            if output_dir is not None:
                update_summary(epoch,
                               train_metrics,
                               eval_metrics,
                               os.path.join(output_dir, 'summary.csv'),
                               write_header=best_metric is None,
                               log_wandb=args.log_wandb and has_wandb)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric)

    except KeyboardInterrupt:
        pass

    if best_metric is not None:
        _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))