def get_val_dataset(args):
    dboxes = dboxes300_coco()
    # Match the SSDTransformer signature used for training below;
    # val=True disables the training-time augmentations.
    val_trans = SSDTransformer(dboxes, args, (300, 300), val=True)
    val_annotate = os.path.join(args.data, "annotations/instances_val2017.json")
    val_coco_root = os.path.join(args.data, "val2017")
    val_coco = COCODetection(val_coco_root, val_annotate, val_trans)
    return val_coco
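

# get_val_dataloader() is called from train() below but is not defined in this
# excerpt. A minimal sketch, assuming it mirrors get_train_pytorch_loader():
# no shuffling, no drop_last, and a DistributedSampler in multi-GPU runs.
def get_val_dataloader(dataset, args):
    if args.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    else:
        val_sampler = None
    return DataLoader(dataset,
                      batch_size=args.batch_size,
                      shuffle=False,
                      sampler=val_sampler,
                      num_workers=args.num_workers)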


def get_train_pytorch_loader(args, num_workers, default_boxes):
    dataset = COCODetection(
        args.train_coco_root,
        args.train_annotate,
        SSDTransformer(default_boxes, args, (300, 300), val=False))
    # In distributed runs each rank sees a disjoint shard of the dataset;
    # the sampler handles shuffling, so DataLoader shuffling is disabled.
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    else:
        train_sampler = None
    train_dataloader = DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=(train_sampler is None),
                                  sampler=train_sampler,
                                  drop_last=True,
                                  num_workers=num_workers)
    return train_dataloader
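

# Usage sketch (the paths are filled in by train() before this helper runs):
#   args.train_annotate = os.path.join(args.data, "annotations/instances_train2017.json")
#   args.train_coco_root = os.path.join(args.data, "train2017")
#   loader = get_train_pytorch_loader(args, args.num_workers, dboxes300_coco())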


def train(args):
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    # One output class per COCO category plus the background class.
    ssd300 = SSD300(len(cocoGt.cats) + 1)

    # Scale the base learning rate linearly with the global batch size
    # (the reference configuration is a batch of 32 on a single GPU).
    args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)
    iteration = 0
    loss_func = Loss(dboxes)

    ssd300.cuda()
    loss_func.cuda()

    if args.fp16:
        ssd300 = network_to_half(ssd300)

    if args.distributed:
        ssd300 = DDP(ssd300)

    optimizer = torch.optim.SGD(tencent_trick(ssd300),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer=optimizer,
                            milestones=args.multistep,
                            gamma=0.1)
    if args.fp16:
        optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.)

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    avg_loss = 0.0
    acc = 0
    batch_perf = AverageMeter()
    end = time.time()
    train_start = end

    args.train_annotate = os.path.join(args.data,
                                       "annotations/instances_train2017.json")
    args.train_coco_root = os.path.join(args.data, "train2017")

    local_seed = set_seeds(args)

    if args.data_pipeline == 'no_dali':
        train_trans = SSDTransformer(dboxes, args, (300, 300), val=False)
        train_dataset = get_train_dataset(args, train_trans)
        train_loader = get_train_loader(train_dataset, args, args.num_workers)
    elif args.data_pipeline == 'dali':
        train_loader = get_train_dali_loader(args, dboxes, local_seed)
    else:
        # Fail fast instead of hitting a NameError on train_loader below.
        raise ValueError('Unknown data pipeline: {}'.format(args.data_pipeline))

    for epoch in range(args.epochs):
        start_epoch_time = time.time()
        epoch_loop(train_loader, args, ssd300, time.time(), loss_func,
                   optimizer, iteration, avg_loss, batch_perf, epoch)
        # Step the schedule after the epoch's optimizer updates; PyTorch >= 1.1
        # expects scheduler.step() to follow optimizer.step().
        scheduler.step()

        torch.cuda.synchronize()

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder,
                           inv_map, args)

        # DALI iterators must be reset between epochs; the native DataLoader
        # has no reset(), so the AttributeError is ignored.
        try:
            train_loader.reset()
        except AttributeError:
            pass

    if args.local_rank == 0:
        print("Training end: Average speed: {:.3f} img/sec, "
              "Total time: {:.3f} sec, Final accuracy: {:.3f} mAP".format(
                  args.N_gpu * args.batch_size / batch_perf.avg,
                  time.time() - train_start,
                  acc))
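

# A minimal launch sketch. The flag names and defaults below are assumptions
# inferred from the attributes train() reads off `args`; they are not
# confirmed by this excerpt. In distributed mode, launch with torchrun (or
# torch.distributed.launch), which sets WORLD_SIZE and the local rank.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='SSD300 training on COCO')
    parser.add_argument('--data', default='/coco', help='COCO dataset root directory')
    parser.add_argument('--epochs', type=int, default=65)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--learning-rate', type=float, default=2.6e-3)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=5e-4)
    parser.add_argument('--multistep', nargs='+', type=int, default=[43, 54])
    parser.add_argument('--evaluation', nargs='+', type=int,
                        default=[21, 31, 37, 42, 48, 53, 59, 64])
    parser.add_argument('--num-workers', type=int, default=4)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--fp16', action='store_true')
    parser.add_argument('--data_pipeline', choices=['dali', 'no_dali'], default='dali')
    parser.add_argument('--local_rank', type=int, default=0)
    train(parser.parse_args())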