Exemplo n.º 1
0
def get_val_dataset(args):
    """Build the COCO val2017 detection dataset located under ``args.data``.

    The dataset is a ``COCODetection`` over ``<args.data>/val2017`` with the
    annotation file ``<args.data>/annotations/instances_val2017.json``. Samples
    go through an ``SSDTransformer`` created with ``val=True`` from the default
    boxes returned by ``dboxes300_coco()``.

    Args:
        args: Options object; only ``args.data`` (dataset root directory) is
            read here.

    Returns:
        The constructed ``COCODetection`` instance.
    """
    # (300, 300) is forwarded as-is; presumably the target image size.
    transform = SSDTransformer(dboxes300_coco(), (300, 300), val=True)

    annotation_file = os.path.join(
        args.data, "annotations/instances_val2017.json")
    image_dir = os.path.join(args.data, "val2017")

    return COCODetection(image_dir, annotation_file, transform)
Exemplo n.º 2
0
def get_train_pytorch_loader(args, num_workers, default_boxes):
    """Create the PyTorch ``DataLoader`` used for training.

    Args:
        args: Options object. Reads ``train_coco_root``, ``train_annotate``,
            ``distributed`` and ``batch_size``; ``args`` itself is also
            forwarded to ``SSDTransformer``.
        num_workers: Passed through to ``DataLoader(num_workers=...)``.
        default_boxes: Default boxes handed to ``SSDTransformer``.

    Returns:
        A ``DataLoader`` over the training ``COCODetection`` dataset with
        ``drop_last=True``.
    """
    image_root = args.train_coco_root
    annotation_file = args.train_annotate
    transform = SSDTransformer(default_boxes, args, (300, 300), val=False)
    coco_train = COCODetection(image_root, annotation_file, transform)

    # A sampler is used only for distributed runs; otherwise the loader
    # shuffles on its own (the two options are never enabled together).
    sampler = None
    if args.distributed:
        sampler = torch.utils.data.distributed.DistributedSampler(coco_train)

    return DataLoader(
        coco_train,
        batch_size=args.batch_size,
        shuffle=sampler is None,
        sampler=sampler,
        drop_last=True,
        num_workers=num_workers,
    )
Exemplo n.º 3
0
def train(args):
    """Train an SSD300 model and print a throughput/accuracy summary.

    Steps, in order: detect distributed mode from the ``WORLD_SIZE``
    environment variable, build the validation data and the model, set up
    SGD with a ``MultiStepLR`` schedule (optionally wrapped for fp16), build
    the training loader for the selected data pipeline, then run
    ``args.epochs`` epochs, evaluating on the epochs listed in
    ``args.evaluation``.

    Args:
        args: Options object.
            Read: ``local_rank``, ``data``, ``learning_rate``, ``batch_size``,
            ``fp16``, ``momentum``, ``weight_decay``, ``multistep``,
            ``data_pipeline``, ``num_workers``, ``epochs``, ``evaluation``.
            Written: ``distributed``, ``N_gpu``, ``learning_rate`` (rescaled
            in place), ``train_annotate``, ``train_coco_root``.

    Raises:
        ValueError: If ``args.data_pipeline`` is neither ``'no_dali'`` nor
            ``'dali'``.
    """
    # Distributed only when a launcher exported WORLD_SIZE > 1.
    args.distributed = int(os.environ.get('WORLD_SIZE', 1)) > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    # One output per ground-truth category plus one extra class
    # (presumably background -- confirm against SSD300).
    ssd300 = SSD300(len(cocoGt.cats) + 1)

    # Scale the base learning rate linearly with the number of processes and
    # with the batch size relative to a reference batch of 32.
    args.learning_rate = args.learning_rate * \
        args.N_gpu * (args.batch_size / 32)
    iteration = 0
    loss_func = Loss(dboxes)

    ssd300.cuda()
    loss_func.cuda()

    if args.fp16:
        ssd300 = network_to_half(ssd300)

    if args.distributed:
        ssd300 = DDP(ssd300)

    optimizer = torch.optim.SGD(tencent_trick(ssd300),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # The scheduler is attached to the raw SGD optimizer, before the optional
    # fp16 wrapper replaces the ``optimizer`` name below.
    scheduler = MultiStepLR(optimizer=optimizer,
                            milestones=args.multistep,
                            gamma=0.1)

    if args.fp16:
        optimizer = FP16_Optimizer(optimizer, static_loss_scale=128.)

    # Inverse of the validation dataset's label map, used by evaluate().
    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    avg_loss = 0.0
    acc = 0
    batch_perf = AverageMeter()
    train_start = time.time()

    args.train_annotate = os.path.join(args.data,
                                       "annotations/instances_train2017.json")
    args.train_coco_root = os.path.join(args.data, "train2017")
    local_seed = set_seeds(args)

    if args.data_pipeline == 'no_dali':
        train_trans = SSDTransformer(dboxes, args, (300, 300), val=False)
        train_dataset = get_train_dataset(args, train_trans)
        train_loader = get_train_loader(train_dataset, args, args.num_workers)
    elif args.data_pipeline == 'dali':
        train_loader = get_train_dali_loader(args, dboxes, local_seed)
    else:
        # Without this branch an unknown value left train_loader unbound and
        # the failure only appeared later as a NameError in the epoch loop.
        raise ValueError(
            "Unsupported data_pipeline {!r}; expected 'no_dali' or 'dali'"
            .format(args.data_pipeline))

    for epoch in range(args.epochs):
        # NOTE(review): the scheduler is stepped before the epoch's optimizer
        # steps. PyTorch >= 1.1 expects the opposite order; confirm against
        # the PyTorch version this is run with before moving it.
        scheduler.step()

        # NOTE(review): iteration and avg_loss are passed in but never
        # rebound here, so every epoch receives the initial values unless
        # epoch_loop tracks them some other way -- confirm.
        epoch_loop(train_loader, args, ssd300, time.time(), loss_func,
                   optimizer, iteration, avg_loss, batch_perf, epoch)
        torch.cuda.synchronize()

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map,
                           args)

        # Only some loaders expose reset(). Look the method up instead of
        # catching AttributeError around the call, so an AttributeError
        # raised inside a real reset() is not silently discarded.
        reset = getattr(train_loader, 'reset', None)
        if reset is not None:
            reset()

    if args.local_rank == 0:
        print(
            "Training end: Average speed: {:3f} img/sec, Total time: {:3f} sec, Final accuracy: {:3f} mAP"
            .format(args.N_gpu * args.batch_size / batch_perf.avg,
                    time.time() - train_start, acc))