Example #1
    def train_dataloader(
        self, batch_size: int = 1, image_transforms: Optional[Union[List[Callable], Callable]] = None
    ) -> DataLoader:
        """
        VOCDetection train set uses the `train` subset

        Args:
            batch_size: size of batch
            image_transforms: custom image transforms
        """
        transforms = [_prepare_voc_instance]
        image_transforms = image_transforms or self.train_transforms or self._default_transforms()
        transforms = Compose(transforms, image_transforms)
        dataset = VOCDetection(self.data_dir, year=self.year, image_set="train", transforms=transforms)
        loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=self.shuffle,
            num_workers=self.num_workers,
            drop_last=self.drop_last,
            pin_memory=self.pin_memory,
            collate_fn=_collate_fn,
        )
        return loader
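The loader references a module-level `_collate_fn` that the excerpt does not show, and `Compose` here appears to be the module's own two-argument helper rather than `torchvision.transforms.Compose`. A minimal sketch of `_collate_fn`, assuming the usual detection convention of keeping variable-size samples as parallel tuples:

def _collate_fn(batch):
    # Each sample is an (image, target) pair; images and annotations vary in
    # size, so return parallel tuples instead of stacking into tensors.
    return tuple(zip(*batch))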
Example #2
def create_data():
    def collate_fn(batch):
        result = [[] for _ in range(len(batch[0]))]
        for data in batch:
            for i, item in enumerate(data):
                result[i].append(item)
        return result

    voc_transforms = voc_Compose([CvtLabel(LABEL_NAMES), voc_ToTensor()])
    voc_train_set = VOCDetection(voc.DATA_DIR, 'UA-DETRAC', "train", transforms=voc_transforms)
    voc_train_loader = DataLoader(voc_train_set, batch_size=voc.BATCH_SIZE, shuffle=True,
                                  num_workers=voc.NUM_WORKERS, collate_fn=collate_fn)

    amap_train_transforms = Compose(
        [Resize((640, 320)), RandomHorizontalFlip(), RandomGrayscale(), RandomCrop((640, 320), 20), ToTensor()])
    amap_val_transforms = Compose([Resize((640, 320)), ToTensor()])

    amap_train_dataset = AmapDataset(amap.DATA_DIR, "train", transforms=amap_train_transforms)
    amap_val_dataset = AmapDataset(amap.DATA_DIR, "trainval", transforms=amap_val_transforms)

    amap_train_loader = DataLoader(amap_train_dataset, batch_size=amap.BATCH_SIZE, shuffle=True,
                                   num_workers=amap.NUM_WORKERS)
    amap_val_loader = DataLoader(amap_val_dataset, batch_size=amap.BATCH_SIZE,
                                 num_workers=amap.NUM_WORKERS)

    return voc_train_loader, amap_train_loader, amap_val_loader
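`CvtLabel` and `voc_ToTensor` come from this project's own transform module and are not shown. A plausible sketch of `CvtLabel`, assuming it maps VOC annotation names to integer class ids and collects bounding boxes (the implementation below is a hypothetical reconstruction):

class CvtLabel:
    # Hypothetical reconstruction: convert a parsed VOC annotation into the
    # labels/boxes dict that torchvision detection models expect.
    def __init__(self, label_names):
        self.label_to_idx = {name: i for i, name in enumerate(label_names)}

    def __call__(self, image, target):
        objects = target['annotation']['object']
        if isinstance(objects, dict):  # a lone object may not be wrapped in a list
            objects = [objects]
        labels = [self.label_to_idx[obj['name']] for obj in objects]
        boxes = [[int(float(obj['bndbox'][k])) for k in ('xmin', 'ymin', 'xmax', 'ymax')]
                 for obj in objects]
        return image, {'labels': labels, 'boxes': boxes}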
Example #3
 def __init__(self, bg_image_dataset_folder, size=(128, 128)):
     self.dataset = VOCDetection(bg_image_dataset_folder)
     self.resize = transforms.Resize(size)
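Only the constructor is shown. A matching `__getitem__` and `__len__`, assuming the class is meant to serve resized background images and discard the VOC annotation (both methods are hypothetical completions):

 def __getitem__(self, idx):
     # Hypothetical completion: return only the resized image; the VOC
     # annotation is dropped since the dataset just supplies backgrounds.
     img, _ = self.dataset[idx]
     return self.resize(img)

 def __len__(self):
     return len(self.dataset)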
Example #4
# -*- coding: utf-8 -*-
"""
@date: 2020/2/29 2:51 PM
@file: pascal_voc.py
@author: zj
@description: Load the PASCAL VOC 2007 dataset
"""

import cv2
import numpy as np
from torchvision.datasets import VOCDetection

if __name__ == '__main__':
    """
    Download the PASCAL VOC dataset
    """
    dataset = VOCDetection('../../data',
                           year='2007',
                           image_set='trainval',
                           download=True)

    img, target = dataset[1000]
    # PIL returns an RGB image; convert to BGR so OpenCV displays colors correctly
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    print(target)
    print(img.shape)

    cv2.imshow('img', img)
    cv2.waitKey(0)
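For reference, the target printed above is torchvision's parse of the Pascal XML annotation: a nested dict whose leaf values are all strings, shaped roughly like this (values abbreviated):

# {'annotation': {'folder': 'VOC2007',
#                 'filename': '...',
#                 'size': {'width': '...', 'height': '...', 'depth': '3'},
#                 'object': [{'name': '...',
#                             'bndbox': {'xmin': '...', 'ymin': '...',
#                                        'xmax': '...', 'ymax': '...'},
#                             'difficult': '0', ...}, ...]}}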
Example #5
def main(args):

    print(args)

    torch.backends.cudnn.benchmark = True

    # Data loading
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    crop_pct = 0.875
    scale_size = int(math.floor(args.img_size / crop_pct))

    train_loader, val_loader = None, None

    if not args.test_only:
        st = time.time()
        train_set = VOCDetection(datadir,
                                 image_set='train',
                                 download=True,
                                 transforms=Compose([
                                     VOCTargetTransform(VOC_CLASSES),
                                     RandomResizedCrop(
                                         (args.img_size, args.img_size),
                                         scale=(0.3, 1.0)),
                                     RandomHorizontalFlip(),
                                     convert_to_relative,
                                     ImageTransform(
                                         T.ColorJitter(brightness=0.3,
                                                       contrast=0.3,
                                                       saturation=0.1,
                                                       hue=0.02)),
                                     ImageTransform(T.ToTensor()),
                                     ImageTransform(normalize)
                                 ]))

        train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size,
            drop_last=True,
            collate_fn=collate_fn,
            sampler=RandomSampler(train_set),
            num_workers=args.workers,
            pin_memory=True,
            worker_init_fn=worker_init_fn)

        print(f"Training set loaded in {time.time() - st:.2f}s "
              f"({len(train_set)} samples in {len(train_loader)} batches)")

    if args.show_samples and train_loader is not None:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    if not (args.lr_finder or args.check_setup):
        st = time.time()
        val_set = VOCDetection(datadir,
                               image_set='val',
                               download=True,
                               transforms=Compose([
                                   VOCTargetTransform(VOC_CLASSES),
                                   Resize(scale_size),
                                   CenterCrop(args.img_size),
                                   convert_to_relative,
                                   ImageTransform(T.ToTensor()),
                                   ImageTransform(normalize)
                               ]))

        val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=args.batch_size,
            drop_last=False,
            collate_fn=collate_fn,
            sampler=SequentialSampler(val_set),
            num_workers=args.workers,
            pin_memory=True,
            worker_init_fn=worker_init_fn)

        print(
            f"Validation set loaded in {time.time() - st:.2f}s ({len(val_set)} samples in {len(val_loader)} batches)"
        )

    model = detection.__dict__[args.model](args.pretrained,
                                           num_classes=len(VOC_CLASSES),
                                           pretrained_backbone=True)

    model_params = [p for p in model.parameters() if p.requires_grad]
    if args.opt == 'sgd':
        optimizer = torch.optim.SGD(model_params,
                                    args.lr,
                                    momentum=0.9,
                                    weight_decay=args.weight_decay)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model_params,
                                     args.lr,
                                     betas=(0.95, 0.99),
                                     eps=1e-6,
                                     weight_decay=args.weight_decay)
    elif args.opt == 'radam':
        optimizer = holocron.optim.RAdam(model_params,
                                         args.lr,
                                         betas=(0.95, 0.99),
                                         eps=1e-6,
                                         weight_decay=args.weight_decay)
    elif args.opt == 'ranger':
        optimizer = Lookahead(
            holocron.optim.RAdam(model_params,
                                 args.lr,
                                 betas=(0.95, 0.99),
                                 eps=1e-6,
                                 weight_decay=args.weight_decay))
    elif args.opt == 'tadam':
        optimizer = holocron.optim.TAdam(model_params,
                                         args.lr,
                                         betas=(0.95, 0.99),
                                         eps=1e-6,
                                         weight_decay=args.weight_decay)
    else:
        raise ValueError(f"Unsupported optimizer: {args.opt}")

    trainer = DetectionTrainer(model, train_loader, val_loader, None,
                               optimizer, args.device, args.output_file)

    if args.resume:
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location='cpu')
        trainer.load(checkpoint)

    if args.test_only:
        print("Running evaluation")
        eval_metrics = trainer.evaluate()
        print(
            f"Loc error: {eval_metrics['loc_err']:.2%} | Clf error: {eval_metrics['clf_err']:.2%} | "
            f"Det error: {eval_metrics['det_err']:.2%}")
        return

    if args.lr_finder:
        print("Looking for optimal LR")
        trainer.lr_find(args.freeze_until, num_it=min(len(train_loader), 100))
        trainer.plot_recorder()
        return

    if args.check_setup:
        print("Checking batch overfitting")
        is_ok = trainer.check_setup(args.freeze_until,
                                    args.lr,
                                    num_it=min(len(train_loader), 100))
        print(is_ok)
        return

    print("Start training")
    start_time = time.time()
    trainer.fit_n_epochs(args.epochs, args.lr, args.freeze_until, args.sched)
    total_time_str = str(
        datetime.timedelta(seconds=int(time.time() - start_time)))
    print(f"Training time {total_time_str}")
Example #6
 def prepare_data(self, *args, **kwargs):
     VOCDetection(root=self.data_dir,
                  download=self.download,
                  image_set='trainval')
Example #7
File: voc.py  Project: nairbv/vision
 def _parse_detection_ann(self, buffer: BinaryIO) -> Dict[str, Any]:
     return cast(
         Dict[str, Any],
         VOCDetection.parse_voc_xml(
             ElementTree.parse(buffer).getroot())["annotation"])
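`VOCDetection.parse_voc_xml` is also handy on its own for reading VOC-style XML without instantiating a dataset. A usage sketch, assuming a torchvision version where it is a staticmethod (as the call above implies) and a hypothetical annotation path:

from xml.etree import ElementTree
from torchvision.datasets import VOCDetection

root = ElementTree.parse('VOCdevkit/VOC2012/Annotations/2007_000027.xml').getroot()
annotation = VOCDetection.parse_voc_xml(root)['annotation']
print(annotation['object'])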
Example #8
def plot_bb(img, target):
    import cv2
    # PIL gives RGB; OpenCV draws and displays BGR, and np.asarray can return
    # a read-only view, so convert (which also copies) before drawing
    im = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
    for label, bbox in zip(target['labels'], target['boxes']):
        x1, y1, x2, y2 = (int(v) for v in bbox)
        cv2.rectangle(im, (x1, y1), (x2, y2), (0, 0, 255), 2)
        cv2.putText(im, str(label), (x1, max(y1 - 4, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.imshow('image', im)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == "__main__":
    pascal_voc = VOCDetection(root="VOC",
                              year="2012",
                              image_set="train",
                              download=False)

    img, gt_dict = pascal_voc[0]

    objects = gt_dict['annotation']['object']
    print(objects)
    # Convert the parsed XML objects into the labels/boxes dict plot_bb expects
    target = {
        'labels': [obj['name'] for obj in objects],
        'boxes': [[int(obj['bndbox'][k]) for k in ('xmin', 'ymin', 'xmax', 'ymax')]
                  for obj in objects],
    }
    plot_bb(img, target)
    #coco = CocoDetection("COCO")
    """
    root, annFile, transform=None, target_transform=None, transforms=None
    """
    #sample = coco[0]
Example #9
 def prepare_data(self) -> None:
     """
     Saves VOCDetection files to data_dir
     """
     VOCDetection(self.data_dir, year=self.year, image_set="train", download=True)
     VOCDetection(self.data_dir, year=self.year, image_set="val", download=True)
Example #10
 def __len__(self):
     return VOCDetection.__len__(self)
Example #11
def main():
    transforms = Compose([CvtLabel(LABEL_NAMES), ToTensor()])

    train_set = VOCDetection(DATA_DIR,
                             'UA-DETRAC',
                             "train",
                             transforms=transforms)
    val_set = VOCDetection(DATA_DIR,
                           'UA-DETRAC',
                           "trainval",
                           transforms=transforms)

    train_loader = DataLoader(train_set,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=NUM_WORKERS,
                              collate_fn=collate_fn)
    val_loader = DataLoader(val_set,
                            batch_size=BATCH_SIZE,
                            num_workers=NUM_WORKERS,
                            collate_fn=collate_fn)

    device = torch.device(DEVICE)
    model = fasterrcnn_resnet50_fpn(num_classes=len(LABEL_NAMES)).to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    if os.path.exists(MODEL_FILE):
        model.load_state_dict(torch.load(MODEL_FILE, map_location=device))

    for epoch in range(1, EPOCHS + 1):
        model.train()
        for step, (img, target) in enumerate(train_loader):
            step, total_step = step + 1, len(train_loader)

            img = [i.to(device) for i in img]
            target = [{n: item[n].to(device)
                       for n in ['labels', 'boxes']} for item in target]

            loss_dict = model(img, target)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            loss = losses.detach().cpu().item()
            print(
                f"Epoch:{epoch}/{EPOCHS}, Step:{step}/{total_step}, Loss={loss:.04f}",
                end='\r',
                flush=True)

        # model.eval()
        # with torch.no_grad():
        #     for img, target in val_loader:
        #         img = [i.to(device) for i in img]
        #         target = [{n: item[n].to(device) for n in ['labels', 'boxes']} for item in target]
        #
        #         pred = model(img)
        #         for item in pred:
        #             print(item["boxes"].shape, item['labels'].shape, item['scores'].shape)
        #             pass
        #
        #         pass

        lr_scheduler.step()
        torch.save(model.state_dict(), MODEL_FILE)
        print()

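Here too, `collate_fn` is imported from elsewhere in the project. torchvision's Faster R-CNN expects a list of image tensors and a list of target dicts per batch, so a minimal equivalent (an assumption, not the project's actual code) is:

def collate_fn(batch):
    # [(img1, t1), (img2, t2), ...] -> ((img1, img2, ...), (t1, t2, ...))
    return tuple(zip(*batch))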