Example #1
File: data.py Project: samerhjr/AutoML
def get_data(dataset,
             dataroot,
             augment,
             resize=608,
             split=0.15,
             split_idx=0,
             multinode=False,
             target_lb=-1):

    transform_train = transforms.Compose(
        [Normalizer(), Augmenter(),
         Resizer(min_side=resize)])
    transform_test = transforms.Compose(
        [Normalizer(), Resizer(min_side=resize)])

    if isinstance(C.get().aug, list):
        logger.debug('augmentation provided.')
        policies = policy_decoder(augment, augment['num_policy'],
                                  augment['num_op'])
        transform_train.transforms.insert(
            0, Augmentation(policies, detection=True))

    if dataset == 'coco':
        total_trainset = CocoDataset(dataroot,
                                     set_name='train',
                                     transform=transform_train)
        testset = CocoDataset(dataroot,
                              set_name='val',
                              transform=transform_test)
    else:
        # without this branch, any other dataset name raises a NameError below
        raise ValueError('unsupported dataset=%s' % dataset)

    return total_trainset, testset
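
A minimal usage sketch for get_data() (not repo code): '/data/coco' is a placeholder dataroot, the augment dict only mirrors the two keys the function indexes ('num_policy', 'num_op'), and the project's global config C is assumed to be loaded.

augment = {'num_policy': 5, 'num_op': 2}  # assumed shape of the augment dict
trainset, testset = get_data('coco', '/data/coco', augment, resize=608)
print('train/test sizes:', len(trainset), len(testset))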
Example #2
def detect_single_image(checkpoint, image_path, visualize=False):
    device = torch.device(type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')
    configs = combine_values(checkpoint['model_specs']['training_configs'], checkpoint['hp_values'])
    labels = checkpoint['labels']
    num_classes = len(labels)
    retinanet = model.resnet152(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios']) #TODO: make depth an input parameter
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    retinanet.eval()

    img = skimage.io.imread(image_path)

    if len(img.shape) == 2:
        img = skimage.color.gray2rgb(img)

    img = img.astype(np.float32) / 255.0
    transform = transforms.Compose([Normalizer(), Resizer(min_side=608)]) #TODO: make this dynamic
    data = transform({'img': img, 'annot': np.zeros((0, 5))})
    img = data['img']
    img = img.unsqueeze(0)
    img = img.permute(0, 3, 1, 2)
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(img.to(device=device).float())

        idxs = np.where(scores.cpu() > 0.5)[0]
        scale = data['scale']
        detections_list = []
        for j in range(idxs.shape[0]):
            bbox = transformed_anchors[idxs[j], :]
            label_idx = int(classification[idxs[j]])
            label_name = labels[label_idx]
            score = scores[idxs[j]].item()

            # undo the resize so boxes can be compared against ground truth
            bbox /= scale
            bbox = bbox.round()  # Tensor.round() is not in-place
            x1 = int(bbox[0])
            y1 = int(bbox[1])
            x2 = int(bbox[2])
            y2 = int(bbox[3])
            detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
        img_name = image_path.split('/')[-1].split('.')[0]
        filename = img_name + '.txt'
        path = os.path.dirname(image_path)
        filepathname = os.path.join(path, filename)
        with open(filepathname, 'w', encoding='utf8') as f:
            for single_det_list in detections_list:
                for i, x in enumerate(single_det_list):
                    f.write(str(x))
                    f.write(' ')
                f.write('\n')

        if visualize:
            # NOTE: visualization is not implemented in this variant; see the
            # detect() examples below for the drawing code
            unnormalize = UnNormalizer()

    return filepathname
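
A hedged usage sketch for detect_single_image(): the paths are placeholders, and the checkpoint is assumed to carry the keys the function reads above ('model', 'labels', 'model_specs', 'hp_values').

import torch

checkpoint = torch.load('checkpoints/retinanet.pt', map_location='cpu')  # placeholder path
txt_path = detect_single_image(checkpoint, 'samples/street.jpg', visualize=False)
print('detections written to', txt_path)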
Example #3

    def preprocess(self, dataset='csv', csv_train=None, csv_val=None, csv_classes=None, coco_path=None,
                   train_set_name='train2017', val_set_name='val2017', resize=608):
        self.dataset = dataset
        if self.dataset == 'coco':
            if coco_path is None:
                raise ValueError('Must provide --home_path when training on COCO.')
            self.dataset_train = CocoDataset(self.home_path, set_name=train_set_name,
                                             transform=transforms.Compose(
                                                 [Normalizer(), Augmenter(), Resizer(min_side=resize)]))
            self.dataset_val = CocoDataset(self.home_path, set_name=val_set_name,
                                           transform=transforms.Compose([Normalizer(), Resizer(min_side=resize)]))

        elif self.dataset == 'csv':
            if csv_train is None:
                raise ValueError('Must provide --csv_train when training on CSV.')
            if csv_classes is None:
                raise ValueError('Must provide --csv_classes when training on CSV.')
            self.dataset_train = CSVDataset(train_file=csv_train, class_list=csv_classes,
                                            transform=transforms.Compose(
                                                [Normalizer(), Augmenter(), Resizer(min_side=resize)])
                                            )

            if csv_val is None:
                self.dataset_val = None
                print('No validation annotations provided.')
            else:
                self.dataset_val = CSVDataset(train_file=csv_val, class_list=csv_classes,
                                              transform=transforms.Compose([Normalizer(), Resizer(min_side=resize)]))
        else:
            raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

        sampler = AspectRatioBasedSampler(self.dataset_train, batch_size=2, drop_last=False)
        self.dataloader_train = DataLoader(self.dataset_train, num_workers=0, collate_fn=collater,
                                           batch_sampler=sampler)
        if self.dataset_val is not None:
            sampler_val = AspectRatioBasedSampler(self.dataset_val, batch_size=1, drop_last=False)
            self.dataloader_val = DataLoader(self.dataset_val, num_workers=3, collate_fn=collater,
                                             batch_sampler=sampler_val)

        print('Num training images: {}'.format(len(self.dataset_train)))
        if self.dataset_val is not None:
            if len(self.dataset_val) == 0:
                raise Exception('num val images is 0!')
            print('Num val images: {}'.format(len(self.dataset_val)))
Example #4
File: data.py Project: samerhjr/AutoML
def get_dataloaders(dataset,
                    batch,
                    dataroot,
                    resize=608,
                    split=0.15,
                    split_idx=0,
                    multinode=False,
                    target_lb=-1):
    multilabel = False
    detection = False
    if 'coco' in dataset:
        transform_train = transforms.Compose(
            [Normalizer(), Augmenter(),
             Resizer(min_side=resize)])
        transform_test = transforms.Compose(
            [Normalizer(), Resizer(min_side=resize)])

        multilabel = True
        detection = True
    elif 'cifar' in dataset or 'svhn' in dataset:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
    elif 'imagenet' in dataset:
        input_size = 224
        sized_size = 256

        if 'efficientnet' in C.get()['model']:
            input_size = EfficientNet.get_image_size(C.get()['model'])
            sized_size = input_size + 32  # TODO
            # sized_size = int(round(input_size / 224. * 256))
            # sized_size = input_size
            logger.info('size changed to %d/%d.' % (input_size, sized_size))

        transform_train = transforms.Compose([
            EfficientNetRandomCrop(input_size),
            transforms.Resize((input_size, input_size),
                              interpolation=Image.BICUBIC),
            # transforms.RandomResizedCrop(input_size, scale=(0.1, 1.0), interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
            ),
            transforms.ToTensor(),
            Lighting(0.1, _IMAGENET_PCA['eigval'], _IMAGENET_PCA['eigvec']),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        transform_test = transforms.Compose([
            EfficientNetCenterCrop(input_size),
            transforms.Resize((input_size, input_size),
                              interpolation=Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    else:
        raise ValueError('unsupported dataset=%s' % dataset)

    total_aug = augs = None
    if isinstance(C.get().aug, list):
        logger.debug('augmentation provided.')
        transform_train.transforms.insert(
            0, Augmentation(C.get().aug, detection=detection))
    else:
        logger.debug('augmentation: %s' % C.get().aug)
        if C.get().aug == 'fa_reduced_cifar10':
            transform_train.transforms.insert(
                0, Augmentation(fa_reduced_cifar10()))

        elif C.get().aug == 'fa_reduced_imagenet':
            transform_train.transforms.insert(
                0, Augmentation(fa_resnet50_rimagenet()))

        elif C.get().aug == 'fa_reduced_svhn':
            transform_train.transforms.insert(0,
                                              Augmentation(fa_reduced_svhn()))

        elif C.get().aug == 'arsaug':
            transform_train.transforms.insert(0, Augmentation(arsaug_policy()))
        elif C.get().aug == 'autoaug_cifar10':
            transform_train.transforms.insert(
                0, Augmentation(autoaug_paper_cifar10()))
        elif C.get().aug == 'autoaug_extend':
            transform_train.transforms.insert(0,
                                              Augmentation(autoaug_policy()))
        elif C.get().aug in ['default']:
            pass
        else:
            raise ValueError('not found augmentations. %s' % C.get().aug)

    if C.get()['cutout'] > 0:
        transform_train.transforms.append(CutoutDefault(C.get()['cutout']))

    if dataset == 'coco':
        total_trainset = CocoDataset(dataroot,
                                     set_name='train2017',
                                     transform=transform_train)
        testset = CocoDataset(dataroot,
                              set_name='val2017',
                              transform=transform_test)

    elif dataset == 'cifar10':
        total_trainset = torchvision.datasets.CIFAR10(
            root=dataroot,
            train=True,
            download=True,
            transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root=dataroot,
                                               train=False,
                                               download=True,
                                               transform=transform_test)
    elif dataset == 'reduced_cifar10':
        total_trainset = torchvision.datasets.CIFAR10(
            root=dataroot,
            train=True,
            download=True,
            transform=transform_train)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=46000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.targets[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        testset = torchvision.datasets.CIFAR10(root=dataroot,
                                               train=False,
                                               download=True,
                                               transform=transform_test)
    elif dataset == 'cifar100':
        total_trainset = torchvision.datasets.CIFAR100(
            root=dataroot,
            train=True,
            download=True,
            transform=transform_train)
        testset = torchvision.datasets.CIFAR100(root=dataroot,
                                                train=False,
                                                download=True,
                                                transform=transform_test)
    elif dataset == 'svhn':
        trainset = torchvision.datasets.SVHN(root=dataroot,
                                             split='train',
                                             download=True,
                                             transform=transform_train)
        extraset = torchvision.datasets.SVHN(root=dataroot,
                                             split='extra',
                                             download=True,
                                             transform=transform_train)
        total_trainset = ConcatDataset([trainset, extraset])
        testset = torchvision.datasets.SVHN(root=dataroot,
                                            split='test',
                                            download=True,
                                            transform=transform_test)
    elif dataset == 'reduced_svhn':
        total_trainset = torchvision.datasets.SVHN(root=dataroot,
                                                   split='train',
                                                   download=True,
                                                   transform=transform_train)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=73257 - 1000,
                                     random_state=0)  # 1000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.targets[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        testset = torchvision.datasets.SVHN(root=dataroot,
                                            split='test',
                                            download=True,
                                            transform=transform_test)
    elif dataset == 'imagenet':
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  transform=transform_train,
                                  download=True)
        testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'),
                           split='val',
                           transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]
    elif dataset == 'reduced_imagenet':
        # 120 randomly chosen ImageNet class indices, generated once via
        # sorted(random.sample(range(1000), k=120)) and hard-coded below
        idx120 = [
            16, 23, 52, 57, 76, 93, 95, 96, 99, 121, 122, 128, 148, 172, 181,
            189, 202, 210, 232, 238, 257, 258, 259, 277, 283, 289, 295, 304,
            307, 318, 322, 331, 337, 338, 345, 350, 361, 375, 376, 381, 388,
            399, 401, 408, 424, 431, 432, 440, 447, 462, 464, 472, 483, 497,
            506, 512, 530, 541, 553, 554, 557, 564, 570, 584, 612, 614, 619,
            626, 631, 632, 650, 657, 658, 660, 674, 675, 680, 682, 691, 695,
            699, 711, 734, 736, 741, 754, 757, 764, 769, 770, 780, 781, 787,
            797, 799, 811, 822, 829, 830, 835, 837, 842, 843, 845, 873, 883,
            897, 900, 902, 905, 913, 920, 925, 937, 938, 940, 941, 944, 949,
            959
        ]
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  transform=transform_train)
        testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'),
                           split='val',
                           transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]

        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=len(total_trainset) - 50000,
                                     random_state=0)  # keep 50000 train samples
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)

        # keep only the samples whose class falls in idx120
        train_idx = list(
            filter(lambda x: total_trainset.targets[x] in idx120, train_idx))
        valid_idx = list(
            filter(lambda x: total_trainset.targets[x] in idx120, valid_idx))
        test_idx = list(
            filter(lambda x: testset.samples[x][1] in idx120,
                   range(len(testset))))

        targets = [
            idx120.index(total_trainset.targets[idx]) for idx in train_idx
        ]
        for idx in range(len(total_trainset.samples)):
            if total_trainset.samples[idx][1] not in idx120:
                continue
            total_trainset.samples[idx] = (total_trainset.samples[idx][0],
                                           idx120.index(
                                               total_trainset.samples[idx][1]))
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        for idx in range(len(testset.samples)):
            if testset.samples[idx][1] not in idx120:
                continue
            testset.samples[idx] = (testset.samples[idx][0],
                                    idx120.index(testset.samples[idx][1]))
        testset = Subset(testset, test_idx)
        print('reduced_imagenet train=', len(total_trainset))
    else:
        raise ValueError('invalid dataset name=%s' % dataset)

    if total_aug is not None and augs is not None:
        total_trainset.set_preaug(augs, total_aug)
        print('set_preaug-')

    train_sampler = None
    if split > 0.0:
        if multilabel:
            # stratification is skipped for multilabel targets; plain
            # ShuffleSplit is used here (worth testing a stratified variant)
            sss = ShuffleSplit(n_splits=5, test_size=split, random_state=0)
            sss = sss.split(list(range(len(total_trainset))))
            for _ in range(split_idx + 1):
                train_idx, valid_idx = next(sss)
        else:
            sss = StratifiedShuffleSplit(n_splits=5,
                                         test_size=split,
                                         random_state=0)
            sss = sss.split(list(range(len(total_trainset))),
                            total_trainset.targets)
            for _ in range(split_idx + 1):
                train_idx, valid_idx = next(sss)

        if target_lb >= 0:
            train_idx = [
                i for i in train_idx if total_trainset.targets[i] == target_lb
            ]
            valid_idx = [
                i for i in valid_idx if total_trainset.targets[i] == target_lb
            ]

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)

        if multinode:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                Subset(total_trainset, train_idx),
                num_replicas=dist.get_world_size(),
                rank=dist.get_rank())
    else:
        valid_sampler = SubsetSampler([])

        if multinode:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                total_trainset,
                num_replicas=dist.get_world_size(),
                rank=dist.get_rank())
            logger.info(
                f'----- dataset with DistributedSampler  {dist.get_rank()}/{dist.get_world_size()}'
            )

    trainloader = DataLoader(total_trainset,
                             batch_size=batch,
                             shuffle=train_sampler is None,
                             num_workers=8,
                             pin_memory=True,
                             sampler=train_sampler,
                             drop_last=True,
                             collate_fn=collater)
    validloader = DataLoader(total_trainset,
                             batch_size=batch,
                             shuffle=False,
                             num_workers=4,
                             pin_memory=True,
                             sampler=valid_sampler,
                             drop_last=False,
                             collate_fn=collater)

    testloader = DataLoader(testset,
                            batch_size=batch,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=True,
                            drop_last=False,
                            collate_fn=collater)

    return train_sampler, trainloader, validloader, testloader
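
A usage sketch, not repo code: this call exercises the COCO branch; '/data/coco' is a placeholder, and the global config C must already define 'aug' and 'cutout'.

train_sampler, trainloader, validloader, testloader = get_dataloaders(
    'coco', batch=2, dataroot='/data/coco', resize=608, split=0.15)
sample_batch = next(iter(trainloader))  # batches are dicts produced by collater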
Example #5
def detect(checkpoint,
           pred_on_path,
           output_path,
           threshold=0.5,
           visualize=False,
           red_label='sick'):
    device = torch.device(
        type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')

    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)  # must run even when the path did not exist
    logger.info('inside ' + str(pred_on_path) + ': ' +
                str(os.listdir(pred_on_path)))
    dataset_val = PredDataset(pred_on_path=pred_on_path,
                              transform=transforms.Compose([
                                  Normalizer(),
                                  Resizer(min_side=608)
                              ]))  #TODO make resize an input param
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val,
                                num_workers=0,
                                collate_fn=collater,
                                batch_sampler=None)
    logger.info('data loader initialized')
    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)
    configs = deepcopy(checkpoint['model_specs']['training_configs'])
    configs.update(checkpoint['hp_values'])
    logger.info('initializing object_detection model')
    retinanet = ret50(
        num_classes=num_classes,
        scales=configs['anchor_scales'],
        ratios=configs['anchor_ratios'])  #TODO: make depth an input parameter
    logger.info('loading weights')
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    logger.info('model to device: ' + str(device))
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    inference_times = []
    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(
                data['img'].to(device=device).float())
            elapsed_time = time.time() - st
            print('Elapsed time: {}'.format(elapsed_time))
            inference_times.append(elapsed_time)
            idxs = np.where(scores.cpu() > threshold)[0]
            if visualize:
                img = np.array(255 *
                               unnormalize(data['img'][0, :, :, :])).copy()

                img[img < 0] = 0
                img[img > 255] = 255

                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                if visualize:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])

                label_idx = int(classification[idxs[j]])
                label_name = labels[label_idx]
                score = scores[idxs[j]].item()
                if visualize:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    if red_label in label_name:
                        cv2.rectangle(img, (x1, y1), (x2, y2),
                                      color=(0, 0, 255),
                                      thickness=2)
                    else:
                        cv2.rectangle(img, (x1, y1), (x2, y2),
                                      color=(0, 255, 0),
                                      thickness=2)
                    print(label_name)

                # undo the resize so boxes can be compared against ground truth
                bbox /= scale
                bbox = bbox.round()  # Tensor.round() is not in-place
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([
                    label_name,
                    str(score),
                    str(x1),
                    str(y1),
                    str(x2),
                    str(y2)
                ])
            img_name = dataset_val.image_names[idx].split('/')[-1]
            i_name = img_name.split('.')[0]
            filename = i_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    for i, x in enumerate(single_det_list):
                        f.write(str(x))
                        f.write(' ')
                    f.write('\n')
            if visualize:
                save_to_path = os.path.join(output_path, img_name)
                cv2.imwrite(save_to_path, img)
                cv2.waitKey(0)
    print('average inference time per image: ', np.mean(inference_times))
    return output_path
Example #6
def detect(checkpoint, output_dir, home_path=None, visualize=False):
    device = torch.device(type='cuda') if torch.cuda.is_available() else torch.device(type='cpu')
    if home_path is None:
        home_path = checkpoint['model_specs']['data']['home_path']
    if os.getcwd().split('/')[-1] == 'ObjectDetNet':
        home_path = os.path.join('..', home_path)
    # must have a file to predict on called "predict_on"
    pred_on_path = os.path.join(home_path, 'predict_on')

    #create output path
    output_path = os.path.join(home_path, 'predictions', output_dir)

    try:
        os.makedirs(output_path)
    except FileExistsError:
        if output_dir != 'check0':
            raise Exception('there are already predictions for model: ' + output_dir)
        else:
            logger.info('there was already a check0 in place, erasing and predicting again from scratch')
            shutil.rmtree(output_path)
            os.makedirs(output_path)
    logger.info('inside ' + str(pred_on_path) + ': ' + str(os.listdir(pred_on_path)))
    dataset_val = PredDataset(pred_on_path=pred_on_path,
                              transform=transforms.Compose([Normalizer(), Resizer(min_side=608)])) #TODO make resize an input param
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)
    logger.info('data loader initialized')
    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)

    configs = combine_values(checkpoint['model_specs']['training_configs'], checkpoint['hp_values'])
    logger.info('initializing retinanet model')
    if checkpoint['model_specs']['training_configs']['depth'] == 50:
        retinanet = model.resnet50(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios'])
    elif checkpoint['model_specs']['training_configs']['depth'] == 152:
        retinanet = model.resnet152(num_classes=num_classes, scales=configs['anchor_scales'], ratios=configs['anchor_ratios'])
    else:
        raise ValueError('unsupported resnet depth: {}'.format(checkpoint['model_specs']['training_configs']['depth']))
    logger.info('loading weights')
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    logger.info('model to device: ' + str(device))
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].to(device=device).float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)[0]
            if visualize:
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

                img[img < 0] = 0
                img[img > 255] = 255

                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                if visualize:
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])

                label_idx = int(classification[idxs[j]])
                label_name = labels[label_idx]
                score = scores[idxs[j]].item()
                if visualize:
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                    print(label_name)

                # undo the resize so boxes can be compared against ground truth
                bbox /= scale
                bbox = bbox.round()  # Tensor.round() is not in-place
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
            img_name = dataset_val.image_names[idx].split('/')[-1]
            i_name = img_name.split('.')[0]
            filename = i_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    for i, x in enumerate(single_det_list):
                        f.write(str(x))
                        f.write(' ')
                    f.write('\n')
            if visualize:
                save_to_path = os.path.join(output_path, img_name)
                cv2.imwrite(save_to_path, img)
                cv2.waitKey(0)

    return output_path
Example #7
def detect(home_path, checkpoint_path):

    class_names_path = os.path.join(home_path, "d.names")
    # compute number of classes (one class name per line)
    with open(class_names_path) as f:
        num_classes = sum(1 for _ in f)
    # must have a file to predict on called "predict_on"
    pred_on_path = os.path.join(home_path, 'predict_on')

    #create output path
    checkpoint_name = checkpoint_path.split('.')[0]
    output_path = os.path.join(home_path, 'predictions', checkpoint_name)
    if not os.path.exists(os.path.join(home_path, 'predictions')):
        os.mkdir(os.path.join(home_path, 'predictions'))
    if os.path.exists(output_path):
        raise Exception('there are already predictions for model: ' + checkpoint_name)
    os.mkdir(output_path)

    #copy annotations to predictions
    gt_file = glob.glob(os.path.join(pred_on_path, '*.json'))[0]
    set_name = gt_file.split('/')[-1].split('.')[0].split('_')[1]
    if os.path.exists(gt_file):
        if not os.path.exists(os.path.join(home_path, 'predictions', 'annotations')):
            os.mkdir(os.path.join(home_path, 'predictions', 'annotations'))
        copyfile(gt_file, os.path.join(home_path, 'predictions', 'annotations', gt_file.split('/')[-1]))
    # dataset_val = PredDataset(pred_on_path=pred_on_path, class_list_path=class_names_path,
    #                          transform=transforms.Compose([Normalizer(), Resizer(min_side=608)])) #TODO make resize an input param
    dataset_val = PredDataset(pred_on_path, set_name=set_name,
                        transform=transforms.Compose([Normalizer(), Resizer(min_side=608)]))
    # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    checkpoint = torch.load(checkpoint_path, map_location=device)
    scales = checkpoint['scales']
    ratios = checkpoint['ratios']

    retinanet = model.resnet152(num_classes=num_classes, scales=scales, ratios=ratios) #TODO: make depth an input parameter
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)  # avoid hard-coded .cuda() on CPU-only hosts
    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].to(device=device).float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)[0]
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for j in range(idxs.shape[0]):
                bbox = transformed_anchors[idxs[j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_idx = int(classification[idxs[j]])
                label_name = dataset_val.labels[label_idx]
                score = scores[idxs[j]].item()

                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
                # undo the resize so boxes can be compared against ground truth
                bbox /= scale
                bbox = bbox.round()  # Tensor.round() is not in-place
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                detections_list.append([label_name, str(score), str(x1), str(y1), str(x2), str(y2)])
            img_name = dataset_val.coco.dataset['images'][idx]['file_name'].split('.')[0]
            filename = img_name + '.txt'
            filepathname = os.path.join(output_path, filename)
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    for i, x in enumerate(single_det_list):
                        f.write(str(x))
                        f.write(' ')
                    f.write('\n')

            img_save_name = dataset_val.coco.dataset['images'][idx]['file_name']
            save_to_path = os.path.join(output_path, img_save_name)
            cv2.imwrite(save_to_path, img)
            cv2.waitKey(0)
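
All of the detect variants above write one detection per line as "label score x1 y1 x2 y2", space separated. A small reader for those files (not part of the repo, and assuming label names contain no spaces):

def read_detections(txt_path):
    """Parse one of the .txt prediction files written above."""
    detections = []
    with open(txt_path, encoding='utf8') as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            label = parts[0]
            score = float(parts[1])
            x1, y1, x2, y2 = (int(v) for v in parts[2:6])
            detections.append({'label': label, 'score': score,
                               'box': (x1, y1, x2, y2)})
    return detections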