Example #1
    def __init__(self,
                 root=None,
                 transform=None,
                 mode='train',
                 target_transform=None,
                 return_paths=False,
                 loader=default_loader,
                 ignore_flag=False,
                 dataset_type='A'):
        if root:
            annFile = root + '/trainval_withkeypoints.json'
            imgDir = root + '/VOCdevkit/VOC2010/JPEGImages'
        details = Detail(annFile, imgDir, mode)
        self.details = details
        if dataset_type == 'A':
            classes = A
        elif dataset_type == 'B':
            classes = B
        else:
            classes = C
        dtimage = {}
        for c in classes:
            for dp in details.getImgs(cats=[str(c)]):
                dtimage[dp['file_name']] = dp
        self.dtimgs = dtimage.values()
        images = [imgDir + '/' + i['file_name'] for i in self.dtimgs]
        self.allCategories = details.getCats()

        self.generalIdtoname = {}
        self.idTogeneralId = {}
        self.generalIdtoid = {}

        self.classesIdtoname = {}
        self.idToclassesId = {}
        self.classesIdtoid = {}

        i = 1
        self.pixelmap = ['background']
        for cat in self.allCategories:
            if cat['name'] in classes:
                self.classesIdtoname[int(cat['category_id'])] = cat['name']
                self.idToclassesId[i] = int(cat['category_id'])
                self.classesIdtoid[int(cat['category_id'])] = i
                self.pixelmap.append(cat['name'])
                i += 1
            else:
                self.generalIdtoname[int(cat['category_id'])] = cat['name']
                self.idTogeneralId[i] = int(cat['category_id'])
        self.mapping = []
        self.mode = mode
        self.images = images
        self.transform = transform
        self.target_transform = target_transform
        self.return_paths = return_paths
        self.loader = loader
        self.num_classes = len(classes) + 1
Example #2
    def generate_label(self):
        _image_dir = os.path.join(self.args.ori_root_dir, 'JPEGImages')
        annFile = os.path.join(self.args.ori_root_dir, 'trainval_merged.json')
        _mapping = np.sort(np.array([
            0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22,
            23, 397, 25, 284, 158, 159, 416, 33, 162, 420, 454, 295, 296,
            427, 44, 45, 46, 308, 59, 440, 445, 31, 232, 65, 354, 424,
            68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360,
            98, 187, 104, 105, 366, 189, 368, 113, 115]))
        _key = np.array(range(len(_mapping))).astype('uint8')

        from detail import Detail
        train_detail = Detail(annFile, _image_dir, 'train')
        train_ids = train_detail.getImgs()
        for img_id in train_ids:
            mask = Image.fromarray(self._class_to_index(
                train_detail.getMask(img_id), _mapping=_mapping, _key=_key))
            filename = img_id['file_name']
            basename, _ = os.path.splitext(filename)
            if filename.endswith(".jpg"):
                imgpath = os.path.join(_image_dir, filename)
                shutil.copy(imgpath,
                            os.path.join(self.train_image_dir, filename))
                mask_png_name = basename + '.png'
                mask.save(os.path.join(self.train_label_dir, mask_png_name))

        val_detail = Detail(annFile, _image_dir, 'val')
        val_ids = val_detail.getImgs()
        for img_id in val_ids:
            mask = Image.fromarray(self._class_to_index(
                val_detail.getMask(img_id), _mapping=_mapping, _key=_key))
            filename = img_id['file_name']
            basename, _ = os.path.splitext(filename)
            if filename.endswith(".jpg"):
                imgpath = os.path.join(_image_dir, filename)
                shutil.copy(imgpath,
                            os.path.join(self.val_image_dir, filename))
                mask_png_name = basename + '.png'
                mask.save(os.path.join(self.val_label_dir, mask_png_name))
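
Example #2 calls self._class_to_index(...) with explicit _mapping and _key arguments but never shows that helper; below is a minimal sketch, assuming it matches the digitize-based helper that the other examples in this collection define:

    def _class_to_index(self, mask, _mapping, _key):
        # every raw value in the mask must be one of the known category ids
        values = np.unique(mask)
        for value in values:
            assert value in _mapping
        # the position of each raw id in the sorted _mapping becomes its training id
        index = np.digitize(mask.ravel(), _mapping, right=True)
        return _key[index].reshape(mask.shape)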
Example #3
def main():
    args = parse_args()
    devkit_path = args.devkit_path
    if args.out_dir is None:
        out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
    else:
        out_dir = args.out_dir
    json_path = args.json_path
    mmcv.mkdir_or_exist(out_dir)
    img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')

    train_detail = Detail(json_path, img_dir, 'train')
    train_ids = train_detail.getImgs()

    val_detail = Detail(json_path, img_dir, 'val')
    val_ids = val_detail.getImgs()

    mmcv.mkdir_or_exist(
        osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext'))

    train_list = mmcv.track_progress(
        partial(generate_labels, detail=train_detail, out_dir=out_dir),
        train_ids)
    with open(
            osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
                     'train.txt'), 'w') as f:
        f.writelines(line + '\n' for line in sorted(train_list))

    val_list = mmcv.track_progress(
        partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids)
    with open(
            osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
                     'val.txt'), 'w') as f:
        f.writelines(line + '\n' for line in sorted(val_list))

    print('Done!')
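
Example #3 maps a generate_labels helper over the Detail image records, but the helper itself is not included; a minimal sketch follows, assuming it writes one remapped PNG per image and returns the basename that is later collected into train.txt/val.txt (the exact signature and the module-level _mapping/_key arrays are assumptions):

def generate_labels(img_id, detail, out_dir):
    # remap the raw Pascal-Context ids in the Detail mask to contiguous train ids
    raw = detail.getMask(img_id)
    index = np.digitize(raw.ravel(), _mapping, right=True)
    mask = Image.fromarray(_key[index].reshape(raw.shape).astype('uint8'))
    basename = osp.splitext(img_id['file_name'])[0]
    mask.save(osp.join(out_dir, basename + '.png'))
    return basename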
Example #4
    def __init__(self,
                 root=None,
                 transform=None,
                 mode='train',
                 target_transform=None,
                 return_paths=False,
                 loader=default_loader,
                 ignore_flag=False,
                 dataset_type='A'):
        if root:
            annFile = root + '/trainval_withkeypoints.json'
            imgDir = root + '/VOCdevkit/VOC2010/JPEGImages'
        details = Detail(annFile, imgDir, mode)
        self.details = details
        self.dtimgs = details.getImgs()
        images = [imgDir + '/' + i['file_name'] for i in self.dtimgs]
        self.mode = mode
        self.images = images
        self.transform = transform
        self.target_transform = target_transform
        self.return_paths = return_paths
        self.loader = loader
        self.num_classes = 2
        self.pixelmap = ['edge', 'non-edge']
Example #5
class ContextSegmentation(BaseDataset):
    BASE_DIR = 'VOCdevkit/VOC2010'
    NUM_CLASS = 59

    def __init__(
            self,
            root=os.path.expanduser(
                '/media/zilong/b368236f-2592-49f3-9dcc-0d4da674fd26/.encoding/data'),
            split='train',
            mode=None,
            transform=None,
            target_transform=None,
            **kwargs):
        super(ContextSegmentation, self).__init__(root, split, mode, transform,
                                                  target_transform, **kwargs)
        from detail import Detail
        #from detail import mask
        root = os.path.join(root, self.BASE_DIR)
        annFile = os.path.join(root, 'trainval_merged.json')
        imgDir = os.path.join(root, 'JPEGImages')
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        # generate masks
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(root, self.split + '.pth')
        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert (values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        print("Preprocessing mask, this will take a while." + \
            "But don't worry, it only run once for each split.")
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id['image_id']))
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = self.masks[iid]
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int32') - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
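
The _class_to_index helper above leans on np.digitize against the sorted _mapping of raw Pascal-Context category ids; here is a small self-contained illustration of how a raw mask is rewritten to contiguous training ids (the shortened id list is for demonstration only):

import numpy as np

_mapping = np.sort(np.array([0, 2, 9, 18, 259]))        # shortened raw id list
_key = np.array(range(len(_mapping))).astype('uint8')   # contiguous ids 0..4

raw_mask = np.array([[0, 2, 2],
                     [259, 18, 9]])                     # toy getMask()-style output
index = np.digitize(raw_mask.ravel(), _mapping, right=True)
print(_key[index].reshape(raw_mask.shape))              # [[0 1 1]
                                                        #  [4 3 2]]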
Example #6
class PascalContext(BaseDataset):
    NUM_CLASS = 59

    def __init__(self,
                 root="./data",
                 split="train",
                 mode=None,
                 transform=None,
                 target_transform=None,
                 **kwargs):
        super(PascalContext, self).__init__(root, split, mode, transform,
                                            target_transform, **kwargs)
        from detail import Detail

        # from detail import mask
        root = os.path.join(root, "PascalContext")
        annFile = os.path.join(root, "trainval_merged.json")
        imgDir = os.path.join(root, "JPEGImages")
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        # generate masks
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self.classes = [
            "background", "aeroplane", "mountain", "mouse", "track", "road",
            "bag", "motorbike", "fence", "bed", "bedclothes", "bench",
            "bicycle", "diningtable", "bird", "person", "floor", "boat",
            "train", "book", "bottle", "tree", "window", "plate", "platform",
            "tvmonitor", "building", "bus", "cabinet", "shelves", "light",
            "pottedplant", "wall", "car", "ground", "cat", "sidewalk", "truck",
            "ceiling", "rock", "chair", "wood", "food", "horse", "cloth",
            "sign", "computer", "sheep", "keyboard", "flower", "sky", "cow",
            "grass", "cup", "curtain", "snow", "water", "sofa", "dog", "door",
        ]
        self._key = np.array(range(len(self._mapping))).astype("uint8")
        mask_file = os.path.join(root, self.split + ".pth")
        print("mask_file:", mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        print("Preprocessing mask, this will take a while." +
              "But don't worry, it only run once for each split.")
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id["image_id"]] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id["image_id"]))
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id["file_name"]
        iid = img_id["image_id"]
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert("RGB")
        if self.mode == "test":
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = self.masks[iid]
        # synchronized transform
        if self.mode == "train":
            img, mask = self._sync_transform(img, mask)
        elif self.mode == "val":
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == "testval"
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype("int32") - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
Example #7
class PascalContextGenerator(object):
    def __init__(self, voc_path, annotation_path):
        self.voc_path = voc_path
        self.annotation_path = annotation_path
        self.label_dir = os.path.join(self.voc_path, 'Context')
        self._image_dir = os.path.join(self.voc_path, 'JPEGImages')
        self.annFile = os.path.join(self.annotation_path,
                                    'trainval_merged.json')

        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')

        self.train_detail = Detail(self.annFile, self._image_dir, 'train')
        self.train_ids = self.train_detail.getImgs()
        self.val_detail = Detail(self.annFile, self._image_dir, 'val')
        self.val_ids = self.val_detail.getImgs()

        if not os.path.exists(self.label_dir):
            os.makedirs(self.label_dir)

    def _class_to_index(self, mask, _mapping, _key):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert (values[i] in _mapping)
        index = np.digitize(mask.ravel(), _mapping, right=True)
        return _key[index].reshape(mask.shape)

    def save_mask(self, img_id, mode):
        if mode == 'train':
            mask = Image.fromarray(
                self._class_to_index(
                    self.train_detail.getMask(img_id),
                    _mapping=self._mapping,
                    _key=self._key))
        elif mode == 'val':
            mask = Image.fromarray(
                self._class_to_index(
                    self.val_detail.getMask(img_id),
                    _mapping=self._mapping,
                    _key=self._key))
        filename = img_id['file_name']
        basename, _ = os.path.splitext(filename)
        if filename.endswith(".jpg"):
            mask_png_name = basename + '.png'
            mask.save(os.path.join(self.label_dir, mask_png_name))
        return basename

    def generate_label(self):

        with open(
                os.path.join(self.voc_path,
                             'ImageSets/Segmentation/train_context.txt'),
                'w') as f:
            for img_id in tqdm.tqdm(self.train_ids, desc='train'):
                basename = self.save_mask(img_id, 'train')
                f.writelines(''.join([basename, '\n']))

        with open(
                os.path.join(self.voc_path,
                             'ImageSets/Segmentation/val_context.txt'),
                'w') as f:
            for img_id in tqdm.tqdm(self.val_ids, desc='val'):
                basename = self.save_mask(img_id, 'val')
                f.writelines(''.join([basename, '\n']))

        with open(
                os.path.join(self.voc_path,
                             'ImageSets/Segmentation/trainval_context.txt'),
                'w') as f:
            for img in tqdm.tqdm(os.listdir(self.label_dir), desc='trainval'):
                if img.endswith('.png'):
                    basename = img.split('.', 1)[0]
                    f.writelines(''.join([basename, '\n']))
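
A minimal usage sketch for PascalContextGenerator above, assuming the VOC2010 tree and the directory holding trainval_merged.json already exist (both paths below are placeholders):

generator = PascalContextGenerator(
    voc_path='/path/to/VOCdevkit/VOC2010',
    annotation_path='/path/to/annotations')
generator.generate_label()   # writes Context/*.png plus the *_context.txt split files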
Example #8
class PASCALContext(BaseDataset):
    def __init__(
        self,
        root,
        list_path,
        num_samples=None,
        num_classes=59,
        multi_scale=True,
        flip=True,
        ignore_label=-1,
        base_size=520,
        crop_size=(480, 480),
        downsample_rate=1,
        scale_factor=16,
        center_crop_test=False,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ):

        super(PASCALContext,
              self).__init__(ignore_label, base_size, crop_size,
                             downsample_rate, scale_factor, mean, std)

        self.root = os.path.join(root, 'pascal_ctx/VOCdevkit/VOC2010')
        self.split = list_path

        self.num_classes = num_classes
        self.class_weights = None

        self.multi_scale = multi_scale
        self.flip = flip
        self.crop_size = crop_size

        # prepare data
        annots = os.path.join(self.root, 'trainval_merged.json')
        img_path = os.path.join(self.root, 'JPEGImages')
        from detail import Detail
        if 'val' in self.split:
            self.detail = Detail(annots, img_path, 'val')
            mask_file = os.path.join(self.root, 'val.pth')
        elif 'train' in self.split:
            self.mode = 'train'
            self.detail = Detail(annots, img_path, 'train')
            mask_file = os.path.join(self.root, 'train.pth')
        else:
            raise NotImplementedError('only supporting train and val set.')
        self.files = self.detail.getImgs()

        # generate masks
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')

        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert (values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        print("Preprocessing mask, this will take a while." + \
              "But don't worry, it only run once for each split.")
        for i in range(len(self.files)):
            img_id = self.files[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        item = self.files[index]
        name = item['file_name']
        img_id = item['image_id']

        image = cv2.imread(os.path.join(self.detail.img_folder, name),
                           cv2.IMREAD_COLOR)
        label = np.asarray(self.masks[img_id], dtype=int)
        size = image.shape

        if self.split == 'val':
            image = cv2.resize(image,
                               self.crop_size,
                               interpolation=cv2.INTER_LINEAR)
            image = self.input_transform(image)
            image = image.transpose((2, 0, 1))

            label = cv2.resize(label,
                               self.crop_size,
                               interpolation=cv2.INTER_NEAREST)
            label = self.label_transform(label)
        elif self.split == 'testval':
            # evaluate model on val dataset
            image = self.input_transform(image)
            image = image.transpose((2, 0, 1))
            label = self.label_transform(label)
        else:
            image, label = self.gen_sample(image, label, self.multi_scale,
                                           self.flip)

        return image.copy(), label.copy(), np.array(size), name

    def label_transform(self, label):
        if self.num_classes == 59:
            # background is ignored
            label = np.array(label).astype('int32') - 1
            label[label == -2] = -1
        else:
            label = np.array(label).astype('int32')
        return label
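
label_transform above shifts the 59-class masks so that background (pixel value 0) becomes the ignore value; a small self-contained illustration of that shift:

import numpy as np

label = np.array([[0, 1, 2],
                  [59, 1, 0]])          # raw mask: 0 = background, 1..59 = classes
label = label.astype('int32') - 1       # background -> -1 (ignored), classes -> 0..58
label[label == -2] = -1                 # only matters if a -1 marker was already present
print(label)                            # [[-1  0  1]
                                        #  [58  0 -1]]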
Example #9
class PContextDataset(SegmentationDataset):
    NUM_CLASS = 59

    def __init__(self, cfg, stage, transform=None):
        # VOC2010
        super(PContextDataset, self).__init__(cfg, stage, transform)

        annFile = os.path.join(self.root, 'trainval_merged.json')
        imgDir = os.path.join(self.root, 'JPEGImages')

        self.detail = Detail(annFile, imgDir, self.stage)
        self.ids = self.detail.getImgs()
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(self.root, self.stage + '.pth')
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        values = np.unique(mask)
        for i in range(len(values)):
            assert values[i] in self._mapping
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id['image_id']))
        torch.save(masks, mask_file)
        return masks

    def _mask_transform(self, mask):
        mask = np.array(mask).astype('int32') - 1
        return torch.from_numpy(mask).long()

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert('RGB')

        if self.stage == 'test':
            if self.transform is not None:
                img, _ = self.transform(img, None)
            return img, None

        mask = self.masks[iid]
        if self.stage == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.stage == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.stage == 'testval'
            mask = self._mask_transform(mask)

        if self.transform is not None:
            img, mask = self.transform(img, mask)

        return img, mask

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
Example #10
    def _get_imgs(self, split='trainval'):
        """ get images by split type using Detail API. """
        annotation = os.path.join(self.root, 'trainval_merged.json')
        detail = Detail(annotation, self._img_dir, split)
        imgs = detail.getImgs()
        return imgs, detail
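
A minimal standalone sketch of the lookup that _get_imgs performs, assuming the detail API is installed and the placeholder path below points at an unpacked VOC2010 tree containing trainval_merged.json:

import os
from detail import Detail

root = '/path/to/VOCdevkit/VOC2010'                      # placeholder path
detail = Detail(os.path.join(root, 'trainval_merged.json'),
                os.path.join(root, 'JPEGImages'), 'trainval')
imgs = detail.getImgs()
print(len(imgs), imgs[0]['file_name'])                   # e.g. '2008_000002.jpg'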
Example #11
class PASCALContext(data.Dataset):
    def __init__(self,
                 cfg,
                 root,
                 image_set,
                 num_classes=59,
                 transform=None,
                 augmentations=None):

        self.cfg = cfg
        self.root = os.path.join(root, 'VOCdevkit/VOC2010')
        # self.root = root
        self.image_set = image_set

        if 'xception' in cfg.MODEL.NAME:
            mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
            std = np.array([0.5, 0.5, 0.5], dtype=np.float32)
        else:
            mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
            std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

        self.mean = np.expand_dims(np.expand_dims(np.expand_dims(mean, axis=0),
                                                  axis=-1),
                                   axis=-1)
        self.std = np.expand_dims(np.expand_dims(np.expand_dims(std, axis=0),
                                                 axis=-1),
                                  axis=-1)

        self.patch_width = cfg.MODEL.IMAGE_SIZE[0]
        self.patch_height = cfg.MODEL.IMAGE_SIZE[1]

        self.n_classes = num_classes

        self.output_stride = cfg.MODEL.OUTPUT_STRIDE

        self.tf = transform
        self.augmentations = augmentations

        self._setup_db()
        self.db_length = len(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        item = self.files[index]
        im_name = item['file_name']
        im_path = os.path.join(self.root, "JPEGImages", im_name)
        img_id = item['image_id']

        im = Image.open(im_path)
        lbl = np.asarray(self.masks[img_id], dtype=int)
        lbl = self.label_transform(lbl)
        # Map all ignored pixels to class index 255
        lbl[np.logical_or(lbl >= self.n_classes, lbl < 0)] = 255
        lbl = Image.fromarray(lbl)

        if self.augmentations is not None:
            im, lbl = self.augmentations(im, lbl)

        im, lbl, lbl_os = self.transform(im, lbl)

        if self.cfg.MODEL.LEARN_PAIRWISE_TERMS:
            # if self.cfg.MODEL.NUM_PAIRWISE_TERMS > 1:
            lbl_hs = []
            lbl_vs = []

            stride = 1
            for _ in range(self.cfg.MODEL.NUM_PAIRWISE_TERMS):
                for i in range(stride):
                    for j in range(stride):
                        lbl_os_ = lbl_os[i::stride, j::stride]

                        lbl_h = lbl_os_[:, :-1] * self.n_classes + lbl_os_[:, 1:]
                        lbl_v = lbl_os_[:-1, :] * self.n_classes + lbl_os_[1:, :]
                        lbl_h[(lbl_os_[:, :-1] >= self.n_classes) |
                              (lbl_os_[:, 1:] >= self.n_classes)] = 255
                        lbl_v[(lbl_os_[:-1, :] >= self.n_classes) |
                              (lbl_os_[1:, :] >= self.n_classes)] = 255

                        lbl_hs.append(lbl_h)
                        lbl_vs.append(lbl_v)

                stride += self.cfg.MODEL.PAIRWISE_STEP_SIZE
            # else:
            #     lbl_h = lbl_os[:, :-1] * self.n_classes + lbl_os[:, 1:]
            #     lbl_v = lbl_os[:-1, :] * self.n_classes + lbl_os[1:, :]
            #     lbl_h[(lbl_os[:, :-1] >= self.n_classes) | (lbl_os[:, 1:] >= self.n_classes)] = 255
            #     lbl_v[(lbl_os[:-1, :] >= self.n_classes) | (lbl_os[1:, :] >= self.n_classes)] = 255

            return im, lbl, lbl_hs, lbl_vs, dict()
        else:
            return im, lbl, dict(), dict(), dict()

    def transform(self, img, lbl):
        if self.tf is not None:
            img = self.tf(img)

        # if self.is_train:
        w, h = lbl.size
        lbl_os = lbl.resize((math.ceil(
            w / self.output_stride), math.ceil(h / self.output_stride)),
                            Image.NEAREST)

        lbl = torch.from_numpy(np.array(lbl)).long()
        lbl[lbl >= self.n_classes] = 255  # ignore pixels

        lbl_os = torch.from_numpy(np.array(lbl_os)).long()
        lbl_os[lbl_os >= self.n_classes] = 255  # ignore pixels

        return img, lbl, lbl_os

    def _setup_db(self):
        # prepare data
        annots = os.path.join(self.root, 'trainval_merged.json')
        img_path = os.path.join(self.root, 'JPEGImages')
        from detail import Detail
        if 'val' in self.image_set:
            self.detail = Detail(annots, img_path, 'val')
            mask_file = os.path.join(self.root, 'val.pth')
        elif 'train' in self.image_set:
            self.mode = 'train'
            self.detail = Detail(annots, img_path, 'train')
            mask_file = os.path.join(self.root, 'train.pth')
        else:
            raise NotImplementedError('only supporting train and val set.')
        self.files = self.detail.getImgs()

        # generate masks
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))

        self._key = np.array(range(len(self._mapping))).astype('uint8')

        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def decode_segmap(self, label_mask, plot=False):
        """Decode segmentation class labels into a color image

        Args:
            label_mask (np.ndarray): an (M,N) array of integer values denoting
              the class label at each spatial location.
            plot (bool, optional): whether to show the resulting color image
              in a figure.

        Returns:
            (np.ndarray, optional): the resulting decoded color image.
        """
        label_colours = self.get_pascal_labels()
        r = label_mask.copy()
        g = label_mask.copy()
        b = label_mask.copy()
        for ll in range(0, self.n_classes):
            r[label_mask == ll] = label_colours[ll, 0]
            g[label_mask == ll] = label_colours[ll, 1]
            b[label_mask == ll] = label_colours[ll, 2]
        rgb = np.zeros((label_mask.shape[0], label_mask.shape[1], 3))
        rgb[:, :, 0] = r / 255.0
        rgb[:, :, 1] = g / 255.0
        rgb[:, :, 2] = b / 255.0
        if plot:
            plt.imshow(rgb)
            plt.show()
        else:
            return rgb

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert (values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        print("Preprocessing mask, this will take a while." + \
            "But don't worry, it only run once for each split.")
        for i in range(len(self.files)):
            img_id = self.files[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
        torch.save(masks, mask_file)
        return masks

    def label_transform(self, label):
        if self.n_classes == 59:
            # background is ignored
            label = np.array(label).astype('int32') - 1
            label[label == -2] = -1
        else:
            label = np.array(label).astype('int32')
        return label

    def get_pascal_labels(self):
        """Load the mapping that associates pascal-context classes with label colors

        Returns:
            np.ndarray with dimensions (num_classes, 3)
        """

        cmap = plt.get_cmap('rainbow')
        colors = [cmap(i) for i in np.linspace(0, 1, self.n_classes - 1)]
        colors = np.array([[c[2] * 255, c[1] * 255, c[0] * 255]
                           for c in colors])
        colors = np.vstack(([[0, 0, 0]], colors))

        return colors
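
A small usage sketch for get_pascal_labels/decode_segmap above, assuming dataset is an already-constructed PASCALContext instance; a toy (M, N) label mask is turned into an RGB float image in [0, 1]:

import numpy as np

toy_mask = np.random.randint(0, 59, size=(4, 6))
rgb = dataset.decode_segmap(toy_mask, plot=False)   # shape (4, 6, 3)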
Example #12
class PascalContext(BaseDataset):
    NUM_CLASS = 59

    def __init__(self,
                 root='./data',
                 split='train',
                 mode=None,
                 transform=None,
                 target_transform=None,
                 **kwargs):
        super(PascalContext, self).__init__(root, split, mode, transform,
                                            target_transform, **kwargs)
        from detail import Detail
        #from detail import mask
        root = os.path.join(root, 'PascalContext')
        annFile = os.path.join(root, 'trainval_merged.json')
        imgDir = os.path.join(root, 'JPEGImages')
        # training mode
        self.detail = Detail(annFile, imgDir, split)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        # generate masks
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self.classes = [
            'background', 'aeroplane', 'mountain', 'mouse', 'track', 'road',
            'bag', 'motorbike', 'fence', 'bed', 'bedclothes', 'bench',
            'bicycle', 'diningtable', 'bird', 'person', 'floor', 'boat',
            'train', 'book', 'bottle', 'tree', 'window', 'plate', 'platform',
            'tvmonitor', 'building', 'bus', 'cabinet', 'shelves', 'light',
            'pottedplant', 'wall', 'car', 'ground', 'cat', 'sidewalk', 'truck',
            'ceiling', 'rock', 'chair', 'wood', 'food', 'horse', 'cloth',
            'sign', 'computer', 'sheep', 'keyboard', 'flower', 'sky', 'cow',
            'grass', 'cup', 'curtain', 'snow', 'water', 'sofa', 'dog', 'door'
        ]
        self._key = np.array(range(len(self._mapping))).astype('uint8')
        mask_file = os.path.join(root, self.split + '.pth')
        print('mask_file:', mask_file)
        if os.path.exists(mask_file):
            self.masks = torch.load(mask_file)
        else:
            self.masks = self._preprocess(mask_file)

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        for i in range(len(values)):
            assert (values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def _preprocess(self, mask_file):
        masks = {}
        tbar = trange(len(self.ids))
        print("Preprocessing mask, this will take a while." + \
            "But don't worry, it only run once for each split.")
        for i in tbar:
            img_id = self.ids[i]
            mask = Image.fromarray(
                self._class_to_index(self.detail.getMask(img_id)))
            masks[img_id['image_id']] = mask
            tbar.set_description("Preprocessing masks {}".format(
                img_id['image_id']))
        torch.save(masks, mask_file)
        return masks

    def __getitem__(self, index):
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(self.detail.img_folder,
                                      path)).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = self.masks[iid]
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int32') - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1
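
A minimal usage sketch for PascalContext above, assuming ./data/PascalContext contains JPEGImages and trainval_merged.json and that the BaseDataset base class accepts these arguments as shown:

train_set = PascalContext(root='./data', split='train', mode='train')
print(len(train_set), train_set.NUM_CLASS)   # number of images, 59
img, mask = train_set[0]                     # image and mask after the transforms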
Example #13
class DetailDataset(torch.utils.data.Dataset):

    CLASSES = PascalVOCDataset.CLASSES  # TODO probably not all of them; Detail.getCats() returns more

    def __init__(self,
                 img_dir,
                 ann_file,
                 split,
                 minimal=False,
                 transforms=None):
        self.img_dir = img_dir
        self.image_set = split
        self.transforms = transforms
        self.anno = ann_file

        self.detail = Detail(ann_file, img_dir, split, minimal, divider=10)

        # TODO proper class format:
        self.CLASSES = self.detail.getCats()

        imgs = self.detail.getImgs()
        idxs = range(len(imgs))
        self.idx_to_img = dict(zip(idxs, imgs))

        # TODO may come in handy; do it properly, minding the underscore
        # self.img_to_idx = dict(zip([x.image_id for x in imgs], idxs))

        self.class_to_ind = dict(zip(self.CLASSES, range(len(self.CLASSES))))

    def __len__(self):
        return len(self.idx_to_img)

    def _img_size(self, img):
        return (img['width'], img['height'])

    def get_groundtruth(self, idx):
        img = self.idx_to_img[idx]
        boxes = self.detail.getBboxes(img)
        # example of 'boxes':
        # [{'bbox': [250, 209, 241, 149], 'category': 'motorbike'},
        # {'bbox': [312, 139, 109, 191], 'category': 'person'}]
        boxes = [box['bbox'] for box in boxes]  # TODO we lose the information about the enclosed object
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, self._img_size(img),
                         mode="xywh").convert("xyxy")
        target = target.clip_to_image(remove_empty=True)

        img_keypoints = self.detail.getKpts(img)
        keypoints = [skeleton['keypoints'] for skeleton in img_keypoints]

        # TODO keypoints - we lose the bbox information
        target.add_field("kpts", Keypoints(keypoints, self._img_size(img)))
        # target.add_field("mask", SegmentationMask(self.detail.getMask(img).tolist(), size=self._img_size(img)))
        # TODO getMask returns a matrix of size (img.height, img.width), where each pixel
        # TODO holds the id of the class it belongs to. SegmentationMask

        # from getMask() doc:
        # If semantic segmentation of an image is requested (cat=instance=superpart=part=None),
        # the result is an image whose pixel values are the class IDs for that image.
        # If instance-level segmentation for one category of an image is requested (img and cat provided),
        # the result is an image whose pixel values are the instance IDs for that class and 0 everywhere else.
        target.add_field("class_mask", self.detail.getMask(img))
        target.add_field("instance_mask", self.detail.getMask(img,
                                                              cat='person'))
        target.add_field("bounds", self.detail.getBounds(img))
        target.add_field("occl", self.detail.getOccl(img))
        # TODO human parts?

        return target

    def __getitem__(self, idx):
        img = self.idx_to_img[idx]
        # example img object:
        # {'file_name': '2008_000002.jpg', 'phase': 'val', 'height': 375, 'width': 500,
        #  'date_captured': '31-May-2015 17:44:04', 'image_id': 2008000002, 'annotations': [1, 62295],
        #  'categories': [454, 427], 'parts': [16], 'keypoints': []}
        img = Image.open(os.path.join(self.img_dir,
                                      img['file_name'])).convert('RGB')
        target = self.get_groundtruth(idx)
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target, idx

    def get_img_info(self, idx):
        img = self.idx_to_img[idx]
        return {"height": img['height'], "width": img['width']}
Example #14
class ContextSegmentation(BaseDataset):
    BASE_DIR = 'VOCdevkit/VOC2010'
    NUM_CLASS = 59

    def __init__(self,
                 root=os.path.expanduser('~/.encoding/data'),
                 split='train',
                 mode=None,
                 transform=None,
                 target_transform=None):
        super(ContextSegmentation, self).__init__(root, split, mode, transform,
                                                  target_transform)
        from detail import Detail
        #from detail import mask
        root = os.path.join(root, self.BASE_DIR)
        annFile = os.path.join(root, 'trainval_merged.json')
        imgDir = os.path.join(root, 'JPEGImages')
        # training mode
        if split == 'train':
            phase = 'train'
        elif split == 'val':
            phase = 'val'
        elif split == 'test':
            phase = 'val'
            #phase = 'test'
        print('annFile', annFile)
        print('imgDir', imgDir)
        self.detail = Detail(annFile, imgDir, phase)
        self.transform = transform
        self.target_transform = target_transform
        self.ids = self.detail.getImgs()
        self._mapping = np.sort(
            np.array([
                0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25,
                284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45,
                46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458,
                34, 207, 80, 355, 85, 347, 220, 349, 360, 98, 187, 104, 105,
                366, 189, 368, 113, 115
            ]))
        self._key = np.array(range(len(self._mapping))).astype('uint8')

    def _class_to_index(self, mask):
        # assert the values
        values = np.unique(mask)
        #assert(values.size > 1)
        for i in range(len(values)):
            assert (values[i] in self._mapping)
        index = np.digitize(mask.ravel(), self._mapping, right=True)
        return self._key[index].reshape(mask.shape)

    def __getitem__(self, index):
        detail = self.detail
        img_id = self.ids[index]
        path = img_id['file_name']
        iid = img_id['image_id']
        img = Image.open(os.path.join(detail.img_folder, path)).convert('RGB')
        if self.mode == 'test':
            if self.transform is not None:
                img = self.transform(img)
            return img, os.path.basename(path)
        # convert mask to 60 categories
        mask = Image.fromarray(self._class_to_index(detail.getMask(img_id)))
        # synchronized transform
        if self.mode == 'train':
            img, mask = self._sync_transform(img, mask)
        elif self.mode == 'val':
            img, mask = self._val_sync_transform(img, mask)
        else:
            assert self.mode == 'testval'
            mask = self._mask_transform(mask)
        # general resize, normalize and toTensor
        if self.transform is not None:
            #print("transform for input")
            img = self.transform(img)
        if self.target_transform is not None:
            #print("transform for label")
            mask = self.target_transform(mask)
        return img, mask

    def _mask_transform(self, mask):
        target = np.array(mask).astype('int32') - 1
        return torch.from_numpy(target).long()

    def __len__(self):
        return len(self.ids)

    @property
    def pred_offset(self):
        return 1