Example #1
0
def get_random(y_list, x_list):
    """Pick one random (y, x) coordinate pair from parallel coordinate lists.

    A y value is drawn uniformly from ``y_list``, then an x value is drawn
    from the entries of ``x_list`` that share that y. Sampling runs under a
    fixed seed, so the result is deterministic for the same inputs.
    """
    with hu.random_seed(1):
        row = np.random.choice(y_list)
        cols_at_row = x_list[y_list == row]
        col = np.random.choice(cols_at_row)
    return row, col
Example #2
0
def get_points_from_mask(mask, bg_points=0):
    """Convert a segmentation mask into a sparse point-annotation map.

    One point is placed per connected component of each foreground class
    (labels 1 and 2), keeping the class id at that pixel; every remaining
    pixel is set to 255 ("unlabeled"). If ``bg_points`` is positive, that
    many background pixels are additionally marked with 0; ``bg_points == -1``
    requests as many background points as foreground ones.
    """
    n_points = 0
    points = np.zeros(mask.shape)
    # print(np.unique(mask))
    # only background (0) and classes 1/2 are supported
    assert (len(np.setdiff1d(np.unique(mask), [0, 1, 2])) == 0)

    for class_id in np.unique(mask):
        # background gets no foreground points
        if class_id == 0:
            continue
        components = morph.label((mask == class_id).squeeze())
        class_points = blobs2points(components)

        selected = class_points != 0
        n_points += int(class_points[selected].sum())
        points[selected] = class_id

    # sanity check: exactly one point per connected component overall
    assert morph.label((mask).squeeze()).max() == n_points
    points[points == 0] = 255
    if bg_points == -1:
        bg_points = n_points

    if bg_points:
        from haven import haven_utils as hu
        y_list, x_list = np.where(mask == 0)
        with hu.random_seed(1):
            for _ in range(bg_points):
                yi = np.random.choice(y_list)
                xi = np.random.choice(x_list[y_list == yi])
                points[yi, xi] = 0

    return points
Example #3
0
def get_random_points(mask, n_points=1):
    """Sample ``n_points`` random foreground coordinates from ``mask``.

    Returns an array shaped like ``mask.squeeze()`` with 1 at each sampled
    (y, x) location and 0 elsewhere. Sampling runs under a fixed seed, so
    repeated calls on the same mask yield the same points.
    """
    ys, xs = np.where(mask)
    out = np.zeros(mask.squeeze().shape)
    with hu.random_seed(1):
        for _ in range(n_points):
            row = np.random.choice(ys)
            col = np.random.choice(xs[ys == row])
            out[row, col] = 1

    return out
Example #4
0
def get_points_from_mask(mask, bg_points=0):
    """Reduce a binary mask to point annotations.

    Each connected component contributes a single point labeled 1; every
    other pixel is marked 255 ("unlabeled"). When ``bg_points`` is positive,
    that many background pixels are labeled 0; ``bg_points == -1`` uses one
    background point per connected component.
    """
    components = morph.label(mask.squeeze())
    points = blobs2points(components)
    points[points != 0] = 1
    points[points == 0] = 255

    if bg_points == -1:
        # one background point per foreground component
        bg_points = np.unique(components).max()

    if bg_points:
        from haven import haven_utils as hu
        ys, xs = np.where(mask == 0)
        with hu.random_seed(1):
            for _ in range(bg_points):
                row = np.random.choice(ys)
                col = np.random.choice(xs[ys == row])
                points[row, col] = 0

    return points
Example #5
0
        def label_next_batch(self):
            """Select the next set of image regions to label and mark them.

            On the first round (``label_map`` is all zeros) a seeded random
            set of ``init_sample_size`` images is chosen with every region
            labeled. Later rounds either draw random images/regions
            (heuristic 'random') or score each candidate region with
            ``compute_uncertainty`` and keep the top ``sample_size`` images.
            ``label_map`` and ``cost_map`` are updated in place, and a
            Subset over the currently-labeled training indices is returned.
            """
            pool_ind = self.get_pool_ind()

            # first round: no labels yet -> seed with random images
            if self.label_map.sum() == 0:
                with hu.random_seed(1):
                    # find k with infected regions
                    n_batches = 0
                    ind_list = []
                    for ind in np.random.permutation(len(self.train_set)):
                        if n_batches == self.init_sample_size:
                            break

                        # batch = ut.collate_fn([self.train_set[ind]])

                        # if batch['masks'].sum() == 0:
                        #     continue

                        n_batches += 1
                        # label every region of this image
                        ind_list += [{
                            'bbox_yxyx': np.arange(self.n_regions),
                            'index': ind
                        }]
            else:
                if self.heuristic == 'random':
                    with hu.random_seed(1):
                        img_ind_list = self.rng.choice(pool_ind,
                                                       min(
                                                           self.sample_size,
                                                           len(pool_ind)),
                                                       replace=False)
                        ind_list = []
                        for ind in img_ind_list:
                            # pick one random still-unlabeled region per image
                            bbox_ind = np.random.choice(
                                np.where(self.label_map[ind] == 0)[0])
                            ind_list += [{
                                'bbox_yxyx': [bbox_ind],
                                'index': ind
                            }]
                else:
                    ind_list = []
                    print('%s Scoring' % self.heuristic)
                    arange = np.arange(self.n_regions)
                    for ind in tqdm.tqdm(pool_ind):
                        batch = ut.collate_fn([self.train_set[ind]])
                        # per-pixel uncertainty map for this image
                        score_map = self.compute_uncertainty(
                            batch['images'],
                            replicate=True,
                            scale_factor=1,
                            method=self.heuristic).squeeze()
                        h, w = score_map.shape
                        bbox_yxyx = get_rect_bbox(h,
                                                  w,
                                                  n_regions=self.n_regions)

                        unlabeled = self.label_map[ind] == 0

                        # restrict candidates to regions not yet labeled
                        bbox_yxyx = bbox_yxyx[unlabeled]
                        bbox_ind = arange[unlabeled]

                        assert len(bbox_yxyx) > 0

                        # mean uncertainty inside each candidate region
                        s_list = np.zeros(len(bbox_yxyx))
                        for i, (y1, x1, y2, x2) in enumerate(bbox_yxyx):
                            s_list[i] = score_map[y1:y2, x1:x2].mean()

                        # if 1:
                        #     heatmap =  hu.f2l(hi.gray2cmap((score_map/score_map.max()).cpu().numpy().squeeze()))
                        #     original = hu.denormalize(batch['images'], mode='rgb')

                        #     img_bbox = bbox_yxyx_on_image(bbox_yxyx[[s_list.argmax()]], original)
                        #     hu.save_image('.tmp/tmp.png' , img_bbox*0.5 + heatmap*0.5)

                        # keep the most uncertain region of this image;
                        # 'score' (image-level mean) ranks images below
                        ind_list += [{
                            'score': float(score_map.mean()),
                            'bbox_yxyx': [bbox_ind[s_list.argmax()]],
                            'index': ind
                        }]

                    # sort ind_list and pick top k
                    ind_list = sorted(ind_list, key=lambda x: -x['score'])
                    ind_list = ind_list[:self.sample_size]

            # update labeled indices
            for ind_dict in ind_list:
                assert self.label_map[ind_dict['index'],
                                      ind_dict['bbox_yxyx']].sum() == 0
                self.label_map[ind_dict['index'], ind_dict['bbox_yxyx']] = 1
                assert self.label_map[ind_dict['index'],
                                      ind_dict['bbox_yxyx']].mean() == 1
                # cost bookkeeping: depends on supervision type / loss
                if not self.exp_dict['active_learning'].get('effort', None):
                    for i in ind_dict['bbox_yxyx']:
                        self.cost_map[ind_dict['index'], i] = 1

                elif self.exp_dict['model']['loss'] in [
                        'const_point_level', 'point_level'
                ]:
                    self.cost_map[ind_dict['index'], ind_dict['bbox_yxyx']] = 3

                elif self.exp_dict['model']['loss'] in ['point_level_2']:
                    self.cost_map[ind_dict['index'], ind_dict['bbox_yxyx']] = 4

                elif self.exp_dict['model']['loss'] in [
                        'joint_cross_entropy', 'cross_entropy'
                ]:
                    # full-mask supervision: approximate annotation effort by
                    # the convex-hull size of foreground pixels in the region
                    batch = self.train_set[ind_dict['index']]
                    mask = batch['masks'].squeeze()
                    h, w = mask.shape
                    bbox_yxyx = get_rect_bbox(h, w, n_regions=self.n_regions)
                    for i in ind_dict['bbox_yxyx']:
                        (y1, x1, y2, x2) = bbox_yxyx[i]

                        # in enumerate(bbox_yxyx):
                        patch = mask[y1:y2, x1:x2]
                        u_list = patch.unique()
                        if 1 in u_list:
                            from scipy.spatial import ConvexHull, convex_hull_plot_2d
                            pts = np.stack(np.where(patch)).transpose()
                            if len(pts) <= 2:
                                cost = len(pts) * 2
                            else:
                                # 'QJ' joggles input so degenerate point sets
                                # still produce a hull
                                hull = ConvexHull(pts, qhull_options='QJ')
                                cost = len(hull.simplices) * 3
                            self.cost_map[ind_dict['index'], i] = cost
                        elif 0 in u_list:
                            self.cost_map[ind_dict['index'], i] = 4

                else:
                    raise ValueError

            # return active dataset
            train_list = sorted(self.get_train_ind())
            return torch.utils.data.Subset(self.train_set, train_list)
Example #6
0
    def __init__(self,
                 split,
                 transform_lvl,
                 datadir_base,
                 name='cifar10',
                 n_samples=None,
                 colorjitter=False,
                 val_transform='identity'):
        """CIFAR-10/100 wrapper with configurable train-time augmentation.

        Args:
            split: 'train', 'validation' or 'test'.
            transform_lvl: train augmentation level (0, 1, 1.5, 2, 2.5 or 3).
            datadir_base: dataset root; a cluster default path is used when
                falsy.
            name: 'cifar10' selects CIFAR-10; any other value selects
                CIFAR-100.
            n_samples: if given, keep a class-balanced random subset of this
                many samples (must be a multiple of the class count).
            colorjitter: append ColorJitter to the train pipeline.
            val_transform: transform preset for validation/test splits.

        Raises:
            ValueError: on an unknown split, transform_lvl or val_transform.
        """
        self.name = name
        if split in ['train', 'validation']:
            train = True
        elif split == 'test':
            train = False
        else:
            # fail fast: an unknown split previously surfaced later as an
            # UnboundLocalError on `train`
            raise ValueError('%s is not a valid split' % split)
        self.split = split
        if self.name == 'cifar10':
            path = datadir_base or "/mnt/datasets/public/cifar10"
            self.n_classes = 10
            self.dataset = dset.CIFAR10(root=path, train=train, download=True)
        else:
            path = datadir_base or "/mnt/datasets/public/cifar100"
            self.n_classes = 100
            self.dataset = dset.CIFAR100(root=path, train=train, download=True)
        self.dataset.targets = np.array(self.dataset.targets)
        # CIFAR channel statistics rescaled to [0, 1]
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        self.mean = normalize.mean
        self.std = normalize.std

        self.image_size = 32
        self.nc = 3

        if split == 'train':
            # Train transforms: ToTensor/normalize are appended at the end
            # so ColorJitter can be inserted before them
            if transform_lvl == 0:
                transform = transforms.Compose([
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 1:
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 1.5:
                transform = transforms.Compose([
                    transforms.RandomHorizontalFlip(),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 2:
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomHorizontalFlip(),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 2.5:
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomAffine(10, translate=None,
                                            scale=(0.5, 2)),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 3:
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomAffine(10, translate=None,
                                            scale=(0.5, 2)),
                    transforms.RandomHorizontalFlip(),
                    # transforms.ToTensor(),
                    # normalize,
                ])
            else:
                raise ValueError(
                    'only lvls 0, 1, 1.5, 2, 2.5 and 3 are supported')

            if colorjitter:
                transform.transforms.append(
                    transforms.ColorJitter(
                        brightness=0.4,
                        contrast=0.4,
                        saturation=0.4,
                    ))
            transform.transforms.append(transforms.ToTensor())
            transform.transforms.append(normalize)

        elif split in ['validation', 'test']:
            # identity transform
            if val_transform == 'identity':
                transform = transforms.Compose(
                    [transforms.ToTensor(), normalize])
            elif val_transform == 'rotation':
                transform = transforms.Compose([
                    transforms.RandomRotation((45, 45)),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'translation':
                transform = transforms.Compose([
                    transforms.Pad((4, 4, 0, 0)),
                    transforms.CenterCrop(self.image_size),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'zoomin':
                transform = transforms.Compose([
                    transforms.Resize(int(self.image_size * 1.5)),
                    transforms.CenterCrop(self.image_size),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'zoomout':
                transform = transforms.Compose([
                    transforms.Resize(int(self.image_size * 0.75)),
                    transforms.Pad(4),
                    transforms.ToTensor(), normalize
                ])

            else:
                raise ValueError('%s is not supported' % val_transform)

        self.transform = transform

        if n_samples is not None:
            assert n_samples % self.n_classes == 0, 'the number of samples %s must be a multiple of the number of classes %s' % (
                n_samples, self.n_classes)
            with hu.random_seed(10):
                n = int(n_samples /
                        self.n_classes)  # number of samples per class
                # Extract a balanced subset
                ind = np.hstack([
                    np.random.choice(np.where(self.dataset.targets == l)[0],
                                     n,
                                     replace=False)
                    for l in np.unique(self.dataset.targets)
                ])

                self.dataset.data = self.dataset.data[ind]
                self.dataset.targets = self.dataset.targets[ind]
Example #7
0
    def __init__(self,
                 split,
                 transform_lvl,
                 datadir_base,
                 n_samples=None,
                 val_transform='identity'):
        """Tiny-ImageNet wrapper with configurable train-time augmentation.

        Args:
            split: 'train', 'validation' or 'test'.
            transform_lvl: train augmentation level (0, 1, 1.5, 2, 2.5 or 3).
            datadir_base: dataset root; a cluster default path is used when
                falsy.
            n_samples: if given, keep a random subset of this many images.
            val_transform: transform preset for validation/test splits.

        Raises:
            ValueError: on an unknown split, transform_lvl or val_transform.
        """
        path = datadir_base or '/mnt/projects/bilvlda/dataset/tiny-imagenet-200'
        self.name = 'tinyimagenet'
        self.n_classes = 200
        self.image_size = 64
        self.nc = 3

        normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])

        self.mean = normalize.mean
        self.std = normalize.std

        if split == 'train':
            if transform_lvl == 0:
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.ToTensor(),
                    normalize,
                ])

            elif transform_lvl == 1:
                # NOTE(review): levels 1, 2.5 and 3 lack the RGB-convert
                # Lambda the other levels have — confirm grayscale images
                # cannot reach these pipelines
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.ToTensor(),
                    normalize,
                ])

            elif transform_lvl == 1.5:
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ])

            elif transform_lvl == 2:
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ])

            elif transform_lvl == 2.5:
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomAffine(10, translate=None,
                                            scale=(0.5, 2)),
                    transforms.ToTensor(),
                    normalize,
                ])

            elif transform_lvl == 3:
                transform = transforms.Compose([
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomAffine(10, translate=None,
                                            scale=(0.5, 2)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ])

            else:
                raise ValueError(
                    'only lvls 0, 1, 1.5, 2, 2.5 and 3 are supported')

        elif split in ['validation', 'test']:
            # identity transform
            if val_transform == 'identity':
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'rotation':
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.RandomRotation((45, 45)),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'translation':
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.Pad((4, 4, 0, 0)),
                    transforms.CenterCrop(self.image_size),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'zoomin':
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.Resize(int(self.image_size * 1.5)),
                    transforms.CenterCrop(self.image_size),
                    transforms.ToTensor(), normalize
                ])
            elif val_transform == 'zoomout':
                transform = transforms.Compose([
                    transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.Resize(int(self.image_size * 0.75)),
                    transforms.Pad(4),
                    transforms.ToTensor(), normalize
                ])
            else:
                # previously an unknown preset left `transform` unbound and
                # crashed with UnboundLocalError; match the CIFAR wrapper
                raise ValueError('%s is not supported' % val_transform)

        else:
            # fail fast on an unknown split instead of UnboundLocalError
            raise ValueError('%s is not a valid split' % split)

        self.transform = transform

        if split in ['train', 'validation']:
            fname = '/mnt/projects/bilvlda/dataset/tiny-imagenet-200/tinyimagenet_train.json'

            # cache the ImageFolder file list on first use
            if not os.path.exists(fname):
                dataset = dset.ImageFolder(root=os.path.join(path, 'train'))
                hu.save_json(fname, dataset.imgs)

            self.imgs = np.array(hu.load_json(fname))
            assert (len(self.imgs) == 100000)

        elif split == 'test':
            fname = '/mnt/projects/bilvlda/dataset/tiny-imagenet-200/tinyimagenet_validation.json'

            if not os.path.exists(fname):
                dataset = dset.ImageFolder(root=os.path.join(path, 'val'))
                hu.save_json(fname, dataset.imgs)
            self.imgs = np.array(hu.load_json(fname))
            assert (len(self.imgs) == 10000)

        if n_samples is not None:
            # uniform (not class-balanced) random subset, fixed seed
            with hu.random_seed(10):
                imgs = np.array(self.imgs)
                ind = np.random.choice(imgs.shape[0], n_samples, replace=False)
                self.imgs = imgs[ind]
Example #8
0
    def __init__(self,
                 split,
                 transform_lvl,
                 datadir_base,
                 n_samples=None,
                 colorjitter=False,
                 val_transform='identity',
                 netA=None):
        """ImageNet wrapper with configurable train-time augmentation.

        Args:
            split: 'train', 'validation' or 'test'.
            transform_lvl: train augmentation level (0, 1, 1.5, 2, 2.5 or 3).
            datadir_base: dataset root; a cluster default path is used when
                falsy.
            n_samples: if given, keep a class-balanced random subset of this
                many samples (must be a multiple of the class count).
            colorjitter: append ColorJitter to the train pipeline.
            val_transform: accepted for interface parity with the other
                dataset wrappers.
            netA: optional transform appended to the pipeline.

        Raises:
            ValueError: on an unknown split or transform_lvl.
        """
        # NOTE(review): val_transform is currently unused here — validation
        # and test always get the fixed resize/crop pipeline below
        path = datadir_base or '/mnt/datasets/public/imagenet/imagenet-data/raw-data/'
        self.name = 'imagenet'
        self.n_classes = 1000
        self.image_size = 224
        self.nc = 3

        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        self.mean = normalize.mean
        self.std = normalize.std

        if split == 'train':
            if transform_lvl == 0:
                transform = transforms.Compose([
                    # transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    # transforms.ToTensor(),
                    # normalize,
                ])
                # NOTE(review): netA is only appended for lvl 0 — confirm
                # the other levels are meant to ignore it
                if netA is not None:
                    transform.transforms.append(netA)

            elif transform_lvl == 1:
                transform = transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.RandomCrop(self.image_size, padding=4),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 1.5:
                transform = transforms.Compose([
                    # transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.RandomHorizontalFlip(),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 2:
                transform = transforms.Compose([
                    # transforms.Lambda(lambda x: x.convert("RGB")),
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomHorizontalFlip(),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 2.5:
                transform = transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomAffine(10, translate=None,
                                            scale=(0.5, 2)),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            elif transform_lvl == 3:
                transform = transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.RandomCrop(self.image_size, padding=4),
                    transforms.RandomAffine(10, translate=None,
                                            scale=(0.5, 2)),
                    transforms.RandomHorizontalFlip(),
                    # transforms.ToTensor(),
                    # normalize,
                ])

            else:
                raise ValueError(
                    'only lvls 0, 1, 1.5, 2, 2.5 and 3 are supported')

            if colorjitter:
                transform.transforms.append(
                    transforms.ColorJitter(
                        brightness=0.5,
                        contrast=0.5,
                        saturation=0.5,
                    ))
            transform.transforms.append(transforms.ToTensor())
            transform.transforms.append(normalize)

        elif split in ['validation', 'test']:
            # identity transform
            transform = transforms.Compose([
                # transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])

        else:
            # fail fast on an unknown split instead of UnboundLocalError
            raise ValueError('%s is not a valid split' % split)

        self.transform = transform

        if split in ['train', 'validation']:
            fname = '/mnt/projects/bilvlda/dataset/imagenet/imagenet_train.json'

            # cache the ImageFolder file list on first use
            if not os.path.exists(fname):
                dataset = dset.ImageFolder(root=os.path.join(path, 'train'))
                hu.save_json(fname, dataset.imgs)

            self.imgs = np.array(hu.load_json(fname))
            assert (len(self.imgs) == 1281167)

        elif split == 'test':
            fname = '/mnt/projects/bilvlda/dataset/imagenet/imagenet_validation.json'

            if not os.path.exists(fname):
                dataset = dset.ImageFolder(
                    root=os.path.join(path, 'validation'))
                hu.save_json(fname, dataset.imgs)
            self.imgs = np.array(hu.load_json(fname))
            assert (len(self.imgs) == 50000)

        if n_samples is not None:
            assert n_samples % self.n_classes == 0, 'the number of samples %s must be a multiple of the number of classes %s' % (
                n_samples, self.n_classes)
            with hu.random_seed(10):
                imgs = np.array(self.imgs)
                n = int(n_samples /
                        self.n_classes)  # number of samples per class
                # Extract a balanced subset
                ind = np.hstack([
                    np.random.choice(np.where(imgs[:, 1] == l)[0],
                                     n,
                                     replace=False)
                    for l in np.unique(imgs[:, 1])
                ])
                # ind = np.random.choice(imgs.shape[0], n_samples, replace=False)

                self.imgs = imgs[ind]