def get_random(y_list, x_list):
    """Deterministically pick one (y, x) pair from paired coordinate arrays.

    The pick is made under hu.random_seed(1), so repeated calls with the
    same inputs return the same coordinate.
    """
    with hu.random_seed(1):
        yi = np.random.choice(y_list)
        # only x values belonging to the chosen row are valid candidates
        candidates = x_list[y_list == yi]
        xi = np.random.choice(candidates)
    return yi, xi
def get_points_from_mask(mask, bg_points=0):
    """Build a point-annotation map from a multi-class segmentation mask.

    Every connected component of each foreground class (1 or 2) contributes
    one point pixel carrying that class id; all other pixels are set to 255
    (ignore label).  Optionally adds ``bg_points`` background points with
    value 0, sampled deterministically; ``bg_points == -1`` means "as many
    background points as foreground points".
    """
    n_points = 0
    points = np.zeros(mask.shape)
    # print(np.unique(mask))
    # mask may only contain class ids 0 (background), 1 and 2
    assert (len(np.setdiff1d(np.unique(mask), [0, 1, 2])) == 0)
    for c in np.unique(mask):
        if c == 0:
            continue
        # connected-component labeling restricted to this class
        blobs = morph.label((mask == c).squeeze())
        # presumably blobs2points marks one representative pixel per blob,
        # so the sum below counts blobs of this class -- TODO confirm
        points_class = blobs2points(blobs)
        ind = points_class != 0
        n_points += int(points_class[ind].sum())
        points[ind] = c
    # sanity check: total blobs over the whole (binarized) mask must equal
    # the number of points placed.
    # NOTE(review): assumes blobs of different classes never touch -- confirm
    assert morph.label((mask).squeeze()).max() == n_points
    # everything that received no class point becomes the ignore label
    points[points == 0] = 255
    if bg_points == -1:
        bg_points = n_points
    if bg_points:
        from haven import haven_utils as hu
        # candidate background locations
        y_list, x_list = np.where(mask == 0)
        with hu.random_seed(1):
            for i in range(bg_points):
                yi = np.random.choice(y_list)
                x_tmp = x_list[y_list == yi]
                xi = np.random.choice(x_tmp)
                points[yi, xi] = 0
    return points
def get_random_points(mask, n_points=1):
    """Return a zero map with n_points pixels set to 1, sampled from the
    nonzero locations of `mask` under a fixed seed (hu.random_seed(1))."""
    ys, xs = np.where(mask)
    out = np.zeros(mask.squeeze().shape)
    with hu.random_seed(1):
        for _ in range(n_points):
            y = np.random.choice(ys)
            # restrict the x draw to columns that pair with the chosen row
            x = np.random.choice(xs[ys == y])
            out[y, x] = 1
    return out
def get_points_from_mask(mask, bg_points=0):
    """Binary point-annotation map: one 1-valued point per connected blob of
    `mask`, 255 (ignore) elsewhere, plus optional seeded 0-valued background
    points (`bg_points == -1` means one per blob).

    NOTE(review): this redefines the multi-class get_points_from_mask above;
    being later in the module, this definition wins.
    """
    labeled = morph.label(mask.squeeze())
    points = blobs2points(labeled)
    # binarize: blob points -> 1, everything else -> ignore label 255
    fg = points != 0
    points[fg] = 1
    points[~fg] = 255
    if bg_points == -1:
        bg_points = np.unique(labeled).max()
    if bg_points:
        from haven import haven_utils as hu
        bg_y, bg_x = np.where(mask == 0)
        with hu.random_seed(1):
            for _ in range(bg_points):
                y = np.random.choice(bg_y)
                x = np.random.choice(bg_x[bg_y == y])
                points[y, x] = 0
    return points
def label_next_batch(self):
    """Select the next set of image regions to label (active learning step),
    mark them in ``self.label_map``, record their annotation cost in
    ``self.cost_map``, and return the updated labeled subset of the train set.
    """
    pool_ind = self.get_pool_ind()
    if self.label_map.sum() == 0:
        # Cold start: nothing labeled yet. Label ALL regions of a random
        # subset of init_sample_size images (seeded for reproducibility).
        with hu.random_seed(1):
            # find k with infected regions
            n_batches = 0
            ind_list = []
            for ind in np.random.permutation(len(self.train_set)):
                if n_batches == self.init_sample_size:
                    break
                # batch = ut.collate_fn([self.train_set[ind]])
                # if batch['masks'].sum() == 0:
                #     continue
                n_batches += 1
                ind_list += [{
                    'bbox_yxyx': np.arange(self.n_regions),
                    'index': ind
                }]
    else:
        if self.heuristic == 'random':
            # Random heuristic: pick sample_size pool images and one
            # still-unlabeled region per image.
            with hu.random_seed(1):
                img_ind_list = self.rng.choice(pool_ind,
                                               min(self.sample_size,
                                                   len(pool_ind)),
                                               replace=False)
                ind_list = []
                for ind in img_ind_list:
                    bbox_ind = np.random.choice(
                        np.where(self.label_map[ind] == 0)[0])
                    ind_list += [{
                        'bbox_yxyx': [bbox_ind],
                        'index': ind
                    }]
        else:
            # Uncertainty-based heuristic: score every pool image and keep
            # its most uncertain unlabeled region.
            ind_list = []
            print('%s Scoring' % self.heuristic)
            arange = np.arange(self.n_regions)
            for ind in tqdm.tqdm(pool_ind):
                batch = ut.collate_fn([self.train_set[ind]])
                score_map = self.compute_uncertainty(
                    batch['images'],
                    replicate=True,
                    scale_factor=1,
                    method=self.heuristic).squeeze()
                h, w = score_map.shape
                bbox_yxyx = get_rect_bbox(h, w, n_regions=self.n_regions)
                # keep only regions that are not labeled yet
                unlabeled = self.label_map[ind] == 0
                bbox_yxyx = bbox_yxyx[unlabeled]
                bbox_ind = arange[unlabeled]
                assert len(bbox_yxyx) > 0
                # mean uncertainty per candidate region
                s_list = np.zeros(len(bbox_yxyx))
                for i, (y1, x1, y2, x2) in enumerate(bbox_yxyx):
                    s_list[i] = score_map[y1:y2, x1:x2].mean()
                # if 1:
                #     heatmap = hu.f2l(hi.gray2cmap((score_map/score_map.max()).cpu().numpy().squeeze()))
                #     original = hu.denormalize(batch['images'], mode='rgb')
                #     img_bbox = bbox_yxyx_on_image(bbox_yxyx[[s_list.argmax()]], original)
                #     hu.save_image('.tmp/tmp.png' , img_bbox*0.5 + heatmap*0.5)
                # image-level score is the whole-map mean; the chosen region
                # is the one with the highest regional mean
                ind_list += [{
                    'score': float(score_map.mean()),
                    'bbox_yxyx': [bbox_ind[s_list.argmax()]],
                    'index': ind
                }]
            # sort ind_list and pick top k
            # NOTE(review): only the entries built in this branch carry a
            # 'score' key, so the sort must live here -- confirm nesting
            ind_list = sorted(ind_list, key=lambda x: -x['score'])
            ind_list = ind_list[:self.sample_size]
    # update labeled indices
    for ind_dict in ind_list:
        # the selected regions must not have been labeled before
        assert self.label_map[ind_dict['index'],
                              ind_dict['bbox_yxyx']].sum() == 0
        self.label_map[ind_dict['index'], ind_dict['bbox_yxyx']] = 1
        assert self.label_map[ind_dict['index'],
                              ind_dict['bbox_yxyx']].mean() == 1
        if not self.exp_dict['active_learning'].get('effort', None):
            # uniform cost when effort-based costing is disabled
            for i in ind_dict['bbox_yxyx']:
                self.cost_map[ind_dict['index'], i] = 1
        elif self.exp_dict['model']['loss'] in [
                'const_point_level', 'point_level'
        ]:
            self.cost_map[ind_dict['index'], ind_dict['bbox_yxyx']] = 3
        elif self.exp_dict['model']['loss'] in ['point_level_2']:
            self.cost_map[ind_dict['index'], ind_dict['bbox_yxyx']] = 4
        elif self.exp_dict['model']['loss'] in [
                'joint_cross_entropy', 'cross_entropy'
        ]:
            # full-mask losses: cost approximated by the polygon complexity
            # of the foreground inside each labeled region
            batch = self.train_set[ind_dict['index']]
            mask = batch['masks'].squeeze()
            h, w = mask.shape
            bbox_yxyx = get_rect_bbox(h, w, n_regions=self.n_regions)
            for i in ind_dict['bbox_yxyx']:
                (y1, x1, y2, x2) = bbox_yxyx[i]  # in enumerate(bbox_yxyx):
                patch = mask[y1:y2, x1:x2]
                u_list = patch.unique()
                if 1 in u_list:
                    from scipy.spatial import ConvexHull, convex_hull_plot_2d
                    pts = np.stack(np.where(patch)).transpose()
                    if len(pts) <= 2:
                        # too few points for a hull; 2 clicks per point
                        cost = len(pts) * 2
                    else:
                        # QJ joggles the input so degenerate point sets work
                        hull = ConvexHull(pts, qhull_options='QJ')
                        cost = len(hull.simplices) * 3
                    self.cost_map[ind_dict['index'], i] = cost
                elif 0 in u_list:
                    # pure background patch: flat cost
                    self.cost_map[ind_dict['index'], i] = 4
                else:
                    raise ValueError
    # return active dataset
    train_list = sorted(self.get_train_ind())
    return torch.utils.data.Subset(self.train_set, train_list)
def __init__(self,
             split,
             transform_lvl,
             datadir_base,
             name='cifar10',
             n_samples=None,
             colorjitter=False,
             val_transform='identity'):
    """CIFAR-10/100 wrapper: loads the torchvision dataset and builds the
    split-appropriate transform pipeline.

    Args:
        split: 'train', 'validation' or 'test'.
        transform_lvl: train-split augmentation strength
            (0, 1, 1.5, 2, 2.5 or 3).
        datadir_base: dataset root directory; falls back to a cluster path
            when falsy.
        name: 'cifar10' selects CIFAR-10; any other value selects CIFAR-100.
        n_samples: optional class-balanced subset size (must be a multiple
            of the number of classes).
        colorjitter: if True, append ColorJitter to the train pipeline.
        val_transform: perturbation applied to validation/test images.
    """
    self.name = name
    if split in ['train', 'validation']:
        train = True
    elif split == 'test':
        train = False
    # NOTE(review): any other split value leaves `train` unbound (NameError
    # at the dataset constructor below)
    self.split = split
    if self.name == 'cifar10':
        path = datadir_base or "/mnt/datasets/public/cifar10"
        self.n_classes = 10
        self.dataset = dset.CIFAR10(root=path, train=train, download=True)
    else:
        path = datadir_base or "/mnt/datasets/public/cifar100"
        self.n_classes = 100
        self.dataset = dset.CIFAR100(root=path, train=train, download=True)
    # ndarray targets make the balanced-subset indexing below possible
    self.dataset.targets = np.array(self.dataset.targets)
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    self.mean = normalize.mean
    self.std = normalize.std
    self.image_size = 32
    self.nc = 3
    if split == 'train':
        # Train transforms: pick the augmentation set by level; ToTensor and
        # normalize are appended once at the end of this branch.
        if transform_lvl == 0:
            transform = transforms.Compose([
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 1:
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 1.5:
            transform = transforms.Compose([
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 2:
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 2.5:
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomAffine(10, translate=None, scale=(0.5, 2)),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 3:
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomAffine(10, translate=None, scale=(0.5, 2)),
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),
                # normalize,
            ])
        else:
            raise ValueError(
                'only lvls 0, 1, 1.5, 2, 2.5 and 3 are supported')
        if colorjitter:
            transform.transforms.append(
                transforms.ColorJitter(
                    brightness=0.4,
                    contrast=0.4,
                    saturation=0.4,
                ))
        transform.transforms.append(transforms.ToTensor())
        transform.transforms.append(normalize)
    elif split in ['validation', 'test']:
        # identity transform
        if val_transform == 'identity':
            transform = transforms.Compose(
                [transforms.ToTensor(), normalize])
        elif val_transform == 'rotation':
            transform = transforms.Compose([
                transforms.RandomRotation((45, 45)),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'translation':
            transform = transforms.Compose([
                transforms.Pad((4, 4, 0, 0)),
                transforms.CenterCrop(self.image_size),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'zoomin':
            transform = transforms.Compose([
                transforms.Resize(int(self.image_size * 1.5)),
                transforms.CenterCrop(self.image_size),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'zoomout':
            transform = transforms.Compose([
                transforms.Resize(int(self.image_size * 0.75)),
                transforms.Pad(4),
                transforms.ToTensor(), normalize
            ])
        else:
            raise ValueError('%s is not supported' % val_transform)
    self.transform = transform
    # if n_samples is not None:
    #     with hu.random_seed(10):
    #         ind = np.random.choice(self.dataset.data.shape[0], n_samples, replace=False)
    #         self.dataset.data = self.dataset.data[ind]
    #         self.dataset.targets = np.array(self.dataset.targets)[ind]
    if n_samples is not None:
        assert n_samples % self.n_classes == 0, 'the number of samples %s must be a multiple of the number of classes %s' % (
            n_samples, self.n_classes)
        with hu.random_seed(10):
            n = int(n_samples /
                    self.n_classes)  # number of samples per class
            # Extract a balanced subset
            ind = np.hstack([
                np.random.choice(np.where(self.dataset.targets == l)[0],
                                 n,
                                 replace=False)
                for l in np.unique(self.dataset.targets)
            ])
            self.dataset.data = self.dataset.data[ind]
            self.dataset.targets = self.dataset.targets[ind]
def __init__(self,
             split,
             transform_lvl,
             datadir_base,
             n_samples=None,
             val_transform='identity'):
    """Tiny-ImageNet wrapper: builds the split-appropriate transform and a
    cached (JSON) list of image paths/labels.

    Args:
        split: 'train', 'validation' or 'test'.
        transform_lvl: train-split augmentation strength
            (0, 1, 1.5, 2, 2.5 or 3).
        datadir_base: dataset root; falls back to a cluster path when falsy.
        n_samples: optional random subset size.
        val_transform: perturbation applied to validation/test images.
    """
    path = datadir_base or '/mnt/projects/bilvlda/dataset/tiny-imagenet-200'
    self.name = 'tinyimagenet'
    self.n_classes = 200
    self.image_size = 64
    self.nc = 3
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5])
    self.mean = normalize.mean
    self.std = normalize.std
    if split == 'train':
        if transform_lvl == 0:
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.ToTensor(),
                normalize,
            ])
        elif transform_lvl == 1:
            # NOTE(review): no RGB-convert Lambda here, unlike lvls
            # 0/1.5/2 -- confirm this is intended
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.ToTensor(),
                normalize,
            ])
        elif transform_lvl == 1.5:
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        elif transform_lvl == 2:
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        elif transform_lvl == 2.5:
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomAffine(10, translate=None, scale=(0.5, 2)),
                transforms.ToTensor(),
                normalize,
            ])
        elif transform_lvl == 3:
            transform = transforms.Compose([
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomAffine(10, translate=None, scale=(0.5, 2)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            raise ValueError(
                'only lvls 0, 1, 1.5, 2, 2.5 and 3 are supported')
    elif split in ['validation', 'test']:
        # identity transform
        if val_transform == 'identity':
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'rotation':
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.RandomRotation((45, 45)),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'translation':
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Pad((4, 4, 0, 0)),
                transforms.CenterCrop(self.image_size),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'zoomin':
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Resize(int(self.image_size * 1.5)),
                transforms.CenterCrop(self.image_size),
                transforms.ToTensor(), normalize
            ])
        elif val_transform == 'zoomout':
            transform = transforms.Compose([
                transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Resize(int(self.image_size * 0.75)),
                transforms.Pad(4),
                transforms.ToTensor(), normalize
            ])
        # NOTE(review): unlike the CIFAR wrapper, an unknown val_transform
        # silently leaves `transform` unbound here (NameError below)
    self.transform = transform
    if split in ['train', 'validation']:
        # cache the ImageFolder listing as JSON so later runs skip the walk
        fname = '/mnt/projects/bilvlda/dataset/tiny-imagenet-200/tinyimagenet_train.json'
        if not os.path.exists(fname):
            dataset = dset.ImageFolder(root=os.path.join(path, 'train'))
            hu.save_json(fname, dataset.imgs)
        self.imgs = np.array(hu.load_json(fname))
        assert (len(self.imgs) == 100000)
    elif split == 'test':
        # the on-disk 'val' folder serves as the test split
        fname = '/mnt/projects/bilvlda/dataset/tiny-imagenet-200/tinyimagenet_validation.json'
        if not os.path.exists(fname):
            dataset = dset.ImageFolder(root=os.path.join(path, 'val'))
            hu.save_json(fname, dataset.imgs)
        self.imgs = np.array(hu.load_json(fname))
        assert (len(self.imgs) == 10000)
    if n_samples is not None:
        # deterministic random subset (not class-balanced here)
        with hu.random_seed(10):
            imgs = np.array(self.imgs)
            ind = np.random.choice(imgs.shape[0], n_samples, replace=False)
            self.imgs = imgs[ind]
def __init__(self,
             split,
             transform_lvl,
             datadir_base,
             n_samples=None,
             colorjitter=False,
             val_transform='identity',
             netA=None):
    """ImageNet wrapper: builds the split-appropriate transform pipeline and
    a cached (JSON) list of image paths/labels.

    Args:
        split: 'train', 'validation' or 'test'.
        transform_lvl: train-split augmentation strength
            (0, 1, 1.5, 2, 2.5 or 3).
        datadir_base: dataset root; falls back to a cluster path when falsy.
        n_samples: optional class-balanced subset size (must be a multiple
            of the number of classes).
        colorjitter: if True, append ColorJitter to the train pipeline.
        val_transform: accepted for interface parity; the validation/test
            pipeline here is always the identity transform.
        netA: optional transform appended to the pipeline -- only applied
            when transform_lvl == 0 (NOTE(review): confirm this asymmetry
            is intended).
    """
    path = datadir_base or '/mnt/datasets/public/imagenet/imagenet-data/raw-data/'
    self.name = 'imagenet'
    self.n_classes = 1000
    self.image_size = 224
    self.nc = 3
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    self.mean = normalize.mean
    self.std = normalize.std
    if split == 'train':
        # All levels start with Resize(256) + CenterCrop(224); ToTensor and
        # normalize are appended once at the end of this branch.
        if transform_lvl == 0:
            transform = transforms.Compose([
                # transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                # transforms.ToTensor(),
                # normalize,
            ])
            if netA is not None:
                transform.transforms.append(netA)
        elif transform_lvl == 1:
            transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.RandomCrop(self.image_size, padding=4),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 1.5:
            transform = transforms.Compose([
                # transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 2:
            transform = transforms.Compose([
                # transforms.Lambda(lambda x: x.convert("RGB")),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 2.5:
            transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomAffine(10, translate=None, scale=(0.5, 2)),
                # transforms.ToTensor(),
                # normalize,
            ])
        elif transform_lvl == 3:
            transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.RandomCrop(self.image_size, padding=4),
                transforms.RandomAffine(10, translate=None, scale=(0.5, 2)),
                transforms.RandomHorizontalFlip(),
                # transforms.ToTensor(),
                # normalize,
            ])
        else:
            raise ValueError(
                'only lvls 0, 1, 1.5, 2, 2.5 and 3 are supported')
        if colorjitter:
            transform.transforms.append(
                transforms.ColorJitter(
                    brightness=0.5,
                    contrast=0.5,
                    saturation=0.5,
                ))
        transform.transforms.append(transforms.ToTensor())
        transform.transforms.append(normalize)
    elif split in ['validation', 'test']:
        # identity transform
        transform = transforms.Compose([
            # transforms.Lambda(lambda x: x.convert("RGB")),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
    self.transform = transform
    if split in ['train', 'validation']:
        # cache the ImageFolder listing as JSON so later runs skip the walk
        fname = '/mnt/projects/bilvlda/dataset/imagenet/imagenet_train.json'
        if not os.path.exists(fname):
            dataset = dset.ImageFolder(root=os.path.join(path, 'train'))
            hu.save_json(fname, dataset.imgs)
        self.imgs = np.array(hu.load_json(fname))
        assert (len(self.imgs) == 1281167)
    elif split == 'test':
        fname = '/mnt/projects/bilvlda/dataset/imagenet/imagenet_validation.json'
        if not os.path.exists(fname):
            dataset = dset.ImageFolder(
                root=os.path.join(path, 'validation'))
            hu.save_json(fname, dataset.imgs)
        self.imgs = np.array(hu.load_json(fname))
        assert (len(self.imgs) == 50000)
    if n_samples is not None:
        assert n_samples % self.n_classes == 0, 'the number of samples %s must be a multiple of the number of classes %s' % (
            n_samples, self.n_classes)
        with hu.random_seed(10):
            imgs = np.array(self.imgs)
            n = int(n_samples /
                    self.n_classes)  # number of samples per class
            # Extract a balanced subset (column 1 of imgs holds the label)
            ind = np.hstack([
                np.random.choice(np.where(imgs[:, 1] == l)[0],
                                 n,
                                 replace=False)
                for l in np.unique(imgs[:, 1])
            ])
            # ind = np.random.choice(imgs.shape[0], n_samples, replace=False)
            self.imgs = imgs[ind]