def preproc_data_id(task):
    """
    Return the preprocessing id of ``task``, computing and caching it once.

    The id is stored on ``task._data_id``. When that attribute is ``None``
    it is filled in with ``hashutil.hash_data`` (``hashlen=8``) applied to
    the ``ub.repr2`` text of the task settings listed below.

    Args:
        task: object exposing ``_data_id``, ``enable_augment``,
            ``aug_params``, ``input_shape``, ``part_overlap``,
            ``part_keepbound``, ``base_modes`` and ``classnames``.

    Returns:
        the value of ``task._data_id`` (freshly computed if it was None).
    """
    existing = task._data_id
    if existing is not None:
        # Already computed on an earlier call.
        return existing

    # Regenerating whenever any of these change is more than is strictly
    # needed, but it is the safe choice for now.
    #
    # Longer term only task.input_shape should matter for everything, and
    # the remaining dependencies should be tracked per mode:
    #   * lowres has no additional dependencies
    #   * the part-scaleX modes depend on task.part_overlap and
    #     task.part_keepbound
    #   * the aug modes depend on task.aug_params
    # The groundtruth should also live in a directory further up, and it
    # has a strong dependency on task.classnames.
    aug_dependency = task.enable_augment and task.aug_params
    depends = [
        aug_dependency,
        task.input_shape,  # FIXME: really a model spec, but needed here
        task.part_overlap,
        task.part_keepbound,
        task.base_modes,
        task.classnames,
    ]
    depends_text = ub.repr2(depends)
    task._data_id = hashutil.hash_data(depends_text, hashlen=8)
    return task._data_id
def make_augment_scene(task, mode, scene, rng=None):
    """
    Augment every image / groundtruth pair of one scene for a given mode.

    The ``*.png`` files found in the ``'im' + mode`` and ``'gt' + mode``
    sub-directories of the scene are sorted, paired positionally, and each
    pair is passed through ``augment.SSegAugmentor``. Every augmented pair
    is written under the matching ``'im' + mode + '-aug'`` and
    ``'gt' + mode + '-aug'`` sub-directories, which are emptied first.

    Args:
        task: task object; must have ``enable_augment`` set and provide
            ``aug_params``, ``ignore_label`` and ``datasubdir``.
        mode (str): name of the mode to augment, e.g. ``'part-scale1'``.
        scene (str): name of the scene, e.g. ``'0000'``.
        rng: forwarded to the augmentor. The special value ``'determ'`` is
            replaced by a ``np.random.RandomState`` seeded from a hash of
            ``[scene, mode]``.

    Returns:
        tuple: ``(scene_imout_dpath, scene_gtout_dpath)``, the directories
            the augmented images and groundtruth were written to.
    """
    assert task.enable_augment

    if rng == 'determ':
        # Derive a deterministic seed from the scene and mode
        hexdigest = hashutil.hash_data([scene, mode], alphabet='hex')
        rng = np.random.RandomState(int(hexdigest, 16) % (2**32 - 1))

    auger = augment.SSegAugmentor(rng=rng, ignore_label=task.ignore_label)
    auger.params = task.aug_params

    # Inputs: the un-augmented images and groundtruth of this mode
    imdir = task.datasubdir('im' + mode, scene)
    gtdir = task.datasubdir('gt' + mode, scene)
    im_fpaths = sorted(glob.glob(join(imdir, '*.png')))
    gt_fpaths = sorted(glob.glob(join(gtdir, '*.png')))

    # Outputs: where the augmentation of this mode is written
    key = mode + '-aug'
    scene_imout_dpath = task.datasubdir('im' + key, scene)
    scene_gtout_dpath = task.datasubdir('gt' + key, scene)

    # Start fresh: drop anything left from a previous run, then recreate
    out_dpaths = (scene_gtout_dpath, scene_imout_dpath)
    for dpath in out_dpaths:
        ub.delete(dpath, verbose=False)
    for dpath in out_dpaths:
        ub.ensuredir(dpath)

    path_pairs = list(zip(im_fpaths, gt_fpaths))
    progress = ub.ProgIter(path_pairs,
                           label=' * augment mode={}'.format(mode))
    for impath, gtpath in progress:
        stem = splitext(basename(impath))[0]
        im = cv2.imread(impath, flags=cv2.IMREAD_UNCHANGED)
        gt = cv2.imread(gtpath, flags=cv2.IMREAD_UNCHANGED)

        for augx, aug_data in enumerate(auger.augment(im, gt)):
            # Only the first two items of each augmentation are used
            im_aug, gt_aug = aug_data[0:2]
            fname = '{}_aug{:0=4d}.png'.format(stem, augx)
            cv2.imwrite(join(scene_imout_dpath, fname), im_aug)
            cv2.imwrite(join(scene_gtout_dpath, fname), gt_aug)

    return scene_imout_dpath, scene_gtout_dpath
def __init__(self, img1_fpaths, img2_fpaths, labels, dim=224):
    """
    Build a dataset from parallel sequences of image paths and labels.

    Args:
        img1_fpaths: paths of the first image of each pair.
        img2_fpaths: paths of the second image of each pair; must have the
            same length as ``img1_fpaths``.
        labels: one label per pair; must have the same length as
            ``img2_fpaths``.
        dim (int): forwarded to the parent constructor.
    """
    super(LabeledPairDataset, self).__init__(dim=dim)

    num_first, num_second = len(img1_fpaths), len(img2_fpaths)
    assert num_first == num_second
    assert len(labels) == num_second

    # Store private list copies of the inputs
    self.img1_fpaths = list(img1_fpaths)
    self.img2_fpaths = list(img2_fpaths)
    self.labels = list(labels)

    # Hack for input id: "<len(self)>-<first 8 chars of a hash of the data>"
    depends = [self.img1_fpaths, self.img2_fpaths, self.labels]
    hashid = hashutil.hash_data(depends)[:8]
    self.input_id = '{}-{}'.format(len(self), hashid)

    import utool as ut
    # Numpy random state created from a fixed seed
    self.rng = ut.ensure_rng(3432, 'numpy')
def __init__(self, pblm, pccs, dim=224):
    """
    Build the sampler from groups of annotation ids (``pccs``).

    Chip paths for every annotation id are looked up through
    ``pblm.infr.ibs.depc_annot``, and every comparable pair drawn from
    within a group of two or more ids is recorded in ``self.pos_pairs``.

    Args:
        pblm: problem object; only ``pblm.infr`` is used here.
        pccs: iterable of groups of annotation ids. It is traversed more
            than once, so it must be re-iterable.
        dim (int): forwarded to the parent constructor; also used as the
            width and height requested for the chips.
    """
    super(RandomBalancedIBEISSample, self).__init__(dim=dim)
    import utool as ut

    chip_config = {'resize_dim': 'wh', 'dim_size': (self.dim, self.dim)}
    self.pccs = pccs

    all_aids = ut.flatten(pccs)
    all_fpaths = pblm.infr.ibs.depc_annot.get(
        'chips', all_aids, read_extern=False, colnames='img',
        config=chip_config)
    self.aid_to_fpath = {
        aid: fpath for aid, fpath in zip(all_aids, all_fpaths)
    }

    # self.multitons_pccs = [pcc for pcc in pccs if len(pcc) > 1]
    self.pos_pairs = []

    # SAMPLE ALL POSSIBLE POS COMBINATIONS AND IGNORE INCOMPARABLE
    self.infr = pblm.infr
    # todo each sample should really get a weight depending on num aids in
    # its pcc
    make_edge = self.infr.e_
    for pcc in pccs:
        if len(pcc) < 2:
            # A single annotation cannot form a pair
            continue
        edges = np.array(
            [make_edge(u, v) for u, v in it.combinations(pcc, 2)])
        comparable_flags = self.is_comparable(edges)
        self.pos_pairs.extend(edges[comparable_flags])

    # Random generators created from fixed seeds
    self.rng = ut.ensure_rng(563401, 'numpy')
    self.pyrng = ut.ensure_rng(564043, 'python')

    # Input id: "<len(self)>-<first 8 chars of a hash of the sorted pccs>"
    depends = [
        sorted(map(sorted, self.pccs)),
    ]
    hashid = hashutil.hash_data(depends)[:8]
    self.input_id = '{}-{}'.format(len(self), hashid)
def make_parts(prep, fullres, scale=1, clear=False):
    """
    Slices the fullres images into smaller parts that fit into the network
    but are at the original resolution (or higher).

    Args:
        prep: the preprocessor; provides ``part_config`` (with the keys
            ``'overlap'`` and ``'keepbound'``), ``input_shape`` and
            ``_mode_new_input``.
        fullres: the full resolution inputs; must provide
            ``iter_records()``. Each record is indexed with
            ``'dump_fname'`` and ``'im'``; ``'gt'`` and ``'aux'`` are
            optional.
        scale (float): factor applied to every image before slicing.
        clear (bool): forwarded to ``prep._mode_new_input``.

    Returns:
        the ``parts`` object returned by ``prep._mode_new_input``, with the
        path of each written slice appended to ``parts.paths[k]``.

    Example:
        >>> from clab.tasks.urban_mapper_3d import *
        >>> task = UrbanMapper3D(root='~/remote/aretha/data/UrbanMapper3D', workdir='~/data/work/urban_mapper')
        >>> task.prepare_fullres_inputs()
        >>> fullres = task.fullres
        >>> datadir = ub.ensuredir((task.workdir, 'data'))
        >>> prep = Preprocessor(datadir)
        >>> scale = 1
        >>> clear = False
        >>> parts = prep.make_parts(fullres, scale)
    """
    part_config = prep.part_config
    # The mode name encodes scale, input shape and a hash of the part
    # configuration, so different settings map to different modes.
    hashid = hashutil.hash_data(ub.repr2(part_config), hashlen=8)
    shapestr = '_'.join(map(str, prep.input_shape))
    mode = 'part-scale{}-{}-{}'.format(scale, shapestr, hashid)

    parts, flag = prep._mode_new_input(mode, fullres, clear=clear)
    if flag:
        # NOTE(review): presumably the flag signals that the outputs of
        # this mode already exist and can be reused -- confirm against
        # _mode_new_input.
        return parts

    input_shape = prep.input_shape
    overlap = part_config['overlap']
    keepbound = part_config['keepbound']

    records = list(fullres.iter_records())
    for record in ub.ProgIter(records, label='make ' + mode):
        dump_fname = basename(record['dump_fname'])

        # Only the image size is needed here; close the file right away
        # instead of leaving one open handle per record.
        with Image.open(record['im']) as im_file:
            im_shape = np.array(im_file.size[::-1])
        # np.int was removed in NumPy 1.24; the builtin int is equivalent
        im_shape = tuple(np.floor(im_shape * scale).astype(int))

        # Consolidate all channels that belong to this record. Records
        # without auxiliary channels simply contribute none.
        aux_paths = record.get('aux')
        in_paths = aux_paths.copy() if aux_paths is not None else {}
        for k in ['im', 'gt']:
            if k in record:
                in_paths[k] = record[k]

        # Read the images for this record and resize if necessary
        in_images = {k: imutil.imread(v)
                     for k, v in in_paths.items()}  # 9% of the time
        if scale != 1.0:
            for k in in_images.keys():
                # Only 'im' is interpolated smoothly; every other channel
                # uses nearest-neighbour so its values are not blended.
                interp = (cv2.INTER_LANCZOS4 if k == 'im' else
                          cv2.INTER_NEAREST)
                in_images[k] = imutil.imscale(in_images[k], scale, interp)[0]

        sl_gen = imutil.image_slices(im_shape, input_shape, overlap,
                                     keepbound)
        for idx, rc_slice in enumerate(sl_gen):
            rsl, csl = rc_slice
            # The suffix records the slice index and its row / col offset
            suffix = '_part{:0=4d}_{:0=3d}_{:0=3d}'.format(
                idx, rsl.start, csl.start)
            fname = ub.augpath(dump_fname, suffix=suffix)

            for k, in_data in in_images.items():
                out_data = in_data[rc_slice]
                out_fpath = join(parts.dirs[k], fname)
                imutil.imwrite(out_fpath, out_data)  # 84% of the time
                parts.paths[k].append(out_fpath)
    return parts