def get_data():
    """Build the batched MNIST train and test dataflows.

    Each datapoint is mapped to ``[image, onehot]``: the image gets an extra
    trailing axis and the label selects a row of a 10x10 identity matrix.

    Returns:
        tuple: ``(train, test)`` dataflows, batched by 128 and 256.
    """
    def to_image_and_onehot(datapoint):
        image = datapoint[0][:, :, None]
        target = np.eye(10)[datapoint[1]]
        return [image, target]

    def build(split, batch_size):
        mapped = MapData(dataset.Mnist(split), to_image_and_onehot)
        return BatchData(mapped, batch_size)

    train = build('train', 128)
    test = build('test', 256)
    return train, test
def get_data():
    """Build the batched MNIST train and test dataflows.

    Each datapoint is mapped to ``[image, onehot]``: the image gets an extra
    trailing axis and the label becomes a length-10 ``int32`` vector with a
    single 1 at the label index.

    Returns:
        tuple: ``(train, test)`` dataflows, batched by 128 and 256.
    """
    def encode(datapoint):
        pixels = datapoint[0][:, :, None]
        target = np.zeros(10, dtype='int32')
        target[datapoint[1]] = 1
        return [pixels, target]

    splits = (('train', 128), ('test', 256))
    train, test = (
        BatchData(MapData(dataset.Mnist(split), encode), batch_size)
        for split, batch_size in splits
    )
    return train, test
def get_batched_eval_dataflow(name, shard=0, num_shards=1, batch_size=1):
    """Build a batched evaluation dataflow for one shard of a dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: select a contiguous subset of the inference
            roidbs; the last shard also takes the remainder.
        batch_size: forwarded to ``DataFromListOfDictBatched``.

    Returns:
        A dataflow whose datapoints are
        ``[padded_images, image_ids, resized (h, w) pairs, scales,
        original (h, w) shapes]``.
    """
    roidbs = DetectionDataset().load_inference_roidbs(name)

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    first = shard * img_per_shard
    if shard + 1 < num_shards:
        last = (shard + 1) * img_per_shard
    else:
        last = num_imgs

    ds = DataFromListOfDictBatched(roidbs[first:last], ['file_name', 'image_id'], batch_size)

    def decode_images(inputs):
        # [file_name, image_id] -> [image, image_id]
        decoded = []
        for entry in inputs:
            decoded.append([cv2.imread(entry[0], cv2.IMREAD_COLOR), entry[1]])
        return decoded

    def resize_images(inputs):
        # [image, image_id] -> [resized image, image_id, scale, original (h, w)]
        resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
        resized_imgs = [resizer.augment(entry[0]) for entry in inputs]
        outputs = []
        for entry, resized in zip(inputs, resized_imgs):
            original_shape = entry[0].shape
            # Square root of the product of the height and width ratios.
            scale = np.sqrt(
                resized.shape[0] * 1.0 / original_shape[0] * resized.shape[1] / original_shape[1])
            outputs.append([resized, entry[1], scale, original_shape[:2]])
        return outputs

    def pad_and_batch(inputs):
        # Zero-pad each image at the bottom/right to the largest height and
        # width in the batch, then stack them into one array.
        shapes = [entry[0].shape for entry in inputs]
        heights, widths, _ = zip(*shapes)
        max_h = max(heights)
        max_w = max(widths)
        padded = []
        for entry in inputs:
            image = entry[0]
            pad_spec = [[0, max_h - image.shape[0]], [0, max_w - image.shape[1]], [0, 0]]
            padded.append(np.pad(image, pad_spec, 'constant'))
        padded_images = np.stack(padded)
        image_ids = [entry[1] for entry in inputs]
        scales = [entry[2] for entry in inputs]
        original_shapes = [entry[3] for entry in inputs]
        return [padded_images, image_ids, list(zip(heights, widths)), scales, original_shapes]

    for stage in (decode_images, resize_images, pad_and_batch):
        ds = MapData(ds, stage)
    return ds
def get_input_cifar10():
    """Return CIFAR-10 dataflows with one-hot labels and their shapes.

    Returns:
        tuple: ``(train, test, ((32, 32, 3), (10, )))`` where both dataflows
        yield ``(image, onehot)`` and the test split is created with
        ``shuffle=False``.
    """
    def to_onehot_pair(datapoint):
        image, label = datapoint
        target = np.zeros(10)
        target[label] = 1.0
        return image, target

    train_flow = dataset.Cifar10('train')
    test_flow = dataset.Cifar10('test', shuffle=False)
    shapes = ((32, 32, 3), (10, ))
    return MapData(train_flow, to_onehot_pair), MapData(test_flow, to_onehot_pair), shapes
def get_input_mnist():
    """Return MNIST dataflows with one-hot labels and their shapes.

    Returns:
        tuple: ``(train, test, ((28, 28, 1), (10, )))`` where both dataflows
        yield ``(image, onehot)`` and the test split is created with
        ``shuffle=False``.
    """
    def to_onehot_pair(datapoint):
        image, label = datapoint
        # Append a channels axis as the last dimension.
        image = np.expand_dims(image, axis=-1)
        target = np.zeros(10)
        target[label] = 1.0
        return image, target

    train_flow = dataset.Mnist('train')
    test_flow = dataset.Mnist('test', shuffle=False)
    shapes = ((28, 28, 1), (10, ))
    return MapData(train_flow, to_onehot_pair), MapData(test_flow, to_onehot_pair), shapes
def get_debug_dataflow(add_mask=True, imageHW=768):
    """Return a debugging dataflow that yields only the boxes of each image.

    The training images are loaded with ground truth, shuffled, read from
    disk, resized to ``imageHW`` x ``imageHW`` together with their
    multi-instance mask, and converted to annotations; only the boxes are
    emitted.

    Args:
        add_mask: forwarded to ``Detection.load_many``.
        imageHW: target height and width passed to the resize transform.
    """
    records = list(Detection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask))

    import os
    import pandas as pd

    csv_path = os.path.join(config.BASEDIR, 'train_ship_segmentations_v2.csv')
    # Drop rows containing NaN values and index the table by image id.
    df = pd.read_csv(csv_path, engine="python").dropna(axis=0).set_index('ImageId')

    def preprocess(img):
        image_path = img['image_data']
        image_id = img['id']
        multi_mask = getAnnotation(df, image_id)
        image = cv2.imread(image_path)
        image, multi_mask = fix_resize_transform_range(
            image, multi_mask, [imageHW, imageHW], 1.0)
        boxes, _klass, _masks, _is_crowd = multi_mask_to_annotation(multi_mask)
        return boxes

    ds = MapData(DataFromList(records, shuffle=True), preprocess)
    return PrefetchDataZMQ(ds, 6)
def get_resnet_train_dataflow():
    """Return the batched, prefetched training dataflow for the ResNet model.

    Each image is read from disk, resized to ``config.RESNET_SIZE`` square,
    passed through ``strong_aug`` and paired with its ``with_ship`` label.
    The resnet augmentors are then applied to the image component before
    batching.
    """
    records = list(ResnetDetection.load_many(config.BASEDIR, config.TRAIN_DATASET))
    ds = DataFromList(records, shuffle=True)
    augmentors = get_resnet_augmentor()

    def preprocess(img):
        image_path, _image_id, label = img['image_data'], img['id'], img['with_ship']
        side = (config.RESNET_SIZE, config.RESNET_SIZE)
        image = cv2.resize(cv2.imread(image_path), side)
        image = strong_aug()(image=image)['image']
        return [image, label]

    ds = MapData(ds, preprocess)
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    ds = BatchData(ds, config.RESNET_BATCH)
    return PrefetchDataZMQ(ds, 6)
def read_and_augment_images(ds):
    """Load the image of every datapoint and augment image and boxes together.

    Args:
        ds: a dataflow whose datapoints are lists with the image file name at
            index 0 and float32 boxes at index 1.

    Returns:
        A dataflow where component 0 is the augmented float32 image,
        component 1 holds the augmented boxes, and the original file name is
        appended as the last component.

    Raises:
        AssertionError: inside the mapped function, if an image cannot be
            read or the boxes are not float32.
    """
    def mapf(dp):
        fname = dp[0]
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # Check the read result BEFORE converting: calling .astype on a
        # failed read would raise AttributeError and hide the file name.
        assert im is not None, fname
        dp[0] = im.astype('float32')

        # assume floatbox as input
        assert dp[1].dtype == np.float32
        # Boxes are expanded to points so the augmentors can treat them as
        # coordinates.
        dp[1] = box_to_point8(dp[1])

        dp.append(fname)
        return dp

    ds = MapData(ds, mapf)

    augs = [
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ]
    ds = AugmentImageComponents(ds, augs, index=(0, ), coords_index=(1, ))

    def unmapf(points):
        # Convert the augmented points back into boxes.
        return point8_to_box(points)

    ds = MapDataComponent(ds, unmapf, 1)
    return ds
def test_dataloader(self):
    """Build the test dataflow from ``covid_test_v5.csv``.

    Images are resized and converted to float32, batched (keeping the
    remainder), prefetched by worker processes and finally converted to
    torch tensors.

    Returns:
        A dataflow yielding ``[images, labels]`` tensor pairs.
    """
    hp = self.hparams

    def to_tensors(dp):
        # Permute the image batch axes with (0, 3, 1, 2); labels become float.
        images = torch.tensor(np.transpose(dp[0], (0, 3, 1, 2)))
        labels = torch.tensor(dp[1]).float()
        return [images, labels]

    ds_test = MultiLabelDataset(
        folder=hp.data,
        is_train='valid',
        fname='covid_test_v5.csv',
        types=hp.types,
        pathology=hp.pathology,
        resize=int(hp.shape),
        fold_idx=None,
        n_folds=1,
    )
    ds_test.reset_state()

    augmentors = [
        imgaug.Resize(hp.shape, interp=cv2.INTER_AREA),
        imgaug.ToFloat32(),
    ]
    ds_test = AugmentImageComponent(ds_test, augmentors, 0)
    ds_test = BatchData(ds_test, hp.batch, remainder=True)
    ds_test = MultiProcessRunner(ds_test, num_proc=4, num_prefetch=16)
    ds_test = PrintData(ds_test)
    return MapData(ds_test, to_tensors)
def test_dataloader(self):
    """Build the test dataflow over ``CustomDataSet``.

    Components 0 and 1 are resized with nearest-neighbour interpolation and
    converted to float32, batched (keeping the remainder), prefetched by
    worker processes and converted to float torch tensors with a new axis
    inserted at position 1.

    Returns:
        A dataflow yielding a list of two tensors.
    """
    hp = self.hparams

    def to_tensors(dp):
        return [
            torch.tensor(dp[idx][:, np.newaxis, :, :]).float()
            for idx in (0, 1)
        ]

    ds_test = CustomDataSet(folder=hp.data,
                            train_or_valid='test',
                            size=np.inf,
                            hparams=hp)
    ds_test.reset_state()

    augmentors = [
        imgaug.Resize(hp.shape, interp=cv2.INTER_NEAREST),
        imgaug.ToFloat32(),
    ]
    ds_test = AugmentImageComponents(ds_test, augmentors, [0, 1])
    ds_test = BatchData(ds_test, hp.batch, remainder=True)
    ds_test = MultiProcessRunner(ds_test, num_proc=4, num_prefetch=16)
    ds_test = PrintData(ds_test)
    return MapData(ds_test, to_tensors)
def get_train_dataflow(add_mask=True):
    """Return the batched, prefetched BRATS training dataflow.

    Records come from ``load_from_file`` when ``config.CROSS_VALIDATION`` is
    set, otherwise from ``load_many``. Each record is sampled with
    ``sampler3d`` and emitted as ``[images, weights, labels]``.

    Args:
        add_mask: forwarded to ``BRATS_SEG.load_many``.
    """
    if config.CROSS_VALIDATION:
        records = BRATS_SEG.load_from_file(config.BASEDIR, config.TRAIN_DATASET)
    else:
        records = BRATS_SEG.load_many(
            config.BASEDIR, config.TRAIN_DATASET, add_gt=False, add_mask=add_mask)
    # no filter for training
    ds = DataFromList(list(records), shuffle=True)

    def preprocess(data):
        if config.NO_CACHE:
            # Crop on the fly from the raw image and ground truth.
            _fname, gt, im = data['file_name'], data['gt'], data['image_data']
            cropped = crop_brain_region(im, gt)
        else:
            # Reuse the crop stored with the record.
            cropped = data['preprocessed']
        volume_list, label, weight, _, _ = cropped
        batch = sampler3d(volume_list, label, weight)
        return [batch[key] for key in ('images', 'weights', 'labels')]

    ds = MapData(ds, preprocess)
    ds = BatchData(ds, config.BATCH_SIZE)
    return PrefetchDataZMQ(ds, 6)
def val_dataloader(self):
    """Build the validation dataflow from ``covid_test_v5.csv``.

    Images are resized and converted to float32, batched (keeping the
    remainder), prefetched by worker processes and finally converted to
    torch tensors.

    Returns:
        A dataflow yielding ``[images, labels]`` tensor pairs.
    """
    hp = self.hparams

    def to_tensors(dp):
        # Permute the image batch axes with (0, 3, 1, 2); labels become float.
        images = torch.tensor(np.transpose(dp[0], (0, 3, 1, 2)))
        labels = torch.tensor(dp[1]).float()
        return [images, labels]

    ds_valid = MultiLabelDataset(folder=hp.data,
                                 is_train='valid',
                                 fname='covid_test_v5.csv',
                                 types=hp.types,
                                 pathology=hp.pathology,
                                 resize=int(hp.shape))
    ds_valid.reset_state()

    augmentors = [
        imgaug.Resize(hp.shape, interp=cv2.INTER_AREA),
        imgaug.ToFloat32(),
    ]
    ds_valid = AugmentImageComponent(ds_valid, augmentors, 0)
    ds_valid = BatchData(ds_valid, hp.batch, remainder=True)
    ds_valid = MultiProcessRunner(ds_valid, num_proc=4, num_prefetch=16)
    ds_valid = PrintData(ds_valid)
    return MapData(ds_valid, to_tensors)
def get_train_aseval_dataflow():
    """Return an unshuffled dataflow over the PRW images for evaluation.

    Returns:
        A dataflow whose datapoints are ``[file_name, image, orig_shape]``,
        where ``image`` is the resized float32 image and ``orig_shape`` is
        the ``(height, width)`` of the image as read from disk.

    Raises:
        AssertionError: inside the mapped function, if an image cannot be
            read.
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname = img['file_name']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # Validate the read BEFORE touching im.shape, so an unreadable file
        # reports its name instead of raising AttributeError.
        assert im is not None, fname
        orig_shape = im.shape[:2]
        im = im.astype('float32')
        # augmentation: the returned params are not needed, there are no
        # coordinates to transform here.
        im, _ = aug.augment_return_params(im)
        return [fname, im, orig_shape]

    ds = MapData(ds, preprocess)
    return ds
def eval_prepared(self, num_gpu, batch_size):
    """Wrap this dataflow with the validation preprocessing function.

    A single-process ``MapData`` is used when ``min_num_workers`` is 0;
    otherwise ``MultiProcessMapData`` runs with
    ``max(num_gpu, min_num_workers)`` workers. ``batch_size`` is accepted
    but not used here.
    """
    if self.min_num_workers != 0:
        worker_count = max(num_gpu, self.min_num_workers)
        return MultiProcessMapData(self, worker_count, self.ex_process.val_process)
    return MapData(self, self.ex_process.val_process)
def test_dataloader(self):
    """Build the test dataflow from ``test.csv``.

    Images are resized with ``SmallestMaxSize``, converted from gray to RGB,
    passed through CLAHE and converted to float32, then batched (keeping the
    remainder) and converted to torch tensors. No multi-process prefetching
    is applied here.

    Returns:
        A dataflow yielding ``[images, labels]`` tensor pairs.
    """
    ds_test = MultiLabelDataset(folder=self.hparams.data_path,
                                is_train='test',
                                fname='test.csv',
                                types=self.hparams.types,
                                pathology=self.hparams.pathology,
                                resize=int(self.hparams.shape))
    ds_test.reset_state()

    ag_test = [
        imgaug.Albumentations(
            AB.SmallestMaxSize(self.hparams.shape, p=1.0)),
        # Was ``iimgaug.ColorSpace``: a misspelled module name that would
        # raise NameError as soon as this loader is built.
        imgaug.ColorSpace(mode=cv2.COLOR_GRAY2RGB),
        imgaug.Albumentations(AB.CLAHE(p=1)),
        imgaug.ToFloat32(),
    ]
    ds_test = AugmentImageComponent(ds_test, ag_test, 0)
    ds_test = BatchData(ds_test, self.hparams.batch, remainder=True)
    ds_test = PrintData(ds_test)
    # Permute the image batch axes with (0, 3, 1, 2); labels become float.
    ds_test = MapData(
        ds_test, lambda dp: [
            torch.tensor(np.transpose(dp[0], (0, 3, 1, 2))),
            torch.tensor(dp[1]).float()
        ])
    return ds_test
def get_train_dataflow():
    """Return the COCO training dataflow with RPN anchor targets.

    Images without any box are dropped up front. Each remaining image is
    read and augmented, anchor inputs are computed, and crowd boxes are
    removed from the training targets. Datapoints that turn out to be
    malformed are logged once and mapped to ``None``.

    Returns:
        A dataflow yielding ``[image, fm_labels, fm_boxes, boxes, klass]``.
    """
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    records = [
        img for img in COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET)
        if len(img['boxes']) > 0
    ]

    # Only these four keys are needed downstream.
    wanted_keys = ['file_name', 'boxes', 'class', 'is_crowd']
    ds = read_and_augment_images(DataFromListOfDict(records, wanted_keys, shuffle=True))

    def add_anchor_to_dp(dp):
        im, boxes, klass, is_crowd, fname = dp
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

            # skip crowd boxes in training target
            boxes = boxes[is_crowd == 0]
            klass = klass[is_crowd == 0]

            if len(boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is invalid for training: {}".format(fname, str(err)), 'warn')
            return None

        return [im, fm_labels, fm_boxes, boxes, klass]

    return MapData(ds, add_anchor_to_dp)
def prepared(self, num_gpu, batch_size, eval=False):
    """Wrap this dataflow with the training preprocessing and batch it.

    A single-process ``MapData`` is used when ``min_num_workers`` is 0
    (handy for debugging); otherwise ``MultiProcessMapData`` runs with
    ``max(num_gpu, min_num_workers)`` workers. ``eval`` is accepted but not
    used here.
    """
    if self.min_num_workers != 0:
        worker_count = max(num_gpu, self.min_num_workers)
        mapped = MultiProcessMapData(self, worker_count, self.ex_process.train_process)
    else:
        mapped = MapData(self, self.ex_process.train_process)
    return BatchData(mapped, batch_size)
def train_dataloader(self):
    """Build the augmented training dataflow from ``covid_train_v5.csv``.

    The image component goes through colour conversion, a randomly chosen
    blur, CLAHE, affine transforms in random order, rotation, a random crop
    and resize, conversion back to gray and finally float32. Datapoints are
    then batched (keeping the remainder), optionally truncated to 2 batches
    in debug mode, prefetched and converted to torch tensors.

    Returns:
        A dataflow yielding ``[images, labels]`` tensor pairs.
    """
    hp = self.hparams

    def to_tensors(dp):
        # Permute the image batch axes with (0, 3, 1, 2); labels become float.
        images = torch.tensor(np.transpose(dp[0], (0, 3, 1, 2)))
        labels = torch.tensor(dp[1]).float()
        return [images, labels]

    ds_train = MultiLabelDataset(
        folder=hp.data,
        is_train='train',
        fname='covid_train_v5.csv',
        types=hp.types,
        pathology=hp.pathology,
        resize=int(hp.shape),
        balancing=None,
    )
    ds_train.reset_state()

    # The pipeline is assembled step by step; the order of the entries is
    # the order in which the augmentors are listed for the image component.
    ag_train = [imgaug.ColorSpace(mode=cv2.COLOR_GRAY2RGB)]

    blur_choices = [
        imgaug.Albumentations(AB.Blur(blur_limit=4, p=0.25)),
        imgaug.Albumentations(AB.MotionBlur(blur_limit=4, p=0.25)),
        imgaug.Albumentations(AB.MedianBlur(blur_limit=4, p=0.25)),
    ]
    ag_train.append(imgaug.RandomChooseAug(blur_choices))

    ag_train.append(
        imgaug.Albumentations(AB.CLAHE(tile_grid_size=(32, 32), p=0.5)))

    affine_kwargs = dict(border=cv2.BORDER_CONSTANT, interp=cv2.INTER_AREA)
    affines = [
        imgaug.Affine(shear=10, **affine_kwargs),
        imgaug.Affine(translate_frac=(0.01, 0.02), **affine_kwargs),
        imgaug.Affine(scale=(0.5, 1.0), **affine_kwargs),
    ]
    ag_train.append(imgaug.RandomOrderAug(affines))

    ag_train.append(
        imgaug.RotationAndCropValid(max_deg=10, interp=cv2.INTER_AREA))
    ag_train.append(
        imgaug.GoogleNetRandomCropAndResize(
            crop_area_fraction=(0.8, 1.0),
            aspect_ratio_range=(0.8, 1.2),
            interp=cv2.INTER_AREA,
            target_shape=hp.shape))
    ag_train.append(imgaug.ColorSpace(mode=cv2.COLOR_RGB2GRAY))
    ag_train.append(imgaug.ToFloat32())

    ds_train = AugmentImageComponent(ds_train, ag_train, 0)

    # Label smoothing augmentors are built but currently not applied.
    ag_label = [
        imgaug.BrightnessScale((0.8, 1.2), clip=False),
    ]

    ds_train = BatchData(ds_train, hp.batch, remainder=True)
    if hp.debug:
        ds_train = FixedSizeData(ds_train, 2)
    ds_train = MultiProcessRunner(ds_train, num_proc=4, num_prefetch=16)
    ds_train = PrintData(ds_train)
    return MapData(ds_train, to_tensors)
def val_dataloader(self):
    """Build the validation dataflow over ``CustomDataSet``.

    CLAHE is applied to component 0. When ``hparams.types`` is 6 the
    components 0..6 (or 0..1 when it is 1) are resized with
    nearest-neighbour interpolation and converted to float32; after
    batching and prefetching the same components are converted to float
    torch tensors with a new axis inserted at position 1. For any other
    value of ``hparams.types`` neither step is applied.

    Returns:
        The assembled validation dataflow.
    """
    hp = self.hparams

    ds_valid = CustomDataSet(folder=hp.data,
                             train_or_valid='valid',
                             size=np.inf,
                             hparams=hp)
    ds_valid.reset_state()

    ag_valid = [
        imgaug.Resize(hp.shape, interp=cv2.INTER_NEAREST),
        imgaug.ToFloat32(),
    ]
    clahe = [imgaug.Albumentations(AB.CLAHE(p=1))]
    ds_valid = AugmentImageComponent(ds_valid, clahe, 0)

    # Which datapoint components are handled depends on hparams.types.
    if hp.types == 6:
        components = (0, 1, 2, 3, 4, 5, 6)
    elif hp.types == 1:
        components = (0, 1)
    else:
        components = None

    if components is not None:
        ds_valid = AugmentImageComponents(ds_valid, ag_valid, list(components))

    ds_valid = BatchData(ds_valid, hp.batch, remainder=True)
    ds_valid = MultiProcessRunner(ds_valid, num_proc=4, num_prefetch=16)
    ds_valid = PrintData(ds_valid)

    if components is not None:
        def to_tensors(dp):
            return [
                torch.tensor(dp[idx][:, np.newaxis, :, :]).float()
                for idx in components
            ]

        ds_valid = MapData(ds_valid, to_tensors)
    return ds_valid
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)

    The roidbs of every dataset in ``cfg.DATA.TRAIN`` are concatenated,
    images without non-crowd boxes are dropped, and the rest are mapped
    through ``TrainingDataPreprocessor``.
    """
    roidbs = []
    for dataset_name in cfg.DATA.TRAIN:
        roidbs.extend(DatasetRegistry.get(dataset_name).training_roidbs())
    print(
        "---------------------------------------------------------------- data.py:343"
    )
    print_class_histogram(roidbs)

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    total_before = len(roidbs)
    roidbs = [
        img for img in roidbs
        if len(img["boxes"][img["is_crowd"] == 0]) > 0
    ]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(total_before - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor(cfg)

    if not cfg.DATA.NUM_WORKERS > 0:
        return MapData(ds, preprocess)

    if cfg.TRAINER == "horovod":
        # one dataflow for each process, therefore don't need large buffer
        buffer_size = cfg.DATA.NUM_WORKERS * 10
        # MPI does not like fork()
        return MultiThreadMapData(ds,
                                  cfg.DATA.NUM_WORKERS,
                                  preprocess,
                                  buffer_size=buffer_size)

    buffer_size = cfg.DATA.NUM_WORKERS * 20
    return MultiProcessMapData(ds,
                               cfg.DATA.NUM_WORKERS,
                               preprocess,
                               buffer_size=buffer_size)
def make_data():
    """Return an iterator over batched, preprocessed COCO joints data.

    The training split from ``COCOJoints().load_data(1)`` is wrapped in a
    list dataflow, mapped through ``Preprocessing`` and batched with
    ``cfg.batch_size`` (batches are kept as lists). The dataflow state is
    reset before its data iterator is returned.
    """
    from COCOAllJoints import COCOJoints
    from dataset import Preprocessing

    joints = COCOJoints()
    train_data, _ = joints.load_data(1)

    from tensorpack.dataflow import DataFromList, MapData, BatchData

    flow = BatchData(
        MapData(DataFromList(train_data), Preprocessing),
        cfg.batch_size,
        use_list=True,
    )
    flow.reset_state()
    return flow.get_data()
def get_resnet_val_dataflow():
    """Return the batched validation dataflow for the ResNet model.

    Images are read in their stored order (no shuffling), resized to
    ``config.RESNET_SIZE`` square and paired with their ``with_ship``
    label.
    """
    records = list(ResnetDetection.load_many(config.BASEDIR, config.VAL_DATASET))

    def load_and_resize(img):
        image_path = img['image_data']
        label = img['with_ship']
        side = (config.RESNET_SIZE, config.RESNET_SIZE)
        image = cv2.resize(cv2.imread(image_path), side)
        return [image, label]

    ds = MapData(DataFromList(records, shuffle=False), load_and_resize)
    ds = BatchData(ds, config.RESNET_BATCH)
    return PrefetchDataZMQ(ds, 1)
def main():
    """Compute per-score statistics over an LMDB file of molecules.

    Every record is decoded to a molecule and scored with the cycle-length,
    logP and SA scorers. The mean, sample standard deviation (``ddof=1``),
    minimum and maximum of each score are pickled to the output path as a
    dict keyed by scorer name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--mdb_file', required=True)
    parser.add_argument('--data', choices=['gdb9', 'zinc'], required=True,
                        help='Dataset to use.')
    parser.add_argument('-o', '--output', required=True)
    args = parser.parse_args()

    ds = MapData(LMDBData(args.mdb_file, shuffle=False),
                 lambda dp: loads_msgpack(dp[1]))

    # used by PenalizedLogPScore
    scorers = (CycleLengthScore(), LogPScore(), SAScore(GraphMolecularMetrics._SA_MODEL))
    conv = get_decoder(args.data, True)

    rows = []
    for dp in ds.get_data():
        mol = conv.to_mol(dp[1].squeeze(), dp[0])
        row = np.empty(len(scorers), dtype=float)
        for idx, scorer in enumerate(scorers):
            row[idx] = scorer.compute(mol)
        rows.append(row)

    table = np.row_stack(rows)
    means = np.mean(table, axis=0)
    minima = np.min(table, axis=0)
    maxima = np.max(table, axis=0)
    stds = np.std(table, axis=0, ddof=1)

    out = {
        scorer.name: {'mean': mean, 'std': std, 'min': low, 'max': high}
        for scorer, mean, std, low, high in zip(scorers, means, stds, minima, maxima)
    }

    with open(args.output, 'wb') as fout:
        pickle.dump(out, fout)
def get_query_dataflow():
    """Return an unshuffled dataflow over the PRW query images.

    Each image is read, converted to float32 and resized; its boxes are
    transformed with the same augmentation parameters.

    Returns:
        A dataflow yielding ``[image, boxes, re_id_class]``.
    """
    queries = PRWDataset(cfg.DATA.BASEDIR).load_query()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(queries, shuffle=False)

    resize = CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    aug = imgaug.AugmentorList([resize])

    def preprocess(img):
        fname = img['file_name']
        # Work on a copy so the stored record is left untouched.
        boxes = np.copy(img['boxes'])
        re_id_class = img['re_id_class']

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Augment the image, then push the boxes through the same params
        # by way of their point representation.
        im, params = aug.augment_return_params(im)
        boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        return [im, boxes, re_id_class]

    return MapData(ds, preprocess)
def get_augmented_speech_commands_data(subset, options, do_multiprocess=True, shuffle=True):
    """Return the batched Speech Commands dataflow for ``subset``.

    Training-only augmentation (time shift and added noise) is enabled when
    ``subset`` is ``'train'`` and ``do_multiprocess`` is true. If
    ``shuffle`` is ``None`` it falls back to that same training flag.

    Args:
        subset: name of the subset to load.
        options: object providing ``data_dir``, ``batch_size`` and
            ``nr_gpu``.
        do_multiprocess: when true, wrap the result in ``PrefetchData``.
        shuffle: whether to shuffle; ``None`` means "shuffle when training".
    """
    isTrain = subset == 'train' and do_multiprocess
    if shuffle is None:
        shuffle = isTrain

    data_root = os.path.join(options.data_dir, 'speech_commands_v0.02')
    ds = SpeechCommandsDataFlow(data_root, subset, shuffle, None)

    # The noise clips live on the raw dataflow, so grab them before wrapping.
    if isTrain:
        add_noise_func = functools.partial(_add_noise, noises=ds.noises)

    for component_fn in (_pad_or_clip_to_desired_sample, _to_float):
        ds = MapDataComponent(ds, component_fn, index=0)

    if isTrain:
        ds = MapDataComponent(ds, _time_shift, index=0)
        ds = MapData(ds, add_noise_func)

    per_gpu_batch = options.batch_size // options.nr_gpu
    ds = BatchData(ds, per_gpu_batch, remainder=not isTrain)
    if not do_multiprocess:
        return ds
    return PrefetchData(ds, 4, 4)
def dataflow_to_dataset(df, types, shapes):
    """
    Wrap a dataflow to tf.data.Dataset.
    This function will also reset the dataflow.

    If the dataflow itself is finite, the returned dataset is also finite.
    Therefore, if used for training, you'll need to add `.repeat()` on the returned
    dataset.

    Args:
        df (DataFlow): a dataflow which produces lists
        types([tf.DType]): list of types
        shapes: output shapes, passed to ``from_generator`` as a tuple

    Returns:
        (tf.data.Dataset)
    """
    # TODO theoretically it can support dict
    assert isinstance(df, DataFlow), df
    assert isinstance(types, (list, tuple)), types

    # Each datapoint is converted to a tuple before being handed to
    # tf.data.Dataset.from_generator.
    as_tuples = MapData(df, tuple)
    as_tuples.reset_state()
    return tf.data.Dataset.from_generator(
        as_tuples.get_data, tuple(types), tuple(shapes))
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)

    Datapoints are dicts keyed by 'image', 'anchor_labels'/'anchor_boxes'
    (or their per-level '..._lvl{i}' variants when cfg.MODE_FPN is set),
    'gt_boxes', 'gt_labels' and, with cfg.MODE_MASK, 'gt_masks'. Inputs
    that raise MalformedData are logged once and mapped to None.
    """

    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    # Resize followed by a horizontal flip; the same augmentor list is later
    # reused to transform box and polygon coordinates.
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        # Turn one roidb record into a training datapoint dict, or None if
        # the record turns out to be unusable.
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        # Copy so the in-place scaling below does not modify the record.
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # Height/width of the image BEFORE augmentation; used to scale
        # relative coordinates.
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            # Even columns are multiplied by the width, odd columns by the
            # height.
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        # Boxes are converted to points, augmented with the same params as
        # the image, and converted back.
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                # Keys are numbered starting at level 2.
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            # Keep only the segmentations of non-crowd instances so they
            # line up with the filtered boxes.
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == 'horovod':
            buffer_size = cfg.DATA.NUM_WORKERS * 10  # one dataflow for each process, therefore don't need large buffer
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapDataZMQ(ds,
                                        cfg.DATA.NUM_WORKERS,
                                        preprocess,
                                        buffer_size=buffer_size)
    else:
        # No workers configured: map in the calling process.
        ds = MapData(ds, preprocess)
    return ds
def get_train_dataflow_davis(add_mask=False):
    """Return a training dataflow over the augmented DAVIS images.

    Image and label locations are hard-coded below. For every ``.jpg`` the
    matching ``.png`` label is loaded, one binary mask and one box is
    derived per unique label value, and the image is handed to
    ``preproc_img`` together with all-one classes and all-zero second
    classes / crowd flags.

    Args:
        add_mask: when true, the label image is resized (nearest neighbour)
            and flipped according to the augmentation parameters, and the
            resulting per-instance masks are appended to the datapoint.

    Returns:
        A dataflow yielding the list produced by ``preproc_img`` (with the
        masks appended when ``add_mask`` is set); ``None`` is produced for
        images that ``preproc_img`` rejects.
    """
    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"

    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        label_fname = fname.replace(train_img_path, train_label_path).replace(".jpg", ".png")
        # The context manager closes the label file on every exit path,
        # including exceptions raised by the helpers below.
        with Image.open(label_fname) as pil_label:
            label = np.array(pil_label)
            instances = np.unique(label)
            if len(instances) == 0:
                print("no instances")
                return None

            masks = np.array([label == inst for inst in instances], dtype=np.uint8)
            boxes = np.array(
                [get_bbox_from_segmentation_mask(mask) for mask in masks],
                dtype=np.float32)

            # Builtin ``int`` replaces the ``np.int`` alias, which was
            # removed in NumPy 1.24; the resulting dtype is the same.
            second_klass = np.zeros(len(instances), dtype=int)
            klass = np.ones_like(second_klass)
            is_crowd = np.zeros_like(second_klass)

            res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
            if res is None:
                print("davis: preproc_img returned None on", fname)
                return None
            ret, params = res

            if add_mask:
                do_flip, h, w = params[1]
                assert do_flip in (True, False), do_flip
                # augment label
                label = np.array(pil_label.resize((w, h), Image.NEAREST))
                if do_flip:
                    label = label[:, ::-1]
                # create augmented masks
                masks = np.array([label == inst for inst in instances], dtype=np.uint8)
                ret.append(masks)

            # Debugging aid: flip to True to inspect datapoints interactively.
            VISUALIZE = False
            if VISUALIZE:
                from viz import draw_annotation, draw_mask
                config.CLASS_NAMES = [str(idx) for idx in range(81)]
                im = ret[0]
                boxes = ret[3]
                draw_klass = ret[-2]
                viz = draw_annotation(im, boxes, draw_klass)
                for mask in masks:
                    viz = draw_mask(viz, mask)
                tpviz.interactive_imshow(viz)
            return ret

    ds = MapData(ds, preprocess)
    return ds
def get_eval_dataflow(name, shard=0, num_shards=1):
    """Return the DAVIS 2017 evaluation dataflow for one shard.

    Sequence ids of the form ``<sequence>__<object id>`` are read from
    ``davis2017_fast_val_ids.txt``. For each sequence the first and last
    frame are skipped, and a frame is kept only if its annotation contains
    the object id. The kept ``(sequence, object id, timestep)`` triples are
    split into contiguous shards; the last shard takes the remainder.

    Args:
        name: accepted but not used here.
        shard, num_shards: select the subset of evaluation data.

    Returns:
        A dataflow yielding ``(ref_img, ref_bbox, target_img, target_bbox,
        id_string)``, or a tuple of five ``None`` when the object is absent
        from the target annotation.
    """
    root = cfg.DATA.DAVIS2017_ROOT

    def load_annotation(seq_name, frame):
        return np.array(PIL.Image.open(root + "/Annotations/480p/" + seq_name + '/' + frame + ".png"))

    def image_path(seq_name, frame):
        return root + "/JPEGImages/480p/" + seq_name + "/" + frame + ".jpg"

    def as_float_box(box):
        x1, y1, x2, y2 = [float(coord) for coord in box]
        return np.array([x1, y1, x2, y2], dtype=np.float32)

    with open("davis2017_fast_val_ids.txt") as id_file:
        seqs = [line.strip() for line in id_file]

    seqs_timesteps = []
    for seq in seqs:
        parts = seq.split("__")
        frame_files = sorted(
            glob.glob(root + "/JPEGImages/480p/" + parts[0] + "/*.jpg"))[1:-1]
        for frame_file in frame_files:
            timestep = frame_file.split('/')[-1].replace(".jpg", "")
            ann = load_annotation(parts[0], timestep)
            if (ann == int(parts[1])).any():
                seqs_timesteps.append((parts[0], parts[1], timestep))

    total = len(seqs_timesteps)
    per_shard = total // num_shards
    first = shard * per_shard
    last = (shard + 1) * per_shard if shard + 1 < num_shards else total
    ds = DataFromList(seqs_timesteps[first:last])

    def preprocess(seq_timestep):
        seq, obj_id, timestep = seq_timestep
        first_frame = str(0).zfill(5)

        ann_mask = load_annotation(seq, timestep) == int(obj_id)
        if not ann_mask.any():
            return None, None, None, None, None
        ann_box = get_bbox_from_segmentation_mask_np(ann_mask)

        # The reference box comes from the annotation of frame 00000.
        ff_mask = load_annotation(seq, first_frame) == int(obj_id)
        ff_box = get_bbox_from_segmentation_mask_np(ff_mask)

        target_bbox = as_float_box(ann_box)
        ref_bbox = as_float_box(ff_box)

        target_img = cv2.imread(image_path(seq, timestep), cv2.IMREAD_COLOR)
        ref_img = cv2.imread(image_path(seq, first_frame), cv2.IMREAD_COLOR)
        return ref_img, ref_bbox, target_img, target_bbox, "__".join(seq_timestep)

    return MapData(ds, preprocess)
def get_train_dataflow_w_unlabeled(load_path):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)

    Labeled roidbs come from cfg.DATA.TRAIN; unlabeled roidbs come from
    cfg.DATA.UNLABEL (VOC) or from a dataset name derived from each training
    name. Pseudo targets are loaded from `load_path` and used to filter the
    unlabeled roidbs and to build the preprocessor.

    Args:
        load_path (str): path to the pseudo-target file; must exist.
    """
    assert os.path.isfile(load_path), "{} does not find".format(load_path)
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

    if "VOC" in cfg.DATA.TRAIN[0]:
        # VOC: the unlabeled datasets are listed explicitly in the config.
        roidbs_u = list(
            itertools.chain.from_iterable(
                DatasetRegistry.get(x).training_roidbs()
                for x in cfg.DATA.UNLABEL))
        unlabled2017_used = False
    else:
        # True as soon as one training dataset name contains no "@".
        unlabled2017_used = np.any(["@" not in x for x in cfg.DATA.TRAIN])

        def prase_name(x):
            # Map a training dataset name to its unlabeled counterpart.
            if not unlabled2017_used:
                assert "@" in load_path, (
                    "{}: Did you use wrong pseudo_data.py for "
                    "this model?").format(load_path)
                return x + "-unlabeled"
            else:
                # return coco2017 unlabeled data
                return "coco_unlabeled2017"

        roidbs_u = list(
            itertools.chain.from_iterable(
                DatasetRegistry.get(prase_name(x)).training_roidbs()
                for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs_u)

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    def remove_no_box_data(_roidbs, filter_fn, dset):
        # Apply filter_fn and log how many records it removed; dset is only
        # used to label the log message.
        num = len(_roidbs)
        _roidbs = filter_fn(_roidbs)
        logger.info(
            "Filtered {} images which contain no non-crowd groudtruth boxes. Total {} #images for training: {}"
            .format(num - len(_roidbs), dset, len(_roidbs)))
        return _roidbs

    roidbs = remove_no_box_data(
        roidbs, lambda x: list(
            filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, x)
        ), "labeled")
    # load unlabeled
    if unlabled2017_used:
        assert "@" not in load_path, "Did you use the wrong pseudo path"
    pseudo_targets = dd.io.load(load_path)
    logger.info("Loaded {} pseudo targets from {}".format(
        len(pseudo_targets), load_path))
    # Unlabeled images are kept only if their pseudo target, looked up by
    # image_id, has at least one box.
    roidbs_u = remove_no_box_data(
        roidbs_u, lambda x: list(
            filter(
                lambda img: len(pseudo_targets[img["image_id"]]["boxes"]) > 0,
                x)), "unlabeled")
    preprocess = TrainingDataPreprocessorSSlAug(
        cfg, confidence=cfg.TRAIN.CONFIDENCE, pseudo_targets=pseudo_targets)

    ds = DataFrom2List(roidbs, roidbs_u, shuffle=True)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == "horovod":
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapData(ds,
                                     cfg.DATA.NUM_WORKERS,
                                     preprocess,
                                     buffer_size=buffer_size)
    else:
        # No workers configured: map in the calling process.
        ds = MapData(ds, preprocess)
    return ds