def get_hand_dataflow(is_train, img_path=None):
    ds = HandPose(img_path, is_train, input_size=368)  # read data from lmdb
    if is_train:
        ds = MapDataComponent(ds, pose_random_scale)
        ds = MapDataComponent(ds, pose_rotation)
        # ds = MapDataComponent(ds, pose_flip)
        ds = MapDataComponent(ds, pose_resize_shortestedge_random)
        ds = MapDataComponent(ds, pose_crop_random)
        ds = MapData(ds, hand_pose_to_img)
        # augs = [
        #     imgaug.RandomApplyAug(imgaug.RandomChooseAug([
        #         imgaug.GaussianBlur(max_size=3)
        #     ]), 0.7)
        # ]
        # ds = AugmentImageComponent(ds, augs)
        ds = PrefetchDataZMQ(ds, multiprocessing.cpu_count())
    else:
        ds = MultiThreadMapData(ds, nr_thread=8, map_func=read_image_url, buffer_size=1000)
        ds = MapDataComponent(ds, pose_resize_shortestedge_fixed)
        ds = MapDataComponent(ds, pose_crop_center)
        ds = MapData(ds, pose_to_img)
        ds = PrefetchData(ds, 100, multiprocessing.cpu_count() // 4)

    return ds
def get_dataflow(annot_path, img_dir, batch_size):
    """
    This function initializes the tensorpack dataflow and serves as a generator
    for the training operation.

    :param annot_path: path to the annotation file
    :param img_dir: path to the images
    :param batch_size: batch size
    :return: dataflow object
    """
    df = CocoDataFlow((368, 368), annot_path, img_dir)
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    # df = PrefetchData(df, 2, 1)
    df = BatchData(df, batch_size, use_list=False)
    df = MapData(df, lambda x: (
        [x[0], x[1], x[2]],
        [x[3], x[4], x[3], x[4], x[3], x[4],
         x[3], x[4], x[3], x[4], x[3], x[4]]))
    df.reset_state()
    return df
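# Consumption sketch (an assumption, not part of the original module): the
# dataflow above uses tensorpack's old-style API, where datapoints are pulled
# with get_data(); newer tensorpack releases iterate with `for dp in df:`
# instead. Each datapoint is already the ([x[0], x[1], x[2]], [x[3], x[4]] * 6)
# pair built by the final MapData, so a thin generator like the hypothetical
# one below is enough to feed a six-stage, multi-output training loop.
def batch_generator(df):
    while True:
        for inputs, outputs in df.get_data():
            yield inputs, outputs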
def get_hand_dataflow_batch(is_train, batchsize, img_path=None):
    logger.info('dataflow img_path=%s' % img_path)
    ds = get_hand_dataflow(is_train, img_path=img_path)
    ds = BatchData(ds, batchsize)
    if is_train:
        # ds = PrefetchDataZMQ(ds, 10, 2)
        ds = PrefetchDataZMQ(ds, 8)
    else:
        ds = PrefetchData(ds, 50, 2)

    return ds
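# Usage sketch (hypothetical helper, not part of the original module): the exact
# datapoint layout produced by hand_pose_to_img is not shown here, so component
# shapes are printed generically. reset_state() must be called once in the
# consuming process before iterating a tensorpack dataflow.
def preview_hand_batches(batchsize=8, img_path=None, num_batches=2):
    ds = get_hand_dataflow_batch(is_train=True, batchsize=batchsize, img_path=img_path)
    ds.reset_state()
    for i, dp in enumerate(ds.get_data()):
        print([getattr(c, 'shape', type(c)) for c in dp])
        if i + 1 >= num_batches:
            break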
def get_infer_iterator(hparams, dataset, num_gpu, batch_size):
    df = DataFromList(dataset, shuffle=False)
    num_samples = len(df)
    if num_samples % batch_size != 0 and num_samples % batch_size < num_gpu:
        raise ValueError("num_samples %% batch_size < num_gpu")

    df = MapData(df, lambda data: map_func(hparams, data))
    batched_df = BatchData(df, batch_size=batch_size, remainder=True)
    splitted_df = MapData(
        batched_df,
        lambda x: [np.array_split(x[idx], num_gpu) for idx in range(len(x))])
    prefetched_df = PrefetchDataZMQ(splitted_df, nr_proc=1, hwm=batch_size * 10)
    return prefetched_df
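# Standalone illustration (made-up sizes) of the per-GPU split performed by the
# lambda above: np.array_split divides each batched component into num_gpu
# chunks along the first axis. The guard on num_samples % batch_size exists
# because a remainder batch smaller than num_gpu would leave some GPUs with an
# empty chunk.
import numpy as np

_batch = np.zeros((10, 368, 368, 3))   # one batched component, e.g. images
_chunks = np.array_split(_batch, 4)    # split for num_gpu = 4
assert [c.shape[0] for c in _chunks] == [3, 3, 2, 2]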
def get_dataflow(coco_data_paths):
    """
    This function initializes the tensorpack dataflow and serves as a generator
    for the training operation.

    :param coco_data_paths: paths to the coco files: annotation file and folder with images
    :return: dataflow object
    """
    df = CocoDataFlow((368, 368), coco_data_paths)
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    # df = PrefetchData(df, 2, 1)
    return df
def get_infer_iterator(dataset, hparams, lmdb_path):
    serialize_to_lmdb(dataset, hparams, lmdb_path)
    batch_size = hparams.infer_batch_size
    num_gpu = hparams.num_gpu

    df = LMDBSerializer.load(lmdb_path, shuffle=False)
    batched_df = BatchData(df, batch_size=batch_size, remainder=False)
    splitted_df = MapData(
        batched_df,
        lambda x: [np.array_split(x[idx], num_gpu) for idx in range(len(x))])
    prefetched_df = PrefetchDataZMQ(splitted_df, nr_proc=1, hwm=batch_size * 10)
    return prefetched_df
def get_dataflow(annot_path, img_dir):
    """
    This function initializes the tensorpack dataflow and serves as a generator
    for the training operation.

    :param annot_path: path to the annotation file
    :param img_dir: path to the images
    :return: dataflow object
    """
    df = CocoDataFlow((368, 368), annot_path, img_dir)
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    # df = PrefetchData(df, 2, 1)
    return df
def get_iterator(hparams, dataset, lmdb_path, shuffle=True, drop_remainder=True, nr_proc=4):
    serialize_to_lmdb(hparams, dataset, lmdb_path)
    batch_size = hparams.batch_size
    num_gpu = hparams.num_gpu

    df = LMDBSerializer.load(lmdb_path, shuffle=shuffle)
    batched_df = BatchData(df, batch_size=batch_size, remainder=not drop_remainder)
    splitted_df = MapData(
        batched_df,
        lambda x: [np.array_split(x[idx], num_gpu) for idx in range(len(x))])
    prefetched_df = PrefetchDataZMQ(splitted_df, nr_proc=nr_proc, hwm=batch_size * 10)
    return prefetched_df
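# serialize_to_lmdb is defined elsewhere in the project; the sketch below is one
# plausible implementation (an assumption, not the project's actual helper). It
# caches preprocessed datapoints on disk with tensorpack's LMDBSerializer so
# that LMDBSerializer.load above can stream them back cheaply.
import os

from tensorpack.dataflow import DataFromList, MapData
from tensorpack.dataflow.serialize import LMDBSerializer


def serialize_to_lmdb_sketch(hparams, dataset, lmdb_path):
    """Hypothetical LMDB caching step; map_func is assumed to be the same
    preprocessing function used by get_infer_iterator above."""
    if os.path.exists(lmdb_path):
        return  # reuse an existing cache
    df = DataFromList(dataset, shuffle=False)
    df = MapData(df, lambda data: map_func(hparams, data))
    LMDBSerializer.save(df, lmdb_path)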
if __name__ == '__main__':
    """
    Run this script to check the speed of generating samples. Tweak the nr_proc
    parameter of PrefetchDataZMQ; ideally it should reflect the number of cores
    in your hardware.
    """
    batch_size = 10
    curr_dir = os.path.dirname(__file__)
    annot_path = os.path.join(
        curr_dir, '../dataset/annotations/person_keypoints_val2017.json')
    img_dir = os.path.abspath(os.path.join(curr_dir, '../dataset/val2017/'))

    df = CocoDataFlow(
        (368, 368), COCODataPaths(annot_path, img_dir))  # , select_ids=[1000])
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    df = BatchData(df, batch_size, use_list=False)
    df = MapData(df, lambda x: (
        [x[0], x[1], x[2]],
        [x[3], x[4], x[3], x[4], x[3], x[4],
         x[3], x[4], x[3], x[4], x[3], x[4]]))

    TestDataSpeed(df, size=100).start()