def get_hand_dataflow(is_train, img_path=None):
    ds = HandPose(img_path, is_train, input_size=368)  # read data from lmdb
    if is_train:
        ds = MapDataComponent(ds, pose_random_scale)
        ds = MapDataComponent(ds, pose_rotation)
        # ds = MapDataComponent(ds, pose_flip)
        ds = MapDataComponent(ds, pose_resize_shortestedge_random)
        ds = MapDataComponent(ds, pose_crop_random)
        ds = MapData(ds, hand_pose_to_img)
        # augs = [
        #     imgaug.RandomApplyAug(imgaug.RandomChooseAug([
        #         imgaug.GaussianBlur(max_size=3)
        #     ]), 0.7)
        # ]
        # ds = AugmentImageComponent(ds, augs)
        ds = PrefetchDataZMQ(ds, multiprocessing.cpu_count())
    else:
        ds = MultiThreadMapData(ds, nr_thread=8, map_func=read_image_url, buffer_size=1000)
        ds = MapDataComponent(ds, pose_resize_shortestedge_fixed)
        ds = MapDataComponent(ds, pose_crop_center)
        ds = MapData(ds, pose_to_img)
        ds = PrefetchData(ds, 100, multiprocessing.cpu_count() // 4)

    return ds
def get_dataflow(annot_path, img_dir, batch_size):
    """
    This function initializes the tensorpack dataflow and serves as a generator
    for the training operation.

    :param annot_path: path to the annotation file
    :param img_dir: path to the images
    :param batch_size: batch size
    :return: dataflow object
    """
    df = CocoDataFlow((368, 368), annot_path, img_dir)
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    # df = PrefetchData(df, 2, 1)
    df = BatchData(df, batch_size, use_list=False)
    df = MapData(df, lambda x: (
        [x[0], x[1], x[2]],
        [x[3], x[4], x[3], x[4], x[3], x[4],
         x[3], x[4], x[3], x[4], x[3], x[4]]))
    df.reset_state()
    return df
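# Consumption sketch (an assumption, not part of the original module): the
# dataflow above uses tensorpack's old-style API, where datapoints are pulled
# with get_data(); newer tensorpack releases iterate with `for dp in df:`
# instead. Each datapoint is already the ([x[0], x[1], x[2]], [x[3], x[4]] * 6)
# pair built by the final MapData, so a thin generator like the hypothetical
# one below is enough to feed a six-stage, multi-output training loop.
def batch_generator(df):
    while True:
        for inputs, outputs in df.get_data():
            yield inputs, outputs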
def get_hand_dataflow_batch(is_train, batchsize, img_path=None):
    logger.info('dataflow img_path=%s' % img_path)
    ds = get_hand_dataflow(is_train, img_path=img_path)
    ds = BatchData(ds, batchsize)
    if is_train:
        # ds = PrefetchDataZMQ(ds, 10, 2)
        ds = PrefetchDataZMQ(ds, 8)
    else:
        ds = PrefetchData(ds, 50, 2)

    return ds
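# Usage sketch (hypothetical helper, not part of the original module): the exact
# datapoint layout produced by hand_pose_to_img is not shown here, so component
# shapes are printed generically. reset_state() must be called once in the
# consuming process before iterating a tensorpack dataflow.
def preview_hand_batches(batchsize=8, img_path=None, num_batches=2):
    ds = get_hand_dataflow_batch(is_train=True, batchsize=batchsize, img_path=img_path)
    ds.reset_state()
    for i, dp in enumerate(ds.get_data()):
        print([getattr(c, 'shape', type(c)) for c in dp])
        if i + 1 >= num_batches:
            break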
def get_infer_iterator(hparams, dataset, num_gpu, batch_size):
    df = DataFromList(dataset, shuffle=False)
    num_samples = len(df)
    if num_samples % batch_size != 0 and num_samples % batch_size < num_gpu:
        raise ValueError("num_samples %% batch_size < num_gpu")

    df = MapData(df, lambda data: map_func(hparams, data))
    batched_df = BatchData(df, batch_size=batch_size, remainder=True)
    splitted_df = MapData(
        batched_df,
        lambda x: [np.array_split(x[idx], num_gpu) for idx in range(len(x))])
    prefetched_df = PrefetchDataZMQ(splitted_df, nr_proc=1, hwm=batch_size * 10)
    return prefetched_df
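# Standalone illustration (made-up sizes) of the per-GPU split performed by the
# lambda above: np.array_split divides each batched component into num_gpu
# chunks along the first axis. The guard on num_samples % batch_size exists
# because a remainder batch smaller than num_gpu would leave some GPUs with an
# empty chunk.
import numpy as np

_batch = np.zeros((10, 368, 368, 3))   # one batched component, e.g. images
_chunks = np.array_split(_batch, 4)    # split for num_gpu = 4
assert [c.shape[0] for c in _chunks] == [3, 3, 2, 2]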
def get_dataflow(coco_data_paths):
    """
    This function initializes the tensorpack dataflow and serves as a generator
    for the training operation.

    :param coco_data_paths: paths to the coco files: annotation file and folder with images
    :return: dataflow object
    """
    df = CocoDataFlow((368, 368), coco_data_paths)
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    # df = PrefetchData(df, 2, 1)
    return df
def get_infer_iterator(dataset, hparams, lmdb_path):
    serialize_to_lmdb(dataset, hparams, lmdb_path)
    batch_size = hparams.infer_batch_size
    num_gpu = hparams.num_gpu

    df = LMDBSerializer.load(lmdb_path, shuffle=False)
    batched_df = BatchData(df, batch_size=batch_size, remainder=False)
    splitted_df = MapData(
        batched_df,
        lambda x: [np.array_split(x[idx], num_gpu) for idx in range(len(x))])
    prefetched_df = PrefetchDataZMQ(splitted_df, nr_proc=1, hwm=batch_size * 10)
    return prefetched_df
def get_dataflow(annot_path, img_dir):
    """
    This function initializes the tensorpack dataflow and serves as a generator
    for the training operation.

    :param annot_path: path to the annotation file
    :param img_dir: path to the images
    :return: dataflow object
    """
    df = CocoDataFlow((368, 368), annot_path, img_dir)
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    # df = PrefetchData(df, 2, 1)
    return df
def get_iterator(hparams, dataset, lmdb_path, shuffle=True, drop_remainder=True, nr_proc=4):
    serialize_to_lmdb(hparams, dataset, lmdb_path)
    batch_size = hparams.batch_size
    num_gpu = hparams.num_gpu

    df = LMDBSerializer.load(lmdb_path, shuffle=shuffle)
    batched_df = BatchData(df, batch_size=batch_size, remainder=not drop_remainder)
    splitted_df = MapData(
        batched_df,
        lambda x: [np.array_split(x[idx], num_gpu) for idx in range(len(x))])
    prefetched_df = PrefetchDataZMQ(splitted_df, nr_proc=nr_proc, hwm=batch_size * 10)
    return prefetched_df
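# serialize_to_lmdb is defined elsewhere in the project; the sketch below is one
# plausible implementation (an assumption, not the project's actual helper). It
# caches preprocessed datapoints on disk with tensorpack's LMDBSerializer so
# that LMDBSerializer.load above can stream them back cheaply.
import os

from tensorpack.dataflow import DataFromList, MapData
from tensorpack.dataflow.serialize import LMDBSerializer


def serialize_to_lmdb_sketch(hparams, dataset, lmdb_path):
    """Hypothetical LMDB caching step; map_func is assumed to be the same
    preprocessing function used by get_infer_iterator above."""
    if os.path.exists(lmdb_path):
        return  # reuse an existing cache
    df = DataFromList(dataset, shuffle=False)
    df = MapData(df, lambda data: map_func(hparams, data))
    LMDBSerializer.save(df, lmdb_path)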
if __name__ == '__main__':
    """
    Run this script to check the speed of generating samples. Tweak the nr_proc
    parameter of PrefetchDataZMQ; ideally it should reflect the number of cores
    in your hardware.
    """
    batch_size = 10
    curr_dir = os.path.dirname(__file__)
    annot_path = os.path.join(
        curr_dir, '../dataset/annotations/person_keypoints_val2017.json')
    img_dir = os.path.abspath(os.path.join(curr_dir, '../dataset/val2017/'))

    df = CocoDataFlow(
        (368, 368), COCODataPaths(annot_path, img_dir))  # , select_ids=[1000])
    df.prepare()
    df = MapData(df, read_img)
    df = MapData(df, gen_mask)
    df = MapData(df, augment)
    df = MapData(df, apply_mask)
    df = MapData(df, build_sample)
    df = PrefetchDataZMQ(df, nr_proc=4)
    df = BatchData(df, batch_size, use_list=False)
    df = MapData(df, lambda x: (
        [x[0], x[1], x[2]],
        [x[3], x[4], x[3], x[4], x[3], x[4],
         x[3], x[4], x[3], x[4], x[3], x[4]]))

    TestDataSpeed(df, size=100).start()