def main():
    """Convert the multi-dSprites (colored-on-colored) TFRecords into the
    project's dataset format.

    Reads ``multi_dsprites_colored_on_colored.tfrecords`` from
    ``--folder_in``, materializes ``--num_data`` examples in a single
    session run, and writes the converted dataset under
    ``--folder_out/dsprites``.
    """
    parser = argparse.ArgumentParser()
    # required=True yields a clear argparse error when a flag is omitted,
    # instead of a TypeError from os.path.join(None, ...) later on.
    parser.add_argument('--folder_in', required=True)
    parser.add_argument('--folder_out', required=True)
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(args.folder_out, exist_ok=True)
    path_in = os.path.join(args.folder_in, 'multi_dsprites',
                           'multi_dsprites_colored_on_colored.tfrecords')
    dataset = multi_dsprites.dataset(path_in, 'colored_on_colored',
                                     map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out = os.path.join(args.folder_out, 'dsprites')
    # Pull the entire requested batch into memory in one run, then convert.
    with tf.Session() as sess:
        data = sess.run(next_batch)
    create_dataset(data, path_out, args.num_parallel)
Example #2
0
def test_get_next_batch():
    """Compare the time cost of the threaded batch loader against the
    single-threaded one, and check that both return identical batches.
    """
    for _ in range(CAPTCHA_COUNT):
        single_start = time.time()
        batch_x, batch_y = get_next_batch()
        single_end = time.time()

        threaded_start = time.time()
        thread_batch_x, thread_batch_y = get_next_batch_thread()
        threaded_end = time.time()
        single_elapsed = single_end - single_start
        threaded_elapsed = threaded_end - threaded_start

        # Verify the two loaders produced exactly the same data.
        assert all(all(row) for row in thread_batch_x == batch_x)
        assert all(all(row) for row in thread_batch_y == batch_y)

        # Report both timings so the threaded path can be eyeballed as faster.
        print_info("interval time: {}, time2: {}".format(
            single_elapsed, threaded_elapsed))
def main():
    """Convert the CLEVR-with-masks TFRecords into 'train'/'extra' splits.

    Reads ``clevr_with_masks_train.tfrecords`` from ``--folder_in``, crops
    and resizes every image and mask, then routes each example by its
    number of visible objects: at most ``--split_objects`` objects goes to
    ``clevr_train``, anything above goes to ``clevr_extra``.
    """
    parser = argparse.ArgumentParser()
    # required=True yields a clear argparse error when a flag is omitted,
    # instead of a TypeError from os.path.join(None, ...) later on.
    parser.add_argument('--folder_in', required=True)
    parser.add_argument('--folder_out', required=True)
    parser.add_argument('--num_data', type=int, default=70000)
    parser.add_argument('--num_parallel', type=int, default=32)
    parser.add_argument('--split_objects', type=int, default=6)
    args = parser.parse_args()
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(args.folder_out, exist_ok=True)
    path_in = os.path.join(args.folder_in, 'clevr_with_masks',
                           'clevr_with_masks_train.tfrecords')
    dataset = clevr_with_masks.dataset(path_in,
                                       map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out_all = {
        'train': os.path.join(args.folder_out, 'clevr_train'),
        'extra': os.path.join(args.folder_out, 'clevr_extra'),
    }
    with tf.Session() as sess:
        data_all = sess.run(next_batch)
    # Crop/resize images per-example and masks per-object.
    data_all['image'] = np.stack(
        [crop_and_resize(val) for val in data_all['image']])
    data_all['mask'] = np.stack([
        np.stack([crop_and_resize(sub_val) for sub_val in val])
        for val in data_all['mask']
    ])
    # Count visible objects; column 0 is skipped (presumably the background
    # slot of the visibility vector — TODO confirm against the dataset spec).
    num_objects = np.sum(data_all['visibility'][:, 1:], axis=1)
    sel_train = num_objects <= args.split_objects
    sel_extra = np.bitwise_not(sel_train)
    # Both selections are built from the same source dict; the RHS is fully
    # evaluated before data_all is rebound.
    data_all = {
        'train': {key: val[sel_train]
                  for key, val in data_all.items()},
        'extra': {key: val[sel_extra]
                  for key, val in data_all.items()},
    }
    for phase, data in data_all.items():
        path_out = path_out_all[phase]
        create_dataset(data, path_out, args.num_parallel)
Example #4
0
def main():
    """Convert every Objects Room TFRecords split into the project's
    dataset format.

    For each of the 'train', 'empty_room', 'six_objects', and
    'identical_color' splits, reads the corresponding TFRecords file from
    ``--folder_in`` and writes the converted dataset under
    ``--folder_out/room_<split>``.
    """
    parser = argparse.ArgumentParser()
    # required=True yields a clear argparse error when a flag is omitted,
    # instead of a TypeError from os.path.join(None, ...) later on.
    parser.add_argument('--folder_in', required=True)
    parser.add_argument('--folder_out', required=True)
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(args.folder_out, exist_ok=True)
    for split in ['train', 'empty_room', 'six_objects', 'identical_color']:
        # The source files follow two naming schemes: the training split has
        # no 'test' infix, the evaluation splits do.
        if split == 'train':
            file_in = 'objects_room_{}.tfrecords'.format(split)
        else:
            file_in = 'objects_room_test_{}.tfrecords'.format(split)
        path_in = os.path.join(args.folder_in, 'objects_room', file_in)
        dataset = objects_room.dataset(path_in, split,
                                       map_parallel_calls=args.num_parallel)
        next_batch = get_next_batch(dataset, args.num_data)
        path_out = os.path.join(args.folder_out, 'room_{}'.format(split))
        with tf.Session() as sess:
            data = sess.run(next_batch)
        create_dataset(data, path_out, args.num_parallel)
Example #5
0
def main():
    """Convert the CLEVR-with-masks TFRecords into the project's dataset
    format (single split).

    Reads ``clevr_with_masks_train.tfrecords`` from ``--folder_in``, crops
    and resizes every image and per-object mask, and writes the converted
    dataset under ``--folder_out/clevr``.
    """
    parser = argparse.ArgumentParser()
    # required=True yields a clear argparse error when a flag is omitted,
    # instead of a TypeError from os.path.join(None, ...) later on.
    parser.add_argument('--folder_in', required=True)
    parser.add_argument('--folder_out', required=True)
    parser.add_argument('--num_data', type=int, default=60000)
    parser.add_argument('--num_parallel', type=int, default=32)
    args = parser.parse_args()
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(args.folder_out, exist_ok=True)
    path_in = os.path.join(args.folder_in, 'clevr_with_masks',
                           'clevr_with_masks_train.tfrecords')
    dataset = clevr_with_masks.dataset(path_in,
                                       map_parallel_calls=args.num_parallel)
    next_batch = get_next_batch(dataset, args.num_data)
    path_out = os.path.join(args.folder_out, 'clevr')
    with tf.Session() as sess:
        data = sess.run(next_batch)
    # Crop/resize images per-example and masks per-object.
    data['image'] = np.stack([crop_and_resize(val) for val in data['image']])
    data['mask'] = np.stack([
        np.stack([crop_and_resize(sub_val) for sub_val in val])
        for val in data['mask']
    ])
    create_dataset(data, path_out, args.num_parallel)