示例#1
0
def test_experimental_reader(synthetic_dataset):
    """Smoke-test ``reader_v2_throughput`` using a thread pool and the Python read method.

    The benchmark is run for 5 warmup and 5 measured cycles with a single
    loader. The return value is not inspected, so the test passes as long as
    the call completes without raising.

    :param synthetic_dataset: object exposing a ``url`` attribute
        (presumably a pytest fixture -- confirm against conftest).
    """
    benchmark_options = dict(
        warmup_cycles_count=5,
        measure_cycles_count=5,
        pool_type=WorkerPoolType.THREAD,
        loaders_count=1,
        read_method=ReadMethod.PYTHON,
    )
    # The second positional argument is deliberately None
    # (presumably "no field filter" -- verify against reader_v2_throughput).
    reader_v2_throughput(synthetic_dataset.url, None, **benchmark_options)
示例#2
0
def test_tf_thread_pool_run_experimental(synthetic_dataset):
    """Smoke-test ``reader_v2_throughput`` using a thread pool and the TF read method.

    Runs 5 warmup and 5 measured cycles with a single loader. The result is
    discarded; the test only checks that the call does not raise.

    :param synthetic_dataset: object exposing a ``url`` attribute
        (presumably a pytest fixture -- confirm against conftest).
    """
    dataset_url = synthetic_dataset.url
    # Each pattern matches its name as a whole word ('id', 'matrix');
    # presumably this limits which fields are read -- verify against the callee.
    selected_fields = [r'\bid\b', r'\bmatrix\b']

    reader_v2_throughput(
        dataset_url,
        field_regex=selected_fields,
        warmup_cycles_count=5,
        measure_cycles_count=5,
        pool_type=WorkerPoolType.THREAD,
        loaders_count=1,
        read_method=ReadMethod.TF,
    )
示例#3
0
def _main(args):
    """Parse arguments, run the selected throughput benchmark and print a summary.

    Steps, in order: initialise logging, parse ``args`` with ``_parse_args``,
    adjust the root logger level from the ``v``/``vv`` flags, run
    ``reader_v2_throughput`` when ``experimental_reader`` is set (otherwise
    ``reader_throughput``), log ``'Done'`` and print the sample read rate,
    RSS memory in MB and CPU percentage taken from the benchmark result.

    :param args: raw arguments forwarded to ``_parse_args``
        (presumably a command-line argument list -- confirm against caller).
    """
    logging.basicConfig()
    options = _parse_args(args)

    root_logger = logging.getLogger()
    # Applied in this order so that ``vv`` (DEBUG) wins when both flags are set.
    for enabled, level in ((options.v, logging.INFO), (options.vv, logging.DEBUG)):
        if enabled:
            root_logger.setLevel(level)

    # Keyword arguments that both benchmark entry points receive identically.
    shared_kwargs = dict(
        warmup_cycles_count=options.warmup_cycles,
        measure_cycles_count=options.measure_cycles,
        pool_type=options.pool_type,
        loaders_count=options.workers_count,
        read_method=options.read_method,
        shuffling_queue_size=options.shuffling_queue_size,
        min_after_dequeue=options.min_after_dequeue,
    )

    if options.experimental_reader:
        # Only the experimental reader takes a decoders count.
        results = reader_v2_throughput(
            options.dataset_path,
            options.field_regex,
            decoders_count=options.experimental_decoders_count,
            **shared_kwargs)
    else:
        # Only the regular reader takes the thread-profiling switch.
        results = reader_throughput(
            options.dataset_path,
            options.field_regex,
            profile_threads=options.profile_threads,
            **shared_kwargs)

    logger.info('Done')

    sample_rate = results.samples_per_second
    # rss divided by 2**20; the message labels the value as MB.
    rss_megabytes = results.memory_info.rss / 2**20
    summary_template = (
        'Average sample read rate: {:1.2f} samples/sec; RAM {:1.2f} MB (rss); '
        'CPU {:1.2f}%')
    print(summary_template.format(sample_rate, rss_megabytes, results.cpu))
示例#4
0
def test_tf_thread_pool_run_experimental_with_pyarrow_serialize(synthetic_dataset):
    """Smoke-test ``reader_v2_throughput`` with the TF read method and ``pyarrow_serialize=True``.

    Runs 5 warmup and 5 measured cycles with a single loader. The result is
    discarded; the test only checks that the call does not raise.

    NOTE(review): the test name mentions a thread pool, but the call uses
    ``WorkerPoolType.PROCESS`` -- confirm which one is intended.

    :param synthetic_dataset: object exposing a ``url`` attribute
        (presumably a pytest fixture -- confirm against conftest).
    """
    call_kwargs = {
        # Each pattern matches its name as a whole word ('id', 'matrix').
        'field_regex': [r'\bid\b', r'\bmatrix\b'],
        'warmup_cycles_count': 5,
        'measure_cycles_count': 5,
        'pool_type': WorkerPoolType.PROCESS,
        'loaders_count': 1,
        'read_method': ReadMethod.TF,
        'pyarrow_serialize': True,
    }
    reader_v2_throughput(synthetic_dataset.url, **call_kwargs)