def _test_permute_dataset(batch_size, epoch_size, trailing_samples, cb, py_num_workers,
                          prefetch_queue_depth, reader_queue_depth):
    """Check that every epoch of a parallel per-sample source yields distinct samples.

    The first element of each returned sample is used as an index into a
    per-epoch list of "seen" flags sized
    ``epoch_size * batch_size + trailing_samples``. After ``epoch_size``
    iterations exactly ``epoch_size * batch_size`` flags must be set, and
    one more ``pipe.run()`` must raise ``StopIteration``. Three epochs are run.
    """
    pipe = utils.create_pipe(
        cb, "cpu", batch_size=batch_size, py_num_workers=py_num_workers,
        py_start_method="spawn", parallel=True, device_id=0, batch=False,
        num_threads=1, cycle=None, prefetch_queue_depth=prefetch_queue_depth,
        reader_queue_depth=reader_queue_depth)
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    samples_per_epoch = epoch_size * batch_size
    dataset_size = samples_per_epoch + trailing_samples
    for epoch_no in range(3):
        seen = [False] * dataset_size
        for _ in range(epoch_size):
            (batch, ) = pipe.run()
            assert len(batch) == batch_size
            for sample in batch:
                seen[np.array(sample)[0]] = True
        assert sum(seen) == samples_per_epoch, \
            "Epoch number {} did not contain some samples from data set".format(epoch_no)
        # The epoch is exhausted: the next run has to signal it.
        try:
            pipe.run()
        except StopIteration:
            pipe.reset()
        else:
            assert False, "expected StopIteration"
def _test_large_sample(start_method):
    """Run a parallel source of large samples and verify the reduced outputs.

    Each output sample is expected to have shape ``(512,)`` with every
    element equal to ``idx_in_epoch * 1024 * 1024``.
    NOTE(review): this presumably matches ``large_sample_cb`` producing
    (512, 1024, 1024) arrays filled with the sample index - confirm against
    its definition.

    Args:
        start_method: value forwarded as the pipeline's ``py_start_method``.
    """
    batch_size = 2
    num_batches = 8

    @pipeline_def
    def create_pipeline():
        large = fn.external_source(large_sample_cb, batch=False, parallel=True,
                                   prefetch_queue_depth=1)
        # Iterating over a whole array in Python is too slow, so sum over
        # axes 1 and 2 to cut down the number of elements to check.
        return fn.reductions.sum(large, axes=(1, 2))

    pipe = create_pipeline(batch_size=batch_size, py_num_workers=2,
                           py_start_method=start_method, prefetch_queue_depth=1,
                           num_threads=2, device_id=0)
    pipe.build()
    capture_processes(pipe._py_pool)

    for batch_idx in range(num_batches):
        (out, ) = pipe.run()
        for idx_in_batch in range(batch_size):
            expected_val = (batch_size * batch_idx + idx_in_batch) * 1024 * 1024
            a = np.array(out[idx_in_batch])
            assert a.shape == (512, ), "Expected shape (512,) but got {}".format(a.shape)
            for val in a.flat:
                assert val == expected_val, (
                    f"Unexpected value in batch: got {val}, expected {expected_val}, "
                    f"for batch {batch_idx}, sample {idx_in_batch}")
def test_discard():
    """Mix a 3-batch ``cycle='raise'`` source with parallel and serial callbacks.

    The pipeline is run 10 times. Whenever ``StopIteration`` is raised the
    expected counters are zeroed and the pipeline is reset; otherwise the
    outputs of both ``ext_cb2`` sources are compared against
    ``[sample_in_epoch, idx_in_batch, iteration]``.
    """
    bs = 5
    pipe = dali.Pipeline(batch_size=bs, device_id=None, num_threads=5, py_num_workers=4,
                         py_start_method='spawn')
    with pipe:
        three_batches = [[np.float32(i) for i in range(bs)]] * 3
        ext1 = dali.fn.external_source(three_batches, cycle='raise')
        ext2 = dali.fn.external_source(ext_cb2, batch=False, parallel=True)
        ext3 = dali.fn.external_source(ext_cb2, batch=False, parallel=False)
        pipe.set_outputs(ext1, ext2, ext3)
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    sample_in_epoch = 0
    iteration = 0
    for _ in range(10):
        try:
            e1, e2, e3 = pipe.run()
            for idx in range(bs):
                assert e1.at(idx) == idx
                expected = np.array([sample_in_epoch, idx, iteration])
                assert np.array_equal(e2.at(idx), expected)
                assert np.array_equal(e3.at(idx), expected)
                sample_in_epoch += 1
            iteration += 1
        except StopIteration:
            # End of epoch: expected counters start over after the reset.
            sample_in_epoch = 0
            iteration = 0
            pipe.reset()
def _test_epoch_idx(batch_size, epoch_size, cb, py_num_workers, prefetch_queue_depth,
                    reader_queue_depth, batch_mode, batch_info):
    """Verify the index information returned by a parallel source across 3 epochs.

    Every sample must equal
    ``[iteration * batch_size + sample_i, sample_i, iteration, epoch]``,
    where ``epoch`` is the current epoch index unless the source runs in
    batch mode without ``batch_info``, in which case ``0`` is expected.
    After ``epoch_size`` iterations the next run must raise ``StopIteration``.
    """
    pipe = utils.create_pipe(
        cb, "cpu", batch_size=batch_size, py_num_workers=py_num_workers,
        py_start_method="spawn", parallel=True, device_id=0, batch=batch_mode,
        num_threads=1, cycle=None, batch_info=batch_info,
        prefetch_queue_depth=prefetch_queue_depth,
        reader_queue_depth=reader_queue_depth)
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    # The epoch index is only compared when this condition holds.
    epoch_is_tracked = not batch_mode or batch_info
    for epoch_idx in range(3):
        expected_epoch = epoch_idx if epoch_is_tracked else 0
        for iteration in range(epoch_size):
            (batch, ) = pipe.run()
            assert len(batch) == batch_size
            first_idx = iteration * batch_size
            for sample_i, sample in enumerate(batch):
                expected = np.array([first_idx + sample_i, sample_i, iteration,
                                     expected_epoch])
                np.testing.assert_array_equal(sample, expected)
        try:
            pipe.run()
        except StopIteration:
            pipe.reset()
        else:
            assert False, "expected StopIteration"
def _test_cycle_quiet_non_resetable(iterable, reader_queue_size, batch_size, epoch_size):
    """Expect ``StopIteration`` after one epoch and again right after a reset.

    The pipeline is run ``epoch_size`` times; the following run must raise
    ``StopIteration``. After ``pipe.reset()`` the very next run must raise
    ``StopIteration`` as well.
    """
    pipe = utils.create_pipe(
        iterable, "cpu", batch_size=batch_size, py_num_workers=1,
        py_start_method="spawn", parallel=True, device_id=None, batch=True,
        num_threads=5, cycle="quiet", reader_queue_depth=reader_queue_size)
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    for _ in range(epoch_size):
        pipe.run()

    # First check: the epoch must end here.
    try:
        pipe.run()
    except StopIteration:
        pipe.reset()
    else:
        assert False, "Expected stop iteration at the end of the epoch"

    # Second check: even after the reset no further data may be produced.
    try:
        pipe.run()
    except StopIteration:
        pass
    else:
        assert False, "Expected stop iteration"
def _test_cycle_multiple_iterators(batch_size, iters_num, py_num_workers, reader_queue_sizes,
                                   cycle_policies, epoch_sizes):
    """Compare parallel and sequential pipelines built from three sources.

    Each pipeline has one per-sample callback and two batch iterables with
    their own cycle policies. Both pipelines are driven through
    ``collect_iterations`` for ``iters_num`` iterations and the collected
    results must match entry by entry, including the positions at which
    ``StopIteration`` was recorded.
    """

    @dali.pipeline_def(batch_size=batch_size, num_threads=4, device_id=None,
                       py_num_workers=py_num_workers, py_start_method='spawn')
    def pipeline(sample_cb, iter_1, iter_2, parallel):
        # Queue depths are only passed on for the parallel variant.
        depths = reader_queue_sizes if parallel else (None, None, None)
        depth_sample, depth_iter_1, depth_iter_2 = depths
        policy_1, policy_2 = cycle_policies
        sample_out = dali.fn.external_source(
            source=sample_cb, parallel=parallel, batch=False,
            prefetch_queue_depth=depth_sample)
        iter1_out = dali.fn.external_source(
            source=iter_1, parallel=parallel, batch=True,
            prefetch_queue_depth=depth_iter_1, cycle=policy_1)
        iter2_out = dali.fn.external_source(
            source=iter_2, parallel=parallel, batch=True,
            prefetch_queue_depth=depth_iter_2, cycle=policy_2)
        return (sample_out, iter1_out, iter2_out)

    shape = (2, 3)
    sample_epoch_size, iter_1_epoch_size, iter_2_epoch_size = epoch_sizes
    sample_cb = utils.ExtCallback((4, 5), sample_epoch_size * batch_size, np.int32)
    iter_1 = Iterable(batch_size, shape, epoch_size=iter_1_epoch_size, dtype=np.int32)
    iter_2 = Iterable(batch_size, shape, epoch_size=iter_2_epoch_size, dtype=np.int32)

    pipe_parallel = pipeline(sample_cb, iter_1, iter_2, parallel=True)
    pipe_seq = pipeline(sample_cb, iter_1, iter_2, parallel=False)
    pipe_parallel.build()
    utils.capture_processes(pipe_parallel._py_pool)
    pipe_seq.build()

    parallel_outs = collect_iterations(pipe_parallel, iters_num)
    seq_outs = collect_iterations(pipe_seq, iters_num)
    assert len(parallel_outs) == len(seq_outs)

    for parallel_out, seq_out in zip(parallel_outs, seq_outs):
        # An end-of-epoch marker on either side must be mirrored on the other.
        if parallel_out == StopIteration or seq_out == StopIteration:
            assert parallel_out == seq_out
            continue
        assert len(parallel_out) == len(seq_out) == 3
        for batch_parallel, batch_seq in zip(parallel_out, seq_out):
            assert len(batch_parallel) == len(batch_seq) == batch_size
            for sample_parallel, sample_seq in zip(batch_parallel, batch_seq):
                np.testing.assert_equal(np.array(sample_parallel), np.array(sample_seq))
def _test_cycle_quiet(cb, is_gen_fun, batch_size, epoch_size, reader_queue_size):
    """Check that a ``cycle="quiet"`` parallel source restarts without raising.

    The pipeline is run ``3 * epoch_size + 1`` times with no reset. A
    reference iterator is recreated every ``epoch_size`` iterations
    (``cb()`` for generator functions, ``iter(cb)`` otherwise) and each
    pipeline batch is compared with the reference batch.
    """
    pipe = utils.create_pipe(
        cb, "cpu", batch_size=batch_size, py_num_workers=1, py_start_method="spawn",
        parallel=True, device_id=None, batch=True, num_threads=5, cycle="quiet",
        reader_queue_depth=reader_queue_size)
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    total_iterations = 3 * epoch_size + 1
    for step in range(total_iterations):
        # Start of a new epoch (including the very first one): fresh reference.
        if step % epoch_size == 0:
            refer_iter = cb() if is_gen_fun else iter(cb)
        (batch, ) = pipe.run()
        expected_batch = next(refer_iter)
        assert len(batch) == len(expected_batch), \
            f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}"
        for sample, expected_sample in zip(batch, expected_batch):
            np.testing.assert_equal(sample, expected_sample)
def test_parallel_fork_cpu_only():
    """Build four pairs of fork-started CPU-only pipelines and compare each pair.

    All pipelines are created first; they are built and compared pair by
    pair afterwards.
    """
    pipeline_pairs = 4
    batch_size = 10
    iters = 40
    callback = utils.ExtCallback((4, 5), iters * batch_size, np.int32)

    def make_pipe():
        # Every pipeline in the test shares this exact configuration.
        return utils.create_pipe(callback, 'cpu', batch_size, py_num_workers=4,
                                 py_start_method='fork', parallel=True, device_id=None)

    parallel_pipes = [(make_pipe(), make_pipe()) for _ in range(pipeline_pairs)]
    for first, second in parallel_pipes:
        first.build()
        second.build()
        utils.capture_processes(first._py_pool)
        utils.capture_processes(second._py_pool)
        utils.compare_pipelines(first, second, batch_size, iters)
def _test_all_kinds_parallel(sample_cb, batch_cb, iter_cb, batch_size, py_num_workers,
                             reader_queue_sizes, num_iters):
    """Run per-sample, batch and iterable parallel sources in one pipeline.

    For three epochs the pipeline is run until ``StopIteration``. In every
    iteration the three outputs must have equal batch length and equal
    sample contents, and each epoch must consist of exactly ``num_iters``
    successful iterations.
    """

    @dali.pipeline_def(batch_size=batch_size, num_threads=4, device_id=None,
                       py_num_workers=py_num_workers, py_start_method='spawn')
    def pipeline():
        depth_sample, depth_batch, depth_iter = reader_queue_sizes
        sample_out = dali.fn.external_source(
            source=sample_cb, parallel=True, batch=False,
            prefetch_queue_depth=depth_sample)
        batch_out = dali.fn.external_source(
            source=batch_cb, parallel=True, batch=True,
            prefetch_queue_depth=depth_batch, batch_info=True)
        iter_out = dali.fn.external_source(
            source=iter_cb, parallel=True, batch=True,
            prefetch_queue_depth=depth_iter, cycle="raise")
        return (sample_out, batch_out, iter_out)

    pipe = pipeline()
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    for _ in range(3):
        completed = 0
        while True:
            try:
                (sample_outs, batch_outs, iter_outs) = pipe.run()
                assert len(sample_outs) == len(batch_outs), \
                    f"Batch length mismatch: sample: {len(sample_outs)}, batch: {len(batch_outs)}"
                assert len(batch_outs) == len(iter_outs), \
                    f"Batch length mismatch: batch: {len(batch_outs)}, iter: {len(iter_outs)}"
                for sample_out, batch_out, iter_out in zip(sample_outs, batch_outs, iter_outs):
                    np.testing.assert_equal(np.array(sample_out), np.array(batch_out))
                    np.testing.assert_equal(np.array(batch_out), np.array(iter_out))
                completed += 1
            except StopIteration:
                pipe.reset()
                assert completed == num_iters, \
                    f"Number of iterations mismatch: expected {num_iters}, got {completed}"
                break
def _test_vs_non_parallel(batch_size, cb_parallel, cb_seq, batch, py_num_workers):
    """Compare a sequential external source with a parallel one over 10 iterations.

    Args:
        batch_size: pipeline batch size; also the number of samples compared
            in every iteration.
        cb_parallel: source handed to the ``parallel=True`` external source.
        cb_seq: source handed to the ``parallel=False`` external source.
        batch: forwarded as ``batch`` to both external sources.
        py_num_workers: number of Python workers of the pipeline.

    Raises:
        AssertionError: if any pair of corresponding samples differs.
    """
    pipe = dali.Pipeline(batch_size=batch_size, device_id=None, num_threads=5,
                         py_num_workers=py_num_workers, py_start_method='spawn')
    with pipe:
        # Each callback goes to the source kind its parameter name announces:
        # the sequential one runs in-process, the parallel one in the workers.
        ext_seq = dali.fn.external_source(cb_seq, batch=batch, parallel=False)
        ext_par = dali.fn.external_source(cb_parallel, batch=batch, parallel=True)
        pipe.set_outputs(ext_seq, ext_par)
    pipe.build()
    utils.capture_processes(pipe._py_pool)
    for i in range(10):
        seq, par = pipe.run()
        for j in range(batch_size):
            s = seq.at(j)
            p = par.at(j)
            assert np.array_equal(s, p), \
                f"Sequential and parallel outputs differ in iteration {i}, sample {j}"
def _test_cycle_raise(cb, is_gen_fun, batch_size, epoch_size, reader_queue_size):
    """Check a ``cycle="raise"`` parallel source against a reference iterator.

    For three epochs the pipeline is run until ``StopIteration``; every batch
    is compared with the next batch of a reference iterator created from
    ``cb``, and each epoch must contain exactly ``epoch_size`` iterations.

    Args:
        cb: generator function (when ``is_gen_fun`` is true) or iterable used
            both as the pipeline source and to build the reference iterator.
        is_gen_fun: whether ``cb`` must be called to obtain an iterator.
        batch_size: pipeline batch size.
        epoch_size: expected number of iterations per epoch.
        reader_queue_size: forwarded as ``reader_queue_depth``.

    Raises:
        AssertionError: on a batch length, sample content or iteration count
            mismatch.
    """
    pipe = utils.create_pipe(
        cb, "cpu", batch_size=batch_size, py_num_workers=1, py_start_method="spawn",
        parallel=True, device_id=None, batch=True, num_threads=5, cycle="raise",
        reader_queue_depth=reader_queue_size)
    pipe.build()
    utils.capture_processes(pipe._py_pool)

    def new_reference():
        # iter(cb) is used for the first epoch as well, so iterables that are
        # not their own iterator are handled the same way in every epoch.
        return cb() if is_gen_fun else iter(cb)

    refer_iter = new_reference()
    for _ in range(3):
        i = 0
        while True:
            try:
                # StopIteration from pipe.run() or from next(refer_iter) is
                # handled below; the iteration count check catches a
                # reference that ends at the wrong time.
                (batch, ) = pipe.run()
                expected_batch = next(refer_iter)
                assert len(batch) == len(expected_batch), \
                    f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}"
                for sample, expected_sample in zip(batch, expected_batch):
                    np.testing.assert_equal(sample, expected_sample)
                i += 1
            except StopIteration:
                pipe.reset()
                refer_iter = new_reference()
                assert i == epoch_size, \
                    f"Number of iterations mismatch: expected {epoch_size}, got {i}"
                break