Exemplo n.º 1
0
def _test_vs_scipy(batch_size, num_dims, in_type, out_type):
    """Check the CPU ``fn.laplacian`` output against the ``laplacian_sp`` baseline.

    Args:
        batch_size: Batch size of the pipeline and of the data iterator.
        num_dims: Number of dimensions of the generated samples; the
            iterator is given ``max_shape=(30,) * num_dims``.
        in_type: Dtype handed to the data iterator.
        out_type: When equal to ``np.float32`` the operator is asked for
            ``types.FLOAT`` output, otherwise no ``dtype`` argument is passed.
            It is also forwarded to ``laplacian_sp``.
    """
    # scipy supports only windows of size 3 and does not use smoothing
    window_size = 3
    smoothing_size = 1
    max_shape = (30, ) * num_dims
    source = RandomlyShapedDataIterator(batch_size,
                                        max_shape=max_shape,
                                        dtype=in_type)

    @pipeline_def
    def pipeline():
        dtype_args = {'dtype': types.FLOAT} if out_type == np.float32 else {}
        samples = fn.external_source(source)
        laplacian = fn.laplacian(samples,
                                 window_size=window_size,
                                 smoothing_size=smoothing_size,
                                 **dtype_args)
        return laplacian, samples

    pipe = pipeline(batch_size=batch_size,
                    num_threads=4,
                    device_id=types.CPU_ONLY_DEVICE_ID)
    pipe.build()

    tolerance = 1e-6
    for _ in range(test_iters):
        out_edges, out_samples = pipe.run()
        out_edges = to_batch(out_edges, batch_size)
        out_samples = to_batch(out_samples, batch_size)
        reference = laplacian_sp(out_samples, out_type)
        check_batch(out_edges, reference, batch_size, max_allowed_error=tolerance)
Exemplo n.º 2
0
def compare_eager_with_pipeline(path,
                                batch_size=batch_size,
                                N_iterations=5,
                                fn_op=None,
                                eager_op=None,
                                **kwargs):
    """Run one operator through a pipeline and through eager mode and compare.

    Args:
        path: Dotted attribute path of the operator; resolved against ``fn``
            and ``eager`` when the matching op is not supplied explicitly.
        batch_size: Batch size passed to ``check_batch``.
        N_iterations: Number of pipeline runs / eager calls to compare.
        fn_op: Optional pipeline operator overriding the lookup by ``path``.
        eager_op: Optional eager operator overriding the lookup by ``path``.
        **kwargs: Operator arguments, given to both variants.
    """
    attr_chain = path.split('.')
    if fn_op is None:
        fn_op = reduce(reduce_getattr, [fn] + attr_chain)
    if eager_op is None:
        eager_op = reduce(reduce_getattr, [eager] + attr_chain)

    pipe = single_op_pipe(fn_op, kwargs)
    pipe.build()

    def _on_cpu(tensor_list):
        # GPU tensor lists are copied to the host before the comparison.
        if isinstance(tensor_list, tensors.TensorListGPU):
            return tensor_list.as_cpu()
        return tensor_list

    for iteration in range(N_iterations):
        eager_input = tensors.TensorListCPU(np.array(get_data(iteration)), layout="HWC")
        pipe_out, = pipe.run()
        eager_out = eager_op(eager_input, **kwargs)

        pipe_data = _on_cpu(pipe_out)
        eager_data = _on_cpu(eager_out)

        check_batch(pipe_data, eager_data, batch_size)
Exemplo n.º 3
0
def check_one_hot_operator(source,
                           device='cpu',
                           axis=-1,
                           expected_output_dim=None,
                           axis_name=None,
                           initial_layout=None):
    """Run ``OneHotPipeline`` once and compare its output with a reference.

    Args:
        source: Data source handed to the pipeline.
        device: ``'cpu'`` or ``'gpu'``; for ``'gpu'`` the returned input batch
            is copied to the host before the reference is computed.
        axis: Axis argument forwarded to the pipeline and to ``one_hot_3_axes``.
        expected_output_dim: Expected output dimensionality; when falsy it is
            taken as the dimensionality of the first input sample plus one.
        axis_name: Axis name forwarded to the pipeline and to ``modify_layout``.
        initial_layout: Layout passed to the pipeline and to ``modify_layout``.
    """
    pipe = OneHotPipeline(num_classes=num_classes,
                          source=source,
                          axis=axis,
                          layout=initial_layout,
                          axis_name=axis_name,
                          device=device)
    pipe.build()
    outputs, input_batch = pipe.run()
    if device == 'gpu':
        input_batch = input_batch.as_cpu()
    input_batch = [np.array(sample) for sample in input_batch]

    if not expected_output_dim:
        expected_output_dim = len(input_batch[0].shape) + 1

    # The multi-axis reference is only used for 4-dimensional outputs.
    if expected_output_dim == 4:
        reference = one_hot_3_axes(input_batch, axis)
    else:
        reference = one_hot(input_batch)

    expected_layout = modify_layout(initial_layout, expected_output_dim, axis,
                                    axis_name)
    check_batch(outputs,
                reference,
                batch_size,
                max_allowed_error=0,
                expected_layout=expected_layout)
Exemplo n.º 4
0
def compare_eager_with_pipeline(pipe, eager_op, *, eager_source=get_data_eager, layout='HWC',
                                batch_size=batch_size, N_iterations=5, **kwargs):
    """Compare the outputs of pipeline `pipe` and eager operator `eager_op`.

    The comparison is repeated for `N_iterations`. In each iteration the
    eager input comes from `eager_source(iteration, layout)`:

    * a non-empty tuple/list is unpacked into positional inputs,
    * an empty tuple/list means the operator is called with `batch_size` only,
    * anything else is passed as the single input.

    Outputs with dtype `types.BOOL` are compared sample by sample with
    `np.array_equal`; all other outputs go through `check_batch`.
    """
    pipe.build()
    for iteration in range(N_iterations):
        eager_input = eager_source(iteration, layout)
        pipe_outputs = pipe.run()

        if not isinstance(eager_input, (tuple, list)):
            eager_outputs = eager_op(eager_input, **kwargs)
        elif len(eager_input):
            eager_outputs = eager_op(*eager_input, **kwargs)
        else:
            eager_outputs = eager_op(batch_size=batch_size, **kwargs)

        # Normalize a single eager output to a 1-tuple so it can be zipped.
        if not isinstance(eager_outputs, (tuple, list)):
            eager_outputs = (eager_outputs,)

        assert len(pipe_outputs) == len(eager_outputs)

        for pipe_tl, eager_tl in zip(pipe_outputs, eager_outputs):
            assert type(pipe_tl) == type(eager_tl)

            is_bool = pipe_tl.dtype == types.BOOL
            if not is_bool:
                check_batch(pipe_tl, eager_tl, batch_size)
                continue
            for pipe_sample, eager_sample in zip(pipe_tl, eager_tl):
                assert np.array_equal(pipe_sample, eager_sample)
Exemplo n.º 5
0
def check_generic_gaussian_blur(
        batch_size, sigma, window_size, shape, layout, axes, op_type="cpu", in_dtype=np.uint8,
        out_dtype=types.NO_TYPE, random_shape=True):
    """Compare ``fn.gaussian_blur`` on generated data with ``gaussian_baseline``.

    Args:
        batch_size: Batch size of the pipeline and the data iterator.
        sigma: Sigma argument for both the operator and the baseline.
        window_size: Window size for both the operator and the baseline.
        shape: Passed as ``max_shape`` (and as ``min_shape`` when
            ``random_shape`` is false) to the data iterator.
        layout: Layout of the external source; also the expected output layout.
        axes: Forwarded to ``gaussian_baseline``.
        op_type: ``"cpu"`` or ``"gpu"`` - device of the blur operator.
        in_dtype: Numpy dtype of the generated input.
        out_dtype: DALI type requested from the operator.
        random_shape: When false, ``min_shape`` is set to ``shape`` as well.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    min_shape = shape if not random_shape else None
    source = RandomlyShapedDataIterator(
        batch_size, min_shape=min_shape, max_shape=shape, dtype=in_dtype)

    # Numpy type expected on output: either the input type is kept
    # (no type requested, or the requested one matches) or it is float32.
    keeps_input_type = out_dtype == types.NO_TYPE or dali_type(in_dtype) == out_dtype
    result_type = in_dtype if keeps_input_type else np.float32

    with pipe:
        samples = fn.external_source(source, layout=layout)
        if op_type == "gpu":
            samples = samples.gpu()
        blurred = fn.gaussian_blur(samples, device=op_type, sigma=sigma,
                                   window_size=window_size, dtype=out_dtype)
        pipe.set_outputs(blurred, samples)
    pipe.build()

    for _ in range(test_iters):
        result, samples = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            samples = samples.as_cpu()
        samples = to_batch(samples, batch_size)
        skip_axes = count_skip_axes(layout)
        baseline = []
        for img in samples:
            baseline.append(
                gaussian_baseline(img, sigma, window_size, axes, skip_axes, dtype=result_type))
        if result_type != np.float32:
            max_error = 1
        else:
            max_error = 1e-04
        check_batch(result, baseline, batch_size,
                    max_allowed_error=max_error, expected_layout=layout)
Exemplo n.º 6
0
def check_gaussian_blur(batch_size, sigma, window_size, op_type="cpu"):
    """Blur decoded images from ``images_dir`` and compare with ``gaussian_cv``.

    Args:
        batch_size: Batch size of the pipeline.
        sigma: Sigma passed to the operator and to the baseline.
        window_size: Window size passed to the operator and to the baseline.
        op_type: ``"cpu"`` or ``"gpu"``; any value other than ``"cpu"`` makes
            the decoder run on the ``"mixed"`` device.
    """
    if op_type == "cpu":
        decoder_device = "cpu"
    else:
        decoder_device = "mixed"

    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    with pipe:
        encoded, _ = fn.file_reader(file_root=images_dir, shard_id=0, num_shards=1)
        decoded = fn.image_decoder(encoded, device=decoder_device, output_type=types.RGB)
        blurred = fn.gaussian_blur(decoded, device=op_type, sigma=sigma,
                                   window_size=window_size)
        pipe.set_outputs(blurred, decoded)
    pipe.build()

    for _ in range(test_iters):
        result, images = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            images = images.as_cpu()
        images = to_batch(images, batch_size)
        baseline_cv = []
        for img in images:
            baseline_cv.append(gaussian_cv(img, sigma, window_size))
        check_batch(result, baseline_cv, batch_size, max_allowed_error=1)
Exemplo n.º 7
0
def _test_scalar(device, as_tensors):
    """Test propagation of scalars from external source.

    A source pipeline produces batches of ``np.float32`` scalars which are fed
    into a second pipeline through ``feed_input``; the outputs of both
    pipelines must match exactly.

    Args:
        device: Device of both external sources.
        as_tensors: When true, the batch is fed as a list of individual
            tensors instead of the tensor list returned by the source pipeline.
    """
    batch_size = 4

    def scalar_batch(iteration):
        # Use a separate name for the sample index: the original comprehension
        # reused ``i`` and thereby shadowed the iteration number, so every
        # batch carried the same values.
        return [
            np.float32(iteration * 10 + sample_idx + 1)
            for sample_idx in range(batch_size)
        ]

    src_pipe = Pipeline(batch_size, 1, 0)
    src_ext = fn.external_source(source=scalar_batch, device=device)
    src_pipe.set_outputs(src_ext)

    src_pipe.build()
    dst_pipe = Pipeline(batch_size,
                        1,
                        0,
                        exec_async=False,
                        exec_pipelined=False)
    dst_pipe.set_outputs(fn.external_source(name="ext", device=device))
    dst_pipe.build()

    for _ in range(3):
        src = src_pipe.run()
        data = src[0]
        if as_tensors:
            data = [data[i] for i in range(len(data))]
        dst_pipe.feed_input("ext", data)
        dst = dst_pipe.run()
        check_batch(src[0], dst[0], batch_size, 0, 0, "")
Exemplo n.º 8
0
def check_fixed_param_laplacian(device, batch_size, in_type, out_type, shape,
                                layout, axes, window_size, smoothing_size,
                                scales, normalize):
    """Compare ``fn.laplacian`` with batch-wide parameters against the baseline.

    Args:
        device: ``"cpu"`` or ``"gpu"``; for ``"gpu"`` the data is moved to the
            GPU before the operator and outputs are copied back for checking.
        batch_size: Batch size of the pipeline and the data iterator.
        in_type: Dtype of the generated input.
        out_type: When equal to ``np.float32`` the operator is asked for
            ``types.FLOAT``; a falsy value makes the baseline use ``in_type``.
        shape: ``max_shape`` of the generated samples.
        layout: Layout of the external source; also the expected output layout.
        axes: Forwarded to ``get_window_sizes`` and ``laplacian_baseline``.
        window_size: Operator ``window_size`` argument (may be None).
        smoothing_size: Operator ``smoothing_size`` argument (may be None).
        scales: Operator ``scale`` argument; ignored by the baseline when
            ``normalize`` is set, where scales are derived from window sizes.
        normalize: Operator ``normalized_kernel`` argument.
    """
    iterator = RandomlyShapedDataIterator(batch_size,
                                          max_shape=shape,
                                          dtype=in_type)

    @pipeline_def
    def pipeline():
        data = fn.external_source(iterator, layout=layout)
        dtype_arg = {"dtype": types.FLOAT} if out_type == np.float32 else {}
        if device == "gpu":
            data = data.gpu()
        edges = fn.laplacian(data,
                             window_size=window_size,
                             smoothing_size=smoothing_size,
                             scale=scales,
                             normalized_kernel=normalize,
                             **dtype_arg)
        return edges, data

    pipe = pipeline(device_id=0, num_threads=4, batch_size=batch_size, seed=42)
    pipe.build()

    # The baseline arguments are identical for every sample and iteration, so
    # they are prepared once. Separate names are used on purpose: the original
    # parameters are also read by the ``pipeline`` closure above and must not
    # be rebound.
    skip_axes = count_skip_axes(layout)
    ref_window = (np.array([]) if window_size is None else np.array(
        window_size, dtype=np.int32))
    ref_smoothing = (np.array([]) if smoothing_size is None else np.array(
        smoothing_size, dtype=np.int32))
    if normalize:
        all_sizes = get_window_sizes(ref_window, ref_smoothing, axes)
        ref_scales = [2.**(-sum(sizes) + axes + 2) for sizes in all_sizes]
    else:
        ref_scales = scales
    ref_scales = np.array(ref_scales, dtype=np.float32)
    ref_type = out_type or in_type
    max_error = 1e-3 if out_type == np.float32 else 1

    for _ in range(test_iters):
        edges, data = pipe.run()
        if device == "gpu":
            edges = edges.as_cpu()
            data = data.as_cpu()
        edges = to_batch(edges, batch_size)
        data = to_batch(data, batch_size)
        baseline = [
            laplacian_baseline(data[i], ref_type, ref_window, ref_smoothing,
                               ref_scales, axes, skip_axes)
            for i in range(batch_size)
        ]
        check_batch(edges,
                    baseline,
                    batch_size,
                    max_allowed_error=max_error,
                    expected_layout=layout)
Exemplo n.º 9
0
def _test_vs_open_cv(batch_size, window_size, in_type, out_type, normalize,
                     grayscale):
    """Compare the output of ``laplacian_pipe`` on CPU with ``laplacian_cv``.

    Args:
        batch_size: Batch size of the pipeline.
        window_size: Laplacian window size.
        in_type: Input type forwarded to the pipeline and the baseline.
        out_type: Output type; ``None`` means the input type is used when
            choosing the error tolerance.
        normalize: Whether the pipeline normalizes; if so the baseline is
            scaled by ``normalization_factor(window_size)``.
        grayscale: Forwarded to the pipeline and the baseline.
    """
    pipe = laplacian_pipe(device_id=types.CPU_ONLY_DEVICE_ID,
                          num_threads=4,
                          batch_size=batch_size,
                          window_size=window_size,
                          in_type=in_type,
                          out_type=out_type,
                          normalize=normalize,
                          grayscale=grayscale)
    pipe.build()
    norm_factor = normalization_factor(window_size)
    scale = norm_factor if normalize else 1
    for _ in range(test_iters):
        edges, imgs = pipe.run()
        imgs = to_batch(imgs, batch_size)
        baseline_cv = laplacian_cv(imgs, window_size, in_type, out_type, scale,
                                   grayscale)
        edges = to_batch(edges, batch_size)
        assert (len(edges) == len(baseline_cv))

        actual_out_type = in_type if out_type is None else out_type
        if actual_out_type != types.FLOAT:
            max_error = 1
        elif window_size <= 11:
            max_error = 1e-7
        else:
            max_error = 1e-4

        # Values grow exponentially with the window size, so without
        # normalization the absolute error would be large as well - rescale
        # both sides before comparing.
        if not normalize:
            edges = [sample * norm_factor for sample in edges]
            baseline_cv = [sample * norm_factor for sample in baseline_cv]
        check_batch(edges,
                    baseline_cv,
                    batch_size,
                    max_allowed_error=max_error,
                    expected_layout="HWC")
Exemplo n.º 10
0
def _run_test_cat(num_inputs, layout, ndim, axis, axis_name):
    """Run ``fn.cat`` on CPU and GPU and compare both with ``ref_cat``.

    Args:
        num_inputs: Number of inputs to concatenate.
        layout: Layout of the inputs; also the expected output layout.
        ndim: Dimensionality of the inputs; ``None`` means ``len(layout)``.
        axis: Concatenation axis; only passed to the operator when no
            ``axis_name`` is given.
        axis_name: Name of the concatenation axis, looked up in ``layout`` to
            get the reference axis.
    """
    num_iter = 3
    batch_size = 4
    if ndim is None:
        ndim = len(layout)

    # Reference axis: the named axis wins, then the numeric one, then 0.
    if axis_name is not None:
        ref_axis = layout.find(axis_name)
    elif axis is not None:
        ref_axis = axis
    else:
        ref_axis = 0
    assert ref_axis >= 0

    axis_arg = axis if not axis_name else None

    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=3, device_id=0)
    with pipe:
        inputs = fn.external_source(
            input_generator(num_inputs, batch_size, ndim, ref_axis),
            num_outputs=num_inputs, layout=layout)
        out_cpu = fn.cat(*inputs, axis=axis_arg, axis_name=axis_name)
        gpu_inputs = (x.gpu() for x in inputs)
        out_gpu = fn.cat(*gpu_inputs, axis=axis_arg, axis_name=axis_name)
        pipe.set_outputs(out_cpu, out_gpu, *inputs)
    pipe.build()

    for _ in range(num_iter):
        o_cpu, o_gpu, *inputs = pipe.run()
        ref = ref_cat(inputs, ref_axis)
        for out in (o_cpu, o_gpu):
            check_batch(out, ref, batch_size, eps=0, expected_layout=layout)
Exemplo n.º 11
0
def _run_test_stack(num_inputs, layout, ndim, axis, axis_name):
    """Run ``fn.stack`` on CPU and GPU and compare both with ``ref_stack``.

    Args:
        num_inputs: Number of inputs to stack.
        layout: Layout of the inputs.
        ndim: Dimensionality of the inputs; ``None`` means ``len(layout)``.
        axis: Axis argument passed to the operator; ``None`` is treated as
            axis 0 by the reference.
        axis_name: Name of the new axis. When given, the expected layout is
            the input layout with this name inserted at the reference axis;
            otherwise an empty layout is expected.
    """
    num_iter = 3
    batch_size = 4
    if ndim is None:
        ndim = len(layout)

    ref_axis = axis if axis is not None else 0

    if axis_name:
        # Slice with ``ref_axis`` rather than ``axis``: with ``axis=None`` the
        # slices ``layout[:None]`` / ``layout[None:]`` would both be the whole
        # layout and duplicate it around the new axis name.
        if layout:
            ref_layout = layout[:ref_axis] + axis_name + layout[ref_axis:]
        else:
            ref_layout = axis_name
    else:
        ref_layout = ""

    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=3, device_id=0)
    with pipe:
        inputs = fn.external_source(input_generator(num_inputs, batch_size, ndim),
                                    num_outputs=num_inputs, layout=layout)
        out_cpu = fn.stack(*inputs, axis=axis, axis_name=axis_name)
        out_gpu = fn.stack(*(x.gpu() for x in inputs), axis=axis, axis_name=axis_name)
        pipe.set_outputs(out_cpu, out_gpu, *inputs)
    pipe.build()

    for _ in range(num_iter):
        o_cpu, o_gpu, *inputs = pipe.run()
        ref = ref_stack(inputs, ref_axis)
        check_batch(o_cpu, ref, batch_size, eps=0, expected_layout=ref_layout)
        check_batch(o_gpu, ref, batch_size, eps=0, expected_layout=ref_layout)
Exemplo n.º 12
0
def check_per_sample_gaussian_blur(batch_size,
                                   sigma_dim,
                                   window_size_dim,
                                   shape,
                                   layout,
                                   axes,
                                   op_type="cpu"):
    """Compare ``fn.gaussian_blur`` with per-sample arguments to the baseline.

    Args:
        batch_size: Batch size of the pipeline and the data iterator.
        sigma_dim: Shape (single extent) of the random per-sample sigma, or
            ``None`` to leave ``sigma`` unspecified.
        window_size_dim: Shape (single extent) of the random per-sample window
            size, or ``None`` to leave ``window_size`` unspecified.
        shape: ``max_shape`` of the generated samples.
        layout: Layout of the external source; also the expected output layout.
        axes: Forwarded to ``gaussian_baseline``.
        op_type: ``"cpu"`` or ``"gpu"`` - device of the blur operator.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=0)
    data = RandomlyShapedDataIterator(batch_size, max_shape=shape)
    with pipe:
        if sigma_dim is not None:
            sigma = fn.random.uniform(range=[0.5, 3], shape=[sigma_dim])
            sigma_arg = sigma
        else:
            # placeholder, so we can return something
            sigma = fn.random.coin_flip(probability=0)
            sigma_arg = None

        if window_size_dim is not None:
            window_radius = fn.random.uniform(range=[5, 10],
                                              shape=[window_size_dim])
            window_size = fn.cast(window_radius, dtype=types.INT32) * 2 + 1
            window_arg = window_size
        else:
            # placeholder, so we can return something
            window_size = fn.random.coin_flip(probability=0)
            window_arg = None

        images = fn.external_source(data, layout=layout)
        if op_type == "gpu":
            images = images.gpu()
        blurred = fn.gaussian_blur(images,
                                   device=op_type,
                                   sigma=sigma_arg,
                                   window_size=window_arg)
        pipe.set_outputs(blurred, images, sigma, window_size)
    pipe.build()

    skip_axes = count_skip_axes(layout)
    for _ in range(test_iters):
        result, images, sigma, window_size = pipe.run()
        if op_type == "gpu":
            result = result.as_cpu()
            images = images.as_cpu()
        images = to_batch(images, batch_size)
        sigma = to_batch(sigma, batch_size)
        window_size = to_batch(window_size, batch_size)
        baseline = []
        for i in range(batch_size):
            # The pipeline always returns something for sigma / window size
            # (a placeholder when the argument was not used), so whether the
            # argument applies is decided by the *_dim parameters, not by the
            # returned batches.
            sample_sigma = sigma[i] if sigma_dim is not None else None
            sample_window = window_size[i] if window_size_dim is not None else None
            baseline.append(
                gaussian_baseline(images[i], sample_sigma, sample_window, axes,
                                  skip_axes))
        check_batch(result,
                    baseline,
                    batch_size,
                    max_allowed_error=1,
                    expected_layout=layout)
Exemplo n.º 13
0
def check_gaussian_blur(batch_size, sigma, window_size, op_type="cpu"):
    """Compare the pipeline from ``get_gaussian_pipe`` with ``gaussian_cv``.

    Args:
        batch_size: Batch size of the pipeline.
        sigma: Sigma passed to the pipeline and to the baseline.
        window_size: Window size passed to the pipeline and to the baseline.
        op_type: ``"cpu"`` or ``"gpu"``; GPU outputs are copied to the host
            before the comparison.
    """
    pipe = get_gaussian_pipe(batch_size, sigma, window_size, op_type)
    pipe.build()
    on_gpu = op_type == "gpu"
    for _ in range(test_iters):
        result, images = pipe.run()
        if on_gpu:
            result = result.as_cpu()
            images = images.as_cpu()
        images = to_batch(images, batch_size)
        baseline_cv = []
        for img in images:
            baseline_cv.append(gaussian_cv(img, sigma, window_size))
        check_batch(result, baseline_cv, batch_size,
                    max_allowed_error=1, expected_layout="HWC")
Exemplo n.º 14
0
def _test_feed_input(device):
    """Feed the output of a source pipeline into another one via ``feed_input``.

    The destination pipeline has a single external source named ``"ext"`` and
    runs with ``exec_async`` and ``exec_pipelined`` disabled; its output is
    compared with the batch that was fed in.
    """
    src_pipe, batch_size = build_src_pipe(device)

    dst_pipe = Pipeline(batch_size, 1, 0, exec_async=False, exec_pipelined=False)
    ext_node = fn.external_source(name="ext", device=device)
    dst_pipe.set_outputs(ext_node)
    dst_pipe.build()

    for _ in range(3):
        src_out = src_pipe.run()
        dst_pipe.feed_input("ext", src_out[0])
        dst_out = dst_pipe.run()
        check_batch(dst_out[0], src_out[0], batch_size, 0, 0, "XY")
Exemplo n.º 15
0
def _test_seq_input(num_iters, operator_fn, fixed_params, input_params,
                    input_data: ArgData, rng):
    """Compare an operator run on sequences with the same operator run on
    the unfolded input.

    Two pipelines are built from the same definition: one receives
    ``input_data`` with the computed parameters, the other the unfolded input
    with the expanded parameters. In every iteration the unfolded output of
    the first must match the output of the second, including the layout.

    Args:
        num_iters: Number of iterations; must be at least the number of
            batches in ``input_data.data``.
        operator_fn: Operator under test.
        fixed_params: Keyword arguments passed unchanged to the operator.
        input_params: Either a ``ParamsProviderBase`` instance or the
            arguments used to construct a ``ParamsProvider``.
        input_data: The main input.
        rng: Random generator handed to the parameters provider.
    """
    @pipeline_def
    def pipeline(args_data: List[ArgData]):
        positional = [arg for arg in args_data if arg.desc.is_positional_arg]
        # For positional arguments ``desc.name`` is used as the position.
        pos_nodes = [None] * len(positional)
        for arg in positional:
            assert 0 <= arg.desc.name < len(pos_nodes)
            assert pos_nodes[arg.desc.name] is None
            pos_nodes[arg.desc.name] = arg_data_node(arg)
        keyword = [arg for arg in args_data if not arg.desc.is_positional_arg]
        arg_nodes = {arg.desc.name: arg_data_node(arg) for arg in keyword}
        return operator_fn(*pos_nodes, **fixed_params, **arg_nodes)

    assert num_iters >= len(input_data.data)
    max_batch_size = max(len(batch) for batch in input_data.data)

    if isinstance(input_params, ParamsProviderBase):
        params_provider = input_params
    else:
        params_provider = ParamsProvider(input_params)
    params_provider.setup(input_data, fixed_params, rng)

    args_data = params_provider.compute_params()
    seq_pipe = pipeline(args_data=[input_data, *args_data],
                        batch_size=max_batch_size,
                        num_threads=4,
                        device_id=0)

    unfolded_input = params_provider.unfold_input()
    expanded_args_data = params_provider.expand_params()
    max_uf_batch_size = max(len(batch) for batch in unfolded_input.data)
    baseline_pipe = pipeline(args_data=[unfolded_input, *expanded_args_data],
                             batch_size=max_uf_batch_size,
                             num_threads=4,
                             device_id=0)
    seq_pipe.build()
    baseline_pipe.build()

    for _ in range(num_iters):
        (seq_batch, ) = seq_pipe.run()
        (baseline_batch, ) = baseline_pipe.run()
        unfolded_layout = params_provider.unfold_output_layout(seq_batch.layout())
        assert unfolded_layout == baseline_batch.layout()
        unfolded = params_provider.unfold_output(as_batch(seq_batch))
        baseline_batch = as_batch(baseline_batch)
        assert len(unfolded) == len(baseline_batch)
        check_batch(unfolded, baseline_batch, len(unfolded))
Exemplo n.º 16
0
def test_constant_promotion_mixed():
    """Decode the same JPEG once from a reader and once from a numpy constant.

    Both decodes use the ``"mixed"`` image decoder and the two outputs must
    match.
    """
    filename = os.path.join(jpeg_folder, "241", "cute-4074304_1280.jpg")
    file_contents = np.fromfile(filename, dtype=np.uint8)
    pipe = Pipeline(1, 3, 0)
    with pipe:
        jpegs, _ = fn.readers.file(files=[filename])
        from_reader = fn.image_decoder(jpegs, device="mixed")
        from_constant = fn.image_decoder(file_contents, device="mixed")
        pipe.set_outputs(from_constant, from_reader)
    pipe.build()
    # Outputs come back in the order given to set_outputs.
    out_constant, out_reader = pipe.run()
    check_batch(out_constant, out_reader, 1)
def _testimpl_operator_noise_gaussian_vs_add_normal_dist(
        device, mean, stddev, variable_dist_params, batch_size, niter):
    """Check that the two outputs of ``pipe_gaussian_noise`` agree.

    The pipeline is run ``niter`` times and its two outputs are compared with
    ``check_batch`` using ``eps=0.1``.
    """
    pipe_kwargs = dict(device=device,
                       batch_size=batch_size,
                       num_threads=3,
                       device_id=0)
    pipe = pipe_gaussian_noise(mean, stddev, variable_dist_params, **pipe_kwargs)
    pipe.build()
    for _ in range(niter):
        first, second = pipe.run()
        check_batch(first, second, batch_size=batch_size, eps=0.1)
Exemplo n.º 18
0
def check_per_sample_laplacian(device, batch_size, window_dim, smoothing_dim,
                               normalize, shape, layout, axes, in_type,
                               out_type):
    """Compare ``laplacian_per_sample_pipeline`` with ``laplacian_baseline``.

    The pipeline returns, next to the result and the input, the per-sample
    window sizes, smoothing sizes and scales it used; these are passed to the
    baseline sample by sample.

    Args:
        device: ``"cpu"`` or ``"gpu"``; GPU results and data are copied to the
            host before the comparison.
        batch_size: Batch size of the pipeline and the data iterator.
        window_dim: Forwarded to the pipeline.
        smoothing_dim: Forwarded to the pipeline.
        normalize: Forwarded to the pipeline.
        shape: ``max_shape`` of the generated samples.
        layout: Input layout; also the expected output layout.
        axes: Forwarded to the pipeline and to the baseline.
        in_type: Dtype of the generated input.
        out_type: Output type; a falsy value makes the baseline use ``in_type``.
    """
    iterator = RandomlyShapedDataIterator(batch_size,
                                          max_shape=shape,
                                          dtype=in_type)

    pipe = laplacian_per_sample_pipeline(device_id=0,
                                         device=device,
                                         num_threads=4,
                                         batch_size=batch_size,
                                         seed=42,
                                         iterator=iterator,
                                         layout=layout,
                                         window_dim=window_dim,
                                         smoothing_dim=smoothing_dim,
                                         axes=axes,
                                         normalize=normalize,
                                         out_type=out_type)
    pipe.build()

    for _ in range(test_iters):
        edges, data, window_size, smoothing_size, scale = pipe.run()
        if device == "gpu":
            edges = edges.as_cpu()
            data = data.as_cpu()
        edges = to_batch(edges, batch_size)
        data = to_batch(data, batch_size)
        window_size = to_batch(window_size, batch_size)
        smoothing_size = to_batch(smoothing_size, batch_size)
        scale = to_batch(scale, batch_size)

        baseline = []
        for idx in range(batch_size):
            skip_axes = count_skip_axes(layout)
            baseline.append(
                laplacian_baseline(data[idx], out_type or in_type,
                                   window_size[idx], smoothing_size[idx],
                                   scale[idx], axes, skip_axes))

        # Normalized abs values are up to 2 * `axes` * 255 so 1e-3 still gives
        # over 5 decimal digits of precision for float output.
        max_error = 1e-3 if out_type == np.float32 else 1
        check_batch(edges,
                    baseline,
                    batch_size,
                    max_allowed_error=max_error,
                    expected_layout=layout)
Exemplo n.º 19
0
def run_pipeline(device, num_dim, replace=False, layout=None):
    """Run ``fn.per_frame`` over ``input_batch(num_dim)`` and verify the output.

    The output data must equal the input batches, and the layout must start
    with ``"F"``: followed by ``"*"`` for each remaining dimension when no
    layout was given, otherwise by the input layout without its first
    character.

    Args:
        device: ``"cpu"`` or ``"gpu"``.
        num_dim: Dimensionality of the generated input.
        replace: Forwarded to ``fn.per_frame``.
        layout: Optional layout of the external source.
    """

    @pipeline_def
    def pipeline():
        frames = fn.external_source(input_batch(num_dim), layout=layout)
        if device == "gpu":
            frames = frames.gpu()
        return fn.per_frame(frames, replace=replace, device=device)

    pipe = pipeline(num_threads=4, batch_size=max_batch_size, device_id=0)
    pipe.build()

    if layout is None:
        expected_layout = "F" + "*" * (num_dim - 1)
    else:
        expected_layout = "F" + layout[1:]

    for reference in input_batch(num_dim):
        (out,) = pipe.run()
        check_batch(out, reference, len(reference), expected_layout=expected_layout)
Exemplo n.º 20
0
def check_one_hot_operator(premade_batch, axis=-1):
    """Run ``OneHotPipeline`` on a premade batch and compare with a reference.

    Args:
        premade_batch: Input batch; its first sample decides the reference -
            ``one_hot_3_axes`` for 3-dimensional samples, ``one_hot`` otherwise.
        axis: Axis forwarded to the pipeline and to ``one_hot_3_axes``.
    """
    pipe = OneHotPipeline(num_classes=num_classes,
                          input=premade_batch,
                          axis=axis)
    pipe.build()
    outputs = pipe.run()

    sample_dim = len(premade_batch[0].shape)
    if sample_dim == 3:
        reference = one_hot_3_axes(premade_batch, axis)
    else:
        reference = one_hot(premade_batch)

    new_layout = None  # TODO(klecki): add layout handling
    check_batch(outputs[0],
                reference,
                batch_size,
                max_allowed_error=0,
                expected_layout=new_layout)
Exemplo n.º 21
0
def test_compose_change_device():
    """Compose a CPU decoder with a GPU resize and compare with the
    equivalent chain built from ``fn`` calls.

    The composed operator's output must be a GPU tensor list and equal to
    the output of the explicit decode -> ``.gpu()`` -> resize chain.
    """
    batch_size = 3
    pipe = Pipeline(batch_size, 1, 0)

    size = fn.random.uniform(shape=2, range=(300,500))
    decoder = ops.decoders.Image(device="cpu")
    resize = ops.Resize(size=size, device="gpu")
    composed = ops.Compose([decoder, resize])

    files, _labels = fn.readers.caffe(path=caffe_db_folder, seed=1)
    composed_out = composed(files)
    explicit_out = fn.resize(fn.decoders.image(files).gpu(), size=size)
    pipe.set_outputs(composed_out, explicit_out)

    pipe.build()
    out = pipe.run()
    assert isinstance(out[0], dali.backend.TensorListGPU)
    test_utils.check_batch(out[0], out[1], batch_size=batch_size)
Exemplo n.º 22
0
def _test_permute_batch_fixed(device):
    """Check ``fn.permute_batch`` with a fixed list of indices.

    The permuted output must equal the original samples reordered by the
    same indices, with layout ``"abc"``.
    """
    batch_size = 10
    idxs = [4, 8, 0, 6, 3, 5, 2, 9, 7, 1]

    def make_batch():
        return gen_data(batch_size, np.int16)

    pipe = Pipeline(batch_size, 4, 0)
    data = fn.external_source(source=make_batch, device=device, layout="abc")
    pipe.set_outputs(data, fn.permute_batch(data, indices=idxs))
    pipe.build()

    for _ in range(10):
        orig, permuted = pipe.run()
        if isinstance(orig, dali.backend.TensorListGPU):
            orig = orig.as_cpu()
        ref = []
        for idx in idxs:
            ref.append(orig.at(idx))
        check_batch(permuted, ref, len(ref), 0, 0, "abc")
Exemplo n.º 23
0
def _test_callback(device, as_tensors, change_layout_to = None):
    """Use a pipeline's output as the callback source of another pipeline.

    A reference pipeline built with ``layout=change_layout_to`` provides the
    expected batches.

    Args:
        device: Device of the external source.
        as_tensors: When true, the callback returns a list of individual
            tensors instead of the tensor list.
        change_layout_to: Layout given to the destination external source and
            to the reference pipeline.
    """
    src_pipe, batch_size = build_src_pipe(device)
    ref_pipe, batch_size = build_src_pipe(device, layout=change_layout_to)

    dst_pipe = Pipeline(batch_size, 1, 0)

    def get_from_src():
        tensor_list = src_pipe.run()[0]
        if not as_tensors:
            return tensor_list
        return [tensor_list[i] for i in range(len(tensor_list))]

    ext_node = fn.external_source(source=get_from_src, device=device,
                                  layout=change_layout_to)
    dst_pipe.set_outputs(ext_node)
    dst_pipe.build()

    for _ in range(3):
        ref = ref_pipe.run()
        out = dst_pipe.run()
        check_batch(out[0], ref[0], batch_size, 0, 0)
Exemplo n.º 24
0
def check_stop_iteration_resume(pipe, batch_size, layout):
    """Run two epochs of ``pipe`` and check that they yield the same batches.

    Each epoch is run until ``StopIteration`` is raised, after which the
    pipeline is reset. Both epochs must have the same number of iterations
    and matching batches.
    """
    pipe.build()
    capture_processes(pipe._py_pool)
    outputs_epoch_1 = []
    outputs_epoch_2 = []
    for epoch_outputs in (outputs_epoch_1, outputs_epoch_2):
        try:
            while True:
                (batch,) = pipe.run()
                # Copy the samples so they do not alias pipeline buffers.
                copies = [np.copy(batch.at(idx)) for idx in range(len(batch))]
                epoch_outputs.append(copies)
        except StopIteration:
            pipe.reset()
    num_iters_1 = len(outputs_epoch_1)
    num_iters_2 = len(outputs_epoch_2)
    assert num_iters_1 == num_iters_2, (
        "Epochs must have same number of iterations, "
        "but they have {} {} respectively".format(num_iters_1, num_iters_2))
    for first, second in zip(outputs_epoch_1, outputs_epoch_2):
        check_batch(first, second, batch_size, 0, None,
                    expected_layout=layout, compare_layouts=True)
Exemplo n.º 25
0
def _test_permute_batch(device, type):
    """Check ``fn.permute_batch`` with indices from ``fn.batch_permutation``.

    The permutation is returned as a pipeline output and used to build the
    reference from the original batch.

    Args:
        device: Device of the external source.
        type: Data type passed to ``gen_data``.
    """
    batch_size = 10

    def make_batch():
        return gen_data(batch_size, type)

    pipe = Pipeline(batch_size, 4, 0)
    data = fn.external_source(source=make_batch, device=device, layout="abc")
    perm = fn.batch_permutation()
    pipe.set_outputs(data, fn.permute_batch(data, indices=perm), perm)
    pipe.build()

    for _ in range(10):
        orig, permuted, perm_out = pipe.run()
        idxs = [int(perm_out.at(sample)) for sample in range(batch_size)]
        if isinstance(orig, dali.backend.TensorListGPU):
            orig = orig.as_cpu()
        ref = [orig.at(idx) for idx in idxs]
        check_batch(permuted, ref, len(ref), 0, 0, "abc")
Exemplo n.º 26
0
def check_reader(op_path, *, fn_op=None, eager_op=None, batch_size=batch_size,
                 N_iterations=2, **kwargs):
    """Compare a reader run in a pipeline with its eager (iterable) variant.

    The eager reader is iterated ``N_iterations`` times; for every batch it
    yields, the pipeline is run once and the outputs are compared. For the
    last batch of the eager iterator the pipeline output is sliced to the
    length of the eager output before the comparison.

    Args:
        op_path: Operator path resolved by ``get_ops``.
        fn_op: Optional pipeline operator passed to ``get_ops``.
        eager_op: Optional eager operator passed to ``get_ops``.
        batch_size: Batch size given to the eager reader.
        N_iterations: Number of passes over the eager iterator.
        **kwargs: Reader arguments, given to both variants.
    """
    fn_op, eager_op = get_ops(op_path, fn_op, eager_op)
    pipe = reader_pipeline(fn_op, kwargs)
    pipe.build()

    iter_eager = eager_op(batch_size=batch_size, **kwargs)

    for _ in range(N_iterations):
        for batch_idx, eager_outputs in enumerate(iter_eager):
            pipe_outputs = pipe.run()

            if not isinstance(eager_outputs, (tuple, list)):
                eager_outputs = (eager_outputs,)

            assert len(pipe_outputs) == len(eager_outputs)

            for pipe_tl, eager_tl in zip(pipe_outputs, eager_outputs):
                num_samples = len(eager_tl)
                if batch_idx == len(iter_eager) - 1:
                    pipe_tl = _slice_tensorlist(pipe_tl, num_samples)

                assert type(pipe_tl) == type(eager_tl)
                check_batch(pipe_tl, eager_tl, len(eager_tl))
Exemplo n.º 27
0
def _test_resize(layout, interp, dtype, w, h):
    """Compare DALI resize outputs (CPU and GPU) with a reference pipeline.

    Args:
        layout: Data layout; ``"FCHW"`` selects the channel-first variant.
        interp: Interpolation type; for ``types.INTERP_LANCZOS3`` DALI outputs
            are clipped to [0, 255] before the comparison.
        dtype: Forwarded to the pipeline factories.
        w: Forwarded to the pipeline factories.
        h: Forwarded to the pipeline factories.
    """
    channel_first = (layout == "FCHW")
    pipe_dali = create_dali_pipe(channel_first, 8, interp, dtype, w, h)
    pipe_dali.build()
    pipe_ref = create_ref_pipe(channel_first, 8, interp, dtype, w, h)
    pipe_ref.build()
    eps = 1e-2
    max_err = 6

    def as_arrays(batch):
        if interp == types.INTERP_LANCZOS3:
            # PIL can't resize float data. Lanczos resampling generates
            # overshoot which has to be removed for the comparison to succeed.
            return [np.array(x).clip(0, 255) for x in batch]
        return [np.array(x) for x in batch]

    # Drop a one-pixel border in both spatial dimensions before comparing.
    border = slice(1, -1)
    everything = slice(None)
    if channel_first:
        interior = (everything, everything, border, border)
    else:
        interior = (everything, border, border, everything)

    for _ in range(4):
        out_dali = pipe_dali.run()
        out_ref = pipe_ref.run()[0]
        dali_cpu = as_arrays(out_dali[0])
        dali_gpu = as_arrays(out_dali[1].as_cpu())

        out_ref = [np.array(x)[interior] for x in out_ref]
        dali_gpu = [x[interior] for x in dali_gpu]
        dali_cpu = [x[interior] for x in dali_cpu]

        check_batch(dali_cpu, out_ref, 2, eps=eps, max_allowed_error=max_err)
        check_batch(dali_gpu, out_ref, 2, eps=eps, max_allowed_error=max_err)

        ext_size, size_cpu, size_gpu = out_dali[2], out_dali[3], out_dali[4]
        check_batch(ext_size, size_cpu, 2)
        check_batch(ext_size, size_gpu, 2)
Exemplo n.º 28
0
def _test_standalone_vs_fused(device):
    """Compare the first output of the audio decoder pipeline with the others.

    Output 0 is the reference; outputs 1-3 are each compared to it with a
    tolerance suited to how they are expected to differ.

    Args:
        device: device string forwarded to ``audio_decoder_pipe``; the value
            'gpu' relaxes the tolerance of the first comparison.
    """
    pipe = audio_decoder_pipe(device=device,
                              batch_size=2,
                              num_threads=1,
                              device_id=0)
    pipe.build()

    if device == 'gpu':
        exact_eps, exact_max_err = 1e-6, 1e-4
    else:
        exact_eps, exact_max_err = 0, 0

    # (output index, eps, max_allowed_error)
    comparisons = (
        # two sampling rates - should be bit-exact
        (1, exact_eps, exact_max_err),
        # numerical round-off error in rate
        (2, 1e-6, 1e-4),
        # here, the sampling rate is slightly different, so we can tolerate
        # larger errors
        (3, 1e-4, 1),
    )

    for _ in range(2):
        outs = pipe.run()
        reference = outs[0]
        for index, eps, max_err in comparisons:
            check_batch(reference,
                        outs[index],
                        eps=eps,
                        max_allowed_error=max_err)
Exemplo n.º 29
0
def _test_kernels(device, num_dims, smoothing, normalize):
    """Compare the kernels used by ``fn.laplacian`` with OpenCV-derived ones.

    Each sample is a zero array with a single 1 at its centre, so the
    operator's output for that sample is compared, after cropping, against a
    kernel assembled from ``cv2.getDerivKernels``.  One sample is produced
    for every odd step between ``min_window_size`` and ``max_window_size``.

    Args:
        device: "gpu" moves the input to the GPU before the operator runs;
            the value is also forwarded as the operator's ``device``.
        num_dims: number of dimensions of the impulse arrays.
        smoothing: when true the smoothing window equals the derivative
            window, otherwise a smoothing size of 1 is used.
        normalize: forwarded as ``normalized_kernel``; when false the test
            applies the scale factor to the operator output itself.
    """
    win_sizes = range(min_window_size, max_window_size + 2, 2)
    batch_size = (max_window_size + 2 - min_window_size) // 2

    def get_inputs():
        """Build impulses, window sizes, smoothing sizes and scales."""
        padding = 2
        impulses, window_sizes, smoothing_sizes, scales = [], [], [], []
        for win_size in win_sizes:
            side = win_size + padding
            impulse = np.zeros((side, ) * num_dims, dtype=np.float32)
            impulse[(side // 2, ) * num_dims] = 1
            if smoothing:
                smoothing_size = win_size
                exponent = num_dims * win_size - 2 - num_dims
            else:
                smoothing_size = 1
                exponent = win_size - 3
            impulses.append(impulse)
            window_sizes.append(np.array(win_size, dtype=np.int32))
            smoothing_sizes.append(np.array(smoothing_size, dtype=np.int32))
            scales.append(np.array(2.**(-exponent), dtype=np.float32))
        return impulses, window_sizes, smoothing_sizes, scales

    @pipeline_def
    def pipeline():
        impulses, window_sizes, smoothing_sizes, scales = fn.external_source(
            get_inputs, num_outputs=4)
        if device == "gpu":
            impulses = impulses.gpu()
        kernels = fn.laplacian(impulses,
                               window_size=window_sizes,
                               smoothing_size=smoothing_sizes,
                               dtype=types.FLOAT,
                               normalized_kernel=normalize,
                               device=device)
        return kernels, scales

    def outer(*vs):
        """Return the N-dimensional outer product of the given 1-D windows."""
        shape = tuple(len(v) for v in vs)
        product = np.array([1.])
        for window in vs:
            product = np.outer(product, window)
        return product.reshape(shape)

    def get_cv2_kernel(win_size, smoothing):
        """Sum, over every axis, the second derivative along that axis
        combined with the smoothing window along all remaining axes."""
        deriv, smooth = cv2.getDerivKernels(2, 0, win_size)
        if not smoothing:
            # No smoothing: an impulse window leaves the other axes untouched.
            smooth = np.zeros(win_size)
            smooth[win_size // 2] = 1.
        terms = []
        for axis in range(num_dims):
            factors = [
                deriv if other == axis else smooth
                for other in range(num_dims)
            ]
            terms.append(outer(*factors))
        return sum(terms)

    pipe = pipeline(num_threads=4, batch_size=batch_size, device_id=0)
    pipe.build()
    kernels, scales = pipe.run()
    if device == "gpu":
        kernels = kernels.as_cpu()

    # Drop the one-element border on every axis (half of the padding of 2).
    crop = (slice(1, -1), ) * num_dims
    kernels = [np.array(kernel)[crop] for kernel in kernels]
    scales = [np.array(scale).item() for scale in scales]
    assert (len(kernels) == len(win_sizes) == len(scales))

    baseline_kernels = [
        get_cv2_kernel(win_size, smoothing) * scale
        for win_size, scale in zip(win_sizes, scales)
    ]
    if not normalize:  # output was not normalized by the op
        kernels = [kernel * scale for kernel, scale in zip(kernels, scales)]

    check_batch(kernels,
                baseline_kernels,
                batch_size,
                max_allowed_error=1e-5,
                expected_layout="HWC")
Exemplo n.º 30
0
def _test_stitching(device, dim, channel_first, dtype, interp):
    """Check that ROI-resized tiles stitch back into the full resized image.

    The input is resized once to the full output size and, separately, each
    half-by-half (by-half) region of interest is resized to half the output
    size.  The tiles are then assembled with ``np.block`` and compared with
    the full-size result.

    Args:
        device: "cpu" or "gpu"; for anything other than "cpu" the images are
            moved to the GPU before resizing.
        dim: number of spatial dimensions; 2 reads images from
            ``db_2d_folder``, any other value uses ``random_3d_loader`` and
            is treated as 3-D.
        channel_first: when true the channel axis is transposed to the front
            before resizing.
        dtype: output type passed to ``fn.resize``; ``types.FLOAT`` selects
            the tight comparison tolerance.
        interp: interpolation type used for every resize.
    """
    batch_size = 1 if dim == 3 else 10
    pipe = dali.pipeline.Pipeline(batch_size=batch_size,
                                  num_threads=1,
                                  device_id=0,
                                  seed=1234,
                                  prefetch_queue_depth=1)
    with pipe:
        if dim == 2:
            files, _ = dali.fn.readers.caffe(path=db_2d_folder,
                                             random_shuffle=True)
            images_cpu = dali.fn.decoders.image(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(
                source=random_3d_loader(batch_size), layout="DHWC")

        images_hwc = images_cpu if device == "cpu" else images_cpu.gpu()

        if channel_first:
            images = dali.fn.transpose(
                images_hwc,
                perm=[3, 0, 1, 2] if dim == 3 else [2, 0, 1],
                transpose_layout=True)
        else:
            images = images_hwc

        out_size_full = [32, 32, 32] if dim == 3 else [160, 160]
        out_size_half = [x // 2 for x in out_size_full]

        # Relative ROI bounds, one entry per spatial axis; updated in place
        # for every tile below.
        roi_start = [0] * dim
        roi_end = [1] * dim

        resized = fn.resize(images,
                            dtype=dtype,
                            min_filter=interp,
                            mag_filter=interp,
                            size=out_size_full)

        outputs = [resized]

        # One pass for 2-D data, two passes (front/back half) for 3-D data.
        # Tiles are appended in z-major, then y, then x order.
        for z in range(dim - 1):
            if dim == 3:
                roi_start[0] = z * 0.5
                roi_end[0] = (z + 1) * 0.5
            for y in [0, 1]:
                roi_start[-2] = y * 0.5
                roi_end[-2] = (y + 1) * 0.5
                for x in [0, 1]:
                    roi_start[-1] = x * 0.5
                    roi_end[-1] = (x + 1) * 0.5

                    part = fn.resize(images,
                                     dtype=dtype,
                                     interp_type=interp,
                                     size=out_size_half,
                                     roi_start=roi_start,
                                     roi_end=roi_end,
                                     roi_relative=True)
                    outputs.append(part)

        pipe.set_outputs(*outputs)

    pipe.build()
    for _ in range(1):
        out = pipe.run()
        if device == "gpu":
            out = [x.as_cpu() for x in out]
        whole = out[0]
        tiled = []
        for i in range(batch_size):
            slices = []
            for z in range(dim - 1):
                # Output 0 is the full image; each z step owns four tiles.
                q00 = out[1 + z * 4 + 0].at(i)
                q01 = out[1 + z * 4 + 1].at(i)
                q10 = out[1 + z * 4 + 2].at(i)
                q11 = out[1 + z * 4 + 3].at(i)
                # np.block joins the innermost lists along the last axis, so
                # channel-last data needs one extra nesting level to keep
                # the channel axis out of the concatenation.
                if channel_first:
                    slices.append(np.block([[q00, q01], [q10, q11]]))
                else:
                    slices.append(np.block([[[q00], [q01]], [[q10], [q11]]]))
            if dim == 3:
                # Join the two z halves along the axis preceding the plane.
                if channel_first:
                    tiled.append(np.block([[[slices[0]]], [[slices[1]]]]))
                else:
                    tiled.append(np.block([[[[slices[0]]]], [[[slices[1]]]]]))
            else:
                tiled.append(slices[0])
        # Float output is held to a tight tolerance; every other output type
        # is allowed an absolute difference of up to 1.
        max_err = 1e-3 if dtype == types.FLOAT else 1
        check_batch(tiled,
                    whole,
                    batch_size,
                    1e-4,
                    max_err,
                    compare_layouts=False)