Пример #1
0
def check_dtype_config_support(dtype, precision):
    """Probe whether a ``dtype``/``precision`` pair runs under cuDNN.

    A minimal 2D forward convolution (1x1x2x2 zero-filled inputs and
    filters) is compiled with ``algo="small"`` and executed once.  Per the
    original author's note, that algorithm
    (CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) is documented as
    supporting every configuration on cuDNN v5.1, v6 and v7, so a failure
    is attributed to the dtype/precision pair rather than to the algorithm.

    Parameters
    ----------
    dtype
        NumPy dtype used for the probe inputs and filters.
    precision
        Value forwarded to ``dnn_conv`` as its ``precision`` argument.

    Returns
    -------
    bool
        ``True`` when the compiled function runs; ``False`` when it raises
        a ``RuntimeError`` whose message contains
        ``CUDNN_STATUS_ARCH_MISMATCH``.

    Raises
    ------
    AssertionError
        If a ``RuntimeError`` with any other message is raised.
    """
    probe_shape = (1, 1, 2, 2)
    inputs = aesara.shared(np.zeros(probe_shape, dtype=dtype))
    filters = aesara.shared(np.zeros(probe_shape, dtype=dtype))
    probe = aesara.function(
        [],
        dnn_conv(inputs, filters, precision=precision, algo="small"),
        mode=mode_with_gpu,
    )
    try:
        probe()
    except RuntimeError as err:
        # Only an architecture mismatch counts as "unsupported"; anything
        # else is unexpected and must surface as a failure.
        assert "CUDNN_STATUS_ARCH_MISMATCH" in str(err)
        return False
    else:
        return True
Пример #2
0
 def run_gradweight_runtime_algorithm(algo):
     """Compare the cuDNN filter gradient against a CPU reference.

     ``self``, ``dtype``, ``_broadcastable``, ``unit_shape`` and ``ref_cast``
     are free variables taken from the enclosing scope.

     The function is compiled with the ``dnn__conv__algo_bwd_filter`` flag
     set to ``algo``, the compiled graph is checked to contain exactly one
     ``GpuDnnConvGradW`` node and no ``GpuDnnConv`` / ``GpuDnnConvGradI``
     node, and the GPU and CPU gradients are then compared on random data
     for every entry of ``self.runtime_shapes``.
     """
     with aesara.config.change_flags(dnn__conv__algo_bwd_filter=algo):
         sym_inputs = TensorType(dtype, _broadcastable)()
         sym_filters = TensorType(dtype, _broadcastable)()

         # cuDNN side: gradient of the summed convolution w.r.t. the filters.
         gpu_conv = dnn_conv(
             img=sym_inputs,
             kerns=sym_filters,
             algo=algo,
             precision=dtype,
             subsample=unit_shape,
             dilation=unit_shape,
         )
         gpu_grad = aesara.gradient.grad(gpu_conv.sum(), [sym_filters])
         f = aesara.function(
             [sym_inputs, sym_filters], gpu_grad, mode=mode_with_gpu
         )

         # The compiled graph must use the dedicated filter-gradient op only.
         graph_ops = [node.op for node in f.maker.fgraph.apply_nodes]
         assert sum(isinstance(op, GpuDnnConvGradW) for op in graph_ops) == 1
         assert not any(isinstance(op, GpuDnnConv) for op in graph_ops)
         assert not any(isinstance(op, GpuDnnConvGradI) for op in graph_ops)

         # CPU side: same gradient, built on filters reversed along each of
         # the trailing axes (three when self.ndim == 3, otherwise two).
         keep = slice(None)
         reverse = slice(None, None, -1)
         n_reversed = 3 if self.ndim == 3 else 2
         flipped_filters = sym_filters[(keep, keep) + (reverse,) * n_reversed]
         cpu_conv = self.cpu_conv_class(subsample=unit_shape)(
             ref_cast(sym_inputs), flipped_filters
         )
         cpu_grad = aesara.gradient.grad(cpu_conv.sum(), [sym_filters])
         f_ref = aesara.function(
             [sym_inputs, sym_filters], cpu_grad, mode="FAST_RUN"
         )

         if algo in ("time_once", "guess_once"):
             # For the "*_once" algorithms only the first shape entry is
             # exercised, with its repeat count forced to 5.
             first_entry = list(self.runtime_shapes[0])
             first_entry[0] = 5
             shape_plan = [first_entry]
         else:
             shape_plan = self.runtime_shapes

         for repeats, (inputs_shape, filters_shape) in shape_plan:
             print("Shapes:", inputs_shape, filters_shape)
             for _ in range(repeats):
                 inputs_val = np.random.random(inputs_shape).astype(dtype)
                 filters_val = np.random.random(filters_shape).astype(dtype)
                 gpu_res = f(inputs_val, filters_val)
                 cpu_res = f_ref(inputs_val, filters_val)
                 utt.assert_allclose(cpu_res, np.asarray(gpu_res))
Пример #3
0
 def run_fwd_runtime_algorithm(algo):
     """Compare the cuDNN forward convolution against a CPU reference.

     ``self``, ``dtype``, ``_broadcastable``, ``unit_shape`` and ``ref_cast``
     are free variables taken from the enclosing scope.

     Both implementations are compiled on symbolic inputs divided by 10 and
     then evaluated on random data for every entry of
     ``self.runtime_shapes``; results are rescaled in place by
     ``self.scale_numpy_arrays_inplace`` before being compared.
     """
     sym_inputs = TensorType(dtype, _broadcastable)()
     sym_filters = TensorType(dtype, _broadcastable)()

     # Shrink the operands so float rounding does not produce very large
     # absolute errors.
     scaled_inputs = sym_inputs / 10
     scaled_filters = sym_filters / 10

     gpu_conv = dnn_conv(
         img=scaled_inputs,
         kerns=scaled_filters,
         algo=algo,
         precision=dtype,
         subsample=unit_shape,
         dilation=unit_shape,
     )
     f = aesara.function([sym_inputs, sym_filters], gpu_conv, mode=mode_with_gpu)

     # The reference is built on filters reversed along each of the trailing
     # axes (three when self.ndim == 3, otherwise two).
     keep = slice(None)
     reverse = slice(None, None, -1)
     n_reversed = 3 if self.ndim == 3 else 2
     flipped_filters = scaled_filters[(keep, keep) + (reverse,) * n_reversed]
     cpu_conv = self.cpu_conv_class(subsample=unit_shape)(
         ref_cast(scaled_inputs), flipped_filters
     )
     f_ref = aesara.function([sym_inputs, sym_filters], cpu_conv, mode="FAST_RUN")

     if algo in ("time_once", "guess_once"):
         # For the "*_once" algorithms only the first shape entry is
         # exercised, with its repeat count forced to 5.
         first_entry = list(self.runtime_shapes[0])
         first_entry[0] = 5
         shape_plan = [first_entry]
     else:
         shape_plan = self.runtime_shapes

     for repeats, (inputs_shape, filters_shape) in shape_plan:
         print("Shapes:", inputs_shape, filters_shape)
         for _ in range(repeats):
             inputs_val = np.random.random(inputs_shape).astype(dtype)
             filters_val = np.random.random(filters_shape).astype(dtype)
             gpu_res = np.asarray(f(inputs_val, filters_val))
             cpu_res = f_ref(inputs_val, filters_val)
             self.scale_numpy_arrays_inplace(cpu_res, gpu_res, 1)
             utt.assert_allclose(cpu_res, gpu_res)
Пример #4
0
def local_abstractconv_cudnn_alt(fgraph, node):
    """Build an alternative cuDNN expression for an abstract 2D conv node.

    Handles nodes whose op is ``AbstractConv2d``,
    ``AbstractConv2d_gradWeights`` or ``AbstractConv2d_gradInputs``.  Each
    kind is expressed through a different cuDNN entry point than its name
    suggests: the forward op goes through ``dnn_conv`` with a ``bprop``
    direction hint, while both gradient ops are rewritten as a forward
    ``GpuDnnConv`` on dimshuffled operands.

    Parameters
    ----------
    fgraph
        Not used in this function body.
    node
        Apply node to inspect; only ``node.op`` and the first two entries
        of ``node.inputs`` are read.

    Returns
    -------
    list or None
        A one-element list holding the replacement variable, or ``None``
        when the node is of another kind, cuDNN is unavailable for the
        input's context, or the configuration is not covered (dilation on
        cuDNN < 6000, unshared convolution, asymmetric padding, or the
        per-op restrictions checked below).
    """
    op = node.op
    handled_ops = (
        AbstractConv2d,
        AbstractConv2d_gradWeights,
        AbstractConv2d_gradInputs,
    )
    if not isinstance(op, handled_ops):
        return None

    # Dilated filters are rejected when the reported version is below 6000.
    if version(raises=False) < 6000 and op.filter_dilation != (1, 1):
        return None
    if op.unshared:
        return None

    border_mode = op.border_mode
    if isinstance(border_mode, tuple) and any(
        isinstance(pad, tuple) for pad in border_mode
    ):
        # Asymmetric padding not yet supported
        return None

    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
    if not dnn_available(inp1.type.context_name):
        return None

    subsample = op.subsample
    filter_dilation = op.filter_dilation
    num_groups = op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])
    conv_mode = "conv" if op.filter_flip else "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        return [
            dnn_conv(
                inp1,
                inp2,
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                direction_hint=direction_hint,
                conv_mode=conv_mode,
                num_groups=num_groups,
            )
        ]

    if isinstance(op, AbstractConv2d_gradWeights):
        supported = (
            border_mode == "valid"
            and subsample == (1, 1)
            and filter_dilation == (1, 1)
            and num_groups == 1
        )
        if not supported:
            return None

        img = gpu_contiguous(inp1)
        topgrad = gpu_contiguous(inp2)
        ctx_name = infer_context_name(img, topgrad)

        # Swap the first two axes of both operands before convolving them.
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
        topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
        ishape = [shape_i_op(axis)(img) for axis in range(img.ndim)]
        tshape = [shape_i_op(axis)(topgrad) for axis in range(topgrad.ndim)]
        out_shp = assert_conv_shape(
            get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
        )

        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
        desc = GpuDnnConvDesc(
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            conv_mode="cross",
            precision=precision,
        )(out.shape)

        conv = GpuDnnConv(algo=None, num_groups=num_groups)(
            img, topgrad, out, desc
        )
        if conv_mode == "conv":
            # The descriptor always uses "cross"; reverse the two trailing
            # axes of the result when the original op flips its filters.
            conv = conv[:, :, ::-1, ::-1]

        # Swap the first two axes back for the returned variable.
        return [as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)]

    # Only AbstractConv2d_gradInputs can reach this point, because the
    # isinstance guard at the top admits exactly the three op classes.
    if not (border_mode == "valid" and subsample == (1, 1) and num_groups == 1):
        return None

    kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
    topgrad = gpu_contiguous(inp2)
    ctx_name = infer_context_name(kerns, topgrad)

    # The descriptor uses the opposite mode from the one derived from
    # filter_flip, together with "full" padding.
    swapped_mode = "cross" if conv_mode == "conv" else "conv"
    desc = GpuDnnConvDesc(
        border_mode="full",
        subsample=subsample,
        dilation=filter_dilation,
        conv_mode=swapped_mode,
        precision=precision,
    )(kerns.shape)

    tshape = [shape_i_op(axis)(topgrad) for axis in range(topgrad.ndim)]
    kshape = [shape_i_op(axis)(kerns) for axis in range(kerns.ndim)]
    shape = assert_conv_shape(
        get_conv_output_shape(
            tshape,
            kshape,
            border_mode="full",
            subsample=subsample,
            filter_dilation=filter_dilation,
        )
    )

    out = GpuAllocEmpty(dtype=topgrad.dtype, context_name=ctx_name)(*shape)
    return [
        GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns, out, desc)
    ]
Пример #5
0
    def run_conv_fwd(self, algo, dtype, precision, parameters):
        """Check one cuDNN forward-convolution configuration against the CPU.

        Parameters
        ----------
        algo
            Forward algorithm forwarded to ``dnn_conv``.
        dtype
            NumPy dtype of the randomly generated inputs and filters.
        precision
            Value forwarded to ``dnn_conv`` as its ``precision`` argument.
        parameters
            8-tuple ``(inputs_shape, filters_shape, subsample, dilation,
            border_mode, conv_mode, alpha, beta)``.

        The cuDNN result is compared with ``alpha * reference`` (plus
        ``beta * out`` when ``beta`` is non-zero) using the tolerances
        returned by ``self.get_atol_rtol``.  When ``algo`` is listed in
        ``cudnn.deterministic_fwd_algorithms``, a second run of the cuDNN
        function is also compared with the first.
        """
        (inputs_shape, filters_shape, subsample, dilation,
         border_mode, conv_mode, alpha, beta) = parameters

        inputs_val = np.random.random(inputs_shape).astype(dtype)
        filters_val = np.random.random(filters_shape).astype(dtype)

        # Shrink the operands so float rounding does not produce very large
        # absolute errors.
        inputs_val /= 10
        filters_val /= 10

        inputs = aesara.shared(inputs_val)
        filters = aesara.shared(filters_val)

        # An explicit output buffer is only supplied when beta is non-zero.
        out = None
        if beta != 0:
            out = self.array_like_conv_output(
                inputs_shape, filters_shape, border_mode, subsample, dilation,
                dtype,
            )
            out /= 10

        # cuDNN implementation.
        gpu_conv = dnn_conv(
            img=inputs,
            kerns=filters,
            alpha=alpha,
            beta=beta,
            out=out,
            border_mode=border_mode,
            subsample=subsample,
            dilation=dilation,
            conv_mode=conv_mode,
            algo=algo,
            precision=precision,
        )
        f = aesara.function([], gpu_conv, mode=mode_with_gpu)

        # In 'conv' mode the reference gets filters reversed along every
        # axis after the first two (three axes for 5D inputs, else two).
        if conv_mode != "conv":
            flipped_filters = filters
        else:
            keep = slice(None)
            reverse = slice(None, None, -1)
            n_reversed = 3 if inputs.ndim == 5 else 2
            flipped_filters = filters[(keep, keep) + (reverse,) * n_reversed]

        # Reference implementation.
        reference_op = self.cpu_conv_class(
            border_mode=border_mode,
            subsample=subsample,
            filter_dilation=dilation,
        )
        conv_ref = reference_op(ref_cast(inputs), flipped_filters)
        f_ref = aesara.function([], conv_ref, mode="FAST_RUN")

        # Run both implementations and compare.
        res_ref = f_ref()
        res = np.asarray(f())
        if algo in cudnn.deterministic_fwd_algorithms:
            # A deterministic algorithm must reproduce its own output.
            utt.assert_allclose(res, np.asarray(f()))

        atol, rtol = self.get_atol_rtol(algo, dtype, precision)
        cpu_res = alpha * res_ref
        if beta != 0:
            cpu_res = cpu_res + beta * out
        self.scale_numpy_arrays_inplace(cpu_res, res, alpha)
        utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)