Example #1
def test_gpuarray_shared_scalar():
    # By default, a scalar is not put on the GPU as a shared variable
    with pytest.raises(TypeError):
        gpuarray_shared_constructor(np.asarray(1, dtype="float32"))

    # But we can force that
    gpuarray_shared_constructor(np.asarray(1, dtype="float32"), target=test_ctx_name)
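
For comparison, a minimal sketch (assuming only NumPy and the standard aesara.shared constructor, not part of the test above) of sharing the same scalar on the CPU, where no explicit target is needed:

import numpy as np
import aesara

# Hypothetical CPU-side counterpart: aesara.shared accepts a 0-d array
# and creates an ordinary (non-GPU) shared scalar.
s = aesara.shared(np.asarray(1, dtype="float32"))
print(s.get_value())  # -> 1.0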
Example #2
    def run_gradweight(self,
                       inputs_shape,
                       filters_shape,
                       dCdH_shape,
                       subsample=(1, 1, 1)):
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]
        dCdH_shape = [dCdH_shape[i] for i in (0, 4, 1, 2, 3)]

        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
        dCdH_val = np.random.random(dCdH_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        dCdH = gpuarray_shared_constructor(dCdH_val)
        shape = gpuarray_shared_constructor(np.array(filters_shape[2:]))

        if subsample == (1, 1, 1):
            conv_ref = Corr3dMMGradWeights(subsample=subsample)(
                ref_cast(inputs), ref_cast(dCdH))
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(inputs,
                                                                     dCdH)
        else:
            conv_ref = Corr3dMMGradWeights(subsample=subsample)(
                ref_cast(inputs), ref_cast(dCdH), shape=shape)
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
                inputs, dCdH, shape=shape)

        f_ref = aesara.function([], conv_ref, mode=mode_without_gpu)
        f = aesara.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
Example #3
    def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
        inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
        filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]

        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
        filters_val = np.random.random(filters_shape).astype(config.floatX)
        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        bottom_height = (inputs_shape[2] - 1) * subsample[0] + filters_shape[2]
        bottom_width = (inputs_shape[3] - 1) * subsample[1] + filters_shape[3]
        bottom_depth = (inputs_shape[4] - 1) * subsample[2] + filters_shape[4]
        bottom_shape = gpuarray_shared_constructor(
            np.array([bottom_height, bottom_width, bottom_depth]))

        if subsample == (1, 1, 1):
            conv_ref = Corr3dMMGradInputs(subsample=subsample)(
                kern=ref_cast(filters), topgrad=ref_cast(inputs))
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs)
        else:
            conv_ref = Corr3dMMGradInputs(subsample=subsample)(
                kern=ref_cast(filters),
                topgrad=ref_cast(inputs),
                shape=bottom_shape)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=filters, topgrad=inputs, shape=bottom_shape)

        f_ref = aesara.function([], conv_ref, mode=mode_without_gpu)
        f = aesara.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
Example #4
    def run_conv_valid(
        self,
        inputs_shape,
        filters_shape,
        border_mode="valid",
        filter_dilation=(1, 1),
        subsample=(1, 1),
        unshared=False,
        verify_grad=False,
    ):
        inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
        if unshared:
            filters_shape = [filters_shape[i] for i in (0, 1, 2, 5, 3, 4)]
        else:
            filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]

        inputs_val = np.random.random(inputs_shape).astype(config.floatX)
        filters_val = np.random.random(filters_shape).astype(config.floatX)

        inputs = gpuarray_shared_constructor(inputs_val)
        filters = gpuarray_shared_constructor(filters_val)

        conv_ref = CorrMM(
            border_mode=border_mode,
            filter_dilation=filter_dilation,
            subsample=subsample,
            unshared=unshared,
        )(ref_cast(inputs), ref_cast(filters))
        f_ref = aesara.function([], conv_ref, mode=mode_without_gpu)

        conv = GpuCorrMM(
            border_mode=border_mode,
            filter_dilation=filter_dilation,
            subsample=subsample,
            unshared=unshared,
        )(inputs, filters)
        f = aesara.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

        if verify_grad:
            utt.verify_grad(
                GpuCorrMM(
                    border_mode=border_mode,
                    filter_dilation=filter_dilation,
                    subsample=subsample,
                    unshared=unshared,
                ),
                [inputs_val, filters_val],
                mode=mode_with_gpu,
            )
Example #5
def test_overflow_gpu_new_backend():
    seed = 12345
    n_substreams = 7
    curr_rstate = np.array([seed] * 6, dtype="int32")
    rstate = [curr_rstate.copy()]
    for j in range(1, n_substreams):
        rstate.append(rng_mrg.ff_2p72(rstate[-1]))
    rstate = np.asarray(rstate)
    rstate = gpuarray_shared_constructor(rstate)
    fct = functools.partial(GPUA_mrg_uniform.new,
                            rstate,
                            ndim=None,
                            dtype="float32")
    # should raise an error as the sizes overflow
    sizes = [
        (2**31, ),
        (2**32, ),
        (
            2**15,
            2**16,
        ),
        (2, 2**15, 2**15),
    ]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=True)
    # should not raise error
    sizes = [(2**5, ), (2**5, 2**5), (2**5, 2**5, 2**5)]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)
    # should support int32 sizes
    sizes = [(np.int32(2**10), ),
             (np.int32(2), np.int32(2**10), np.int32(2**10))]
    rng_mrg_overflow(sizes, fct, mode, should_raise_error=False)
Example #6
def test_set_value_non_contiguous():
    s = gpuarray_shared_constructor(
        np.asarray([[1.0, 2.0], [1.0, 2.0], [5, 6]]))
    s.set_value(s.get_value(borrow=True, return_internal_type=True)[::2],
                borrow=True)
    assert not s.get_value(borrow=True,
                           return_internal_type=True).flags["C_CONTIGUOUS"]
    # In the past, this failed
    s.set_value([[0, 0], [1, 1]])
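
A NumPy-only sketch (illustration, not part of the test) of why the strided slice taken above is not C-contiguous:

import numpy as np

a = np.asarray([[1.0, 2.0], [1.0, 2.0], [5, 6]])
b = a[::2]                       # every second row, shape (2, 2)
print(b.flags["C_CONTIGUOUS"])   # -> False: the kept rows are no longer adjacent in memory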
Example #7
def test_incsub_offset():
    # Test for https://github.com/Theano/Theano/issues/5670

    # Build a GPU variable whose value will have an offset (x1)
    x = gpuarray_shared_constructor(np.zeros(5, dtype=aesara.config.floatX))
    x1 = x[1:]
    # Use inc_subtensor on it
    y = vector()
    z = inc_subtensor(x1[2:], y)
    # Use updates so that inc_subtensor can happen inplace
    f = aesara.function([y], z, updates={x: z}, mode=mode_with_gpu)
    utt.assert_allclose(f([1, 2]), np.array([0, 0, 1, 2], dtype=aesara.config.floatX))
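
A short NumPy sketch (for illustration only) of the computation the compiled function performs on the underlying buffer:

import numpy as np

x = np.zeros(5, dtype="float32")
x1 = x[1:]                                    # view starting at a one-element offset
x1[2:] += np.array([1, 2], dtype="float32")   # the increment done by inc_subtensor
# x is now [0, 0, 0, 1, 2]; the value returned as z above is x1 == [0, 0, 1, 2]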
Example #8
def test_consistency_GPUA_parallel():
    # Verify that the random numbers generated by GPUA_mrg_uniform, in
    # parallel, are the same as the reference (Java) implementation by
    # L'Ecuyer et al.
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
    curr_rstate = np.array([seed] * 6, dtype="int32")

    for i in range(n_streams):
        stream_samples = []
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
        rstate = np.asarray(rstate)
        rstate = gpuarray_shared_constructor(rstate)

        new_rstate, sample = GPUA_mrg_uniform.new(rstate,
                                                  ndim=None,
                                                  dtype="float32",
                                                  size=(n_substreams, ))
        rstate.default_update = new_rstate

        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStream's behavior
        sample.rstate = rstate
        sample.update = (rstate, new_rstate)

        # We need the sample back in main (host) memory
        cpu_sample = at.as_tensor_variable(sample)
        f = aesara.function([], cpu_sample, mode=mode)

        for k in range(n_samples):
            s = f()
            stream_samples.append(s)

        samples.append(np.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
Example #9
def test_consistency_GPUA_serial():
    # Verify that the random numbers generated by GPUA_mrg_uniform, serially,
    # are the same as the reference (Java) implementation by L'Ecuyer et al.

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    curr_rstate = np.array([seed] * 6, dtype="int32")

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
            substream_rstate = np.array([stream_rstate.copy()], dtype="int32")
            # Transfer to device
            rstate = gpuarray_shared_constructor(substream_rstate)

            new_rstate, sample = GPUA_mrg_uniform.new(rstate,
                                                      ndim=None,
                                                      dtype="float32",
                                                      size=(1, ))
            rstate.default_update = new_rstate

            # Not really necessary, just mimicking
            # rng_mrg.MRG_RandomStream's behavior
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)

            # We need the sample back in main (host) memory
            cpu_sample = at.as_tensor_variable(sample)
            f = aesara.function([], cpu_sample, mode=mode)
            for k in range(n_samples):
                s = f()
                samples.append(s)

            # next substream
            stream_rstate = rng_mrg.ff_2p72(stream_rstate)

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = np.array(samples).flatten()
    assert np.allclose(samples, java_samples)
Example #10
def test_cpu_target_with_shared_variable():
    srng = MRG_RandomStream()
    s = np.random.rand(2, 3).astype("float32")
    x = gpuarray_shared_constructor(s, name="x")
    try:
        # Register the GPU constructor so that aesara.shared(x) tries to place values on the GPU
        aesara.compile.shared_constructor(gpuarray_shared_constructor)
        y = srng.uniform(x.shape, target="cpu")
        y.name = "y"
        z = (x * y).sum()
        z.name = "z"

        fz = aesara.function([], z, mode=mode)

        nodes = fz.maker.fgraph.toposort()
        assert not any(isinstance(node.op, GPUA_mrg_uniform) for node in nodes)
    finally:
        aesara.compile.shared_constructor(gpuarray_shared_constructor,
                                          remove=True)
Example #11
def test_Gpujoin_inplace():
    # Test that GpuJoin works inplace.
    #
    # This covers the case where several inputs are passed to GpuJoin but
    # all except one of them are empty. In that case GpuJoin should work
    # inplace and the output should be a view of the non-empty input.
    s = tt.lscalar()
    data = np.array([3, 4, 5], dtype=aesara.config.floatX)
    x = gpuarray_shared_constructor(data, borrow=True)
    z = tt.zeros((s,))

    join = GpuJoin(view=0)
    c = join(0, x, z)

    f = aesara.function([s], aesara.Out(c, borrow=True))
    if not isinstance(mode_with_gpu, aesara.compile.DebugMode):
        assert x.get_value(borrow=True, return_internal_type=True) is f(0)
    assert np.allclose(f(0), [3, 4, 5])
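
For intuition, a NumPy sketch (no view semantics implied, not part of the test) of the join being exercised:

import numpy as np

data = np.array([3, 4, 5], dtype="float32")
empty = np.zeros((0,), dtype="float32")
print(np.concatenate([data, empty]))   # -> [3. 4. 5.]
# GpuJoin(view=0) avoids this copy and returns a view of the non-empty input instead.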
Example #12
def test_GPUA_full_fill():
    # Make sure the whole sample buffer is filled.  Also make sure
    # large samples are consistent with CPU results.

    # This needs to be large to trigger the problem on the GPU
    size = (10, 1000)

    R = MRG_RandomStream(234)
    uni = R.uniform(size, nstreams=60 * 256)
    f_cpu = aesara.function([], uni)

    rstate_gpu = gpuarray_shared_constructor(
        R.state_updates[-1][0].get_value())
    new_rstate, sample = GPUA_mrg_uniform.new(rstate_gpu,
                                              ndim=None,
                                              dtype="float32",
                                              size=size)
    rstate_gpu.default_update = new_rstate
    f_gpu = aesara.function([], sample, mode=mode)

    utt.assert_allclose(f_cpu(), f_gpu())
Example #13
def test_elemwise_pow():
    # Test that GpuElemwise(pow) can compile with any combination of integer
    # or float input dtypes.
    dtypes = [
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "int8",
        "int16",
        "int32",
        "int64",
        "float16",
        "float32",
        "float64",
    ]

    for dtype_base in dtypes:
        for dtype_exp in dtypes:

            # Compile a gpu function with the specified dtypes
            base_val = np.random.randint(0, 5, size=10).astype(dtype_base)
            exp_val = np.random.randint(0, 3, size=10).astype(dtype_exp)

            base = vector(dtype=dtype_base)
            exp = gpuarray_shared_constructor(exp_val)
            assert exp.dtype == dtype_exp
            output = base**exp
            f = aesara.function([base], output, mode=mode_with_gpu)
            # The computation is not moved to the GPU when the output dtype is
            # an integer type: expect one GpuElemwise node for float outputs
            # and none otherwise (True == 1 in the assertion below).
            n = len([
                n for n in f.maker.fgraph.apply_nodes
                if isinstance(n.op, GpuElemwise)
            ])
            assert n == (output.dtype in float_dtypes)

            # Call the function to make sure the output is valid
            out = f(base_val)
            expected_out = base_val**exp_val
            assert_allclose(out, expected_out)
Example #14
    def test_blocksparse_grad_merge(self):
        b = fmatrix()
        h = ftensor3()
        iIdx = lmatrix()
        oIdx = lmatrix()

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
        W = gpuarray_shared_constructor(W_val, context=test_ctx_name)

        o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        gW = aesara.grad(o.sum(), W)

        lr = np.asarray(0.05, dtype="float32")

        upd = W - lr * gW

        f1 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode_with_gpu)

        # Make sure the lr update was merged.
        assert isinstance(f1.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)

        # Exclude the merge optimizations.
        mode = mode_with_gpu.excluding("local_merge_blocksparse_alpha")
        mode = mode.excluding("local_merge_blocksparse_output")

        f2 = aesara.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)

        # Make sure the lr update is not merged.
        assert not isinstance(f2.maker.fgraph.outputs[0].owner.op, GpuSparseBlockOuter)

        f2(h_val, iIdx_val, b_val, oIdx_val)
        W_ref = W.get_value()

        # reset the var
        W.set_value(W_val)
        f1(h_val, iIdx_val, b_val, oIdx_val)
        W_opt = W.get_value()

        utt.assert_allclose(W_ref, W_opt)
Example #15
def shared(x, **kwargs):
    return gpuarray_shared_constructor(x, target=test_ctx_name, **kwargs)
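
A hypothetical call to this helper (assuming the helper and its imports are in scope; the array contents and the name keyword are illustrative only):

import numpy as np

w = shared(np.ones((2, 3), dtype="float32"), name="w")
assert w.get_value().shape == (2, 3)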
Example #16
def test_validate_input_types_gpuarray_backend():
    with config.change_flags(compute_test_value="raise"):
        rstate = np.zeros((7, 6), dtype="int32")
        rstate = gpuarray_shared_constructor(rstate)
        rng_mrg.mrg_uniform.new(rstate, ndim=None, dtype="float32", size=(3, ))