Example #1
def test_gemv_dot_strides():
    # Reported in https://github.com/Theano/Theano/issues/6142
    xv = rand(5)
    yv = rand(5, 1)
    x = gpuarray_shared_constructor(xv)
    y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
    f = theano.function([], tensor.dot(x, y[::-1]), mode=mode_with_gpu)
    out = f()
    utt.assert_allclose(out, np.dot(xv, yv[::-1]))
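The test above relies on helpers defined elsewhere in Theano's test modules (rand, utt, mode_with_gpu). Below is a minimal self-contained sketch of the same pattern, assuming the import paths used in Example #3 and an illustrative tolerance; it is not taken verbatim from the test suite.

import numpy as np
import theano
import theano.tensor as tensor
from theano.gpuarray import gpuarray_shared_constructor
from theano.gpuarray.tests.config import mode_with_gpu

xv = np.random.rand(5).astype("float32")      # host-side data
yv = np.random.rand(5, 1).astype("float32")
x = gpuarray_shared_constructor(xv)           # shared variable placed on the GPU
y = gpuarray_shared_constructor(yv, broadcastable=(False, True))
f = theano.function([], tensor.dot(x, y[::-1]), mode=mode_with_gpu)
np.testing.assert_allclose(np.asarray(f()), np.dot(xv, yv[::-1]), rtol=1e-5)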
Example #2
def test_float16():
    # gemv (gemm called)
    float16_data = [
        rand(3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name)
        for val in float16_data
    ]
    o = gemv(*float16_shared)
    f = theano.function([], o, mode=mode_with_gpu)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)
    topo = f.maker.fgraph.toposort()
    assert any([isinstance(n.op, GpuGemm) for n in topo])

    # gemm
    float16_data = [
        rand(3, 3).astype("float16"),
        np.asarray(1, dtype=np.float32),
        rand(3, 3).astype("float16"),
        rand(3, 3).astype("float16"),
        np.asarray(0.5, dtype=np.float32),
    ]
    float16_shared = [
        gpuarray_shared_constructor(val, target=test_ctx_name)
        for val in float16_data
    ]
    o = gpugemm_no_inplace(*float16_shared)
    f = theano.function([], o)
    y, alpha, A, x, beta = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), alpha * np.dot(A, x) + beta * y)

    # dot22
    float16_data = [rand(3, 3).astype("float16"), rand(3, 3).astype("float16")]

    float16_shared = [gpuarray_shared_constructor(val) for val in float16_data]
    o = gpu_dot22(*float16_shared)
    f = theano.function([], o)
    x, y = float16_data
    out = f()
    utt.assert_allclose(np.asarray(out), np.dot(x, y))
Example #3
def test_leaf_lstm():
    from theano.gpuarray import gpuarray_shared_constructor
    from theano.gpuarray.tests.config import mode_with_gpu

    n_time = 5
    n_batch = 3
    n_input = 2
    n_output = 3
    xs_data = I.Normal()([n_time, n_batch, n_input])
    ts_data = I.Normal()([n_time, n_batch, n_output])
    h0_data = I.Normal()([n_batch, n_output])
    c0_data = I.Normal()([n_batch, n_output])
    xs = tt.tensor3("xs")
    ts = tt.tensor3("ts")
    h0 = tt.matrix("h0")
    c0 = tt.matrix("c0")
    givens = {xs: xs_data, ts: ts_data, h0: h0_data, c0: c0_data}

    def forward(fun):
        return theano.function([],
                               fun(xs, h0, c0),
                               givens=givens,
                               on_unused_input='ignore',
                               mode=mode_with_gpu)()

    lstm = L.LSTM(n_input, n_output, impl=L.RNNImpl.ref)
    ref_lstm = forward(lstm.ref_forward)

    fused_lstm = forward(lstm.fused_forward)
    for r, f in zip(ref_lstm, fused_lstm):
        numpy.testing.assert_array_almost_equal(r, f)

    def backward(fun, params):
        ys = fun(xs, h0, c0)[0]
        cost = tt.mean((ts - ys)**2)
        grad = tt.grad(cost, [xs, h0, c0] + params)
        return theano.function([],
                               grad,
                               givens=givens,
                               on_unused_input='ignore',
                               mode=mode_with_gpu)()

    fused_grad = backward(lstm.fused_forward, lstm.params)
    ref_grad = backward(lstm.ref_forward, lstm.params)
    for r, f in zip(ref_grad, fused_grad):
        numpy.testing.assert_array_almost_equal(r, f)

    lstm.impl = L.RNNImpl.cudnn  # TODO: do this in cudnn_forward
    cudnn_lstm = forward(lstm.cudnn_forward)
    for r, c in zip(ref_lstm, cudnn_lstm):
        numpy.testing.assert_array_almost_equal(r, c, decimal=cudnn_decimal)

    cudnn_grad = backward(lstm.cudnn_forward, lstm.params)
    cudnn_grad = cudnn_grad[:3] + lstm._rnn_block.split_params(
        gpuarray_shared_constructor(cudnn_grad[3]), 0, [n_batch, n_input])
    for r, c in zip(ref_grad, cudnn_grad):
        numpy.testing.assert_array_almost_equal(r, c, decimal=cudnn_decimal)
Example #4
def test_gpu_cholesky_inplace(self):
    A = self.rand_symmetric(1000)
    A_gpu = gpuarray_shared_constructor(A)
    A_copy = A_gpu.get_value()
    C = GpuMagmaCholesky()(A_gpu)
    fn = theano.function([], C, mode=mode_with_gpu, updates=[(A_gpu, C)])
    assert any([
        node.op.inplace for node in fn.maker.fgraph.toposort()
        if isinstance(node.op, GpuMagmaCholesky)
    ])
    fn()
    L = A_gpu.get_value()
    utt.assert_allclose(np.dot(L, L.T), A_copy, atol=1e-3)
Example #5
def test_gpu_matrix_inverse_inplace(self):
    N = 1000
    test_rng = np.random.RandomState(seed=1)
    A_val_gpu = gpuarray_shared_constructor(
        test_rng.rand(N, N).astype("float32") * 2 - 1)
    A_val_copy = A_val_gpu.get_value()
    A_val_gpu_inv = GpuMagmaMatrixInverse()(A_val_gpu)
    fn = theano.function([],
                         A_val_gpu_inv,
                         mode=mode_with_gpu,
                         updates=[(A_val_gpu, A_val_gpu_inv)])
    assert any([
        node.op.inplace for node in fn.maker.fgraph.toposort()
        if isinstance(node.op, GpuMagmaMatrixInverse)
    ])
    fn()
    utt.assert_allclose(np.eye(N),
                        np.dot(A_val_gpu.get_value(), A_val_copy),
                        atol=5e-3)
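Examples #4 and #5 share one pattern: the op's result is written back into the GPU shared variable through updates=[(shared, result)], and the compiled graph is then scanned to confirm the optimizer substituted the inplace variant of the op. A minimal sketch of the write-back mechanism alone, using an ordinary shared variable so it runs without MAGMA (names and values are illustrative):

import numpy as np
import theano

A = theano.shared(np.eye(4, dtype="float32"))
# Each call overwrites A with 2 * A via the updates mechanism.
step = theano.function([], [], updates=[(A, 2 * A)])
step()
assert np.allclose(A.get_value(), 2 * np.eye(4))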
Example #6
def shared(val):
    try:
        return gpuarray_shared_constructor(val)
    except TypeError:
        return theano.shared(val)
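This helper prefers a GPU shared variable and falls back to the default constructor whenever gpuarray_shared_constructor raises a TypeError for the given value. A hypothetical usage sketch (the array contents are arbitrary):

import numpy as np

# Uses the helper above: GPU-backed if possible, plain theano.shared otherwise.
s = shared(np.ones((3, 3), dtype="float32"))
assert s.get_value().sum() == 9.0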
Example #7
def test_leaf_gru():
    from theano.gpuarray import gpuarray_shared_constructor
    from theano.gpuarray.tests.config import mode_with_gpu

    n_time = 5
    n_batch = 3
    n_input = 2
    n_output = 3
    xs_data = I.Normal()([n_time, n_batch, n_input])
    h0_data = I.Normal()([n_batch, n_output])
    ts_data = I.Normal()([n_time, n_batch, n_output])
    xs = tt.tensor3("xs")
    h0 = tt.matrix("h0")
    ts = tt.tensor3("ts")
    givens = {xs: xs_data, h0: h0_data, ts: ts_data}

    def forward(fun):
        return theano.function([],
                               fun(xs, h0),
                               givens=givens,
                               on_unused_input='ignore',
                               mode=mode_with_gpu)()

    # NOTE: n_batch won't affect rnnblock (!?)
    gru = L.GRU(n_input, n_output, n_batch=1, impl=L.RNNImpl.auto)
    assert gru.impl != L.RNNImpl.auto
    gru.impl = L.RNNImpl.ref
    ref_ys = forward(gru.ref_forward)

    fused_ys = forward(gru.fused_forward)
    for r, f in zip(ref_ys, fused_ys):
        numpy.testing.assert_array_almost_equal(r, f)

    def backward(fun, params):
        ys = fun(xs, h0)[0]
        cost = tt.mean((ts - ys)**2)
        grad = tt.grad(cost, [xs, h0] + params)
        return theano.function([],
                               grad,
                               givens=givens,
                               on_unused_input='ignore',
                               mode=mode_with_gpu)()

    ref_grad = backward(gru.ref_forward, gru.params)
    fused_grad = backward(gru.fused_forward, gru.params)
    for r, f in zip(ref_grad, fused_grad):
        numpy.testing.assert_array_almost_equal(r, f)

    gru.impl = L.RNNImpl.cudnn
    assert gru.params == list(gru.get_params())
    cudnn_ys = forward(gru.cudnn_forward)
    for r, c in zip(ref_ys, cudnn_ys):
        numpy.testing.assert_array_almost_equal(r, c, decimal=cudnn_decimal)
    cudnn_grad = backward(gru.cudnn_forward, gru.params)
    cudnn_grad = cudnn_grad[:2] + gru._rnn_block.split_params(
        gpuarray_shared_constructor(cudnn_grad[2]), 0, [n_batch, n_input])
    for r, f in zip(ref_grad, cudnn_grad):
        numpy.testing.assert_array_almost_equal(r, f, decimal=cudnn_decimal)

    gru.impl = L.RNNImpl.fused
    fused_ys = forward(gru.fused_forward)
    for r, f in zip(ref_ys, fused_ys):
        numpy.testing.assert_array_almost_equal(r, f)