Example #1
def test_hostfromgpu_shape_i():
    # Test that the shape is lifted over hostfromgpu

    m = mode_with_gpu.including("local_dot_to_dot22",
                                "local_dot22_to_dot22scalar", "specialize")
    a = tt.fmatrix("a")
    ca = theano.gpuarray.type.GpuArrayType("float32", (False, False))()
    av = np.asarray(np.random.rand(5, 4), dtype="float32")
    cv = gpuarray.asarray(np.random.rand(5, 4),
                          dtype="float32",
                          context=get_context(test_ctx_name))

    f = theano.function([a], GpuFromHost(test_ctx_name)(a), mode=m)
    assert any(
        isinstance(x.op, GpuFromHost) for x in f.maker.fgraph.toposort())
    f = theano.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, tt.opt.Shape_i)
    assert isinstance(topo[1].op, tt.opt.Shape_i)
    assert isinstance(topo[2].op, tt.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    f = theano.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op for x in f.maker.fgraph.toposort()]
    f = theano.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, theano.compile.Shape_i)
    assert isinstance(topo[1].op, theano.compile.Shape_i)
    assert isinstance(topo[2].op, tt.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
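A minimal follow-on sketch, reusing a, m, and test_ctx_name from the test above, of what the lift buys: once the shape has been taken, the compiled graph should contain no transfer node at all.

g = theano.function([a], GpuFromHost(test_ctx_name)(a).shape, mode=m)
# Only host-side Shape_i/MakeVector ops remain after the lift; the
# GpuFromHost transfer is optimized away entirely.
assert not any(isinstance(n.op, GpuFromHost)
               for n in g.maker.fgraph.toposort())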
Example #2
    def filter_variable(self, other, allow_convert=True):
        from theano.gpuarray.basic_ops import GpuFromHost

        if hasattr(other, '_as_GpuArrayVariable'):
            other = other._as_GpuArrayVariable(self.context_name)

        if not isinstance(other, Variable):
            other = self.Constant(type=self, data=other)

        if other.type == self:
            return other

        if not isinstance(other.type, tensor.TensorType):
            raise TypeError('Incompatible type', (self, other.type))
        if (other.type.dtype != self.dtype):
            raise TypeError('Incompatible dtype',
                            (self.dtype, other.type.dtype))
        if other.type.ndim != self.ndim:
            raise TypeError('Incompatible number of dimensions.'
                            ' Expected %d, got %d.' % (self.ndim, other.ndim))
        if other.type.broadcastable != self.broadcastable:
            if allow_convert:
                type2 = other.type.clone(broadcastable=self.broadcastable)
                other2 = type2.convert_variable(other)
            else:
                other2 = None
            if other2 is None:
                raise TypeError(
                    'Incompatible broadcastable dimensions.'
                    ' Expected %s, got %s.' %
                    (str(self.broadcastable), str(other.type.broadcastable)))
            other = other2

        return GpuFromHost(self.context_name)(other)
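A minimal usage sketch (gpu_type and h are hypothetical names; a context registered under test_ctx_name, as in the surrounding tests, is assumed): a host TensorType variable passed through filter_variable comes back wrapped in a GpuFromHost transfer.

gpu_type = GpuArrayType(dtype="float32", broadcastable=(False, False),
                        context_name=test_ctx_name)
h = tensor.fmatrix("h")                # host-side TensorType variable
wrapped = gpu_type.filter_variable(h)  # hypothetical usage
# The result is h behind a transfer node, typed for gpu_type's context.
assert isinstance(wrapped.owner.op, GpuFromHost)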
Example #3
def test_transfer_cpu_gpu():
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #4
def traverse(out, x, x_copy, d, visited=None):
    """
    Function used by scan to parse the tree and figure out which nodes
    it needs to replace.

    There are two options:
        1) x and x_copy are on the host; then you replace x with x_copy
        2) x is on the gpu, x_copy on the host; then you need to replace
           host_from_gpu(x) with x_copy
    This happens because initially shared variables are on GPU... which is
    fine for the main computational graph but confuses things a bit for the
    inner graph of scan.

    """
    # ``visited`` is a set of nodes that are already known and don't need to
    # be checked again, which speeds up the traversal of multiply-connected
    # graphs. If a ``visited`` set is given, it is updated in-place so the
    # caller knows which nodes have been seen.
    if visited is None:
        visited = set()
    if out in visited:
        return d
    visited.add(out)
    from theano.sandbox import cuda
    from theano.gpuarray.basic_ops import GpuFromHost, host_from_gpu
    from theano.gpuarray import pygpu_activated
    from theano.gpuarray.type import GpuArrayType
    if out == x:
        if isinstance(x.type, cuda.CudaNdarrayType):
            d[out] = cuda.gpu_from_host(x_copy)
        else:
            assert isinstance(x.type, GpuArrayType)
            d[out] = GpuFromHost(x.type.context_name)(x_copy)
        return d
    elif out.owner is None:
        return d
    elif (cuda.cuda_available and out.owner.op == cuda.host_from_gpu
          and out.owner.inputs == [x]):
        d[out] = tensor.as_tensor_variable(x_copy)
        return d
    elif (pygpu_activated and out.owner.op == host_from_gpu
          and out.owner.inputs == [x]):
        d[out] = tensor.as_tensor_variable(x_copy)
        return d
    else:
        for inp in out.owner.inputs:
            d = traverse(inp, x, x_copy, d, visited)
        return d
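A hypothetical sketch of how the collected dictionary would be consumed (d, out, x, and x_copy as above); traverse only schedules the substitutions, and theano.clone can apply them in one pass:

d = traverse(out, x, x_copy, {})
# Rebuild the graph with every scheduled replacement applied at once.
out_rewritten = theano.clone(out, replace=d)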
Example #5
def test_transfer_strided():
    # This just ensures that strided transfers work in Theano;
    # libgpuarray has a much more comprehensive suite of tests to
    # ensure correctness.
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 8), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    av = av[:, ::2]
    gv = gv[:, ::2]

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #6
    def test_one_sequence_one_output_weights_gpu1(self):
        def f_rnn(u_t, x_tm1, W_in, W):
            return u_t * W_in + x_tm1 * W

        u = theano.tensor.fvector("u")
        x0 = theano.tensor.fscalar("x0")
        W_in = theano.tensor.fscalar("win")
        W = theano.tensor.fscalar("w")

        mode = mode_with_gpu.excluding("InputToGpuOptimizer")
        output, updates = scan(
            f_rnn,
            u,
            x0,
            [W_in, W],
            n_steps=None,
            truncate_gradient=-1,
            go_backwards=False,
            mode=mode,
        )

        output = GpuFromHost(test_ctx_name)(output)
        f2 = theano.function(
            [u, x0, W_in, W],
            output,
            updates=updates,
            allow_input_downcast=True,
            mode=mode,
        )

        rng = np.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform(size=(4, ), low=-5.0, high=5.0)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()

        v_u = np.asarray(v_u, dtype="float32")
        v_x0 = np.asarray(v_x0, dtype="float32")
        W = np.asarray(W, dtype="float32")
        W_in = np.asarray(W_in, dtype="float32")

        # compute the output in numpy
        v_out = np.zeros((4, ))
        v_out[0] = v_u[0] * W_in + v_x0 * W
        for step in range(1, 4):
            v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

        theano_values = f2(v_u, v_x0, W_in, W)
        utt.assert_allclose(theano_values, v_out)

        topo = f2.maker.fgraph.toposort()
        assert sum([isinstance(node.op, HostFromGpu) for node in topo]) == 0
        assert sum([isinstance(node.op, GpuFromHost) for node in topo]) == 4

        scan_node = [
            node for node in topo
            if isinstance(node.op, theano.scan_module.scan_op.Scan)
        ]
        assert len(scan_node) == 1
        scan_node = scan_node[0]
        scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

        # check that there is no gpu transfer in the inner loop.
        assert any(
            [isinstance(node.op, GpuElemwise) for node in scan_node_topo])
        assert not any(
            [isinstance(node.op, HostFromGpu) for node in scan_node_topo])
        assert not any(
            [isinstance(node.op, GpuFromHost) for node in scan_node_topo])
Example #7
def safe_to_gpu(x, ctx_name):
    # Transfer host TensorType variables to the GPU; anything else (e.g. a
    # variable that already lives on the GPU) is returned unchanged.
    if isinstance(x.type, tensor.TensorType):
        return GpuFromHost(ctx_name)(x)
    else:
        return x
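A hypothetical usage sketch: mapping safe_to_gpu over a mixed list moves only the host tensor, while a variable that is already a GpuArrayType passes through unchanged.

xs = [tensor.fmatrix("m"),
      GpuArrayType("float32", (False, False))("g")]
xs_gpu = [safe_to_gpu(x, test_ctx_name) for x in xs]
assert isinstance(xs_gpu[0].owner.op, GpuFromHost)  # host tensor: transferred
assert xs_gpu[1] is xs[1]                           # GPU variable: unchanged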