Example #1
    def make_node(self, A):
        ctx_name = infer_context_name(A)
        A = as_gpuarray_variable(A, ctx_name)
        A = gpu_contiguous(A)
        if A.ndim != 2:
            raise LinAlgError("Matrix rank error")
        if A.dtype != "float32":
            raise TypeError("only `float32` is supported for now")
        if self.compute_uv:
            return theano.Apply(
                self,
                [A],
                # return S, U, VT
                [
                    GpuArrayType(
                        A.dtype, broadcastable=[False], context_name=ctx_name
                    )(),
                    A.type(),
                    A.type(),
                ],
            )
        else:
            return theano.Apply(
                self,
                [A],
                # return only S
                [GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)()],
            )
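The pattern above recurs throughout these examples: each symbolic output is allocated by instantiating a GpuArrayType with a dtype, a broadcastable pattern, and a context name, then calling the type to get a fresh variable. Below is a minimal, hedged sketch of that pattern (the default context, context_name=None, is assumed; it is not taken from the snippet above):

# Hedged sketch: building the kinds of output variables make_node allocates
# for S (a vector) and U/VT (matrices). Assumes the default GPU context.
from theano.gpuarray.type import GpuArrayType

s_type = GpuArrayType("float32", broadcastable=(False,), context_name=None)
u_type = GpuArrayType("float32", broadcastable=(False, False), context_name=None)
s_var, u_var, vt_var = s_type(), u_type(), u_type()  # calling a type yields a new Variable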
Example #2
def test_values_eq_approx():
    a = rand_gpuarray(20, dtype="float32")
    assert GpuArrayType.values_eq_approx(a, a)
    b = a.copy()
    b[0] = np.asarray(b[0]) + 1.0
    assert not GpuArrayType.values_eq_approx(a, b)
    b = a.copy()
    b[0] = -np.asarray(b[0])
    assert not GpuArrayType.values_eq_approx(a, b)
Example #3
def test_deep_copy():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(20, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False, ))("g")

        f = theano.function([g], g)

        assert isinstance(f.maker.fgraph.toposort()[0].op, DeepCopyOp)

        res = f(a)

        assert GpuArrayType.values_eq(res, a)
Example #4
def test_rebroadcast():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(1, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False, ))("g")

        f = theano.function([g], Rebroadcast((0, True))(g))

        assert isinstance(f.maker.fgraph.toposort()[0].op, Rebroadcast)

        res = f(a)

        assert GpuArrayType.values_eq(res, a)
Example #5
def test_view():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(20, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False, ))("g")

        m = theano.compile.get_default_mode().excluding("local_view_op")
        f = theano.function([g], ViewOp()(g), mode=m)

        assert isinstance(f.maker.fgraph.toposort()[0].op, ViewOp)

        res = f(a)

        assert GpuArrayType.values_eq(res, a)
Example #6
def test_filter_variable():
    # Test that filter_variable accepts a more restrictive broadcast pattern
    gpu_row = GpuArrayType(dtype=theano.config.floatX,
                           broadcastable=(True, False))
    gpu_matrix = GpuArrayType(dtype=theano.config.floatX,
                              broadcastable=(False, False))
    r = gpu_row()
    m = gpu_matrix.filter_variable(r)
    assert m.type == gpu_matrix

    # On CPU as well
    r = theano.tensor.row()
    m = gpu_matrix.filter_variable(r)
    assert m.type == gpu_matrix
Example #7
def test_transfer_cpu_gpu():
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #8
    def make_node(self, activations, labels, input_lengths):
        context_name = infer_context_name(activations)
        t_activations = as_gpuarray_variable(activations,
                                             context_name=context_name)
        # Ensure activations array is C-contiguous
        t_activations = gpu_contiguous(t_activations)

        # Labels and input lengths are always on the CPU
        t_labels = tt.as_tensor_variable(labels)
        t_input_lengths = tt.as_tensor_variable(input_lengths)

        if t_activations.type.dtype != "float32":
            raise TypeError("activations must use the float32 type.")

        if t_activations.ndim != 3:
            raise ValueError("activations must have 3 dimensions.")

        if t_labels.type.dtype != "int32":
            raise TypeError("labels must use the int32 type.")

        if t_labels.ndim != 2:
            raise ValueError("labels must have 2 dimensions.")

        if t_input_lengths.type.dtype != "int32":
            raise TypeError("input_lengths must use the int32 type.")

        if t_input_lengths.ndim != 1:
            raise ValueError("input_lengths must have 1 dimension.")

        costs = GpuArrayType(dtype="float32",
                             broadcastable=(False, ),
                             context_name=context_name)()
        outputs = [costs]

        if self.compute_grad:
            gradients = GpuArrayType(
                dtype="float32",
                broadcastable=(
                    False,
                    False,
                    False,
                ),
                context_name=context_name,
            )()
            outputs += [gradients]

        return theano.Apply(self,
                            inputs=[t_activations, t_labels, t_input_lengths],
                            outputs=outputs)
Example #9
def test_transfer_gpu_gpu():
    g = GpuArrayType(dtype="float32",
                     broadcastable=(False, False),
                     context_name=test_ctx_name)()

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    mode = mode_with_gpu.excluding("cut_gpua_host_transfers",
                                   "local_cut_gpua_host_gpua")
    f = theano.function([g], GpuToGpu(test_ctx_name)(g), mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuToGpu)
    fv = f(gv)
    assert GpuArrayType.values_eq(fv, gv)
Example #10
    def make_node(self, inp1, inp2):
        if not cublas_available:
            raise RuntimeError(
                "CUBLAS is not available and "
                "GpuCublasTriangularSolve Op "
                "can not be constructed."
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim in [1, 2]
        assert inp1.dtype == inp2.dtype

        return theano.Apply(
            self,
            [inp1, inp2],
            [
                GpuArrayType(
                    inp1.dtype,
                    broadcastable=inp2.broadcastable,
                    context_name=context_name,
                )()
            ],
        )
Example #11
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError(
                "CUSOLVER is not available and "
                "GpuCusolverSolve Op can not be constructed."
            )
        if skcuda.__version__ <= "0.5.1":
            warnings.warn(
                "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == inp2.dtype

        return theano.Apply(
            self,
            [inp1, inp2],
            [
                GpuArrayType(
                    inp1.dtype,
                    broadcastable=inp1.broadcastable,
                    context_name=context_name,
                )()
            ],
        )
Example #12
    def output_type(self, inp):
        # add one extra dim for real/imag
        return GpuArrayType(
            inp.dtype,
            broadcastable=[False] * (inp.type.ndim + 1),
            context_name=inp.type.context_name,
        )
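A hedged sketch of how an output_type helper like this is typically consumed from the same op's make_node; the input handling shown here is assumed, not part of the snippet above:

    # Hypothetical companion method (assumed, not from the source above):
    def make_node(self, inp):
        inp = as_gpuarray_variable(inp, infer_context_name(inp))
        inp = gpu_contiguous(inp)
        # output_type(inp) returns a GpuArrayType; calling it creates the output variable
        return theano.Apply(self, [inp], [self.output_type(inp)()])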
Example #13
    def make_node(self, x, ilist):
        ctx_name = infer_context_name(x, ilist)
        x_ = as_gpuarray_variable(x, ctx_name)

        ilist__ = tt.as_tensor_variable(ilist)
        if ilist__.type.dtype not in tt.integer_dtypes:
            raise TypeError("index must be integers")
        if ilist__.type.dtype != "int64":
            ilist__ = tt.cast(ilist__, "int64")

        ilist_ = gpu_contiguous(as_gpuarray_variable(ilist__, ctx_name))

        if ilist_.type.dtype != "int64":
            raise TypeError("index must be int64")
        if ilist_.type.ndim != 1:
            raise TypeError("index must be a vector")
        if x_.type.ndim == 0:
            raise TypeError("cannot index into a scalar")

        bcast = ilist_.broadcastable + x_.broadcastable[1:]
        return gof.Apply(
            self,
            [x_, ilist_],
            [GpuArrayType(dtype=x.dtype, context_name=ctx_name, broadcastable=bcast)()],
        )
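As a concrete illustration (not spelled out in the source): indexing a non-broadcastable matrix, whose broadcastable pattern is (False, False), with a 1-d index vector whose pattern is (False,), gives bcast = (False,) + (False,) = (False, False), so the output is again a 2-d, non-broadcastable GPU type.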
Example #14
    def make_node(self, ten4, neib_shape, neib_step=None):
        ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
        neib_shape = tt.as_tensor_variable(neib_shape)
        if neib_step is None:
            neib_step = neib_shape
        else:
            neib_step = tt.as_tensor_variable(neib_step)

        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1
        assert neib_shape.dtype in tt.integer_dtypes
        assert neib_step.dtype in tt.integer_dtypes

        return Apply(
            self,
            [ten4, neib_shape, neib_step],
            [
                GpuArrayType(
                    broadcastable=(False, False),
                    dtype=ten4.type.dtype,
                    context_name=ten4.type.context_name,
                )()
            ],
        )
Example #15
    def make_node(self, x, k=0):  # TODO: dtype check
        x = as_gpuarray_variable(x, context_name=self.context_name)
        k = tensor.as_tensor_variable(k)
        assert x.ndim == 2
        assert k.ndim == 0
        broadcastable = (False, True) if self.keepdims else (False,)
        otype = GpuArrayType(dtype=x.type.dtype,
                             broadcastable=broadcastable,
                             context_name=self.context_name)
        return gof.Apply(self, [x, k], [otype()])
Example #16
    def make_node(self, d, x):
        d = as_gpuarray_variable(d, context_name=self.context_name)
        x = as_gpuarray_variable(x, context_name=self.context_name)
        assert d.ndim == 1
        assert x.ndim == 1
        broadcastable = (False,)
        otype = GpuArrayType(dtype='int64' if self.dtype_int64 else 'int32',
                             broadcastable=broadcastable,
                             context_name=self.context_name)
        return gof.Apply(self, [d, x], [otype()])
Example #17
    def make_node(self, x, *inputs):
        ctx_name = infer_context_name(x)
        rval = AdvancedSubtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(
            dtype=rval.outputs[0].type.dtype,
            broadcastable=rval.outputs[0].type.broadcastable,
            context_name=ctx_name,
        )
        x = as_gpuarray_variable(x, ctx_name)
        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
Example #18
    def make_node(self, x, k=0, n=0, m=0):  # TODO: dtype check
        x = as_gpuarray_variable(x, context_name=self.context_name)
        k = tensor.as_tensor_variable(k)
        n = tensor.as_tensor_variable(n)
        m = tensor.as_tensor_variable(m)
        assert x.ndim == 2 or x.ndim == 1
        assert k.ndim == 0
        assert n.ndim == 0
        assert m.ndim == 0
        otype = GpuArrayType(dtype=x.type.dtype,
                             broadcastable=(False, False),
                             context_name=self.context_name)
        return gof.Apply(self, [x, k, n, m], [otype()])
Example #19
def test_transfer_strided():
    # This is just to ensure that it works in Theano;
    # libgpuarray has a much more comprehensive suite of tests to
    # ensure correctness.
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 8), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    av = av[:, ::2]
    gv = gv[:, ::2]

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
Example #20
    def result(inp):
        dtype = inp.dtype
        ctx_name = _name_for_ctx(inp.context)
        key = (dtype, ctx_name)
        f = result.cache.get(key, None)
        if f is None:
            guard_in = GpuArrayType(str(dtype), (False,), context_name=ctx_name)()
            mode = get_mode("FAST_RUN").including("gpuarray")
            f = theano.function([guard_in], op(guard_in), mode=mode, profile=False)
            result.cache[key] = f
        return f(inp)
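The closure above caches one compiled function per (dtype, context) pair on the function object itself, so the cache attribute must exist before the first call. A minimal sketch of the assumed surrounding setup (not shown in the snippet):

    # Assumed setup in the enclosing factory, not part of the snippet above:
    result.cache = {}
    return result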
Example #21
    def make_node(self, n, m):
        n = tensor.as_tensor_variable(n)
        m = tensor.as_tensor_variable(m)
        assert n.ndim == 0
        assert m.ndim == 0
        otype = GpuArrayType(
            dtype=self.dtype,
            broadcastable=(False, False),
            context_name=self.context_name,
        )

        return Apply(self, [n, m], [otype()])
Example #22
def test_dump_load():
    x = GpuArraySharedVariable(
        "x",
        GpuArrayType("float32", (1, 1), name="x", context_name=test_ctx_name),
        [[1]],
        False,
    )

    with open("test", "wb") as f:
        dump(x, f)

    with open("test", "rb") as f:
        x = load(f)

    assert x.name == "x"
    np.testing.assert_allclose(x.get_value(), [[1]])
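A hedged variant (not part of the original test) that round-trips through a temporary file instead of leaving a stray "test" file in the working directory; it assumes the same dump/load helpers and shared variable x as above:

# Illustrative alternative only:
import tempfile

with tempfile.TemporaryFile() as fp:
    dump(x, fp)
    fp.seek(0)
    x = load(fp)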
Example #23
def test_shape():
    x = GpuArrayType(dtype="float32", broadcastable=[False, False, False])()
    v = gpuarray.zeros((3, 4, 5),
                       dtype="float32",
                       context=get_context(test_ctx_name))
    f = theano.function([x], x.shape)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo) == 4
        assert isinstance(topo[0].op, tt.opt.Shape_i)
        assert isinstance(topo[1].op, tt.opt.Shape_i)
        assert isinstance(topo[2].op, tt.opt.Shape_i)
        assert isinstance(topo[3].op, tt.opt.MakeVector)
    mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    f = theano.function([x], x.shape, mode=mode)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    assert len(topo) == 1
    assert isinstance(topo[0].op, tt.Shape)
Example #24
    def make_node(self, points, dim):
        assert (points.ndim == 3)
        points = gpu_contiguous(as_tensor_variable(points.astype("float32")))

        dim = get_scalar_constant_value(dim)
        if "int" not in str(dim.dtype):
            raise ValueError("dim must be an integer.")

        if dim > 31:
            raise ValueError(
                "GpuHashtable does not currently support dimensionality > 31."
            )

        dim = constant(dim, dtype="int32", name="dim")

        entries_type = GpuArrayType("int32",
                                    broadcastable=(False, ),
                                    context_name=self.context_name,
                                    name="entries")
        keys_type = GpuArrayType("int16",
                                 broadcastable=(False, False),
                                 context_name=self.context_name,
                                 name="keys")
        neib_ent_type = GpuArrayType("int32",
                                     broadcastable=(False, False, False),
                                     context_name=self.context_name,
                                     name="neighbor_entries")
        bary_type = GpuArrayType("float32",
                                 broadcastable=points.type.broadcastable,
                                 context_name=self.context_name,
                                 name="barycentric_coords")

        valid_entries_type = GpuArrayType("int32",
                                          broadcastable=(False, ),
                                          context_name=self.context_name,
                                          name="valid_entries")
        n_valid_type = GpuArrayType("int32",
                                    broadcastable=(False, ),
                                    context_name=self.context_name,
                                    name="n_valid")

        out_vars = [
            entries_type(name="hash_entries"),
            keys_type(name="hash_keys"),
            neib_ent_type(name="neighbor_entries"),
            bary_type(name="barycentric_coords"),
            valid_entries_type(name="valid_entries"),
            n_valid_type(name="n_valid")
        ]

        # TODO: I suppose GpuHashTable should be a type like GpuHashType, and
        # the Op should return one of those instead.

        # Two sets of entries can't be meaningfully compared without also
        # having the corresponding keys. Since we can only define per-output
        # comparisons, we have to hope that any time someone compares two
        # tables for equality, they will check all outputs.
        out_vars[0].tag.values_eq_approx = lambda e1, e2: True
        out_vars[2].tag.values_eq_approx = lambda e1, e2: True

        # The number of valid entries between two equivalent tables may be
        # different since it includes duplicates.
        out_vars[5].tag.values_eq_approx = lambda n1, n2: True

        def keys_comparison(k1, k2):
            k1 = [tuple(k) for k in np.asarray(k1)]
            k2 = [tuple(k) for k in np.asarray(k2)]
            return set(k1) == set(k2)

        out_vars[1].tag.values_eq_approx = keys_comparison

        def valid_entries_comparison(e1, e2):
            e1 = np.asarray(e1)
            e2 = np.asarray(e2)
            return len(np.unique(e1)) == len(np.unique(e2))

        out_vars[4].tag.values_eq_approx = valid_entries_comparison

        return Apply(self, [points, dim], out_vars)
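A small self-contained illustration (not from the source) of the semantics of the keys comparison above: the keys are treated as an unordered set of rows. keys_equal_unordered is a hypothetical helper mirroring keys_comparison:

import numpy as np

def keys_equal_unordered(k1, k2):
    # Same idea as keys_comparison above: compare rows as an unordered set
    return set(map(tuple, np.asarray(k1))) == set(map(tuple, np.asarray(k2)))

assert keys_equal_unordered([[1, 2], [3, 4]], [[3, 4], [1, 2]])      # row order ignored
assert not keys_equal_unordered([[1, 2], [3, 4]], [[1, 2], [5, 6]])  # different rows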
Example #25
from theano.gpuarray.blas import (
    GpuCorr3dMM,
    GpuCorr3dMM_gradInputs,
    GpuCorr3dMM_gradWeights,
    GpuCorrMM,
    GpuCorrMM_gradInputs,
    GpuCorrMM_gradWeights,
)
from theano.gpuarray.dnn import (
    GpuDnnConv,
    GpuDnnConvGradI,
    GpuDnnConvGradW,
    dnn_available,
)
from theano.gpuarray.type import GpuArrayType, get_context, gpuarray_shared_constructor

gpu_ftensor4 = GpuArrayType(dtype="float32", broadcastable=(False, ) * 4)


class TestDnnConv2d(BaseTestConv2d):
    @classmethod
    def setup_class(cls):
        super().setup_class()
        cls.shared = staticmethod(gpuarray_shared_constructor)
        # provide_shape is not used by the cuDNN implementation
        cls.provide_shape = [False]

    def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
        if not dnn_available(test_ctx_name):
            pytest.skip(dnn_available.msg)

        mode = mode_with_gpu
Example #26
def test_specify_shape():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(20, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False, ))("g")
        f = theano.function([g], theano.tensor.specify_shape(g, [20]))
        f(a)