def make_node(self, A):
    ctx_name = infer_context_name(A)
    A = as_gpuarray_variable(A, ctx_name)
    A = gpu_contiguous(A)
    if A.ndim != 2:
        raise LinAlgError("A must be a 2-dimensional matrix")
    if A.dtype != "float32":
        raise TypeError("only `float32` is supported for now")
    if self.compute_uv:
        return theano.Apply(
            self,
            [A],
            # return S, U, VT
            [
                GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)(),
                A.type(),
                A.type(),
            ],
        )
    else:
        return theano.Apply(
            self,
            [A],
            # return only S
            [GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)()],
        )
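# Hedged usage sketch (not part of the original source): assuming this
# make_node belongs to the MAGMA-backed SVD op and is reachable through a
# wrapper like theano.gpuarray.linalg.gpu_svd, building and running a graph
# might look like the following. Requires a configured GPU context.
def _example_gpu_svd_usage():
    import numpy as np
    import theano
    import theano.tensor as tt
    from theano.gpuarray.linalg import gpu_svd

    A = tt.fmatrix("A")
    U, S, VT = gpu_svd(A, compute_uv=1)  # S is 1-D; U and VT share A's type
    f = theano.function([A], [U, S, VT])
    return f(np.random.rand(5, 4).astype("float32"))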
def test_values_eq_approx():
    a = rand_gpuarray(20, dtype="float32")
    assert GpuArrayType.values_eq_approx(a, a)
    b = a.copy()
    b[0] = np.asarray(b[0]) + 1.0
    assert not GpuArrayType.values_eq_approx(a, b)
    b = a.copy()
    b[0] = -np.asarray(b[0])
    assert not GpuArrayType.values_eq_approx(a, b)
def test_deep_copy():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(20, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False,))("g")
        f = theano.function([g], g)
        assert isinstance(f.maker.fgraph.toposort()[0].op, DeepCopyOp)
        res = f(a)
        assert GpuArrayType.values_eq(res, a)
def test_rebroadcast():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(1, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False,))("g")
        f = theano.function([g], Rebroadcast((0, True))(g))
        assert isinstance(f.maker.fgraph.toposort()[0].op, Rebroadcast)
        res = f(a)
        assert GpuArrayType.values_eq(res, a)
def test_view():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(20, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False,))("g")
        m = theano.compile.get_default_mode().excluding("local_view_op")
        f = theano.function([g], ViewOp()(g), mode=m)
        assert isinstance(f.maker.fgraph.toposort()[0].op, ViewOp)
        res = f(a)
        assert GpuArrayType.values_eq(res, a)
def test_filter_variable():
    # Test that filter_variable accepts a variable whose broadcastable
    # pattern is more restrictive than the target type's.
    gpu_row = GpuArrayType(dtype=theano.config.floatX, broadcastable=(True, False))
    gpu_matrix = GpuArrayType(dtype=theano.config.floatX, broadcastable=(False, False))
    r = gpu_row()
    m = gpu_matrix.filter_variable(r)
    assert m.type == gpu_matrix

    # On CPU as well
    r = theano.tensor.row()
    m = gpu_matrix.filter_variable(r)
    assert m.type == gpu_matrix
def test_transfer_cpu_gpu():
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
def make_node(self, activations, labels, input_lengths):
    context_name = infer_context_name(activations)
    t_activations = as_gpuarray_variable(activations, context_name=context_name)
    # Ensure activations array is C-contiguous
    t_activations = gpu_contiguous(t_activations)

    # Labels and input lengths are always on the CPU
    t_labels = tt.as_tensor_variable(labels)
    t_input_lengths = tt.as_tensor_variable(input_lengths)

    if t_activations.type.dtype != "float32":
        raise TypeError("activations must use the float32 type.")
    if t_activations.ndim != 3:
        raise ValueError("activations must have 3 dimensions.")

    if t_labels.type.dtype != "int32":
        raise TypeError("labels must use the int32 type.")
    if t_labels.ndim != 2:
        raise ValueError("labels must have 2 dimensions.")

    if t_input_lengths.type.dtype != "int32":
        raise TypeError("input_lengths must use the int32 type.")
    if t_input_lengths.ndim != 1:
        raise ValueError("input_lengths must have 1 dimension.")

    costs = GpuArrayType(
        dtype="float32", broadcastable=(False,), context_name=context_name
    )()
    outputs = [costs]

    if self.compute_grad:
        gradients = GpuArrayType(
            dtype="float32",
            broadcastable=(False, False, False),
            context_name=context_name,
        )()
        outputs += [gradients]

    return theano.Apply(
        self, inputs=[t_activations, t_labels, t_input_lengths], outputs=outputs
    )
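# Hedged usage sketch (not part of the original source): assuming this
# make_node belongs to the warp-ctc op exposed as theano.gpuarray.ctc.gpu_ctc,
# the input contract enforced above looks like this in practice. Requires a
# GPU context and the warp-ctc library; the shapes below are illustrative.
def _example_gpu_ctc_usage():
    import numpy as np
    import theano
    import theano.tensor as tt
    from theano.gpuarray.ctc import gpu_ctc

    acts = tt.ftensor3("acts")         # (time, batch, alphabet), float32
    labels = tt.imatrix("labels")      # (batch, max_label_length), int32
    act_lens = tt.ivector("act_lens")  # (batch,), int32
    costs = gpu_ctc(acts, labels, act_lens)
    f = theano.function([acts, labels, act_lens], costs)
    return f(
        np.zeros((10, 2, 5), dtype="float32"),
        np.ones((2, 3), dtype="int32"),
        np.full((2,), 10, dtype="int32"),
    )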
def test_transfer_gpu_gpu():
    g = GpuArrayType(
        dtype="float32", broadcastable=(False, False), context_name=test_ctx_name
    )()

    av = np.asarray(rng.rand(5, 4), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    mode = mode_with_gpu.excluding(
        "cut_gpua_host_transfers", "local_cut_gpua_host_gpua"
    )

    f = theano.function([g], GpuToGpu(test_ctx_name)(g), mode=mode)
    topo = f.maker.fgraph.toposort()
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuToGpu)
    fv = f(gv)
    assert GpuArrayType.values_eq(fv, gv)
def make_node(self, inp1, inp2):
    if not cublas_available:
        raise RuntimeError(
            "CUBLAS is not available and the "
            "GpuCublasTriangularSolve Op cannot be constructed."
        )
    context_name = infer_context_name(inp1, inp2)

    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)

    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    assert inp1.ndim == 2
    assert inp2.ndim in [1, 2]
    assert inp1.dtype == inp2.dtype

    return theano.Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp2.broadcastable,
                context_name=context_name,
            )()
        ],
    )
def make_node(self, inp1, inp2):
    if not cusolver_available:
        raise RuntimeError(
            "CUSOLVER is not available and the "
            "GpuCusolverSolve Op cannot be constructed."
        )
    if skcuda.__version__ <= "0.5.1":
        warnings.warn(
            "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
        )
    context_name = infer_context_name(inp1, inp2)

    inp1 = as_gpuarray_variable(inp1, context_name)
    inp2 = as_gpuarray_variable(inp2, context_name)

    inp1 = gpu_contiguous(inp1)
    inp2 = gpu_contiguous(inp2)

    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == inp2.dtype

    return theano.Apply(
        self,
        [inp1, inp2],
        [
            GpuArrayType(
                inp1.dtype,
                broadcastable=inp1.broadcastable,
                context_name=context_name,
            )()
        ],
    )
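# Hedged usage sketch (not part of the original source): assuming this op
# backs a dense-solve wrapper such as theano.gpuarray.linalg.gpu_solve,
# solving A x = b for float32 operands might look like this.
def _example_gpu_solve_usage():
    import numpy as np
    import theano
    import theano.tensor as tt
    from theano.gpuarray.linalg import gpu_solve

    A = tt.fmatrix("A")
    b = tt.fmatrix("b")
    f = theano.function([A, b], gpu_solve(A, b))
    return f(np.eye(4, dtype="float32"), np.ones((4, 1), dtype="float32"))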
def output_type(self, inp):
    # add one extra trailing dimension to hold the real/imag parts
    return GpuArrayType(
        inp.dtype,
        broadcastable=[False] * (inp.type.ndim + 1),
        context_name=inp.type.context_name,
    )
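# Hedged illustration (not part of the original source): the extra trailing
# dimension added by output_type holds the real and imaginary components of
# each complex value, mirroring NumPy's view of a complex array as pairs of
# floats.
def _example_real_imag_packing():
    import numpy as np

    z = np.fft.rfft(np.ones(8))                   # complex result
    packed = np.stack([z.real, z.imag], axis=-1)  # one extra dim of size 2
    assert packed.ndim == z.ndim + 1
    return packed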
def make_node(self, x, ilist):
    ctx_name = infer_context_name(x, ilist)
    x_ = as_gpuarray_variable(x, ctx_name)

    ilist__ = tt.as_tensor_variable(ilist)
    if ilist__.type.dtype not in tt.integer_dtypes:
        raise TypeError("index must be integers")
    if ilist__.type.dtype != "int64":
        ilist__ = tt.cast(ilist__, "int64")

    ilist_ = gpu_contiguous(as_gpuarray_variable(ilist__, ctx_name))

    if ilist_.type.dtype != "int64":
        raise TypeError("index must be int64")
    if ilist_.type.ndim != 1:
        raise TypeError("index must be a vector")
    if x_.type.ndim == 0:
        raise TypeError("cannot index into a scalar")

    bcast = ilist_.broadcastable + x_.broadcastable[1:]
    return gof.Apply(
        self,
        [x_, ilist_],
        [GpuArrayType(dtype=x.dtype, context_name=ctx_name, broadcastable=bcast)()],
    )
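# Hedged illustration (not part of the original source): the broadcastable
# pattern `ilist_.broadcastable + x_.broadcastable[1:]` mirrors NumPy take
# along axis 0, where the index shape replaces the first dimension of x.
def _example_take_shape():
    import numpy as np

    x = np.arange(12.0).reshape(3, 4)
    idx = np.array([2, 0, 0], dtype="int64")
    assert x.take(idx, axis=0).shape == idx.shape + x.shape[1:]  # (3, 4)
    return x.take(idx, axis=0)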
def make_node(self, ten4, neib_shape, neib_step=None):
    ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
    neib_shape = tt.as_tensor_variable(neib_shape)
    if neib_step is None:
        neib_step = neib_shape
    else:
        neib_step = tt.as_tensor_variable(neib_step)

    assert ten4.ndim == 4
    assert neib_shape.ndim == 1
    assert neib_step.ndim == 1
    assert neib_shape.dtype in tt.integer_dtypes
    assert neib_step.dtype in tt.integer_dtypes

    return Apply(
        self,
        [ten4, neib_shape, neib_step],
        [
            GpuArrayType(
                broadcastable=(False, False),
                dtype=ten4.type.dtype,
                context_name=ten4.type.context_name,
            )()
        ],
    )
def make_node(self, x, k=0):
    # TODO: dtype check
    x = as_gpuarray_variable(x, context_name=self.context_name)
    k = tensor.as_tensor_variable(k)
    assert x.ndim == 2
    assert k.ndim == 0
    broadcastable = (False, True) if self.keepdims else (False,)
    otype = GpuArrayType(
        dtype=x.type.dtype,
        broadcastable=broadcastable,
        context_name=self.context_name,
    )
    return gof.Apply(self, [x, k], [otype()])
def make_node(self, d, x):
    d = as_gpuarray_variable(d, context_name=self.context_name)
    x = as_gpuarray_variable(x, context_name=self.context_name)
    assert d.ndim == 1
    assert x.ndim == 1
    broadcastable = (False,)
    otype = GpuArrayType(
        dtype="int64" if self.dtype_int64 else "int32",
        broadcastable=broadcastable,
        context_name=self.context_name,
    )
    return gof.Apply(self, [d, x], [otype()])
def make_node(self, x, *inputs):
    ctx_name = infer_context_name(x)
    rval = AdvancedSubtensor.make_node(self, x, *inputs)
    otype = GpuArrayType(
        dtype=rval.outputs[0].type.dtype,
        broadcastable=rval.outputs[0].type.broadcastable,
        context_name=ctx_name,
    )
    x = as_gpuarray_variable(x, ctx_name)
    return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
def make_node(self, x, k=0, n=0, m=0):
    # TODO: dtype check
    x = as_gpuarray_variable(x, context_name=self.context_name)
    k = tensor.as_tensor_variable(k)
    n = tensor.as_tensor_variable(n)
    m = tensor.as_tensor_variable(m)
    assert x.ndim in (1, 2)
    assert k.ndim == 0
    assert n.ndim == 0
    assert m.ndim == 0
    otype = GpuArrayType(
        dtype=x.type.dtype,
        broadcastable=(False, False),
        context_name=self.context_name,
    )
    return gof.Apply(self, [x, k, n, m], [otype()])
def test_transfer_strided():
    # This is just to ensure that strided transfers work in Theano;
    # libgpuarray has a much more comprehensive suite of tests to
    # ensure correctness.
    a = tt.fmatrix("a")
    g = GpuArrayType(dtype="float32", broadcastable=(False, False))("g")

    av = np.asarray(rng.rand(5, 8), dtype="float32")
    gv = gpuarray.array(av, context=get_context(test_ctx_name))

    av = av[:, ::2]
    gv = gv[:, ::2]

    f = theano.function([a], GpuFromHost(test_ctx_name)(a))
    fv = f(av)
    assert GpuArrayType.values_eq(fv, gv)

    f = theano.function([g], host_from_gpu(g))
    fv = f(gv)
    assert np.all(fv == av)
def result(inp):
    dtype = inp.dtype
    ctx_name = _name_for_ctx(inp.context)
    key = (dtype, ctx_name)
    # Compile one function per (dtype, context) pair and memoize it.
    f = result.cache.get(key, None)
    if f is None:
        guard_in = GpuArrayType(str(dtype), (False,), context_name=ctx_name)()
        mode = get_mode("FAST_RUN").including("gpuarray")
        f = theano.function([guard_in], op(guard_in), mode=mode, profile=False)
        result.cache[key] = f
    return f(inp)
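# Hedged sketch (not part of the original source): `result.cache` is a dict
# hung off the function object itself and is presumably initialized next to
# the definition (e.g. `result.cache = {}`). The minimal pattern in isolation:
def _example_function_attribute_cache():
    def compute(key):
        f = compute.cache.get(key, None)
        if f is None:
            f = len(key)  # stands in for an expensive theano.function compile
            compute.cache[key] = f
        return f

    compute.cache = {}
    assert compute("float32") == compute("float32")  # second call hits cache
    return compute.cache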
def make_node(self, n, m):
    n = tensor.as_tensor_variable(n)
    m = tensor.as_tensor_variable(m)
    assert n.ndim == 0
    assert m.ndim == 0
    otype = GpuArrayType(
        dtype=self.dtype,
        broadcastable=(False, False),
        context_name=self.context_name,
    )
    return Apply(self, [n, m], [otype()])
def test_dump_load():
    x = GpuArraySharedVariable(
        "x",
        GpuArrayType("float32", (1, 1), name="x", context_name=test_ctx_name),
        [[1]],
        False,
    )

    with open("test", "wb") as f:
        dump(x, f)

    with open("test", "rb") as f:
        x = load(f)

    assert x.name == "x"
    np.testing.assert_allclose(x.get_value(), [[1]])
def test_shape():
    x = GpuArrayType(dtype="float32", broadcastable=[False, False, False])()
    v = gpuarray.zeros((3, 4, 5), dtype="float32", context=get_context(test_ctx_name))
    f = theano.function([x], x.shape)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    if theano.config.mode != "FAST_COMPILE":
        assert len(topo) == 4
        assert isinstance(topo[0].op, tt.opt.Shape_i)
        assert isinstance(topo[1].op, tt.opt.Shape_i)
        assert isinstance(topo[2].op, tt.opt.Shape_i)
        assert isinstance(topo[3].op, tt.opt.MakeVector)

    mode = mode_with_gpu.excluding("local_shape_to_shape_i")
    f = theano.function([x], x.shape, mode=mode)
    topo = f.maker.fgraph.toposort()
    assert np.all(f(v) == (3, 4, 5))
    assert len(topo) == 1
    assert isinstance(topo[0].op, tt.Shape)
def make_node(self, points, dim):
    assert points.ndim == 3
    points = gpu_contiguous(as_tensor_variable(points.astype("float32")))

    dim = get_scalar_constant_value(dim)
    if "int" not in str(dim.dtype):
        raise ValueError("dim must be an integer.")
    if dim > 31:
        raise ValueError(
            "GpuHashtable does not currently support dimensionality > 31."
        )
    dim = constant(dim, dtype="int32", name="dim")

    entries_type = GpuArrayType(
        "int32",
        broadcastable=(False,),
        context_name=self.context_name,
        name="entries",
    )
    keys_type = GpuArrayType(
        "int16",
        broadcastable=(False, False),
        context_name=self.context_name,
        name="keys",
    )
    neib_ent_type = GpuArrayType(
        "int32",
        broadcastable=(False, False, False),
        context_name=self.context_name,
        name="neighbor_entries",
    )
    bary_type = GpuArrayType(
        "float32",
        broadcastable=points.type.broadcastable,
        context_name=self.context_name,
        name="barycentric_coords",
    )
    valid_entries_type = GpuArrayType(
        "int32",
        broadcastable=(False,),
        context_name=self.context_name,
        name="valid_entries",
    )
    n_valid_type = GpuArrayType(
        "int32",
        broadcastable=(False,),
        context_name=self.context_name,
        name="n_valid",
    )

    out_vars = [
        entries_type(name="hash_entries"),
        keys_type(name="hash_keys"),
        neib_ent_type(name="neighbor_entries"),
        bary_type(name="barycentric_coords"),
        valid_entries_type(name="valid_entries"),
        n_valid_type(name="n_valid"),
    ]

    # TODO: I suppose GpuHashTable should be a type like GpuHashType, and
    # the Op should return one of those instead.

    # Two sets of entries can't be meaningfully compared without also
    # having the corresponding keys. Since we can only define per-output
    # comparisons, we have to hope that any time someone compares two
    # tables for equality, they will check all outputs.
    out_vars[0].tag.values_eq_approx = lambda e1, e2: True
    out_vars[2].tag.values_eq_approx = lambda e1, e2: True

    # The number of valid entries between two equivalent tables may be
    # different since it includes duplicates.
    out_vars[5].tag.values_eq_approx = lambda n1, n2: True

    def keys_comparison(k1, k2):
        k1 = [tuple(k) for k in np.asarray(k1)]
        k2 = [tuple(k) for k in np.asarray(k2)]
        return set(k1) == set(k2)

    out_vars[1].tag.values_eq_approx = keys_comparison

    def valid_entries_comparison(e1, e2):
        e1 = np.asarray(e1)
        e2 = np.asarray(e2)
        return len(np.unique(e1)) == len(np.unique(e2))

    out_vars[4].tag.values_eq_approx = valid_entries_comparison

    return Apply(self, [points, dim], out_vars)
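# Hedged illustration (not part of the original source): keys_comparison
# treats the key arrays as unordered sets of rows, so two tables holding the
# same keys in different slot orders still compare equal.
def _example_keys_comparison():
    import numpy as np

    k1 = np.array([[1, 2], [3, 4]], dtype="int16")
    k2 = np.array([[3, 4], [1, 2]], dtype="int16")  # same rows, other order

    def as_set(k):
        return {tuple(row) for row in np.asarray(k)}

    assert as_set(k1) == as_set(k2)
    return as_set(k1)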
    GpuCorr3dMM,
    GpuCorr3dMM_gradInputs,
    GpuCorr3dMM_gradWeights,
    GpuCorrMM,
    GpuCorrMM_gradInputs,
    GpuCorrMM_gradWeights,
)
from theano.gpuarray.dnn import (
    GpuDnnConv,
    GpuDnnConvGradI,
    GpuDnnConvGradW,
    dnn_available,
)
from theano.gpuarray.type import (
    GpuArrayType,
    get_context,
    gpuarray_shared_constructor,
)

gpu_ftensor4 = GpuArrayType(dtype="float32", broadcastable=(False,) * 4)


class TestDnnConv2d(BaseTestConv2d):
    @classmethod
    def setup_class(cls):
        super().setup_class()
        cls.shared = staticmethod(gpuarray_shared_constructor)
        # provide_shape is not used by the cuDNN implementation
        cls.provide_shape = [False]

    def run_test_case(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
        if not dnn_available(test_ctx_name):
            pytest.skip(dnn_available.msg)
        mode = mode_with_gpu
def test_specify_shape():
    for dtype in ["float16", "float32"]:
        a = rand_gpuarray(20, dtype=dtype)
        g = GpuArrayType(dtype=dtype, broadcastable=(False,))("g")
        f = theano.function([g], theano.tensor.specify_shape(g, [20]))
        f(a)