Example #1
 def make_node(self, x, y, *inputs):
     ctx_name = infer_context_name(x, y)
     x = as_gpuarray_variable(x, ctx_name)
     y = as_gpuarray_variable(y, ctx_name)
     rval = IncSubtensor.make_node(self, x, y, *inputs)
     ret = gof.Apply(self, [x, y] + rval.inputs[2:], [x.type()])
     return ret
Example #2
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuCusolverSolve Op can not be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
        context_name = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, context_name)
        inp2 = basic_ops.as_gpuarray_variable(inp2, context_name)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(
            self, [inp1, inp2],
            [GpuArrayType('float32',
                          broadcastable=inp1.broadcastable,
                          context_name=context_name)()])
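
The asserts above reduce to a simple contract: both operands must be contiguous 2-D float32 arrays in the same GPU context. A host-side numpy sketch of that same contract (illustration only; it does not touch the GPU or the op itself):

    import numpy as np

    # Hypothetical host-side analogue of the checks in make_node above.
    A = np.ascontiguousarray(np.random.rand(4, 4).astype("float32"))
    b = np.ascontiguousarray(np.random.rand(4, 2).astype("float32"))
    assert A.ndim == 2 and b.ndim == 2
    assert A.dtype == np.float32 and b.dtype == np.float32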
Example #3
    def make_node(self, inp1, inp2):
        if not cublas_available:
            raise RuntimeError(
                "CUBLAS is not available and "
                "GpuCublasTriangularSolve Op "
                "can not be constructed."
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim in [1, 2]
        assert inp1.dtype == inp2.dtype

        return theano.Apply(
            self,
            [inp1, inp2],
            [
                GpuArrayType(
                    inp1.dtype,
                    broadcastable=inp2.broadcastable,
                    context_name=context_name,
                )()
            ],
        )
Example #4
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError(
                "CUSOLVER is not available and "
                "GpuCusolverSolve Op can not be constructed."
            )
        if skcuda.__version__ <= "0.5.1":
            warnings.warn(
                "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == inp2.dtype

        return theano.Apply(
            self,
            [inp1, inp2],
            [
                GpuArrayType(
                    inp1.dtype,
                    broadcastable=inp1.broadcastable,
                    context_name=context_name,
                )()
            ],
        )
Example #5
def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    inp, out_grad, ws, stride, pad = inputs
    nd = op.ndim
    if nd not in (2, 3):
        return
    inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
    out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
    mode = op.mode

    # the GPU ops expect exactly 2 non-pooling dimensions
    if inp.ndim == nd + 2:
        # We reuse out_grad because cuDNN does not use the value of the `out`
        # argument but still checks its shape for average pooling. This
        # has been observed in v2 and v3 as far as I know.
        return GpuDnnPoolGrad(mode=mode)(inp, out_grad, out_grad, ws, stride,
                                         pad)
    else:
        # reshape to 4D or 5D with 2 non-pooling dimensions
        inp_padded = pad_dims(inp, 2, nd)
        out_grad_padded = pad_dims(out_grad, 2, nd)
        ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded, out_grad_padded,
                                               out_grad_padded, ws, stride,
                                               pad)
        return unpad_dims(ret_padded, inp, 2, nd)
Example #6
def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    inp, out, out_grad, ws, stride, pad = inputs
    nd = op.ndim
    if nd not in (2, 3):
        return
    inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
    out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
    out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
    mode = op.mode

    # the GPU ops expect exactly 2 non-pooling dimensions
    if inp.ndim == nd + 2:
        return GpuDnnPoolGrad(mode=mode)(inp, out, out_grad, ws, stride, pad)
    else:
        # reshape to 4D or 5D with 2 non-pooling dimensions
        inp_padded = pad_dims(inp, 2, nd)
        out_padded = pad_dims(out, 2, nd)
        out_grad_padded = pad_dims(out_grad, 2, nd)
        ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded, out_padded,
                                               out_grad_padded, ws, stride,
                                               pad)
        return unpad_dims(ret_padded, inp, 2, nd)
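
Both pooling-gradient rewrites rely on the same trick: the cuDNN pooling kernels expect exactly two non-pooling dimensions, so inputs of a different rank are reshaped first and the result is reshaped back. A rough numpy sketch of that idea, folding any extra leading axes into a single batch axis (my own illustration of the concept; the real pad_dims/unpad_dims helpers may differ in detail):

    import numpy as np

    def collapse_leading(x, nd):
        # keep one channel axis plus `nd` pooling axes; fold everything else into one batch axis
        keep = nd + 1
        lead = x.shape[:x.ndim - keep]
        return x.reshape((-1,) + x.shape[x.ndim - keep:]), lead

    def restore_leading(y, lead):
        return y.reshape(lead + y.shape[1:])

    x = np.zeros((2, 3, 4, 8, 8))           # 5-D input with nd = 2 pooling dims
    x4d, lead = collapse_leading(x, 2)      # -> shape (6, 4, 8, 8)
    assert restore_leading(x4d, lead).shape == x.shape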
Example #7
    def make_node(self, x, y, ilist):
        """
        It differs from GpuAdvancedIncSubtensor1 in that it makes sure
        the indexes are of type long.

        """
        ctx_name = infer_context_name(x, y, ilist)
        x_ = as_gpuarray_variable(x, ctx_name)
        y_ = as_gpuarray_variable(y.astype(x.dtype), ctx_name)
        ilist_ = as_gpuarray_variable(ilist, ctx_name)

        assert x_.type.ndim >= y_.type.ndim

        if ilist_.type.dtype not in tt.integer_dtypes:
            raise TypeError("index must be integers")
        if ilist_.type.ndim != 1:
            raise TypeError("index must be vector")
        if x_.type.ndim == 0:
            raise TypeError("cannot index into a scalar")
        if y_.type.ndim > x_.type.ndim:
            if self.set_instead_of_inc:
                opname = "set"
            else:
                opname = "increment"
            raise TypeError(
                "cannot %s x subtensor with ndim=%s by y with ndim=%s "
                % (opname, x_.type.ndim, y_.type.ndim)
            )

        return gof.Apply(self, [x_, y_, ilist_], [x_.type()])
Example #8
    def make_node(self, x, y, ilist):
        ctx_name = infer_context_name(x, y)
        x_ = as_gpuarray_variable(x, ctx_name)
        y_ = as_gpuarray_variable(y, ctx_name)
        ilist_ = tt.as_tensor_variable(ilist)

        assert x_.type.ndim >= y_.type.ndim

        if ilist_.type.dtype not in tt.integer_dtypes:
            raise TypeError("index must be integers")
        if ilist_.type.ndim != 1:
            raise TypeError("index must be vector")
        if x_.type.ndim == 0:
            raise TypeError("cannot index into a scalar")
        if y_.type.ndim > x_.type.ndim:
            if self.set_instead_of_inc:
                opname = "set"
            else:
                opname = "increment"
            raise TypeError(
                "cannot %s x subtensor with ndim=%s by y with ndim=%s "
                % (opname, x_.type.ndim, y_.type.ndim)
            )

        return gof.Apply(self, [x_, y_, ilist_], [x_.type()])
Example #9
    def make_node(self, x, ilist):
        ctx_name = infer_context_name(x, ilist)
        x_ = as_gpuarray_variable(x, ctx_name)

        ilist__ = tt.as_tensor_variable(ilist)
        if ilist__.type.dtype not in tt.integer_dtypes:
            raise TypeError("index must be integers")
        if ilist__.type.dtype != "int64":
            ilist__ = tt.cast(ilist__, "int64")

        ilist_ = gpu_contiguous(as_gpuarray_variable(ilist__, ctx_name))

        if ilist_.type.dtype != "int64":
            raise TypeError("index must be int64")
        if ilist_.type.ndim != 1:
            raise TypeError("index must be a vector")
        if x_.type.ndim == 0:
            raise TypeError("cannot index into a scalar")

        bcast = ilist_.broadcastable + x_.broadcastable[1:]
        return gof.Apply(
            self,
            [x_, ilist_],
            [GpuArrayType(dtype=x.dtype, context_name=ctx_name, broadcastable=bcast)()],
        )
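
The `bcast` computed above encodes the usual shape rule for integer indexing along the first axis: the result takes the index vector's shape followed by the remaining axes of `x`. A quick numpy check of that rule (illustration only):

    import numpy as np

    x = np.zeros((5, 3, 4))
    ilist = np.array([0, 2, 4], dtype="int64")
    # result shape = index shape + trailing shape of x,
    # matching ilist_.broadcastable + x_.broadcastable[1:]
    assert x[ilist].shape == ilist.shape + x.shape[1:]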
Example #10
File: linalg.py  Project: wh199211/Theano
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuCusolverSolve Op can not be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn(
                'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
            )
        context_name = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, context_name)
        inp2 = basic_ops.as_gpuarray_variable(inp2, context_name)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(self, [inp1, inp2], [
            GpuArrayType('float32',
                         broadcastable=inp1.broadcastable,
                         context_name=context_name)()
        ])
Example #11
 def make_node(self, d, x):
     d = as_gpuarray_variable(d, context_name=self.context_name)
     x = as_gpuarray_variable(x, context_name=self.context_name)
     assert d.ndim == 1
     assert x.ndim == 1
     broadcastable = (False,)
     otype = GpuArrayType(dtype='int64' if self.dtype_int64 else 'int32', broadcastable=broadcastable, context_name=self.context_name)
     return gof.Apply(self, [d, x], [otype()])
Example #12
 def make_node(self, x, y, *inputs):
     ctx_name = infer_context_name(x, y)
     rval = AdvancedIncSubtensor.make_node(self, x, y, *inputs)
     otype = GpuArrayType(
         dtype=rval.outputs[0].type.dtype,
         broadcastable=rval.outputs[0].type.broadcastable,
         context_name=ctx_name,
     )
     x = as_gpuarray_variable(x, ctx_name)
     y = as_gpuarray_variable(y, ctx_name)
     return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
Example #13
    def make_node(self, x, ilist):
        ctx_name = infer_context_name(x, ilist)
        x_ = as_gpuarray_variable(x, ctx_name)
        ilist_ = as_gpuarray_variable(ilist, ctx_name)

        if ilist_.type.dtype not in tensor.integer_dtypes:
            raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        return gof.Apply(self, [x_, ilist_], [x_.type()])
Example #14
    def make_node(self, inp1, inp2):
        ctx = infer_context_name(inp1, inp2)
        inp1 = gpu_ops.as_gpuarray_variable(inp1, ctx)
        inp2 = gpu_ops.as_gpuarray_variable(inp2, ctx)

        assert inp1.dtype == "float32"
        assert inp2.dtype == "float32"
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        otype = GpuArrayType(dtype=inp1.dtype,
                             broadcastable=(False, False),
                             context_name=ctx)

        return Apply(self, [inp1, inp2], [otype()])
Example #15
    def make_node(self, inp, s=None):
        # A shape parameter is expected as an input. For now this is used to
        # manage odd transform sizes.
        # Later this could be extended to handle padding and truncation,
        # following numpy's interface. However, cuFFT expects arrays that match
        # the shape given to the plan, so padding will have to be done in the op.
        # The effect of padding on gradients has yet to be investigated.

        if not skcuda_available:
            raise RuntimeError("skcuda is needed for CuIFFTOp")

        if not pygpu_available:
            raise RuntimeError("pygpu is needed for CuIFFTOp")

        if not pycuda_available:
            raise RuntimeError("pycuda is needed for CuIFFTOp")

        inp = gpu_contiguous(as_gpuarray_variable(inp, infer_context_name(inp)))

        # If no shape is provided as input, calculate shape assuming even real transform.
        if s is None:
            s = inp.shape[1:-1]
            s = tt.set_subtensor(s[-1], (s[-1] - 1) * 2)
        s = tt.as_tensor_variable(s)

        assert inp.dtype == "float32"
        assert s.ndim == 1

        return theano.Apply(self, [inp, s], [self.output_type(inp)()])
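
The default `s` above reconstructs the length of an even real transform from the number of complex bins: a real signal of even length n produces n // 2 + 1 bins, so the op assumes n = (bins - 1) * 2 (the trailing axis of the input, which packs the real and imaginary parts, is excluded via shape[1:-1]). A small numpy check of that relationship (illustration only):

    import numpy as np

    x = np.random.rand(8).astype("float32")
    bins = np.fft.rfft(x).shape[0]        # 5 bins for n = 8
    assert (bins - 1) * 2 == x.shape[0]   # recovers the even transform size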
Example #16
def local_cudnn_maxandargmax(node):
    if not isinstance(node.op, GpuMaxAndArgmax):
        return

    if not dnn_available(node.inputs[0].type.context_name):
        return

    if version(raises=False) < 6000:
        return

    if node.inputs[0].ndim > 8:
        return

    if node.inputs[0].dtype != node.outputs[0].dtype:
        return

    if node.inputs[0].dtype not in ["float16", "float32", "float64"]:
        return

    # order of the axes influences the output indices
    if node.op.axis is not None and tuple(sorted(
            node.op.axis)) != node.op.axis:
        return

    max, arg = GpuDnnReduction("maximum", node.op.axis, node.outputs[0].dtype,
                               node.outputs[0].dtype, True)(node.inputs[0])

    # cuDNN can only return int32 indices, so cast back to the expected int64
    return (
        max,
        as_gpuarray_variable(arg.astype("int64"),
                             node.outputs[1].type.context_name),
    )
Example #17
    def make_node(self, inp):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuLU Op can not be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn(
                'The GpuLU op requires scikit-cuda > 0.5.1 to work with CUDA 8'
            )
        if not pygpu_available:
            raise RuntimeError('Missing pygpu or triu/tril functions. '
                               'Install or update libgpuarray.')
        context_name = infer_context_name(inp)

        inp = as_gpuarray_variable(inp, context_name)

        inp = gpu_contiguous(inp)

        # this op can only operate on float32 matrices
        # because of current implementation of triu/tril.
        # TODO: support float64
        assert inp.ndim == 2
        assert inp.dtype == 'float32'

        # outputs LU in a single matrix, and a pivots array
        pivots_type = GpuArrayType('int32',
                                   broadcastable=inp[0].broadcastable,
                                   context_name=context_name)()
        return theano.Apply(self, [inp], [inp.type(), pivots_type])
Example #18
    def make_node(self, ten4, neib_shape, neib_step=None):
        ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
        neib_shape = tt.as_tensor_variable(neib_shape)
        if neib_step is None:
            neib_step = neib_shape
        else:
            neib_step = tt.as_tensor_variable(neib_step)

        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1
        assert neib_shape.dtype in tt.integer_dtypes
        assert neib_step.dtype in tt.integer_dtypes

        return Apply(
            self,
            [ten4, neib_shape, neib_step],
            [
                GpuArrayType(
                    broadcastable=(False, False),
                    dtype=ten4.type.dtype,
                    context_name=ten4.type.context_name,
                )()
            ],
        )
Example #19
    def make_node(self, inp, s=None):
        # A shape parameter s can be provided as an input. For now this is used to
        # manage odd transform sizes.
        # Later this could be extended to handle padding and truncation,
        # following numpy's interface. However, cuFFT expects arrays that match
        # the shape given to the plan, so padding will have to be done in the op.
        # The effect of padding on gradients has yet to be investigated.

        if not scikits_cuda_available:
            raise RuntimeError("skcuda is needed for CuFFTOp")

        if not pygpu_available:
            raise RuntimeError("pygpu is needed for CuFFTOp")

        if not pycuda_available:
            raise RuntimeError("pycuda is needed for CuFFTOp")

        inp = basic_ops.gpu_contiguous(
            basic_ops.as_gpuarray_variable(inp,
                                           basic_ops.infer_context_name(inp)))

        # If no shape is provided as input, default to input data shape.
        if s is None:
            s = inp.shape[1:]
        s = T.as_tensor_variable(s)

        assert inp.dtype == "float32"
        assert s.ndim == 1
        assert 'int' in s.dtype

        return theano.Apply(self, [inp, s], [self.output_type(inp)()])
Example #20
 def make_node(self, A):
     ctx_name = infer_context_name(A)
     A = as_gpuarray_variable(A, ctx_name)
     A = gpu_contiguous(A)
     if A.ndim != 2:
         raise LinAlgError("Matrix rank error")
     if A.dtype != "float32":
         raise TypeError("only `float32` is supported for now")
     if self.compute_uv:
         return theano.Apply(
             self,
             [A],
             # return S, U, VT
             [
                 GpuArrayType(
                     A.dtype, broadcastable=[False], context_name=ctx_name
                 )(),
                 A.type(),
                 A.type(),
             ],
         )
     else:
         return theano.Apply(
             self,
             [A],
             # return only S
             [GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)()],
         )
Example #21
 def make_node(self, x, k=0): #TODO: dtype check
     x = as_gpuarray_variable(x, context_name=self.context_name)
     k = tensor.as_tensor_variable(k)
     assert x.ndim == 2
     assert k.ndim == 0
     broadcastable = (False,True) if self.keepdims else (False,)
     otype = GpuArrayType(dtype=x.type.dtype, broadcastable=broadcastable, context_name=self.context_name)
     return gof.Apply(self, [x, k], [otype()])
Example #22
 def make_node(self, A):
     ctx_name = infer_context_name(A)
     A = as_gpuarray_variable(A, ctx_name)
     A = gpu_contiguous(A)
     if A.ndim != 2:
         raise LinAlgError("Matrix rank error")
     if A.dtype != "float32":
         raise TypeError("only `float32` is supported for now")
     return theano.Apply(self, [A], [A.type()])
Example #23
def local_softmax_dnn(node):
    if isinstance(node.op, GpuSoftmax):
        if not dnn_available(node.outputs[0].type.context_name):
            return
        ins = node.inputs[0].dimshuffle(0, 1, "x", "x")
        ins = gpu_contiguous(ins)
        out = GpuDnnSoftmax("accurate", "channel")(ins)
        out = as_gpuarray_variable(out.dimshuffle(0, 1), out.type.context_name)
        return [out]
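
The rewrite above lifts a 2-D softmax to the 4-D layout cuDNN expects by adding two broadcastable trailing axes, applies the channel-mode softmax, and then drops the extra axes again. The equivalence it relies on can be checked with a plain numpy sketch (my own illustration; `softmax` here is defined locally, not taken from Theano):

    import numpy as np

    def softmax(a, axis):
        e = np.exp(a - a.max(axis=axis, keepdims=True))
        return e / e.sum(axis=axis, keepdims=True)

    x = np.random.rand(4, 6).astype("float32")
    lifted = softmax(x[:, :, None, None], axis=1)[:, :, 0, 0]   # (n, c, 1, 1) channel softmax
    assert np.allclose(lifted, softmax(x, axis=1))              # same as the row-wise 2-D softmax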
Example #24
    def make_node(self, inp1, inp2):
        self.context = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
        inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(self, [inp1, inp2], [
            GpuArrayType('float32',
                         broadcastable=inp1.broadcastable,
                         context_name=self.context)()
        ])
Example #25
File: linalg.py  Project: bouthilx/Theano
    def make_node(self, inp1, inp2):
        self.context = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
        inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(
            self, [inp1, inp2],
            [GpuArrayType('float32',
                          broadcastable=inp1.broadcastable,
                          context_name=self.context)()])
Example #26
 def make_node(self, x, k=0, n=0, m=0): #TODO: dtype check
     x = as_gpuarray_variable(x, context_name=self.context_name)
     k = tensor.as_tensor_variable(k)
     n = tensor.as_tensor_variable(n)
     m = tensor.as_tensor_variable(m)
     assert x.ndim == 2 or x.ndim == 1
     assert k.ndim == 0
     assert n.ndim == 0
     assert m.ndim == 0
     otype = GpuArrayType(dtype=x.type.dtype, broadcastable=(False,False), context_name=self.context_name)
     return gof.Apply(self, [x, k, n, m], [otype()])
Example #27
    def make_node(self, x):
        x = as_gpuarray_variable(x, self.context_name)

        x_arg = pygpu.elemwise.arg('x', 'float32', read=True)
        c_arg = pygpu.elemwise.arg('c', 'float32', read=True, write=True)
        self.my_op = pygpu.elemwise.GpuElemwise(
            get_context(self.context_name),
            "c = " + str(self.a) + " * x + " + str(self.b), [x_arg, c_arg],
            convert_f16=True)

        return Apply(self, [x], [x.type()])
Example #28
def local_gpua_softmax_dnn_grad(op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    ins = []
    for n in inputs:
        n = as_gpuarray_variable(n, ctx_name)
        if n.ndim != 2:
            return
        ins.append(n.dimshuffle(0, "x", 1, "x"))

    out = GpuDnnSoftmaxGrad("accurate", "instance")(gpu_contiguous(ins[0]),
                                                    gpu_contiguous(ins[1]))
    return [out.dimshuffle(0, 2)]
Example #29
    def make_node(self, _x):
        ctx_name = infer_context_name(_x)
        x = as_gpuarray_variable(_x, ctx_name)

        if x.ndim < 2:
            raise ValueError("Diagonal needs an input with 2 or more " "dimensions", x)
        axis_small, axis_large = sorted((self.axis1, self.axis2))
        broadcastable = (
            x.broadcastable[:axis_small]
            + x.broadcastable[axis_small + 1 : axis_large]
            + x.broadcastable[axis_large + 1 :]
            + (False,)
        )
        return gof.Apply(self, [x], [x.type.clone(broadcastable=broadcastable)()])
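
The broadcastable pattern built above follows the same shape rule as numpy's diagonal extraction: the two chosen axes are removed and a new trailing axis holding the diagonal is appended. For instance (illustration only):

    import numpy as np

    x = np.zeros((2, 5, 7, 3))
    d = np.diagonal(x, axis1=1, axis2=2)
    assert d.shape == (2, 3, min(5, 7))   # axes 1 and 2 dropped, diagonal axis appended last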
Example #30
    def make_node(self, activations, labels, input_lengths):
        context_name = infer_context_name(activations)
        t_activations = as_gpuarray_variable(activations,
                                             context_name=context_name)
        # Ensure activations array is C-contiguous
        t_activations = gpu_contiguous(t_activations)

        # Labels and input lengths are always on the CPU
        t_labels = tt.as_tensor_variable(labels)
        t_input_lengths = tt.as_tensor_variable(input_lengths)

        if t_activations.type.dtype != "float32":
            raise TypeError("activations must use the float32 type.")

        if t_activations.ndim != 3:
            raise ValueError("activations must have 3 dimensions.")

        if t_labels.type.dtype != "int32":
            raise TypeError("labels must use the int32 type.")

        if t_labels.ndim != 2:
            raise ValueError("labels must have 2 dimensions.")

        if t_input_lengths.type.dtype != "int32":
            raise TypeError("input_lengths must use the int32 type.")

        if t_input_lengths.ndim != 1:
            raise ValueError("input_lengths must have 1 dimension.")

        costs = GpuArrayType(dtype="float32",
                             broadcastable=(False, ),
                             context_name=context_name)()
        outputs = [costs]

        if self.compute_grad:
            gradients = GpuArrayType(
                dtype="float32",
                broadcastable=(
                    False,
                    False,
                    False,
                ),
                context_name=context_name,
            )()
            outputs += [gradients]

        return theano.Apply(self,
                            inputs=[t_activations, t_labels, t_input_lengths],
                            outputs=outputs)
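
Putting the checks above together, the op wants float32 activations of rank 3, int32 labels of rank 2, and an int32 vector of input lengths. Dummy host arrays with shapes that would satisfy those checks (illustration only; reading the axes as (time, batch, alphabet) follows the usual warp-ctc convention and is an assumption here):

    import numpy as np

    activations = np.zeros((50, 8, 29), dtype="float32")   # assumed (time, batch, alphabet)
    labels = np.zeros((8, 10), dtype="int32")
    input_lengths = np.full((8,), 50, dtype="int32")
    assert activations.ndim == 3 and labels.ndim == 2 and input_lengths.ndim == 1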
Example #31
 def make_node(self, diag):
     ctx_name = infer_context_name(diag)
     diag = as_gpuarray_variable(diag, ctx_name)
     if diag.type.ndim < 1:
         raise ValueError(
             "AllocDiag needs an input with 1 or more " "dimensions", diag.type
         )
     return gof.Apply(
         self,
         [diag],
         [
             diag.type.__class__(
                 dtype=diag.dtype, broadcastable=[False] * (diag.ndim + 1)
             )()
         ],
     )
Example #32
File: ops.py  Project: mila-udem/platoon
 def make_node(self, src, dest=None):
     if dest is None:
         inputs = [src]
         if self.inplace:
             self.inplace_pattern = {0: 0}
         else:
             self.inplace_pattern = {}
     else:
         inputs = [src, dest]
         self.inplace = True
         self.inplace_pattern = {0: 1}
     self.destroy_map = dict((o, [i]) for o, i in self.inplace_pattern.items())
     inputs = [as_gpuarray_variable(i, self.worker.ctx_name) for i in inputs]
     if dest is not None:
         if not inputs[0].type == inputs[1].type:
             raise TypeError("`src` and `dest` must have the same Type:",
                             (inputs[0].type, inputs[1].type))
     out_type = inputs[0].type.clone()
     return theano.Apply(self, inputs, [out_type()])
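
The destroy_map built above is just a mapping derived from the inplace pattern: each output index maps to the list of input indices it overwrites. In isolation (plain Python, no Theano required):

    # output 0 overwrites input 1, as in the `dest is not None` branch above
    inplace_pattern = {0: 1}
    destroy_map = {o: [i] for o, i in inplace_pattern.items()}
    assert destroy_map == {0: [1]}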
Example #33
def local_gpua_pool_dnn_alternative(fgraph, op, ctx_name, inputs, outputs):
    if not dnn_available(ctx_name):
        return
    if not op.ignore_border:
        return
    img, ws, stride, pad = inputs
    nd = op.ndim
    if nd not in (2, 3):
        return
    img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
    mode = op.mode
    # dnn_pool expects exactly 2 non-pooling dimensions
    if img.ndim == nd + 2:
        return dnn_pool(img, ws, stride=stride, pad=pad, mode=mode)
    else:
        # reshape to 4D or 5D with 2 non-pooling dimensions
        img_padded = pad_dims(img, 2, nd)
        ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
        return unpad_dims(ret_padded, img, 2, nd)
Example #34
File: linalg.py  Project: bbudescu/Theano
    def make_node(self, inp):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuCholesky Op can not be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
        if not pygpu_available:
            raise RuntimeError('Missing pygpu or triu/tril functions. '
                               'Install or update libgpuarray.')
        context_name = basic_ops.infer_context_name(inp)

        inp = basic_ops.as_gpuarray_variable(inp, context_name)

        inp = basic_ops.gpu_contiguous(inp)

        # this op can only operate on float32 matrices
        # because of current implementation of triu/tril.
        # TODO: support float64 for triu/tril in GpuArray and for GpuCholesky/GpuCusolverSolve in Theano.
        assert inp.ndim == 2
        assert inp.dtype == 'float32'

        return theano.Apply(self, [inp], [inp.type()])
Example #35
 def make_node(self, x):
     ctx_name = infer_context_name(x)
     x = as_gpuarray_variable(x, ctx_name)
     return Apply(self, [x], [x.type()])