Example #1
    def make_node(self, A):
        ctx_name = infer_context_name(A)
        A = as_gpuarray_variable(A, ctx_name)
        if A.ndim != 2:
            raise LinAlgError("Matrix rank error")
        assert A.dtype == 'float32'
        if self.compute_uv:
            return theano.Apply(
                self,
                [A],
                # return S, U, VT
                [
                    GpuArrayType(A.dtype,
                                 broadcastable=[False],
                                 context_name=ctx_name)(),
                    A.type(),
                    A.type()
                ])
        else:
            return theano.Apply(
                self,
                [A],
                # return only S
                [
                    GpuArrayType(A.dtype,
                                 broadcastable=[False],
                                 context_name=ctx_name)()
                ])
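A minimal CPU-side sketch of the same compute_uv contract, using NumPy for illustration (note the op above lists S first, whereas numpy.linalg.svd returns U, S, VT):

    import numpy as np

    A = np.random.rand(4, 3).astype('float32')
    u, s, vt = np.linalg.svd(A, compute_uv=True)   # three outputs: U, S, VT
    s_only = np.linalg.svd(A, compute_uv=False)    # one output: the 1-d singular values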
Example #2
    def make_node(self, images, top_down):
        """
        .. todo::

            WRITEME
        """
        images = as_cuda_ndarray_variable(images)
        top_down = as_cuda_ndarray_variable(top_down)

        assert images.ndim == 4
        assert top_down.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        houtput_type = GpuArrayType(broadcastable=houtput_broadcastable)
        houtput = houtput_type()

        poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        poutput_type = GpuArrayType(broadcastable=poutput_broadcastable)
        poutput = poutput_type()

        return Apply(self, [images, top_down], [houtput, poutput])
Example #3
    def make_node(self, A):
        ctx_name = infer_context_name(A)
        A = as_gpuarray_variable(A, ctx_name)
        A = gpu_contiguous(A)
        if A.ndim != 2:
            raise LinAlgError("Matrix rank error")
        if A.dtype != "float32":
            raise TypeError("only `float32` is supported for now")
        if self.compute_uv:
            return theano.Apply(
                self,
                [A],
                # return S, U, VT
                [
                    GpuArrayType(
                        A.dtype, broadcastable=[False], context_name=ctx_name
                    )(),
                    A.type(),
                    A.type(),
                ],
            )
        else:
            return theano.Apply(
                self,
                [A],
                # return only S
                [GpuArrayType(A.dtype, broadcastable=[False], context_name=ctx_name)()],
            )
Example #4
    def make_node(self, inp1, inp2):
        if not cublas_available:
            raise RuntimeError(
                "CUBLAS is not available and "
                "GpuCublasTriangularSolve Op "
                "can not be constructed."
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim in [1, 2]
        assert inp1.dtype == inp2.dtype

        return theano.Apply(
            self,
            [inp1, inp2],
            [
                GpuArrayType(
                    inp1.dtype,
                    broadcastable=inp2.broadcastable,
                    context_name=context_name,
                )()
            ],
        )
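The output takes its broadcastable pattern from inp2 rather than inp1: for a triangular system A x = b, the solution x has the shape of the right-hand side b, whether b is a vector or a matrix. A CPU-side sketch of the same shape contract with SciPy (the upper-triangular choice here is illustrative, not taken from the op above):

    import numpy as np
    from scipy.linalg import solve_triangular

    A = np.triu(np.random.rand(4, 4)).astype('float32')  # upper-triangular system
    b = np.random.rand(4, 3).astype('float32')           # matrix right-hand side
    x = solve_triangular(A, b)                           # x.shape == b.shape == (4, 3)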
Example #5
    def make_node(self, inp):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuLU Op can not be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn(
                'The GpuLU op requires scikit-cuda > 0.5.1 to work with CUDA 8'
            )
        if not pygpu_available:
            raise RuntimeError('Missing pygpu or triu/tril functions. '
                               'Install or update libgpuarray.')
        context_name = infer_context_name(inp)

        inp = as_gpuarray_variable(inp, context_name)

        inp = gpu_contiguous(inp)

        # this op can only operate on float32 matrices
        # because of current implementation of triu/tril.
        # TODO: support float64
        assert inp.ndim == 2
        assert inp.dtype == 'float32'

        # outputs LU in a single matrix, and a pivots array
        pivots_type = GpuArrayType('int32',
                                   broadcastable=inp[0].broadcastable,
                                   context_name=context_name)()
        return theano.Apply(self, [inp], [inp.type(), pivots_type])
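The two outputs mirror LAPACK's getrf convention: L and U packed into a single matrix (the unit diagonal of L is implicit) plus an integer pivot array. A CPU-side sketch of that layout with SciPy, for comparison:

    import numpy as np
    from scipy.linalg import lu_factor

    A = np.random.rand(3, 3).astype('float32')
    lu, piv = lu_factor(A)          # lu packs L and U into one 3x3 matrix
    U = np.triu(lu)                 # upper triangle (with diagonal) holds U
    L = np.tril(lu, k=-1) + np.eye(3, dtype=lu.dtype)  # strict lower triangle holds L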
Example #6
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuCusolverSolve Op can not be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn(
                'The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8'
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(self, [inp1, inp2], [
            GpuArrayType('float32',
                         broadcastable=inp1.broadcastable,
                         context_name=context_name)()
        ])
Example #7
    def make_node(self, images, acts, denoms, dout):
        """
        .. todo::

            WRITEME
        """
        inputs = images, acts, denoms, dout
        names = "images", "acts", "denoms", "dout"
        for name, var in zip(names, inputs):
            if not isinstance(var.type, GpuArrayType):
                raise TypeError("CrossMapNormUndo: expected %s.type "
                                "to be GpuArrayType, "
                                "got %s" % (name, str(var.type)))
        assert images.ndim == 4
        assert acts.ndim == 4
        assert denoms.ndim == 4
        assert dout.ndim == 4
        # Not strictly necessary I don't think
        assert images.type.broadcastable == acts.type.broadcastable
        assert images.type.broadcastable == denoms.type.broadcastable
        assert images.type.broadcastable == dout.type.broadcastable

        targets_broadcastable = tuple(images.type.broadcastable)
        targets_type = GpuArrayType(broadcastable=targets_broadcastable)
        targets = targets_type()
        out_acts = targets_type()
        return Apply(self, [images, acts, denoms, dout], [targets, out_acts])
Example #8
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError(
                "CUSOLVER is not available and "
                "GpuCusolverSolve Op can not be constructed."
            )
        if skcuda.__version__ <= "0.5.1":
            warnings.warn(
                "The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8"
            )
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == inp2.dtype

        return theano.Apply(
            self,
            [inp1, inp2],
            [
                GpuArrayType(
                    inp1.dtype,
                    broadcastable=inp1.broadcastable,
                    context_name=context_name,
                )()
            ],
        )
Example #9
    def make_node(self, images, filters):
        """
        .. todo::

            WRITEME
        """
        if not isinstance(images.type, GpuArrayType):
            raise TypeError(
                "FilterActs: expected images.type to be GpuArrayType, "
                "got " + str(images.type))

        if not isinstance(filters.type, GpuArrayType):
            raise TypeError(
                "FilterActs: expected filters.type to be GpuArrayType, "
                "got " + str(filters.type))

        assert images.ndim == 4
        assert filters.ndim == 4

        channels_broadcastable = filters.type.broadcastable[3]
        batch_broadcastable = images.type.broadcastable[3]
        # Computing whether the rows and columns are broadcastable requires doing
        # arithmetic on quantities that are known only at runtime, like the specific
        # shape of the image and kernel
        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        targets_type = GpuArrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [images, filters], [targets])
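As the comment notes, the spatial extent of the output depends on values known only at run time, so those axes can never be marked broadcastable when the graph is built. A hypothetical illustration (the valid-convolution formula is an assumption about this op's mode, not taken from the code above):

    def output_extent(image_extent, kernel_extent, stride=1, pad=0):
        # Output size of a valid convolution; a pure run-time computation.
        return (image_extent + 2 * pad - kernel_extent) // stride + 1

    print(output_extent(32, 5))   # 28: not recoverable from broadcastable flags alone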
Example #10
def test_optimization():
    op = CrossMapNorm(16, 15. / 16., 1, True)
    x_ = theano.tensor.TensorVariable(GpuArrayType([False] * 4))
    f = theano.function([x_], theano.grad(op(x_)[0].sum(), x_))
    nodes = [
        x for x in f.maker.fgraph.apply_nodes if type(x.op) == CrossMapNormUndo
    ]
    assert len(nodes) == 1
    assert nodes[0].op.inplace
Example #11
    def make_node(self, images, hid_grads, output_shape):
        """
        .. todo::

            WRITEME
        """
        if not isinstance(images.type, GpuArrayType):
            raise TypeError("WeightActs: expected images.type "
                            "to be GpuArrayType, "
                            "got " + str(images.type))

        if not isinstance(hid_grads.type, GpuArrayType):
            raise TypeError("WeightActs: expected hid_acts.type "
                            "to be GpuArrayType, "
                            "got " + str(hid_grads.type))

        assert images.ndim == 4
        assert hid_grads.ndim == 4

        input_channels_broadcastable = images.type.broadcastable[0]
        # We don't know anything about filter_rows or filter_cols at compile
        # time, so we assume they're not broadcastable.
        filter_rows_broadcastable = False
        filter_cols_broadcastable = False
        output_channels_broadcastable = hid_grads.type.broadcastable[0]

        weights_grads_type = GpuArrayType(
            (input_channels_broadcastable, filter_rows_broadcastable,
             filter_cols_broadcastable, output_channels_broadcastable))

        partial_sums_type = GpuArrayType((False, ) * 5)
        weights_grads = weights_grads_type()
        partial_sums = partial_sums_type()

        return Apply(self, [images, hid_grads, output_shape],
                     [weights_grads, partial_sums])
Example #12
    def make_node(self, hid_acts, filters, output_shape=None):
        """
        .. todo::

            WRITEME

        Parameters
        ----------
        hid_acts : WRITEME
        filters : WRITEME
        output_shape : 2-element TensorVariable, optional
            The spatial shape of the image
        """

        if not isinstance(hid_acts.type, GpuArrayType):
            raise TypeError("ImageActs: expected hid_acts.type to be GpuArrayType, "
                            "got " + str(hid_acts.type))

        if not isinstance(filters.type, GpuArrayType):
            raise TypeError("ImageActs: expected filters.type to be GpuArrayType, "
                            "got " + str(filters.type))

        if output_shape is None:
            if self.stride != 1:
                raise ValueError("You must specify an output_shape for "
                                 "ImageActs if the stride is not 1.")
            hid_shape = hid_acts.shape[1:3]
            kernel_shape = filters.shape[1:3]
            output_shape = hid_shape + kernel_shape - 2 * self.pad - 1

        assert hid_acts.ndim == 4
        assert filters.ndim == 4

        channels_broadcastable = filters.type.broadcastable[3]
        batch_broadcastable = hid_acts.type.broadcastable[3]
        # Computing whether the rows and columns are broadcastable requires doing
        # arithmetic on quantities that are known only at runtime, like the specific
        # shape of the image and kernel
        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        targets_type = GpuArrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [hid_acts, filters, output_shape], [targets])
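The default output_shape inverts a stride-1 valid convolution: hidden maps of extent 8 made by 5-wide kernels with pad == 0 give an image extent of 8 + 5 - 2*0 - 1 = 12, and convolving a 12-wide image with a 5-wide kernel indeed returns 12 - 5 + 1 = 8. A quick check of that round trip:

    hid, kernel, pad = 8, 5, 0
    image = hid + kernel - 2 * pad - 1   # 12, the formula used in make_node above
    assert image - kernel + 1 == hid     # a valid convolution recovers the hidden extent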
Example #13
    def make_node(self, images):
        """
        .. todo::

            WRITEME
        """
        if not isinstance(images.type, GpuArrayType):
            raise TypeError(
                "CrossMapNorm: expected images.type to be GpuArrayType, "
                "got " + str(images.type))

        assert images.ndim == 4

        targets_broadcastable = images.type.broadcastable
        targets_type = GpuArrayType(broadcastable=targets_broadcastable)
        denoms = targets_type()
        targets = targets_type()

        return Apply(self, [images], [targets, denoms])
Example #14
    def make_node(self, images, filters):
        """
        .. todo::

            WRITEME
        """
        ibcast = images.broadcastable
        fbcast = filters.broadcastable
        igroups, icolors_per_group, irows, icols, icount = ibcast
        fmodulesR, fmodulesC, fcolors, frows, fcols = fbcast[:-2]
        fgroups, filters_per_group = fbcast[-2:]
        hbcast = (fgroups, filters_per_group, fmodulesR, fmodulesC, icount)
        if not isinstance(images.type, GpuArrayType):
            raise TypeError('gpu_filter_acts requires GpuArrayType images',
                            images)
        if not isinstance(filters.type, GpuArrayType):
            raise TypeError('gpu_filter_acts requires GpuArrayType filters',
                            filters)
        htype = GpuArrayType(broadcastable=hbcast)
        return theano.gof.Apply(self, [images, filters], [htype()])
Example #15
    def make_node(self, inp1, inp2):
        self.context = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
        inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(self, [inp1, inp2], [
            GpuArrayType('float32',
                         broadcastable=inp1.broadcastable,
                         context_name=self.context)()
        ])
Example #16
    def make_node(self, images):
        """
        .. todo::

            WRITEME
        """
        images = as_cuda_ndarray_variable(images)

        assert images.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        targets_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        targets_type = GpuArrayType(broadcastable=targets_broadcastable)
        targets = targets_type()

        return Apply(self, [images], [targets])
Example #17
    def make_node(self, inp1, inp2):
        if not cublas_available:
            raise RuntimeError('CUBLAS is not available and '
                               'GpuCublasTriangularSolve Op can not be constructed.')
        context_name = infer_context_name(inp1, inp2)

        inp1 = as_gpuarray_variable(inp1, context_name)
        inp2 = as_gpuarray_variable(inp2, context_name)

        inp1 = gpu_contiguous(inp1)
        inp2 = gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim in [1, 2]
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(self, [inp1, inp2],
                            [GpuArrayType('float32',
                                          broadcastable=inp2.broadcastable,
                                          context_name=context_name)()])
Example #18
    def output_type(self, inp):
        return GpuArrayType(inp.dtype,
                            broadcastable=[False] * inp.type.ndim,
                            context_name=inp.type.context_name)
Example #19
def test_cross_map_norm_simple():
    op = CrossMapNorm(16, 15. / 16., 1., True)
    x = CudaNdarray(numpy.ones((16, 2, 2, 2), dtype='float32'))
    x_ = theano.tensor.TensorVariable(GpuArrayType([False] * 4))
    f = theano.function([x_], op(x_)[0])
    numpy.testing.assert_allclose(f(x), 0.0625)
Example #20
    def get_gpu_tensor(self):
        broadcastable = (False,) * self.tensor_size
        return GpuArrayType(self.dtype, broadcastable)()
Example #21
    def output_type(self, inp):
        # add one extra dim for real/imag
        return GpuArrayType(inp.dtype,
                            broadcastable=[False] * (inp.type.ndim + 1),
                            context_name=inp.type.context_name)
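Contrast this with Example #18, which keeps the input's ndim: here the output carries one extra trailing axis holding the (real, imaginary) pair of each complex value, since GpuArrayType has no complex dtypes. A NumPy sketch of that packing (the exact convention is an assumption consistent with the comment above):

    import numpy as np

    z = np.fft.rfft(np.random.rand(8).astype('float32'))   # complex output, shape (5,)
    packed = np.stack([z.real, z.imag], axis=-1)           # real-valued, shape (5, 2)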