Example #1
    def make_node(self, inp1, inp2):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuCusolverSolve Op cannot be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
        context_name = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, context_name)
        inp2 = basic_ops.as_gpuarray_variable(inp2, context_name)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(
            self, [inp1, inp2],
            [GpuArrayType('float32',
                          broadcastable=inp1.broadcastable,
                          context_name=context_name)()])
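A minimal usage sketch for this op follows, assuming it is Theano's GpuCusolverSolve from theano.gpuarray.linalg and that a gpuarray context with cuSOLVER is configured; the values are illustrative only.

# Usage sketch (assumptions: theano.gpuarray.linalg.GpuCusolverSolve,
# a configured CUDA/gpuarray context, and cuSOLVER available).
import numpy as np
import theano
import theano.tensor as T
from theano.gpuarray.linalg import GpuCusolverSolve

A = T.fmatrix('A')                 # 2-D float32, as the asserts above require
b = T.fmatrix('b')
x = GpuCusolverSolve()(A, b)       # make_node above builds the Apply node
solve = theano.function([A, b], x)

A_val = np.random.rand(4, 4).astype('float32') + 4 * np.eye(4, dtype='float32')
b_val = np.random.rand(4, 1).astype('float32')
print(solve(A_val, b_val))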
Example #2
    def make_node(self, inp, s=None):
        # A shape parameter s can be provided as an input. For now this is used to
        # manage odd transform sizes.
        # Later this could be extended to handle padding and trunkation,
        # following numpy's interface. However, cuFFT expects array that match
        # the shape given to the plan, so padding will have to be done in the op.
        # The effect of padding on gradients has yet to be investigated.

        if not scikits_cuda_available:
            raise RuntimeError("skcuda is needed for CuFFTOp")

        if not pygpu_available:
            raise RuntimeError("pygpu is needed for CuFFTOp")

        if not pycuda_available:
            raise RuntimeError("pycuda is needed for CuFFTOp")

        inp = basic_ops.gpu_contiguous(
            basic_ops.as_gpuarray_variable(inp,
                                           basic_ops.infer_context_name(inp)))

        # If no shape is provided as input, default to input data shape.
        if s is None:
            s = inp.shape[1:]
        s = T.as_tensor_variable(s)

        assert inp.dtype == "float32"
        assert s.ndim == 1
        assert 'int' in s.dtype

        return theano.Apply(self, [inp, s], [self.output_type(inp)()])
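The comment above notes that s mainly exists to manage odd transform sizes; a hypothetical usage sketch (the op class is assumed to be named CuFFTOp, as its error messages suggest) is:

# Hypothetical usage sketch (assumes the enclosing op class is CuFFTOp and
# that skcuda, pycuda, pygpu and a gpuarray context are available).
import numpy as np
import theano
import theano.tensor as T

x = T.ftensor3('x')            # (batch, rows, cols), float32
y = CuFFTOp()(x)               # s defaults to x.shape[1:]
# An explicit transform shape could be passed instead, e.g. CuFFTOp()(x, s=(15, 15)),
# to manage odd sizes as described in the comment above.
fft = theano.function([x], y)
print(fft(np.random.rand(2, 16, 16).astype('float32')).shape)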
Example #3
    def forward(self, x, train=True):
        # Transposed convolution ("deconvolution"): the cuDNN gradient-of-inputs
        # op is used as the forward pass, so the spatial size of the output is
        # the input size multiplied by `self.subsample`.
        img = gpu_contiguous(x)
        kerns = gpu_contiguous(self.W.dimshuffle(1, 0, 2, 3))
        gpudnnconvdesc = GpuDnnConvDesc(
            border_mode=self.border_mode,
            subsample=self.subsample,
            conv_mode='conv'
        )
        out = GpuAllocEmpty()(
            img.shape[0],
            kerns.shape[1],
            img.shape[2] * self.subsample[0],
            img.shape[3] * self.subsample[1]
        )
        desc = gpudnnconvdesc(out.shape, kerns.shape)
        return (GpuDnnConvGradI()(kerns, img, out, desc) +
                self.b.dimshuffle('x', 0, 'x', 'x'))
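For intuition, the shape arithmetic of the allocated output above works out as follows (the concrete numbers are assumptions for illustration, not from the source):

# After the dimshuffle, kerns.shape[1] == self.W.shape[0], so that axis gives
# the number of output channels, and the spatial dims are scaled by subsample.
W_shape = (3, 128, 5, 5)          # hypothetical self.W shape
img_shape = (64, 128, 16, 16)     # hypothetical input x shape (bc01)
subsample = (2, 2)

out_shape = (img_shape[0], W_shape[0],
             img_shape[2] * subsample[0], img_shape[3] * subsample[1])
assert out_shape == (64, 3, 32, 32)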
Example #4
    def make_node(self, inp1, inp2):
        self.context = basic_ops.infer_context_name(inp1, inp2)

        inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
        inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)

        inp1 = basic_ops.gpu_contiguous(inp1)
        inp2 = basic_ops.gpu_contiguous(inp2)

        # this op can only operate on float32 matrices
        assert inp1.ndim == 2
        assert inp2.ndim == 2
        assert inp1.dtype == 'float32'
        assert inp2.dtype == 'float32'

        return theano.Apply(
            self, [inp1, inp2],
            [GpuArrayType('float32',
                          broadcastable=inp1.broadcastable,
                          context_name=self.context)()])
Example #5
    def make_node(self, inp):
        if not cusolver_available:
            raise RuntimeError('CUSOLVER is not available and '
                               'GpuCholesky Op cannot be constructed.')
        if skcuda.__version__ <= '0.5.1':
            warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
        if not pygpu_available:
            raise RuntimeError('Missing pygpu or triu/tril functions. '
                               'Install or update libgpuarray.')
        context_name = basic_ops.infer_context_name(inp)

        inp = basic_ops.as_gpuarray_variable(inp, context_name)

        inp = basic_ops.gpu_contiguous(inp)

        # this op can only operate on float32 matrices
        # because of current implementation of triu/tril.
        # TODO: support float64 for triu/tril in GpuArray and for GpuCholesky/GpuCusolverSolve in Theano.
        assert inp.ndim == 2
        assert inp.dtype == 'float32'

        return theano.Apply(self, [inp], [inp.type()])
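A minimal usage sketch, assuming this is the GpuCholesky op from theano.gpuarray.linalg and that a gpuarray context with cuSOLVER is configured:

# Usage sketch (assumptions: theano.gpuarray.linalg.GpuCholesky and a
# configured CUDA/gpuarray context with cuSOLVER).
import numpy as np
import theano
import theano.tensor as T
from theano.gpuarray.linalg import GpuCholesky

A = T.fmatrix('A')                      # symmetric positive definite, float32
L = GpuCholesky(lower=True)(A)
chol = theano.function([A], L)

M = np.random.rand(4, 4).astype('float32')
print(chol(M @ M.T + 4 * np.eye(4, dtype='float32')))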
Example #6
    def make_node(self, points, dim):
        assert points.ndim == 3
        points = gpu_contiguous(as_tensor_variable(points.astype("float32")))

        dim = get_scalar_constant_value(dim)
        if "int" not in str(dim.dtype):
            raise ValueError("dim must be an integer.")

        dim = constant(dim, dtype="int32", name="dim")

        entries_type = GpuArrayType("int32",
                                    broadcastable=(False,),
                                    context_name=self.context_name,
                                    name="entries")
        keys_type = GpuArrayType("int16",
                                 broadcastable=(False, False),
                                 context_name=self.context_name,
                                 name="keys")
        neib_ent_type = GpuArrayType("int32",
                                     broadcastable=(False, False, False),
                                     context_name=self.context_name,
                                     name="neighbor_entries")
        bary_type = GpuArrayType("float32",
                                 broadcastable=points.type.broadcastable,
                                 context_name=self.context_name,
                                 name="barycentric_coords")

        valid_entries_type = GpuArrayType("int32",
                                          broadcastable=(False,),
                                          context_name=self.context_name,
                                          name="valid_entries")
        n_valid_type = GpuArrayType("int32",
                                    broadcastable=(False,),
                                    context_name=self.context_name,
                                    name="n_valid")

        out_vars = [entries_type(name="hash_entries"),
                    keys_type(name="hash_keys"),
                    neib_ent_type(name="neighbor_entries"),
                    bary_type(name="barycentric_coords"),
                    valid_entries_type(name="valid_entries"),
                    n_valid_type(name="n_valid")]

        # TODO: I suppose GpuHashTable should be a type like GpuHashType, and
        # the Op should return one of those instead.

        # Two sets of entries can't be meaningfully compared without also
        # having the corresponding keys. Since we can only define per-output
        # comparisons, we have to hope that any time someone compares two
        # tables for equality, they will check all outputs.
        out_vars[0].tag.values_eq_approx = lambda e1, e2: True
        out_vars[2].tag.values_eq_approx = lambda e1, e2: True

        # The number of valid entries between two equivalent tables may be
        # different since it includes duplicates.
        out_vars[5].tag.values_eq_approx = lambda n1, n2: True

        def keys_comparison(k1, k2):
            k1 = [tuple(k) for k in np.asarray(k1)]
            k2 = [tuple(k) for k in np.asarray(k2)]
            return set(k1) == set(k2)
        out_vars[1].tag.values_eq_approx = keys_comparison

        def valid_entries_comparison(e1, e2):
            e1 = np.asarray(e1)
            e2 = np.asarray(e2)
            return len(np.unique(e1)) == len(np.unique(e2))
        out_vars[4].tag.values_eq_approx = valid_entries_comparison

        return Apply(self, [points, dim], out_vars)
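The comparison helpers above only check order-insensitive equality; a toy numpy illustration of their semantics (with made-up values) is:

# Toy illustration of keys_comparison and valid_entries_comparison above
# (values are made up for the example).
import numpy as np

k1 = np.array([[0, 1], [2, 3], [4, 5]], dtype=np.int16)
k2 = np.array([[4, 5], [0, 1], [2, 3]], dtype=np.int16)    # same rows, reordered
assert {tuple(k) for k in k1} == {tuple(k) for k in k2}

e1 = np.array([7, 7, 3, 9], dtype=np.int32)                # 3 unique entries
e2 = np.array([1, 5, 5, 2], dtype=np.int32)                # also 3 unique entries
assert len(np.unique(e1)) == len(np.unique(e2))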
Example #7
def local_abstractconv_cudnn_alt(node):
    # Graph rewrite: lower abstract convolution Ops to cuDNN ops, using
    # "alternative" implementations that compute one convolution direction
    # via another (e.g. a weight gradient as a forward cross-correlation on
    # dimshuffled inputs).
    if not isinstance(node.op, (AbstractConv2d, AbstractConv2d_gradWeights,
                                AbstractConv2d_gradInputs)):
        return

    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]

    if not dnn_available(inp1.type.context_name):
        return

    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])

    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )

    elif isinstance(op, AbstractConv2d_gradWeights):
        if (border_mode == "valid" and subsample == (1, 1)
                and filter_dilation == (1, 1) and num_groups == 1):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )

            out_shp = assert_conv_shape(out_shp)
            out = GpuAllocEmpty(dtype=img.dtype,
                                context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)

            conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad,
                                                                out, desc)
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]

            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None

    elif isinstance(op, AbstractConv2d_gradInputs):
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            conv_mode = "cross" if conv_mode == "conv" else "conv"
            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)

            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )

            shape = assert_conv_shape(shape)
            out = GpuAllocEmpty(dtype=topgrad.dtype,
                                context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns,
                                                                out, desc)
        else:
            return None

    return [rval]
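In Theano, a rewrite like this is normally declared as a local optimizer and registered with the GPU optimizer database; the sketch below shows the typical wrapping (the decorator usage is standard Theano, but how this particular function is registered in its own module is an assumption).

# Registration sketch (the exact registration of this function in its own
# module is an assumption; local_optimizer itself is standard Theano).
from theano.gof.opt import local_optimizer
from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
                                              AbstractConv2d_gradWeights,
                                              AbstractConv2d_gradInputs)

@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights,
                  AbstractConv2d_gradInputs])
def local_abstractconv_cudnn_alt_sketch(node):
    # ...body as in the example above; returning None leaves the node alone.
    return None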
Example #8
def local_dnn_convi_output_merge(node, *inputs):
    # Rebuild GpuDnnConvGradI with the merged output buffer (inputs[2]) made
    # contiguous again, preserving the node's algo and num_groups.
    inputs = inputs[0:2] + (gpu_contiguous(inputs[2]),) + inputs[3:]
    return [
        GpuDnnConvGradI(algo=node.op.algo,
                        num_groups=node.op.num_groups)(*inputs)
    ]
Example #9
    def make_node(self, x, boxes, grad):
        x = basic_ops.gpu_contiguous(x)
        boxes = basic_ops.gpu_contiguous(boxes)
        grad = basic_ops.gpu_contiguous(grad)

        return theano.Apply(self, [x, boxes, grad], [x.type()])
Example #10
    def make_node(self, x, truth):
        x = basic_ops.gpu_contiguous(x)
        truth = basic_ops.gpu_contiguous(truth)
        return theano.Apply(self, [x, truth], [x.type()])
Example #11
    def __init__(self, input, convstride, padsize, poolsize, poolstride, group,
                 b, W=None, filter_shape=None,
                 poolpad=0, mode='max',
                 lrn=False, lib_conv='cudnn', printinfo=True,
                 input_shape=None, output_shape=None,
                 ):
        '''
        ConvPoolLRN layer.

        To be used in AlexNet.
        lib_conv can be 'cudnn' (recommended) or 'corrmm';
        the 'cudaconvnet' path is kept below only as commented-out code.
        '''

        self.get_input_shape(input, input_shape)
        self.convstride = convstride
        self.padsize = padsize
        self.lib_conv = lib_conv
        self.poolsize = poolsize
        self.poolstride = poolstride
        self.poolpad = poolpad
        self.lrn = lrn
        if self.lrn:
            self.lrn_func = CrossChannelNormalization()
                 
        if W is None and filter_shape is not None:

            assert group in [1, 2]

            self.filter_shape = np.asarray(filter_shape)

            if group == 1:

                self.W = Normal(self.filter_shape, mean=0, std=0.01)
                self.b = Constant(self.filter_shape[3], val=b)

            else:

                self.filter_shape[0] = self.filter_shape[0] // 2
                self.filter_shape[3] = self.filter_shape[3] // 2
                # self.input_shape[0] = self.input_shape[0] / 2
                # self.input_shape[3] = self.input_shape[3] / 2
                channel = self.input_shape[0]
                self.W0 = Normal(self.filter_shape, mean=0, std=0.01)
                self.W1 = Normal(self.filter_shape, mean=0, std=0.01)
                self.b0 = Constant(self.filter_shape[3], val=b)
                self.b1 = Constant(self.filter_shape[3], val=b)

        elif W is not None and filter_shape is None:
            assert group == 1
            self.filter_shape = W.val.shape.eval()
            self.W = W
            self.b = Constant(self.filter_shape[3], val=b)

        else:
            raise AttributeError('need to specify exactly one of W and filter_shape')
                                                

        if lib_conv == 'cudnn':
            

            input_shuffled = self.input.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            
            # in01out to outin01
            # print image_shape_shuffled
            # print filter_shape_shuffled
            if group == 1:
                W_shuffled = self.W.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
                conv_out = dnn.dnn_conv(img=input_shuffled,
                                        kerns=W_shuffled,
                                        subsample=(convstride, convstride),
                                        border_mode=padsize,
                                        )
                conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')
            else:
                W0_shuffled = \
                    self.W0.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
                    
                # print W0_shuffled.shape.eval()# c01b to bc01  # 96, 5, 5, 256 -> 128, 48, 5, 5
                #
                # x_in = np.zeros((96, 27, 27, 128), dtype=np.float32) # c01b to bc01  # 96, 27, 27, 128 -> 128, 48, 27, 27
                # test = input_shuffled[:, :self.channel / 2,:, :]
                #
                # print test.shape
                    
                conv_out0 = \
                    dnn.dnn_conv(img=input_shuffled[:, :channel//2,
                                                    :, :],
                                 kerns=W0_shuffled,
                                 subsample=(convstride, convstride),
                                 border_mode=padsize,
                                 )
                conv_out0 = conv_out0 + \
                    self.b0.val.dimshuffle('x', 0, 'x', 'x')
                W1_shuffled = \
                    self.W1.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
                conv_out1 = \
                    dnn.dnn_conv(img=input_shuffled[:, channel//2:,
                                                    :, :],
                                 kerns=W1_shuffled,
                                 subsample=(convstride, convstride),
                                 border_mode=padsize,
                                 )
                conv_out1 = conv_out1 + \
                    self.b1.val.dimshuffle('x', 0, 'x', 'x')
                conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

            # ReLu
            self.output = T.maximum(conv_out, 0)

            # Pooling
            if poolsize != 1:
                self.output = dnn.dnn_pool(self.output,
                                           ws=(poolsize, poolsize),
                                           stride=(poolstride, poolstride))

            self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
            
        # elif lib_conv == 'cudaconvnet':
#
#             from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
#
#             self.conv_op = FilterActs(pad=self.padsize, stride=self.convstride,
#                                       partial_sum=1)
#
#             from theano.gpuarray.basic_ops import gpu_contiguous
#
#             # Conv
#             if group == 1:
#                 contiguous_input = gpu_contiguous(self.input)
#                 contiguous_filters = gpu_contiguous(self.W.val)
#                 conv_out = self.conv_op(contiguous_input, contiguous_filters)
#                 conv_out = conv_out + self.b.val.dimshuffle(0, 'x', 'x', 'x')
#             else:
#                 contiguous_input0 = gpu_contiguous(
#                     self.input[:channel//2, :, :, :])
#                 contiguous_filters0 = gpu_contiguous(self.W0.val)
#                 conv_out0 = self.conv_op(
#                     contiguous_input0, contiguous_filters0)
#                 conv_out0 = conv_out0 + \
#                     self.b0.val.dimshuffle(0, 'x', 'x', 'x')
#
#                 contiguous_input1 = gpu_contiguous(
#                     self.input[channel//2:, :, :, :])
#                 contiguous_filters1 = gpu_contiguous(self.W1.val)
#                 conv_out1 = self.conv_op(
#                     contiguous_input1, contiguous_filters1)
#                 conv_out1 = conv_out1 + \
#                     self.b1.val.dimshuffle(0, 'x', 'x', 'x')
#                 conv_out = T.concatenate([conv_out0, conv_out1], axis=0)
#
#             # ReLu
#             conv_out = gpu_contiguous(conv_out)
#             self.output = T.maximum(conv_out, 0)
#
#             # Pooling
#             if poolsize != 1:
#                 from pylearn2.sandbox.cuda_convnet.pool import MaxPool
#                 self.pool_op = MaxPool(ds=poolsize, stride=poolstride)
#                 self.output = self.pool_op(self.output)

        elif lib_conv == 'corrmm':

            from theano.gpuarray.basic_ops import gpu_contiguous
            from theano.gpuarray.blas import GpuCorrMM

            # Use self.filter_shape so this also works when W (rather than
            # filter_shape) was passed to __init__.
            border_mode = 'half' if padsize == (self.filter_shape[1] - 1) // 2 else (padsize, padsize)
            self.corr_mm_op = GpuCorrMM(subsample=(convstride, convstride),
                                        border_mode=border_mode)

            input_shuffled = self.input.dimshuffle(3, 0, 1, 2)  # c01b to bc01


            if group == 1:

                filters = self.W.val.dimshuffle(3, 0, 1, 2)

                # flip top-down, left-right to compute convolution instead of correlation  
                contiguous_filters = gpu_contiguous(filters[:, :, ::-1, ::-1])
                contiguous_input = gpu_contiguous(input_shuffled)

                conv_out = self.corr_mm_op(contiguous_input, contiguous_filters)
                conv_out = conv_out + self.b.val.dimshuffle('x', 0, 'x', 'x')

            else:

                W0_shuffled = self.W0.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01

                contiguous_filters0 = gpu_contiguous(W0_shuffled[:, :, ::-1, ::-1])
                contiguous_input0 = gpu_contiguous(input_shuffled[:, :channel // 2, :, :])

                conv_out0 = self.corr_mm_op(contiguous_input0, contiguous_filters0)
                conv_out0 = conv_out0 + self.b0.val.dimshuffle('x', 0, 'x', 'x')

                W1_shuffled = self.W1.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01

                contiguous_filters1 = gpu_contiguous(W1_shuffled[:, :, ::-1, ::-1])
                contiguous_input1 = gpu_contiguous(input_shuffled[:, channel // 2:, :, :])

                conv_out1 = self.corr_mm_op(contiguous_input1, contiguous_filters1)
                conv_out1 = conv_out1 + self.b1.val.dimshuffle('x', 0, 'x', 'x')
                conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

            # ReLu
            self.output = T.maximum(conv_out, 0)
   

            # Pooling
            if poolsize != 1:
                from theano.gpuarray.pool import GpuPool

                ds_op = GpuPool(ignore_border=False, mode='max', ndim=2)
                self.output = ds_op(inp=self.output, ws=(poolsize, poolsize),
                                    stride=(poolstride, poolstride), pad=(0, 0))

            self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

        else:
            raise NotImplementedError("lib_conv can only be 'cudnn' or 'corrmm' for now")

        # LRN
        if self.lrn:
            # lrn_input = gpu_contiguous(self.output)
            self.output = self.lrn_func(self.output)

        if group == 1:
            self.params = [self.W.val, self.b.val]
            self.weight_type = ['W', 'b']
        else:
            self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
            self.weight_type = ['W', 'b', 'W', 'b']

        if output_shape:
            self.output_shape = output_shape 
        else:
            self.output_shape = self.get_output_shape(self.input_shape)
        
        self.name = 'ConvPoolLRN(%s)' % lib_conv
        if printinfo:
            self.print_shape()
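A hypothetical construction sketch for this layer (the shapes follow AlexNet's first convolutional layer in c01b layout; the surrounding library's Normal/Constant initializers and shape helpers are assumed to be available):

# Hypothetical usage sketch; argument values are illustrative only.
import theano.tensor as T

x = T.ftensor4('x')                        # c01b layout: (channels, rows, cols, batch)
conv1 = ConvPoolLRN(input=x,
                    input_shape=(3, 227, 227, 128),    # assumed c01b input shape
                    filter_shape=(3, 11, 11, 96),      # c01b-style filter shape
                    convstride=4, padsize=0,
                    poolsize=3, poolstride=2,
                    group=1, b=0.0, lrn=True,
                    lib_conv='cudnn', printinfo=False)
y = conv1.output                            # c01b output, ReLU + pool (+ LRN) applied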