def make_node(self, inp1, inp2):
    """Build the Apply node for this op from two GPU matrix inputs.

    Both inputs are converted to GPU array variables on the context
    inferred from them and then made contiguous.  The node has a single
    output: a float32 ``GpuArrayType`` variable on that context whose
    broadcastable pattern is copied from ``inp1``.

    Raises
    ------
    RuntimeError
        If ``cusolver_available`` is false.
    AssertionError
        If either input is not a 2-d float32 variable.
    """
    import re

    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCusolverSolve Op can not be constructed.')

    # Compare the leading release numbers as integers.  A plain string
    # comparison is lexicographic and would rank e.g. '0.10.0' below '0.5.1'.
    release = re.match(r'\d+(?:\.\d+)*', str(skcuda.__version__))
    installed = (tuple(int(part) for part in release.group().split('.'))
                 if release else ())
    if installed <= (0, 5, 1):
        warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')

    context_name = basic_ops.infer_context_name(inp1, inp2)

    inp1 = basic_ops.as_gpuarray_variable(inp1, context_name)
    inp2 = basic_ops.as_gpuarray_variable(inp2, context_name)

    inp1 = basic_ops.gpu_contiguous(inp1)
    inp2 = basic_ops.gpu_contiguous(inp2)

    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'

    return theano.Apply(
        self, [inp1, inp2],
        [GpuArrayType('float32',
                      broadcastable=inp1.broadcastable,
                      context_name=context_name)()])
def make_node(self, inp, s=None):
    """Build the Apply node for the FFT op on a float32 GPU input.

    Parameters
    ----------
    inp
        Input data; moved to the GPU context inferred from it and made
        contiguous.  Must be float32.
    s
        Optional 1-d integer shape.  When omitted it defaults to
        ``inp.shape[1:]``.

    Raises
    ------
    RuntimeError
        If skcuda, pygpu or pycuda is not available.
    AssertionError
        If ``inp`` is not float32 or ``s`` is not a 1-d integer tensor.
    """
    # A shape parameter s can be provided as an input. For now this is used
    # to manage odd transform sizes.
    # Later this could be extended to handle padding and truncation,
    # following numpy's interface. However, cuFFT expects arrays that match
    # the shape given to the plan, so padding will have to be done in the op.
    # The effect of padding on gradients has yet to be investigated.
    requirements = (
        (scikits_cuda_available, "skcuda is needed for CuFFTOp"),
        (pygpu_available, "pygpu is needed for CuFFTOp"),
        (pycuda_available, "pycuda is needed for CuFFTOp"),
    )
    for available, message in requirements:
        if not available:
            raise RuntimeError(message)

    context_name = basic_ops.infer_context_name(inp)
    gpu_inp = basic_ops.as_gpuarray_variable(inp, context_name)
    inp = basic_ops.gpu_contiguous(gpu_inp)

    # If no shape is provided as input, default to input data shape.
    s = T.as_tensor_variable(inp.shape[1:] if s is None else s)

    assert inp.dtype == "float32"
    assert s.ndim == 1
    assert 'int' in s.dtype

    outputs = [self.output_type(inp)()]
    return theano.Apply(self, [inp, s], outputs)
def forward(self, x, train=True):
    """Apply ``GpuDnnConvGradI`` to ``x`` with this layer's weights and bias.

    The output buffer is allocated with the batch size of ``x``, the second
    dimension of the dimshuffled weights, and the two trailing dimensions of
    ``x`` multiplied by ``self.subsample``.  ``train`` is accepted but not
    used by this method.
    """
    data = gpu_contiguous(x)
    filters = gpu_contiguous(self.W.dimshuffle(1, 0, 2, 3))

    make_desc = GpuDnnConvDesc(border_mode=self.border_mode,
                               subsample=self.subsample,
                               conv_mode='conv')

    # Upsampled output: each spatial dimension grows by the stride factor.
    out_height = img_dim = data.shape[2] * self.subsample[0]
    out_width = data.shape[3] * self.subsample[1]
    del img_dim
    buffer = GpuAllocEmpty()(data.shape[0], filters.shape[1],
                             out_height, out_width)

    descriptor = make_desc(buffer.shape, filters.shape)
    upsampled = GpuDnnConvGradI()(filters, data, buffer, descriptor)

    bias = self.b.dimshuffle('x', 0, 'x', 'x')
    return upsampled + bias
def make_node(self, inp1, inp2):
    """Build the Apply node for this op from two float32 GPU matrices.

    The context name inferred from the inputs is stored on the op instance
    as ``self.context``.  Both inputs are converted to GPU array variables
    on that context and made contiguous.  The single output is a float32
    ``GpuArrayType`` variable with the broadcastable pattern of ``inp1``.

    Raises
    ------
    AssertionError
        If either input is not a 2-d float32 variable.
    """
    self.context = basic_ops.infer_context_name(inp1, inp2)

    inp1 = basic_ops.as_gpuarray_variable(inp1, self.context)
    inp2 = basic_ops.as_gpuarray_variable(inp2, self.context)

    inp1 = basic_ops.gpu_contiguous(inp1)
    inp2 = basic_ops.gpu_contiguous(inp2)

    # this op can only operate on float32 matrices
    assert inp1.ndim == 2
    assert inp2.ndim == 2
    assert inp1.dtype == 'float32'
    assert inp2.dtype == 'float32'

    out_type = GpuArrayType('float32',
                            broadcastable=inp1.broadcastable,
                            context_name=self.context)
    return theano.Apply(self, [inp1, inp2], [out_type()])
def make_node(self, inp):
    """Build the Apply node for this op from one float32 GPU matrix.

    The input is converted to a GPU array variable on the context inferred
    from it and made contiguous.  The single output has the same type as
    the converted input.

    Raises
    ------
    RuntimeError
        If ``cusolver_available`` or ``pygpu_available`` is false.
    AssertionError
        If the input is not a 2-d float32 variable.
    """
    import re

    if not cusolver_available:
        raise RuntimeError('CUSOLVER is not available and '
                           'GpuCholesky Op can not be constructed.')

    # Compare the leading release numbers as integers.  A plain string
    # comparison is lexicographic and would rank e.g. '0.10.0' below '0.5.1'.
    release = re.match(r'\d+(?:\.\d+)*', str(skcuda.__version__))
    installed = (tuple(int(part) for part in release.group().split('.'))
                 if release else ())
    if installed <= (0, 5, 1):
        warnings.warn('The GpuCholesky op requires scikit-cuda > 0.5.1 to work with CUDA 8')

    if not pygpu_available:
        raise RuntimeError('Missing pygpu or triu/tril functions. '
                           'Install or update libgpuarray.')

    context_name = basic_ops.infer_context_name(inp)

    inp = basic_ops.as_gpuarray_variable(inp, context_name)
    inp = basic_ops.gpu_contiguous(inp)

    # this op can only operate on float32 matrices
    # because of current implementation of triu/tril.
    # TODO: support float64 for triu/tril in GpuArray and for
    # GpuCholesky/GpuCusolverSolve in Theano.
    assert inp.ndim == 2
    assert inp.dtype == 'float32'

    return theano.Apply(self, [inp], [inp.type()])
def make_node(self, points, dim):
    """Build the Apply node producing the six hash-table outputs.

    Parameters
    ----------
    points
        3-d variable; cast to float32 and made GPU-contiguous.
    dim
        Must resolve to an integer scalar constant; it is wrapped as an
        int32 constant named ``"dim"``.

    The outputs are, in order: ``hash_entries``, ``hash_keys``,
    ``neighbor_entries``, ``barycentric_coords``, ``valid_entries`` and
    ``n_valid``.  Each output gets a custom ``values_eq_approx`` tag.

    Raises
    ------
    AssertionError
        If ``points`` is not 3-d.
    ValueError
        If ``dim`` does not have an integer dtype.
    """
    assert points.ndim == 3
    points = gpu_contiguous(as_tensor_variable(points.astype("float32")))

    dim_value = get_scalar_constant_value(dim)
    if "int" not in str(dim_value.dtype):
        raise ValueError("dim must be an integer.")
    dim = constant(dim_value, dtype="int32", name="dim")

    # (dtype, broadcastable pattern, type name, output variable name)
    output_specs = [
        ("int32", (False,), "entries", "hash_entries"),
        ("int16", (False, False), "keys", "hash_keys"),
        ("int32", (False, False, False), "neighbor_entries",
         "neighbor_entries"),
        ("float32", points.type.broadcastable, "barycentric_coords",
         "barycentric_coords"),
        ("int32", (False,), "valid_entries", "valid_entries"),
        ("int32", (False,), "n_valid", "n_valid"),
    ]
    out_vars = []
    for dtype, pattern, type_name, var_name in output_specs:
        out_type = GpuArrayType(dtype,
                                broadcastable=pattern,
                                context_name=self.context_name,
                                name=type_name)
        out_vars.append(out_type(name=var_name))

    (hash_entries, hash_keys, neighbor_entries,
     _bary, valid_entries, n_valid) = out_vars

    # TODO: I suppose GpuHashTable should be a type like GpuHashType, and
    # the Op should return one of those instead.
    # Two sets of entries can't be meaningfully compared without also
    # having the corresponding keys. Since we can only define per-output
    # comparisons, we have to hope that any time someone compares two
    # tables for equality, they will check all outputs.
    def always_equal(first, second):
        return True

    hash_entries.tag.values_eq_approx = always_equal
    neighbor_entries.tag.values_eq_approx = always_equal
    # The number of valid entries between two equivalent tables may be
    # different since it includes duplicates.
    n_valid.tag.values_eq_approx = always_equal

    def keys_comparison(k1, k2):
        # Keys match when both tables hold the same set of key rows.
        rows1 = set(map(tuple, np.asarray(k1)))
        rows2 = set(map(tuple, np.asarray(k2)))
        return rows1 == rows2

    hash_keys.tag.values_eq_approx = keys_comparison

    def valid_entries_comparison(e1, e2):
        # Only the number of distinct entries is compared.
        distinct1 = np.unique(np.asarray(e1))
        distinct2 = np.unique(np.asarray(e2))
        return len(distinct1) == len(distinct2)

    valid_entries.tag.values_eq_approx = valid_entries_comparison

    return Apply(self, [points, dim], out_vars)
def local_abstractconv_cudnn_alt(node):
    """Offer an alternative cuDNN implementation for an abstract 2-d conv node.

    Returns a one-element list holding the replacement variable, or ``None``
    when the node is not handled.  The node is left alone when:

    * its op is not one of the three abstract 2-d convolution ops;
    * the reported cuDNN version is below 6000 and dilation is not (1, 1);
    * the op is unshared or uses asymmetric (tuple-of-tuples) padding;
    * cuDNN is unavailable for the first input's context;
    * the op's border mode / subsample / dilation / groups fall outside the
      combinations accepted by the matching branch below.
    """
    op = node.op
    handled_ops = (AbstractConv2d,
                   AbstractConv2d_gradWeights,
                   AbstractConv2d_gradInputs)
    if not isinstance(op, handled_ops):
        return
    if version(raises=False) < 6000 and op.filter_dilation != (1, 1):
        return None
    if op.unshared:
        return None
    if isinstance(op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in op.border_mode):
        # Asymmetric padding not yet supported
        return None

    inp1 = node.inputs[0]
    inp2 = node.inputs[1]
    if not dnn_available(inp1.type.context_name):
        return

    border_mode = op.border_mode
    subsample = op.subsample
    filter_dilation = op.filter_dilation
    num_groups = op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])
    conv_mode = "conv" if op.filter_flip else "cross"
    unit = (1, 1)

    if isinstance(op, AbstractConv2d):
        # Forward convolution: only re-routed through a gradient
        # implementation via dnn_conv's direction hint.
        if border_mode == "half" or subsample != unit or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == unit:
            direction_hint = "bprop weights"
        else:
            return None
        return [dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )]

    if isinstance(op, AbstractConv2d_gradWeights):
        # Gradient w.r.t. weights, computed with a forward GpuDnnConv on
        # inputs whose first two axes are swapped.
        supported = (border_mode == "valid" and subsample == unit and
                     filter_dilation == unit and num_groups == 1)
        if not supported:
            return None

        img = gpu_contiguous(inp1)
        topgrad = gpu_contiguous(inp2)
        ctx_name = infer_context_name(img, topgrad)
        img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
        topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))

        ishape = [shape_i_op(axis)(img) for axis in range(img.ndim)]
        tshape = [shape_i_op(axis)(topgrad) for axis in range(topgrad.ndim)]
        out_shp = assert_conv_shape(get_conv_output_shape(
            ishape,
            tshape,
            border_mode=border_mode,
            subsample=subsample,
            filter_dilation=filter_dilation,
        ))

        out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
        make_desc = GpuDnnConvDesc(
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            conv_mode="cross",
            precision=precision,
        )
        desc = make_desc(out.shape)
        conv = GpuDnnConv(algo=None, num_groups=num_groups)(
            img, topgrad, out, desc)
        if conv_mode == "conv":
            # Flip both spatial axes of the result.
            conv = conv[:, :, ::-1, ::-1]
        return [as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)]

    if isinstance(op, AbstractConv2d_gradInputs):
        # Gradient w.r.t. inputs, computed with a "full" forward GpuDnnConv
        # using the opposite convolution mode.
        supported = (border_mode == "valid" and subsample == unit and
                     num_groups == 1)
        if not supported:
            return None

        kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
        topgrad = gpu_contiguous(inp2)
        ctx_name = infer_context_name(kerns, topgrad)
        swapped_mode = "cross" if conv_mode == "conv" else "conv"

        make_desc = GpuDnnConvDesc(
            border_mode="full",
            subsample=subsample,
            dilation=filter_dilation,
            conv_mode=swapped_mode,
            precision=precision,
        )
        desc = make_desc(kerns.shape)

        tshape = [shape_i_op(axis)(topgrad) for axis in range(topgrad.ndim)]
        kshape = [shape_i_op(axis)(kerns) for axis in range(kerns.ndim)]
        out_shp = assert_conv_shape(get_conv_output_shape(
            tshape,
            kshape,
            border_mode="full",
            subsample=subsample,
            filter_dilation=filter_dilation,
        ))

        out = GpuAllocEmpty(dtype=topgrad.dtype,
                            context_name=ctx_name)(*out_shp)
        return [GpuDnnConv(algo=None, num_groups=num_groups)(
            topgrad, kerns, out, desc)]
def local_dnn_convi_output_merge(node, *inputs):
    """Rebuild a ``GpuDnnConvGradI`` call with its third input made contiguous.

    The new op reuses ``algo`` and ``num_groups`` from ``node.op``; all
    other inputs are passed through unchanged.  Returns a one-element list.
    """
    contiguous_third = gpu_contiguous(inputs[2])
    new_inputs = list(inputs)
    new_inputs[2] = contiguous_third

    grad_op = GpuDnnConvGradI(algo=node.op.algo,
                              num_groups=node.op.num_groups)
    return [grad_op(*new_inputs)]
def make_node(self, x, boxes, grad):
    """Build the Apply node from three inputs made GPU-contiguous.

    The single output has the same type as the contiguous ``x``.
    """
    x, boxes, grad = [basic_ops.gpu_contiguous(var)
                      for var in (x, boxes, grad)]
    output = x.type()
    return theano.Apply(self, [x, boxes, grad], [output])
def make_node(self, x, truth):
    """Build the Apply node from two inputs made GPU-contiguous.

    The single output has the same type as the contiguous ``x``.
    """
    x, truth = [basic_ops.gpu_contiguous(var) for var in (x, truth)]
    output = x.type()
    return theano.Apply(self, [x, truth], [output])
def __init__(self, input, convstride, padsize, poolsize, poolstride, group,
             b, W=None, filter_shape=None, poolpad=0, mode='max', lrn=False,
             lib_conv='cudnn', printinfo=True, input_shape=None,
             output_shape=None):
    """ConvPoolLRN layer, to be used in AlexNet.

    Builds convolution -> ReLU -> optional pooling -> optional LRN on top
    of ``input``.  Data and filters are dimshuffled from c01b to bc01 for
    the convolution and the result is dimshuffled back to c01b.

    Parameters
    ----------
    input, input_shape
        Forwarded to ``self.get_input_shape``.
        NOTE(review): that helper presumably sets ``self.input`` and
        ``self.input_shape``, which are read below -- confirm.
    convstride, padsize
        Convolution stride (used for both axes) and padding.
    poolsize, poolstride
        Pooling window and stride (used for both axes); pooling is skipped
        when ``poolsize == 1``.
    group
        1 for a single convolution, 2 to split input channels and filters
        into two halves that are convolved separately and concatenated.
    b
        Initial value handed to ``Constant`` for the bias.
    W, filter_shape
        Exactly one must be given: an existing weight (``group`` must be 1)
        or the shape used to initialise new weights.
    poolpad, mode
        ``poolpad`` is stored on the instance and ``mode`` is accepted, but
        neither is passed to the pooling ops here (pooling is always max
        with no padding).
    lrn
        Apply ``CrossChannelNormalization`` to the output when true.
    lib_conv
        ``'cudnn'`` (recommended) or ``'corrmm'``.
    printinfo
        Call ``self.print_shape()`` at the end when true.
    output_shape
        Used as ``self.output_shape`` when truthy; otherwise it is computed
        with ``self.get_output_shape``.

    Raises
    ------
    AttributeError
        If not exactly one of ``W`` and ``filter_shape`` is given.
    NotImplementedError
        If ``lib_conv`` is not a supported backend.
    """
    self.get_input_shape(input, input_shape)
    self.convstride = convstride
    self.padsize = padsize
    self.lib_conv = lib_conv
    self.poolsize = poolsize
    self.poolstride = poolstride
    self.poolpad = poolpad
    self.lrn = lrn
    if self.lrn:
        self.lrn_func = CrossChannelNormalization()

    # ---- parameters -----------------------------------------------------
    if W is None and filter_shape is not None:
        assert group in [1, 2]
        # np.array copies, so halving below never mutates the caller's
        # filter_shape when it is already an ndarray.
        self.filter_shape = np.array(filter_shape)
        if group == 1:
            self.W = Normal(self.filter_shape, mean=0, std=0.01)
            self.b = Constant(self.filter_shape[3], val=b)
        else:
            # Each group sees half the input channels and produces half
            # the output channels.
            self.filter_shape[0] = self.filter_shape[0] // 2
            self.filter_shape[3] = self.filter_shape[3] // 2
            channel = self.input_shape[0]
            self.W0 = Normal(self.filter_shape, mean=0, std=0.01)
            self.W1 = Normal(self.filter_shape, mean=0, std=0.01)
            self.b0 = Constant(self.filter_shape[3], val=b)
            self.b1 = Constant(self.filter_shape[3], val=b)
    elif W is not None and filter_shape is None:
        assert group == 1
        self.filter_shape = W.val.shape.eval()
        self.W = W
        self.b = Constant(self.filter_shape[3], val=b)
    else:
        raise AttributeError('need to specify exactly one of W and filtershape')

    # ---- backend-specific convolution and pooling -----------------------
    conv_stride = (convstride, convstride)
    pool_window = (poolsize, poolsize)
    pool_stride = (poolstride, poolstride)

    if lib_conv == 'cudnn':

        def convolve(img, weight, bias):
            kerns = weight.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            out = dnn.dnn_conv(img=img,
                               kerns=kerns,
                               subsample=conv_stride,
                               border_mode=padsize)
            return out + bias.val.dimshuffle('x', 0, 'x', 'x')

        def pool(img):
            return dnn.dnn_pool(img, ws=pool_window, stride=pool_stride)

    elif lib_conv == 'corrmm':
        from theano.gpuarray.basic_ops import gpu_contiguous
        from theano.gpuarray.blas import GpuCorrMM

        # Use self.filter_shape: the filter_shape argument is None when an
        # existing W was supplied.
        if padsize == (self.filter_shape[1] - 1) // 2:
            border_mode = 'half'
        else:
            border_mode = (padsize, padsize)
        self.corr_mm_op = GpuCorrMM(subsample=conv_stride,
                                    border_mode=border_mode)

        def convolve(img, weight, bias):
            kerns = weight.val.dimshuffle(3, 0, 1, 2)  # c01b to bc01
            # flip top-down, left-right to compute convolution instead of
            # correlation
            flipped = gpu_contiguous(kerns[:, :, ::-1, ::-1])
            out = self.corr_mm_op(gpu_contiguous(img), flipped)
            return out + bias.val.dimshuffle('x', 0, 'x', 'x')

        def pool(img):
            from theano.gpuarray.pool import GpuPool
            ds_op = GpuPool(ignore_border=False, mode='max', ndim=2)
            return ds_op(inp=img, ws=pool_window, stride=pool_stride,
                         pad=(0, 0))

    else:
        # The cudaconvnet backend is not implemented in this constructor.
        raise NotImplementedError(
            "lib_conv can only be cudnn or corrmm for now")

    # ---- convolution ----------------------------------------------------
    input_shuffled = self.input.dimshuffle(3, 0, 1, 2)  # c01b to bc01
    if group == 1:
        conv_out = convolve(input_shuffled, self.W, self.b)
    else:
        half = channel // 2
        conv_out0 = convolve(input_shuffled[:, :half, :, :],
                             self.W0, self.b0)
        conv_out1 = convolve(input_shuffled[:, half:, :, :],
                             self.W1, self.b1)
        conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

    # ReLu
    self.output = T.maximum(conv_out, 0)

    # Pooling
    if poolsize != 1:
        self.output = pool(self.output)

    self.output = self.output.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

    # LRN
    if self.lrn:
        self.output = self.lrn_func(self.output)

    if group == 1:
        self.params = [self.W.val, self.b.val]
        self.weight_type = ['W', 'b']
    else:
        self.params = [self.W0.val, self.b0.val, self.W1.val, self.b1.val]
        self.weight_type = ['W', 'b', 'W', 'b']

    if output_shape:
        self.output_shape = output_shape
    else:
        self.output_shape = self.get_output_shape(self.input_shape)

    self.name = 'ConvPoolLRN(%s)' % lib_conv
    if printinfo:
        self.print_shape()