def CNN(x, c_l1, c_l2, f_l1, f_l2, PP, ims):
    print(ims)
    # -------
    # conv3D removes the dependency on the number of input image channels
    b = numpy.zeros(c_l1.get_value().shape[0])
    conv1 = tensor.nnet.relu(conv3D(x.dimshuffle(0, 2, 3, 1, 'x'),
                                    c_l1.dimshuffle(0, 2, 3, 1, 'x'),
                                    b, d=(1, 1, 1)))  # shuffle dimensions
    conv1 = tensor.sum(conv1, axis=3)  # collapse the input-channel axis
    conv1 = conv1.dimshuffle(0, 3, 1, 2)  # shuffle back to the conv2d layout
    # ---------
    # conv1 = tensor.nnet.relu(conv2d(x, c_l1))  # default stride=1 -- subsample=(1, 1)
    conv1_shp = get_conv_output_shape(ims, c_l1.get_value().shape,
                                      border_mode='valid', subsample=(1, 1))
    print(conv1_shp)
    # pp = tensor.reshape(conv1, conv1_shp[:2] + (conv1_shp[2] * conv1_shp[3],))
    # print(pp)
    pool1 = pool_2d(conv1, (2, 2), st=(2, 2), ignore_border=True)  # default: max pooling
    pool1_shp = get_pool_output_shape(conv1_shp, pool_size=(2, 2), st=(2, 2),
                                      ignore_border=True)
    print(pool1_shp)
    conv2 = tensor.nnet.relu(conv2d(pool1, c_l2))
    conv2_shp = get_conv_output_shape(pool1_shp, c_l2.get_value().shape,
                                      border_mode='valid', subsample=(1, 1))
    print(conv2_shp)
    # pool2 = pool_2d(conv2, (2, 2), st=(2, 2), ignore_border=True)
    pool2 = spp(conv2, conv2_shp, PP, 'max')  # spatial pyramid pooling
    fpool2 = tensor.flatten(pool2, outdim=2)
    full1 = tensor.nnet.relu(tensor.dot(fpool2, f_l1))
    pyx = tensor.nnet.softmax(tensor.dot(full1, f_l2))
    return c_l1, c_l2, f_l1, f_l2, pyx
def CNN(x, c_l1, c_l2, f_l1, f_l2, insize):
    print("in size ", insize)
    conv1 = tensor.nnet.relu(conv2d(x, c_l1))  # default stride=1 -- subsample=(1, 1)
    conv1_shp = get_conv_output_shape(insize, c_l1.get_value().shape,
                                      border_mode='valid', subsample=(1, 1))
    print("conv1 size ", conv1_shp)
    pool1 = pool_2d(conv1, (3, 3), st=(3, 3), ignore_border=True)  # default: max pooling
    pool1_shp = get_pool_output_shape(conv1_shp, pool_size=(3, 3), st=(3, 3),
                                      ignore_border=True)
    print("pool1 size ", pool1_shp)
    lrn1 = LRN(pool1, pool1_shp)
    lrn1_shp = tuple(pool1_shp)
    print("cross map norm1 size ", lrn1_shp)
    conv2 = tensor.nnet.relu(conv2d(lrn1, c_l2))
    conv2_shp = get_conv_output_shape(lrn1_shp, c_l2.get_value().shape,
                                      border_mode='valid', subsample=(1, 1))
    print("conv2 size ", conv2_shp)
    pool2 = pool_2d(conv2, (2, 2), st=(2, 2), ignore_border=True)
    pool2_shp = get_pool_output_shape(conv2_shp, pool_size=(2, 2), st=(2, 2),
                                      ignore_border=True)
    print("pool2 size ", pool2_shp)
    lrn2 = LRN(pool2, pool2_shp)
    lrn2_shp = tuple(pool2_shp)
    print("cross map norm2 size ", lrn2_shp)
    fpool2 = tensor.flatten(lrn2, outdim=2)
    full1 = tensor.nnet.relu(tensor.dot(fpool2, f_l1))
    pyx = tensor.nnet.sigmoid(tensor.dot(full1, f_l2))
    return c_l1, c_l2, f_l1, f_l2, pyx
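Both builders above thread a plain Python shape tuple (`ims` / `insize`) through `get_conv_output_shape` and a pooling-shape helper purely as bookkeeping, so the fully connected weights can be sized before compilation. A minimal sketch of that bookkeeping in plain integer arithmetic, assuming made-up 64x64 inputs and 5x5 / 3x3 kernels (only the 'valid'-convolution and non-overlapping-pooling formulas are taken from the code above):

def conv_valid(size, kern):
    # 'valid' convolution, stride 1: output = input - kernel + 1
    return size - kern + 1

def pool_shrink(size, pool):
    # non-overlapping pooling with ignore_border=True: floor division
    return size // pool

h = w = 64
h = pool_shrink(conv_valid(h, 5), 2)
w = pool_shrink(conv_valid(w, 5), 2)
print(h, w)    # 30 30 -> pool1 feature map size
h = conv_valid(h, 3)
w = conv_valid(w, 3)
print(h, w)    # 28 28 -> conv2 feature map size, the input to spp()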
def burn():
    sz = 128
    img_shp = [sz, sz, sz, sz]
    kern_shp = [sz // 2, sz, 3, 3]
    out_shp = get_conv_output_shape(img_shp, kern_shp, "valid", (1, 1))

    img = tt.tensor4("img")
    kern = tt.tensor4("kern")
    out = tt.tensor4("out")

    def rand(shp):
        return np.random.rand(*shp).astype(theano.config.floatX)

    img = theano.shared(rand(img_shp))
    kern = theano.shared(rand(kern_shp))
    out = theano.shared(rand(out_shp))

    # beta=1 is needed to force the reuse of out; otherwise it is
    # replaced by a GpuAllocEmpty
    o1 = dnn._dnn_conv(img, kern, conv_mode="conv", out=out, beta=1.0)
    mode = theano.compile.get_default_mode().including("local_remove_all_assert")
    f = theano.function([], [o1], mode=mode)
    theano.printing.debugprint(f)
    print("Start computation")
    for i in range(10000):
        f.fn()
    print("Computation stopped")
def infer_shape(self, node, input_shape):
    imshp = input_shape[0]
    kshp = input_shape[1]
    res = get_conv_output_shape(
        imshp, kshp, self.border_mode, self.subsample, self.filter_dilation
    )
    return [res]
def array_like_conv_output(self, inputs_shape, filters_shape, border_mode,
                           subsample, dilation, dtype):
    # Return a random array with inferred convolution output shape.
    out_shp = get_conv_output_shape(inputs_shape, filters_shape,
                                    border_mode, subsample, dilation)
    out_shp = assert_conv_shape(out_shp)
    return np.random.random(out_shp).astype(dtype)
def test_basic(self):
    image_shape, kernel_shape = (3, 2, 8, 9), (4, 2, 5, 6)
    sub_sample = (1, 2)
    test1_params = get_conv_output_shape(
        image_shape, kernel_shape, 'valid', sub_sample)
    test2_params = get_conv_output_shape(
        image_shape, kernel_shape, 'half', sub_sample)
    test3_params = get_conv_output_shape(
        image_shape, kernel_shape, 'full', sub_sample)
    test4_params = get_conv_output_shape(
        image_shape, kernel_shape, (1, 2), sub_sample)

    self.assertTrue(test1_params == (3, 4, 4, 2))
    self.assertTrue(test2_params == (3, 4, 8, 5))
    self.assertTrue(test3_params == (3, 4, 12, 7))
    self.assertTrue(test4_params == (3, 4, 6, 4))
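The expected tuples follow from the per-axis rule out = (in + pad_l + pad_r - kernel) // stride + 1, where 'valid' pads with 0, 'half' with kernel // 2 per side, and 'full' with kernel - 1 per side. Working only the width axis (in = 9, kernel = 6, stride = 2) by hand, as a sanity check of the asserted values:

i, k, s = 9, 6, 2
print((i - k) // s + 1)                  # 'valid'       -> 2
print((i + 2 * (k // 2) - k) // s + 1)   # 'half'        -> 5
print((i + 2 * (k - 1) - k) // s + 1)    # 'full'        -> 7
print((i + 2 * 2 - k) // s + 1)          # pad = (1, 2)  -> 4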
def get_out_shape(ishape, kshape, border_mode, subsample):
    """
    This function computes the output shape for a convolution with
    the specified parameters. `ishape` and `kshape` can be symbolic
    or scalar.
    """
    return get_conv_output_shape(ishape, kshape, border_mode, subsample)
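Since the helper just forwards to `get_conv_output_shape`, which only does arithmetic on the shape entries, it indeed accepts scalars or symbolic variables, as the docstring says. A short usage sketch with made-up shapes:

from theano.tensor.nnet.abstract_conv import get_conv_output_shape
import theano.tensor as T

# concrete shapes: batch 1, 3 input channels, 32x32 image, 8 filters of 5x5
print(get_conv_output_shape((1, 3, 32, 32), (8, 3, 5, 5), 'valid', (1, 1)))
# -> (1, 8, 28, 28)

# a symbolic batch size simply passes through as a symbolic entry
n = T.iscalar('n')
out = get_conv_output_shape((n, 3, 32, 32), (8, 3, 5, 5), 'valid', (1, 1))
# out[0] is the symbolic n; the spatial entries are still plain ints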
def test_basic_3d(self):
    image_shape, kernel_shape = (3, 2, 12, 9, 7), (4, 2, 5, 6, 4)
    sub_sample = (1, 2, 1)
    filter_dilation = (2, 1, 1)
    test1_params = get_conv_output_shape(
        image_shape, kernel_shape, 'valid', sub_sample, filter_dilation)
    test2_params = get_conv_output_shape(
        image_shape, kernel_shape, 'half', sub_sample, filter_dilation)
    test3_params = get_conv_output_shape(
        image_shape, kernel_shape, 'full', sub_sample, filter_dilation)
    test4_params = get_conv_output_shape(
        image_shape, kernel_shape, (1, 2, 3), sub_sample, filter_dilation)

    self.assertTrue(test1_params == (3, 4, 4, 2, 4))
    self.assertTrue(test2_params == (3, 4, 12, 5, 8))
    self.assertTrue(test3_params == (3, 4, 20, 7, 10))
    self.assertTrue(test4_params == (3, 4, 6, 4, 10))
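With `filter_dilation`, the same per-axis rule applies to the implicitly dilated kernel size (k - 1) * d + 1. The helper below exists only to double-check the 3D expectations above; it is not part of Theano:

def out_axis(i, k, s, d, pad):
    dk = (k - 1) * d + 1  # implicit dilated kernel size
    if pad == 'valid':
        p = 0
    elif pad == 'half':
        p = dk // 2
    elif pad == 'full':
        p = dk - 1
    else:
        p = pad
    return (i + 2 * p - dk) // s + 1

img, kern = (12, 9, 7), (5, 6, 4)
sub, dil = (1, 2, 1), (2, 1, 1)
for mode in ('valid', 'half', 'full'):
    print([out_axis(i, k, s, d, mode)
           for i, k, s, d in zip(img, kern, sub, dil)])
# [4, 2, 4], [12, 5, 8], [20, 7, 10] -- the spatial parts asserted above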
def infer_shape(self, node, input_shape):
    imshp = input_shape[0]
    kshp = input_shape[1]
    res = get_conv_output_shape(
        imshp, kshp, self.border_mode, self.subsample)
    return [res]
def get_conv_shape(input_shape, filter_shape, padding, stride):
    """
    Helper method to calculate the shape after a convolution, given the
    input parameters. This isn't used for our output_size calculations
    because Theano provides a function specific to its conv op.
    """
    if isinstance(input_shape, Iterable):
        shape = get_conv_output_shape(input_shape, filter_shape,
                                      padding, stride)
    else:
        shape = get_conv_shape_1axis(input_shape, filter_shape,
                                     padding, stride)
    return shape
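The dispatch mirrors the two Theano helpers it wraps: a full shape tuple goes to `get_conv_output_shape`, a bare int to the per-axis `get_conv_shape_1axis`. A short sketch of each call form, with made-up shapes:

from theano.tensor.nnet.abstract_conv import (get_conv_output_shape,
                                              get_conv_shape_1axis)

# iterable input: the whole (batch, channels, rows, cols) tuple at once
print(get_conv_output_shape((1, 3, 32, 32), (8, 3, 5, 5), 'valid', (1, 1)))
# -> (1, 8, 28, 28)

# scalar input: one spatial axis at a time
print(get_conv_shape_1axis(32, 5, 'valid', 1))
# -> 28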
def get_if_valid_conv_output_shape(case_tuple):
    # Filter function to keep only cases that produce a valid
    # convolution output shape.
    out_shp = get_conv_output_shape(case_tuple[0],  # input shape
                                    case_tuple[1],  # filter shape
                                    case_tuple[4],  # border mode
                                    case_tuple[2],  # subsample
                                    case_tuple[3])  # dilation
    try:
        return assert_conv_shape(out_shp)
    except ValueError:
        return False
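The `ValueError` branch exists because nothing stops the raw formula from going non-positive: a 'valid' convolution with a kernel larger than the image yields a negative spatial dimension, which `assert_conv_shape` rejects. A made-up example of such an invalid case:

from theano.tensor.nnet.abstract_conv import get_conv_output_shape

# 7x7 kernel on a 4x4 image in 'valid' mode: (4 - 7) + 1 = -2
print(get_conv_output_shape((1, 3, 4, 4), (8, 3, 7, 7), 'valid', (1, 1)))
# -> (1, 8, -2, -2); assert_conv_shape raises on it, so the filter
# above maps the case to False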
def CNN(x, c_l1, c_l2, f_l1, f_l2, PP, ims):
    print(ims)
    conv1 = tensor.nnet.relu(conv2d(x, c_l1))  # default stride=1 -- subsample=(1, 1)
    conv1_shp = get_conv_output_shape(ims, c_l1.get_value().shape,
                                      border_mode='valid', subsample=(1, 1))
    print(conv1_shp)
    pp = tensor.reshape(conv1, conv1_shp[:2] + (conv1_shp[2] * conv1_shp[3],))
    print(pp)
    pool1 = pool_2d(conv1, (2, 2), st=(2, 2), ignore_border=True)  # default: max pooling
    pool1_shp = get_pool_output_shape(conv1_shp, pool_size=(2, 2), st=(2, 2),
                                      ignore_border=True)
    print(pool1_shp)
    conv2 = tensor.nnet.relu(conv2d(pool1, c_l2))
    conv2_shp = get_conv_output_shape(pool1_shp, c_l2.get_value().shape,
                                      border_mode='valid', subsample=(1, 1))
    print(conv2_shp)
    # pool2 = pool_2d(conv2, (2, 2), st=(2, 2), ignore_border=True)
    pool2 = spp(conv2, conv2_shp, PP, 'max')  # spatial pyramid pooling
    fpool2 = tensor.flatten(pool2, outdim=2)
    full1 = tensor.nnet.relu(tensor.dot(fpool2, f_l1))
    pyx = tensor.nnet.sigmoid(tensor.dot(full1, f_l2))
    return c_l1, c_l2, f_l1, f_l2, pyx
def _build(self, input_tensor):
    """Build 2D convolution operation of the input tensor

    Parameters
    ----------
    input_tensor : Tensor
        4D Tensor with shape (batch, #input channel, row, col)

    Returns
    -------
    Tensor
        4D Tensor with shape (batch, #output channel, row, col)
    """
    input_shape = input_tensor.shape
    _LG.debug('    input_shape: %s', input_shape)
    if not len(input_shape) == 4:
        raise ValueError(
            'Input tensor must be 4D. ({})'.format(input_tensor))

    border_mode = _map_border_mode(self.args['padding'])
    subsample = _get_subsample(self.args['strides'])
    filter_shape = self._get_filter_shape(input_shape[1])
    bias_shape = (filter_shape[0],)
    output_shape = get_conv_output_shape(
        input_shape, filter_shape, border_mode, subsample)
    _check_output_shape(input_shape, filter_shape, border_mode, subsample)

    _LG.debug('    border_mode: %s', border_mode)
    _LG.debug('    subsample: %s', subsample)
    _LG.debug('    filter_shape: %s', filter_shape)
    _LG.debug('    output_shape: %s', output_shape)

    self._build_parameters(filter_shape, bias_shape, input_tensor.dtype)

    filters = self.get_parameter_variable('filter')
    output_tensor = T.nnet.conv2d(
        input_tensor.unwrap(), filters=filters.unwrap(),
        input_shape=input_shape, filter_shape=filter_shape,
        border_mode=border_mode, subsample=subsample)
    if self.args['with_bias']:
        bias = self.get_parameter_variable('bias').unwrap()
        bias = bias.dimshuffle(('x', 0, 'x', 'x'))
        output_tensor = bias + output_tensor
    return wrapper.Tensor(output_tensor, shape=output_shape, name='output')
def local_conv2d_gradinputs_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs

    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
        rval = convTransp3D(W=shuffled_kern, b=b,
                            d=(node.op.subsample[0], node.op.subsample[1], 1),
                            H=shuffled_topgrad,
                            RShape=(shape[0], shape[1], 1))
        copy_stack_trace(node.outputs[0], rval)

        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    mode = 'valid'
    if not node.op.border_mode == 'full':
        mode = 'full'
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(op_imshp, op_kshp,
                                   node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode,
                                       (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(imshp, op_kshp[2:], nkern, op_imshp[0],
                 1, 1, output_mode=mode,
                 unroll_batch=None, unroll_kern=None, unroll_patch=None,
                 imshp_logical=imshp_logical,
                 kshp_logical=None,
                 version=-1,
                 direction_hint='bprop inputs')
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)
    din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], din)
    return [din]
def local_conv2d_gradweight_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs

    if (not isinstance(img.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return

    if node.op.border_mode == 'valid' and \
            (node.op.subsample != (1, 1)):
        # Use the gradient as defined in conv3D, because the implementation
        # by Conv is slow (about 3x slower than conv3D, and probably 10x
        # slower than it could be), and incorrect when subsample > 2.
        # Build a "node" that should be equivalent to the one given by
        # self.make_node, but using convGrad3D instead.
        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        rval = convGrad3D(V=shuffled_img,
                          d=(node.op.subsample[0], node.op.subsample[1], 1),
                          WShape=(shuffled_topgrad.shape[4],
                                  shape[0], shape[1], 1,
                                  shuffled_img.shape[4]),
                          dCdH=shuffled_topgrad)
        copy_stack_trace(node.outputs[0], rval)

        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = rval[:, :, ::-1, ::-1]
        rval = theano.tensor.patternbroadcast(rval,
                                              node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = get_conv_output_shape(op_imshp, op_kshp,
                                   node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode,
                                       (1, 1))[2:]

    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == 'valid':
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == 'full':
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
                unroll_batch=None, unroll_kern=None, unroll_patch=None,
                imshp_logical=imshp_logical,
                kshp_logical=kshp_logical,
                kshp_logical_top_aligned=kshp_logical_top_aligned,
                direction_hint='bprop weights')
    res = dw(img, filters)
    copy_stack_trace(node.outputs[0], res)

    if node.op.border_mode == 'valid':
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], res)
    return [res]
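Both rewrites above call `get_conv_output_shape` twice with the same shapes: once with the real `subsample` to get `outshp` (the strided output actually produced) and once with (1, 1) to get `fulloutshp` (what unit stride would have produced); the pair lets `ConvOp` treat the subsampled gradient as a logically larger image via `imshp_logical` / `kshp_logical`. Concretely, with a made-up 8x8 image, 3x3 kernel, and stride 2, in the style of the calls above:

from theano.tensor.nnet.abstract_conv import get_conv_output_shape

op_imshp = (1, 3, 8, 8)  # hypothetical image shape (b, c, h, w)
op_kshp = (4, 3, 3, 3)   # hypothetical kernel shape (n, c, kh, kw)
outshp = get_conv_output_shape(op_imshp, op_kshp, 'valid', (2, 2))[2:]
fulloutshp = get_conv_output_shape(op_imshp, op_kshp, 'valid', (1, 1))[2:]
print(outshp, fulloutshp)  # (3, 3) (6, 6)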
def c_code(self, node, name, inp, out, sub):
    x, = inp
    dH, dW = self.subsample

    if self.imshp is None:
        self.imshp = x.shape
    i_n, i_c, i_h, i_w = self.imshp

    if len(self.kshp) == 5:
        grp, k_n, k_c, k_h, k_w = self.kshp
        assert i_c == k_c * grp
    else:
        k_n, k_c, k_h, k_w = self.kshp
        grp = 1

    o_n, o_c, o_h, o_w = get_conv_output_shape(
        image_shape=self.imshp,
        kernel_shape=self.kshp,
        border_mode=self.border_mode,
        filter_dilation=self.filter_dilation,
        subsample=self.subsample)

    if self.border_mode == 'valid':
        padH, padW = (0, 0)
    elif self.border_mode == 'full':
        padH, padW = ((k_h - 1), (k_w - 1))
    elif self.border_mode == 'half':
        padH, padW = ((k_h / 2), (k_w / 2))
    elif isinstance(self.border_mode, tuple):
        padH, padW = self.border_mode
    else:
        raise ValueError("border_mode must have two elements")

    z, = out

    if 'float32' == node.inputs[0].type.dtype:
        precision = 'F32'
    elif 'float64' == node.inputs[0].type.dtype:
        precision = 'F64'
    else:
        raise Exception("Type %s is not supported!" %
                        node.inputs[0].type.dtype)
    fail = sub['fail']

    ccode = """
        if (1 == first_run) {
            int convPadding[2];
            size_t convStride[2], weightSize[5], weightStride[5],
                   imageSize[4], imageStride[4], zSize[4], zStride[4];
            convStride[0] = %(dW)s;
            convStride[1] = %(dH)s;
            convPadding[0] = -%(padW)s;
            convPadding[1] = -%(padH)s;

            imageSize[0] = %(i_w)s;  //w
            imageSize[1] = %(i_h)s;  //h
            imageSize[2] = %(i_c)s;  //c
            imageSize[3] = %(i_n)s;  //n
            imageStride[0] = 1;
            imageStride[1] = imageSize[0];
            imageStride[2] = imageSize[0] * imageSize[1];
            imageStride[3] = imageSize[0] * imageSize[1] * imageSize[2];

            weightSize[0] = %(k_w)s;
            weightSize[1] = %(k_h)s;
            weightSize[2] = %(k_c)s;
            weightSize[3] = %(k_n)s;
            weightSize[4] = %(grp)s;
            weightStride[0] = 1;
            weightStride[1] = weightSize[0];
            weightStride[2] = weightSize[0] * weightSize[1];
            weightStride[3] = weightSize[0] * weightSize[1] * weightSize[2];
            weightStride[4] = weightSize[0] * weightSize[1] * weightSize[2] * weightSize[3];

            zSize[0] = %(o_w)s;
            zSize[1] = %(o_h)s;
            zSize[2] = %(o_c)s;
            zSize[3] = %(o_n)s;
            zStride[0] = 1;
            zStride[1] = zSize[0];
            zStride[2] = zSize[0] * zSize[1];
            zStride[3] = zSize[0] * zSize[1] * zSize[2];

            const int group = %(grp)s;

            //create user layout
            CHECK_ERR( dnnLayoutCreate_%(precision)s(&layout_user, DIMENSION,
                       imageSize, imageStride), err );

            CHECK_ERR( dnnGroupsConvolutionCreateForward_%(precision)s(&primitive, NULL,
                       dnnAlgorithmConvolutionDirect, group, DIMENSION, imageSize,
                       zSize, weightSize, convStride, convPadding, dnnBorderZeros), err );

            CHECK_ERR( dnnLayoutCreateFromPrimitive_%(precision)s(&layout_internal,
                       primitive, dnnResourceSrc), err );
        }

        if (!dnnLayoutCompare_%(precision)s(layout_user, layout_internal)) {
            if (NULL == to_internal) {
                CHECK_ERR( dnnConversionCreate_%(precision)s(&to_internal, layout_user,
                           layout_internal), err );
            }
        }

        if (NULL == %(z)s) {
            //Create PyArrayObject for output
            %(z)s = (PyArrayObject*)PyArray_ZEROS(DIMENSION, PyArray_DIMS(%(x)s),
                                                  PyArray_TYPE(%(x)s), 0);
            if (NULL == %(z)s) {
                %(fail)s
            }
        }

        if (NULL == internal_buf) {
            CHECK_ERR( dnnAllocateBuffer_%(precision)s((void**)&internal_buf,
                       layout_internal), err );
        }

        if (to_internal) {
            convert_resources[dnnResourceFrom] = (PyArray_DATA(%(x)s));
            convert_resources[dnnResourceTo] = (void*)(internal_buf);
            CHECK_ERR( dnnExecute_%(precision)s(to_internal, convert_resources), err );
        } else {
            internal_buf = (PyArray_DATA(%(x)s));
        }

        if (layout_internal != ((dnnLayout_t*)PyArray_DATA(%(z)s))[0]) {
            ((dnnLayout_t*)PyArray_DATA(%(z)s))[0] = layout_internal;
        }
        if (internal_buf != ((void**)PyArray_DATA(%(z)s))[1]) {
            ((void**)PyArray_DATA(%(z)s))[1] = internal_buf;
        }

        first_run = 0;

        #ifdef _MKL_DEBUG_
            std::cout << "U2IConv2D: from buffer: " << convert_resources[dnnResourceFrom]
                      << " to buffer: " << convert_resources[dnnResourceTo] << std::endl;
        #endif
    """ % locals()
    return ccode
def local_conv2d_gradinputs_cpu(node):
    if (not isinstance(node.op, AbstractConv2d_gradInputs)
            or node.inputs[0].dtype == "float16"):
        return None
    kern, topgrad, shape = node.inputs

    if not isinstance(kern.type, TensorType) or not isinstance(
            topgrad.type, TensorType):
        return None
    if node.op.border_mode not in ["full", "valid"]:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None
    if node.op.num_groups > 1 or node.op.unshared:
        return None

    # Conv 3d implementation, needed when subsample > 2
    if node.op.border_mode == "valid" and node.op.subsample != (1, 1):
        # The op doesn't support that anymore.
        return False

    # Conv2d Implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    mode = "valid"
    if not node.op.border_mode == "full":
        mode = "full"
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        node.op.border_mode,
        node.op.subsample,
        node.op.filter_dilation,
    )[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(
        imshp,
        op_kshp[2:],
        nkern,
        op_imshp[0],
        1,
        1,
        output_mode=mode,
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=None,
        version=-1,
        direction_hint="bprop inputs",
    )
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)
    din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], din)
    return [din]
def local_conv2d_gradweight_cpu(node):
    if (not isinstance(node.op, AbstractConv2d_gradWeights)
            or node.inputs[0].dtype == "float16"):
        return None
    img, topgrad, shape = node.inputs

    if not isinstance(img.type, TensorType) or not isinstance(
            topgrad.type, TensorType):
        return None
    if node.op.border_mode not in ["full", "valid"]:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return
    if node.op.num_groups > 1 or node.op.unshared:
        return None

    if node.op.border_mode == "valid" and (node.op.subsample != (1, 1)):
        return None

    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            # We cannot infer the shapes
            return None

    # Determine gradient on kernels
    assert len(op_imshp) == 4 and len(op_kshp) == 4

    outshp = get_conv_output_shape(
        op_imshp,
        op_kshp,
        node.op.border_mode,
        node.op.subsample,
        node.op.filter_dilation,
    )[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode, (1, 1))[2:]

    newimg = img.dimshuffle((1, 0, 2, 3))
    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == "valid":
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
        (bsize, nkern) = (op_imshp[1], op_kshp[0])
        imshp = (op_imshp[0], op_imshp[2], op_imshp[3])
        kshp = outshp
    elif node.op.border_mode == "full":
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
        kshp_logical_top_aligned = True
        imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1])
        (bsize, nkern) = (op_kshp[0], op_imshp[1])
        imshp = (op_imshp[0], outshp[0], outshp[1])
        kshp = op_imshp[2:]
    else:
        raise NotImplementedError(
            "Only [full,valid] modes are currently supported.")

    # Flip the kernels
    filters = filters[:, :, ::-1, ::-1]

    dw = ConvOp(
        imshp,
        kshp,
        nkern,
        bsize,
        1,
        1,
        output_mode="valid",
        unroll_batch=None,
        unroll_kern=None,
        unroll_patch=None,
        imshp_logical=imshp_logical,
        kshp_logical=kshp_logical,
        kshp_logical_top_aligned=kshp_logical_top_aligned,
        direction_hint="bprop weights",
    )
    res = dw(img, filters)
    copy_stack_trace(node.outputs[0], res)

    if node.op.border_mode == "valid":
        res = res.dimshuffle((1, 0, 2, 3))
        res = res[:, :, ::-1, ::-1]

    res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], res)
    return [res]
elif test == BWD_FILTER:
    check_config = cudnn.bwd_filter_algo_supports_dtype_config(
        args.algo, args.dtype, args.precision, ndim)
elif test == BWD_DATA:
    check_config = cudnn.bwd_data_algo_supports_dtype_config(
        args.algo, args.dtype, args.precision, ndim)
if not check_config:
    print('Warning: %s computation does not normally support configuration '
          '(%s, %s) for algo %s.'
          % (test, args.dtype, args.precision, args.algo), file=sys.stderr)

algo = args.algo
dtype = args.dtype
precision = args.precision
parameters = (args.input_shape, args.filter_shape, args.subsample,
              args.dilation, args.border_mode, args.conv_mode,
              args.alpha, args.beta)
if args.print_infos:
    CheckDnn.print_infos(count_tests=False)
print('======================')
print('Running', test, algo, dtype, precision, *parameters)
if test == FWD:
    tests.run_conv_fwd(algo, dtype, precision, parameters)
    expected_output_shape = get_conv_output_shape(
        args.input_shape, args.filter_shape, args.border_mode,
        args.subsample, args.dilation)
elif test == BWD_FILTER:
    tests.run_conv_gradweight(algo, dtype, precision, parameters)
    expected_output_shape = args.filter_shape
elif test == BWD_DATA:
    tests.run_conv_gradinput(algo, dtype, precision, parameters)
    expected_output_shape = args.input_shape
print('Computed shape:', expected_output_shape)
print('... OK')
def local_abstractconv_cudnn_alt(node):
    if not isinstance(node.op, (AbstractConv2d,
                                AbstractConv2d_gradWeights,
                                AbstractConv2d_gradInputs)):
        return

    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None
    inp1 = node.inputs[0]
    inp2 = node.inputs[1]

    if not dnn_available(inp1.type.context_name):
        return

    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])

    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )
    elif isinstance(op, AbstractConv2d_gradWeights):
        if (border_mode == "valid" and subsample == (1, 1)
                and filter_dilation == (1, 1) and num_groups == 1):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            out_shp = assert_conv_shape(out_shp)
            out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)

            conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad,
                                                                out, desc)
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]

            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None
    elif isinstance(op, AbstractConv2d_gradInputs):
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            conv_mode = "cross" if conv_mode == "conv" else "conv"
            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)

            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            shape = assert_conv_shape(shape)
            out = GpuAllocEmpty(dtype=topgrad.dtype, context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(topgrad, kerns,
                                                                out, desc)
        else:
            return None

    return [rval]