def local_conv2d_gradinputs_cpu(node): if not isinstance(node.op, AbstractConv2d_gradInputs): return None kern, topgrad, shape = node.inputs if ((not isinstance(kern.type, TensorType) or not isinstance(topgrad.type, TensorType))): return None if node.op.border_mode not in ['full', 'valid']: return None if not node.op.filter_flip: # Not tested yet return None # Conv 3d implementation, needed when subsample > 2 if node.op.border_mode == 'valid' and node.op.subsample != (1, 1): kern = kern[:, :, ::-1, ::-1] shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1) shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1) b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :]) rval = convTransp3D(W=shuffled_kern, b=b, d=(node.op.subsample[0], node.op.subsample[1], 1), H=shuffled_topgrad, RShape=(shape[0], shape[1], 1)) copy_stack_trace(node.outputs[0], rval) rval = theano.tensor.addbroadcast(rval, 3) rval = rval.dimshuffle(0, 4, 1, 2) rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], rval) return [rval] # Conv2d Implementation dx, dy = node.op.subsample if dx not in (1, 2) or dy not in (1, 2): # Not implemented in the gradient of ConvOp return None if node.op.imshp is None: op_imshp = (None, None, None, None) else: op_imshp = node.op.imshp if node.op.kshp is None: op_kshp = (None, None, None, None) else: op_kshp = node.op.kshp if None in op_imshp or None in op_kshp: if (dx, dy) != (1, 1): return None mode = 'valid' if not node.op.border_mode == 'full': mode = 'full' filters = kern.dimshuffle((1, 0, 2, 3)) filters = filters[:, :, ::-1, ::-1] outshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, node.op.subsample)[2:] fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[2:] nkern = op_imshp[1] imshp = (op_kshp[0], outshp[0], outshp[1]) imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1]) din = ConvOp(imshp, op_kshp[2:], nkern, op_imshp[0], 1, 1, output_mode=mode, unroll_batch=None, unroll_kern=None, unroll_patch=None, imshp_logical=imshp_logical, kshp_logical=None, version=-1, direction_hint='bprop inputs') din = din(topgrad, filters) copy_stack_trace(node.outputs[0], din) din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], din) return [din]
def local_conv2d_gradweight_cpu(node): if not isinstance(node.op, AbstractConv2d_gradWeights): return None img, topgrad, shape = node.inputs if ((not isinstance(img.type, TensorType) or not isinstance(topgrad.type, TensorType))): return None if node.op.border_mode not in ['full', 'valid']: return None if not node.op.filter_flip: # Not tested yet return if node.op.border_mode == 'valid' and \ (node.op.subsample != (1, 1)): # Use the gradient as defined in conv3D, because the implementation # by Conv is slow (about 3x slower than conv3D, and probably 10x # slower than it could be), and incorrect when subsample > 2. # build a "node", that should be equivalent to the one given by # self.make_node, but using convGrad3D instead. shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1) shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1) rval = convGrad3D(V=shuffled_img, d=(node.op.subsample[0], node.op.subsample[1], 1), WShape=(shuffled_topgrad.shape[4], shape[0], shape[1], 1, shuffled_img.shape[4]), dCdH=shuffled_topgrad) copy_stack_trace(node.outputs[0], rval) rval = theano.tensor.addbroadcast(rval, 3) rval = rval.dimshuffle(0, 4, 1, 2) rval = rval[:, :, ::-1, ::-1] rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], rval) return [rval] dx, dy = node.op.subsample if dx not in (1, 2) or dy not in (1, 2): # Not implemented in the gradient of ConvOp return None if node.op.imshp is None: op_imshp = (None, None, None, None) else: op_imshp = node.op.imshp if node.op.kshp is None: op_kshp = (None, None, None, None) else: op_kshp = node.op.kshp if None in op_imshp or None in op_kshp: if (dx, dy) != (1, 1): # We cannot infer the shapes return None # Determine gradient on kernels assert len(op_imshp) == 4 and len(op_kshp) == 4 outshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, node.op.subsample)[2:] fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[2:] newimg = img.dimshuffle((1, 0, 2, 3)) newtopgrad = topgrad.dimshuffle((1, 0, 2, 3)) if node.op.border_mode == 'valid': (img, filters) = (newimg, newtopgrad) kshp_logical = fulloutshp kshp_logical_top_aligned = False imshp_logical = None (bsize, nkern) = (op_imshp[1], op_kshp[0]) imshp = (op_imshp[0], op_imshp[2], op_imshp[3]) kshp = outshp elif node.op.border_mode == 'full': (img, filters) = (newtopgrad, newimg) kshp_logical = None kshp_logical_top_aligned = True imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1]) (bsize, nkern) = (op_kshp[0], op_imshp[1]) imshp = (op_imshp[0], outshp[0], outshp[1]) kshp = op_imshp[2:] else: raise NotImplementedError( 'Only [full,valid] modes are currently supported.') # Flip the kernels filters = filters[:, :, ::-1, ::-1] dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid', unroll_batch=None, unroll_kern=None, unroll_patch=None, imshp_logical=imshp_logical, kshp_logical=kshp_logical, kshp_logical_top_aligned=kshp_logical_top_aligned, direction_hint='bprop weights') res = dw(img, filters) copy_stack_trace(node.outputs[0], res) if node.op.border_mode == 'valid': res = res.dimshuffle((1, 0, 2, 3)) res = res[:, :, ::-1, ::-1] res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], res) return [res]
def exec_multilayer_conv_nnet_old(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_print=True, repeat=1, unroll_patch=False, unroll_patch_size=False, verbose=0): # build actual input images imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2]) a = T.dmatrix() kerns = [a for i in nkerns] inputs4 = dmatrix4() kerns4 = dmatrix4() # for each layer ntot = 0 tctot = 0 tpytot = 0 for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))): if do_print: print '************* layer %i ***************' % n_layer print conv_mode, ss, n_layer, kshp, nkern # actual values w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp]) w_flip = flip(w, kshp).reshape(w.shape) ## manual implementation # check first stage padimg = imgval if conv_mode == 'full': padimg_shp = N.array( imshp[1:]) + 2 * (N.array(kshp) - N.array([1, 1])) padimg = N.zeros(N.r_[bsize, imshp[0], padimg_shp]) padimg[:, :, kshp[0] - 1:-kshp[0] + 1, kshp[1] - 1:-kshp[1] + 1] = imgval outshp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) time1 = time.time() outval = N.zeros(N.r_[bsize, outshp]) if validate: # causes an atexit problem from scipy.signal.sigtools import _convolve2d from scipy.signal.signaltools import _valfrommode, _bvalfromboundary val = _valfrommode(conv_mode) bval = _bvalfromboundary('fill') for b in range(bsize): # loop over batches for n in range(nkern): # loop over filters for i in range(imshp[0]): # loop over input feature maps outval[b,n,...] += _convolve2d(\ imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]] ntot += time.time() - time1 # ConvOp if unroll_patch and not unroll_patch_size: conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) else: conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) l1shp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) propup2 = function([inputs4, kerns4], conv_op) propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py")) time1 = time.time() for i in range(repeat): hidval2_ = propup2(imgval, w_flip) hidval2 = hidval2_ #[:,:,0::ss[0],0::ss[1]] tctot += time.time() - time1 if conv_op_py: time1 = time.time() for i in range(repeat): hidval3_ = propup3(imgval, w_flip) hidval3 = hidval3_ #[:,:,0::ss[0],0::ss[1]] tpytot += time.time() - time1 assert (N.abs(hidval2 - hidval3) < 1e-5).all() else: tpytot += 0 if validate: temp = N.abs(outval - hidval2) assert (temp < 1e-5).all() if validate and conv_op_py: temp = N.abs(outval - hidval3) assert (temp < 1e-5).all() imshp = tuple(outshp) imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) return tctot, tpytot, ntot
def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), do_print=True, repeat=1, unroll_patch=False, unroll_patch_size=False, verbose=0): # build actual input images imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2]) a = T.dmatrix() kerns = [a for i in nkerns] inputs4 = dmatrix4() kerns4 = dmatrix4() # for each layer ntot = 0 tctot = 0 tpytot = 0 for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))): if do_print: print '************* layer %i ***************' % n_layer print conv_mode, ss, n_layer, kshp, nkern # actual values w = global_rng.random_sample(N.r_[nkern, imshp[0], kshp]) w_flip = flip(w, kshp).reshape(w.shape) outshp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) time1 = time.time() outval = N.zeros(N.r_[bsize, outshp]) # ConvOp if unroll_patch and not unroll_patch_size: conv_op = ConvOp(dx=ss[0], dy=ss[1], output_mode=conv_mode, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) else: conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0], ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4) l1shp = N.hstack( (nkern, ConvOp.getOutputShape(imshp[1:], kshp, ss, conv_mode))) propup2 = function([inputs4, kerns4], conv_op) time1 = time.time() for i in range(repeat): hidval2_ = propup2(imgval, w_flip) hidval2 = hidval2_ #[:,:,0::ss[0],0::ss[1]] tctot += time.time() - time1 imshp = tuple(outshp) imgval = outval.reshape(bsize, outshp[0], outshp[1], outshp[2]) return tctot, tpytot, ntot
def local_conv2d_gradinputs_cpu(node): if (not isinstance(node.op, AbstractConv2d_gradInputs) or node.inputs[0].dtype == "float16"): return None kern, topgrad, shape = node.inputs if not isinstance(kern.type, TensorType) or not isinstance( topgrad.type, TensorType): return None if node.op.border_mode not in ["full", "valid"]: return None if not node.op.filter_flip: # Not tested yet return None if node.op.num_groups > 1 or node.op.unshared: return None # Conv 3d implementation, needed when subsample > 2 if node.op.border_mode == "valid" and node.op.subsample != (1, 1): # The op don't support that anymore. return False # Conv2d Implementation dx, dy = node.op.subsample if dx not in (1, 2) or dy not in (1, 2): # Not implemented in the gradient of ConvOp return None if node.op.imshp is None: op_imshp = (None, None, None, None) else: op_imshp = node.op.imshp if node.op.kshp is None: op_kshp = (None, None, None, None) else: op_kshp = node.op.kshp if None in op_imshp or None in op_kshp: if (dx, dy) != (1, 1): return None mode = "valid" if not node.op.border_mode == "full": mode = "full" filters = kern.dimshuffle((1, 0, 2, 3)) filters = filters[:, :, ::-1, ::-1] outshp = get_conv_output_shape( op_imshp, op_kshp, node.op.border_mode, node.op.subsample, node.op.filter_dilation, )[2:] fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[2:] nkern = op_imshp[1] imshp = (op_kshp[0], outshp[0], outshp[1]) imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1]) din = ConvOp( imshp, op_kshp[2:], nkern, op_imshp[0], 1, 1, output_mode=mode, unroll_batch=None, unroll_kern=None, unroll_patch=None, imshp_logical=imshp_logical, kshp_logical=None, version=-1, direction_hint="bprop inputs", ) din = din(topgrad, filters) copy_stack_trace(node.outputs[0], din) din = theano.tensor.patternbroadcast(din, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], din) return [din]
def local_conv2d_gradweight_cpu(node): if (not isinstance(node.op, AbstractConv2d_gradWeights) or node.inputs[0].dtype == "float16"): return None img, topgrad, shape = node.inputs if not isinstance(img.type, TensorType) or not isinstance( topgrad.type, TensorType): return None if node.op.border_mode not in ["full", "valid"]: return None if not node.op.filter_flip: # Not tested yet return if node.op.num_groups > 1 or node.op.unshared: return None if node.op.border_mode == "valid" and (node.op.subsample != (1, 1)): return None dx, dy = node.op.subsample if dx not in (1, 2) or dy not in (1, 2): # Not implemented in the gradient of ConvOp return None if node.op.imshp is None: op_imshp = (None, None, None, None) else: op_imshp = node.op.imshp if node.op.kshp is None: op_kshp = (None, None, None, None) else: op_kshp = node.op.kshp if None in op_imshp or None in op_kshp: if (dx, dy) != (1, 1): # We cannot infer the shapes return None # Determine gradient on kernels assert len(op_imshp) == 4 and len(op_kshp) == 4 outshp = get_conv_output_shape( op_imshp, op_kshp, node.op.border_mode, node.op.subsample, node.op.filter_dilation, )[2:] fulloutshp = get_conv_output_shape(op_imshp, op_kshp, node.op.border_mode, (1, 1))[2:] newimg = img.dimshuffle((1, 0, 2, 3)) newtopgrad = topgrad.dimshuffle((1, 0, 2, 3)) if node.op.border_mode == "valid": (img, filters) = (newimg, newtopgrad) kshp_logical = fulloutshp kshp_logical_top_aligned = False imshp_logical = None (bsize, nkern) = (op_imshp[1], op_kshp[0]) imshp = (op_imshp[0], op_imshp[2], op_imshp[3]) kshp = outshp elif node.op.border_mode == "full": (img, filters) = (newtopgrad, newimg) kshp_logical = None kshp_logical_top_aligned = True imshp_logical = (op_imshp[0], fulloutshp[0], fulloutshp[1]) (bsize, nkern) = (op_kshp[0], op_imshp[1]) imshp = (op_imshp[0], outshp[0], outshp[1]) kshp = op_imshp[2:] else: raise NotImplementedError( "Only [full,valid] modes are currently supported.") # Flip the kernels filters = filters[:, :, ::-1, ::-1] dw = ConvOp( imshp, kshp, nkern, bsize, 1, 1, output_mode="valid", unroll_batch=None, unroll_kern=None, unroll_patch=None, imshp_logical=imshp_logical, kshp_logical=kshp_logical, kshp_logical_top_aligned=kshp_logical_top_aligned, direction_hint="bprop weights", ) res = dw(img, filters) copy_stack_trace(node.outputs[0], res) if node.op.border_mode == "valid": res = res.dimshuffle((1, 0, 2, 3)) res = res[:, :, ::-1, ::-1] res = theano.tensor.patternbroadcast(res, node.outputs[0].broadcastable) copy_stack_trace(node.outputs[0], res) return [res]