def local_conv3d_gradinputs_cpu(node):
    if not isinstance(node.op, AbstractConv3d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['valid', (0, 0, 0)]:
        return None
    if node.op.filter_dilation != (1, 1, 1):
        return None

    # Flip the kernel if necessary (conv3D does not flip).
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1, ::-1]

    # conv3D expects shape (batch, row, column, time, channel).
    kern = kern.dimshuffle(0, 2, 3, 4, 1)
    topgrad = topgrad.dimshuffle(0, 2, 3, 4, 1)
    bias = theano.tensor.zeros_like(kern[0, 0, 0, 0, :])

    rval = convTransp3D(kern, bias, node.op.subsample, topgrad, shape)
    copy_stack_trace(node.outputs[0], rval)
    rval = rval.dimshuffle(0, 4, 1, 2, 3)
    rval = theano.tensor.patternbroadcast(rval,
                                          node.outputs[0].broadcastable)
    return [rval]

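# For context: a rewrite like the one above only fires once it is wired
# into Theano's optimization database.  A minimal sketch of that wiring
# follows.  The @local_optimizer decorator and the imports are real Theano
# APIs; registering into conv_groupopt with position 30 and these tags is
# an assumption patterned on how the conv2d rewrites in
# theano/tensor/nnet/opt.py are registered.
from theano.gof import local_optimizer
from theano.tensor.nnet.abstract_conv import AbstractConv3d_gradInputs
from theano.tensor.nnet.opt import conv_groupopt  # assumed registration point


@local_optimizer([AbstractConv3d_gradInputs])
def local_conv3d_gradinputs_cpu(node):
    # ... body exactly as defined above ...
    pass


conv_groupopt.register('local_conv3d_gradinputs_cpu',
                       local_conv3d_gradinputs_cpu, 30,
                       'fast_compile', 'fast_run')
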
def setUp(self):
    super(TestConv3D, self).setUp()
    utt.seed_rng()
    self.rng = N.random.RandomState(utt.fetch_seed())

    mode = copy.copy(theano.compile.mode.get_default_mode())
    mode.check_py_code = False

    self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.b = shared(N.zeros(1, dtype=floatX))
    self.rb = shared(N.zeros(1, dtype=floatX))
    self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.d = shared(N.ndarray(shape=(3,), dtype=int))

    self.H = conv3D(self.V, self.W, self.b, self.d)
    self.H_func = function([], self.H, mode=mode)
    self.H_shape_func = function([], self.H.shape, mode=mode)

    self.RShape = T.vector(dtype='int64')
    self.otherH = T.TensorType(
        floatX, (False, False, False, False, False))(name='otherH')
    self.transp = convTransp3D(self.W, self.rb, self.d,
                               self.otherH, self.RShape)
    self.transp_func = function([self.otherH, self.RShape],
                                self.transp, mode=mode)

    self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
    self.R_func = function([self.RShape], self.R, mode=mode)
    self.R_shape_func = function([self.RShape], self.R.shape)

    self.reconsObj = T.sum(T.sqr(self.V - self.R))
    self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

    self.gradientsFunc = function(
        [self.RShape],
        [T.grad(self.reconsObj, self.W),
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode=mode)
    self.check_c_against_python = function(
        [self.RShape],
        [T.grad(self.reconsObj, self.W),
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode='DEBUG_MODE')
    self.dCdW_shape_func = function(
        [self.RShape],
        T.grad(self.reconsObj, self.W).shape,
        mode=mode)

def grad(self, inputs, output_gradients):
    C, d, WShape, B = inputs
    dLdA, = output_gradients

    z = T.zeros_like(C[0, 0, 0, 0, :])
    dLdC = convTransp3D(dLdA, z, d, B, C.shape[1:4])
    # d actually does affect the outputs, so it is not disconnected;
    # its gradient is simply undefined (it takes discrete values).
    dLdd = grad_undefined(self, 1, d)
    # The shape of the weights does not affect the output elements.
    dLdWShape = DisconnectedType()()
    dLdB = conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)

    return [dLdC, dLdd, dLdWShape, dLdB]

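# Because grad() above returns a DisconnectedType for WShape, Theano also
# expects the op to declare that input disconnected via connection_pattern.
# A minimal sketch of the companion method such an op typically defines;
# note that a grad_undefined input (d) still counts as connected.
def connection_pattern(self, node):
    # One row per input (C, d, WShape, B), one column per output.
    # Only WShape has no influence on the output values.
    return [[True], [True], [False], [True]]
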
def setUp(self):
    utt.seed_rng()
    self.rng = N.random.RandomState(utt.fetch_seed())

    mode = copy.copy(theano.compile.mode.get_default_mode())
    mode.check_py_code = False

    self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.b = shared(N.zeros(1, dtype=floatX))
    self.rb = shared(N.zeros(1, dtype=floatX))
    self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.d = shared(N.ndarray(shape=(3,), dtype=int))

    self.H = conv3D(self.V, self.W, self.b, self.d)
    self.H_func = function([], self.H, mode=mode)
    self.H_shape_func = function([], self.H.shape, mode=mode)

    self.RShape = T.vector(dtype='int64')
    self.otherH = T.TensorType(
        floatX, (False, False, False, False, False))(name='otherH')
    self.transp = convTransp3D(self.W, self.rb, self.d,
                               self.otherH, self.RShape)
    self.transp_func = function([self.otherH, self.RShape],
                                self.transp, mode=mode)

    self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
    self.R_func = function([self.RShape], self.R, mode=mode)
    self.R_shape_func = function([self.RShape], self.R.shape)

    self.reconsObj = T.sum(T.sqr(self.V - self.R))
    self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

    self.gradientsFunc = function(
        [self.RShape],
        [T.grad(self.reconsObj, self.W),
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode=mode)
    self.check_c_against_python = function(
        [self.RShape],
        [T.grad(self.reconsObj, self.W),
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode='DEBUG_MODE')
    self.dCdW_shape_func = function(
        [self.RShape],
        T.grad(self.reconsObj, self.W).shape,
        mode=mode)

def local_conv2d_gradinputs_cpu(node):
    if not isinstance(node.op, AbstractConv2d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if (not isinstance(kern.type, TensorType) or
            not isinstance(topgrad.type, TensorType)):
        return None
    if node.op.border_mode not in ['full', 'valid']:
        return None
    if not node.op.filter_flip:
        # Not tested yet
        return None

    # Conv3D implementation, needed when subsample > 1
    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
        kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
        rval = convTransp3D(W=shuffled_kern, b=b,
                            d=(node.op.subsample[0],
                               node.op.subsample[1], 1),
                            H=shuffled_topgrad,
                            RShape=(shape[0], shape[1], 1))
        copy_stack_trace(node.outputs[0], rval)
        rval = theano.tensor.addbroadcast(rval, 3)
        rval = rval.dimshuffle(0, 4, 1, 2)
        rval = theano.tensor.patternbroadcast(
            rval, node.outputs[0].broadcastable)
        copy_stack_trace(node.outputs[0], rval)
        return [rval]

    # Conv2d implementation
    dx, dy = node.op.subsample
    if dx not in (1, 2) or dy not in (1, 2):
        # Not implemented in the gradient of ConvOp
        return None

    if node.op.imshp is None:
        op_imshp = (None, None, None, None)
    else:
        op_imshp = node.op.imshp

    if node.op.kshp is None:
        op_kshp = (None, None, None, None)
    else:
        op_kshp = node.op.kshp

    if None in op_imshp or None in op_kshp:
        if (dx, dy) != (1, 1):
            return None

    # The gradient of a 'full' convolution is a 'valid' one, and vice versa.
    mode = 'valid'
    if node.op.border_mode != 'full':
        mode = 'full'
    filters = kern.dimshuffle((1, 0, 2, 3))
    filters = filters[:, :, ::-1, ::-1]

    outshp = get_conv_output_shape(op_imshp, op_kshp,
                                   node.op.border_mode,
                                   node.op.subsample)[2:]
    fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
                                       node.op.border_mode,
                                       (1, 1))[2:]

    nkern = op_imshp[1]
    imshp = (op_kshp[0], outshp[0], outshp[1])
    imshp_logical = (op_kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(imshp, op_kshp[2:], nkern, op_imshp[0],
                 1, 1, output_mode=mode,
                 unroll_batch=None, unroll_kern=None, unroll_patch=None,
                 imshp_logical=imshp_logical,
                 kshp_logical=None,
                 version=-1,
                 direction_hint='bprop inputs')
    din = din(topgrad, filters)
    copy_stack_trace(node.outputs[0], din)
    din = theano.tensor.patternbroadcast(din,
                                         node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], din)
    return [din]

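# A quick, hedged illustration of the get_conv_output_shape calls above,
# whose [2:] slice keeps only the spatial dimensions.  The shapes below
# are made-up example values, not ones taken from the rewrite.
from theano.tensor.nnet.abstract_conv import get_conv_output_shape

# (batch, channels, rows, cols) images against (nkern, channels, kr, kc)
# kernels, 'valid' mode, no subsampling: output rows = 32 - 5 + 1 = 28.
out = get_conv_output_shape((64, 3, 32, 32), (8, 3, 5, 5), 'valid', (1, 1))
assert out == (64, 8, 28, 28)
assert out[2:] == (28, 28)  # the spatial slice used by the rewrite
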
def setUp(self):
    super(TestConv3D, self).setUp()
    utt.seed_rng()
    self.rng = N.random.RandomState(utt.fetch_seed())

    mode = copy.copy(theano.compile.mode.get_default_mode())
    mode.check_py_code = False

    self.W = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.W.name = 'W'
    self.b = shared(N.zeros(1, dtype=floatX))
    self.b.name = 'b'
    self.rb = shared(N.zeros(1, dtype=floatX))
    self.rb.name = 'rb'
    self.V = shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX))
    self.V.name = 'V'
    self.d = shared(N.ndarray(shape=(3,), dtype=int))
    self.d.name = 'd'

    self.H = conv3D(self.V, self.W, self.b, self.d)
    self.H.name = 'H'
    self.H_func = function([], self.H, mode=mode)
    self.H_shape_func = function([], self.H.shape, mode=mode)

    self.RShape = T.vector(dtype='int64')
    self.RShape.name = 'RShape'
    self.otherH = T.TensorType(
        floatX, (False, False, False, False, False))(name='otherH')
    self.transp = convTransp3D(self.W, self.rb, self.d,
                               self.otherH, self.RShape)
    self.transp.name = 'transp'
    self.transp_func = function([self.otherH, self.RShape],
                                self.transp, mode=mode)

    self.R = convTransp3D(self.W, self.rb, self.d, self.H, self.RShape)
    self.R.name = 'R'
    self.R_func = function([self.RShape], self.R, mode=mode)
    self.R_shape_func = function([self.RShape], self.R.shape)

    diff = self.V - self.R
    diff.name = 'diff'
    sqr = T.sqr(diff)
    sqr.name = 'sqr'
    self.reconsObj = T.sum(sqr)
    self.reconsObj.name = 'reconsObj'
    self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

    W_grad = T.grad(self.reconsObj, self.W)
    self.gradientsFunc = function(
        [self.RShape],
        [W_grad,
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode=mode)
    self.check_c_against_python = function(
        [self.RShape],
        [T.grad(self.reconsObj, self.W),
         T.grad(self.reconsObj, self.H),
         T.grad(self.reconsObj, self.V),
         T.grad(self.reconsObj, self.b)],
        mode='DEBUG_MODE')
    self.dCdW_shape_func = function(
        [self.RShape],
        T.grad(self.reconsObj, self.W).shape,
        mode=mode)

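# A hypothetical smoke test exercising the fixtures built in setUp();
# every concrete value below is an assumption chosen to match the
# (1, 1, 1, 1, 1) placeholder shapes above, not part of the original suite.
def test_fixtures_run(self):
    self.V.set_value(N.ones((1, 1, 1, 1, 1), dtype=floatX))
    self.W.set_value(N.ones((1, 1, 1, 1, 1), dtype=floatX))
    self.d.set_value(N.ones(3, dtype=int))   # strides of 1 in each dim
    rshape = N.array([1, 1, 1], dtype='int64')
    h = self.H_func()                        # forward pass
    dW, dH, dV, db = self.gradientsFunc(rshape)
    assert h.shape == (1, 1, 1, 1, 1)
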
def __call__(self, t):
    output = convTransp3D(self.W + t * self.dW,
                          self.b + t * self.db,
                          self.d,
                          self.H + t * self.dH,
                          self.RShape)
    return output

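# Callables of a single scalar t like the one above are the shape that
# theano.tests.unittest_tools.verify_grad accepts, which lets it
# finite-difference-check the directional derivative along (dW, db, dH).
# A minimal sketch of that use; `checker` standing for an instance of the
# class above and the t = 0 evaluation point are assumptions.
import numpy as np
from theano.tests import unittest_tools as utt

utt.verify_grad(checker, [np.asarray(0., dtype=floatX)])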