# Imports assumed for this snippet: conv2d_grad_wrt_inputs from Theano's
# abstract_conv module, plus the old-style Theano CUDA backend for the 3D
# branch (dnn_avail mirrors dnn.dnn_available()).
from theano.tensor.nnet.abstract_conv import conv2d_grad_wrt_inputs

try:
    from theano.sandbox.cuda import dnn
    dnn_avail = dnn.dnn_available()
except ImportError:
    dnn_avail = False


def upconv(x, w, stride, x_shape=None, w_shape=None, axis_order='dnn'):
    assert stride is not None
    stride = tuple(stride)
    conv_dim = len(stride)
    border_mode = 'valid'
    # if (x_shape is None) or (None in x_shape):  # variable batch size or so
    #     x_shape = None
    if conv_dim == 1:
        # Promote the 1D case to 2D by appending a broadcastable axis.
        x = x.dimshuffle(0, 1, 2, 'x')
        w = w.dimshuffle(0, 1, 2, 'x')
        if w_shape is not None:
            w_shape = list(w_shape) + [1]
        if x_shape is not None:
            x_shape = list(x_shape) + [1]
        stride = list(stride) + [1]
        y = conv2d_grad_wrt_inputs(x, w, x_shape, w_shape, border_mode,
                                   subsample=stride, filter_flip=False)
        y = y[:, :, :, 0]
    elif conv_dim == 2:
        y = conv2d_grad_wrt_inputs(x, w, x_shape, w_shape, border_mode,
                                   subsample=stride, filter_flip=False)
    elif conv_dim == 3:
        if not dnn_avail or axis_order != 'dnn':
            raise ValueError("Need dnn and dnn axis order")
        kerns = dnn.gpu_contiguous(w)
        image = dnn.gpu_contiguous(x)
        k = kerns.shape[1]
        img_sh = list(image.shape)
        # Keep the batch size, take the channel count from the kernel's
        # second axis and scale each spatial axis by its stride.
        out_sh = img_sh[:1] + [k] + [st * sh
                                     for st, sh in zip(stride, img_sh[2:])]
        out = dnn.gpu_alloc_empty(*out_sh)
        desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=stride,
                                  conv_mode='cross')(out.shape, kerns.shape)
        y = dnn.GpuDnnConv3dGradI()(kerns, image, out, desc)
    return y
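# A minimal usage sketch for upconv (shapes are assumptions, not from the
# source): a 2x-strided 2D upconvolution taking (n, 8, 16, 16) maps to
# (n, 4, 32, 32). Note that x_shape is the shape of the *result*, since
# conv2d_grad_wrt_inputs treats x as the output gradient of a forward conv,
# and w is laid out as (channels of x, channels of result, kh, kw).
import theano.tensor as TT


def _upconv_usage_sketch():
    x = TT.tensor4('x')   # feature maps of shape (n, 8, 16, 16)
    w = TT.tensor4('w')   # filters of shape (8, 4, 2, 2)
    # valid mode with kernel 2 and stride 2: 2 * (16 - 1) + 2 = 32
    return upconv(x, w, stride=(2, 2),
                  x_shape=(None, 4, 32, 32), w_shape=(8, 4, 2, 2))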
def _bilinear_upsampling_1D(self, inpt, ratio, batch_size=None,
                            num_input_channels=None):
    '''
    This implementation is a very minimally changed excerpt from:
    https://github.com/Theano/theano/blob/ddfd7d239a1e656cee850cdbc548da63f349c37d/theano/tensor/nnet/abstract_conv.py#L455
    '''
    if theano.config.device.startswith('gpu'):
        from theano.tensor.nnet.abstract_conv import (bilinear_kernel_1D,
                                                      conv2d_grad_wrt_inputs)
    else:
        raise AssertionError('Bilinear interpolation requires GPU and cuDNN.')
    try:
        up_bs = batch_size * num_input_channels
    except TypeError:
        up_bs = None
    row, col = inpt.shape[2:]
    up_input = inpt.reshape((-1, 1, row, col))
    concat_mat = T.concatenate((up_input[:, :, :1, :], up_input,
                                up_input[:, :, -1:, :]), axis=2)
    pad = 2 * ratio - (ratio - 1) // 2 - 1
    kern = bilinear_kernel_1D(ratio=ratio, normalize=True)
    upsampled_row = conv2d_grad_wrt_inputs(
        output_grad=concat_mat,
        filters=kern[np.newaxis, np.newaxis, :, np.newaxis],
        input_shape=(up_bs, 1, row * ratio, col),
        filter_shape=(1, 1, None, 1),
        border_mode=(pad, 0),
        subsample=(ratio, 1),
        filter_flip=True)
    return upsampled_row.reshape((inpt.shape[0], inpt.shape[1],
                                  row * ratio, col))
def deconv(self, input):
    try:
        up_bs = self.batch_size * self.num_in_channels
    except TypeError:
        up_bs = None
    row, col = input.shape[2:]
    up_input = input.reshape((-1, 1, row, col))
    # An earlier variant replicated the first/last rows and columns and
    # computed explicit pads; with border_mode='half' below, that manual
    # padding is unnecessary. Note that this upsamples along columns only
    # (subsample=(1, self.ratio)); rows are left unchanged.
    up_col = get_init_len(col, self.ratio, self.border)
    upsampled_mat = conv2d_grad_wrt_inputs(
        output_grad=up_input,
        filters=self.W[np.newaxis, np.newaxis, :, :],
        input_shape=(up_bs, 1, row, up_col),
        filter_shape=(1, 1, None, None),
        border_mode='half',
        subsample=(1, self.ratio),
        filter_flip=True,
        filter_dilation=(1, 1))
    return upsampled_mat.reshape((input.shape[0], input.shape[1],
                                  row, up_col))
def optimizer_2d(self, input_shapes, direction, include_tags, exclude_tags,
                 op, border_mode='valid', subsample=(1, 1),
                 filter_dilation=(1, 1)):
    inp1 = theano.shared(
        np.random.random(input_shapes[0]).astype(theano.config.floatX))
    inp2 = theano.shared(
        np.random.random(input_shapes[1]).astype(theano.config.floatX))
    if direction == 0:
        conv_op = abstract_conv.conv2d(inp1, inp2,
                                       input_shapes[0], input_shapes[1],
                                       border_mode=border_mode,
                                       subsample=subsample,
                                       filter_dilation=filter_dilation)
    if direction == 1:
        conv_op = abstract_conv.conv2d_grad_wrt_weights(
            inp1, inp2, input_shapes[2], input_shapes[0],
            border_mode=border_mode, subsample=subsample,
            filter_dilation=filter_dilation)
    if direction == 2:
        conv_op = abstract_conv.conv2d_grad_wrt_inputs(
            inp1, inp2, input_shapes[2], input_shapes[1],
            border_mode=border_mode, subsample=subsample,
            filter_dilation=filter_dilation)
    theano.config.metaopt.optimizer_including = include_tags
    theano.config.metaopt.optimizer_excluding = exclude_tags
    mode = mode_with_gpu.including('conv_meta')
    ref_func = theano.function([], conv_op, mode=mode_with_gpu)
    # The meta-optimizer compiles new functions internally. This requires
    # knowing the current linker, but that information is not available,
    # so it falls back to the default mode.
    with theano.change_flags(mode=mode):
        conv_func = theano.function([], conv_op, mode=mode)
    assert any(isinstance(node.op, op)
               for node in conv_func.maker.fgraph.toposort())
    utt.assert_allclose(conv_func(), ref_func())
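# Hypothetical invocation of the helper above (the op class and shapes are
# assumptions): check that the meta-optimizer selects a cuDNN op for a
# plain forward convolution.
#
#   self.optimizer_2d([(2, 3, 5, 5), (4, 3, 3, 3), (2, 4, 3, 3)],
#                     direction=0, include_tags='', exclude_tags='',
#                     op=GpuDnnConv)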
def trconv(output, filters, output_shape, filter_size, subsample=(1, 1),
           border_mode=(0, 0)):
    f1, f2 = filter_size[0], filter_size[1]
    a1 = 1
    a2 = 1
    o_prime1 = subsample[0] * (output_shape[2] - 1) + a1 + f1 - 2 * border_mode[0]
    o_prime2 = subsample[1] * (output_shape[3] - 1) + a2 + f2 - 2 * border_mode[1]
    input_shape = (None, None, o_prime1, o_prime2)
    input = abstract_conv.conv2d_grad_wrt_inputs(output, filters,
                                                 input_shape=input_shape,
                                                 filter_shape=None,
                                                 subsample=subsample,
                                                 border_mode=border_mode)
    return input
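# Worked example of the shape arithmetic above (assumed values): with
# output_shape=(b, c, 4, 4), 3x3 filters, subsample=(2, 2) and
# border_mode=(1, 1), the reconstructed input has spatial size
# o' = 2 * (4 - 1) + 1 + 3 - 2 * 1 = 8 per dimension,
# i.e. input_shape=(None, None, 8, 8).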
def get_output(self, input, **kwargs):
    lin_output = conv2d_grad_wrt_inputs(output_grad=input,
                                        filters=self.W,
                                        input_shape=self.out_shape,
                                        filter_shape=self.filter_shape,
                                        border_mode=self.mode,
                                        subsample=self.subsample,
                                        # filter_flip=True,
                                        filter_dilation=self.filter_dilation)
    if not self.no_bias:
        lin_output += self.b.dimshuffle('x', 0, 'x', 'x')
    return self.activation(lin_output)
def conv2d_tr_half(output, filters, filter_shape, input_shape,
                   subsample=(1, 1)):
    input = conv2d_grad_wrt_inputs(
        output, filters,
        input_shape=(None, filter_shape[0], input_shape[2], input_shape[3]),
        filter_shape=filter_shape,
        border_mode='half',
        subsample=subsample)
    return input
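# Usage sketch (assumed shapes): with border_mode='half' and unit strides
# the transposed convolution preserves spatial size, so only the rows and
# columns of input_shape are consulted here.
#
#   out = theano.tensor.tensor4('out')           # (n, 8, 32, 32)
#   flt = theano.tensor.tensor4('flt')           # (8, 8, 3, 3)
#   rec = conv2d_tr_half(out, flt, filter_shape=(8, 8, 3, 3),
#                        input_shape=(None, None, 32, 32))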
def conv_transpose(output, filters, output_shape, filter_size,
                   subsample=(1, 1), border_mode=(0, 0)):
    """Compute convolution transpose (deconv).

    Note: We assume zero padding and non-unit strides, as explained in
    (Dumoulin, Visin):
    http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html

    Some notation:
        input_shape: (batch size (b), input channels (c),
                      input rows (i1), input columns (i2))
        filter_shape: (output channels (c1), input channels (c2),
                       filter rows (k1), filter columns (k2))

    Note: the Theano code illustrations by (Dumoulin, Visin) are a bit
    confusing:

        input = theano.tensor.nnet.abstract_conv.conv2d_grad_wrt_inputs(
            ..., input_shape=(b, c1, o_prime1, o_prime2),
            filter_shape=(c1, c2, k1, k2), ...)

    That seems misleading; input_shape=(b, c2, o_prime1, o_prime2) appears
    more appropriate (note the use of c2 instead of c1).
    """
    k1, k2 = filter_size[0], filter_size[1]
    # Even filter sizes are not supported.
    assert k1 % 2 == 1
    assert k2 % 2 == 1
    # We only consider the case where the input size is such that
    # a = (a1, a2) = (1, 1), with a = (i + 2p - k) mod s and a1, a2 in
    # {0, ..., s - 1}. Here 'p' is the vector 'border_mode' and 's' is the
    # vector 'subsample'.
    a1 = 1
    a2 = 1
    o_prime1 = subsample[0] * (output_shape[2] - 1) + a1 + k1 - 2 * border_mode[0]
    o_prime2 = subsample[1] * (output_shape[3] - 1) + a2 + k2 - 2 * border_mode[1]
    input_shape = (None, None, o_prime1, o_prime2)
    input = abstract_conv.conv2d_grad_wrt_inputs(output, filters,
                                                 input_shape=input_shape,
                                                 filter_shape=None,
                                                 subsample=subsample,
                                                 border_mode=border_mode)
    return input
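# Usage sketch for conv_transpose (assumed shapes): with 3x3 filters,
# stride 2 and padding 1, a (n, c1, 4, 4) tensor is mapped back to spatial
# size o' = 2 * (4 - 1) + 1 + 3 - 2 * 1 = 8:
#
#   out = theano.tensor.tensor4('out')           # (n, c1, 4, 4)
#   flt = theano.tensor.tensor4('flt')           # (c1, c2, 3, 3)
#   rec = conv_transpose(out, flt, output_shape=(None, None, 4, 4),
#                        filter_size=(3, 3), subsample=(2, 2),
#                        border_mode=(1, 1))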
def __init__(self, layers, filter_shape=None, filter_stride=(1, 1),
             use_bias=True, border_mode="valid", wb="he-backward",
             json_param={}):
    super().__init__(layer_index=len(layers))

    self.input = layers[-1].output
    self.input_shape = layers[-1].output_shape

    # get parameters
    self.border_mode = json_param.get("border", border_mode)
    self.filter_shape = tuple(json_param.get("shape", filter_shape))
    self.stride = tuple(json_param.get("stride", filter_stride))
    self.use_bias = json_param.get("useBias", use_bias)
    self.size = (self.filter_shape[2], self.filter_shape[3])

    # weight initialization scheme
    if type(wb) is float:
        self.w_bound = float(wb)
    elif "he-forward" in wb:
        self.w_bound = math.sqrt(2.0 / (self.filter_shape[2] * self.filter_shape[3] * self.filter_shape[1]))
    elif "he-backward" in wb:
        self.w_bound = math.sqrt(2.0 / (self.filter_shape[2] * self.filter_shape[3] * self.filter_shape[0]))
    elif "xavier-forward" in wb:
        self.w_bound = math.sqrt(1.0 / (self.filter_shape[2] * self.filter_shape[3] * self.filter_shape[1]))
    elif "xavier-backward" in wb:
        self.w_bound = math.sqrt(1.0 / (self.filter_shape[2] * self.filter_shape[3] * self.filter_shape[0]))

    # initialize weights
    if self.w_bound > 0:
        if "uniform" in wb:
            w = numpy.random.uniform(-self.w_bound, self.w_bound,
                                     size=self.filter_shape)
        else:
            w = numpy.random.normal(0.0, self.w_bound,
                                    size=self.filter_shape)
    else:
        w = numpy.zeros(shape=self.filter_shape)
    self.omega = theano.shared(numpy.asarray(w, dtype=theano.config.floatX),
                               name="deconv omega")

    # initialize bias
    if self.use_bias:
        self.beta = theano.shared(value=numpy.zeros((self.filter_shape[0],),
                                                    dtype=theano.config.floatX),
                                  name="deconv beta")

    # calculate output shape
    if self.border_mode == "half":
        fh = self.filter_shape[2] // 2
        fw = self.filter_shape[3] // 2
        h = self.input_shape[2] * self.stride[0] - 2 * fh + self.filter_shape[2] - 1
        w = self.input_shape[3] * self.stride[1] - 2 * fw + self.filter_shape[3] - 1
    else:
        raise Exception("Unknown border mode: " + str(self.border_mode))
    self.output_shape = (self.input_shape[0], self.filter_shape[0], h, w)

    self.output = conv2d_grad_wrt_inputs(
        self.input, self.omega.dimshuffle((1, 0, 2, 3)), self.output_shape,
        (self.filter_shape[1], self.filter_shape[0],
         self.filter_shape[2], self.filter_shape[3]),
        self.border_mode, self.stride)
    if self.use_bias:
        self.output += self.beta[None, :, None, None]

    logging.verbose("Adding", self)
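# Shape check for the 'half' branch above (assumed values): with stride
# (2, 2) and a 3x3 filter (fh = fw = 1), spatial size 16 maps to
# h = 16 * 2 - 2 * 1 + 3 - 1 = 32, i.e. exact 2x upsampling.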
def __init__(self, rng, output_grad, ratio):
    """
    This class turns output_grad into a tensor of shape
    (#bch * #kernels, 1, #rows, #cols) and then applies the transpose
    convolution using a kernel of shape (1, 1, filter_shape[0],
    filter_shape[1]). This upsamples each feature map independently of
    the values in the other feature maps.

    :type rng: np.random.RandomState
    :param rng: a random number generator used to initialize weights.

    :type output_grad: theano.tensor.dtensor4
    :param output_grad: symbolic tensor that should be upsampled.

    :type ratio: int
    :param ratio: the amount by which output_grad will be upsampled.
    """
    # reshape output_grad to have only one feature map
    bch, ch, row, col = output_grad.shape
    output_grad_reshaped = output_grad.reshape((-1, 1, row, col))

    up_conv_size = ratio * 2 - 1
    # input_shape is the shape of the upsampled tensor
    input_shape = (None, None, row * ratio, col * ratio)
    # filter_shape is the 2D kernel shape that will be applied to
    # upsample the feature maps
    filter_shape = (1, 1, up_conv_size, up_conv_size)
    border_mode = (ratio - 1, ratio - 1)
    subsample = (ratio, ratio)

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = np.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    # pooling size
    fan_out = filter_shape[0] * np.prod(filter_shape[2:])

    # initialize weights with random values
    W_bound = np.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(np.asarray(rng.uniform(low=-W_bound,
                                                  high=W_bound,
                                                  size=filter_shape),
                                      dtype=theano.config.floatX),
                           name='Conv_W', borrow=True)

    # convolve input feature maps with filters
    conv_out = conv2d_grad_wrt_inputs(output_grad=output_grad_reshaped,
                                      filters=self.W,
                                      input_shape=input_shape,
                                      filter_shape=filter_shape,
                                      border_mode=border_mode,
                                      subsample=subsample,
                                      filter_flip=True)

    self.output = conv_out.reshape((bch, ch, row * ratio, col * ratio))
    self.params = [self.W]
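# Usage sketch (the enclosing class name is not shown in this excerpt, so
# <UpsampleLayer> below is a placeholder):
#
#   rng = np.random.RandomState(42)
#   x = theano.tensor.tensor4('x')               # (n, c, h, w)
#   layer = <UpsampleLayer>(rng, output_grad=x, ratio=2)
#   # layer.output then has shape (n, c, 2 * h, 2 * w)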
def bilinear_upsampling(input, ratio, batch_size=None,
                        num_input_channels=None, use_1D_kernel=True):
    """Compute bilinear upsampling.

    This function builds the symbolic graph for upsampling a tensor by the
    given ratio using bilinear interpolation.

    Parameters
    ----------
    input: symbolic 4D tensor
        mini-batch of feature map stacks, of shape (batch size,
        input channels, input rows, input columns) that will be upsampled.
    ratio: int or Constant or Scalar Tensor of int* dtype
        the ratio by which the input is upsampled in the 2D space
        (row and col size).
    batch_size: None, int or Constant variable
        The size of the first dimension of the input variable.
        Optional, possibly used to choose an optimal implementation.
        batch_size will be used only if num_input_channels is not None.
    num_input_channels: None, int or Constant variable
        The size of the second dimension of the input variable.
        Optional, possibly used to choose an optimal implementation.
        num_input_channels will be used only if batch_size is not None.
    use_1D_kernel: bool
        if set to true, rows and columns are upsampled separately by 1D
        kernels, otherwise they are upsampled together using a 2D kernel.
        The final result is the same; only the speed can differ, given
        factors such as the upsampling ratio.

    Returns
    -------
    symbolic 4D tensor
        set of feature maps generated by bilinear upsampling. Tensor is of
        shape (batch size, num_input_channels, input row size * ratio,
        input column size * ratio).

    Notes
    -----
    :note: The kernel used for bilinear interpolation is fixed (not learned).
    :note: When the upsampling ratio is even, the last row and column is
           repeated one extra time compared to the first row and column,
           which makes the upsampled tensor asymmetrical on both sides.
           This does not happen when the upsampling ratio is odd.
    """
    T = theano.tensor
    try:
        up_bs = batch_size * num_input_channels
    except TypeError:
        up_bs = None
    row, col = input.shape[2:]
    up_input = input.reshape((-1, 1, row, col))

    # concatenating the first and last row and column
    # first and last row
    concat_mat = T.concatenate((up_input[:, :, :1, :], up_input,
                                up_input[:, :, -1:, :]), axis=2)
    # first and last col
    concat_mat = T.concatenate((concat_mat[:, :, :, :1], concat_mat,
                                concat_mat[:, :, :, -1:]), axis=3)
    concat_col = col + 2

    pad = 2 * ratio - (ratio - 1) // 2 - 1

    if use_1D_kernel:
        kern = bilinear_kernel_1D(ratio=ratio, normalize=True)
        # upsampling rows
        upsampled_row = conv2d_grad_wrt_inputs(
            output_grad=concat_mat,
            filters=kern[np.newaxis, np.newaxis, :, np.newaxis],
            input_shape=(up_bs, 1, row * ratio, concat_col),
            filter_shape=(1, 1, None, 1),
            border_mode=(pad, 0),
            subsample=(ratio, 1),
            filter_flip=True)
        # upsampling cols
        upsampled_mat = conv2d_grad_wrt_inputs(
            output_grad=upsampled_row,
            filters=kern[np.newaxis, np.newaxis, np.newaxis, :],
            input_shape=(up_bs, 1, row * ratio, col * ratio),
            filter_shape=(1, 1, 1, None),
            border_mode=(0, pad),
            subsample=(1, ratio),
            filter_flip=True)
    else:
        kern = bilinear_kernel_2D(ratio=ratio, normalize=True)
        upsampled_mat = conv2d_grad_wrt_inputs(
            output_grad=concat_mat,
            filters=kern[np.newaxis, np.newaxis, :, :],
            input_shape=(up_bs, 1, row * ratio, col * ratio),
            filter_shape=(1, 1, None, None),
            border_mode=(pad, pad),
            subsample=(ratio, ratio),
            filter_flip=True)
    return upsampled_mat.reshape((input.shape[0], input.shape[1],
                                  row * ratio, col * ratio))
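# A minimal end-to-end sketch for bilinear_upsampling (assumed shapes): a
# (1, 1, 2, 2) map upsampled by ratio 2 becomes (1, 1, 4, 4). This assumes
# the module-level names used above (theano, np, bilinear_kernel_1D, ...)
# are importable.
def _bilinear_upsampling_sketch():
    x = theano.tensor.tensor4('x')
    y = bilinear_upsampling(x, ratio=2, batch_size=1, num_input_channels=1)
    f = theano.function([x], y)
    inp = np.arange(4, dtype=theano.config.floatX).reshape(1, 1, 2, 2)
    return f(inp)  # result has shape (1, 1, 4, 4)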