def body(t, seq_img_trans):
    img = tf.image.per_image_standardization(_in[t])  # standardization
    img_trans = transformer(tf.expand_dims(img, 0),
                            tf.stack([[scale_x, 0., 0., 0., scale_y, 0.]]))
    seq_img_trans = seq_img_trans.write(t, img_trans[0])
    return t + 1, seq_img_trans
def classifier(images, options, learner='cnn', name='classifier'):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # x = relu(conv2d(images, options.nf, ks=5, s=1, name='conv1'))  # 28*28*nf
        # if learner == 'stn':
        #     theta = linear(tf.reshape(x, [-1, int(options.input_size * options.input_size * options.nf)]), 128,
        #                    name='loc_linear1')
        #     theta = linear(theta, 6, name='loc_linear2')
        #     x = transformer(x, theta)
        if learner == 'stn':
            theta = linear(tf.layers.flatten(images), 128, name='loc_linear1')
            theta = linear(theta, 6, name='loc_linear2')
            x = transformer(images, theta, [options.input_size, options.input_size])
            x = relu(conv2d(x, options.nf, ks=5, s=1, name='conv1'))  # 28*28*nf
        else:
            x = relu(conv2d(images, options.nf, ks=5, s=1, name='conv1'))  # 28*28*nf
        x = relu(conv2d(x, 2 * options.nf, ks=3, s=2, name='conv2'))  # 14*14*(2*nf)
        x = relu(conv2d(x, 4 * options.nf, ks=3, s=2, name='conv3'))  # 7*7*(4*nf)
        x = linear(tf.layers.flatten(x), 128, name='linear1')
        x = dropout(x, 0.5, options.phase)
        x = linear(x, options.label_n, name='linear2')
        return x
def decoder(self, merged_lv, activation, is_training, batch_size):
    with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE,
                           initializer=xavier_initializer_conv2d(),
                           regularizer=l2_regularizer(0.01)):
        d_conv = tf.reshape(merged_lv, [-1, 8, 8, 256])
        d_conv = tf.image.resize_images(d_conv, (16, 16))

        # stn >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
        n_fc = 6
        initial = np.array([[1., 0, 0], [0, 1., 0]])
        initial = initial.astype('float32').flatten()

        Wd_fc1 = tf.Variable(tf.zeros(shape=[16 * 16 * 256, n_fc]),
                             name='Wdst1_fc1', validate_shape=False)
        bd_fc1 = tf.Variable(initial_value=initial, name='bdst1_fc1')
        hd_fc1 = tf.matmul(tf.zeros([batch_size, 16 * 16 * 256]), Wd_fc1) + bd_fc1
        hd_trans = transformer(d_conv, hd_fc1)
        # stn <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

        d_conv = self.coord_conv(hd_trans, 256, 3, padding='same', activation=None)
        d_conv = tf.layers.batch_normalization(d_conv, training=is_training, fused=True)
        d_conv = activation(d_conv)
        d_conv = tf.image.resize_images(d_conv, (32, 32))

        d_conv = self.coord_conv(d_conv, 92, 3, padding='same', activation=None)
        d_conv = tf.layers.batch_normalization(d_conv, training=is_training, fused=True)
        d_conv = activation(d_conv)
        d_conv = tf.image.resize_images(d_conv, (64, 64))

        d_conv = self.coord_conv(d_conv, 48, 3, padding='same', activation=None)
        d_conv = tf.layers.batch_normalization(d_conv, training=is_training, fused=True)
        d_conv = activation(d_conv)
        d_conv = tf.image.resize_images(d_conv, (128, 128))

        d_conv = self.coord_conv(d_conv, 3, 3, padding='same', activation=None)

        return d_conv
def __init__(self, batch_size, image_height, image_width):
    # 3 is the number of channels; we're taking in RGB
    input = Input(shape=(image_height, image_width, 3), batch_size=batch_size)

    # Channel 1
    x = Conv2D(filters=32, kernel_size=3)(input)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)  # 2D pooling: Conv2D output is 4D
    x = Conv2D(filters=32, kernel_size=5, strides=3, activation="relu")(x)
    x = BatchNormalization()(x)

    # Channel 2
    y = Conv2D(filters=32, kernel_size=5, strides=5, activation="relu")(input)
    y = BatchNormalization()(y)

    # Merge channels
    x = concatenate(inputs=[x, y], axis=-1)  # channel-wise concat, as channel is the last dim
    x = Dropout(rate=0.5)(x)
    x = Flatten()(x)  # flatten so the Dense layers emit a single 6-D theta per image
    x = Dense(units=32, activation="tanh")(x)
    x = Dense(units=6, activation="tanh")(x)

    # __init__ cannot return a value, so store the transformed output on the instance
    self.out_image = transformer(input, x)
def roi_rotate_tensor_pad(self, feature_map, transform_matrixs, box_masks, box_widths):
    with tf.variable_scope("RoIrotate"):
        max_width = box_widths[tf.argmax(box_widths, 0, output_type=tf.int32)]
        # box_widths = tf.cast(box_widths, tf.float32)

        tile_feature_maps = []
        # crop_boxes = []
        # crop_sizes = []
        # box_inds = []

        map_shape = tf.shape(feature_map)
        map_shape = tf.cast(map_shape, tf.float32)

        for i, mask in enumerate(box_masks):  # box_masks is a list of num of rois in each feature map
            _feature_map = feature_map[i]
            # _crop_box = tf.constant([0, 0, 8/map_shape[0], box_widths[i]/map_shape[1]])
            # _crop_size = tf.constant([8, tf.cast(box_widths[i], tf.int32)])
            _feature_map = tf.expand_dims(_feature_map, axis=0)
            box_nums = tf.shape(mask)[0]
            _feature_map = tf.tile(_feature_map, [box_nums, 1, 1, 1])
            # crop_boxes.append(_crop_box)
            # crop_sizes.append(_crop_size)
            tile_feature_maps.append(_feature_map)
            # box_inds.append(i)

        tile_feature_maps = tf.concat(tile_feature_maps, axis=0)  # N' * H * W * C where N' = N * B

        trans_feature_map = transformer(tile_feature_maps, transform_matrixs)

        box_nums = tf.shape(box_widths)[0]
        pad_rois = tf.TensorArray(tf.float32, box_nums)
        i = 0

        def cond(pad_rois, i):
            return i < box_nums

        def body(pad_rois, i):
            _affine_feature_map = trans_feature_map[i]
            width_box = box_widths[i]
            # _affine_feature_map = tf.expand_dims(_affine_feature_map, 0)
            # roi = tf.image.crop_and_resize(after_transform, [[0, 0, 8/map_shape[0], width_box/map_shape[1]]], [0], [8, tf.cast(width_box, tf.int32)])
            roi = tf.image.crop_to_bounding_box(_affine_feature_map, 0, 0, 8, width_box)
            pad_roi = tf.image.pad_to_bounding_box(roi, 0, 0, 8, max_width)
            pad_rois = pad_rois.write(i, pad_roi)
            i += 1
            return pad_rois, i

        pad_rois, _ = tf.while_loop(cond, body, loop_vars=[pad_rois, i])
        pad_rois = pad_rois.stack()
        print("pad_rois shape: ", pad_rois)

        return pad_rois
def call(self, inputs):
    x, x_loc = inputs
    if self.out_dims is None:
        B, H, W, C = x.shape.as_list()
        self.out_dims = (H, W)
    h_trans = transformer(x, x_loc, self.out_dims)
    return h_trans
def __call__(self, fixed, moving):
    fixed_features = self.convnet(fixed)
    moving_features = self.convnet(moving)
    params = self.parameter_regressor(fixed_features, moving_features)
    transformation_matrix = self.__transformationMatrix(params)
    warped = transformer(fixed, transformation_matrix)
    return warped
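The `__transformationMatrix` helper is not shown above. A minimal sketch of one plausible implementation, assuming `params` carries per-example scale, translation, and rotation, and that the transformer expects a flat [B, 6] affine theta:

import tensorflow as tf

def transformation_matrix(params):
    # Hypothetical helper (not from the original source): pack per-example
    # parameters into the flat 2x3 affine theta expected by the STN, shape [B, 6].
    # Assumed layout: params[:, 0:2] = (sx, sy), params[:, 2:4] = (tx, ty), params[:, 4] = angle.
    sx, sy = params[:, 0], params[:, 1]
    tx, ty = params[:, 2], params[:, 3]
    angle = params[:, 4]
    cos_a, sin_a = tf.cos(angle), tf.sin(angle)
    theta = tf.stack([sx * cos_a, -sin_a, tx,
                      sin_a, sy * cos_a, ty], axis=1)  # [B, 6]
    return theta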
def stn_block(name, theta, inp):
    with tf.variable_scope(name):
        theta = tf.reshape(theta, (-1, 2 * 3))

        # define loc net weight and bias
        loc_in = 112 * 112 * 3
        loc_out = 6
        W_loc = tf.Variable(tf.zeros([loc_in, loc_out]), name='W_loc')
        b_loc = theta

        # tie everything together
        fc_loc = tf.matmul(tf.zeros([opts['batch_size'] * 12, loc_in]), W_loc) + b_loc  # [B*12, 6]
        op = transformer(inp, fc_loc)
        return op
def _stn_layer(self, name_scope, inputs, reuse=False):
    # Flatten inputs
    B1, H1, W1, C1 = inputs.get_shape().as_list()
    fln_inputs = tf.reshape(inputs, [-1, H1 * W1 * C1])
    _, D = fln_inputs.get_shape().as_list()

    # Localization + Spatial Transformer
    with tf.variable_scope(name_scope, reuse=reuse):
        # Localization
        w = tf.get_variable(shape=[D, 6], initializer=self.const_initializer, name='weights')
        b = tf.get_variable(shape=[6], initializer=self.ident_initializer, name='biases')
        theta = tf.nn.tanh(tf.matmul(fln_inputs, w) + b)  # Bx6
        output = transformer(U=inputs, theta=theta, out_size=(H1, W1))

    return output
def rotate(x, mins, maxes, image_shape=[28, 28, 1]):
    angle = tf.random_uniform(shape=(), minval=mins, maxval=maxes, dtype=tf.float32)

    # Rotation matrix + zero bias term
    theta = [tf.cos(angle), -tf.sin(angle), 0., tf.sin(angle), tf.cos(angle), 0.]

    B, H, W, C = x.shape

    # define loc net weight and bias (fixed, non-trainable tensors)
    loc_in = H * W * C
    loc_out = 6
    W_loc = tf.zeros([loc_in, loc_out], name='W_loc')
    b_loc = tf.stack(theta, name='b_loc')

    # tie everything together: zero weights, so fc_loc is just the rotation theta broadcast to [B, 6]
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    h_trans = transformer(x, fc_loc)
    return h_trans
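A short usage sketch (assumed, not from the original source): applying `rotate` as a random-rotation augmentation on a batch of MNIST-sized images.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [64, 28, 28, 1])        # batch of MNIST-shaped images
rotated = rotate(x, mins=-np.pi / 6, maxes=np.pi / 6)   # random angle in [-30, 30] degrees

with tf.Session() as sess:
    out = sess.run(rotated, feed_dict={x: np.random.rand(64, 28, 28, 1)})
    print(out.shape)  # (64, 28, 28, 1)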
def roi_rotate_tensor(self, feature_map, transform_matrixs, box_masks, box_widths, is_debug=False):
    """
    Input:
        feature_map: N * H * W * C
        transform_matrixs: N' * 6
        box_masks: list of tensor N'
        box_widths: N'
    """
    with tf.variable_scope("RoIrotate"):
        max_width = box_widths[tf.argmax(box_widths, 0, output_type=tf.int32)]
        box_widths = tf.cast(box_widths, tf.float32)

        tile_feature_maps = []
        # crop_boxes = []
        # crop_sizes = []
        # box_inds = []

        map_shape = tf.shape(feature_map)
        map_shape = tf.cast(map_shape, tf.float32)

        for i, mask in enumerate(box_masks):  # box_masks is a list of num of rois in each feature map
            _feature_map = feature_map[i]
            # _crop_box = tf.constant([0, 0, 8/map_shape[0], box_widths[i]/map_shape[1]])
            # _crop_size = tf.constant([8, tf.cast(box_widths[i], tf.int32)])
            _feature_map = tf.expand_dims(_feature_map, axis=0)
            box_nums = tf.shape(mask)[0]
            _feature_map = tf.tile(_feature_map, [box_nums, 1, 1, 1])
            # crop_boxes.append(_crop_box)
            # crop_sizes.append(_crop_size)
            tile_feature_maps.append(_feature_map)
            # box_inds.append(i)

        tile_feature_maps = tf.concat(tile_feature_maps, axis=0)  # N' * H * W * C where N' = N * B

        norm_box_widths = box_widths / map_shape[2]
        ones = tf.ones_like(norm_box_widths)
        norm_box_heights = ones * (8.0 / map_shape[1])
        zeros = tf.zeros_like(norm_box_widths)
        crop_boxes = tf.transpose(tf.stack([zeros, zeros, norm_box_heights, norm_box_widths]))
        """
        box_height = ones * 8
        box_height = tf.cast(box_height, tf.int32)
        box_width = ones * max_width
        box_width = tf.cast(box_width, tf.int32)
        """
        crop_size = tf.transpose(tf.stack([8, max_width]))
        # crop_boxes = tf.stack(crop_boxes, axis=0)
        # crop_sizes = tf.stack(crop_sizes, axis=0)

        trans_feature_map = transformer(tile_feature_maps, transform_matrixs)

        # box_inds = tf.concat(box_masks, axis=0)
        box_inds = tf.range(tf.shape(trans_feature_map)[0])
        rois = tf.image.crop_and_resize(trans_feature_map, crop_boxes, box_inds, crop_size)
        pad_rois = tf.image.pad_to_bounding_box(rois, 0, 0, 8, max_width)

        print("pad_rois: ", pad_rois)

        return pad_rois
from stn import spatial_transformer_network as transformer
import numpy as np
import tensorflow as tf

# params
n_fc = 6
B, H, W, C = (2, 200, 200, 3)

# identity transform
initial = np.array([[1., 0, 0], [0, 1., 0]])
initial = initial.astype('float32').flatten()

# input placeholder
x = tf.placeholder(tf.float32, [B, H, W, C])

# localization network
W_fc1 = tf.Variable(tf.zeros([H * W * C, n_fc]), name='W_fc1')
b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
h_fc1 = tf.matmul(tf.zeros([B, H * W * C]), W_fc1) + b_fc1

# spatial transformer layer
h_trans = transformer(x, h_fc1)
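An assumed continuation that runs the graph once on random inputs; with zero localization weights and an identity bias, the output keeps the input shape and, up to bilinear resampling, the input content.

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(h_trans, feed_dict={x: np.random.rand(B, H, W, C)})
    print(out.shape)  # (2, 200, 200, 3)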
def encoder(self, imgs, activation, is_training, batch_size, img_shape, channels):
    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE,
                           initializer=xavier_initializer_conv2d(),
                           regularizer=l2_regularizer(0.01)):
        # stn >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
        n_fc = 6
        initial = np.array([[1., 0, 0], [0, 1., 0]])
        initial = initial.astype('float32').flatten()

        W_fc1 = tf.Variable(tf.zeros(shape=[img_shape * img_shape * channels, n_fc]),
                            name='W_fc1', validate_shape=False)
        b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
        h_fc1 = tf.matmul(tf.zeros([batch_size, img_shape * img_shape * channels]), W_fc1) + b_fc1
        h_trans = transformer(imgs, h_fc1)
        # stn <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

        e_conv = self.coord_conv(h_trans, 48, 3, padding='same', activation=None)
        e_conv = tf.layers.batch_normalization(e_conv, training=is_training, fused=True)
        e_conv = activation(e_conv)
        e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

        e_conv = self.coord_conv(e_conv, 92, 3, padding='same', activation=None)
        e_conv = tf.layers.batch_normalization(e_conv, training=is_training, fused=True)
        e_conv = activation(e_conv)
        e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

        e_conv = self.coord_conv(e_conv, 256, 3, padding='same', activation=None)
        e_conv = tf.layers.batch_normalization(e_conv, training=is_training, fused=True)
        e_conv = activation(e_conv)
        e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

        e_conv = self.coord_conv(e_conv, 256, 3, padding='same', activation=None)
        e_conv = tf.layers.batch_normalization(e_conv, training=is_training, fused=True)
        e_conv = activation(e_conv)
        e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

        e_conv = self.coord_conv(e_conv, 256, 3, padding='same', activation=None)
        e_conv = tf.layers.batch_normalization(e_conv, training=is_training, fused=True)
        e_conv = activation(e_conv)
        e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

        e_conv = self.coord_conv(e_conv, 256, 3, padding='same', activation=None)
        e_conv = tf.layers.batch_normalization(e_conv, training=is_training, fused=True)
        e_conv = activation(e_conv)
        e_conv = tf.layers.max_pooling2d(e_conv, 2, 2)

        lv = tf.layers.flatten(e_conv)

        return lv
def build_model(self):
    # Helper Variables
    self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
    self.global_step_inc = self.global_step_tensor.assign(self.global_step_tensor + 1)
    self.global_epoch_tensor = tf.Variable(0, trainable=False, name='global_epoch')
    self.global_epoch_inc = self.global_epoch_tensor.assign(self.global_epoch_tensor + 1)

    # Inputs to the network
    with tf.variable_scope('inputs'):
        self.x, y, self.length, self.lab_length = self.data_loader.get_input()
        self.y = tf.contrib.layers.dense_to_sparse(y, eos_token=-1)
        self.x = tf.expand_dims(self.x, 3)
        # Center Images
        x_shift = (tf.shape(self.x)[2] - self.length) / tf.constant(2)
        y_shift = tf.zeros_like(x_shift)
        translation_vector = tf.cast(tf.stack([x_shift, y_shift], axis=1), tf.float32)
        self.x = tf.contrib.image.translate(self.x, translation_vector)
        self.length = tf.cast(tf.math.ceil(tf.math.divide(self.length, tf.constant(self.reduce_factor))), tf.int32)
        batch_size = tf.shape(self.x)[0]
        self.is_training = tf.placeholder(tf.bool, name='Training_flag')
        tf.add_to_collection('inputs', self.x)
        tf.add_to_collection('inputs', self.length)
        tf.add_to_collection('inputs', self.lab_length)
        tf.add_to_collection('inputs', y)
        tf.add_to_collection('inputs', self.is_training)

    # Define CNN variables
    initializer = tf.contrib.layers.xavier_initializer_conv2d()
    out_W = tf.Variable(tf.truncated_normal([2 * self.rnn_num_hidden, self.data_loader.num_classes], stddev=0.1),
                        name='out_W')
    out_b = tf.Variable(tf.constant(0., shape=[self.data_loader.num_classes]), name='out_b')

    # localization network
    W_fc1 = tf.Variable(tf.zeros([self.stn_loc_fc, 6]), name='W_fc1')
    b_fc1 = tf.Variable(initial_value=[1., 0., 0., 0., 1., 0.], name='b_fc1')

    with tf.name_scope('Localization'):
        conv_loc = tf.layers.conv2d(self.x, self.stn_loc_conv_d[0], self.stn_loc_conv_s[0], padding='same')
        conv_loc = tf.nn.leaky_relu(conv_loc)
        conv_loc = tf.layers.max_pooling2d(conv_loc, 2, 2, padding='same')
        conv_loc = tf.layers.conv2d(conv_loc, self.stn_loc_conv_d[1], self.stn_loc_conv_s[1], padding='same')
        conv_loc = tf.nn.leaky_relu(conv_loc)
        fc_loc = tf.reduce_mean(conv_loc, axis=[1, 2])
        fc_loc = tf.layers.dense(fc_loc, self.stn_loc_fc)
        fc_loc = tf.nn.leaky_relu(fc_loc)
        theta = tf.matmul(fc_loc, W_fc1) + b_fc1

    # spatial transformer network
    h_trans = transformer(self.x, theta)

    # CNNs
    with tf.name_scope('CNN_Block_1'):
        conv1_out = tf.layers.dropout(h_trans, self.conv_dropouts[0],
                                      noise_shape=tf.concat([tf.reshape(batch_size, [-1]),
                                                             tf.constant(value=[1, 1, 1])], 0),
                                      training=self.is_training)
        conv1_out = tf.layers.conv2d(conv1_out, self.conv_depths[0], self.conv_patch_sizes[0], padding='same',
                                     activation=None, kernel_initializer=initializer)
        conv1_out = tf.layers.batch_normalization(conv1_out)
        conv1_out = tf.nn.leaky_relu(conv1_out)
        conv1_out = tf.layers.max_pooling2d(conv1_out, 2, 2, padding='same')

    with tf.name_scope('CNN_Block_2'):
        conv2_out = tf.layers.dropout(conv1_out, self.conv_dropouts[1],
                                      noise_shape=tf.concat([tf.reshape(batch_size, [-1]),
                                                             tf.constant(value=[1, 1, self.conv_depths[0]])], 0),
                                      training=self.is_training)
        conv2_out = tf.layers.conv2d(conv2_out, self.conv_depths[1], self.conv_patch_sizes[1], padding='same',
                                     activation=None, kernel_initializer=initializer)
        conv2_out = tf.layers.batch_normalization(conv2_out)
        conv2_out = tf.nn.leaky_relu(conv2_out)
        conv2_out = tf.layers.max_pooling2d(conv2_out, 2, 2, padding='same')

    with tf.name_scope('CNN_Block_3'):
        conv3_out = tf.layers.dropout(conv2_out, self.conv_dropouts[2],
                                      noise_shape=tf.concat([tf.reshape(batch_size, [-1]),
                                                             tf.constant(value=[1, 1, self.conv_depths[1]])], 0),
                                      training=self.is_training)
        conv3_out = tf.layers.conv2d(conv3_out, self.conv_depths[2], self.conv_patch_sizes[2], padding='same',
                                     activation=None, kernel_initializer=initializer)
        conv3_out = tf.layers.batch_normalization(conv3_out)
        conv3_out = tf.nn.leaky_relu(conv3_out)
        conv3_out = tf.layers.max_pooling2d(conv3_out, 2, 2, padding='same')

    with tf.name_scope('CNN_Block_4'):
        conv4_out = tf.layers.dropout(conv3_out, self.conv_dropouts[3],
                                      noise_shape=tf.concat([tf.reshape(batch_size, [-1]),
                                                             tf.constant(value=[1, 1, self.conv_depths[2]])], 0),
                                      training=self.is_training)
        conv4_out = tf.layers.conv2d(conv4_out, self.conv_depths[3], self.conv_patch_sizes[3], padding='same',
                                     activation=None, kernel_initializer=initializer)
        conv4_out = tf.layers.batch_normalization(conv4_out)
        conv4_out = tf.nn.leaky_relu(conv4_out)

    with tf.name_scope('CNN_Block_5'):
        conv5_out = tf.layers.dropout(conv4_out, self.conv_dropouts[4],
                                      noise_shape=tf.concat([tf.reshape(batch_size, [-1]),
                                                             tf.constant(value=[1, 1, self.conv_depths[3]])], 0),
                                      training=self.is_training)
        conv5_out = tf.layers.conv2d(conv5_out, self.conv_depths[4], self.conv_patch_sizes[4], padding='same',
                                     activation=None, kernel_initializer=initializer)
        conv5_out = tf.layers.batch_normalization(conv5_out)
        conv5_out = tf.nn.leaky_relu(conv5_out)

    output = tf.transpose(conv5_out, [2, 0, 1, 3])
    output = tf.reshape(output, [-1, batch_size, (self.config.im_height // self.reduce_factor) * self.conv_depths[4]])
    self.length = tf.tile(tf.expand_dims(tf.shape(output)[0], axis=0), [batch_size])

    # RNN
    with tf.variable_scope('MultiRNN', reuse=tf.AUTO_REUSE):
        for i in range(self.rnn_num_layers):
            output = tf.layers.dropout(output, self.rnn_dropout, training=self.is_training)
            lstm = tf.contrib.cudnn_rnn.CudnnLSTM(1, self.rnn_num_hidden, 'linear_input', 'bidirectional')
            output, state = lstm(output)

    # Fully Connected
    with tf.name_scope('Dense'):
        output = tf.concat(output, 2)
        # Linear dropout
        output = tf.layers.dropout(output, self.linear_dropout, training=self.is_training)
        # Reshaping to apply the same weights over the timesteps
        output = tf.reshape(output, [-1, 2 * self.rnn_num_hidden])
        # Doing the affine projection
        logits = tf.matmul(output, out_W) + out_b
        # Reshaping back to the original shape
        self.logits = tf.reshape(logits, [-1, batch_size, self.data_loader.num_classes])

    with tf.variable_scope('loss-acc'):
        self.loss = warpctc_tensorflow.ctc(self.logits, self.y.values, self.lab_length, self.length,
                                           self.data_loader.num_classes - 1)
        self.cost = tf.reduce_mean(self.loss)
        self.prediction = tf.nn.ctc_beam_search_decoder(self.logits, sequence_length=self.length,
                                                        merge_repeated=False)
        self.cer = self.calc_cer(self.prediction[0][0], self.y)

    with tf.variable_scope('train_step'):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step = tf.train.RMSPropOptimizer(
                learning_rate=self.config.learning_rate,
                decay=self.config.learning_rate_decay).minimize(self.loss, global_step=self.global_step_tensor)

    tf.add_to_collection('train', self.train_step)
    tf.add_to_collection('train', self.cost)
    tf.add_to_collection('train', self.cer)
def spatial_transformer(images, encoder_blocks=[128], is_training=True, reuse=False, is_chief=True,
                        verbose=False, **kwargs):
    """A simple Spatial Transformer Network constrained to TSR style transformation.

    Args:
        images: a 4D tensor of input images in [0., 1.]
        encoder_blocks: A list of integers indicating the number of channels in each encoder block.
            The last layer of the encoder is fully-connected, while the rest are convolutional
            blocks with leaky ReLU and batch norm.
        is_training: whether we are in training mode or not
        reuse: Whether to reuse the model variables
        is_chief: whether the model is run by the chief worker
        verbose: verbosity level
        kwargs: remaining keyword arguments (unused here)

    Returns:
        A 4D Tensor of images in [0., 1.]
    """
    del is_chief
    del kwargs

    # Use STN from https://github.com/kevinzakka/spatial-transformer-network
    sys.path.append('spatial-transformer-network')
    from stn import spatial_transformer_network as transformer

    with tf.control_dependencies([tf.assert_greater_equal(images, 0.)]):
        with tf.control_dependencies([tf.assert_less_equal(images, 1.)]):
            net = images
            in_dims = images.get_shape().as_list()[1:]

            with tf.variable_scope('localization_network', reuse=reuse):
                ## Encoder
                with tf.contrib.framework.arg_scope(
                        [slim.conv2d],
                        kernel_size=[3, 3],
                        padding='SAME',
                        stride=2,
                        activation_fn=tf.nn.leaky_relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'is_training': is_training, 'decay': 0.9, 'epsilon': 1e-5},
                        weights_initializer=tf.random_normal_initializer(0, 0.02)):
                    for block_id, block_num_filters in enumerate(encoder_blocks[:-1]):
                        scope = 'conv_%d' % (block_id + 1)
                        net = slim.conv2d(net, block_num_filters, scope=scope)
                        if verbose:
                            print(' \033[34m%s:\033[0m' % scope, net.get_shape())

                ## STN
                # fc 1
                net = tf.layers.flatten(net)
                net = slim.fully_connected(
                    net,
                    encoder_blocks[-1],
                    activation_fn=tf.nn.tanh,
                    weights_initializer=tf.zeros_initializer(),
                    biases_initializer=tf.truncated_normal_initializer(stddev=0.01))
                if verbose:
                    print(' \033[34mfc1:\033[0m', net.get_shape())

                # rotation angle (init: 0)
                theta = slim.fully_connected(
                    net,
                    1,
                    activation_fn=tf.nn.tanh,
                    weights_initializer=tf.zeros_initializer(),
                    biases_initializer=tf.truncated_normal_initializer(stddev=0.01)) * np.pi
                rotate_matrix = tf.concat([
                    tf.cos(theta), -tf.sin(theta), tf.zeros(tf.shape(theta)),
                    tf.sin(theta), tf.cos(theta), tf.zeros(tf.shape(theta)),
                    tf.zeros(tf.shape(theta)), tf.zeros(tf.shape(theta)), tf.ones(tf.shape(theta))
                ], axis=-1)
                rotate_matrix = tf.reshape(rotate_matrix, (-1, 3, 3))

                # translation and scale (init: identity)
                translate_matrix = slim.fully_connected(
                    net,
                    4,
                    activation_fn=None,
                    weights_initializer=tf.zeros_initializer(),
                    biases_initializer=tf.constant_initializer([1., 1., 0., 0.]))  # sx, sy, tx, ty
                translate_matrix = tf.split(translate_matrix, 4, axis=-1)
                translate_matrix = tf.concat([
                    translate_matrix[0], tf.zeros(tf.shape(theta)), translate_matrix[2],
                    tf.zeros(tf.shape(theta)), translate_matrix[1], translate_matrix[3],
                    tf.zeros(tf.shape(theta)), tf.zeros(tf.shape(theta)), tf.ones(tf.shape(theta))
                ], axis=1)
                translate_matrix = tf.reshape(translate_matrix, (-1, 3, 3))

            # final transformation
            transform_matrix = tf.matmul(rotate_matrix, translate_matrix)
            transform_matrix = tf.layers.flatten(transform_matrix)
            transform_matrix = transform_matrix[:, :6]
            images = transformer(images, transform_matrix, out_dims=in_dims)
            images = tf.clip_by_value(images, 0., 1.)
            return images
def transformer_net(self, x, theta):
    return transformer(x, theta)
def call(self, inputs):
    x_loc = self.localisation_net(inputs)
    h_trans = transformer(inputs, x_loc, self.out_dims)
    return h_trans
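`self.localisation_net` is defined elsewhere in that layer; a minimal sketch of what such a network might look like (an assumption, not the original definition): a small conv stack ending in a Dense(6) whose weights start at zero and whose bias starts at the identity transform, so training begins from an unwarped image.

import tensorflow as tf

def make_localisation_net():
    # Hypothetical localization network: predicts a flat 6-D affine theta per image.
    identity_theta = tf.constant_initializer([1., 0., 0., 0., 1., 0.])
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, 3, strides=2, activation='relu'),
        tf.keras.layers.Conv2D(32, 3, strides=2, activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(6, kernel_initializer='zeros',
                              bias_initializer=identity_theta),
    ])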
loss = tf.reduce_mean(tf.square(para[:, 0] - angle_tensor))
error = tf.reduce_mean(tf.abs(para[:, 0] - angle_tensor) * 180)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

from stn import spatial_transformer_network as transformer

pred_image = []
for i in range(16):
    theta = tf.stack([(tf.cos(para[i, 0]), -tf.sin(para[i, 0]), tf.constant(0.0)),
                      (tf.sin(para[i, 0]), tf.cos(para[i, 0]), tf.constant(0.0))], axis=0)
    iImg = input_img[i, :, :, 0]
    iImg = tf.expand_dims(iImg, axis=0)
    iImg = tf.expand_dims(iImg, axis=3)
    pImg = transformer(iImg, theta, out_dims=[320, 320])
    pred_image.append(pImg)
pred_image = tf.concat(pred_image, axis=0)

optimizer = tf.train.AdamOptimizer(1e-3)
# train = optimizer.minimize(loss)
# train_op = tf.group([train_op, update_ops])
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
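An assumed vectorized alternative to the Python loop above: build theta for all 16 samples at once and call `transformer` a single time.

angles = para[:, 0]                                    # [B] predicted rotation angles
zeros = tf.zeros_like(angles)
theta_batch = tf.stack([tf.cos(angles), -tf.sin(angles), zeros,
                        tf.sin(angles),  tf.cos(angles), zeros], axis=1)  # [B, 6]
pred_image = transformer(input_img[:, :, :, :1], theta_batch, out_dims=[320, 320])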
input_img = np.concatenate([img1, img2, img3, img4], axis=0)
B, H, W, C = input_img.shape
print("Input Img Shape: {}".format(input_img.shape))

# identity transform
theta = np.array([[1., 0, 0], [0, 1., 0]])

x = tf.placeholder(tf.float32, [None, H, W, C])

with tf.variable_scope('spatial_transformer'):
    theta = theta.astype('float32')
    theta = theta.flatten()

    # define loc net weight and bias
    loc_in = H * W * C
    loc_out = 6
    W_loc = tf.Variable(tf.zeros([loc_in, loc_out]), name='W_loc')
    b_loc = tf.Variable(initial_value=theta, name='b_loc')

    # tie everything together
    fc_loc = tf.matmul(tf.zeros([B, loc_in]), W_loc) + b_loc
    h_trans = transformer(x, fc_loc)

# run session
sess = tf.Session()
sess.run(tf.global_variables_initializer())
y = sess.run(h_trans, feed_dict={x: input_img})
print("y: {}".format(y.shape))
array2img(y[0]).show()