def stn(self, obj_image, mask_image, stn_filter):
    # Scope name keeps the "g_" prefix, presumably so these variables are
    # collected together with the rest of the generator's.
    with tf.variable_scope("g_enerator") as scope:
        obj_image_stn = transformer(obj_image, stn_filter, (256, 256))
        tf.get_variable_scope().reuse_variables()
        # The sampler interpolates with float arithmetic, so warp the boolean
        # mask as float32 and cast it back afterwards.
        mask_image = tf.cast(mask_image, 'float32')
        # self.mask_before = mask_image
        mask_image_stn = transformer(mask_image, stn_filter, (256, 256))
        # self.mask_after = mask_image_stn
        mask_image_stn = tf.cast(mask_image_stn, 'bool')
        # self.mask_cast = mask_image_stn
        return obj_image_stn, mask_image_stn
def generator_flow_joint_base(self, i1i2):
    theta = self.theta_generator_small(i1i2)
    i2hat0, flow0 = transformer(U=i1i2[..., 0:3], theta=theta,
                                out_size=[self.h, self.w],
                                mode='Projective2D', name='g_transformer')
    flow1 = self.flow_generator_flownet(i1i2, flow0)
    i2hat1 = transformer(U=i1i2[..., 0:3], flow=flow1,
                         out_size=[self.h, self.w],
                         mode='Flow', name='g_transformer')
    return i2hat0, i2hat1, flow0, flow1
def conv_spatial_transfo(x, thetas, kernel_size):
    """Run the spatial transformer for each patch of kernel_size * kernel_size.

    Args:
        thetas: the parameters of each spatial transformer
        kernel_size: the size of each patch

    Returns:
        The patches glued together after having been spatially transformed.
    """
    size_x = int(x.get_shape()[1])
    size_y = int(x.get_shape()[2])
    channels = int(x.get_shape()[3])
    # Flatten the parameters: one 6-vector of affine parameters per pixel.
    thetas = tf.reshape(thetas, [-1, size_x * size_y, 6])
    # Extract patches of kernel_size * kernel_size at each
    # pixel of the input image
    x = tf.extract_image_patches(x,
                                 ksizes=[1, kernel_size, kernel_size, 1],
                                 strides=[1, 1, 1, 1],
                                 rates=[1, 1, 1, 1],
                                 padding='SAME')
    # Flatten the patches into a batch of small images
    x = tf.reshape(x, [-1, kernel_size, kernel_size, channels])
    # Run through the spatial transformer
    res = transformer(x, thetas, (kernel_size, kernel_size))
    # Reform the image from the transformed patches
    res = tf.reshape(
        res, [-1, size_x * kernel_size, size_y * kernel_size, channels])
    return res
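# A minimal usage sketch for conv_spatial_transfo, assuming the same
# `transformer` import as above; the 8x8 input and the identity theta map are
# illustrative, not from the original repo.
def _demo_conv_spatial_transfo():
    x = tf.placeholder(tf.float32, [None, 8, 8, 1])
    # One identity affine (1, 0, 0, 0, 1, 0) per pixel of the 8x8 grid.
    identity = tf.constant([1., 0., 0., 0., 1., 0.])
    thetas = tf.tile(tf.reshape(identity, [1, 1, 1, 6]),
                     tf.stack([tf.shape(x)[0], 8, 8, 1]))
    # Each 3x3 patch is warped independently, then re-tiled into one image.
    return conv_spatial_transfo(x, thetas, kernel_size=3)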
def spatial_transformer(x, opt, keep_prob, out_size):
    """Generates a spatial transformer network by setting up a two-layer
    localisation network that regresses the parameters of an affine
    transformation of the input.

    Args:
        x: input vector (flattened image batch)
        opt: options object providing the input resolution (opt.pixels)
        keep_prob: dropout keep probability for the localisation network
        out_size: spatial size of the transformed output

    Returns:
        h_trans: transformed feature map (tensor)
    """
    x_tensor = tf.reshape(x, [-1, opt.pixels, opt.pixels, CHANNELS])

    # Weights for localization network
    W_fc_loc1 = weight_vector([IMAGE_PIXELS, 20])
    b_fc_loc1 = bias_vector([20])
    W_fc_loc2 = weight_vector([20, 6])

    # starting with identity transformation
    initial = np.array([[1., 0, 0], [0, 1., 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

    # Defining two layer localization network
    h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)
    # h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
    h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1, W_fc_loc2) + b_fc_loc2)
    h_trans = transformer(x_tensor, h_fc_loc2, out_size)
    return h_trans
def build_network(self):
    self.X = tf.placeholder(
        tf.float32,
        [self.batch_size, self.img_height, self.img_width, self.channel],
        name='images')
    self.detection = tf.placeholder(tf.float32, [self.batch_size, 2], name='detection')
    self.landmarks = tf.placeholder(tf.float32, [self.batch_size, 42], name='landmarks')
    self.visibility = tf.placeholder(tf.float32, [self.batch_size, 21], name='visibility')
    self.pose = tf.placeholder(tf.float32, [self.batch_size, 3], name='pose')
    self.gender = tf.placeholder(tf.float32, [self.batch_size, 2], name='gender')

    # The localisation network predicts a 2x3 affine matrix per image; the
    # transformer aligns the face region before the HyperFace head.
    theta = self.localization_squeezenet(self.X)
    self.T_mat = tf.reshape(theta, [-1, 2, 3])
    self.cropped = transformer(self.X, self.T_mat,
                               [self.out_height, self.out_width])
    # net_output = (out_detection, out_landmarks, out_visibility, out_pose, out_gender)
    net_output = self.hyperface(self.cropped)

    loss_detection = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=net_output[0],
                                                labels=self.detection))
    # Landmarks contribute to the loss only where they are visible.
    visibility_mask = tf.reshape(
        tf.tile(tf.expand_dims(self.visibility, axis=2), [1, 1, 2]),
        [self.batch_size, -1])
    loss_landmarks = tf.reduce_mean(
        tf.square(visibility_mask * (net_output[1] - self.landmarks)))
    loss_visibility = tf.reduce_mean(tf.square(net_output[2] - self.visibility))
    loss_pose = tf.reduce_mean(tf.square(net_output[3] - self.pose))
    loss_gender = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=net_output[4],
                                                labels=self.gender))
    self.loss = self.weight_detect * loss_detection \
        + self.weight_landmarks * loss_landmarks \
        + self.weight_visibility * loss_visibility \
        + self.weight_pose * loss_pose \
        + self.weight_gender * loss_gender
def _spatial_transformer(self, name, x, in_filters, arr_out_filters):
    width = x.get_shape().as_list()[1]
    height = x.get_shape().as_list()[2]
    with tf.variable_scope(name):
        _x = MaxPooling2D(x, name='pool1')
        with tf.variable_scope('conv_1'):
            _x = Conv2D(_x, in_filters, 5, arr_out_filters[0], name='conv_1')
            _x = BatchNormalization(_x, self.mode == 'train', name='batch1')
            _x = MaxPooling2D(_x, use_relu=True, name='pool2')
        with tf.variable_scope('conv_2'):
            _x = Conv2D(_x, arr_out_filters[0], 5, arr_out_filters[1], name='conv_2')
            _x = BatchNormalization(_x, self.mode == 'train', name='batch2')
            _x = MaxPooling2D(_x, use_relu=True, name='pool3')
        with tf.variable_scope('fc1'):
            _x_flat, _x_size = Flatten(_x)
            W_fc_loc1 = weight_variable([_x_size, arr_out_filters[2]])
            b_fc_loc1 = bias_variable([arr_out_filters[2]])
            h_fc_loc1 = tf.nn.tanh(tf.matmul(_x_flat, W_fc_loc1) + b_fc_loc1)
            h_fc_loc1 = slim.dropout(h_fc_loc1, self._dropout,
                                     is_training=(self.mode == 'train' and
                                                  self._dropout > 0),
                                     scope='dropout')
        with tf.variable_scope('fc2'):
            W_fc_loc2 = weight_variable([arr_out_filters[2], 6])
            # Use identity transformation as starting point
            initial = np.array([[1., 0, 0], [0, 1., 0]])
            initial = initial.astype('float32')
            initial = initial.flatten()
            b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')
            h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1, W_fc_loc2) + b_fc_loc2)
        # %% We'll create a spatial transformer module to identify
        # %% discriminative patches
        out_size = (width, height)
        h_trans = transformer(x, h_fc_loc2, out_size)
        h_trans = tf.reshape(
            h_trans, [self.hps.batch_size, width, height, in_filters])
    return h_trans
def stp_transformation(self, prev_image, stp_input, num_masks, reuse=None, suffix=None):
    """Apply spatial transformer predictor (STP) to previous image.

    Args:
        prev_image: previous image to be transformed.
        stp_input: hidden layer to be used for computing STN parameters.
        num_masks: number of masks and hence the number of STP transformations.

    Returns:
        List of images transformed by the predicted STP parameters, and the
        list of the corresponding parameters.
    """
    # Only import spatial transformer if needed.
    from spatial_transformer import transformer

    identity_params = tf.convert_to_tensor(
        np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
    transformed = []
    trafos = []
    for i in range(num_masks):
        params = slim.layers.fully_connected(
            stp_input, 6, scope='stp_params' + str(i) + suffix,
            activation_fn=None, reuse=reuse) + identity_params
        outsize = (prev_image.get_shape()[1], prev_image.get_shape()[2])
        transformed.append(transformer(prev_image, params, outsize))
        trafos.append(params)
    return transformed, trafos
def _spatial_transformer(self, name, x, in_filters, arr_out_filters):
    width = x.get_shape().as_list()[1]
    height = x.get_shape().as_list()[2]
    with tf.variable_scope(name):
        W_fc_loc1 = weight_variable([width * height * in_filters, arr_out_filters[2]])
        b_fc_loc1 = bias_variable([arr_out_filters[2]])
        W_fc_loc2 = weight_variable([arr_out_filters[2], 6])

        # Use identity transformation as starting point
        initial = np.array([[1., 0, 0], [0, 1., 0]])
        initial = initial.astype('float32')
        initial = initial.flatten()
        b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

        x_reshape = tf.reshape(x, [self.hps.batch_size, -1])

        # %% Define the two layer localisation network
        h_fc_loc1 = tf.nn.tanh(tf.matmul(x_reshape, W_fc_loc1) + b_fc_loc1)
        h_fc_loc1 = self._batch_norm2(name, h_fc_loc1)
        h_fc_loc1 = self._relu(h_fc_loc1)
        h_fc_loc1 = slim.dropout(h_fc_loc1, self._dropout,
                                 is_training=(self.mode == 'train' and
                                              self._dropout and
                                              self._dropout > 0),
                                 scope='dropout')
        h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1, W_fc_loc2) + b_fc_loc2)

        # %% We'll create a spatial transformer module to identify
        # %% discriminative patches
        out_size = (width, height)
        h_trans = transformer(x, h_fc_loc2, out_size)
        h_trans = tf.reshape(h_trans,
                             [self.hps.batch_size, width, height, in_filters])
    return h_trans
def stp_transformation(prev_image, stp_input, num_masks):
    """Apply spatial transformer predictor (STP) to previous image.

    Args:
        prev_image: previous image to be transformed.
        stp_input: hidden layer to be used for computing STN parameters.
        num_masks: number of masks and hence the number of STP transformations.

    Returns:
        List of images transformed by the predicted STP parameters.
    """
    # Only import spatial transformer if needed.
    import sys
    sys.path.append('../transformer')
    from spatial_transformer import transformer

    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])
    identity_params = tf.convert_to_tensor(
        np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
    transformed = []
    for i in range(num_masks - 1):
        params = slim.layers.fully_connected(
            stp_input, 6, scope='stp_params' + str(i),
            activation_fn=None) + identity_params
        transformed.append(transformer(prev_image, params, (height, width)))
    return transformed
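# A minimal usage sketch for the module-level stp_transformation above,
# assuming TF1 with slim available in the enclosing module; the shapes and
# num_masks value are illustrative.
def _demo_stp_transformation():
    prev_image = tf.placeholder(tf.float32, [4, 64, 64, 3])
    stp_input = tf.placeholder(tf.float32, [4, 128])  # localisation features
    # The fully connected layers start near zero, so adding identity_params
    # makes each of the num_masks - 1 warps start close to the identity.
    return stp_transformation(prev_image, stp_input, num_masks=10)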
def generator_theta(self, i1i2):
    theta = self.theta_generator_small(i1i2)
    i2hat, flow = transformer(U=i1i2[..., 0:3], theta=theta,
                              out_size=[self.h, self.w],
                              mode=self.config.transform,
                              name='g_transformer')
    return i2hat, flow
def zoom_image(self, x, new_height, new_width):
    assert len(x.shape) == 4
    # An all-zero displacement field turns the flow-style transformer into a
    # plain differentiable bilinear resize onto the new grid.
    delta = tf.zeros((tf.shape(x)[0], 2, new_height * new_width))
    zoomed_x = spatial_transformer.transformer(x, delta,
                                               (new_height, new_width))
    return tf.reshape(
        zoomed_x,
        [tf.shape(x)[0], new_height, new_width, x.shape[-1].value])
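# Hypothetical call site for zoom_image (hedged; `model` and `images` are
# illustrative names, and the method only assumes a 4-D NHWC float input):
#   images = tf.placeholder(tf.float32, [None, 128, 128, 3])
#   thumbs = model.zoom_image(images, 32, 32)   # -> [N, 32, 32, 3]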
def generator_flow(self, i1i2):
    flow = self.flow_generator_flownet(i1i2)
    i2hat = transformer(U=i1i2[..., 0:3], flow=flow,
                        out_size=[self.h, self.w],
                        mode='Flow', name='g_transformer')
    return i2hat, flow
def warp_image(self, x, u, v):
    assert len(x.shape) == 4
    assert len(u.shape) == 3
    assert len(v.shape) == 3
    # The transformer's sampling grid lives in normalized [-1, 1]
    # coordinates, so pixel offsets are rescaled by 2 / width (resp.
    # 2 / height) before being concatenated into a displacement field.
    u = u / x.shape[2].value * 2
    v = v / x.shape[1].value * 2
    delta = tf.concat(axis=1, values=[u, v])
    return spatial_transformer.transformer(
        x, delta, (x.shape[-3].value, x.shape[-2].value))
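# Hypothetical use of warp_image for a constant 5-pixel rightward shift
# (hedged; shapes follow the assertions above, names are illustrative):
#   x = tf.placeholder(tf.float32, [2, 64, 64, 3])
#   u = tf.ones([2, 1, 64 * 64]) * 5.0    # horizontal offsets, in pixels
#   v = tf.zeros([2, 1, 64 * 64])         # no vertical displacement
#   shifted = model.warp_image(x, u, v)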
def stn_img(self, batch_img):
    # Localisation head: flatten -> fc -> dropout -> fc, regressing the six
    # affine parameters used to resample the input to 40x40.
    feature_map = self.flatten1(batch_img)
    feature_map = self.fc1(feature_map)
    feature_map = self.dropout1(feature_map)
    feature_map = self.fc2(feature_map)
    feature_map = transformer(U=batch_img, theta=feature_map,
                              out_size=(40, 40))
    return feature_map
def stn_idsia_inference_type2(batch_x):
    with tf.name_scope('stn_network_t2'):
        stn_output = stn_locnet_type2(stn_convolve_pool_flatten_type2(batch_x))
        transformed_batch_x = transformer(
            batch_x, stn_output,
            (IMAGE_SIZE, IMAGE_SIZE, TF_CONFIG['channels']))
    with tf.name_scope('idsia_classifier'):
        features, batch_act = idsia_convolve_pool_flatten(transformed_batch_x,
                                                          multiscale=True)
        logits = idsia_fc_logits(features, multiscale=True)
    return logits, transformed_batch_x, batch_act
def spatialTransformer(x_img, num_regions):
    """Create spatial transformer network and return output tensor.

    Args:
        x_img: tensor
            The input image in NCHW format.
        num_regions: int
            The number of region proposals generated by the network.

    The dropout probability defaults to `keep_prob=1.0`. The placeholder
    that controls it is `transformer/keep_prob:0`.

    Returns:
        image: tensor
            It has the same shape as the input `x_img`.
    """
    # Sanity check: must be a valid NCHW image.
    assert len(x_img.shape) == 4
    _, chan, height, width = x_img.shape.as_list()

    with tf.variable_scope('transformer'):
        # Setup the two-layer localisation network to figure out the
        # parameters for an affine transformation of the input.
        kp = tf.placeholder_with_default(1.0, None, 'keep_prob')

        # Do nothing if the transformer was disabled.
        if num_regions in [None, 0]:
            return x_img

        # Spatial transformer uses NHWC format.
        x_img = tf.transpose(x_img, [0, 2, 3, 1])

        # Create variables for fully connected layer.
        W1, b1 = weights([chan * height * width, num_regions]), bias([num_regions])

        # Weights and bias for spatial transform matrix. Initialise to identity.
        W2 = weights([num_regions, 6])
        initial = np.array([[1, 0, 0], [0, 1, 0]]).astype(np.float32).flatten()
        b2 = tf.Variable(initial_value=initial, name='b2')

        # Define the two layer localisation network.
        x_flat = tf.reshape(x_img, [-1, chan * height * width])
        h1 = tf.nn.tanh(tf.matmul(x_flat, W1) + b1)
        h1_drop = tf.nn.dropout(h1, keep_prob=kp)
        h2 = tf.nn.tanh(tf.matmul(h1_drop, W2) + b2)

        # We'll create a spatial transformer module to identify
        # discriminative patches.
        out_flat = spatial_transformer.transformer(x_img, h2, (height, width))
        out_img = tf.reshape(out_flat, [-1, height, width, chan])

    # Return image as NCHW.
    return tf.transpose(out_img, [0, 3, 1, 2])
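# A hedged run-time sketch for spatialTransformer: `weights` and `bias` are
# this repo's helpers used internally, and the dropout placeholder is looked
# up by the name documented in the docstring. Shapes are illustrative.
def _demo_spatial_transformer_nchw():
    x_img = tf.placeholder(tf.float32, [None, 3, 64, 64])  # NCHW input
    out = spatialTransformer(x_img, num_regions=20)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        kp = tf.get_default_graph().get_tensor_by_name('transformer/keep_prob:0')
        batch = np.zeros([2, 3, 64, 64], np.float32)
        return sess.run(out, feed_dict={x_img: batch, kp: 0.9})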
def stn_only(self, img, flow):
    b, h, w, c = flow.shape
    img = tf.convert_to_tensor(img)
    flow = tf.convert_to_tensor(flow)
    interp = transformer(U=img, flow=flow, out_size=[h, w],
                         mode='Flow', name='stn_only')
    out_interp = self.sess.run([interp])
    return out_interp
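# Hypothetical call for stn_only (hedged; a zero flow field should reproduce
# the input up to border interpolation, which makes a quick sanity check):
#   img = np.random.rand(1, 64, 64, 3).astype(np.float32)
#   flow = np.zeros([1, 64, 64, 2], np.float32)
#   out = model.stn_only(img, flow)   # out[0] ~ img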
def obj_ll(self, images, z_where):
    num_steps = self.conf.num_steps
    patch_h, patch_w = self.conf.patch_height, self.conf.patch_width
    n, scene_h, scene_w, chans = map(int, images.shape)

    # Extract object patches (also referred to as y)
    patches, object_scores = stn.batch_transformer(images, z_where,
                                                   [patch_h, patch_w])
    patches = tf.identity(patches, name='y')

    # Compute background score iteratively by 'cutting out' each object
    cur_bg_score = tf.ones_like(object_scores[:, 0])
    bg_maps = [cur_bg_score]
    obj_visible = []
    for step in range(num_steps):
        # Everything outside the scene is unobserved -> pad bg_score with zeros
        padded_bg_score = tf.pad(cur_bg_score, [[0, 0], [1, 1], [1, 1]])
        padded_bg_score = tf.expand_dims(padded_bg_score, -1)
        shifted_z_where = z_where[:, step] + [0., 0., 1., 0., 0., 1.]
        vis, _ = stn.transformer(padded_bg_score, shifted_z_where,
                                 [patch_h, patch_w])
        obj_visible.append(vis[..., 0])
        cur_bg_score *= 1 - object_scores[:, step]
        # cur_bg_score = tf.clip_by_value(cur_bg_score, 0.0, 1.0)
        bg_maps.append(cur_bg_score)
    tf.identity(cur_bg_score, name='bg_score')

    obj_visible = tf.stack(obj_visible, axis=1)
    overlap_ratio = 1 - tf.reduce_mean(obj_visible, axis=[2, 3])

    flattened_patches = tf.reshape(
        patches, [n * num_steps, patch_h * patch_w * chans])
    spn_input = flattened_patches

    pixels_visible = tf.reshape(obj_visible,
                                [n, num_steps, patch_h * patch_w, 1])
    channels_visible = tf.tile(pixels_visible, [1, 1, 1, chans])
    channels_visible = tf.reshape(
        channels_visible, [n, num_steps, patch_h * patch_w * chans])
    channels_visible = tf.identity(channels_visible, name='obj_vis')
    marginalize = 1 - channels_visible
    marginalize = tf.reshape(marginalize,
                             [n * num_steps, patch_h * patch_w * chans])

    spn_output = self.obj_spn.forward(spn_input, marginalize)
    p_ys = spn_output[:, 0]
    # tf.reduce_logsumexp(spn_output + tf.log(0.1), axis=1)
    p_ys = tf.reshape(p_ys, [n, num_steps])
    # Scale by patch size to approximate a calibrated likelihood over x
    p_ys *= z_where[:, :, 0] * z_where[:, :, 4]
    return p_ys, bg_maps, overlap_ratio
def get_STL(path, num_batch):
    h = 384
    w = 384
    # Seed the batch with two copies of the first image; they are stripped
    # again below, so the loop can simply append every image in `path`.
    im = cv2.imread(path[0])
    im = im / 255.
    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_CUBIC)
    im = im.reshape(1, h, w, 3)
    im = im.astype('float32')
    batch = np.append(im, im, axis=0)
    for p in path:
        im = cv2.imread(p)
        im = im / 255.
        im = cv2.resize(im, (w, h), interpolation=cv2.INTER_CUBIC)
        im = im.reshape(1, h, w, 3)
        im = im.astype('float32')
        batch = np.append(batch, im, axis=0)
    batch = batch[2:, :, :, :]
    out_size = (h, w)

    # %% Simulate batch. Note that the cast shadows the placeholder, so the
    # graph reads the constant `batch` directly.
    x = tf.placeholder(tf.float32, [None, h, w, 3])
    x = tf.cast(batch, 'float32')

    # %% Create localisation network and convolutional layer
    with tf.variable_scope('spatial_transformer_0'):
        # %% Create a fully-connected layer with 6 output nodes
        n_fc = 6
        W_fc1 = tf.Variable(tf.zeros([h * w * 3, n_fc]), name='W_fc1')

        # %% Zoom into the image with a small random perturbation of identity
        a, b, c, d, e, f = np.random.random(6) / 10
        initial = np.array([[1 - a, b, c], [d, 1 - e, f]])
        initial = initial.astype('float32')
        initial = initial.flatten()

        b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
        h_fc1 = tf.matmul(tf.zeros([num_batch, h * w * 3]), W_fc1) + b_fc1
        h_trans = transformer(x, h_fc1, out_size)

    # %% Run session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        y = sess.run(h_trans, feed_dict={x: batch})
    return y
def _spatial_transformer(self, name, x, in_filters, arr_out_filters):
    width = x.get_shape().as_list()[1]
    height = x.get_shape().as_list()[2]
    with tf.variable_scope(name):
        _x = MaxPooling2D(x, name='pool1')
        with tf.variable_scope('conv_1'):
            _x = Conv2D(_x, in_filters, 5, arr_out_filters[0], name='conv_1')
            # _x = BatchNormalization(_x, self.mode == 'train', name='batch1')
            _x = MaxPooling2D(_x, use_relu=True, name='pool2')
        with tf.variable_scope('conv_2'):
            _x = Conv2D(_x, arr_out_filters[0], 5, arr_out_filters[1], name='conv_2')
            # _x = BatchNormalization(_x, self.mode == 'train', name='batch2')
            _x = MaxPooling2D(_x, use_relu=True, name='pool3')
        with tf.variable_scope('fc1'):
            _x_flat, _x_size = Flatten(_x)
            _x = Dense(_x_flat, _x_size, arr_out_filters[2],
                       use_relu=True, name='fc1')
            _x = slim.dropout(_x, self._dropout,
                              is_training=(self.mode == 'train' and
                                           self._dropout > 0),
                              scope='dropout')
        with tf.variable_scope('fc2'):
            _x = Dense(_x, arr_out_filters[2], 6, use_relu=False,
                       trans=True, name='fc2')
        out_size = (width, height)
        h_trans = transformer(x, _x, out_size)
        h_trans = tf.reshape(
            h_trans, [self.hps.batch_size, width, height, in_filters])
    return h_trans
def model_sin():
    """Create model and return tensors necessary to run the model.

    Creates both training and testing phase tensors.
    """
    x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])

    if RESTRICT_ROTATE:
        # Parameterise the transform by a single rotation angle.
        initial = 0.0
        theta = tf.Variable(initial_value=initial, name='theta')
        sin = tf.sin(theta)
        cos = tf.cos(theta)
        rot_matrix = [cos, -sin, tf.constant(0.0), sin, cos, tf.constant(0.0)]
    else:
        # Full affine transform, initialised to the identity.
        initial = np.array([[1, 0, 0], [0, 1, 0]])
        initial = initial.astype('float32')
        initial = initial.flatten()
        theta = tf.Variable(initial_value=initial, name='theta')
        rot_matrix = tf.identity(theta)

    h_fc1 = tf.zeros([1, 6]) + rot_matrix  # takes advantage of TF's broadcast
    transformed_x = transformer(x, h_fc1, (28, 28))

    if MODEL == 'LENET':
        net, model_var_dict = lenet(transformed_x)
    elif MODEL == 'BEGINNER':
        transformed_x = tf.reshape(transformed_x, (1, 28, 28, 1))
        W, b, net, model_var_dict = beginner(transformed_x)
    elif MODEL == 'SMALL_FNN':
        transformed_x = tf.reshape(transformed_x, (1, 784))
        net, model_var_dict = small_fnn(transformed_x)
    y = tf.nn.softmax(net)

    # Test phase tensors: minimise the entropy of the predictions with
    # respect to theta only (no labels needed).
    test_cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(y * tf.log(y), reduction_indices=[1]))
    test_opt = tf.train.GradientDescentOptimizer(10 ** -2)
    test_train_step = test_opt.minimize(test_cross_entropy, var_list=[theta])

    # Train phase tensors: ordinary cross-entropy on the model weights.
    train_cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=y_))
    train_opt = tf.train.GradientDescentOptimizer(10 ** -2)
    theta_train_step = train_opt.minimize(train_cross_entropy,
                                          var_list=model_var_dict.values())

    # Return all tensors since references are required to run operations.
    return (x, y_, theta, rot_matrix, h_fc1, transformed_x, net,
            model_var_dict, y, test_cross_entropy, test_train_step,
            train_cross_entropy, train_opt, theta_train_step)
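# A hedged sketch of how model_sin's two phases fit together: the classifier
# weights are trained with labels while theta stays frozen, then at test time
# only `theta` is updated by minimising the prediction entropy, so the STN
# learns to re-orient the input until the classifier is confident. Names and
# feed values are illustrative.
def _demo_model_sin(sess, image, label):
    (x, y_, theta, _, _, _, _, _, _,
     _, test_train_step, _, _, theta_train_step) = model_sin()
    sess.run(tf.global_variables_initializer())
    sess.run(theta_train_step, feed_dict={x: image, y_: label})  # fit weights
    sess.run(test_train_step, feed_dict={x: image})              # adapt theta
    return sess.run(theta)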
def call(self, batch_img):
    """Chain the layers together to build the network."""
    # Localisation head regresses the six affine parameters...
    feature_map = self.flatten1(batch_img)
    feature_map = self.fc1(feature_map)
    feature_map = self.dropout1(feature_map)
    feature_map = self.fc2(feature_map)
    # ...which warp the original input to 40x40 before classification.
    feature_map = transformer(U=batch_img, theta=feature_map,
                              out_size=(40, 40))
    feature_map = self.conv1(feature_map)
    feature_map = self.conv2(feature_map)
    feature_map = self.flatten2(feature_map)
    feature_map = self.fc3(feature_map)
    feature_map = self.fc4(feature_map)
    return feature_map
def gen_STN():
    # x = np.reshape(np.arange(5*5*1), (1, 5, 5, 1)).astype(np.float32)
    sess = tf.Session()
    # Input is NHWC: a batch of 3 images, 6 high, 8 wide, 3 channels.
    U = tf.range(3 * 6 * 8 * 3)
    U = tf.reshape(U, [3, 6, 8, 3])
    # One identity transform per image, so the output should reproduce U.
    theta = tf.constant([[1, 0, 0, 0, 1, 0],
                         [1, 0, 0, 0, 1, 0],
                         [1, 0, 0, 0, 1, 0]])
    output = transformer(U, theta, (6, 8))
    dump_data(sess.run(U), "U.data", fmt="binary", data_type="float32")
    dump_data(sess.run(U), "U.txt", fmt="float", data_type="float32")
    dump_data(sess.run(theta), "theta.data", fmt="binary", data_type="float32")
    dump_data(sess.run(theta), "theta.txt", fmt="float", data_type="float32")
    dump_data(sess.run(output), "output.data", fmt="binary", data_type="float32")
    dump_data(sess.run(output), "output.txt", fmt="float", data_type="float32")
def transform(self, inputs, out_height, out_width):
    net = tf.layers.conv2d(inputs, 64, 7, 3, activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(net, 3, 2)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(net, 3, 2)
    net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.reduce_mean(net, [1, 2], name='global_pool')
    net = tf.layers.dense(net, 20, activation=tf.nn.tanh)
    # Note: without a `training` flag this dropout layer is a no-op.
    net = tf.layers.dropout(net, 0.8)
    initial = np.array([[1., 0, 0], [0, 1., 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    # The bias starts at the identity transform; because of the tanh
    # squashing the initial output is tanh([1, 0, 0, 0, 1, 0]), i.e. only
    # approximately the identity.
    net = tf.layers.dense(net, 6, activation=tf.nn.tanh,
                          bias_initializer=tf.initializers.constant(initial))
    self.localization = net
    for i in range(6):
        tf.summary.scalar("param%d" % i, net[0][i])
    return transformer(inputs, net, (out_height, out_width))
def transform_isotropic(self, inputs, out_height, out_width):
    net = tf.layers.conv2d(inputs, 64, 7, 3, activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(net, 3, 2)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(net, 3, 2)
    net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.reduce_mean(net, [1, 2], name='global_pool')
    net = tf.layers.dense(net, 36, activation=tf.nn.tanh)
    net = tf.layers.dropout(net, 0.8)
    # net = tf.layers.dense(net, 18, activation=tf.nn.tanh)
    # net = tf.layers.dense(net, 12, activation=tf.nn.tanh)
    net = tf.layers.dense(net, 6, activation=tf.nn.tanh)
    self.localization = net
    # for i in range(12):
    # for i in range(18):
    for i in range(2):
        tf.summary.scalar("param%d" % i, net[0][i])
    # bg, title, credit = tf.split(inputs, [3, 4, 4], 3)
    bg, title = tf.split(inputs, [3, 4], 3)
    tf.summary.image("bg", bg, max_outputs=10)
    tf.summary.image("title", title, max_outputs=10)
    # tf.summary.image("credit", credit, max_outputs=10)
    # bg_p, title_p, credit_p = tf.split(net, 3, 1)
    # bg_p, title_p = tf.split(net, 2, 1)
    # The mask keeps only the last two regressed parameters (y-scale and
    # y-translation); the additive constant pins the x-scale and
    # x-translation at 0.5.
    mul_c = tf.constant([[0., 0., 0., 0., 1., 1.]], tf.float32, shape=[1, 6])
    add_c = tf.constant([[0.5, 0., 0.5, 0., 0., 0.]], tf.float32, shape=[1, 6])
    # bg_trans = transformer(bg, tf.multiply(bg_p, cont_p), (out_height, out_width))
    theta = net * mul_c + add_c
    self.theta = theta
    title_trans = transformer(title, theta, (out_height, out_width))
    # credit_trans = transformer(credit, tf.multiply(credit_p, cont_p), (out_height, out_width))
    # return bg_trans, title_trans, credit_trans
    # return bg_trans, title_trans
    return title_trans
def _spatial_transform(self, x):
    ## x shape: [N, W, H, C=1]
    conv1_loc = tf.layers.conv2d(x, 16, 3, padding='same',
                                 activation=tf.nn.relu, name='conv1_loc')
    pool1_loc = tf.layers.max_pooling2d(conv1_loc, 2, 2)
    flat_loc = tf.contrib.layers.flatten(pool1_loc)
    fc1_loc = tf.contrib.layers.fully_connected(flat_loc, 64, scope='fc1_loc')
    ac1_loc = tf.nn.tanh(fc1_loc)
    fc2_loc = tf.contrib.layers.fully_connected(ac1_loc, 6, scope='fc2_loc')
    ac2_loc = tf.nn.tanh(fc2_loc)
    stn = st.transformer(x, ac2_loc,
                         out_size=(self._img_height, self._img_width))
    return stn
def transform_mixed(self, inputs, out_height, out_width):
    net = tf.layers.conv2d(inputs, 64, 7, 3, activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(net, 3, 2)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(net, 3, 2)
    net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 256, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.layers.conv2d(net, 128, 3, activation=tf.nn.relu)
    net = tf.reduce_mean(net, [1, 2], name='global_pool')
    net = tf.layers.dense(net, 36, activation=tf.nn.tanh)
    net = tf.layers.dropout(net, 0.8)
    # initial = np.array([[1., 0, 0], [0, 1., 0], [1., 0, 0], [0, 1., 0], [1., 0, 0], [0, 1., 0]])
    initial = np.array([[1., 0, 0], [0, 1., 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    # net = tf.layers.dense(net, 18, activation=tf.nn.tanh, bias_initializer=tf.initializers.constant(initial))
    # net = tf.layers.dense(net, 12, activation=tf.nn.tanh, bias_initializer=tf.initializers.constant(initial))
    net = tf.layers.dense(net, 6, activation=tf.nn.tanh,
                          bias_initializer=tf.initializers.constant(initial))
    self.localization = net
    # for i in range(18):
    for i in range(6):
        tf.summary.scalar("param%d" % i, net[0][i])
    # bg, title, credit = tf.split(inputs, [3, 4, 4], 3)
    bg, title = tf.split(inputs, [3, 4], 3)
    tf.summary.image("bg", bg, max_outputs=10)
    tf.summary.image("title", title, max_outputs=10)
    # tf.summary.image("credit", credit, max_outputs=10)
    # bg_p, title_p, credit_p = tf.split(net, 3, 1)
    # bg_trans = transformer(bg, bg_p, (out_height, out_width))
    self.theta = net
    title_trans = transformer(title, net, (out_height, out_width))
    # credit_trans = transformer(credit, credit_p, (out_height, out_width))
    # return bg_trans, title_trans, credit_trans
    # return bg_trans, title_trans
    return title_trans
def stp_transformation(prev_image, stp_input, num_masks):
    """Apply spatial transformer predictor (STP) to previous image.

    Args:
        prev_image: previous image to be transformed.
        stp_input: hidden layer to be used for computing STN parameters.
        num_masks: number of masks and hence the number of STP transformations.

    Returns:
        List of images transformed by the predicted STP parameters.
    """
    # Only import spatial transformer if needed.
    from spatial_transformer import transformer

    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])
    identity_params = tf.convert_to_tensor(
        np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], np.float32))
    transformed = []
    for i in range(num_masks - 1):
        params = slim.layers.fully_connected(
            stp_input, 6, scope='stp_params' + str(i),
            activation_fn=None) + identity_params
        # `transformer` requires an explicit output size; keep the input size.
        transformed.append(transformer(prev_image, params, (height, width)))
    return transformed
def spTrans(x_tensor, width, height, channels, n_loc, keep_prob):
    resolution = width * height * channels
    W_fc_loc1 = weight_variable([resolution, n_loc])
    b_fc_loc1 = bias_variable([n_loc])
    W_fc_loc2 = weight_variable([n_loc, 6])

    # Use identity transformation as starting point
    initial = np.array([[1., 0, 0], [0, 1., 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

    # Two layer localisation network; the input image is flattened first.
    x_flat = tf.reshape(x_tensor, [-1, resolution])
    h_fc_loc1 = tf.nn.tanh(tf.matmul(x_flat, W_fc_loc1) + b_fc_loc1)
    # dropout (reduce overfitting)
    h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
    # %% Second layer
    h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)

    # spatial transformer
    out_size = (width, height)
    h_trans = transformer(x_tensor, h_fc_loc2, out_size)
    return h_trans, b_fc_loc2, h_fc_loc2
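# A hedged usage sketch for spTrans on a 40x40 single-channel input; the
# placeholder names and sizes are illustrative.
def _demo_sp_trans():
    width = height = 40
    x_tensor = tf.placeholder(tf.float32, [None, width, height, 1])
    keep_prob = tf.placeholder(tf.float32)
    h_trans, b_loc2, theta = spTrans(x_tensor, width, height, 1,
                                     n_loc=20, keep_prob=keep_prob)
    return h_trans, theta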
def inference(self):
    self.base_network = layers.base_network(self.img, self.training, 'base_network')
    self.intermediate_layer = layers.intermediate_layer(
        self.base_network, self.training, 'intermediate_layer')
    self.logits_cls = layers.clf_layer(self.intermediate_layer,
                                       self.training, 'cls_layer')
    self.scores_cls = tf.nn.sigmoid(self.logits_cls)
    reg = layers.reg_layer(self.intermediate_layer, self.training, 'reg_layer')
    self.tx, self.ty, self.tw, self.th = self.parameterize(reg)

    # Faster R-CNN additional layers
    scores_cls_flat = tf.reshape(
        self.logits_cls,
        [-1, self.scores_cls.shape[1] * self.scores_cls.shape[2]])
    # Find the top-2 iou-score locations in each element of the batch
    self.values, self.indices = tf.nn.top_k(scores_cls_flat, k=2,
                                            sorted=True, name=None)
    self.ind1, self.ind2 = self.indices[:, 0:1], self.indices[:, 1:2]
    self.ind1 = tf.concat(
        [tf.reshape(tf.range(self.batch_size), [-1, 1]), self.ind1], 1)
    self.ind2 = tf.concat(
        [tf.reshape(tf.range(self.batch_size), [-1, 1]), self.ind2], 1)
    x1, y1, w1, h1 = (self.gather(reg[:, :, :, 0:1], self.ind1),
                      self.gather(reg[:, :, :, 1:2], self.ind1),
                      self.gather(reg[:, :, :, 2:3], self.ind1),
                      self.gather(reg[:, :, :, 3:4], self.ind1))
    x2, y2, w2, h2 = (self.gather(reg[:, :, :, 0:1], self.ind2),
                      self.gather(reg[:, :, :, 1:2], self.ind2),
                      self.gather(reg[:, :, :, 2:3], self.ind2),
                      self.gather(reg[:, :, :, 3:4], self.ind2))
    x, y, w, h = (tf.concat([x1, x2], axis=0), tf.concat([y1, y2], axis=0),
                  tf.concat([w1, w2], axis=0), tf.concat([h1, h2], axis=0))
    # Convert the proposal boxes into the affine parameters the transformer
    # expects (scale and translation in normalized coordinates).
    theta = tf.concat([w * 16 / 128.0, 0.0 * w, (x * 16 - 64) / 64.0,
                       0.0 * h, h * 16 / 128, (y * 16 - 64) / 64.0], axis=1)
    img = tf.concat([self.base_network, self.base_network], 0)
    label1, label2 = self.gather(self.label, self.ind1), self.gather(self.label, self.ind2)
    label = tf.concat([label1, label2], 0)
    label = tf.one_hot(label, self.n_classes, on_value=1.0, off_value=0.0, axis=-1)
    self.one_hot_label = tf.reshape(label, [-1, self.n_classes])
    # Crop the top proposals out of the feature map with the transformer.
    spatial_transformer_out = spatial_transformer.transformer(img, theta,
                                                              out_size=(4, 4))
    spatial_transformer_out = tf.reshape(spatial_transformer_out, [-1, 4, 4, 128])
    self.logits = layers.faster_rcnn(spatial_transformer_out, self.training,
                                     'faster_rcnn', self.n_classes)
W_fc_loc2 = weight_variable([20, 6])

# Use identity transformation as starting point
initial = np.array([[1., 0, 0], [0, 1., 0]])
initial = initial.astype('float32')
initial = initial.flatten()
b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

# %% Define the two layer localisation network
h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)
# %% We can add dropout for regularizing and to reduce overfitting like so:
keep_prob = tf.placeholder(tf.float32)
h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
# %% Second layer
h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)

# %% We'll create a spatial transformer module to identify discriminative
# %% patches (this snippet uses the older transformer signature that took a
# %% downsample_factor instead of an explicit out_size)
h_trans = transformer(x_tensor, h_fc_loc2, downsample_factor=1)

# %% We'll setup the first convolutional layer
# Weight matrix is [height x width x input_channels x output_channels]
filter_size = 3
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])

# %% Bias is [output_channels]
b_conv1 = bias_variable([n_filters_1])

# %% Now we can build a graph which does the first layer of convolution:
# we define our stride as batch x height x width x channels
# instead of pooling, we use strides of 2 and more layers
# with smaller filters.
# %% Simulate batch
batch = np.append(im, im, axis=0)
batch = np.append(batch, im, axis=0)
num_batch = 3

# Note: the cast shadows the placeholder, so the graph reads the constant
# `batch` directly.
x = tf.placeholder(tf.float32, [None, 1200, 1600, 3])
x = tf.cast(batch, 'float32')

# %% Create localisation network and convolutional layer
with tf.variable_scope('spatial_transformer_0'):
    # %% Create a fully-connected layer with 6 output nodes
    n_fc = 6
    W_fc1 = tf.Variable(tf.zeros([1200 * 1600 * 3, n_fc]), name='W_fc1')

    # %% Zoom into the image
    initial = np.array([[0.5, 0, 0], [0, 0.5, 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()

    b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')
    h_fc1 = tf.matmul(tf.zeros([num_batch, 1200 * 1600 * 3]), W_fc1) + b_fc1
    out_size = (1200, 1600)  # assumed: defined earlier in the original script
    h_trans = transformer(x, h_fc1, out_size)

# %% Run session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
y = sess.run(h_trans, feed_dict={x: batch})
# plt.imshow(y[0])
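# A quick check of the zoom semantics above (a sketch, assuming the same
# affine `transformer`): theta = [0.5, 0, 0, 0, 0.5, 0] maps the output grid
# onto the central half of the input in each dimension, i.e. it zooms in
# rather than shrinking the image.
def _demo_zoom_semantics():
    img = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4, 1])
    theta = tf.constant([[0.5, 0., 0., 0., 0.5, 0.]])
    return transformer(img, theta, (4, 4))  # central crop, upsampled 2x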
initial = initial.astype('float32')
initial = initial.flatten()
b_fc_loc2 = tf.Variable(initial_value=initial, name='b_fc_loc2')

# %% Define the two layer localisation network
h_fc_loc1 = tf.nn.tanh(tf.matmul(x, W_fc_loc1) + b_fc_loc1)
# %% We can add dropout for regularizing and to reduce overfitting like so:
keep_prob = tf.placeholder(tf.float32)
h_fc_loc1_drop = tf.nn.dropout(h_fc_loc1, keep_prob)
# %% Second layer
h_fc_loc2 = tf.nn.tanh(tf.matmul(h_fc_loc1_drop, W_fc_loc2) + b_fc_loc2)

# %% We'll create a spatial transformer module to identify discriminative
# %% patches
out_size = (40, 40)
h_trans = transformer(x_tensor, h_fc_loc2, out_size)

# %% We'll setup the first convolutional layer
# Weight matrix is [height x width x input_channels x output_channels]
filter_size = 3
n_filters_1 = 16
W_conv1 = weight_variable([filter_size, filter_size, 1, n_filters_1])

# %% Bias is [output_channels]
b_conv1 = bias_variable([n_filters_1])

# %% Now we can build a graph which does the first layer of convolution:
# we define our stride as batch x height x width x channels
# instead of pooling, we use strides of 2 and more layers
# with smaller filters.
batch = np.append(batch, im, axis=0)
num_batch = 3

# Note: the cast shadows the placeholder, so the graph reads the constant
# `batch` directly.
x = tf.placeholder(tf.float32, [None, 1200, 1600, 3])
x = tf.cast(batch, 'float32')

# Create localisation network and convolutional layer
with tf.variable_scope('spatial_transformer_0'):
    # %% Create a fully-connected layer:
    n_fc = 6
    W_fc1 = tf.Variable(tf.zeros([1200 * 1600 * 3, n_fc]), name='W_fc1')

    initial = np.array([[0.5, 0, 0], [0, 0.5, 0]])
    initial = initial.astype('float32')
    initial = initial.flatten()
    b_fc1 = tf.Variable(initial_value=initial, name='b_fc1')

    x_flatten = tf.reshape(x, [-1, 1200 * 1600 * 3])
    # h_fc1 = tf.nn.relu(tf.matmul(x_flatten, W_fc1) + b_fc1)
    h_fc1 = tf.matmul(tf.zeros([num_batch, 1200 * 1600 * 3]), W_fc1) + b_fc1
    # This snippet uses the older transformer signature with a
    # downsample_factor instead of an explicit out_size.
    h_trans = transformer(x, h_fc1, downsample_factor=2)

# Run session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
y = sess.run(h_trans, feed_dict={x: batch})
plt.imshow(y[0])