def test_slim_unit_norm(self):
    """Run a fully-connected layer followed by ``slim.unit_norm`` through the model check.

    The graph is a ``[None, 8]`` float32 placeholder feeding a 10-unit
    ``slim.fully_connected`` layer whose output goes through
    ``slim.unit_norm`` along dimension 1.  The graph is handed to
    ``self._test_tf_model`` with a concrete ``[1, 8]`` input shape and
    ``delta=1e-2``.
    """
    graph = tf.Graph()
    with graph.as_default():
        features = tf.placeholder(
            tf.float32, shape=[None, 8], name='test_slim_unit_norm/input')
        # Defaults applied to every slim.fully_connected created in the scope.
        fc_defaults = dict(
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.2),
            weights_regularizer=slim.l2_regularizer(0.0005))
        with slim.arg_scope([slim.fully_connected], **fc_defaults):
            hidden = slim.fully_connected(features, 10, scope='fc')
            normalized = slim.unit_norm(hidden, 1)

    input_shapes = {"test_slim_unit_norm/input:0": [1, 8]}
    output_names = [normalized.op.name]
    self._test_tf_model(graph, input_shapes, output_names, delta=1e-2)
def test_custom_tile(self):
    """Check that layer 9 of the converted model is a custom ``Tile`` layer.

    Builds the same fully-connected + ``slim.unit_norm`` graph as the
    unit-norm test, runs it through ``self._test_tf_model`` with
    ``add_custom_layers=True`` and numerical checking disabled, then
    inspects the returned model's spec.
    """
    graph = tf.Graph()
    with graph.as_default():
        features = tf.placeholder(tf.float32, shape=[None, 8], name='input')
        # Defaults applied to every slim.fully_connected created in the scope.
        fc_defaults = dict(
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.2),
            weights_regularizer=slim.l2_regularizer(0.0005))
        with slim.arg_scope([slim.fully_connected], **fc_defaults):
            hidden = slim.fully_connected(features, 10, scope='fc')
            normalized = slim.unit_norm(hidden, dim=1)

    coreml_model = self._test_tf_model(
        graph,
        {"input:0": [1, 8]},
        [normalized.op.name],
        check_numerical_accuracy=False,
        add_custom_layers=True)

    # The index of the Tile layer is fixed by the conversion of this graph.
    tile_layer = coreml_model.get_spec().neuralNetwork.layers[9]
    self.assertIsNotNone(tile_layer.custom)
    self.assertEqual('Tile', tile_layer.custom.className)
def _build_interpretation(self):
    '''Interprets the logits.

    Applies a softmax to ``self.net['logits']``, splits the result into
    ``self.num_classes`` equal groups along axis 1, keeps the maximum of
    each group, and normalises the pooled vector with ``slim.unit_norm``.

    Sets:
        self.predictions_prob: the normalised, pooled per-class scores.
        self.predictions: int32 argmax of ``predictions_prob`` along axis 1.
        self.score: mean of the element-wise equality between
            ``self.predictions`` and ``self.labels``, cast to float.
    '''
    softmax_out = tf.nn.softmax(self.net['logits'])  # 5 * num_classes vector
    splits = tf.split(softmax_out,
                      num_or_size_splits=self.num_classes,
                      axis=1)
    # tf.split with an integer count returns exactly `num_classes` tensors,
    # so iterate over them directly instead of indexing with the
    # Python 2-only `xrange`.
    max_splits = [
        tf.reduce_max(class_scores, axis=1, keep_dims=True)
        for class_scores in splits
    ]
    softmax_out_pooled = tf.concat(max_splits, axis=1)
    softmax_out_norm = slim.unit_norm(softmax_out_pooled,
                                      dim=1,
                                      scope='normalize_softmax')
    self.predictions_prob = softmax_out_norm
    self.predictions = tf.argmax(self.predictions_prob,
                                 axis=1,
                                 output_type=tf.int32)
    self.score = tf.reduce_mean(
        tf.to_float(tf.equal(self.predictions, self.labels)))
def forward(self, inputs, grid, is_training=True, reuse=False):
    '''Build the depth / surface-normal graph and return its four outputs.

    Stages, in order of construction:
      1. ``fcn`` scope: two parallel VGG-style stacks over the preprocessed
         image, one ending in a 1-channel map (``fc8_upsample``) and one in
         a unit-normalised 3-channel map (``fc8_upsample_norm``).
      2. ``noise`` scope: normals are re-estimated from the depth branch by
         a per-pixel least-squares solve over k x k patches, then refined
         together with the FCN normals and the image.
      3. ``norm_depth`` scope: depth is re-estimated from the normals and
         refined together with the FCN depth and the image.
      4. ``edge_refinemet`` scope: eight edge-weight maps are predicted and
         used by ``propagate`` for four passes over depth and normals.

    Args:
        inputs: image tensor.  A rank-3 tensor gets a leading batch axis;
            the last axis is reversed and ``self.mean_BGR`` is added.
        grid: tensor multiplied element-wise with the 3-channel tiled depth
            (presumably per-pixel ray coordinates -- TODO confirm with caller).
        is_training: forwarded to the ``slim.dropout`` layers.
        reuse: forwarded to every ``tf.variable_scope``.

    Returns:
        Tuple ``(final_depth, fc8_upsample_norm, norm_pred_final,
        fc8_upsample)``.  Note that ``fc8_upsample_norm`` is the tensor as
        rebound inside the ``noise`` scope, i.e. after
        ``tf.expand_dims(..., axis=4)``.

    NOTE(review): the original source had lost its indentation; the nesting
    of the ``with`` blocks below (all variable scopes inside the
    ``slim.arg_scope``, and each ``tf.device('cpu:0')`` covering only the
    single statement after it) was reconstructed and should be confirmed.
    Several reshapes and ``tf.squeeze`` calls look like they assume a batch
    size of 1 -- confirm before using larger batches.
    '''

    def preprocessing(inputs):
        dims = inputs.get_shape()
        if len(dims) == 3:
            # `axis` replaces the deprecated `dim` alias and matches the
            # other expand_dims calls in this function.
            inputs = tf.expand_dims(inputs, axis=0)
        mean_BGR = tf.reshape(self.mean_BGR, [1, 1, 1, 3])
        inputs = inputs[:, :, :, ::-1] + mean_BGR
        return inputs

    def caffe_init():
        # Fresh initializer pair on every call, exactly as the original did.
        return dict(
            weights_initializer=weight_from_caffe(self.pretrain_weight),
            biases_initializer=bias_from_caffe(self.pretrain_weight))

    def xavier_init():
        # Initializers used by the layers whose scope names end in "_new"
        # and by the edge-refinement layers.
        return dict(
            weights_initializer=tf.contrib.layers.xavier_initializer(
                uniform=False),
            biases_initializer=tf.constant_initializer(0.0))

    # Shapes and patch settings shared by several ops below.
    image_size = [self.crop_size_h, self.crop_size_w]
    num_patch = self.k * self.k
    image_shape = [self.batch_size, self.crop_size_h, self.crop_size_w, 3]
    patch_shape = [
        self.batch_size, self.crop_size_h, self.crop_size_w, num_patch, 3
    ]
    patch_args = dict(ksizes=[1, self.k, self.k, 1],
                      strides=[1, 1, 1, 1],
                      rates=[1, self.rate, self.rate, 1],
                      padding='SAME')

    ## -----------------------depth and normal FCN--------------------------
    inputs = preprocessing(inputs)
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.relu,
                        stride=1,
                        padding='SAME',
                        **caffe_init()):
        with tf.variable_scope('fcn', reuse=reuse):
            ##---------------------vgg depth------------------------------------
            conv1 = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                                scope='conv1')
            pool1 = slim.max_pool2d(conv1, [3, 3], stride=2, padding='SAME',
                                    scope='pool1')
            conv2 = slim.repeat(pool1, 2, slim.conv2d, 128, [3, 3],
                                scope='conv2')
            pool2 = slim.max_pool2d(conv2, [3, 3], stride=2, padding='SAME',
                                    scope='pool2')
            conv3 = slim.repeat(pool2, 3, slim.conv2d, 256, [3, 3],
                                scope='conv3')
            pool3 = slim.max_pool2d(conv3, [3, 3], stride=2, padding='SAME',
                                    scope='pool3')
            conv4 = slim.repeat(pool3, 3, slim.conv2d, 512, [3, 3],
                                scope='conv4')
            pool4 = slim.max_pool2d(conv4, [3, 3], stride=1, padding='SAME',
                                    scope='pool4')
            conv5 = slim.repeat(pool4, 3, slim.conv2d, 512, [3, 3], rate=2,
                                scope='conv5')
            pool5 = slim.max_pool2d(conv5, [3, 3], stride=1, padding='SAME',
                                    scope='pool5')
            pool5a = slim.avg_pool2d(pool5, [3, 3], stride=1, padding='SAME',
                                     scope='pool5a')
            fc6 = slim.conv2d(pool5a, 1024, [3, 3], stride=1, rate=12,
                              scope='fc6')
            fc6 = slim.dropout(fc6, 0.5, is_training=is_training,
                               scope='drop6')
            fc7 = slim.conv2d(fc6, 1024, [1, 1], scope='fc7')
            fc7 = slim.dropout(fc7, 0.5, is_training=is_training,
                               scope='drop7')
            # Pool with a 61 x 81 window, normalise, then tile back 61 x 81
            # so the result can be concatenated with fc7.
            pool6_1x1 = slim.avg_pool2d(fc7, [61, 81], stride=[61, 81],
                                        padding='SAME', scope='pool6_1x1')
            pool6_1x1_norm = slim.unit_norm(pool6_1x1, dim=3,
                                            scope='pool6_1x1_norm_new')
            pool6_1x1_norm_scale = pool6_1x1_norm * 10
            pool6_1x1_norm_upsample = tf.tile(
                pool6_1x1_norm_scale, [1, 61, 81, 1],
                name='pool6_1x1_norm_upsample')
            out = tf.concat([fc7, pool6_1x1_norm_upsample], axis=-1,
                            name='out')
            out_reduce = slim.conv2d(out, 256, [1, 1],
                                     activation_fn=tf.nn.relu,
                                     stride=1,
                                     scope='out_reduce',
                                     padding='SAME',
                                     **caffe_init())
            out_conv = slim.conv2d(out_reduce, 256, [3, 3],
                                   activation_fn=tf.nn.relu,
                                   stride=1,
                                   scope='out_conv',
                                   padding='SAME',
                                   **caffe_init())
            out_conv_increase = slim.conv2d(out_conv, 1024, [1, 1],
                                            activation_fn=tf.nn.relu,
                                            stride=1,
                                            scope='out_conv_increase',
                                            padding='SAME',
                                            **caffe_init())
            fc8_nyu_depth = slim.conv2d(out_conv_increase, 1, [1, 1],
                                        activation_fn=None,
                                        scope='fc8_nyu_depth')
            fc8_upsample = tf.image.resize_images(fc8_nyu_depth, image_size,
                                                  method=0,
                                                  align_corners=True)
            # ---------------------------------------vgg depth end ---------------------------------------

            ## ----------------- vgg norm---------------------------------------------------------------
            conv1_norm = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                                     scope='conv1_norm')
            pool1_norm = slim.max_pool2d(conv1_norm, [3, 3], stride=2,
                                         padding='SAME', scope='pool1_norm')
            conv2_norm = slim.repeat(pool1_norm, 2, slim.conv2d, 128, [3, 3],
                                     scope='conv2_norm')
            pool2_norm = slim.max_pool2d(conv2_norm, [3, 3], stride=2,
                                         padding='SAME', scope='pool2_norm')
            conv3_norm = slim.repeat(pool2_norm, 3, slim.conv2d, 256, [3, 3],
                                     scope='conv3_norm')
            pool3_norm = slim.max_pool2d(conv3_norm, [3, 3], stride=2,
                                         padding='SAME', scope='pool3_norm')
            conv4_norm = slim.repeat(pool3_norm, 3, slim.conv2d, 512, [3, 3],
                                     scope='conv4_norm')
            pool4_norm = slim.max_pool2d(conv4_norm, [3, 3], stride=1,
                                         padding='SAME', scope='pool4_norm')
            conv5_norm = slim.repeat(pool4_norm, 3, slim.conv2d, 512, [3, 3],
                                     rate=2, scope='conv5_norm')
            pool5_norm = slim.max_pool2d(conv5_norm, [3, 3], stride=1,
                                         padding='SAME', scope='pool5_norm')
            pool5a_norm = slim.avg_pool2d(pool5_norm, [3, 3], stride=1,
                                          padding='SAME',
                                          scope='pool5a_norm')
            fc6_norm = slim.conv2d(pool5a_norm, 1024, [3, 3], stride=1,
                                   rate=12, scope='fc6_norm')
            fc6_norm = slim.dropout(fc6_norm, 0.5, is_training=is_training,
                                    scope='drop6_norm')
            fc7_norm = slim.conv2d(fc6_norm, 1024, [1, 1], scope='fc7_norm')
            fc7_norm = slim.dropout(fc7_norm, 0.5, is_training=is_training,
                                    scope='drop7_norm')
            pool6_1x1_norm_new = slim.avg_pool2d(fc7_norm, [61, 81],
                                                 stride=[61, 81],
                                                 padding='SAME',
                                                 scope='pool6_1x1_norm_new')
            pool6_1x1_norm_norm = slim.unit_norm(pool6_1x1_norm_new, dim=3,
                                                 scope='pool6_1x1_norm_new')
            pool6_1x1_norm_scale_norm = pool6_1x1_norm_norm * 10
            pool6_1x1_norm_upsample_norm = tf.tile(
                pool6_1x1_norm_scale_norm, [1, 61, 81, 1],
                name='pool6_1x1_norm_upsample')
            out_norm = tf.concat([fc7_norm, pool6_1x1_norm_upsample_norm],
                                 axis=-1, name='out_norm')
            fc8_nyu_norm_norm = slim.conv2d(out_norm, 3, [1, 1],
                                            activation_fn=None,
                                            scope='fc8_nyu_norm_norm')
            fc8_upsample_norm = tf.image.resize_images(fc8_nyu_norm_norm,
                                                       image_size,
                                                       method=0,
                                                       align_corners=True)
            fc8_upsample_norm = slim.unit_norm(fc8_upsample_norm, dim=3)
            # -------------------------------------vgg norm end---------------------------------------------

        # ------------- depth to normal + norm refinement---------------------------------------------------
        with tf.variable_scope('noise', reuse=reuse):
            fc8_upsample_norm = tf.squeeze(fc8_upsample_norm)
            fc8_upsample_norm = tf.reshape(fc8_upsample_norm, image_shape)
            # k*k neighbouring normals for every pixel.
            norm_matrix = tf.extract_image_patches(images=fc8_upsample_norm,
                                                   **patch_args)
            matrix_c = tf.reshape(norm_matrix, patch_shape)
            fc8_upsample_norm = tf.expand_dims(fc8_upsample_norm, axis=4)
            # Dot product of each neighbour normal with the centre normal.
            angle = tf.matmul(matrix_c, fc8_upsample_norm)
            valid_condition = tf.greater(angle, self.thresh)
            valid_condition_all = tf.tile(valid_condition, [1, 1, 1, 1, 3])

            # 0.69314718056 ~= ln(2), so this equals 2 ** fc8_upsample.
            exp_depth = tf.exp(fc8_upsample * 0.69314718056)
            depth_repeat = tf.tile(exp_depth, [1, 1, 1, 3])
            points = tf.multiply(grid, depth_repeat)
            point_matrix = tf.extract_image_patches(images=points,
                                                    **patch_args)
            matrix_a = tf.reshape(point_matrix, patch_shape)
            matrix_a_zero = tf.zeros_like(matrix_a, dtype=tf.float32)
            # Zero out neighbours whose normal disagrees with the centre.
            matrix_a_valid = tf.where(valid_condition_all, matrix_a,
                                      matrix_a_zero)
            matrix_a_trans = tf.matrix_transpose(matrix_a_valid,
                                                 name='matrix_transpose')
            matrix_b = tf.ones(shape=[
                self.batch_size, self.crop_size_h, self.crop_size_w,
                num_patch, 1
            ])
            point_multi = tf.matmul(matrix_a_trans, matrix_a_valid,
                                    name='matrix_multiplication')
            with tf.device('cpu:0'):
                matrix_deter = tf.matrix_determinant(point_multi)
            inverse_condition = tf.greater(matrix_deter, 1e-5)
            inverse_condition = tf.expand_dims(inverse_condition, axis=3)
            inverse_condition = tf.expand_dims(inverse_condition, axis=4)
            inverse_condition_all = tf.tile(inverse_condition,
                                            [1, 1, 1, 3, 3])

            # Identity matrices stand in wherever the determinant is too
            # small, so matrix_inverse always gets an invertible input.
            diag_constant = tf.ones([3], dtype=tf.float32)
            diag_element = tf.diag(diag_constant)
            diag_element = tf.expand_dims(diag_element, axis=0)
            diag_element = tf.expand_dims(diag_element, axis=0)
            diag_element = tf.expand_dims(diag_element, axis=0)
            diag_matrix = tf.tile(diag_element, [
                self.batch_size, self.crop_size_h, self.crop_size_w, 1, 1
            ])
            inversible_matrix = tf.where(inverse_condition_all, point_multi,
                                         diag_matrix)
            with tf.device('cpu:0'):
                inv_matrix = tf.matrix_inverse(inversible_matrix)

            # (A^T A)^-1 A^T b with b a column of ones.
            generated_norm = tf.matmul(tf.matmul(inv_matrix, matrix_a_trans),
                                       matrix_b)
            norm_normalize = slim.unit_norm((generated_norm), dim=3)
            norm_normalize = tf.reshape(norm_normalize, image_shape)
            norm_scale = norm_normalize * 10.0

            conv1_noise = slim.repeat(norm_scale, 2, slim.conv2d, 64, [3, 3],
                                      scope='conv1_noise')
            pool1_noise = slim.max_pool2d(conv1_noise, [3, 3], stride=2,
                                          padding='SAME',
                                          scope='pool1_noise')  #
            conv2_noise = slim.repeat(pool1_noise, 2, slim.conv2d, 128,
                                      [3, 3], scope='conv2_noise')
            conv3_noise = slim.repeat(conv2_noise, 3, slim.conv2d, 256,
                                      [3, 3], scope='conv3_noise')
            fc1_noise = slim.conv2d(conv3_noise, 512, [1, 1],
                                    activation_fn=tf.nn.relu,
                                    stride=1,
                                    scope='fc1_noise',
                                    padding='SAME')
            encode_norm_noise = slim.conv2d(fc1_noise, 3, [3, 3],
                                            activation_fn=None,
                                            stride=1,
                                            scope='encode_norm_noise',
                                            padding='SAME')
            encode_norm_upsample_noise = tf.image.resize_images(
                encode_norm_noise, image_size, method=0, align_corners=True)
            sum_norm_noise = tf.add(norm_normalize,
                                    encode_norm_upsample_noise)
            norm_pred_noise = slim.unit_norm(sum_norm_noise, dim=3)

            # 0.00392156862 ~= 1 / 255.
            norm_pred_all = tf.concat([
                tf.expand_dims(tf.squeeze(fc8_upsample_norm), axis=0),
                norm_pred_noise,
                inputs * 0.00392156862
            ], axis=3)
            norm_pred_all = slim.repeat(norm_pred_all, 3, slim.conv2d, 128,
                                        [3, 3], rate=2,
                                        scope='conv1_norm_noise_new',
                                        **xavier_init())
            norm_pred_all = slim.repeat(norm_pred_all, 3, slim.conv2d, 128,
                                        [3, 3],
                                        scope='conv2_norm_noise_new',
                                        **xavier_init())
            norm_pred_final = slim.conv2d(norm_pred_all, 3, [3, 3],
                                          activation_fn=None,
                                          scope='norm_conv3_noise_new',
                                          **xavier_init())
            norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)

        # ------------- normal to depth + depth refinement---------------------------------------------------
        with tf.variable_scope('norm_depth', reuse=reuse):
            grid_patch = tf.extract_image_patches(images=grid, **patch_args)
            grid_patch = tf.reshape(grid_patch, patch_shape)
            _, _, depth_data = tf.split(value=matrix_a,
                                        num_or_size_splits=3,
                                        axis=4)
            tmp_matrix_zero = tf.zeros_like(angle, dtype=tf.float32)
            valid_angle = tf.where(valid_condition, angle, tmp_matrix_zero)

            lower_matrix = tf.matmul(matrix_c, tf.expand_dims(grid, axis=4))
            condition = tf.greater(lower_matrix, 1e-5)
            tmp_matrix = tf.ones_like(lower_matrix)
            # Replace near-zero / negative denominators by 1 before the
            # reciprocal; the matching weights are zeroed just below.
            lower_matrix = tf.where(condition, lower_matrix, tmp_matrix)
            lower = tf.reciprocal(lower_matrix)
            valid_angle = tf.where(condition, valid_angle, tmp_matrix_zero)
            upper = tf.reduce_sum(tf.multiply(matrix_c, grid_patch), [4])
            ratio = tf.multiply(lower, tf.expand_dims(upper, axis=4))
            estimate_depth = tf.multiply(ratio, depth_data)

            # Normalise the weights over each patch.  The tile multiple must
            # equal the patch axis length (k * k); it used to be hard-coded
            # to 81, which is only right for k == 9.
            valid_angle = tf.multiply(
                valid_angle,
                tf.reciprocal(
                    tf.tile(
                        tf.reduce_sum(valid_angle, [3, 4], keep_dims=True) +
                        1e-5, [1, 1, 1, num_patch, 1])))
            depth_stage1 = tf.reduce_sum(
                tf.multiply(estimate_depth, valid_angle), [3, 4])
            depth_stage1 = tf.expand_dims(tf.squeeze(depth_stage1), axis=2)
            depth_stage1 = tf.clip_by_value(depth_stage1, 0, 10.0)
            exp_depth = tf.expand_dims(tf.squeeze(exp_depth), axis=2)
            depth_all = tf.expand_dims(
                tf.concat([
                    depth_stage1,
                    exp_depth,
                    tf.squeeze(inputs) * 0.00392156862
                ], axis=2),
                axis=0)
            depth_pred_all = slim.repeat(depth_all, 3, slim.conv2d, 128,
                                         [3, 3], rate=2,
                                         scope='conv1_depth_noise_new',
                                         **xavier_init())
            depth_pred_all = slim.repeat(depth_pred_all, 3, slim.conv2d, 128,
                                         [3, 3],
                                         scope='conv2_depth_noise_new',
                                         **xavier_init())
            final_depth = slim.conv2d(depth_pred_all, 1, [3, 3],
                                      activation_fn=None,
                                      scope='depth_conv3_noise_new',
                                      **xavier_init())

        # The scope name's spelling is kept as-is: it is part of the
        # variable names.
        with tf.variable_scope('edge_refinemet', reuse=reuse):
            print(inputs.shape)
            edges = tf.py_func(myfunc_canny, [inputs], tf.float32)
            edges = tf.reshape(edges,
                               [1, self.crop_size_h, self.crop_size_w, 1])

            # edge prediction for depth
            edge_inputs = tf.concat([edges, inputs * 0.00784], axis=3)
            edges_encoder = slim.repeat(edge_inputs, 3, slim.conv2d, 32,
                                        [3, 3], rate=2,
                                        scope='conv1_edge_refinement',
                                        **xavier_init())
            edges_encoder = slim.repeat(edges_encoder, 3, slim.conv2d, 32,
                                        [3, 3],
                                        scope='conv2_edge_refinement',
                                        **xavier_init())
            edges_predictor = slim.conv2d(edges_encoder, 8, [3, 3],
                                          activation_fn=None,
                                          scope='edge_weight',
                                          **xavier_init())
            edges_all = edges_predictor + tf.tile(edges, [1, 1, 1, 8])
            edges_all = tf.clip_by_value(edges_all, 0.0, 1.0)
            # First four maps drive depth propagation, last four the normals.
            dlr, drl, dud, ddu, nlr, nrl, nud, ndu = tf.split(
                edges_all, num_or_size_splits=8, axis=3)

            # 4 iteration depth
            for _ in range(4):
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)

            # 4 iteration norm
            for _ in range(4):
                norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud,
                                            ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)

    return final_depth, fc8_upsample_norm, norm_pred_final, fc8_upsample