def tower_loss(scope):
    """Build the selected network for one tower and return its total loss.

    The architecture is picked by the module-level ``net`` name. All tensors
    in the 'losses' collection under ``scope`` are summed, and each of them,
    plus the sum, is exported as a scalar summary.

    Args:
        scope: Scope used to filter the 'losses' collection.

    Returns:
        The ``tf.add_n`` sum of the collected losses (named 'total_loss').

    Raises:
        Exception: If ``net`` does not name one of the supported networks.
    """
    images, labels = read_and_decode()

    if net in ('vgg_16', 'vgg_19'):
        vgg_fn = vgg.vgg_16 if net == 'vgg_16' else vgg.vgg_19
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg_fn(images, num_classes=FLAGS.num_classes)
    elif net in ('resnet_v1_101', 'resnet_v1_50'):
        if net == 'resnet_v1_101':
            resnet_fn = resnet_v1.resnet_v1_101
        else:
            resnet_fn = resnet_v1.resnet_v1_50
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_fn(images,
                                           num_classes=FLAGS.num_classes)
        # The ResNet output is reshaped to the 2-D layout asserted below.
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v2_50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)

    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)

    # The return value is unused; the total is rebuilt from the 'losses'
    # collection just below.
    cal_loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')

    # Strip the per-tower prefix so all towers share the same summary names.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    for loss_tensor in losses + [total_loss]:
        summary_name = re.sub(tower_prefix, '', loss_tensor.op.name)
        tf.summary.scalar(summary_name, loss_tensor)

    return total_loss
def main(args):
    """Extract VGG-16 'fc8' features for every split under ``args.image_dir``.

    Args:
        args: Parsed arguments; ``image_dir``, ``output_dir`` and
            ``model_ckpt`` are read.
    """
    start = time()
    print('Start')

    splits = get_splits(args.image_dir)
    img_size = 224

    # TF graph creation
    images_placeholder = tf.placeholder(tf.float32, [None, None, None, 3],
                                        name='image')
    proc_image_op = tf.image.resize_image_with_crop_or_pad(
        images_placeholder, target_height=img_size, target_width=img_size)
    _, end_points = vgg.vgg_16(proc_image_op, is_training=False,
                               dropout_keep_prob=1.0)
    # End-point keys are scope names and always use '/'. os.path.join would
    # insert the OS separator and miss the key on Windows.
    ft_name = "vgg_16/fc8"
    ft_output = end_points[ft_name]

    for split in splits:
        extract_features(images_placeholder=images_placeholder,
                         image_dir=os.path.join(args.image_dir, split),
                         ft_output=ft_output,
                         out_dir=args.output_dir,
                         split=split,
                         network_ckpt=args.model_ckpt)

    print('Image Features extracted.')
    print('Time taken: ', time() - start)
def testEndPoints(self):
    """vgg_16 must expose exactly the expected end points in both modes."""
    batch_size, num_classes = 5, 1000
    height = width = 224
    expected_names = {
        'vgg_16/conv1/conv1_1', 'vgg_16/conv1/conv1_2', 'vgg_16/pool1',
        'vgg_16/conv2/conv2_1', 'vgg_16/conv2/conv2_2', 'vgg_16/pool2',
        'vgg_16/conv3/conv3_1', 'vgg_16/conv3/conv3_2',
        'vgg_16/conv3/conv3_3', 'vgg_16/pool3',
        'vgg_16/conv4/conv4_1', 'vgg_16/conv4/conv4_2',
        'vgg_16/conv4/conv4_3', 'vgg_16/pool4',
        'vgg_16/conv5/conv5_1', 'vgg_16/conv5/conv5_2',
        'vgg_16/conv5/conv5_3', 'vgg_16/pool5',
        'vgg_16/fc6', 'vgg_16/fc7', 'vgg_16/fc8',
    }
    for is_training in (True, False):
        # A fresh graph per mode keeps the two builds from clashing.
        with ops.Graph().as_default():
            inputs = random_ops.random_uniform(
                (batch_size, height, width, 3))
            _, end_points = vgg.vgg_16(inputs, num_classes,
                                       is_training=is_training)
            self.assertSetEqual(set(end_points.keys()), expected_names)
def testEndPoints(self):
    """vgg_16 must expose exactly the expected set of end points."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        _, end_points = vgg.vgg_16(inputs, num_classes)
        expected_names = [
            'vgg_16/conv1/conv1_1', 'vgg_16/conv1/conv1_2', 'vgg_16/pool1',
            'vgg_16/conv2/conv2_1', 'vgg_16/conv2/conv2_2', 'vgg_16/pool2',
            'vgg_16/conv3/conv3_1', 'vgg_16/conv3/conv3_2',
            'vgg_16/conv3/conv3_3', 'vgg_16/pool3',
            'vgg_16/conv4/conv4_1', 'vgg_16/conv4/conv4_2',
            'vgg_16/conv4/conv4_3', 'vgg_16/pool4',
            'vgg_16/conv5/conv5_1', 'vgg_16/conv5/conv5_2',
            'vgg_16/conv5/conv5_3', 'vgg_16/pool5',
            'vgg_16/fc6', 'vgg_16/fc7', 'vgg_16/fc8',
        ]
        # The debug print of the keys was dropped: assertSetEqual already
        # reports the difference when the sets do not match.
        self.assertSetEqual(set(end_points.keys()), set(expected_names))
def top_feature_net(input, anchors, inds_inside, num_bases): stride=8 # arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0) # with slim.arg_scope(arg_scope) : with slim.arg_scope(vgg.vgg_arg_scope()): # net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8) block5, end_points = vgg.vgg_16(input) block3 = end_points['conv3/conv3_3'] # block = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2') tf.summary.histogram('rpn_top_block', block) # tf.summary.histogram('rpn_top_block_weights', tf.get_collection('2/conv_weight')[0]) with tf.variable_scope('top') as scope: #up = upsample2d(block, factor = 2, has_bias=True, trainable=True, name='1') #up = block up = conv2d_bn_relu(block, num_kernels=128, kernel_size=(3,3), stride=[1,1,1,1], padding='SAME', name='2') scores = conv2d(up, num_kernels=2*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='score') probs = tf.nn.softmax( tf.reshape(scores,[-1,2]), name='prob') deltas = conv2d(up, num_kernels=4*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='delta') #<todo> flip to train and test mode nms (e.g. different nms_pre_topn values): use tf.cond with tf.variable_scope('top-nms') as scope: #non-max batch_size, img_height, img_width, img_channel = input.get_shape().as_list() img_scale = 1 # pdb.set_trace() rois, roi_scores = tf_rpn_nms( probs, deltas, anchors, inds_inside, stride, img_width, img_height, img_scale, nms_thresh=0.7, min_size=stride, nms_pre_topn=nms_pre_topn_, nms_post_topn=nms_post_topn_, name ='nms') #<todo> feature = upsample2d(block, factor = 4, ...) feature = block
def VGG_16(input_image):
    """Run VGG-16 on ``input_image`` and return its output flattened.

    Args:
        input_image: Image tensor passed straight to ``vgg.vgg_16``.

    Returns:
        The first output of ``vgg.vgg_16`` reshaped to one row and then
        squeezed.
    """
    with slim.arg_scope(vgg.vgg_arg_scope()):
        net_output, _ = vgg.vgg_16(input_image)

    # feature flatten: collapse everything into one row, then drop the
    # size-1 axes.
    single_row = tf.reshape(net_output, shape=[1, -1])
    return tf.squeeze(single_row)
def testModelVariables(self):
    """vgg_16 must register exactly the expected model variables."""
    batch_size, num_classes = 5, 1000
    height = width = 224
    expected_names = {
        'vgg_16/conv1/conv1_1/weights', 'vgg_16/conv1/conv1_1/biases',
        'vgg_16/conv1/conv1_2/weights', 'vgg_16/conv1/conv1_2/biases',
        'vgg_16/conv2/conv2_1/weights', 'vgg_16/conv2/conv2_1/biases',
        'vgg_16/conv2/conv2_2/weights', 'vgg_16/conv2/conv2_2/biases',
        'vgg_16/conv3/conv3_1/weights', 'vgg_16/conv3/conv3_1/biases',
        'vgg_16/conv3/conv3_2/weights', 'vgg_16/conv3/conv3_2/biases',
        'vgg_16/conv3/conv3_3/weights', 'vgg_16/conv3/conv3_3/biases',
        'vgg_16/conv4/conv4_1/weights', 'vgg_16/conv4/conv4_1/biases',
        'vgg_16/conv4/conv4_2/weights', 'vgg_16/conv4/conv4_2/biases',
        'vgg_16/conv4/conv4_3/weights', 'vgg_16/conv4/conv4_3/biases',
        'vgg_16/conv5/conv5_1/weights', 'vgg_16/conv5/conv5_1/biases',
        'vgg_16/conv5/conv5_2/weights', 'vgg_16/conv5/conv5_2/biases',
        'vgg_16/conv5/conv5_3/weights', 'vgg_16/conv5/conv5_3/biases',
        'vgg_16/fc6/weights', 'vgg_16/fc6/biases',
        'vgg_16/fc7/weights', 'vgg_16/fc7/biases',
        'vgg_16/fc8/weights', 'vgg_16/fc8/biases',
    }
    with self.test_session():
        inputs = random_ops.random_uniform((batch_size, height, width, 3))
        vgg.vgg_16(inputs, num_classes)
        actual_names = set()
        for variable in variables_lib.get_model_variables():
            actual_names.add(variable.op.name)
        self.assertSetEqual(actual_names, expected_names)
def testForward(self):
    """A forward pass on random input must yield some non-zero logit."""
    input_shape = (1, 224, 224, 3)  # (batch_size, height, width, channels)
    with self.test_session() as sess:
        logits, _ = vgg.vgg_16(random_ops.random_uniform(input_shape))
        init_op = variables.global_variables_initializer()
        sess.run(init_op)
        output = sess.run(logits)
        self.assertTrue(output.any())
def encoder_vgg(x, enc_final_size, reuse=False, scope_prefix='',
                hparams=None, is_training=True):
    """VGG network to use as encoder without the top few layers.

    Can be pretrained.

    Args:
        x: The image to encode. In the range 0 to 1.
        enc_final_size: The desired size of the encoding.
        reuse: To reuse in variable scope or not.
        scope_prefix: The prefix before the scope name.
        hparams: The python hparams; ``enc_keep_prob`` and
            ``enc_pred_use_l2norm`` are read, so it must not be None.
        is_training: boolean value indicating if training is happening.

    Returns:
        The flattened encoding of width ``enc_final_size``, L2-normalised
        along axis 1 when ``hparams.enc_pred_use_l2norm`` is set.
    """
    with tf.variable_scope(scope_prefix + 'encoder', reuse=reuse):
        # Preprocess input
        x *= 256
        x = x - COLOR_NORMALIZATION_VECTOR

        with arg_scope(vgg.vgg_arg_scope()):
            # Padding because vgg_16 accepts images of size at least
            # VGG_IMAGE_SIZE. slim's vgg_16 takes NHWC input, so axis 1 is
            # height and axis 2 is width; the original had the two pad
            # amounts swapped, which only worked for square images.
            x = tf.pad(x, [[0, 0],
                           [0, VGG_IMAGE_SIZE - IMG_HEIGHT],
                           [0, VGG_IMAGE_SIZE - IMG_WIDTH],
                           [0, 0]])
            _, end_points = vgg.vgg_16(x, num_classes=enc_final_size,
                                       is_training=is_training)
            pool5_key = [key for key in end_points.keys() if 'pool5' in key]
            assert len(pool5_key) == 1
            enc = end_points[pool5_key[0]]
            # Undoing padding.
            enc = tf.slice(enc, [0, 0, 0, 0], [-1, 2, 2, -1])

        enc_shape = enc.get_shape().as_list()
        enc_shape[0] = -1
        enc_size = enc_shape[1] * enc_shape[2] * enc_shape[3]
        enc_flat = tf.reshape(enc, (-1, enc_size))
        # NOTE(review): dropout is applied regardless of is_training -
        # confirm this is intended.
        enc_flat = tf.nn.dropout(enc_flat, hparams.enc_keep_prob)
        enc_flat = tf.layers.dense(
            enc_flat, enc_final_size,
            kernel_initializer=tf.truncated_normal_initializer(stddev=1e-4, ))
        if hparams.enc_pred_use_l2norm:
            enc_flat = tf.nn.l2_normalize(enc_flat, 1)
    return enc_flat
def testBuild(self):
    """vgg_16 logits must have the squeezed op name and a 2-D shape."""
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
        inputs = random_ops.random_uniform((batch_size, height, width, 3))
        logits, _ = vgg.vgg_16(inputs, num_classes)
        # assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual(logits.op.name, 'vgg_16/fc8/squeezed')
        self.assertListEqual(logits.get_shape().as_list(),
                             [batch_size, num_classes])
def testFullyConvolutional(self):
    """Without spatial squeeze, 256x256 input must give a 2x2 logit map."""
    batch_size = 1
    height, width = 256, 256
    num_classes = 1000
    with self.test_session():
        inputs = random_ops.random_uniform((batch_size, height, width, 3))
        logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
        # assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual(logits.op.name, 'vgg_16/fc8/BiasAdd')
        self.assertListEqual(logits.get_shape().as_list(),
                             [batch_size, 2, 2, num_classes])
def testModelVariables(self):
    """The model-variable collection must match the VGG-16 layer list."""
    input_shape = (5, 224, 224, 3)  # (batch_size, height, width, channels)
    num_classes = 1000
    with self.test_session():
        vgg.vgg_16(random_ops.random_uniform(input_shape), num_classes)
        expected_names = {
            'vgg_16/conv1/conv1_1/weights', 'vgg_16/conv1/conv1_1/biases',
            'vgg_16/conv1/conv1_2/weights', 'vgg_16/conv1/conv1_2/biases',
            'vgg_16/conv2/conv2_1/weights', 'vgg_16/conv2/conv2_1/biases',
            'vgg_16/conv2/conv2_2/weights', 'vgg_16/conv2/conv2_2/biases',
            'vgg_16/conv3/conv3_1/weights', 'vgg_16/conv3/conv3_1/biases',
            'vgg_16/conv3/conv3_2/weights', 'vgg_16/conv3/conv3_2/biases',
            'vgg_16/conv3/conv3_3/weights', 'vgg_16/conv3/conv3_3/biases',
            'vgg_16/conv4/conv4_1/weights', 'vgg_16/conv4/conv4_1/biases',
            'vgg_16/conv4/conv4_2/weights', 'vgg_16/conv4/conv4_2/biases',
            'vgg_16/conv4/conv4_3/weights', 'vgg_16/conv4/conv4_3/biases',
            'vgg_16/conv5/conv5_1/weights', 'vgg_16/conv5/conv5_1/biases',
            'vgg_16/conv5/conv5_2/weights', 'vgg_16/conv5/conv5_2/biases',
            'vgg_16/conv5/conv5_3/weights', 'vgg_16/conv5/conv5_3/biases',
            'vgg_16/fc6/weights', 'vgg_16/fc6/biases',
            'vgg_16/fc7/weights', 'vgg_16/fc7/biases',
            'vgg_16/fc8/weights', 'vgg_16/fc8/biases',
        }
        found_names = {
            var.op.name for var in variables_lib.get_model_variables()
        }
        self.assertSetEqual(found_names, expected_names)
def testEvaluation(self):
    """In eval mode, logits and argmax predictions keep the batch shape."""
    batch_size, num_classes = 2, 1000
    height = width = 224
    with self.test_session():
        eval_inputs = random_ops.random_uniform(
            (batch_size, height, width, 3))
        logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
        logits_shape = logits.get_shape().as_list()
        self.assertListEqual(logits_shape, [batch_size, num_classes])

        predictions = math_ops.argmax(logits, 1)
        predictions_shape = predictions.get_shape().as_list()
        self.assertListEqual(predictions_shape, [batch_size])
def testEvaluation(self):
    """In eval mode, logits and argmax predictions keep the batch shape."""
    batch_size, num_classes = 2, 1000
    height = width = 224
    with self.test_session():
        eval_inputs = tf.random_uniform((batch_size, height, width, 3))
        logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
        logits_shape = logits.get_shape().as_list()
        self.assertListEqual(logits_shape, [batch_size, num_classes])

        predictions = tf.argmax(logits, 1)
        predictions_shape = predictions.get_shape().as_list()
        self.assertListEqual(predictions_shape, [batch_size])
def encoder_vgg(x, enc_final_size, reuse=False, scope_prefix='',
                hparams=None, is_training=True):
    """VGG network to use as encoder without the top few layers.

    Can be pretrained.

    Args:
        x: The image to encode. In the range 0 to 1.
        enc_final_size: The desired size of the encoding.
        reuse: To reuse in variable scope or not.
        scope_prefix: The prefix before the scope name.
        hparams: The python hparams; ``enc_keep_prob`` and
            ``enc_pred_use_l2norm`` are read, so it must not be None.
        is_training: boolean value indicating if training is happening.

    Returns:
        The flattened encoding of width ``enc_final_size``, L2-normalised
        along axis 1 when ``hparams.enc_pred_use_l2norm`` is set.
    """
    with tf.variable_scope(scope_prefix + 'encoder', reuse=reuse):
        # Preprocess input
        x *= 256
        x = x - COLOR_NORMALIZATION_VECTOR

        with arg_scope(vgg.vgg_arg_scope()):
            # Padding because vgg_16 accepts images of size at least
            # VGG_IMAGE_SIZE. The input is NHWC, so the height padding
            # belongs on axis 1 and the width padding on axis 2; the
            # original swapped them, which is only harmless for square
            # images.
            height_pad = VGG_IMAGE_SIZE - IMG_HEIGHT
            width_pad = VGG_IMAGE_SIZE - IMG_WIDTH
            x = tf.pad(x, [[0, 0], [0, height_pad], [0, width_pad], [0, 0]])
            _, end_points = vgg.vgg_16(
                x, num_classes=enc_final_size, is_training=is_training)
            pool5_key = [key for key in end_points.keys() if 'pool5' in key]
            assert len(pool5_key) == 1
            enc = end_points[pool5_key[0]]
            # Undoing padding.
            enc = tf.slice(enc, [0, 0, 0, 0], [-1, 2, 2, -1])

        enc_shape = enc.get_shape().as_list()
        enc_shape[0] = -1
        enc_size = enc_shape[1] * enc_shape[2] * enc_shape[3]
        enc_flat = tf.reshape(enc, (-1, enc_size))
        # NOTE(review): dropout is applied regardless of is_training -
        # confirm this is intended.
        enc_flat = tf.nn.dropout(enc_flat, hparams.enc_keep_prob)
        enc_flat = tf.layers.dense(
            enc_flat,
            enc_final_size,
            kernel_initializer=tf.truncated_normal_initializer(stddev=1e-4,))
        if hparams.enc_pred_use_l2norm:
            enc_flat = tf.nn.l2_normalize(enc_flat, 1)
    return enc_flat
def testTrainEvalWithReuse(self):
    """Train and eval graphs can share variables across input sizes."""
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 256, 256
    num_classes = 1000
    with self.test_session():
        train_inputs = tf.random_uniform(
            (train_batch_size, train_height, train_width, 3))
        logits, _ = vgg.vgg_16(train_inputs)
        self.assertListEqual(logits.get_shape().as_list(),
                             [train_batch_size, num_classes])

        # Build the eval network on top of the variables created above.
        tf.get_variable_scope().reuse_variables()
        eval_inputs = tf.random_uniform(
            (eval_batch_size, eval_height, eval_width, 3))
        logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
                               spatial_squeeze=False)
        self.assertListEqual(logits.get_shape().as_list(),
                             [eval_batch_size, 2, 2, num_classes])

        # Average the 2x2 logit map before taking the arg-max class.
        logits = tf.reduce_mean(logits, [1, 2])
        predictions = tf.argmax(logits, 1)
        # assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual(predictions.get_shape().as_list(),
                         [eval_batch_size])
def get_logits_prob(self, batch_input):
    """
    Prediction from the model on a single batch.

    :param batch_input: the input batch. Must be from size [?, 224, 224, 3]
    :return: the logits, and the softmax probabilities with the first class
        column removed (squeezed, so a batch of one yields a 1-D tensor)
    """
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, _ = vgg.vgg_16(batch_input, num_classes=1000,
                               is_training=False)

    # Slice on the class axis BEFORE squeezing. The original squeezed first,
    # so for batches larger than one `[1:]` dropped the first sample instead
    # of the first class. For a batch of one the result is unchanged.
    probs = tf.squeeze(tf.nn.softmax(logits)[:, 1:])
    return logits, probs
def testTrainEvalWithReuse(self):
    """Train and eval graphs can share variables across input sizes."""
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 256, 256
    num_classes = 1000
    with self.test_session():
        train_inputs = random_ops.random_uniform(
            (train_batch_size, train_height, train_width, 3))
        logits, _ = vgg.vgg_16(train_inputs)
        self.assertListEqual(logits.get_shape().as_list(),
                             [train_batch_size, num_classes])

        # Build the eval network on top of the variables created above.
        variable_scope.get_variable_scope().reuse_variables()
        eval_inputs = random_ops.random_uniform(
            (eval_batch_size, eval_height, eval_width, 3))
        logits, _ = vgg.vgg_16(
            eval_inputs, is_training=False, spatial_squeeze=False)
        self.assertListEqual(logits.get_shape().as_list(),
                             [eval_batch_size, 2, 2, num_classes])

        # Average the 2x2 logit map before taking the arg-max class.
        logits = math_ops.reduce_mean(logits, [1, 2])
        predictions = math_ops.argmax(logits, 1)
        # assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual(predictions.get_shape().as_list(),
                         [eval_batch_size])
def rgb_feature_net(input):
    """Run VGG-16 on ``input`` and return its first output as the feature.

    A histogram summary named 'rgb_top_block' is recorded for the output.

    Args:
        input: Image tensor passed to ``vgg.vgg_16``.

    Returns:
        The first value returned by ``vgg.vgg_16``.
    """
    with slim.arg_scope(vgg.vgg_arg_scope()):
        feature, _ = vgg.vgg_16(input)

    # <todo> feature = upsample2d(block, factor = 4, ...)
    tf.summary.histogram('rgb_top_block', feature)
    return feature
def build_model(self, is_training=True, dropout_keep_prob=0.5):
    """Create placeholders, the VGG-16 network, the loss and the train op.

    Sets ``self.inputs``, ``self.targets``, ``self.cost``,
    ``self.global_step`` and ``self.train_op``.

    Args:
        is_training: Forwarded to ``vgg.vgg_16``.
        dropout_keep_prob: Forwarded to ``vgg.vgg_16``.
    """
    batch_size = self.FLAGS.batch_size
    self.inputs = tf.placeholder(real_type(self.FLAGS),
                                 [batch_size, 224, 224, 3])
    self.targets = tf.placeholder(tf.int32, [batch_size])

    with slim.arg_scope(vgg_arg_scope()):
        # The original accepted both arguments but never passed them on, so
        # callers could not switch dropout off. The defaults here match
        # vgg_16's own, so default calls behave as before.
        logits, endpoints = vgg.vgg_16(
            self.inputs,
            num_classes=self.FLAGS.num_classes,
            is_training=is_training,
            dropout_keep_prob=dropout_keep_prob)

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.targets)
    self.cost = tf.reduce_sum(loss)

    self.global_step = tf.contrib.framework.get_or_create_global_step()
    self.train_op = tf.train.AdagradOptimizer(0.01).minimize(
        self.cost, global_step=self.global_step)
def __init__(self):
    """Build an inference-mode VGG-16 graph and restore its weights.

    Creates the 'image' placeholder, keeps the network end points, opens a
    session with GPU memory growth enabled, registers the session's close
    as a rospy shutdown hook, and restores from ``VGG16_NTW_PATH``.
    """
    input_shape = [None, IMAGE_SIZE, IMAGE_SIZE, len(CHANNEL_MEAN)]
    self.holder = tf.placeholder(tf.float32, input_shape, name='image')
    _, self.end_points = vgg.vgg_16(
        self.holder, is_training=False, dropout_keep_prob=1.0)

    session_config = tf.ConfigProto(log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=session_config)
    rospy.on_shutdown(self.sess.close)

    tf.train.Saver().restore(self.sess, VGG16_NTW_PATH)
def __init__(self, tensor, keep_prob=1.0, num_classes=1000,
             retrain_layer=None, weights_path='./weights/vgg_16.ckpt'):
    """Build a VGG-16 network on ``tensor``.

    Args:
        tensor: Input forwarded to the parent ``Model`` constructor.
        keep_prob: Dropout keep probability passed to ``vgg_16``.
        num_classes: Forwarded to the parent ``Model`` constructor.
        retrain_layer: Layers to retrain; ``None`` means an empty list. A
            non-empty value switches the network to training mode.
        weights_path: Forwarded to the parent ``Model`` constructor.
    """
    # A literal [] default would be shared between every instance; create a
    # fresh list per call instead.
    if retrain_layer is None:
        retrain_layer = []

    # Call the parent class
    Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer,
                   weights_path)

    # TODO This implementation has a problem while validation (is still set
    # to training)
    is_training = bool(retrain_layer)
    with slim.arg_scope(vgg_arg_scope()):
        self.final, self.endpoints = vgg_16(
            self.tensor,
            num_classes=self.num_classes,
            is_training=is_training,
            dropout_keep_prob=keep_prob)
def build(self):
    """Create the input placeholder, the chosen base network and outputs.

    Sets ``self.input``, ``self.input_mean``, ``self.prob``,
    ``self.logits`` and ``self.gt``, and calls ``self.loss()`` when
    ``self.is_train`` is set.

    Raises:
        ValueError: If ``self.base_net`` is not a supported network name.
    """
    # Input
    self.input = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.img_size[0], self.img_size[1], self.img_size[2]])
    self.input_mean = tfutils.mean_value(self.input, self.img_mean)

    if self.base_net == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            # Forward the training flag as the ResNet branches do; without
            # it vgg_16 defaults to training mode and keeps dropout active
            # at inference time.
            outputs, end_points = vgg.vgg_16(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        self.prob = tf.nn.softmax(outputs, -1)
        self.logits = outputs
    elif self.base_net == 'res50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        # Only spatial position (0, 0) of the ResNet output is used.
        self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
        self.logits = net[:, 0, 0, :]
    elif self.base_net == 'res101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_101(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
        self.logits = net[:, 0, 0, :]
    elif self.base_net == 'res152':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_152(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
        self.logits = net[:, 0, 0, :]
    else:
        raise ValueError(
            'base network should be vgg16, res50, -101, -152...')

    self.gt = tf.placeholder(dtype=tf.int32, shape=[None])
    # self.var_list = tf.trainable_variables()

    if self.is_train:
        self.loss()
def vgg_encode(inputs, trainable=False, is_training=False,
               dropout_keep_prob=0.8, add_summaries=True):
    """Run VGG-16 on ``inputs`` and optionally summarise its activations.

    Args:
        inputs: Image tensor passed to ``vgg_16``.
        trainable: Combined with ``is_training`` to pick the network mode.
        is_training: Combined with ``trainable`` to pick the network mode.
        dropout_keep_prob: Forwarded to ``vgg_16``.
        add_summaries: When true, an activation summary is added for every
            end point.

    Returns:
        The ``(net, end_points)`` pair returned by ``vgg_16``.
    """
    # The network runs in training mode only when both flags are set.
    fine_tune = is_training & trainable

    vgg_kwargs = dict(is_training=fine_tune,
                      dropout_keep_prob=dropout_keep_prob,
                      spatial_squeeze=True,
                      scope='vgg_16')
    net, end_points = vgg_16(inputs, **vgg_kwargs)

    # Add summaries
    if add_summaries:
        for activation in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(activation)

    return net, end_points
def testEndPoints(self):
    """End-point names must be identical in training and eval mode."""
    input_shape = (5, 224, 224, 3)  # (batch_size, height, width, channels)
    num_classes = 1000
    expected_names = frozenset([
        'vgg_16/conv1/conv1_1', 'vgg_16/conv1/conv1_2', 'vgg_16/pool1',
        'vgg_16/conv2/conv2_1', 'vgg_16/conv2/conv2_2', 'vgg_16/pool2',
        'vgg_16/conv3/conv3_1', 'vgg_16/conv3/conv3_2',
        'vgg_16/conv3/conv3_3', 'vgg_16/pool3',
        'vgg_16/conv4/conv4_1', 'vgg_16/conv4/conv4_2',
        'vgg_16/conv4/conv4_3', 'vgg_16/pool4',
        'vgg_16/conv5/conv5_1', 'vgg_16/conv5/conv5_2',
        'vgg_16/conv5/conv5_3', 'vgg_16/pool5',
        'vgg_16/fc6', 'vgg_16/fc7', 'vgg_16/fc8',
    ])
    for is_training in [True, False]:
        with ops.Graph().as_default():
            inputs = random_ops.random_uniform(input_shape)
            _, end_points = vgg.vgg_16(inputs, num_classes,
                                       is_training=is_training)
            self.assertSetEqual(set(end_points.keys()), expected_names)
def build_model(self):
    """Build VGG-16 plus four linear prediction heads on its 'fc8' output.

    Returns:
        A list with the scene, multi-hot, word-embedding and
        object-embedding logits, in that order.
    """
    is_train = self.FLAGS.is_train
    # Dropout is only active while training.
    dropout_keep_prob = 0.5 if is_train else 1.0

    images_placeholder = tf.image.resize_images(self.input_placeholder,
                                                (224, 224))
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, end_points = vgg.vgg_16(
            images_placeholder,
            is_training=is_train,
            dropout_keep_prob=dropout_keep_prob)
    image_features = end_points['vgg_16/fc8']

    obj_embedding_size = 40
    # (scope name, output width) of each head, in output order.
    head_specs = (
        ('scene_pred', 100),
        ('multi_hot_logits', 175),
        ('word_embedding_pred', 300),
        ('object_embedding_pred', obj_embedding_size),
    )
    outputs = []
    for head_scope, head_width in head_specs:
        head_logits = slim.fully_connected(image_features,
                                           head_width,
                                           activation_fn=None,
                                           scope=head_scope,
                                           trainable=True)
        outputs.append(head_logits)
    return outputs
def get_featuremap(net_name, input, num_classes=None):
    """Build a slim backbone and return the feature map(s) taken from it.

    Args:
        net_name: One of 'resnet_v1_50', 'resnet_v1_101' or 'vgg_16'.
        input: Image tensor fed to the backbone.
        num_classes: Forwarded to the ResNet builders. The VGG branch uses a
            fixed value of 7 instead.

    Returns:
        For the ResNets with ``cfg.USE_FPN`` set, a dict mapping 'C2', 'C3',
        'C4' and 'C5' to end-point tensors. Otherwise a single tensor: the
        'block3/unit_5' end point for resnet_v1_50, the network output for
        resnet_v1_101, and 'vgg_16/conv5/conv5_3' for vgg_16. ``None`` is
        returned when ``net_name`` matches no branch.
    """
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_50(
                inputs=input,
                num_classes=num_classes,
                is_training=False,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_50/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_50/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_50/block3/unit_5/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_50/block4'],  # [7, 7]
            }
            return feature_maps_dict
        return layer_dic['resnet_v1_50/block3/unit_5/bottleneck_v1']

    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            # NOTE(review): built with is_training=True while the other
            # backbones use False - confirm this is intended.
            featuremap, layer_dic = resnet_v1.resnet_v1_101(
                inputs=input,
                num_classes=num_classes,
                is_training=True,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_101/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_101/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_101/block3/unit_22/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_101/block4'],
            }
            return feature_maps_dict
        return featuremap

    if net_name == 'vgg_16':
        # VGG must be built under its own arg scope. The original used
        # resnet_arg_scope here, which applies the ResNet layer defaults to
        # the VGG layers.
        with slim.arg_scope(
                vgg.vgg_arg_scope(weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            # NOTE(review): num_classes is hard-coded to 7 and the
            # num_classes argument is ignored; kept as in the original.
            featuremap, layer_dic = vgg.vgg_16(
                inputs=input,
                num_classes=7,
                is_training=False,
                spatial_squeeze=False,
            )
        return layer_dic['vgg_16/conv5/conv5_3']
def network_vgg_16():
    """Build an inference-mode VGG-16 on a fixed 1x224x224x3 placeholder.

    Returns:
        The first output of ``vgg_16`` built with 1000 classes.
    """
    image_input = tf.placeholder(
        dtype=tf.float32, name='input', shape=[1, 224, 224, 3])
    logits, _ = vgg_16(image_input, num_classes=1000, is_training=False)
    return logits
# Build the requested network and work out which end point to extract.
if args.network == "resnet":
    # create network
    with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)):
        # 1000 is the number of softmax class
        _, end_points = resnet_v1.resnet_v1_152(images, 1000)

    # define the feature name according slim standard. End-point keys are
    # scope names and always use '/', so do not build them with
    # os.path.join (it would insert '\\' on Windows).
    feature_name = "/".join(("resnet_v1_152", args.feature_name))

    # create the output directory
    out_dir = os.path.join(args.data_dir, out_file)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

elif args.network == "vgg":
    _, end_points = vgg.vgg_16(images)
    out_dir = os.path.join(args.data_dir, out_file + ".pkl")
    feature_name = "/".join(("vgg_16", args.feature_name))
else:
    assert False, "Incorrect Network"

# check that the feature name is correct. The original passed
# `{feature_name}` (a set) to format, so the message showed set braces.
assert feature_name in end_points, \
    "Invalid Feature name ({}), Must be one of the following {}"\
    .format(feature_name, list(end_points.keys()))

# CPU/GPU option
cpu_pool = Pool(args.no_thread, maxtasksperchild=1000)
def run_training():
    """Fine-tune a VGG-16 or ResNet-152 regressor from a pretrained checkpoint.

    Reads the annotation CSV, builds train/validation input batches, builds
    the network selected by ``MODEL``, minimises the MSE between the labels
    and the network outputs, logs summaries for both partitions, and saves
    a checkpoint when the validation R2 improves.

    Raises:
        ValueError: If ``DATA`` or ``MODEL`` holds an unsupported value.
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)

    # create input path and labels np.array from csv annotations
    df_annos = pd.read_csv(ANNOS_CSV, index_col=0)
    # shuffle the whole datasets
    df_annos = df_annos.sample(frac=1).reset_index(drop=True)
    if DATA == 'l8':
        path_col = ['l8_vis_jpg']
    elif DATA == 's1':
        path_col = ['s1_vis_jpg']
    elif DATA == 'l8s1':
        path_col = ['l8_vis_jpg', 's1_vis_jpg']
    else:
        # Fail here rather than with a NameError on path_col further down.
        raise ValueError('Unsupported DATA setting: %r' % (DATA,))

    is_train = df_annos.partition == 'train'
    is_val = df_annos.partition == 'val'
    input_files_train = JPG_DIR + df_annos.loc[is_train, path_col].values
    input_labels_train = df_annos.loc[is_train, 'pop_density_log2'].values
    input_files_val = JPG_DIR + df_annos.loc[is_val, path_col].values
    input_labels_val = df_annos.loc[is_val, 'pop_density_log2'].values
    input_id_train = df_annos.loc[is_train, 'village_id'].values
    input_id_val = df_annos.loc[is_val, 'village_id'].values

    print('input_files_train shape:', input_files_train.shape)
    train_set_size = len(input_labels_train)

    # data input
    with tf.device('/cpu:0'):
        train_images_batch, train_labels_batch, _ = \
            dataset.input_batches(FLAGS.batch_size, FLAGS.output_size,
                                  input_files_train, input_labels_train,
                                  input_id_train, IMAGE_HEIGHT, IMAGE_WIDTH,
                                  IMAGE_CHANNEL, regression=True,
                                  augmentation=True, normalization=True)
        val_images_batch, val_labels_batch, _ = \
            dataset.input_batches(FLAGS.batch_size, FLAGS.output_size,
                                  input_files_val, input_labels_val,
                                  input_id_val, IMAGE_HEIGHT, IMAGE_WIDTH,
                                  IMAGE_CHANNEL, regression=True,
                                  augmentation=False, normalization=True)

    images_placeholder = tf.placeholder(
        tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
    labels_placeholder = tf.placeholder(tf.float32, shape=[None, ])
    print('finish data input')

    # number of training batches/steps in each epoch
    TRAIN_BATCHES_PER_EPOCH = int(train_set_size / FLAGS.batch_size)
    # total number of training batches/steps
    MAX_STEPS = TRAIN_BATCHES_PER_EPOCH * FLAGS.max_epoch

    # CNN forward reference
    if MODEL == 'vgg':
        with slim.arg_scope(
                vgg.vgg_arg_scope(weight_decay=FLAGS.weight_decay)):
            outputs, _ = vgg.vgg_16(images_placeholder,
                                    num_classes=FLAGS.output_size,
                                    dropout_keep_prob=FLAGS.dropout_keep,
                                    is_training=True)
    elif MODEL == 'resnet':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            outputs, _ = resnet_v1.resnet_v1_152(
                images_placeholder,
                num_classes=FLAGS.output_size,
                is_training=True)
    else:
        # Fail here rather than with a NameError on outputs further down.
        raise ValueError('Unsupported MODEL setting: %r' % (MODEL,))
    # change shape from (B,1) to (B,), same as label input
    outputs = tf.squeeze(outputs)

    # loss
    labels_real = tf.pow(2.0, labels_placeholder)
    outputs_real = tf.pow(2.0, outputs)

    # only loss_log2_mse are used for gradient calculate, model minimize
    # this value
    loss_log2_mse = tf.reduce_mean(
        tf.squared_difference(labels_placeholder, outputs),
        name='loss_log2_mse')
    loss_real_rmse = tf.sqrt(
        tf.reduce_mean(tf.squared_difference(labels_real, outputs_real)),
        name='loss_real_rmse')
    loss_real_mae = tf.losses.absolute_difference(labels_real, outputs_real)
    tf.summary.scalar('loss_log2_mse', loss_log2_mse)
    tf.summary.scalar('loss_real_rmse', loss_real_rmse)
    tf.summary.scalar('loss_real_mae', loss_real_mae)

    # accuracy (R2)
    def r_squared(labels, predictions):
        """Return 1 - SSE / SST for the given labels and predictions."""
        sst = tf.reduce_sum(
            tf.squared_difference(labels, tf.reduce_mean(labels)))
        sse = tf.reduce_sum(tf.squared_difference(labels, predictions))
        return (1.0 - tf.div(sse, sst))

    r2_log2 = r_squared(labels_placeholder, outputs)
    r2_real = r_squared(labels_real, outputs_real)
    tf.summary.scalar('r2_log2', r2_log2)
    tf.summary.scalar('r2_real', r2_real)

    # determine the model variables to restore from pre-trained checkpoint
    if MODEL == 'vgg':
        if DATA == 'l8s1':
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8', 'vgg_16/conv1'])
        else:
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8'])
    else:  # MODEL == 'resnet', validated above
        model_variables = slim.get_variables_to_restore(
            exclude=['resnet_v1_152/logits', 'resnet_v1_152/conv1'])

    # training step and learning rate
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,  # initial learning rate
        global_step=global_step,  # current step
        decay_steps=MAX_STEPS,  # total numbers step to decay
        decay_rate=FLAGS.lr_decay_rate
    )  # final learning rate = FLAGS.learning_rate * decay_rate
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_log2_mse, global_step=global_step)

    # summary output in tensorboard
    summary = tf.summary.merge_all()
    summary_writer_train = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_train'), sess.graph)
    summary_writer_val = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_val'), sess.graph)

    # variable initialize
    init = tf.global_variables_initializer()
    sess.run(init)

    # restore the model from pre-trained checkpoint
    restorer = tf.train.Saver(model_variables)
    restorer.restore(sess, PRETRAIN_WEIGHTS)
    print('loaded pre-trained weights: ', PRETRAIN_WEIGHTS)

    # saver object to save checkpoint during training
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    # Checkpoints are considered once per epoch, on the validation step
    # nearest below the epoch length. The original computed
    # `N - N % 50` inline, which is 0 for epochs shorter than 50 batches and
    # raised ZeroDivisionError; clamp it to one validation interval.
    checkpoint_interval = max(
        TRAIN_BATCHES_PER_EPOCH - TRAIN_BATCHES_PER_EPOCH % 50, 50)

    print('start training...')
    epoch = 0
    best_r2 = -float('inf')
    for step in xrange(MAX_STEPS):
        if step % TRAIN_BATCHES_PER_EPOCH == 0:
            epoch += 1

        start_time = time.time()  # record the time used for each batch
        # inputs of this batch, numpy array format
        images_out, labels_out = sess.run(
            [train_images_batch, train_labels_batch])
        duration_batch = time.time() - start_time

        if step == 0:
            print("finished reading batch data")
            print("images_out shape:", images_out.shape)

        feed_dict = {
            images_placeholder: images_out,
            labels_placeholder: labels_out
        }
        _, train_loss, train_accuracy, train_outputs, lr = \
            sess.run([train_op, loss_log2_mse, r2_log2, outputs,
                      learning_rate], feed_dict=feed_dict)
        duration = time.time() - start_time

        # print training loss every 10 batches
        if step % 10 == 0 or (step + 1) == MAX_STEPS:
            print('Step %d epoch %d lr %.3e: log2 MSE loss = %.4f log2 R2 = %.4f (%.3f sec, %.3f sec(each batch))' \
                % (step, epoch, lr, train_loss, train_accuracy, duration*10, duration_batch))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer_train.add_summary(summary_str, step)
            summary_writer_train.flush()

        # calculate and print validation loss every 50 batches
        if step % 50 == 0 or (step + 1) == MAX_STEPS:
            images_out, labels_out = sess.run(
                [val_images_batch, val_labels_batch])
            feed_dict = {
                images_placeholder: images_out,
                labels_placeholder: labels_out
            }
            val_loss, val_accuracy = sess.run([loss_log2_mse, r2_log2],
                                              feed_dict=feed_dict)
            print('Step %d epoch %d: val log2 MSE = %.4f val log2 R2 = %.4f ' %
                  (step, epoch, val_loss, val_accuracy))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer_val.add_summary(summary_str, step)
            summary_writer_val.flush()

            # in each epoch, if the validation R2 is higher than best R2,
            # save the checkpoint. The interval is a multiple of 50, so this
            # only fires on steps where validation has just run.
            if step % checkpoint_interval == 0:
                if val_accuracy > best_r2:
                    best_r2 = val_accuracy
                    checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                    saver.save(sess, checkpoint_file, global_step=step,
                               write_state=True)
def main(argv=None):
    """Fine-tune a VGG-16 classifier on the preprocessed dataset.

    Loads the six arrays stored in ``INPUT_DATA`` (training, validation and
    testing images/labels), builds a VGG-16 graph with ``N_CLASSES`` outputs,
    restores weights either from the latest checkpoint found in ``SAVE_PATH``
    or from ``TRAINED_CKPT_FILE``, runs ``STEPS`` training steps on
    consecutive slices of ``BATCH`` examples, and prints the final accuracy on
    the testing split.

    Every 10 steps the training loss is printed; every 100 steps a checkpoint
    is written to ``TRAIN_FILE`` and the validation accuracy is printed.

    Args:
        argv: Unused.
    """
    # Load the preprocessed data. The file is indexed as six separate arrays
    # (presumably of different shapes, i.e. an object array), which
    # NumPy >= 1.16.3 refuses to load unless pickling is explicitly allowed.
    # NOTE: unpickling can execute arbitrary code -- INPUT_DATA must be a
    # trusted, locally generated file.
    processed_data = np.load(INPUT_DATA, allow_pickle=True)
    training_images = processed_data[0]
    n_training_examples = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print('%d training, %d validation, %d testing' %
          (n_training_examples, len(validation_labels), len(testing_labels)))

    # Inputs of the VGG-16 graph.
    images = tf.placeholder(tf.float32, [None, 224, 224, 3],
                            name='input_image')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # VGG-16 model.
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, _ = vgg.vgg_16(images, num_classes=N_CLASSES)

    # Loss function.
    loss_fun = tf.losses.softmax_cross_entropy(
        tf.one_hot(labels, N_CLASSES), logits)

    # Training op. Updates are restricted to the variables returned by
    # get_trainable_variables() (per the original comment: only the last
    # layer is trained).
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss(), var_list=get_trainable_variables())

    # Accuracy.
    with tf.variable_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    ckpt = tf.train.get_checkpoint_state(SAVE_PATH)
    if ckpt and ckpt.model_checkpoint_path:
        # A previous run exists: restore all model variables and resume from
        # the step number encoded after the last '-' of the checkpoint name.
        variables_to_restore = slim.get_model_variables()
        print('continue training from %s' % ckpt)
        step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        step = int(step)
        ckpt = ckpt.model_checkpoint_path
    else:
        # No previous run: transfer the selected pre-trained variables.
        ckpt = TRAINED_CKPT_FILE
        variables_to_restore = get_tuned_variable()
        print('loading tuned variables from %s' % TRAINED_CKPT_FILE)
        step = 0

    load_fn = slim.assign_from_checkpoint_fn(ckpt,
                                             variables_to_restore,
                                             ignore_missing_vars=True)

    # Train inside a session.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise every variable first, then overwrite the restored ones.
        sess.run(tf.global_variables_initializer())
        load_fn(sess)

        start = 0
        # Clamp the first batch as well; otherwise a BATCH larger than the
        # dataset makes the following step run on an empty slice.
        end = min(BATCH, n_training_examples)
        for i in range(step + 1, step + 1 + STEPS):
            # One optimisation step; only the variables in var_list change.
            _, loss_val = sess.run(
                [train_step, loss_fun],
                feed_dict={
                    images: training_images[start:end],
                    labels: training_labels[start:end]
                })

            if i % 10 == 0:
                print('after %d train step, loss value is: %.4f' %
                      (i, loss_val))

            # Periodic checkpoint + validation report.
            if i % 100 == 0:
                saver.save(sess, TRAIN_FILE, global_step=i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print('Step %d Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))

            # Advance to the next slice, wrapping around at the end of the
            # training set and truncating the last (short) batch.
            start = end
            if start == n_training_examples:
                start = 0
            end = min(start + BATCH, n_training_examples)

        # Accuracy on the testing split.
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100.0))
def network():
    """Build an inference-mode VGG-16 graph and return its first output.

    A float32 placeholder named 'input' with shape ``input_shape`` is created
    and passed to ``vgg_16`` with 1000 classes, ``is_training=False`` and
    ``spatial_squeeze=False``.

    Returns:
        The first element of the pair returned by ``vgg_16``; the second
        element (the end-points) is discarded.
    """
    image_placeholder = tf.placeholder(shape=input_shape,
                                       dtype=tf.float32,
                                       name='input')
    vgg_output, _ = vgg_16(image_placeholder,
                           num_classes=1000,
                           is_training=False,
                           spatial_squeeze=False)
    return vgg_output
mode=args.mode, network=args.network, feature_name=args.feature_name, size=args.img_size) print("Create networks...") if args.network == "resnet": ft_output = resnet.create_resnet(images, resnet_out=args.feature_name, resnet_version=args.resnet_version, is_training=False) # create network with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)): _, end_points = resnet_v1.resnet_v1_152(images, 1000) # 1000 is the number of softmax class elif args.network == "vgg": _, end_points = vgg.vgg_16(images, is_training=False, dropout_keep_prob=1.0) ft_name = os.path.join("vgg_16", args.feature_name) ft_output = end_points[ft_name] else: assert False, "Incorrect Network" extract_features( img_input = images, ft_output = ft_output, dataset_cstor = dataset_cstor, dataset_args = {"folder": args.data_dir, "image_builder":image_builder, "crop_builder":crop_builder, "dataset_name": args.dataset_name}, batchifier_cstor = OracleBatchifier, out_dir = args.out_dir, set_type = args.set_type, network_ckpt=args.ckpt,
def get_model(ref_dict, num_point, is_training, bn=False, bn_decay=None,
              img_size=(137, 137), wd=1e-5, FLAGS=None):
    """Assemble the camera-prediction graph and collect its tensors.

    The reference image is resized to 224x224 when its static height/width
    differ from that, embedded with VGG-16 (1024 outputs, inference mode), and
    the embedding is handed to ``posenet`` to predict a camera matrix. Both
    the ground-truth matrix and the predicted one are then applied to the
    sampled points through ``get_img_points``.

    Args:
        ref_dict: Mapping read with the keys 'imgs', 'pc', 'sample_pc',
            'sample_pc_rot', 'trans_mat', 'K', 'RT' and 'shifts'.
        num_point: Not used by this function.
        is_training: Forwarded to the ``posenet`` builders.
        bn: Forwarded to the ``posenet`` builders.
        bn_decay: Forwarded to the ``posenet`` builders.
        img_size: Not used by this function.
        wd: L2 weight for the VGG conv layers; also forwarded to ``posenet``.
        FLAGS: Configuration object; ``FLAGS.shift`` is read here and the
            whole object is passed on to ``get_img_points``.

    Returns:
        dict of named inputs, intermediate tensors and predictions.
    """
    image_batch = ref_dict['imgs']
    point_cloud = ref_dict['pc']
    sampled_points = ref_dict['sample_pc']
    _ = ref_dict['sample_pc_rot']  # looked up, but not used below
    gt_trans_mat = ref_dict['trans_mat']
    k_mat = ref_dict['K']
    rt_mat = ref_dict['RT']
    gt_shift = ref_dict['shifts']

    batch_size = image_batch.get_shape()[0].value

    # Ground-truth inputs exposed to the caller.
    end_points = {
        'ref_pc': point_cloud,
        'RT': rt_mat,
        'K': k_mat,
        'gt_xyshift': gt_shift,
        'trans_mat': gt_trans_mat,
        'sample_pc': sampled_points,
    }

    # Image feature extraction: VGG is fed 224x224 inputs.
    if image_batch.shape[1] != 224 or image_batch.shape[2] != 224:
        image_batch = tf.image.resize_bilinear(image_batch, [224, 224])
    end_points['ref_img'] = image_batch

    conv_regularizer = slim.l2_regularizer(wd)
    with slim.arg_scope([slim.conv2d], weights_regularizer=conv_regularizer):
        vgg_embedding, vgg_end_points = vgg.vgg_16(image_batch,
                                                   num_classes=1024,
                                                   is_training=False,
                                                   scope='vgg_16',
                                                   spatial_squeeze=False)
    # Drop the two singleton spatial axes kept by spatial_squeeze=False.
    embedding = tf.squeeze(vgg_embedding, axis=[1, 2])
    end_points['embedding'] = embedding
    print(vgg_end_points.keys())

    # Both posenet builders take the same positional arguments.
    pose_args = (embedding, is_training, batch_size, bn, bn_decay, wd)
    with tf.variable_scope("cameraprediction"):
        if FLAGS.shift:
            (pred_rotation, pred_translation, pred_RT,
             pred_xyshift) = posenet.get_cam_mat_shft(*pose_args)
        else:
            (pred_rotation, pred_translation,
             pred_RT) = posenet.get_cam_mat(*pose_args)
            pred_xyshift = None  # no shift is predicted in this mode
        end_points['pred_rotation'] = pred_rotation
        end_points['pred_translation'] = pred_translation
        end_points['pred_RT'] = pred_RT
        end_points['pred_xyshift'] = pred_xyshift

    # Image points obtained with the ground-truth matrix.
    print('trans_mat', gt_trans_mat.shape)
    gt_img_points, gt_xy = get_img_points(sampled_points, gt_trans_mat,
                                          gt_shift, FLAGS)
    end_points['sample_img_points'] = gt_img_points
    end_points['gt_xy'] = gt_xy

    # Image points obtained with the predicted matrix: pred_RT x K^T.
    k_transposed = tf.transpose(k_mat, perm=[0, 2, 1])
    pred_trans_mat = tf.matmul(pred_RT, k_transposed)
    pred_img_points, pred_xy = get_img_points(sampled_points, pred_trans_mat,
                                              pred_xyshift, FLAGS)
    end_points['pred_sample_img_points'] = pred_img_points
    end_points['pred_trans_mat'] = pred_trans_mat
    end_points['pred_xy'] = pred_xy

    print("gt_xy, pred_xy", gt_xy.get_shape(), pred_xy.get_shape())
    return end_points
def get_model(input_pls, is_training, bn=False, bn_decay=None, img_size=224,
              FLAGS=None):
    """Build the image-conditioned GVF prediction graph.

    The input images are resized to ``img_size`` x ``img_size`` when their
    static height/width differ, encoded by the network selected with
    ``FLAGS.encoder``, and the resulting embedding (optionally together with
    per-point image features) is decoded by ``gvfnet`` into per-point GVF
    predictions.

    Args:
        input_pls: Mapping read with the keys 'imgs', 'pnts', 'gvfs',
            'onedge', 'trans_mats' and 'obj_rot_mats'.
        is_training: Forwarded to the encoder (except VGG-16, which is always
            built with ``is_training=False``) and to the ``gvfnet`` decoders.
        bn: Forwarded to the 'sim_res' encoder and the ``gvfnet`` decoders.
        bn_decay: Forwarded to the 'sim_res' encoder and the decoders.
        img_size: Side length the images are resized to before encoding.
        FLAGS: Configuration object. Attributes read here: ``act``, ``rot``,
            ``edgeweight``, ``alpha``, ``encoder``, ``wd``, ``num_classes``,
            ``batch_size``, ``img_feat_onestream``, ``img_h``, ``img_w``,
            ``decoder``, ``multi_view`` and ``XYZ``.

    Returns:
        dict with the keys 'pnts', 'gt_gvfs_xyz', 'pnts_rot', 'imgs',
        'resized_ref_img', 'img_embedding', 'pred_gvfs_xyz',
        'pred_gvfs_dist', 'pred_gvfs_direction', 'sample_img_points' and
        'point_img_feat', plus 'onedge' when ``FLAGS.edgeweight != 1.0``.

    Raises:
        ValueError: If ``FLAGS.act`` or ``FLAGS.encoder`` names an
            unsupported option.
    """
    # Fail early on an unknown activation instead of hitting an unbound
    # local much later in graph construction.
    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu
    else:
        raise ValueError(
            "Unsupported FLAGS.act %r; expected 'relu' or 'elu'" %
            (FLAGS.act,))

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        # Apply the per-object rotation to both the targets and the points.
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Resize the images to the encoder's input size when needed.
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            # Colour channels are interpolated bilinearly; the 4th channel is
            # resized with nearest-neighbour and concatenated back.
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img

    # Image encoder.
    resnet_encoders = ("resnet_v1_50", "resnet_v1_101", "resnet_v2_50",
                       "resnet_v2_101")
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder in resnet_encoders:
        # The builder function and its variable scope share the encoder name.
        if FLAGS.encoder.startswith("resnet_v1"):
            resnet_module = resnet_v1
        else:
            resnet_module = resnet_v2
        resnet_module.default_image_size = img_size
        # The v2 encoders are deliberately kept under resnet_v1's arg scope,
        # exactly as before.
        # NOTE(review): confirm this is intended rather than a copy-paste
        # of the v1 branches.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = getattr(
                resnet_module, FLAGS.encoder)(ref_img,
                                              FLAGS.num_classes,
                                              is_training=is_training,
                                              scope=FLAGS.encoder)
        scopelst = [
            "%s/block%d" % (FLAGS.encoder, block) for block in range(1, 5)
        ]
    else:
        raise ValueError("Unsupported FLAGS.encoder %r" % (FLAGS.encoder,))
    end_points['img_embedding'] = ref_feats_embedding

    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    def sample_at_points(feature_map):
        # Resize the map to (FLAGS.img_h, FLAGS.img_w), then resample it at
        # sample_img_points.
        resized = tf.compat.v1.image.resize_bilinear(
            feature_map, (FLAGS.img_h, FLAGS.img_w))
        return tf.contrib.resampler.resampler(resized, sample_img_points)

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat"):
            # Pick which encoder end-points provide the per-point features.
            if FLAGS.encoder[:3] == "vgg":
                feat_keys = [
                    'vgg_16/conv1/conv1_2', 'vgg_16/conv2/conv2_2',
                    'vgg_16/conv3/conv3_3'
                ]
                if FLAGS.encoder[-7:] != "smaller":
                    feat_keys.append('vgg_16/conv4/conv4_3')
                else:
                    print("smaller vgg")
            elif FLAGS.encoder[:3] == "res":
                # Only the first three resnet blocks are used.
                feat_keys = scopelst[:3]
            else:
                # Remaining encoder ('sim_res'): end-points are indexed by
                # position.
                feat_keys = [0, 1, 2]
            # Sampled in key order so that ops are created in the same
            # sequence as the per-layer code this replaces.
            point_img_feat = tf.concat(
                axis=2,
                values=[
                    sample_at_points(encdr_end_points[key])
                    for key in feat_keys
                ])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)

            # NOTE(review): the decoder is built inside the "sdfimgfeat"
            # scope, so its variables carry that prefix -- confirm against
            # existing checkpoints.
            if FLAGS.decoder == "att":
                build_decoder = gvfnet.get_gvf_att_imgfeat
            elif FLAGS.decoder == "skip":
                build_decoder = gvfnet.get_gvf_basic_imgfeat_onestream_skip
            else:
                build_decoder = gvfnet.get_gvf_basic_imgfeat_onestream
            gvfs_feat = build_decoder(input_pnts_rot,
                                      ref_feats_embedding,
                                      point_img_feat,
                                      is_training,
                                      batch_size,
                                      bn,
                                      bn_decay,
                                      wd=FLAGS.wd,
                                      activation_fn=activation_fn)
    else:
        # NOTE(review): with FLAGS.multi_view set and no one-stream image
        # features, gvfs_feat stays None and is passed to the heads below.
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction"):
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)

    end_points['pred_gvfs_xyz'] = None
    end_points['pred_gvfs_dist'] = None
    end_points['pred_gvfs_direction'] = None
    if FLAGS.XYZ:
        # Predict the vector directly; derive its length and unit direction.
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        # The 1e-6 floor avoids dividing by a zero length.
        end_points['pred_gvfs_direction'] = (
            end_points['pred_gvfs_xyz'] /
            tf.maximum(end_points['pred_gvfs_dist'], 1e-6))
    else:
        # Predict length and direction separately; the vector is their product.
        (end_points['pred_gvfs_dist'],
         end_points['pred_gvfs_direction']) = gvfnet.dist_direct_gvfhead(
             gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = (end_points['pred_gvfs_direction'] *
                                       end_points['pred_gvfs_dist'])

    end_points["sample_img_points"] = sample_img_points
    end_points["point_img_feat"] = point_img_feat
    return end_points
def get_model(ref_dict, num_point, is_training, bn=False, bn_decay=None,
              img_size=(137, 137), wd=1e-5, FLAGS=None):
    """Build the camera-prediction graph with a regressed projection matrix.

    The reference image is resized to 224x224 when its static height/width
    differ from that, embedded with VGG-16 (1024 outputs, inference mode), and
    the embedding is handed to ``posenet`` to predict a camera matrix. The
    prediction is combined with the inverse normalisation and inverse rotation
    matrices and with ``K`` to form the predicted transformation, which is
    applied to the sampled points alongside the ground-truth one.

    Args:
        ref_dict: Mapping read with the keys 'imgs', 'pc', 'sample_pc',
            'sample_pc_rot', 'trans_mat', 'K', 'norm_params', 'rot_mat_inv',
            'regress_mat' and 'shifts'.
        num_point: Not used by this function.
        is_training: Forwarded to the ``posenet`` builders.
        bn: Forwarded to the ``posenet`` builders.
        bn_decay: Forwarded to the ``posenet`` builders.
        img_size: Forwarded to ``get_img_points``.
        wd: L2 weight for the VGG conv layers; also forwarded to ``posenet``.
        FLAGS: Configuration object; ``FLAGS.shift`` and
            ``FLAGS.space_shift`` are read here and the whole object is
            passed on to ``get_img_points``. When both flags are set,
            ``FLAGS.shift`` takes precedence and no space shift is applied.

    Returns:
        dict of named inputs, intermediate tensors and predictions.
    """
    ref_img = ref_dict['imgs']
    ref_pc = ref_dict['pc']
    ref_sample_pc = ref_dict['sample_pc']
    ref_sample_pc_rot = ref_dict['sample_pc_rot']  # read, not used below
    trans_mat = ref_dict['trans_mat']
    K = ref_dict['K']
    norm_params = ref_dict['norm_params']
    rot_mat_inv = ref_dict['rot_mat_inv']
    regress_mat = ref_dict['regress_mat']
    gt_xyshift = ref_dict['shifts']

    batch_size = ref_img.get_shape()[0].value
    norm_mat_inv = get_inverse_norm_matrix(norm_params, batch_size)

    # endpoints
    end_points = {}
    end_points['ref_pc'] = ref_pc
    end_points['regress_mat'] = regress_mat
    end_points['K'] = K
    end_points['gt_xyshift'] = gt_xyshift
    end_points['trans_mat'] = trans_mat
    end_points['sample_pc'] = ref_sample_pc

    # Image feature extraction: VGG is fed 224x224 inputs.
    if ref_img.shape[1] != 224 or ref_img.shape[2] != 224:
        ref_img = tf.image.resize_bilinear(ref_img, [224, 224])
    else:
        print("image size:", img_size)
    end_points['ref_img'] = ref_img

    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(wd)):
        ref_feats_embedding, vgg_end_points = vgg.vgg_16(
            ref_img,
            num_classes=1024,
            is_training=False,
            scope='vgg_16',
            spatial_squeeze=False)
    # Drop the two singleton spatial axes kept by spatial_squeeze=False.
    ref_feats_embedding_cnn = tf.squeeze(ref_feats_embedding, axis=[1, 2])
    end_points['embedding'] = ref_feats_embedding_cnn
    print(vgg_end_points.keys())

    # Optional extras stay None unless the selected posenet variant predicts
    # them. Initialising both here keeps the later matrix composition from
    # reading an unbound name when FLAGS.shift and FLAGS.space_shift are both
    # set.
    pred_xyshift = None
    pred_xyzshift = None
    with tf.variable_scope("cameraprediction"):
        if FLAGS.shift:
            (pred_rotation, pred_translation, pred_RT,
             pred_xyshift) = posenet.get_cam_mat_shft(
                 ref_feats_embedding_cnn, is_training, batch_size, bn,
                 bn_decay, wd)
        elif FLAGS.space_shift:
            (pred_rotation, pred_translation, pred_RT,
             pred_xyzshift) = posenet.get_cam_mat_spaceshft(
                 ref_feats_embedding_cnn, is_training, batch_size, bn,
                 bn_decay, wd)
        else:
            pred_rotation, pred_translation, pred_RT = posenet.get_cam_mat(
                ref_feats_embedding_cnn, is_training, batch_size, bn,
                bn_decay, wd)
        end_points['pred_rotation'] = pred_rotation
        end_points['pred_translation'] = pred_translation
        end_points['pred_RT'] = pred_RT
        end_points['pred_xyshift'] = pred_xyshift

    # Image points obtained with the ground-truth matrix.
    print('trans_mat', trans_mat.shape)
    sample_img_points, gt_xy = get_img_points(ref_sample_pc,
                                              trans_mat,
                                              gt_xyshift,
                                              FLAGS,
                                              img_size=img_size)
    end_points['sample_img_points'] = sample_img_points
    end_points['gt_xy'] = gt_xy

    # Predicted transformation:
    #   norm_mat_inv @ [space shift @] rot_mat_inv @ pred_RT @ K
    # The space-shift factor is included only when it was actually predicted.
    if pred_xyzshift is not None:
        pred_regress_mat = (norm_mat_inv @ pred_xyzshift @ rot_mat_inv
                            @ pred_RT)
    else:
        pred_regress_mat = norm_mat_inv @ rot_mat_inv @ pred_RT
    pred_trans_mat = pred_regress_mat @ K

    pred_sample_img_points, pred_xy = get_img_points(ref_sample_pc,
                                                     pred_trans_mat,
                                                     pred_xyshift,
                                                     FLAGS,
                                                     img_size=img_size)
    end_points['pred_sample_img_points'] = pred_sample_img_points
    end_points['pred_trans_mat'] = pred_trans_mat
    end_points['pred_regress_mat'] = pred_regress_mat
    end_points['pred_xy'] = pred_xy

    print("gt_xy, pred_xy", gt_xy.get_shape(), pred_xy.get_shape())
    return end_points