def network_fn(inputs):
        """Fine grained classification with multiplex spatial transformation channels utilizing inception nets

                """
        end_points = {}
        arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=FLAGS.weight_decay)
        with slim.arg_scope(arg_scope):
            with tf.variable_scope('stn'):
                with tf.variable_scope('localization'):
                    transformer_theta = localization_net_alpha(inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
                    transformer_theta_split = tf.split(transformer_theta, NUM_TRANSFORMER, axis=1)
                    end_points['stn/localization/transformer_theta'] = transformer_theta

                transformer_outputs = []
                for theta in transformer_theta_split:
                    transformer_outputs.append(
                        transformer(inputs, theta, transformer_output_size, sampling_kernel='bilinear'))

                inception_outputs = []
                transformer_outputs_shape = [FLAGS.batch_size, transformer_output_size[0],
                                             transformer_output_size[1], 3]
                with tf.variable_scope('classification'):
                    for path_idx, inception_inputs in enumerate(transformer_outputs):
                        with tf.variable_scope('path_{}'.format(path_idx)):
                            inception_inputs.set_shape(transformer_outputs_shape)
                            net, _ = inception_v2.inception_v2_base(inception_inputs)
                            inception_outputs.append(net)
                    # concatenate the endpoints: num_batch*7*7*(num_transformer*1024)
                    multipath_outputs = tf.concat(inception_outputs, axis=-1)

                    # final fc layer logits
                    classification_logits = _inception_logits(multipath_outputs, NUM_CLASSES, dropout_keep_prob)
                    end_points['stn/classification/logits'] = classification_logits

        return classification_logits, end_points
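
The snippet above relies on module-level names that the listing omits (FLAGS, NUM_TRANSFORMER, NUM_THETA_PARAMS, NUM_CLASSES, transformer_output_size, dropout_keep_prob). A plausible configuration, with illustrative values that are assumptions rather than the original settings:

NUM_TRANSFORMER = 4           # number of parallel STN attention branches
NUM_THETA_PARAMS = 6          # a 2x3 affine transformation matrix per branch
NUM_CLASSES = 200             # e.g. a 200-way fine-grained label set
transformer_output_size = [224, 224]
dropout_keep_prob = 0.7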
Example #2
def transformer_inference(image):
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
        with slim.arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                            is_training=False):
            with tf.variable_scope('stn'):
                with tf.variable_scope('localization'):
                    transformer_theta = localization_net_alpha(
                        image, num_transformer, NUM_THETA_PARAMS)
                    transformer_theta_split = tf.split(transformer_theta,
                                                       num_transformer,
                                                       axis=1)

                transformer_outputs = []
                transformer_output_size = [
                    transformed_height, transformed_width
                ]
                for theta in transformer_theta_split:
                    transformer_outputs.append(
                        transformer(image,
                                    theta,
                                    transformer_output_size,
                                    sampling_kernel='bilinear'))

    return transformer_outputs
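
A minimal inference sketch for the function above, assuming the globals num_transformer, transformed_height and transformed_width are set and a trained checkpoint exists (the path and input shape below are placeholders):

import numpy as np

image_ph = tf.placeholder(tf.float32, [1, 448, 448, 3])
crops = transformer_inference(image_ph)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "/path/to/stn_checkpoint")  # hypothetical path
    crop_values = sess.run(
        crops, feed_dict={image_ph: np.zeros((1, 448, 448, 3), np.float32)})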
Example #3
  def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v2_arg_scope()):
      inception.inception_v2_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(10173112, total_params)
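
analyze_vars returns a (total parameters, total bytes) pair; passing print_info=True additionally prints a per-variable breakdown, which helps when a parameter-count assertion like the one above fails:

total_params, total_bytes = slim.model_analyzer.analyze_vars(
    slim.get_model_variables(), print_info=True)
print("params: %d, bytes: %d" % (total_params, total_bytes))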
Example #4
    def __call__(self, imgs, seqVec, seqNums, batchSize):
        arg_scope = inception_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            _, feat = inception_v2(preprocess(imgs),
                                   num_classes=1001,
                                   is_training=False,
                                   reuse=tf.AUTO_REUSE)

        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            img_inputs = MLP("img_embedding", feat, 1024, self.embedSize)
            img_inputs = tf.expand_dims(img_inputs, axis=1)
            def make_cell():
                # Build a fresh cell per layer: reusing a single cell object
                # via [cell] * layerSize would share weights across layers
                # (and is rejected by later TF 1.x releases).
                if self.rnnType == "lstm":
                    return tf.nn.rnn_cell.BasicLSTMCell(self.hiddenSize)
                if self.rnnType == "rnn":
                    return tf.nn.rnn_cell.BasicRNNCell(self.hiddenSize)
                if self.rnnType == "gru":
                    return tf.nn.rnn_cell.GRUCell(self.hiddenSize)
                raise ValueError("unsupported rnnType: %s" % self.rnnType)

            cells = tf.nn.rnn_cell.MultiRNNCell(
                [make_cell() for _ in range(self.layerSize)])
            if batchSize > 1:
                cells = tf.nn.rnn_cell.DropoutWrapper(cells,
                                                      input_keep_prob=0.7,
                                                      output_keep_prob=0.7,
                                                      state_keep_prob=0.7)
            init_state = cells.zero_state(batchSize, tf.float32)
            _, img_states = tf.nn.dynamic_rnn(cells,
                                              img_inputs,
                                              initial_state=init_state)
            embeddingMat = tf.get_variable(
                "embeddingMat", [self.targetVocSize, self.embedSize],
                initializer=tf.truncated_normal_initializer(stddev=0.08))
            seqVec = tf.nn.embedding_lookup(embeddingMat, seqVec)
            if batchSize == 1:
                #Test phase
                outputs, states = tf.nn.dynamic_rnn(cells,
                                                    seqVec,
                                                    initial_state=init_state)
                outputs = tf.reshape(outputs, [-1, self.hiddenSize])
                logits = MLP("logits", outputs, self.hiddenSize,
                             self.targetVocSize)
                probs = tf.nn.softmax(logits)
                wordVal = tf.argmax(probs, axis=1)
                return probs, wordVal, states, img_states, init_state
            else:
                #Training phase
                outputs, _ = tf.nn.dynamic_rnn(cells,
                                               seqVec,
                                               sequence_length=seqNums,
                                               initial_state=img_states)
                outputs = tf.reshape(outputs, [-1, self.hiddenSize])
                logits = MLP("logits", outputs, self.hiddenSize,
                             self.targetVocSize)
                probs = tf.nn.softmax(logits)
                return probs
Example #5
def stn_cnn_with_image_output(inputs, transformer_output_size, num_classes):
    """Fine grained classification with multiplex spatial transformation channels utilizing inception nets

    """
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('stn'):
            with tf.variable_scope('localization'):
                transformer_theta = localization_net_beta(
                    inputs, NUM_TRANSFORMER, NUM_THETA_PARAMS)
                transformer_theta_split = tf.split(transformer_theta,
                                                   NUM_TRANSFORMER,
                                                   axis=1)

            transformer_outputs = []
            for theta in transformer_theta_split:
                transformer_outputs.append(
                    transformer(inputs,
                                theta,
                                transformer_output_size,
                                sampling_kernel='bilinear'))

    return transformer_outputs
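
Since this variant returns the transformed crops rather than logits, it is handy for visualizing where each STN channel attends; a minimal sketch with freshly initialized (untrained) weights and assumed input sizes:

import numpy as np

inputs_ph = tf.placeholder(tf.float32, [1, 448, 448, 3])
crops = stn_cnn_with_image_output(inputs_ph, [224, 224], NUM_CLASSES)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    crop_images = sess.run(
        crops, feed_dict={inputs_ph: np.zeros((1, 448, 448, 3), np.float32)})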
Example #6
def read_and_decode(filename_queue):
    # Reconstructed head: the listing truncated this example mid-call, so the
    # function name and the TFRecordReader setup below are assumptions; only
    # the feature spec onward comes from the original snippet.
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized,
        features={
            "label": tf.FixedLenFeature([], tf.int64),
            "image": tf.FixedLenFeature([], tf.string)
        })
    img = tf.decode_raw(features["image"], tf.uint8)
    img = tf.reshape(img, [image_pixels, image_pixels, 3])
    img = tf.cast(img, tf.float32)
    label = tf.cast(features["label"], tf.int32)
    return img, label
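
Wiring for the reader above under the TF 1.x queue-based input pipeline (the tfrecord filename and batch parameters are placeholders; read_and_decode is the name assumed in the reconstruction above):

filename_queue = tf.train.string_input_producer(["train.tfrecords"])
img, label = read_and_decode(filename_queue)
img_batch, label_batch = tf.train.shuffle_batch(
    [img, label], batch_size=32, capacity=2000, min_after_dequeue=1000)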


images = tf.placeholder(tf.float32, [None, image_pixels, image_pixels, 3],
                        name="input/x_input")
labels = tf.placeholder(tf.int64, [None], name="input/y_input")

with slim.arg_scope(inception_v2_arg_scope()):
    logits, end_points = inception_v2(images,
                                      num_classes=classes,
                                      is_training=True)

exclude = ['InceptionV2/Logits']
variables_to_restore = slim.get_variables_to_restore(exclude=exclude)

one_hot_labels = slim.one_hot_encoding(labels, classes)
loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                       logits=logits)
total_loss = tf.losses.get_total_loss()
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(0.00002).minimize(loss=total_loss)
# Completed from context: the listing truncated this line mid-call; comparing
# labels against the argmax of the logits is the standard accuracy check.
correct_prediction = tf.equal(labels, tf.argmax(logits, 1))

Example #7
def train(train_record_file,
          train_log_step,
          train_param,
          val_record_file,
          val_log_step,
          labels_nums,
          data_shape,
          snapshot,
          snapshot_prefix):
    '''
    :param train_record_file: tfrecord file with the training data
    :param train_log_step: interval (in steps) for logging training progress
    :param train_param: training parameters [base_lr, max_steps]
    :param val_record_file: tfrecord file with the validation data
    :param val_log_step: interval (in steps) for logging validation progress
    :param labels_nums: number of labels (classes)
    :param data_shape: input data shape [batch_size, height, width, depths]
    :param snapshot: interval (in steps) for saving model snapshots
    :param snapshot_prefix: filename prefix for saved model checkpoints
    :return:
    '''
    [base_lr,max_steps]=train_param
    [batch_size,resize_height,resize_width,depths]=data_shape

    # Get the number of training and validation examples
    train_nums=get_example_nums(train_record_file)
    val_nums=get_example_nums(val_record_file)
    print('train nums:%d,val nums:%d'%(train_nums,val_nums))

    # Read images and labels from the tfrecord files.
    # Training data should generally be shuffled (shuffle=True).
    train_images, train_labels = read_records(train_record_file, resize_height, resize_width, type='normalization')
    train_images_batch, train_labels_batch = get_batch_images(train_images, train_labels,
                                                              batch_size=batch_size, labels_nums=labels_nums,
                                                              one_hot=True, shuffle=True)
    # Validation data does not need to be shuffled.
    val_images, val_labels = read_records(val_record_file, resize_height, resize_width, type='normalization')
    val_images_batch, val_labels_batch = get_batch_images(val_images, val_labels,
                                                          batch_size=batch_size, labels_nums=labels_nums,
                                                          one_hot=True, shuffle=False)

    # Define the model (input_images, input_labels, keep_prob and is_training
    # are placeholders defined elsewhere in the original script):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        out, end_points = inception_v2.inception_v2(inputs=input_images, num_classes=labels_nums, dropout_keep_prob=keep_prob, is_training=is_training)

    # Specify the loss function: losses defined via tf.losses are added to the
    # loss collection automatically, so no explicit add_loss() call is needed.
    tf.losses.softmax_cross_entropy(onehot_labels=input_labels, logits=out)  # cross-entropy loss (≈1.6 here)
    # slim.losses.add_loss(my_loss)
    loss = tf.losses.get_total_loss(add_regularization_losses=True)  # plus regularization losses (≈2.2 here)

    # Specify the optimization scheme:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=base_lr)


    # global_step = tf.Variable(0, trainable=False)
    # learning_rate = tf.train.exponential_decay(0.05, global_step, 150, 0.9)
    #
    # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    # # train_tensor = optimizer.minimize(loss, global_step)
    # train_op = slim.learning.create_train_op(loss, optimizer,global_step=global_step)


    # When the model contains `batch_norm` layers, the per-layer moving
    # `average` and `variance` statistics must be updated. These updates are
    # not part of the normal training step, so they must be run manually:
    # collect all pending update `op`s via `tf.get_collection`,
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # then use TensorFlow control flow to run the updates before the train op.
    with tf.control_dependencies(update_ops):
        # create_train_op that ensures that when we evaluate it to get the loss,
        # the update_ops are done and the gradient updates are computed.
        train_op = slim.learning.create_train_op(total_loss=loss, optimizer=optimizer)

    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)), tf.float32))
    # Training loop
    step_train(train_op, loss, accuracy,
               train_images_batch, train_labels_batch, train_nums, train_log_step,
               val_images_batch, val_labels_batch, val_nums, val_log_step,
               snapshot_prefix, snapshot)