Example #1
def conv_net_shallow(input_batch, name):
    with tf.variable_scope(name):
        #conv1: 2*2@4/2
        conv1 = conv_relu('conv1',
                          input_batch,
                          kernel_size=2,
                          stride=2,
                          output_dim=4)
        print("conv1: ", conv1)
        #conv2: 2*2@8/1
        conv2 = conv_relu('conv2',
                          conv1,
                          kernel_size=2,
                          stride=1,
                          output_dim=8)
        print("conv2: ", conv2)
        #conv3: 2*2@16/2
        conv3 = conv_relu('conv3',
                          conv2,
                          kernel_size=2,
                          stride=2,
                          output_dim=16)
        print("conv3: ", conv3)
        #conv4: 2*2@16/1
        conv4 = conv_relu('conv4',
                          conv3,
                          kernel_size=2,
                          stride=1,
                          output_dim=16)
        print("conv4: ", conv4)
        # conv5 omitted in this shallow variant

        tanh = tf.nn.tanh(conv4)

        return tanh
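All of the examples on this page call a small set of shared layer helpers (conv, conv_relu, pool, drop). The snippet below is only a minimal sketch of what such helpers typically look like in TF1-style code; it is an assumption for readability, not the original utility module.

import tensorflow as tf

def conv(name, bottom, kernel_size, stride, output_dim, padding='SAME', bias_term=True):
    # bottom: [N, H, W, C]; variables are created under the scope `name`
    input_dim = bottom.get_shape().as_list()[-1]
    with tf.variable_scope(name):
        weights = tf.get_variable(
            'weights', [kernel_size, kernel_size, input_dim, output_dim],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        out = tf.nn.conv2d(bottom, weights, [1, stride, stride, 1], padding=padding)
        if bias_term:
            biases = tf.get_variable('biases', [output_dim],
                                     initializer=tf.constant_initializer(0.))
            out = tf.nn.bias_add(out, biases)
    return out

def conv_relu(name, bottom, kernel_size, stride, output_dim, padding='SAME'):
    return tf.nn.relu(conv(name, bottom, kernel_size, stride, output_dim, padding))

def pool(name, bottom, kernel_size, stride, padding='SAME'):
    return tf.nn.max_pool(bottom, [1, kernel_size, kernel_size, 1],
                          [1, stride, stride, 1], padding=padding, name=name)

def drop(bottom, keep_prob):
    # the second argument is treated here as the keep probability (assumption)
    return tf.nn.dropout(bottom, keep_prob)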
Example #2
def vgg_fc7_full_conv(input_batch, name, apply_dropout):
    pool5 = vgg_pool5(input_batch, name)
    with tf.variable_scope(name):
        # layer 6
        fc6 = conv_relu('fc6', pool5, kernel_size=7, stride=1, output_dim=4096)
        if apply_dropout: fc6 = drop(fc6, 0.5)
        # layer 7
        fc7 = conv_relu('fc7', fc6, kernel_size=1, stride=1, output_dim=4096)
        if apply_dropout: fc7 = drop(fc7, 0.5)
        return fc7
Example #3
def shapes_convnet(input_batch, hidden_dim=64, output_dim=64,
    scope='shapes_convnet', reuse=None):
    # input_batch has shape [N, H_im, W_im, 3]
    with tf.variable_scope(scope, reuse=reuse):
        conv_1 = conv_relu('conv_1', input_batch, kernel_size=10, stride=10,
            output_dim=hidden_dim, padding='VALID')
        conv_2 = conv_relu('conv_2', conv_1, kernel_size=1, stride=1,
            output_dim=output_dim)

    return conv_2
Example #4
def vgg_fc7_full_conv(input_batch, name, apply_dropout, reuse=None):
    pool5 = vgg_pool5(input_batch, name, reuse)
    with tf.variable_scope(name, reuse=reuse):
        # layer 6
        fc6 = conv_relu('fc6', pool5, kernel_size=7, stride=1, output_dim=4096)
        if apply_dropout: fc6 = drop(fc6, 0.5)
        # layer 7
        fc7 = conv_relu('fc7', fc6, kernel_size=1, stride=1, output_dim=4096)
        if apply_dropout: fc7 = drop(fc7, 0.5)
        return fc7
Example #5
def rpn_net(conv5,
            im_info,
            name,
            feat_stride=16,
            anchor_scales=(8, 16, 32),
            phase='TEST'):
    with tf.variable_scope(name):
        # rpn_conv/3x3
        rpn_conv = conv_relu('rpn_conv/3x3',
                             conv5,
                             kernel_size=3,
                             stride=1,
                             output_dim=512)
        # rpn_cls_score
        # Note that we've already subtracted the bg weights from fg weights
        # and do sigmoid instead of softmax (actually sigmoid is not needed
        # for ranking)
        rpn_cls_score = conv('rpn_cls_score',
                             rpn_conv,
                             kernel_size=1,
                             stride=1,
                             output_dim=len(anchor_scales) * 3)
        # rpn_bbox_pred
        rpn_bbox_pred = conv('rpn_bbox_pred',
                             rpn_conv,
                             kernel_size=1,
                             stride=1,
                             output_dim=len(anchor_scales) * 3 * 4)

        rois = tf.py_func(ProposalLayer(feat_stride, anchor_scales, phase),
                          [rpn_cls_score, rpn_bbox_pred, im_info],
                          [tf.float32],
                          stateful=False)[0]
        rois.set_shape([None, 5])
        return rois
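The tf.py_func call above wraps a plain Python callable. Below is a hedged sketch of the interface such a ProposalLayer object must satisfy: it receives numpy arrays and returns an [R, 5] float32 array of proposals, conventionally (batch_index, x1, y1, x2, y2). The placeholder body is illustrative only; a real implementation decodes rpn_bbox_pred against the anchors and keeps the top-scoring boxes.

import numpy as np

class ProposalLayer:
    def __init__(self, feat_stride, anchor_scales, phase):
        self.feat_stride = feat_stride
        self.anchor_scales = anchor_scales
        self.phase = phase

    def __call__(self, rpn_cls_score, rpn_bbox_pred, im_info):
        # Placeholder behaviour: return a single proposal covering the whole image.
        im_h, im_w = im_info[0][:2]
        return np.array([[0, 0, 0, im_w - 1, im_h - 1]], dtype=np.float32)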
Example #6
def text_objseg_full_conv(text_seq_batch, imcrop_batch, num_vocab, embed_dim,
    lstm_dim, mlp_hidden_dims, vgg_dropout, mlp_dropout):

    # Language feature (LSTM hidden state)
    feat_lang = lstm_net.lstm_net(text_seq_batch, num_vocab, embed_dim, lstm_dim)

    # Local image feature
    feat_vis = vgg_net.vgg_fc8_full_conv(imcrop_batch, 'vgg_local',
        apply_dropout=vgg_dropout)

    # Reshape and tile LSTM top
    featmap_H, featmap_W = feat_vis.get_shape().as_list()[1:3]
    N, D_text = feat_lang.get_shape().as_list()
    feat_lang = tf.tile(tf.reshape(feat_lang, [N, 1, 1, D_text]),
        [1, featmap_H, featmap_W, 1])

    # L2-normalize the features (except for spatial_batch)
    # and concatenate them along axis 3 (channel dimension)
    spatial_batch = tf.convert_to_tensor(generate_spatial_batch(N, featmap_H, featmap_W))
    feat_all = tf.concat(axis=3, values=[tf.nn.l2_normalize(feat_lang, 3),
                                         tf.nn.l2_normalize(feat_vis, 3),
                                         spatial_batch])

    # MLP classifier over the concatenated feature
    with tf.variable_scope('classifier'):
        mlp_l1 = conv_relu('mlp_l1', feat_all, kernel_size=1, stride=1,
            output_dim=mlp_hidden_dims)
        if mlp_dropout: mlp_l1 = drop(mlp_l1, 0.5)
        mlp_l2 = conv('mlp_l2', mlp_l1, kernel_size=1, stride=1, output_dim=1)

    return mlp_l2
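Several of these examples concatenate a spatial_batch produced by generate_spatial_batch. A minimal sketch is given below, assuming it returns an [N, H, W, 8] float32 array of normalized per-cell box coordinates; the exact channel layout is an assumption.

import numpy as np

def generate_spatial_batch(N, featmap_H, featmap_W):
    # Assumed channel layout per cell:
    # (x_min, y_min, x_max, y_max, x_center, y_center, 1/W, 1/H), coordinates in [-1, 1]
    spatial_batch_val = np.zeros((N, featmap_H, featmap_W, 8), dtype=np.float32)
    for h in range(featmap_H):
        for w in range(featmap_W):
            xmin = float(w) / featmap_W * 2 - 1
            xmax = float(w + 1) / featmap_W * 2 - 1
            xctr = (xmin + xmax) / 2
            ymin = float(h) / featmap_H * 2 - 1
            ymax = float(h + 1) / featmap_H * 2 - 1
            yctr = (ymin + ymax) / 2
            spatial_batch_val[:, h, w, :] = [xmin, ymin, xmax, ymax, xctr, yctr,
                                             1. / featmap_W, 1. / featmap_H]
    return spatial_batch_val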
Example #7
    def forward(self, imcrop_batch, text_seq_batch, is_training=True):
        num_vocab, embed_dim, lstm_dim, mlp_hidden_dims = self.num_vocab, self.embed_dim, self.lstm_dim, self.mlp_hidden_dims
        deeplab_dropout = self.kwargs.get('deeplab_dropout', False)
        mlp_dropout = self.kwargs.get('mlp_dropout', False)

        with tf.variable_scope(self.model_name):
            # Language feature (LSTM hidden state)
            feat_lang = lstm_net.lstm_net(text_seq_batch, num_vocab, embed_dim,
                                          lstm_dim)[0]

            # Local image feature
            feat_vis = deeplab.deeplab_fc8_full_conv(
                imcrop_batch, 'deeplab', apply_dropout=deeplab_dropout)

            # Reshape and tile LSTM top
            featmap_H, featmap_W = feat_vis.get_shape().as_list()[1:3]
            N, D_text = feat_lang.get_shape().as_list()
            feat_lang = tf.tile(tf.reshape(feat_lang, [N, 1, 1, D_text]),
                                [1, featmap_H, featmap_W, 1])

            # L2-normalize the features (except for spatial_batch)
            # and concatenate them along axis 3 (channel dimension)
            spatial_batch = tf.convert_to_tensor(
                generate_spatial_batch(N, featmap_H, featmap_W))
            feat_all = tf.concat(axis=3,
                                 values=[
                                     tf.nn.l2_normalize(feat_lang, 3),
                                     tf.nn.l2_normalize(feat_vis, 3),
                                     spatial_batch
                                 ])

            # MLP classifier over the concatenated feature
            with tf.variable_scope('classifier'):
                mlp_l1 = conv_relu('mlp_l1',
                                   feat_all,
                                   kernel_size=1,
                                   stride=1,
                                   output_dim=mlp_hidden_dims)
                if mlp_dropout:
                    mlp_l1 = drop(mlp_l1, 0.5)
                mlp_l2 = conv('mlp_l2',
                              mlp_l1,
                              kernel_size=1,
                              stride=1,
                              output_dim=1)

                upsample8s = deconv('upsample8s',
                                    mlp_l2,
                                    kernel_size=16,
                                    stride=8,
                                    output_dim=1,
                                    bias_term=False)

        return upsample8s
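The forward pass above ends with a deconv('upsample8s', ...) call that upsamples the 1-channel score map by 8x. A hedged sketch of such a helper is shown below, assuming it is a plain transposed convolution; the original may additionally initialize the filter with bilinear-interpolation weights.

import tensorflow as tf

def deconv(name, bottom, kernel_size, stride, output_dim, bias_term=True, padding='SAME'):
    # Transposed convolution whose output spatial size is scaled by `stride` (assumption)
    input_dim = bottom.get_shape().as_list()[-1]
    batch_size = tf.shape(bottom)[0]
    in_h, in_w = tf.shape(bottom)[1], tf.shape(bottom)[2]
    output_shape = tf.stack([batch_size, in_h * stride, in_w * stride, output_dim])
    with tf.variable_scope(name):
        weights = tf.get_variable(
            'weights', [kernel_size, kernel_size, output_dim, input_dim],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        out = tf.nn.conv2d_transpose(bottom, weights, output_shape,
                                     [1, stride, stride, 1], padding=padding)
        if bias_term:
            biases = tf.get_variable('biases', [output_dim],
                                     initializer=tf.constant_initializer(0.))
            out = tf.nn.bias_add(out, biases)
    return out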
Example #8
def vs_multilayer(input_batch, name, middle_layer_dim=1000, reuse=False):
    with tf.variable_scope(name):
        if reuse:
            print(name + " reuses variables")
            tf.get_variable_scope().reuse_variables()
        else:
            print(name + " doesn't reuse variables")

        layer1 = conv_relu('layer1', input_batch,
                           kernel_size=1, stride=1, output_dim=middle_layer_dim)
        sim_score = conv('layer2', layer1,
                         kernel_size=1, stride=1, output_dim=3)
    return sim_score
Example #9
def text_objseg_full_conv(text_seq_batch, imcrop_batch, num_vocab, embed_dim,
                          lstm_dim, mlp_hidden_dims, deeplab_dropout,
                          mlp_dropout, is_training):

    # Language feature (LSTM hidden state)
    feat_lang = lstm_net.lstm_net(text_seq_batch, num_vocab, embed_dim,
                                  lstm_dim)[0]

    #deeplab101
    net = deeplab101.DeepLabResNetModel({'data': imcrop_batch},
                                        is_training=is_training)
    feat_vis = net.layers['fc1_voc12']

    # # Local image feature
    # feat_vis = deeplab.deeplab_fc8_full_conv(imcrop_batch, 'deeplab',
    #     apply_dropout=deeplab_dropout)

    # Reshape and tile LSTM top
    featmap_H, featmap_W = feat_vis.get_shape().as_list()[1:3]
    N, D_text = feat_lang.get_shape().as_list()
    feat_lang = tf.tile(tf.reshape(feat_lang, [N, 1, 1, D_text]),
                        [1, featmap_H, featmap_W, 1])

    # L2-normalize the features (except for spatial_batch)
    # and concatenate them along axis 3 (channel dimension)
    spatial_batch = tf.convert_to_tensor(
        generate_spatial_batch(N, featmap_H, featmap_W))
    feat_all = tf.concat(axis=3,
                         values=[
                             tf.nn.l2_normalize(feat_lang, 3),
                             tf.nn.l2_normalize(feat_vis, 3), spatial_batch
                         ])

    # MLP classifier over the concatenated feature
    with tf.variable_scope('classifier'):
        mlp_l1 = conv_relu('mlp_l1',
                           feat_all,
                           kernel_size=1,
                           stride=1,
                           output_dim=mlp_hidden_dims)
        if mlp_dropout: mlp_l1 = drop(mlp_l1, 0.5)
        mlp_l2 = conv('mlp_l2', mlp_l1, kernel_size=1, stride=1, output_dim=1)

    return mlp_l2
Example #10
def vgg_conv5(input_batch, name, reuse=None):
    with tf.variable_scope(name, reuse=reuse):
        # layer 1
        conv1_1 = conv_relu('conv1_1', input_batch,
                            kernel_size=3, stride=1, output_dim=64)
        conv1_2 = conv_relu('conv1_2', conv1_1,
                            kernel_size=3, stride=1, output_dim=64)
        pool1 = pool('pool1', conv1_2, kernel_size=2, stride=2)
        # layer 2
        conv2_1 = conv_relu('conv2_1', pool1,
                            kernel_size=3, stride=1, output_dim=128)
        conv2_2 = conv_relu('conv2_2', conv2_1,
                            kernel_size=3, stride=1, output_dim=128)
        pool2 = pool('pool2', conv2_2, kernel_size=2, stride=2)
        # layer 3
        conv3_1 = conv_relu('conv3_1', pool2,
                            kernel_size=3, stride=1, output_dim=256)
        conv3_2 = conv_relu('conv3_2', conv3_1,
                            kernel_size=3, stride=1, output_dim=256)
        conv3_3 = conv_relu('conv3_3', conv3_2,
                            kernel_size=3, stride=1, output_dim=256)
        pool3 = pool('pool3', conv3_3, kernel_size=2, stride=2)
        # layer 4
        conv4_1 = conv_relu('conv4_1', pool3,
                            kernel_size=3, stride=1, output_dim=512)
        conv4_2 = conv_relu('conv4_2', conv4_1,
                            kernel_size=3, stride=1, output_dim=512)
        conv4_3 = conv_relu('conv4_3', conv4_2,
                            kernel_size=3, stride=1, output_dim=512)
        pool4 = pool('pool4', conv4_3, kernel_size=2, stride=2)
        # layer 5
        conv5_1 = conv_relu('conv5_1', pool4,
                            kernel_size=3, stride=1, output_dim=512)
        conv5_2 = conv_relu('conv5_2', conv5_1,
                            kernel_size=3, stride=1, output_dim=512)
        conv5_3 = conv_relu('conv5_3', conv5_2,
                            kernel_size=3, stride=1, output_dim=512)
    return conv5_3
Example #11
def vgg_pool5(input_batch, name):
    with tf.variable_scope(name):
        # layer 1
        conv1_1 = conv_relu('conv1_1', input_batch,
                            kernel_size=3, stride=1, output_dim=64)
        conv1_2 = conv_relu('conv1_2', conv1_1,
                            kernel_size=3, stride=1, output_dim=64)
        pool1 = pool('pool1', conv1_2, kernel_size=2, stride=2)
        # layer 2
        conv2_1 = conv_relu('conv2_1', pool1,
                            kernel_size=3, stride=1, output_dim=128)
        conv2_2 = conv_relu('conv2_2', conv2_1,
                            kernel_size=3, stride=1, output_dim=128)
        pool2 = pool('pool2', conv2_2, kernel_size=2, stride=2)
        # layer 3
        conv3_1 = conv_relu('conv3_1', pool2,
                            kernel_size=3, stride=1, output_dim=256)
        conv3_2 = conv_relu('conv3_2', conv3_1,
                            kernel_size=3, stride=1, output_dim=256)
        conv3_3 = conv_relu('conv3_3', conv3_2,
                            kernel_size=3, stride=1, output_dim=256)
        pool3 = pool('pool3', conv3_3, kernel_size=2, stride=2)
        # layer 4
        conv4_1 = conv_relu('conv4_1', pool3,
                            kernel_size=3, stride=1, output_dim=512)
        conv4_2 = conv_relu('conv4_2', conv4_1,
                            kernel_size=3, stride=1, output_dim=512)
        conv4_3 = conv_relu('conv4_3', conv4_2,
                            kernel_size=3, stride=1, output_dim=512)
        pool4 = pool('pool4', conv4_3, kernel_size=2, stride=2)
        # layer 5
        conv5_1 = conv_relu('conv5_1', pool4,
                            kernel_size=3, stride=1, output_dim=512)
        conv5_2 = conv_relu('conv5_2', conv5_1,
                            kernel_size=3, stride=1, output_dim=512)
        conv5_3 = conv_relu('conv5_3', conv5_2,
                            kernel_size=3, stride=1, output_dim=512)
        pool5 = pool('pool5', conv5_3, kernel_size=2, stride=2)
        return pool5
Example #12
    def __init__(self, input_batch, masking_batch, grad_scaling,
                 scope='vgg_net', reuse=None):
        with tf.variable_scope(scope, reuse=reuse):
            # Adversarial layer
            W_adv = tf.get_variable('adv_weights', [1] + input_shape,
                                    initializer=tf.contrib.layers.xavier_initializer())
            # channel mean is not subtracted in input_batch, so it's the original masked image
            W_adv1 = tf.where(masking_batch,
                              tf.zeros(tf.shape(W_adv), dtype=tf.float32), W_adv)
            W_adv2 = tf.where(tf.greater(input_batch + grad_scaling * W_adv1, 255.0),
                              (1. / grad_scaling) * (255.0 - input_batch), W_adv1)
            W_adv3 = tf.where(tf.less(input_batch + grad_scaling * W_adv2, 0.0),
                              (-1. / grad_scaling) * input_batch, W_adv2)
            # we subtract the channel mean before passing it to vgg16
            adv_img = input_batch + grad_scaling * W_adv3 - channel_mean
            self.W_adv3 = W_adv3
            self.adv_input = adv_img
            # layer 1
            conv1_1 = conv_relu('conv1_1', adv_img,
                                kernel_size=3, stride=1, output_dim=64)
            conv1_2 = conv_relu('conv1_2', conv1_1,
                                kernel_size=3, stride=1, output_dim=64)
            pool1 = pool('pool1', conv1_2, kernel_size=2, stride=2)
            # layer 2
            conv2_1 = conv_relu('conv2_1', pool1,
                                kernel_size=3, stride=1, output_dim=128)
            conv2_2 = conv_relu('conv2_2', conv2_1,
                                kernel_size=3, stride=1, output_dim=128)
            pool2 = pool('pool2', conv2_2, kernel_size=2, stride=2)
            # layer 3
            conv3_1 = conv_relu('conv3_1', pool2,
                                kernel_size=3, stride=1, output_dim=256)
            conv3_2 = conv_relu('conv3_2', conv3_1,
                                kernel_size=3, stride=1, output_dim=256)
            conv3_3 = conv_relu('conv3_3', conv3_2,
                                kernel_size=3, stride=1, output_dim=256)
            pool3 = pool('pool3', conv3_3, kernel_size=2, stride=2)
            # layer 4
            conv4_1 = conv_relu('conv4_1', pool3,
                                kernel_size=3, stride=1, output_dim=512)
            conv4_2 = conv_relu('conv4_2', conv4_1,
                                kernel_size=3, stride=1, output_dim=512)
            conv4_3 = conv_relu('conv4_3', conv4_2,
                                kernel_size=3, stride=1, output_dim=512)
            pool4 = pool('pool4', conv4_3, kernel_size=2, stride=2)
            # layer 5
            conv5_1 = conv_relu('conv5_1', pool4,
                                kernel_size=3, stride=1, output_dim=512)
            conv5_2 = conv_relu('conv5_2', conv5_1,
                                kernel_size=3, stride=1, output_dim=512)
            conv5_3 = conv_relu('conv5_3', conv5_2,
                                kernel_size=3, stride=1, output_dim=512)
            pool5 = pool('pool5', conv5_3, kernel_size=2, stride=2)
            self.image_feat_grid = pool5