Example #1
    def _build_net(self, class_n):
        """Build the network.

        :param class_n: the number of label classes.

        """
        # 1st layer: FC (w/ ReLU) -> BatchNorm -> Dropout (optional)
        fc1 = fc(self._features, self.fc1_n, name="fc1")
        norm1 = tf.layers.batch_normalization(fc1,
                                              training=self._norm_mode,
                                              name="norm1")
        if self.do_dropout1:
            norm1 = tf.nn.dropout(norm1, self._keep_prob)

        # 2nd layer: FC (w/ ReLU) -> BatchNorm -> Dropout (optional)
        fc2 = fc(norm1, self.fc2_n, name="fc2")
        norm2 = tf.layers.batch_normalization(fc2,
                                              training=self._norm_mode,
                                              name="norm2")
        if self.do_dropout2:
            norm2 = tf.nn.dropout(norm2, self._keep_prob)

        # 3rd layer: FC (w/ ReLU) -> BatchNorm -> Dropout (optional)
        fc3 = fc(norm2, self.fc3_n, name="fc3")
        norm3 = tf.layers.batch_normalization(fc3,
                                              training=self._norm_mode,
                                              name="norm3")
        if self.do_dropout3:
            norm3 = tf.nn.dropout(norm3, self._keep_prob)

        # 4th layer: FC (no ReLU) producing the class logits
        fc4 = fc(norm3, class_n, relu=False, name="fc4")

        self._softmax = tf.nn.softmax(fc4, name="softmax")
        self._pred = tf.argmax(self._softmax, axis=1)
        self._acc = tf.reduce_mean(
            tf.cast(tf.equal(self._pred, self._labels), tf.float32))

        self._interlayers = [fc1, fc2, fc3, fc4]
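
The `fc` helper used throughout these examples is not shown. A minimal sketch of what it could look like, assuming a plain dense layer with an optional ReLU (the `relu=False` call on the logits layer above suggests this signature):

import tensorflow as tf

def fc(x, out_n, relu=True, name="fc"):
    """Hypothetical fully-connected helper: dense layer, optional ReLU."""
    with tf.variable_scope(name):
        in_n = x.get_shape().as_list()[-1]
        w = tf.get_variable("w", [in_n, out_n],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable("b", [out_n], initializer=tf.zeros_initializer())
        y = tf.matmul(x, w) + b
        return tf.nn.relu(y) if relu else y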
Example #2
    def resnet_v2(self, inputs):

        strides = [1, 2, 2, 2]
        blocks = [3, 4, 6, 3]
        num_conv = [64, 128, 256, 512]

        inputs = self.pad(inputs, 7)
        res = util.conv2d(inputs, [7, 7, 64],
                          stride=[1, 2, 2, 1],
                          padding='VALID',
                          name="conv_pre",
                          use_bias=False)
        res = tf.nn.max_pool(res,
                             ksize=[1, 3, 3, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME')

        for j, b in enumerate(blocks):
            block_stride = [1, strides[j], strides[j], 1]

            res = self.resnet_v2_bottleneck_block(res,
                                                  num_conv=num_conv[j],
                                                  strides=block_stride,
                                                  name="block" + str(j + 1) +
                                                  "-1",
                                                  projection_shortcut=True)

            for i in range(1, b):
                res = self.resnet_v2_bottleneck_block(
                    res,
                    num_conv=num_conv[j],
                    strides=[1, 1, 1, 1],
                    name="block" + str(j + 1) + "-" + str(i + 1))

        res = util.batch_norm(res, "post_bn", self.phase)
        res = tf.nn.relu(res)

        self.spatial = res

        # Average Pooling over both spatial dimensions
        res = tf.reduce_mean(res, axis=[1, 2])

        # With ImageNet classifier
        if self.with_classifier:
            res = util.fc(res, 1001, "imagenet_dense")

        return res
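
`resnet_v2_bottleneck_block` is defined elsewhere. A minimal sketch of a pre-activation (v2) bottleneck block consistent with the calls above; the `util.conv2d` and `util.batch_norm` signatures are assumed from their other uses in these examples:

def resnet_v2_bottleneck_block(self, inputs, num_conv, strides, name,
                               projection_shortcut=False):
    """Hypothetical pre-activation bottleneck: BN-ReLU precedes each conv."""
    with tf.variable_scope(name):
        pre = tf.nn.relu(util.batch_norm(inputs, "bn_pre", self.phase))

        # The projection shortcut matches channels (4*num_conv) and stride.
        if projection_shortcut:
            shortcut = util.conv2d(pre, [1, 1, 4 * num_conv], stride=strides,
                                   padding='SAME', name="shortcut",
                                   use_bias=False)
        else:
            shortcut = inputs

        res = util.conv2d(pre, [1, 1, num_conv], stride=[1, 1, 1, 1],
                          padding='SAME', name="conv1", use_bias=False)
        res = tf.nn.relu(util.batch_norm(res, "bn1", self.phase))
        res = util.conv2d(res, [3, 3, num_conv], stride=strides,
                          padding='SAME', name="conv2", use_bias=False)
        res = tf.nn.relu(util.batch_norm(res, "bn2", self.phase))
        res = util.conv2d(res, [1, 1, 4 * num_conv], stride=[1, 1, 1, 1],
                          padding='SAME', name="conv3", use_bias=False)
        return res + shortcut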
Example #3
    def generate_attention_maps(self, state, feature):

        h, c = state
        DIM = self.DIM_ATT

        # There are 5 body parts. `tmp` is shared for each joint within a body part.
        # In other words, we need 5 `tmp` terms, or equivalently, 1 `tmp` term with 5*DIM channels.

        # Compute map (Eq. 2)
        Ac = util.conv2d(feature, [1, 1, 5 * DIM],
                         "att_pose_c",
                         use_bias=False)
        Ah = util.fc(h, 5 * DIM, "att_pose_h", use_bias=False)
        bias = tf.get_variable("bias",
                               shape=[5 * DIM],
                               initializer=tf.zeros_initializer())

        # A_c: B x 7 x 7 x (5*DIM); A_h: B x (5*DIM).
        # Add A_h to A_c by broadcasting over the spatial dimensions.
        tmp = tf.nn.tanh(tf.reshape(Ah, [self.BATCH, 1, 1, 5 * DIM]) + Ac + bias)
        tmp = tf.split(tmp, 5, axis=3)  # Split into 5 groups

        joint_maps = []
        joint_tens = []
        for i in range(5):
            # v is just a 1x1 convolution.
            # NOTE: From paper, it is not entirely clear if v is shared between body parts.
            # We assume this is NOT the case.
            res = util.conv2d(tmp[i], [1, 1, self.J], "att_map_bp" + str(i))
            res = tf.reshape(res, [self.BATCH, 7, 7, self.J])

            # Normalization (Eq. 3)
            t_res = tf.nn.softmax(res, 3)

            l_res = tf.split(t_res, self.J, axis=3)

            joint_maps.append(l_res)  # For use in assemble_parts
            joint_tens.append(tf.expand_dims(
                t_res, axis=1))  # For convenient loss computation

        joint_tens = tf.concat(
            joint_tens, axis=1)  # Resulting shape: BATCH x 5 x 7 x 7 x J
        return joint_maps, joint_tens
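
`assemble_parts` (referenced in the comments above and called in Example #5) is not shown. One plausible sketch, assuming each body part's feature map F_t^P is the input feature weighted by the combined attention of that part's joint maps:

def assemble_parts(self, joint_maps, feature):
    """Hypothetical sketch: weight `feature` (B x 7 x 7 x 2048) by each body
    part's combined joint attention and stack the 5 parts along axis 1."""
    parts = []
    for maps in joint_maps:       # 5 parts; each a list of J maps, B x 7 x 7 x 1
        att = tf.add_n(maps)      # combine the part's joint maps: B x 7 x 7 x 1
        parts.append(tf.expand_dims(att * feature, axis=1))
    return tf.concat(parts, axis=1)   # B x 5 x 7 x 7 x 2048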
Example #4
  def generate_attention_maps( self, state, feature ):

    h, c  = state
    DIM   = self.DIM_ATT
    
    # Compute map (Eq. 2)
    Ac    = util.conv2d( feature, [1, 1, DIM], name="att_pose_c" )
    Ah    = util.fc( h, DIM, "att_pose_h" )

    # A_c: Bx7x7x32; A_h: Bx32.
    # Add A_h to A_c by broadcasting
    tmp   = tf.nn.tanh( tf.reshape( Ah, [self.BATCH, 1, 1, DIM] ) + Ac )

    # v
    res   = util.conv2d( tmp, [1, 1, self.J], name="att_map" )
    res   = tf.reshape( res, [self.BATCH, 7, 7, self.J] )

    # Normalization (Eq. 3)
    # t_res = tf.nn.softmax( res, axis=3 )      # Tensorflow 1.6 and higher
    t_res = tf.nn.softmax( res, dim=3 )         # This is deprecated in Tensorflow 1.8, but still works

    l_res = tf.split( t_res, self.J, axis=3 )

    return l_res, t_res
Example #5
  def build_graph( self ):
    # Extract DCN features (here ResNet v2, 50 layers)
    X           = tf.reshape( self.X, [self.BT, 224, 224, 3] )
    _           = self.net.resnet_v2( X )


    features    = tf.reshape( self.net.spatial, [self.BATCH, self.T, 7, 7, 2048] )
    self.features = features

    # Encoder
    with tf.variable_scope( self.scope ):
      with tf.variable_scope( "LSTM2" ) as scope:
        lstm  = tf.contrib.rnn.LSTMCell( self.DIM_LSTM, initializer=tf.contrib.layers.xavier_initializer() )
        state = lstm.zero_state( self.BATCH, tf.float32 )


        feat_T    = tf.split( features, self.T, axis=1 )

        outputs = []
        joint_maps = []
        for t in range( self.T ):
          # TODO: Each body part has its own variables
          if t > 0:
            scope.reuse_variables()

          # Generate Attention Map for each Joint and normalize
          h_rgb = tf.reshape( feat_T[t], [self.BATCH, 7, 7, 2048] )
          jm_list, jm_tensor  = self.generate_attention_maps( state, h_rgb )
          # jm_tensor: B x 5 x 7 x 7 x J; after the concat below: B x 5 x T x 7 x 7 x J
          joint_maps.append( tf.expand_dims( jm_tensor, axis=2 ) )

          # Assemble Parts
          body_parts  = self.assemble_parts( jm_list, h_rgb )   # F_t^P
          body_pooled = tf.reduce_max( body_parts, axis=1 )     # S_t

          # body_pooled = tf.reshape( body_pooled, [self.BATCH, 7*7*2048] )
          # Global pooling to save resources
          body_pooled   = tf.reduce_mean( body_pooled, axis=[1,2] )

          feat_out, state = lstm( body_pooled, state )

          outputs.append( tf.expand_dims( feat_out, axis=1 ) )


      h_lstm = tf.concat( outputs, axis=1 )
      h_lstm = tf.reshape( h_lstm, [self.BT, self.DIM_LSTM] )

      h_pred = util.fc( h_lstm, self.C, "classifier_pose" )
      h_pred = tf.reshape( h_pred, [self.BATCH, self.T, self.C] )

    # Loss computation
    var_list = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope = self.scope )
    reg_loss = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES, scope = self.scope )

    # Main losses: Softmax classification loss
    loss_pose_pre = tf.nn.sparse_softmax_cross_entropy_with_logits( logits = h_pred, labels = self.Y )
    loss_pose_T   = loss_pose_pre
    loss_pose_cls = tf.reduce_sum( loss_pose_pre, axis=1 )

    # Main losses: Joint map L2 regression loss
    joint_maps    = tf.concat( joint_maps, axis=2 )
    loss_pose_l2  = 0

    # Note, we got 5 sets of attention maps. Each have an L2 loss.
    for i in range( 5 ):
      diff          = tf.reshape( joint_maps[:,i] - self.P, [self.BATCH, self.T, 7*7*self.J] )
      loss_pose_l2 += 0.5 * tf.reduce_sum( diff ** 2, axis=2 )

    # Total Loss
    loss     = tf.reduce_mean(    self.l_action * loss_pose_pre
                                + self.l_pose   * loss_pose_l2 )

    reg_loss = self.lambda_l2 * tf.reduce_sum( reg_loss )  # Note: This is L2-regularization (see util.py)
    total    = reg_loss + loss

    # Optimizer + Batch Gradient Accumulation
    #opt         = tf.train.RMSPropOptimizer( learning_rate = self.LR )
    opt         = tf.train.AdamOptimizer( learning_rate = self.LR )

    accum_vars  = [tf.Variable( tf.zeros_like( tv.initialized_value() ), trainable = False ) for tv in var_list]
    zero_ops    = [tv.assign( tf.zeros_like( tv ) ) for tv in accum_vars] 

    gvs         = opt.compute_gradients( total, var_list )

    accum_ops   = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate( gvs )]
    op          = opt.apply_gradients( [(accum_vars[i], gv[1]) for i, gv in enumerate(gvs)] )

    # Exposing variables
    self.joint_maps = joint_maps
    self.reg_loss   = reg_loss
    self.loss_main_T= loss_pose_T
    self.loss_rpan  = loss_pose_cls
    self.loss_pose  = loss_pose_l2
    self.zero_ops   = zero_ops
    self.accum_ops  = accum_ops
    self.accum_vars = accum_vars

    self.result     = tf.nn.softmax( h_pred )
    self.op         = op
    self.total_loss = total
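
A sketch of how the accumulation ops above might be driven from a session. `model`, `next_minibatch`, and the accumulation count `N` are illustrative names; note that `op` applies the summed (not averaged) gradients:

# Hypothetical training step accumulating gradients over N mini-batches.
N = 4
sess.run(model.zero_ops)                        # reset the accumulators
for _ in range(N):
    feed = next_minibatch()                     # stand-in for the data feed
    sess.run(model.accum_ops, feed_dict=feed)   # add this batch's gradients
sess.run(model.op)                              # apply the accumulated gradients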
Example #6
        pos = pos.astype(numpy.int32)

        for j, t in enumerate(pos):
            t = min(t, h5data.shape[0] - 1)
            data[i * sample_size + j] = h5data[t]

            for c in cids:
                labels[i * sample_size + j, label_dict[c]] = 1.

    return data, labels


X = tf.placeholder("float", [None, 4096])
Y = tf.placeholder("float", [None, C])  # one-hot float labels for softmax_cross_entropy_with_logits

fc1 = util.fc(X, C, "fc1")
pre = tf.nn.softmax_cross_entropy_with_logits(logits=fc1, labels=Y)
loss = tf.reduce_mean(pre)

optimizer = tf.train.AdamOptimizer(learning_rate=LR, epsilon=1e-8)
gradvars = optimizer.compute_gradients(loss)

capped = [(tf.clip_by_value(grad, -5, 5), var) for grad, var in gradvars]
train_op = optimizer.apply_gradients(capped)

conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                      device_count={'GPU': 1})

train_files = [l.strip() for l in open("picked_train.txt")]
test_files = [l.strip() for l in open("picked_test.txt")]
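
A minimal session loop that could drive `train_op` above; `load_batch` is a hypothetical stand-in for whatever produces the `data`/`labels` arrays built earlier:

with tf.Session(config=conf) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):                      # illustrative epoch count
        for fname in train_files:
            data, labels = load_batch(fname)     # hypothetical loader
            _, batch_loss = sess.run([train_op, loss],
                                     feed_dict={X: data, Y: labels})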
Example #7
def generator_caffenet_fc6(input_feat, reuse=False, trainable=False):
    with tf.variable_scope('generator', reuse=reuse) as vs:
        assert input_feat.get_shape().as_list()[-1] == 4096
        # input_feat = tf.placeholder(tf.float32, shape=(None, 4096), name='feat')

        relu_defc7 = leaky_relu(
            fc(input_feat, 4096, name='defc7', trainable=trainable))
        relu_defc6 = leaky_relu(
            fc(relu_defc7, 4096, name='defc6', trainable=trainable))
        relu_defc5 = leaky_relu(
            fc(relu_defc6, 4096, name='defc5', trainable=trainable))
        reshaped_defc5 = tf.reshape(relu_defc5, [-1, 256, 4, 4])
        relu_deconv5 = leaky_relu(
            upconv(tf.transpose(reshaped_defc5, perm=[0, 2, 3, 1]),
                   256,
                   4,
                   2,
                   'deconv5',
                   biased=True,
                   trainable=trainable))
        relu_conv5_1 = leaky_relu(
            upconv(relu_deconv5,
                   512,
                   3,
                   1,
                   'conv5_1',
                   biased=True,
                   trainable=trainable))
        relu_deconv4 = leaky_relu(
            upconv(relu_conv5_1,
                   256,
                   4,
                   2,
                   'deconv4',
                   biased=True,
                   trainable=trainable))
        relu_conv4_1 = leaky_relu(
            upconv(relu_deconv4,
                   256,
                   3,
                   1,
                   'conv4_1',
                   biased=True,
                   trainable=trainable))
        relu_deconv3 = leaky_relu(
            upconv(relu_conv4_1,
                   128,
                   4,
                   2,
                   'deconv3',
                   biased=True,
                   trainable=trainable))
        relu_conv3_1 = leaky_relu(
            upconv(relu_deconv3,
                   128,
                   3,
                   1,
                   'conv3_1',
                   biased=True,
                   trainable=trainable))
        deconv2 = leaky_relu(
            upconv(relu_conv3_1,
                   64,
                   4,
                   2,
                   'deconv2',
                   biased=True,
                   trainable=trainable))
        deconv1 = leaky_relu(
            upconv(deconv2,
                   32,
                   4,
                   2,
                   'deconv1',
                   biased=True,
                   trainable=trainable))
        deconv0 = upconv(deconv1,
                         3,
                         4,
                         2,
                         'deconv0',
                         biased=True,
                         trainable=trainable)

    variables = tf.contrib.framework.get_variables(vs)

    return deconv0, variables, [
        relu_defc7, relu_defc6, relu_defc5, reshaped_defc5, relu_deconv5,
        relu_conv5_1, relu_deconv4, relu_conv4_1, relu_deconv3, relu_conv3_1,
        deconv2, deconv1, deconv0
    ]
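
`upconv` and `leaky_relu` are project helpers not shown here. A plausible minimal sketch, assuming `upconv(x, filters, kernel_size, stride, name, ...)` wraps a transposed convolution in NHWC layout:

def leaky_relu(x, alpha=0.2):
    # Hypothetical thin wrapper; tf.nn.leaky_relu exists in TF >= 1.4.
    return tf.nn.leaky_relu(x, alpha)

def upconv(x, filters, kernel_size, stride, name, biased=True, trainable=True):
    """Hypothetical transposed-convolution wrapper (NHWC)."""
    return tf.layers.conv2d_transpose(x, filters, kernel_size, strides=stride,
                                      padding='same', use_bias=biased,
                                      trainable=trainable, name=name)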
Example #8
    "c032": 15
}

#fps_dict = { l.strip().split(' ')[0] : float( l.strip().split(' ')[1] ) for l in open( "video_fps.txt" ) }

#X = tf.placeholder( "float", [None, crop_size*crop_size*3] )
Y = tf.placeholder("float", [None, C])

vgg16 = vgg.VGG()
vgg16.build()

fc1 = util.fc(vgg16.pool5, C, "fc1")
pre = tf.nn.softmax_cross_entropy_with_logits(logits=fc1, labels=Y)
loss = tf.reduce_mean(pre)

train_op = tf.train.AdamOptimizer(learning_rate=LR,
                                  epsilon=1e-8).minimize(loss)

conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                      device_count={'GPU': 1})