Exemplo n.º 1
0
def build_lstm_forward(H, x, googlenet, phase, reuse):
    """Build the encoder + LSTM decoder forward graph.

    The input is shifted by a fixed mean of 117, encoded with
    ``googlenet_load.model`` and the (scaled) result is handed to
    ``build_lstm_inner``.  Each of the ``H['arch']['rnn_len']`` decoder
    outputs is projected to a 4-value box and a 2-way confidence logit.

    Args:
        H: hyper-parameter dict; every key read here lives under ``H['arch']``.
        x: input batch; the mean is subtracted from it with ``-=``.
        googlenet: passed through to ``googlenet_load.model``.
        phase: dropout is only inserted when this equals ``'train'``.
        reuse: forwarded to the ``'decoder'`` variable scope.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)`` with shapes
        ``[outer, rnn_len, 4]``, ``[outer, rnn_len, 2]`` and
        ``[outer, rnn_len, 2]``, where ``outer`` is
        ``grid_width * grid_height * batch_size``.
    """
    arch = H['arch']
    num_cells = arch['grid_width'] * arch['grid_height']
    rows = num_cells * arch['batch_size']
    x -= 117.
    Z = googlenet_load.model(x, googlenet, H)
    with tf.variable_scope('decoder', reuse=reuse):
        if arch['early_dropout'] and phase == 'train':
            Z = tf.nn.dropout(Z, 0.5)
        # Features are scaled by 0.01 before entering the decoder.
        scaled = Z * 0.01
        lstm_input = tf.reshape(scaled, (arch['batch_size'] * num_cells, 1024))
        lstm_outputs = build_lstm_inner(lstm_input, H)

        box_steps = []
        logit_steps = []
        for step in range(arch['rnn_len']):
            cell_out = lstm_outputs[step]
            if arch['late_dropout'] and phase == 'train':
                cell_out = tf.nn.dropout(cell_out, 0.5)
            # One pair of projection matrices per decoder step.
            w_box = tf.get_variable(
                'box_ip%d' % step,
                shape=(arch['lstm_size'], 4),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            w_conf = tf.get_variable(
                'conf_ip%d' % step,
                shape=(arch['lstm_size'], 2),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            # Box regression output is scaled by 50.
            step_boxes = tf.matmul(cell_out, w_box) * 50
            box_steps.append(tf.reshape(step_boxes, [rows, 1, 4]))
            step_logits = tf.matmul(cell_out, w_conf)
            logit_steps.append(tf.reshape(step_logits, [rows, 1, 2]))

        # Stack the per-step predictions along axis 1.
        pred_boxes = tf.concat(1, box_steps)
        pred_logits = tf.concat(1, logit_steps)
        # Softmax is applied on a 2-D view, then reshaped back.
        flat_logits = tf.reshape(pred_logits, [rows * arch['rnn_len'], 2])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [rows, arch['rnn_len'], 2])
    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 2
0
def build_lstm_forward(H, x, googlenet, phase, reuse):
    """Build the encoder + LSTM decoder forward graph.

    The input is shifted by a fixed mean of 117, encoded with
    ``googlenet_load.model`` and the (scaled) result is handed to
    ``build_lstm_inner``.  Each of the ``H['arch']['rnn_len']`` decoder
    outputs is projected to a 4-value box and a 2-way confidence logit.

    Args:
        H: hyper-parameter dict; every key read here lives under ``H['arch']``.
        x: input batch; the mean is subtracted from it with ``-=``.
        googlenet: passed through to ``googlenet_load.model``.
        phase: dropout is only inserted when this equals ``'train'``.
        reuse: forwarded to the ``'decoder'`` variable scope.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)`` with shapes
        ``[outer_size, rnn_len, 4]``, ``[outer_size, rnn_len, 2]`` and
        ``[outer_size, rnn_len, 2]``.
    """
    grid_size = H['arch']['grid_width'] * H['arch']['grid_height']
    outer_size = grid_size * H['arch']['batch_size']
    input_mean = 117.
    x -= input_mean
    Z = googlenet_load.model(x, googlenet, H)
    with tf.variable_scope('decoder', reuse=reuse):
        scale_down = 0.01
        if H['arch']['early_dropout'] and phase == 'train':
            Z = tf.nn.dropout(Z, 0.5)
        lstm_input = tf.reshape(Z * scale_down, (H['arch']['batch_size'] * grid_size, 1024))
        lstm_outputs = build_lstm_inner(lstm_input, H)

        pred_boxes = []
        pred_logits = []
        for i in range(H['arch']['rnn_len']):
            output = lstm_outputs[i]
            if H['arch']['late_dropout'] and phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            # The range must be symmetric around zero: passing a single
            # positional 0.1 only sets `minval` (maxval keeps its default),
            # which makes every projection weight start out positive.
            box_weights = tf.get_variable('box_ip%d' % i, shape=(H['arch']['lstm_size'], 4),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            conf_weights = tf.get_variable('conf_ip%d' % i, shape=(H['arch']['lstm_size'], 2),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            # Box regression output is scaled by 50.
            pred_boxes.append(tf.reshape(tf.matmul(output, box_weights) * 50,
                                         [outer_size, 1, 4]))
            pred_logits.append(tf.reshape(tf.matmul(output, conf_weights),
                                         [outer_size, 1, 2]))
        # Stack the per-step predictions along axis 1.
        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        # Softmax is applied on a 2-D view, then reshaped back.
        pred_logits_squash = tf.reshape(pred_logits,
                                        [outer_size * H['arch']['rnn_len'], 2])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(pred_confidences_squash,
                                      [outer_size, H['arch']['rnn_len'], 2])
    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 3
0
def inference(hypes, images, phase):
    """Run the GoogLeNet encoder on ``images`` and return its three outputs.

    In the ``'train'`` phase the result of ``googlenet_load.init(hypes)`` is
    first appended to the module-level ``encoder_net`` list; every phase then
    encodes with ``encoder_net[0]``.  A fixed mean of 117 is subtracted from
    ``images`` with ``-=`` before encoding.
    """
    if phase == 'train':
        net = googlenet_load.init(hypes)
        encoder_net.append(net)

    images -= 117.
    outputs = googlenet_load.model(images, encoder_net[0], hypes)
    cnn, early_feat, extra = outputs
    return cnn, early_feat, extra
Exemplo n.º 4
0
def build_overfeat_forward(H, x, googlenet, phase):
    """Build the forward graph that predicts one box per grid cell.

    The mean-shifted input is encoded with ``googlenet_load.model``; two
    affine heads taken from ``googlenet['W']`` / ``googlenet['B']`` then
    produce class logits (index 0) and box coordinates (index 1).

    Args:
        H: hyper-parameter dict; every key read here lives under ``H['arch']``.
        x: input batch; a mean of 117 is subtracted from it with ``-=``.
        googlenet: passed to the encoder and indexed for the head weights.
        phase: used to name the head ops; dropout is only inserted when it
            equals ``'train'`` and ``H['arch']['use_dropout']`` is truthy.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)`` with shapes
        ``[rows, 1, 4]``, ``[rows, num_classes]`` and ``[rows, num_classes]``,
        where ``rows`` is ``batch_size * grid_width * grid_height``.
    """
    x -= 117.
    Z = googlenet_load.model(x, googlenet, H)
    arch = H['arch']
    num_cells = arch['grid_width'] * arch['grid_height']
    if arch['use_dropout'] and phase == 'train':
        Z = tf.nn.dropout(Z, 0.5)

    # Classification head.
    raw_logits = tf.nn.xw_plus_b(Z, googlenet['W'][0], googlenet['B'][0],
                                 name=phase + '/logits_0')
    rows = arch['batch_size'] * num_cells
    pred_logits = tf.reshape(raw_logits, [rows, arch['num_classes']])
    pred_confidences = tf.nn.softmax(pred_logits)

    # Box regression head; its output is scaled by 100.
    raw_boxes = tf.nn.xw_plus_b(Z, googlenet['W'][1], googlenet['B'][1],
                                name=phase + '/logits_1')
    pred_boxes = tf.reshape(raw_boxes, [rows, 1, 4]) * 100
    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 5
0
def build_overfeat_forward(H, x, googlenet, phase):
    """Build the forward graph that predicts one box per grid cell.

    The mean-shifted input is encoded with ``googlenet_load.model``; two
    affine heads taken from ``googlenet['W']`` / ``googlenet['B']`` then
    produce class logits (index 0) and box coordinates (index 1).

    Args:
        H: hyper-parameter dict; every key read here lives under ``H['arch']``.
        x: input batch; a mean of 117 is subtracted from it with ``-=``.
        googlenet: passed to the encoder and indexed for the head weights.
        phase: used to name the head ops; dropout is only inserted when it
            equals ``'train'`` and ``H['arch']['use_dropout']`` is truthy.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)`` with shapes
        ``[rows, 1, 4]``, ``[rows, num_classes]`` and ``[rows, num_classes]``,
        where ``rows`` is ``batch_size * grid_width * grid_height``.
    """
    x -= 117.
    Z = googlenet_load.model(x, googlenet, H)
    arch = H['arch']
    num_cells = arch['grid_width'] * arch['grid_height']
    if arch['use_dropout'] and phase == 'train':
        Z = tf.nn.dropout(Z, 0.5)

    # Classification head.
    raw_logits = tf.nn.xw_plus_b(Z, googlenet['W'][0], googlenet['B'][0],
                                 name=phase + '/logits_0')
    rows = arch['batch_size'] * num_cells
    pred_logits = tf.reshape(raw_logits, [rows, arch['num_classes']])
    pred_confidences = tf.nn.softmax(pred_logits)

    # Box regression head; its output is scaled by 100.
    raw_boxes = tf.nn.xw_plus_b(Z, googlenet['W'][1], googlenet['B'][1],
                                name=phase + '/logits_1')
    pred_boxes = tf.reshape(raw_boxes, [rows, 1, 4]) * 100
    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 6
0
def build_forward(H, x, phase, reuse):
    '''
    Construct the forward model.

    Subtracts a fixed mean of 117 from `x`, encodes it with
    `googlenet_load.model`, optionally post-processes the features
    (`H['deconv']`, else `H['avg_pool_size'] > 1`) and decodes them into
    `H['rnn_len']` box / class-logit predictions per grid cell.

    Args:
        H: hyper-parameter dict; keys are read directly as `H[...]`.
        x: input batch; modified with `-=` before being encoded.
        phase: dropout is only inserted when this equals 'train'.
        reuse: forwarded to `googlenet_load.model` and to every variable
            scope opened here.

    Returns:
        (pred_boxes, pred_logits, pred_confidences), or, when
        `H['use_rezoom']` is truthy, the 5-tuple
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
        pred_boxes_deltas).  `pred_boxes_deltas` stays an empty list when
        `H['reregress']` is falsy.
    '''

    grid_size = H['grid_width'] * H['grid_height']
    # Number of rows once batch and grid cells are flattened together.
    outer_size = grid_size * H['batch_size']
    input_mean = 117.
    x -= input_mean
    cnn, early_feat = googlenet_load.model(x, H, reuse)
    early_feat_channels = H['early_feat_channels']
    # Keep only the first `early_feat_channels` channels of the early features.
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if H['deconv']:
        size = 3
        stride = 2
        pool_size = 5

        with tf.variable_scope("deconv", reuse=reuse):
            # Strided convolution over the full feature map.
            w = tf.get_variable(
                'conv_pool_w',
                shape=[
                    size, size, H['later_feat_channels'],
                    H['later_feat_channels']
                ],
                initializer=tf.random_normal_initializer(stddev=0.01))
            cnn_s = tf.nn.conv2d(cnn,
                                 w,
                                 strides=[1, stride, stride, 1],
                                 padding='SAME')
            # Only the first 256 channels of the strided map are average-pooled.
            cnn_s_pool = tf.nn.avg_pool(cnn_s[:, :, :, :256],
                                        ksize=[1, pool_size, pool_size, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='SAME')

            # NOTE(review): `tf_concat` and `deconv` are defined outside this
            # block; `tf_concat` is called as (axis, values) -- presumably a
            # version-compat wrapper around tf.concat, confirm.
            cnn_s_with_pool = tf_concat(3, [cnn_s_pool, cnn_s[:, :, :, 256:]])
            cnn_deconv = deconv(cnn_s_with_pool,
                                output_shape=[
                                    H['batch_size'], H['grid_height'],
                                    H['grid_width'], 256
                                ],
                                channels=[H['later_feat_channels'], 256])
            # The deconvolved map replaces the first 256 channels of `cnn`.
            cnn = tf_concat(3, (cnn_deconv, cnn[:, :, :, 256:]))

    elif H['avg_pool_size'] > 1:
        # Average-pool (stride 1, SAME padding) only the channels from index
        # 700 onward; the first 700 channels pass through unchanged.
        pool_size = H['avg_pool_size']
        cnn1 = cnn[:, :, :, :700]
        cnn2 = cnn[:, :, :, 700:]
        cnn2 = tf.nn.avg_pool(cnn2,
                              ksize=[1, pool_size, pool_size, 1],
                              strides=[1, 1, 1, 1],
                              padding='SAME')
        cnn = tf_concat(3, [cnn1, cnn2])

    # Flatten to one feature row per (image, grid cell).
    cnn = tf.reshape(cnn, [
        H['batch_size'] * H['grid_width'] * H['grid_height'],
        H['later_feat_channels']
    ])
    # Scope-level initializer: used by the get_variable calls below, none of
    # which pass their own.
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        scale_down = 0.01
        # `cnn` already has this shape after the reshape above, so the only
        # effective change here is the 0.01 scaling.
        lstm_input = tf.reshape(
            cnn * scale_down,
            (H['batch_size'] * grid_size, H['later_feat_channels']))
        if H['use_lstm']:
            lstm_outputs = build_lstm_inner(H, lstm_input)
        else:
            lstm_outputs = build_overfeat_inner(H, lstm_input)

        pred_boxes = []
        pred_logits = []
        for k in range(H['rnn_len']):
            output = lstm_outputs[k]
            if phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            # One pair of projection matrices per decoder step.
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(H['lstm_size'], 4))
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(H['lstm_size'],
                                                  H['num_classes']))

            # Box regression output is scaled by 50.
            pred_boxes_step = tf.reshape(
                tf.matmul(output, box_weights) * 50, [outer_size, 1, 4])

            pred_boxes.append(pred_boxes_step)
            pred_logits.append(
                tf.reshape(tf.matmul(output, conf_weights),
                           [outer_size, 1, H['num_classes']]))

        # Stack the per-step predictions along axis 1.
        pred_boxes = tf_concat(1, pred_boxes)
        pred_logits = tf_concat(1, pred_logits)
        # Softmax is applied on a 2-D view, then reshaped back.
        pred_logits_squash = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(
            pred_confidences_squash,
            [outer_size, H['rnn_len'], H['num_classes']])

        if H['use_rezoom']:
            pred_confs_deltas = []
            pred_boxes_deltas = []
            w_offsets = H['rezoom_w_coords']
            h_offsets = H['rezoom_h_coords']
            num_offsets = len(w_offsets) * len(h_offsets)
            # `rezoom` is defined outside this block; it is given the
            # predicted boxes and the truncated early features.
            rezoom_features = rezoom(H, pred_boxes, early_feat,
                                     early_feat_channels, w_offsets, h_offsets)
            if phase == 'train':
                rezoom_features = tf.nn.dropout(rezoom_features, 0.5)
            for k in range(H['rnn_len']):
                # Uses the raw decoder output (not the dropped-out copy from
                # the loop above) next to rezoom features scaled by 1/1000.
                delta_features = tf_concat(
                    1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
                dim = 128
                delta_weights1 = tf.get_variable(
                    'delta_ip1%d' % k,
                    shape=[
                        H['lstm_size'] + early_feat_channels * num_offsets, dim
                    ])
                # TODO: add dropout here ?
                ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
                if phase == 'train':
                    ip1 = tf.nn.dropout(ip1, 0.5)
                delta_confs_weights = tf.get_variable(
                    'delta_ip2%d' % k, shape=[dim, H['num_classes']])
                if H['reregress']:
                    # Box corrections are scaled by 5.
                    delta_boxes_weights = tf.get_variable('delta_ip_boxes%d' %
                                                          k,
                                                          shape=[dim, 4])
                    pred_boxes_deltas.append(
                        tf.reshape(
                            tf.matmul(ip1, delta_boxes_weights) * 5,
                            [outer_size, 1, 4]))
                # Confidence corrections are scaled by `rezoom_conf_scale`
                # (50 when the key is absent).
                scale = H.get('rezoom_conf_scale', 50)
                pred_confs_deltas.append(
                    tf.reshape(
                        tf.matmul(ip1, delta_confs_weights) * scale,
                        [outer_size, 1, H['num_classes']]))
            pred_confs_deltas = tf_concat(1, pred_confs_deltas)
            if H['reregress']:
                pred_boxes_deltas = tf_concat(1, pred_boxes_deltas)
            # Without reregress, pred_boxes_deltas is returned as an empty list.
            return pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas

    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 7
0
def build_forward(H, x, p1_x, p2_x, p3_x, p4_x, p5_x, p6_x, p7_x, p8_x, f_x,
                  phase, reuse):
    '''
    Construct the forward model from a main input plus five auxiliary inputs.

    Every input has a fixed mean of 117 subtracted.  `x` and `p1_x`..`p5_x`
    are each encoded by their own `googlenet_load` model function and the six
    feature maps are concatenated along the channel axis before decoding.
    `p6_x`, `p7_x`, `p8_x` and `f_x` are only mean-shifted and are not
    otherwise used.

    Args:
        H: hyper-parameter dict; keys are read directly as `H[...]`.
        x: main input batch; modified with `-=`.
        p1_x .. p8_x, f_x: auxiliary input batches; modified with `-=`.
        phase: dropout is only inserted when this equals 'train'.
        reuse: forwarded to the encoder calls and to the 'decoder' scope.

    Returns:
        (pred_boxes, pred_logits, pred_confidences), or, when
        `H['use_rezoom']` is truthy, the 5-tuple
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
        pred_boxes_deltas).  `pred_boxes_deltas` stays an empty list when
        `H['reregress']` is falsy.
    '''

    grid_size = H['grid_width'] * H['grid_height']
    outer_size = grid_size * H['batch_size']
    input_mean = 117.
    # Written out one statement per input on purpose: for array-like inputs
    # `-=` updates the caller's object in place, and a loop over a tuple
    # would not rebind these names when `-=` returns a new object.
    x -= input_mean
    p1_x -= input_mean
    p2_x -= input_mean
    p3_x -= input_mean
    p4_x -= input_mean
    p5_x -= input_mean
    p6_x -= input_mean
    p7_x -= input_mean
    p8_x -= input_mean
    f_x -= input_mean
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("x.shape %s" % str(x.get_shape()))

    cnn, early_feat = googlenet_load.model(x, H, reuse)
    p1_cnn, _ = googlenet_load.p1_model(p1_x, H, reuse)
    p2_cnn, _ = googlenet_load.p2_model(p2_x, H, reuse)
    p3_cnn, _ = googlenet_load.p3_model(p3_x, H, reuse)
    p4_cnn, _ = googlenet_load.p4_model(p4_x, H, reuse)
    p5_cnn, _ = googlenet_load.p5_model(p5_x, H, reuse)

    # Concatenate all encoded feature maps along the channel axis.
    streams = (p5_cnn, p4_cnn, p3_cnn, p2_cnn, p1_cnn, cnn)
    num_streams = len(streams)
    cnn = tf.concat(3, streams)

    print("p1_cnn: %s" % str(p1_cnn.get_shape()))
    print("cnn: %s" % str(cnn.get_shape()))

    early_feat_channels = H['early_feat_channels']
    # Keep only the first `early_feat_channels` channels of the early features.
    early_feat = early_feat[:, :, :, :early_feat_channels]

    # Flatten to one feature row per (image, grid cell); the width is the
    # per-model channel count times the number of concatenated maps.
    feature_dim = H['later_feat_channels'] * num_streams
    cnn = tf.reshape(cnn, [
        H['batch_size'] * H['grid_width'] * H['grid_height'],
        feature_dim
    ])
    # Scope-level initializer: used by the get_variable calls below, none of
    # which pass their own.
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        scale_down = 0.01
        lstm_input = tf.reshape(
            cnn * scale_down,
            (H['batch_size'] * grid_size, feature_dim))
        if H['use_lstm']:
            lstm_outputs = build_lstm_inner(H, lstm_input)
        else:
            lstm_outputs = build_overfeat_inner(H, lstm_input)

        pred_boxes = []
        pred_logits = []
        for k in range(H['rnn_len']):
            output = lstm_outputs[k]
            if phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            # One pair of projection matrices per decoder step.
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(H['lstm_size'], 4))
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(H['lstm_size'],
                                                  H['num_classes']))

            # Box regression output is scaled by 50.
            pred_boxes_step = tf.reshape(
                tf.matmul(output, box_weights) * 50, [outer_size, 1, 4])

            pred_boxes.append(pred_boxes_step)
            pred_logits.append(
                tf.reshape(tf.matmul(output, conf_weights),
                           [outer_size, 1, H['num_classes']]))

        # Stack the per-step predictions along axis 1.
        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        # Softmax is applied on a 2-D view, then reshaped back.
        pred_logits_squash = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(
            pred_confidences_squash,
            [outer_size, H['rnn_len'], H['num_classes']])

        if H['use_rezoom']:
            pred_confs_deltas = []
            pred_boxes_deltas = []
            w_offsets = H['rezoom_w_coords']
            h_offsets = H['rezoom_h_coords']
            num_offsets = len(w_offsets) * len(h_offsets)
            # Only the main model's early features are passed to `rezoom`.
            rezoom_features = rezoom(H, pred_boxes, early_feat,
                                     early_feat_channels, w_offsets, h_offsets)
            if phase == 'train':
                rezoom_features = tf.nn.dropout(rezoom_features, 0.5)
            for k in range(H['rnn_len']):
                # Uses the raw decoder output (not the dropped-out copy from
                # the loop above) next to rezoom features scaled by 1/1000.
                delta_features = tf.concat(
                    1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
                dim = 128
                delta_weights1 = tf.get_variable(
                    'delta_ip1%d' % k,
                    shape=[
                        H['lstm_size'] + early_feat_channels * num_offsets, dim
                    ])
                # TODO: add dropout here ?
                ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
                if phase == 'train':
                    ip1 = tf.nn.dropout(ip1, 0.5)
                delta_confs_weights = tf.get_variable(
                    'delta_ip2%d' % k, shape=[dim, H['num_classes']])
                if H['reregress']:
                    # Box corrections are scaled by 5.
                    delta_boxes_weights = tf.get_variable('delta_ip_boxes%d' %
                                                          k,
                                                          shape=[dim, 4])
                    pred_boxes_deltas.append(
                        tf.reshape(
                            tf.matmul(ip1, delta_boxes_weights) * 5,
                            [outer_size, 1, 4]))
                # Confidence corrections are scaled by `rezoom_conf_scale`
                # (50 when the key is absent).
                scale = H.get('rezoom_conf_scale', 50)
                pred_confs_deltas.append(
                    tf.reshape(
                        tf.matmul(ip1, delta_confs_weights) * scale,
                        [outer_size, 1, H['num_classes']]))
            pred_confs_deltas = tf.concat(1, pred_confs_deltas)
            if H['reregress']:
                pred_boxes_deltas = tf.concat(1, pred_boxes_deltas)
            # Without reregress, pred_boxes_deltas is returned as an empty list.
            return pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas

    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 8
0
def build_forward(H, x, googlenet, phase, reuse):
    '''
    Construct the forward model.

    Subtracts a fixed mean of 117 from `x`, encodes it with
    `googlenet_load.model`, optionally post-processes the features
    (`H['deconv']`, else `H['avg_pool_size'] > 1`) and decodes them into
    `H['rnn_len']` box / class-logit predictions per grid cell.  The feature
    width is hard-coded to 1024 throughout.

    Args:
        H: hyper-parameter dict; keys are read directly as `H[...]`.
        x: input batch; modified with `-=` before being encoded.
        googlenet: passed through to `googlenet_load.model`.
        phase: dropout is only inserted when this equals 'train'.
        reuse: forwarded to the variable scopes opened here.

    Returns:
        (pred_boxes, pred_logits, pred_confidences), or, when
        `H['use_rezoom']` is truthy, the 5-tuple
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
        pred_boxes_deltas).  `pred_boxes_deltas` stays an empty list when
        `H['reregress']` is falsy.
    '''

    grid_size = H['grid_width'] * H['grid_height']
    # Number of rows once batch and grid cells are flattened together.
    outer_size = grid_size * H['batch_size']
    input_mean = 117.
    x -= input_mean
    # The third value returned by the encoder is not used here.
    cnn, early_feat, _ = googlenet_load.model(x, googlenet, H)
    early_feat_channels = H['early_feat_channels']
    # Keep only the first `early_feat_channels` channels of the early features.
    early_feat = early_feat[:, :, :, :early_feat_channels]
    
    if H['deconv']:
        size = 3
        stride = 2
        pool_size = 5

        with tf.variable_scope("deconv", reuse=reuse):
            # Strided convolution over the full 1024-channel feature map.
            w = tf.get_variable('conv_pool_w', shape=[size, size, 1024, 1024],
                                initializer=tf.random_normal_initializer(stddev=0.01))
            cnn_s = tf.nn.conv2d(cnn, w, strides=[1, stride, stride, 1], padding='SAME')
            # Only the first 256 channels of the strided map are average-pooled.
            cnn_s_pool = tf.nn.avg_pool(cnn_s[:, :, :, :256], ksize=[1, pool_size, pool_size, 1],
                                        strides=[1, 1, 1, 1], padding='SAME')

            cnn_s_with_pool = tf.concat(3, [cnn_s_pool, cnn_s[:, :, :, 256:]])
            # `deconv` is defined outside this block.
            cnn_deconv = deconv(cnn_s_with_pool, output_shape=[H['batch_size'], H['grid_height'], H['grid_width'], 256], channels=[1024, 256])
            # The deconvolved map replaces the first 256 channels of `cnn`.
            cnn = tf.concat(3, (cnn_deconv, cnn[:, :, :, 256:]))

    elif H['avg_pool_size'] > 1:
        # Average-pool (stride 1, SAME padding) only the channels from index
        # 700 onward; the first 700 channels pass through unchanged.
        pool_size = H['avg_pool_size']
        cnn1 = cnn[:, :, :, :700]
        cnn2 = cnn[:, :, :, 700:]
        cnn2 = tf.nn.avg_pool(cnn2, ksize=[1, pool_size, pool_size, 1],
                              strides=[1, 1, 1, 1], padding='SAME')
        cnn = tf.concat(3, [cnn1, cnn2])

    # Flatten to one 1024-wide feature row per (image, grid cell).
    cnn = tf.reshape(cnn,
                     [H['batch_size'] * H['grid_width'] * H['grid_height'], 1024])
    # Scope-level initializer: used by the get_variable calls below, none of
    # which pass their own.
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        scale_down = 0.01
        # `cnn` already has this shape after the reshape above, so the only
        # effective change here is the 0.01 scaling.
        lstm_input = tf.reshape(cnn * scale_down, (H['batch_size'] * grid_size, 1024))
        if H['use_lstm']:
            lstm_outputs = build_lstm_inner(H, lstm_input)
        else:
            lstm_outputs = build_overfeat_inner(H, lstm_input)

        pred_boxes = []
        pred_logits = []
        for k in range(H['rnn_len']):
            output = lstm_outputs[k]
            if phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            # One pair of projection matrices per decoder step.
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(H['lstm_size'], 4))
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(H['lstm_size'], H['num_classes']))

            # Box regression output is scaled by 50.
            pred_boxes_step = tf.reshape(tf.matmul(output, box_weights) * 50,
                                         [outer_size, 1, 4])

            pred_boxes.append(pred_boxes_step)
            pred_logits.append(tf.reshape(tf.matmul(output, conf_weights),
                                         [outer_size, 1, H['num_classes']]))
 
        # Stack the per-step predictions along axis 1.
        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        # Softmax is applied on a 2-D view, then reshaped back.
        pred_logits_squash = tf.reshape(pred_logits,
                                        [outer_size * H['rnn_len'], H['num_classes']])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(pred_confidences_squash,
                                      [outer_size, H['rnn_len'], H['num_classes']])

        if H['use_rezoom']:
            pred_confs_deltas = []
            pred_boxes_deltas = []
            w_offsets = H['rezoom_w_coords']
            h_offsets = H['rezoom_h_coords']
            num_offsets = len(w_offsets) * len(h_offsets)
            # `rezoom` is defined outside this block; it is given the
            # predicted boxes and the truncated early features.
            rezoom_features = rezoom(H, pred_boxes, early_feat, early_feat_channels, w_offsets, h_offsets)
            if phase == 'train':
                rezoom_features = tf.nn.dropout(rezoom_features, 0.5)
            for k in range(H['rnn_len']):
                # Uses the raw decoder output (not the dropped-out copy from
                # the loop above) next to rezoom features scaled by 1/1000.
                delta_features = tf.concat(1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
                dim = 128
                delta_weights1 = tf.get_variable(
                                    'delta_ip1%d' % k,
                                    shape=[H['lstm_size'] + early_feat_channels * num_offsets, dim])
                # TODO: add dropout here ?
                ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
                if phase == 'train':
                    ip1 = tf.nn.dropout(ip1, 0.5)
                delta_confs_weights = tf.get_variable(
                                    'delta_ip2%d' % k,
                                    shape=[dim, H['num_classes']])
                if H['reregress']:
                    # Box corrections are scaled by 5.
                    delta_boxes_weights = tf.get_variable(
                                        'delta_ip_boxes%d' % k,
                                        shape=[dim, 4])
                    pred_boxes_deltas.append(tf.reshape(tf.matmul(ip1, delta_boxes_weights) * 5,
                                                        [outer_size, 1, 4]))
                # Confidence corrections are scaled by `rezoom_conf_scale`
                # (50 when the key is absent).
                scale = H.get('rezoom_conf_scale', 50) 
                pred_confs_deltas.append(tf.reshape(tf.matmul(ip1, delta_confs_weights) * scale,
                                                    [outer_size, 1, H['num_classes']]))
            pred_confs_deltas = tf.concat(1, pred_confs_deltas)
            if H['reregress']:
                pred_boxes_deltas = tf.concat(1, pred_boxes_deltas)
            # Without reregress, pred_boxes_deltas is returned as an empty list.
            return pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas

    return pred_boxes, pred_logits, pred_confidences
def build_forward(H, x, googlenet, phase, reuse):
    '''
    Construct the forward model.

    Subtracts a fixed mean of 117 from `x`, encodes it with
    `googlenet_load.model`, optionally average-pools part of the features
    (`H['arch']['avg_pool_size'] > 1`) and decodes them into
    `H['arch']['rnn_len']` box / class-logit predictions per grid cell.  The
    feature width is hard-coded to 1024.

    Side effect: `early_feat` is declared `global`, so the module-level name
    `early_feat` is overwritten on every call.

    Args:
        H: hyper-parameter dict; the keys read here live under `H['arch']`.
        x: input batch; modified with `-=` before being encoded.
        googlenet: passed through to `googlenet_load.model`.
        phase: dropout is only inserted when this equals 'train'.
        reuse: forwarded to the 'decoder' variable scope.

    Returns:
        (pred_boxes, pred_logits, pred_confidences), or, when
        `H['arch']['use_rezoom']` is truthy, the 5-tuple
        (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
        pred_boxes_deltas).  `pred_boxes_deltas` stays an empty list when
        `H['arch']['reregress']` is falsy.
    '''

    grid_size = H['arch']['grid_width'] * H['arch']['grid_height']
    # Number of rows once batch and grid cells are flattened together.
    outer_size = grid_size * H['arch']['batch_size']
    input_mean = 117.
    x -= input_mean
    # Both assignments to `early_feat` below write the module-level global.
    global early_feat
    # The third value returned by the encoder is not used here.
    Z, early_feat, _ = googlenet_load.model(x, googlenet, H)
    early_feat_channels = H['arch']['early_feat_channels']
    # Keep only the first `early_feat_channels` channels of the early features.
    early_feat = early_feat[:, :, :, :early_feat_channels]
    
    if H['arch']['avg_pool_size'] > 1:
        # Average-pool (stride 1, SAME padding) only the channels from index
        # 700 onward; the first 700 channels pass through unchanged.
        pool_size = H['arch']['avg_pool_size']
        Z1 = Z[:, :, :, :700]
        Z2 = Z[:, :, :, 700:]
        Z2 = tf.nn.avg_pool(Z2, ksize=[1, pool_size, pool_size, 1], strides=[1, 1, 1, 1], padding='SAME')
        Z = tf.concat(3, [Z1, Z2])
    # Flatten to one 1024-wide feature row per (image, grid cell).
    Z = tf.reshape(Z, [H['arch']['batch_size'] * H['arch']['grid_width'] * H['arch']['grid_height'], 1024])
    with tf.variable_scope('decoder', reuse=reuse):
        scale_down = 0.01
        # `Z` already has this shape after the reshape above, so the only
        # effective change here is the 0.01 scaling.
        lstm_input = tf.reshape(Z * scale_down, (H['arch']['batch_size'] * grid_size, 1024))
        if H['arch']['use_lstm']:
            lstm_outputs = build_lstm_inner(lstm_input, H)
        else:
            lstm_outputs = build_overfeat_inner(lstm_input, H)

        pred_boxes = []
        pred_logits = []
        # One initializer object is shared by every get_variable call below.
        initializer = tf.random_uniform_initializer(-0.1, 0.1)
        for k in range(H['arch']['rnn_len']):
            output = lstm_outputs[k]
            if phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            # One pair of projection matrices per decoder step.
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(H['arch']['lstm_size'], 4),
                                          initializer=initializer)
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(H['arch']['lstm_size'], H['arch']['num_classes']),
                                           initializer=initializer)

            # Box regression output is scaled by 50.
            pred_boxes_step = tf.reshape(tf.matmul(output, box_weights) * 50,
                                         [outer_size, 1, 4])

            pred_boxes.append(pred_boxes_step)
            pred_logits.append(tf.reshape(tf.matmul(output, conf_weights),
                                         [outer_size, 1, H['arch']['num_classes']]))
 
        # Stack the per-step predictions along axis 1.
        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        # Softmax is applied on a 2-D view, then reshaped back.
        pred_logits_squash = tf.reshape(pred_logits,
                                        [outer_size * H['arch']['rnn_len'], H['arch']['num_classes']])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(pred_confidences_squash,
                                      [outer_size, H['arch']['rnn_len'], H['arch']['num_classes']])

        if H['arch']['use_rezoom']:
            pred_confs_deltas = []
            pred_boxes_deltas = []
            w_offsets = H['arch']['rezoom_w_coords']
            h_offsets = H['arch']['rezoom_h_coords']
            num_offsets = len(w_offsets) * len(h_offsets)
            # `rezoom` is defined outside this block; it is given the
            # predicted boxes and the truncated early features.
            rezoom_features = rezoom(H, pred_boxes, early_feat, early_feat_channels, w_offsets, h_offsets)
            if phase == 'train':
                rezoom_features = tf.nn.dropout(rezoom_features, 0.5)
            for k in range(H['arch']['rnn_len']):
                # Uses the raw decoder output (not the dropped-out copy from
                # the loop above) next to rezoom features scaled by 1/1000.
                delta_features = tf.concat(1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
                dim = 128
                delta_weights1 = tf.get_variable(
                                    'delta_ip1%d' % k,
                                    shape=[H['arch']['lstm_size'] + early_feat_channels * num_offsets, dim],
                                    initializer=initializer)
                # TODO: add dropout here ?
                ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
                if phase == 'train':
                    ip1 = tf.nn.dropout(ip1, 0.5)
                delta_confs_weights = tf.get_variable(
                                    'delta_ip2%d' % k,
                                    shape=[dim, H['arch']['num_classes']],
                                    initializer=initializer)
                if H['arch']['reregress']:
                    # Box corrections are scaled by 5.
                    delta_boxes_weights = tf.get_variable(
                                        'delta_ip_boxes%d' % k,
                                        shape=[dim, 4],
                                        initializer=initializer)
                    pred_boxes_deltas.append(tf.reshape(tf.matmul(ip1, delta_boxes_weights) * 5,
                                                        [outer_size, 1, 4]))
                # Confidence corrections are scaled by `rezoom_conf_scale`
                # (50 when the key is absent).
                scale = H['arch'].get('rezoom_conf_scale', 50) 
                pred_confs_deltas.append(tf.reshape(tf.matmul(ip1, delta_confs_weights) * scale,
                                                    [outer_size, 1, H['arch']['num_classes']]))
            pred_confs_deltas = tf.concat(1, pred_confs_deltas)
            if H['arch']['reregress']:
                pred_boxes_deltas = tf.concat(1, pred_boxes_deltas)
            # Without reregress, pred_boxes_deltas is returned as an empty list.
            return pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas
    # NOTE(review): leftover debug op -- on the non-rezoom path `pred_logits`
    # is wrapped in a tf.Print that logs its shape whenever it is evaluated.
    pred_logits = tf.Print(pred_logits, [tf.shape(pred_logits)])
    return pred_boxes, pred_logits, pred_confidences
Exemplo n.º 10
0
def build_forward(H, x, googlenet, phase, reuse):
    """Assemble the forward graph: encoder features -> decoder -> predictions.

    Args:
        H: hyper-parameter dict; everything read here lives under H['arch'].
        x: input batch; 117. is subtracted from it before it is fed to
            ``googlenet_load.model``.
        googlenet: passed through unchanged to ``googlenet_load.model``.
        phase: dropout (keep probability 0.5) is applied only when this
            equals 'train'.
        reuse: forwarded to the 'decoder' variable scope.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)`` where the tensors are
        shaped [outer_size, rnn_len, 4], [outer_size, rnn_len, 2] and
        [outer_size, rnn_len, 2] respectively, with
        outer_size = grid_width * grid_height * batch_size.

        When H['arch']['use_rezoom'] is truthy, two more items are appended:
        ``pred_confs_deltas`` ([outer_size, rnn_len, 2]) and
        ``pred_boxes_deltas`` (a [outer_size, rnn_len, 4] tensor when
        H['arch']['reregress'] is truthy, otherwise an empty list).

    Side effects:
        Rebinds the module-level name ``early_feat`` to the channel-sliced
        early feature tensor.
    """
    global early_feat

    arch = H['arch']
    is_training = phase == 'train'

    grid_size = arch['grid_width'] * arch['grid_height']
    outer_size = grid_size * arch['batch_size']

    # Centre the input around the fixed mean used by this model.
    input_mean = 117.
    x -= input_mean

    Z, early_feat, _ = googlenet_load.model(x, googlenet, H)
    early_feat_channels = arch['early_feat_channels']
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if arch['avg_pool_size'] > 1:
        # Only the channels from index 700 onward are average-pooled; the
        # first 700 are passed through untouched and re-joined afterwards.
        pool_size = arch['avg_pool_size']
        untouched = Z[:, :, :, :700]
        pooled = Z[:, :, :, 700:]
        pooled = tf.nn.avg_pool(pooled,
                                ksize=[1, pool_size, pool_size, 1],
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        Z = tf.concat(3, [untouched, pooled])

    # Flatten to one 1024-wide row per (image, grid cell).
    flat_rows = arch['batch_size'] * arch['grid_width'] * arch['grid_height']
    Z = tf.reshape(Z, [flat_rows, 1024])

    with tf.variable_scope('decoder', reuse=reuse):
        scale_down = 0.01
        lstm_input = tf.reshape(Z * scale_down,
                                (arch['batch_size'] * grid_size, 1024))
        if arch['use_lstm']:
            lstm_outputs = build_lstm_inner(lstm_input, H)
        else:
            lstm_outputs = build_overfeat_inner(lstm_input, H)

        box_steps = []
        logit_steps = []
        initializer = tf.random_uniform_initializer(-0.1, 0.1)

        # One independent pair of projection matrices per decoder step.
        for step in range(arch['rnn_len']):
            features = lstm_outputs[step]
            if is_training:
                features = tf.nn.dropout(features, 0.5)

            box_weights = tf.get_variable(
                'box_ip%d' % step,
                shape=(arch['lstm_size'], 4),
                initializer=initializer)
            conf_weights = tf.get_variable(
                'conf_ip%d' % step,
                shape=(arch['lstm_size'], 2),
                initializer=initializer)

            raw_boxes = tf.matmul(features, box_weights) * 50
            box_steps.append(tf.reshape(raw_boxes, [outer_size, 1, 4]))

            raw_logits = tf.matmul(features, conf_weights)
            logit_steps.append(tf.reshape(raw_logits, [outer_size, 1, 2]))

        pred_boxes = tf.concat(1, box_steps)
        pred_logits = tf.concat(1, logit_steps)

        # Softmax is taken over a 2-D view, then folded back per step.
        flat_logits = tf.reshape(pred_logits,
                                 [outer_size * arch['rnn_len'], 2])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [outer_size, arch['rnn_len'], 2])

        if not arch['use_rezoom']:
            return pred_boxes, pred_logits, pred_confidences

        # --- rezoom head: refine predictions from features sampled at the
        # --- predicted boxes.
        conf_delta_steps = []
        box_delta_steps = []
        w_offsets = arch['rezoom_w_coords']
        h_offsets = arch['rezoom_h_coords']
        num_offsets = len(w_offsets) * len(h_offsets)
        rezoom_features = rezoom(H, pred_boxes, early_feat,
                                 early_feat_channels, w_offsets, h_offsets)
        if is_training:
            rezoom_features = tf.nn.dropout(rezoom_features, 0.5)

        # Loop-invariant settings for the per-step refinement layers.
        dim = 128
        scale = arch.get('rezoom_conf_scale', 50)
        delta_input_width = (arch['lstm_size'] +
                             early_feat_channels * num_offsets)

        for step in range(arch['rnn_len']):
            step_output = lstm_outputs[step]
            step_rezoom = rezoom_features[:, step, :] / 1000.
            delta_features = tf.concat(1, [step_output, step_rezoom])

            delta_weights1 = tf.get_variable(
                'delta_ip1%d' % step,
                shape=[delta_input_width, dim],
                initializer=initializer)
            # TODO: add dropout here ?
            hidden = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
            if is_training:
                hidden = tf.nn.dropout(hidden, 0.5)

            delta_confs_weights = tf.get_variable(
                'delta_ip2%d' % step,
                shape=[dim, 2],
                initializer=initializer)

            if arch['reregress']:
                delta_boxes_weights = tf.get_variable(
                    'delta_ip_boxes%d' % step,
                    shape=[dim, 4],
                    initializer=initializer)
                box_delta = tf.matmul(hidden, delta_boxes_weights) * 5
                box_delta_steps.append(
                    tf.reshape(box_delta, [outer_size, 1, 4]))

            conf_delta = tf.matmul(hidden, delta_confs_weights) * scale
            conf_delta_steps.append(
                tf.reshape(conf_delta, [outer_size, 1, 2]))

        pred_confs_deltas = tf.concat(1, conf_delta_steps)
        # Without re-regression the (empty) Python list is returned as-is.
        pred_boxes_deltas = box_delta_steps
        if arch['reregress']:
            pred_boxes_deltas = tf.concat(1, box_delta_steps)
        return (pred_boxes, pred_logits, pred_confidences,
                pred_confs_deltas, pred_boxes_deltas)