Пример #1
0
def interp(w, i, channel_dim):
    '''
    Bilinearly interpolate channel vectors of w at fractional positions.

    Input:
        w: A 4D block tensor of shape (n, h, w, c)
        i: Query points, one row (x, y, z) per point. It is sliced as
            i[:, 0:1], i[:, 1:2] and i[:, 2:3], so it has to be 2-D.
            Column 0 is only cast to int32; columns 1 and 2 are floats
            and are rounded down and up to find the four neighbours.
        channel_dim: Size of the channel axis; w is flattened to rows of
            this many values before the lookup.
    Output:
        One row of channel_dim interpolated values per row of i, i.e. the
        value w[x_k, y_k, z_k, :] would have if y_k and z_k could be
        used as fractional indices.
    '''
    flat_w = tf.reshape(w, [-1, channel_dim])  # gather works on rows

    def corner(round_y, round_z):
        # Integer (x, y, z) rows for one of the four surrounding cells.
        return tf.to_int32(
            tf_concat(
                1, [i[:, 0:1], round_y(i[:, 1:2]),
                    round_z(i[:, 2:3])]))

    # Neighbour order: upper-left, upper-right, lower-left, lower-right
    # ("upper" = floor of column 1, "left" = floor of column 2).
    roundings = ((tf.floor, tf.floor), (tf.floor, tf.ceil),
                 (tf.ceil, tf.floor), (tf.ceil, tf.ceil))
    corners = [corner(round_y, round_z) for round_y, round_z in roundings]

    # to_idx is defined elsewhere; it is expected to map each (x, y, z)
    # row to a row number of the flattened tensor.
    flat_indices = [to_idx(coords, tf.shape(w)) for coords in corners]
    top_left, top_right, bottom_left, bottom_right = [
        tf.gather(flat_w, rows) for rows in flat_indices
    ]

    # Fractional parts act as blend weights (0 -> floor side, 1 -> ceil side).
    frac_z = tf.expand_dims(i[:, 2] - tf.floor(i[:, 2]), 1)
    frac_y = tf.expand_dims(i[:, 1] - tf.floor(i[:, 1]), 1)

    top = (1 - frac_z) * top_left + (frac_z) * top_right
    bottom = (1 - frac_z) * bottom_left + (frac_z) * bottom_right
    return (1 - frac_y) * top + (frac_y) * bottom
Пример #2
0
def to_x1y1x2y2(box):
    '''
    Convert boxes from centre/size columns to corner columns.

    Input:
        box: 2-D tensor whose columns are (centre x, centre y, width, height).
    Output:
        Tensor with columns (x1, y1, x2, y2). Width and height are clamped
        to a minimum of 1 before being halved around the centre.
    '''
    width = tf.maximum(box[:, 2:3], 1)
    height = tf.maximum(box[:, 3:4], 1)
    x_min = box[:, 0:1] - width / 2
    x_max = box[:, 0:1] + width / 2
    y_min = box[:, 1:2] - height / 2
    y_max = box[:, 1:2] + height / 2
    corners = [x_min, y_min, x_max, y_max]
    return tf_concat(1, corners)
def rezoom(
        H, pred_boxes, early_feat, early_feat_channels, w_offsets, h_offsets
):
    '''
    Sample a feature map on a grid of interpolation points around each
    predicted box.

    With the predicted object center at X, len(w_offsets) == 3 and
    len(h_offsets) == 5, the sampled grid looks like this:

    [o o o]
    [o o o]
    [o X o]
    [o o o]
    [o o o]

    Every position is read from the feature map with bilinear interpolation.

    Output:
        Tensor reshaped to
        [batch_size * grid_height * grid_width, rnn_len,
         len(w_offsets) * len(h_offsets) * early_feat_channels].
    '''
    num_offsets = len(w_offsets) * len(h_offsets)
    num_cells = H['grid_width'] * H['grid_height'] * H['batch_size']

    # One block of sample points per offset pair; w_offset varies slowest
    # and h_offset fastest, which fixes the layout of the first reshape.
    sample_points = tf_concat(0, [
        train_utils.bilinear_select(
            H, pred_boxes, early_feat, early_feat_channels, dx, dy
        )
        for dx in w_offsets
        for dy in h_offsets
    ])
    sampled = train_utils.interp(
        early_feat, sample_points, early_feat_channels
    )

    # [offset, cell, rnn step, channel] -> [cell, rnn step, offset, channel]
    by_offset = tf.reshape(
        sampled,
        [num_offsets, num_cells, H['rnn_len'], early_feat_channels]
    )
    by_cell = tf.transpose(by_offset, [1, 2, 0, 3])

    # Merge the offset and channel axes into one feature vector.
    return tf.reshape(
        by_cell,
        [num_cells, H['rnn_len'], num_offsets * early_feat_channels]
    )
Пример #4
0
def bilinear_select(H, pred_boxes, early_feat, early_feat_channels, w_offset,
                    h_offset):
    '''
    Compute the (batch id, y, x) sample positions used when rezooming into
    the fine feature map.

    For every predicted box the sample point is
    box centre + offset * box size + centre of the grid cell, converted from
    pixels to fine-feature-map units and clipped to the map.

    Input:
        H: Hyperparameter dict; reads 'grid_width', 'grid_height',
            'batch_size', 'rnn_len' and 'region_size'.
        pred_boxes: Predicted boxes, reshaped here to rows of 4 values
            (columns 0/1 are used as x/y centre, 2/3 as width/height).
        early_feat, early_feat_channels: Not used by this function.
        w_offset, h_offset: Multipliers applied to box width and height.
    Output:
        Tensor with one row (batch id as float, y, x) per box.
    '''
    num_cells = H['grid_width'] * H['grid_height'] * H['batch_size']

    fine_stride = 8.  # pixels per 60x80 grid cell in 480x640 image
    # pixels per 15x20 grid cell in 480x640 image
    coarse_stride = H['region_size']

    # One slot per (image, grid row, grid column, rnn step), in that
    # nesting order, matching the row order of the reshaped pred_boxes.
    slots = [(image, row, col)
             for image in range(H['batch_size'])
             for row in range(H['grid_height'])
             for col in range(H['grid_width'])
             for _ in range(H['rnn_len'])]

    batch_ids = tf.constant([[image] for image, _, _ in slots])
    # Pixel coordinates of the centre of each coarse grid cell.
    x_offsets = tf.constant(
        [[coarse_stride / 2. + coarse_stride * col] for _, _, col in slots])
    y_offsets = tf.constant(
        [[coarse_stride / 2. + coarse_stride * row] for _, row, _ in slots])

    boxes = tf.reshape(pred_boxes, [num_cells * H['rnn_len'], 4])
    # scale difference between 15x20 and 60x80 features
    scale_factor = coarse_stride / fine_stride

    def fine_center(pos, extent, offset, cell_centers):
        # Sample position along one axis, in fine-feature-map units.
        return (boxes[:, pos:pos + 1] + offset * boxes[:, extent:extent + 1] +
                cell_centers) / fine_stride

    x_center = fine_center(0, 2, w_offset, x_offsets)
    x_clipped = tf.clip_by_value(x_center, 0,
                                 scale_factor * H['grid_width'] - 1)
    y_center = fine_center(1, 3, h_offset, y_offsets)
    y_clipped = tf.clip_by_value(y_center, 0,
                                 scale_factor * H['grid_height'] - 1)

    return tf_concat(1, [tf.to_float(batch_ids), y_clipped, x_clipped])
Пример #5
0
def _inception_v1_mixed(net, b0, b1_reduce, b1, b2_reduce, b2, b3_proj, scope,
                        branch_2_conv_scope='Conv2d_0b_3x3'):
    """Builds one four-branch Inception V1 block.

    Args:
      net: input tensor of the block.
      b0: output depth of the 1x1 convolution in Branch_0.
      b1_reduce: output depth of the 1x1 convolution in Branch_1.
      b1: output depth of the 3x3 convolution in Branch_1.
      b2_reduce: output depth of the 1x1 convolution in Branch_2.
      b2: output depth of the 3x3 convolution in Branch_2.
      b3_proj: output depth of the 1x1 convolution after the 3x3 max pool
        in Branch_3.
      scope: variable scope (end point name) of the block.
      branch_2_conv_scope: scope of the 3x3 convolution in Branch_2.

    Returns:
      The four branch outputs concatenated with tf_concat(3, ...).
    """
    with tf.variable_scope(scope):
        with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, b0, [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net,
                                   b1_reduce, [1, 1],
                                   scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1,
                                   b1, [3, 3],
                                   scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net,
                                   b2_reduce, [1, 1],
                                   scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2,
                                   b2, [3, 3],
                                   scope=branch_2_conv_scope)
        with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3,
                                   b3_proj, [1, 1],
                                   scope='Conv2d_0b_1x1')
        return tf_concat(3, [branch_0, branch_1, branch_2, branch_3])


def inception_v1_base(inputs, final_endpoint='Mixed_5c', scope='InceptionV1'):
    """Defines the Inception V1 base architecture.

    This architecture is defined in:
      Going deeper with convolutions
      Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
      Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
      http://arxiv.org/pdf/1409.4842v1.pdf.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      final_endpoint: specifies the endpoint to construct the network up to.
        It can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
        'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
        'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
        'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
      scope: Optional variable_scope.

    Returns:
      A tuple (net, end_points): net is the activation of final_endpoint and
      end_points is a dictionary from the name of every layer built so far to
      its activation.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined
        values. The check happens only after every layer has been built.
    """
    # (end point, builder, positional args after the input, keyword args).
    # Layers are built strictly in this order; the end point name is also the
    # variable scope, so names and order must not change.
    layers = (
        ('Conv2d_1a_7x7', slim.conv2d, (64, [7, 7]), {'stride': 2}),
        ('MaxPool_2a_3x3', slim.max_pool2d, ([3, 3],), {'stride': 2}),
        ('Conv2d_2b_1x1', slim.conv2d, (64, [1, 1]), {}),
        ('Conv2d_2c_3x3', slim.conv2d, (192, [3, 3]), {}),
        ('MaxPool_3a_3x3', slim.max_pool2d, ([3, 3],), {'stride': 2}),
        ('Mixed_3b', _inception_v1_mixed, (64, 96, 128, 16, 32, 32), {}),
        ('Mixed_3c', _inception_v1_mixed, (128, 128, 192, 32, 96, 64), {}),
        ('MaxPool_4a_3x3', slim.max_pool2d, ([3, 3],), {'stride': 2}),
        ('Mixed_4b', _inception_v1_mixed, (192, 96, 208, 16, 48, 64), {}),
        ('Mixed_4c', _inception_v1_mixed, (160, 112, 224, 24, 64, 64), {}),
        ('Mixed_4d', _inception_v1_mixed, (128, 128, 256, 24, 64, 64), {}),
        ('Mixed_4e', _inception_v1_mixed, (112, 144, 288, 32, 64, 64), {}),
        ('Mixed_4f', _inception_v1_mixed, (256, 160, 320, 32, 128, 128), {}),
        ('MaxPool_5a_2x2', slim.max_pool2d, ([2, 2],), {'stride': 2}),
        # NOTE(review): Mixed_5b names its Branch_2 3x3 convolution
        # 'Conv2d_0a_3x3' instead of 'Conv2d_0b_3x3'. The name is kept
        # because it determines the variable names; presumably existing
        # checkpoints rely on it -- verify before renaming.
        ('Mixed_5b', _inception_v1_mixed, (256, 160, 320, 32, 128, 128),
         {'branch_2_conv_scope': 'Conv2d_0a_3x3'}),
        ('Mixed_5c', _inception_v1_mixed, (384, 192, 384, 48, 128, 128), {}),
    )

    end_points = {}
    with tf.variable_scope(scope, 'InceptionV1', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=trunc_normal(0.01)):
            # Defaults for every conv / pool below unless overridden per layer.
            with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                stride=1,
                                padding='SAME'):
                net = inputs
                for end_point, build, args, kwargs in layers:
                    net = build(net, *args, scope=end_point, **kwargs)
                    end_points[end_point] = net
                    if final_endpoint == end_point:
                        return net, end_points
        raise ValueError('Unknown final endpoint %s' % final_endpoint)
Пример #6
0
def build_forward(H, x, phase, reuse):
    '''
    Construct the forward model: CNN features -> decoder -> per-step box and
    class predictions, optionally refined by a rezoom stage.

    Input:
        H: Hyperparameter dict. Keys read here: 'grid_width', 'grid_height',
            'batch_size', 'early_feat_channels', 'later_feat_channels',
            'deconv', 'avg_pool_size' (only when 'deconv' is falsy),
            'use_lstm', 'rnn_len', 'lstm_size', 'num_classes', 'use_rezoom',
            and, when rezoom is on, 'rezoom_w_coords', 'rezoom_h_coords',
            'reregress' and the optional 'rezoom_conf_scale' (default 50).
        x: Input image tensor; 117 is subtracted before it is passed to
            googlenet_load.model.
        phase: Dropout is applied only when this equals 'train'.
        reuse: Forwarded to googlenet_load.model and to the 'deconv' and
            'decoder' variable scopes.
    Output:
        If H['use_rezoom'] is falsy:
            (pred_boxes, pred_logits, pred_confidences)
        Otherwise:
            (pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
             pred_boxes_deltas), where pred_boxes_deltas stays an empty
            Python list unless H['reregress'] is set.
    '''

    grid_size = H['grid_width'] * H['grid_height']
    outer_size = grid_size * H['batch_size']
    input_mean = 117.
    x -= input_mean
    # googlenet_load.model is defined elsewhere; it returns the late feature
    # map (cnn) and an earlier one (early_feat) that is used for rezooming.
    cnn, early_feat = googlenet_load.model(x, H, reuse)
    early_feat_channels = H['early_feat_channels']
    # Keep only the first early_feat_channels channels.
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if H['deconv']:
        size = 3
        stride = 2
        pool_size = 5

        with tf.variable_scope("deconv", reuse=reuse):
            w = tf.get_variable(
                'conv_pool_w',
                shape=[
                    size, size, H['later_feat_channels'],
                    H['later_feat_channels']
                ],
                initializer=tf.random_normal_initializer(stddev=0.01))
            # Strided convolution, then average-pool only the first 256
            # channels of its output.
            cnn_s = tf.nn.conv2d(cnn,
                                 w,
                                 strides=[1, stride, stride, 1],
                                 padding='SAME')
            cnn_s_pool = tf.nn.avg_pool(cnn_s[:, :, :, :256],
                                        ksize=[1, pool_size, pool_size, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='SAME')

            cnn_s_with_pool = tf_concat(3, [cnn_s_pool, cnn_s[:, :, :, 256:]])
            # deconv is defined elsewhere; it is asked for an output of shape
            # [batch_size, grid_height, grid_width, 256].
            cnn_deconv = deconv(cnn_s_with_pool,
                                output_shape=[
                                    H['batch_size'], H['grid_height'],
                                    H['grid_width'], 256
                                ],
                                channels=[H['later_feat_channels'], 256])
            # The deconv result takes the place of the first 256 channels of
            # cnn; channels 256 onward are kept unchanged.
            cnn = tf_concat(3, (cnn_deconv, cnn[:, :, :, 256:]))

    elif H['avg_pool_size'] > 1:
        pool_size = H['avg_pool_size']
        # Channels from 700 onward are average-pooled (stride 1); the first
        # 700 pass through untouched.
        cnn1 = cnn[:, :, :, :700]
        cnn2 = cnn[:, :, :, 700:]
        cnn2 = tf.nn.avg_pool(cnn2,
                              ksize=[1, pool_size, pool_size, 1],
                              strides=[1, 1, 1, 1],
                              padding='SAME')
        cnn = tf_concat(3, [cnn1, cnn2])

    # Flatten to one feature row per grid cell of every image.
    cnn = tf.reshape(cnn, [
        H['batch_size'] * H['grid_width'] * H['grid_height'],
        H['later_feat_channels']
    ])
    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        scale_down = 0.01
        # Same 2-D shape as the reshape above; only the 0.01 scaling is new.
        lstm_input = tf.reshape(
            cnn * scale_down,
            (H['batch_size'] * grid_size, H['later_feat_channels']))
        # Both helpers are defined elsewhere; the result is indexed per step
        # below (lstm_outputs[k] for k in range(rnn_len)).
        if H['use_lstm']:
            lstm_outputs = build_lstm_inner(H, lstm_input)
        else:
            lstm_outputs = build_overfeat_inner(H, lstm_input)

        pred_boxes = []
        pred_logits = []
        # Each step k has its own box and confidence projection weights.
        for k in range(H['rnn_len']):
            output = lstm_outputs[k]
            if phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(H['lstm_size'], 4))
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(H['lstm_size'],
                                                  H['num_classes']))

            # Box regression output is multiplied by a fixed factor of 50.
            pred_boxes_step = tf.reshape(
                tf.matmul(output, box_weights) * 50, [outer_size, 1, 4])

            pred_boxes.append(pred_boxes_step)
            pred_logits.append(
                tf.reshape(tf.matmul(output, conf_weights),
                           [outer_size, 1, H['num_classes']]))

        # Join the per-step [outer_size, 1, ...] tensors; tf_concat is a
        # helper defined elsewhere, presumably taking (axis, values).
        pred_boxes = tf_concat(1, pred_boxes)
        pred_logits = tf_concat(1, pred_logits)
        # Softmax over classes is computed on a 2-D view, then reshaped to
        # [outer_size, rnn_len, num_classes].
        pred_logits_squash = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(
            pred_confidences_squash,
            [outer_size, H['rnn_len'], H['num_classes']])

        if H['use_rezoom']:
            pred_confs_deltas = []
            pred_boxes_deltas = []
            w_offsets = H['rezoom_w_coords']
            h_offsets = H['rezoom_h_coords']
            num_offsets = len(w_offsets) * len(h_offsets)
            # Sample early_feat around every predicted box.
            rezoom_features = rezoom(H, pred_boxes, early_feat,
                                     early_feat_channels, w_offsets, h_offsets)
            if phase == 'train':
                rezoom_features = tf.nn.dropout(rezoom_features, 0.5)
            for k in range(H['rnn_len']):
                # Decoder output of step k plus the rezoom features of step
                # k, the latter divided by 1000.
                delta_features = tf_concat(
                    1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
                dim = 128
                delta_weights1 = tf.get_variable(
                    'delta_ip1%d' % k,
                    shape=[
                        H['lstm_size'] + early_feat_channels * num_offsets, dim
                    ])
                ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
                if phase == 'train':
                    ip1 = tf.nn.dropout(ip1, 0.5)
                delta_confs_weights = tf.get_variable(
                    'delta_ip2%d' % k, shape=[dim, H['num_classes']])
                # Box deltas are only produced when re-regression is enabled.
                if H['reregress']:
                    delta_boxes_weights = tf.get_variable('delta_ip_boxes%d' %
                                                          k,
                                                          shape=[dim, 4])
                    pred_boxes_deltas.append(
                        tf.reshape(
                            tf.matmul(ip1, delta_boxes_weights) * 5,
                            [outer_size, 1, 4]))
                scale = H.get('rezoom_conf_scale', 50)
                pred_confs_deltas.append(
                    tf.reshape(
                        tf.matmul(ip1, delta_confs_weights) * scale,
                        [outer_size, 1, H['num_classes']]))
            pred_confs_deltas = tf_concat(1, pred_confs_deltas)
            if H['reregress']:
                pred_boxes_deltas = tf_concat(1, pred_boxes_deltas)
            # Without 'reregress', pred_boxes_deltas is still the empty list
            # created above.
            return pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas

    return pred_boxes, pred_logits, pred_confidences