def build_lstm_forward(H, x, googlenet, phase, reuse):
    """Assemble the forward graph: encoder features -> LSTM -> output heads.

    Args:
        H: hyper-parameter dict; the ``H['arch']`` entries are read here and
            ``H`` itself is passed on to ``googlenet_load.model`` and
            ``build_lstm_inner``.
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        googlenet: forwarded untouched to ``googlenet_load.model``.
        phase: dropout is only inserted when this equals ``'train'``.
        reuse: ``reuse`` flag of the ``'decoder'`` variable scope.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)``.  Each step yields a
        ``[outer_size, 1, 4]`` box tensor and a ``[outer_size, 1, 2]`` logit
        tensor, and the ``rnn_len`` steps are joined along axis 1.
        ``pred_confidences`` is the softmax of the logits, reshaped to
        ``[outer_size, rnn_len, 2]``.
    """
    arch = H['arch']
    cells_per_image = arch['grid_width'] * arch['grid_height']
    outer_size = cells_per_image * arch['batch_size']
    training = phase == 'train'

    x -= 117.  # input mean
    Z = googlenet_load.model(x, googlenet, H)

    with tf.variable_scope('decoder', reuse=reuse):
        if arch['early_dropout'] and training:
            Z = tf.nn.dropout(Z, 0.5)
        # Encoder activations are scaled by 0.01 before entering the LSTM.
        lstm_input = tf.reshape(Z * 0.01, (outer_size, 1024))
        lstm_outputs = build_lstm_inner(lstm_input, H)

        uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
        box_steps = []
        logit_steps = []
        for step in range(arch['rnn_len']):
            step_out = lstm_outputs[step]
            if arch['late_dropout'] and training:
                step_out = tf.nn.dropout(step_out, 0.5)

            # One independent pair of linear heads per step.
            box_weights = tf.get_variable('box_ip%d' % step,
                                          shape=(arch['lstm_size'], 4),
                                          initializer=uniform_init)
            conf_weights = tf.get_variable('conf_ip%d' % step,
                                           shape=(arch['lstm_size'], 2),
                                           initializer=uniform_init)

            step_boxes = tf.matmul(step_out, box_weights) * 50
            box_steps.append(tf.reshape(step_boxes, [outer_size, 1, 4]))
            step_logits = tf.matmul(step_out, conf_weights)
            logit_steps.append(tf.reshape(step_logits, [outer_size, 1, 2]))

        pred_boxes = tf.concat(1, box_steps)
        pred_logits = tf.concat(1, logit_steps)

        # Softmax is applied on a 2-D view, then folded back to 3-D.
        flat_logits = tf.reshape(pred_logits,
                                 [outer_size * arch['rnn_len'], 2])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [outer_size, arch['rnn_len'], 2])

    return pred_boxes, pred_logits, pred_confidences
def build_lstm_forward(H, x, googlenet, phase, reuse):
    """Build the encoder + LSTM forward graph and its box/confidence heads.

    Args:
        H: hyper-parameter dict; the ``H['arch']`` entries are read here and
            ``H`` is also handed to ``googlenet_load.model`` and
            ``build_lstm_inner``.
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        googlenet: forwarded untouched to ``googlenet_load.model``.
        phase: dropout layers are only added when this equals ``'train'``.
        reuse: ``reuse`` flag of the ``'decoder'`` variable scope.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)``: per-step outputs
        reshaped to ``[outer_size, 1, 4]`` / ``[outer_size, 1, 2]`` and
        concatenated on axis 1, plus the softmax of the logits reshaped to
        ``[outer_size, rnn_len, 2]``.
    """
    arch = H['arch']
    grid_size = arch['grid_width'] * arch['grid_height']
    outer_size = grid_size * arch['batch_size']
    input_mean = 117.
    x -= input_mean

    Z = googlenet_load.model(x, googlenet, H)
    with tf.variable_scope('decoder', reuse=reuse):
        scale_down = 0.01
        if arch['early_dropout'] and phase == 'train':
            Z = tf.nn.dropout(Z, 0.5)
        lstm_input = tf.reshape(Z * scale_down,
                                (arch['batch_size'] * grid_size, 1024))
        lstm_outputs = build_lstm_inner(lstm_input, H)

        # Symmetric range around zero.  A single positional argument would
        # only set ``minval`` and give all-positive weights in [0.1, maxval).
        initializer = tf.random_uniform_initializer(-0.1, 0.1)

        pred_boxes = []
        pred_logits = []
        for i in range(arch['rnn_len']):
            output = lstm_outputs[i]
            if arch['late_dropout'] and phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            box_weights = tf.get_variable('box_ip%d' % i,
                                          shape=(arch['lstm_size'], 4),
                                          initializer=initializer)
            conf_weights = tf.get_variable('conf_ip%d' % i,
                                           shape=(arch['lstm_size'], 2),
                                           initializer=initializer)
            pred_boxes.append(
                tf.reshape(tf.matmul(output, box_weights) * 50,
                           [outer_size, 1, 4]))
            pred_logits.append(
                tf.reshape(tf.matmul(output, conf_weights),
                           [outer_size, 1, 2]))

        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        # Softmax runs on a flattened [N, 2] view of the logits.
        pred_logits_squash = tf.reshape(pred_logits,
                                        [outer_size * arch['rnn_len'], 2])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(pred_confidences_squash,
                                      [outer_size, arch['rnn_len'], 2])

    return pred_boxes, pred_logits, pred_confidences
def inference(hypes, images, phase):
    """Run the encoder from ``googlenet_load`` on ``images``.

    When ``phase`` equals ``'train'`` a network created by
    ``googlenet_load.init(hypes)`` is first appended to the module-level
    ``encoder_net`` list.  The model is always built from ``encoder_net[0]``.

    Args:
        hypes: passed to ``googlenet_load.init`` and ``googlenet_load.model``.
        images: input batch; the input mean (117.) is subtracted via ``-=``.
        phase: phase name; only ``'train'`` triggers the network creation.

    Returns:
        The three values produced by ``googlenet_load.model``, in order.
    """
    if phase == 'train':
        network = googlenet_load.init(hypes)
        encoder_net.append(network)

    images -= 117.  # input mean
    cnn, early_feat, extra = googlenet_load.model(images, encoder_net[0], hypes)
    return cnn, early_feat, extra
def build_overfeat_forward(H, x, googlenet, phase):
    """Build the forward graph with two affine heads on the encoder output.

    Args:
        H: hyper-parameter dict; ``H['arch']`` entries are read here and ``H``
            is passed to ``googlenet_load.model``.
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        googlenet: passed to ``googlenet_load.model``; its ``'W'`` and ``'B'``
            entries (index 0 for logits, index 1 for boxes) feed the heads.
        phase: used as op-name prefix; dropout is only added for ``'train'``.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)`` where the logits are
        reshaped to ``[batch_size * grid_size, num_classes]``, the confidences
        are their softmax, and the boxes are reshaped to
        ``[batch_size * grid_size, 1, 4]`` and multiplied by 100.
    """
    x -= 117.  # input mean
    Z = googlenet_load.model(x, googlenet, H)

    arch = H['arch']
    cells_per_image = arch['grid_width'] * arch['grid_height']
    if arch['use_dropout'] and phase == 'train':
        Z = tf.nn.dropout(Z, 0.5)

    # Classification head.
    raw_logits = tf.nn.xw_plus_b(Z, googlenet['W'][0], googlenet['B'][0],
                                 name=phase + '/logits_0')
    pred_logits = tf.reshape(
        raw_logits,
        [arch['batch_size'] * cells_per_image, arch['num_classes']])
    pred_confidences = tf.nn.softmax(pred_logits)

    # Box regression head.
    raw_boxes = tf.nn.xw_plus_b(Z, googlenet['W'][1], googlenet['B'][1],
                                name=phase + '/logits_1')
    pred_boxes = tf.reshape(
        raw_boxes, [arch['batch_size'] * cells_per_image, 1, 4]) * 100

    return pred_boxes, pred_logits, pred_confidences
def build_forward(H, x, phase, reuse):
    '''
    Construct the forward model.

    Args:
        H: flat hyper-parameter dict (grid size, batch size, channel counts,
            feature switches such as 'deconv', 'use_lstm', 'use_rezoom').
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        phase: dropout layers are only added when this equals 'train'.
        reuse: passed to ``googlenet_load.model`` and to the variable scopes.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)``; when
        ``H['use_rezoom']`` is truthy the tuple additionally carries
        ``pred_confs_deltas`` and ``pred_boxes_deltas`` (the latter stays an
        empty list unless ``H['reregress']`` is truthy).
    '''
    cells_per_image = H['grid_width'] * H['grid_height']
    outer_size = cells_per_image * H['batch_size']
    training = phase == 'train'

    x -= 117.  # input mean
    cnn, early_feat = googlenet_load.model(x, H, reuse)
    early_feat_channels = H['early_feat_channels']
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if H['deconv']:
        later_channels = H['later_feat_channels']
        with tf.variable_scope("deconv", reuse=reuse):
            # 3x3 stride-2 convolution, 5x5 average pooling of the first 256
            # channels, then ``deconv`` back to the grid resolution.
            w = tf.get_variable(
                'conv_pool_w',
                shape=[3, 3, later_channels, later_channels],
                initializer=tf.random_normal_initializer(stddev=0.01))
            cnn_s = tf.nn.conv2d(cnn, w, strides=[1, 2, 2, 1], padding='SAME')
            cnn_s_pool = tf.nn.avg_pool(cnn_s[:, :, :, :256],
                                        ksize=[1, 5, 5, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='SAME')
            cnn_s_with_pool = tf_concat(3, [cnn_s_pool, cnn_s[:, :, :, 256:]])
            cnn_deconv = deconv(
                cnn_s_with_pool,
                output_shape=[H['batch_size'], H['grid_height'],
                              H['grid_width'], 256],
                channels=[later_channels, 256])
            cnn = tf_concat(3, (cnn_deconv, cnn[:, :, :, 256:]))
    elif H['avg_pool_size'] > 1:
        # Only the channels from index 700 onwards are average-pooled.
        window = H['avg_pool_size']
        untouched = cnn[:, :, :, :700]
        pooled = cnn[:, :, :, 700:]
        pooled = tf.nn.avg_pool(pooled,
                                ksize=[1, window, window, 1],
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        cnn = tf_concat(3, [untouched, pooled])

    cnn = tf.reshape(cnn, [outer_size, H['later_feat_channels']])

    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        # Features are scaled by 0.01 before entering the decoder.
        lstm_input = tf.reshape(cnn * 0.01,
                                (outer_size, H['later_feat_channels']))
        inner = build_lstm_inner if H['use_lstm'] else build_overfeat_inner
        lstm_outputs = inner(H, lstm_input)

        rnn_len = H['rnn_len']
        lstm_size = H['lstm_size']
        num_classes = H['num_classes']

        box_steps = []
        logit_steps = []
        for k in range(rnn_len):
            step_out = lstm_outputs[k]
            if training:
                step_out = tf.nn.dropout(step_out, 0.5)
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(lstm_size, 4))
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(lstm_size, num_classes))

            step_boxes = tf.matmul(step_out, box_weights) * 50
            box_steps.append(tf.reshape(step_boxes, [outer_size, 1, 4]))
            step_logits = tf.matmul(step_out, conf_weights)
            logit_steps.append(
                tf.reshape(step_logits, [outer_size, 1, num_classes]))

        pred_boxes = tf_concat(1, box_steps)
        pred_logits = tf_concat(1, logit_steps)
        # Softmax runs on a flattened 2-D view, then is folded back.
        flat_logits = tf.reshape(pred_logits,
                                 [outer_size * rnn_len, num_classes])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [outer_size, rnn_len, num_classes])

        if not H['use_rezoom']:
            return pred_boxes, pred_logits, pred_confidences

        w_offsets = H['rezoom_w_coords']
        h_offsets = H['rezoom_h_coords']
        num_offsets = len(w_offsets) * len(h_offsets)
        rezoom_features = rezoom(H, pred_boxes, early_feat,
                                 early_feat_channels, w_offsets, h_offsets)
        if training:
            rezoom_features = tf.nn.dropout(rezoom_features, 0.5)

        hidden_dim = 128
        reregress = H['reregress']
        conf_scale = H.get('rezoom_conf_scale', 50)
        conf_delta_steps = []
        box_delta_steps = []
        for k in range(rnn_len):
            delta_features = tf_concat(
                1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
            delta_weights1 = tf.get_variable(
                'delta_ip1%d' % k,
                shape=[lstm_size + early_feat_channels * num_offsets,
                       hidden_dim])
            # TODO: add dropout here ?
            ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
            if training:
                ip1 = tf.nn.dropout(ip1, 0.5)
            delta_confs_weights = tf.get_variable(
                'delta_ip2%d' % k, shape=[hidden_dim, num_classes])
            if reregress:
                delta_boxes_weights = tf.get_variable(
                    'delta_ip_boxes%d' % k, shape=[hidden_dim, 4])
                box_delta = tf.matmul(ip1, delta_boxes_weights) * 5
                box_delta_steps.append(
                    tf.reshape(box_delta, [outer_size, 1, 4]))
            conf_delta = tf.matmul(ip1, delta_confs_weights) * conf_scale
            conf_delta_steps.append(
                tf.reshape(conf_delta, [outer_size, 1, num_classes]))

        pred_confs_deltas = tf_concat(1, conf_delta_steps)
        # Without re-regression the (empty) list is handed back unchanged.
        pred_boxes_deltas = (tf_concat(1, box_delta_steps)
                             if reregress else box_delta_steps)
        return (pred_boxes, pred_logits, pred_confidences,
                pred_confs_deltas, pred_boxes_deltas)
def build_forward(H, x, p1_x, p2_x, p3_x, p4_x, p5_x, p6_x, p7_x, p8_x, f_x,
                  phase, reuse):
    '''
    Construct the forward model from a frame and five auxiliary frames.

    The feature maps of ``x`` and ``p1_x`` .. ``p5_x`` are concatenated along
    the channel axis before decoding.  ``p6_x``, ``p7_x``, ``p8_x`` and ``f_x``
    are mean-subtracted but otherwise unused; they stay in the signature for
    existing callers.

    Args:
        H: flat hyper-parameter dict.
        x: main input batch.
        p1_x .. p8_x, f_x: auxiliary input batches (see above).
        phase: dropout layers are only added when this equals 'train'.
        reuse: passed to the ``googlenet_load`` models and variable scopes.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)``; when
        ``H['use_rezoom']`` is truthy the tuple additionally carries
        ``pred_confs_deltas`` and ``pred_boxes_deltas`` (the latter stays an
        empty list unless ``H['reregress']`` is truthy).
    '''
    grid_size = H['grid_width'] * H['grid_height']
    outer_size = grid_size * H['batch_size']
    training = phase == 'train'

    # Mean-subtract every input, including the ones not consumed below, so
    # the in-place effect on the arguments is unchanged.
    input_mean = 117.
    x -= input_mean
    p1_x -= input_mean
    p2_x -= input_mean
    p3_x -= input_mean
    p4_x -= input_mean
    p5_x -= input_mean
    p6_x -= input_mean
    p7_x -= input_mean
    p8_x -= input_mean
    f_x -= input_mean

    # print(...) with one formatted string gives the same output on
    # Python 2 and Python 3.  The shape is printed twice because a (now
    # disabled) batch concat used to sit between the two calls.
    print("x.shape %s" % str(x.get_shape()))
    print("x.shape %s" % str(x.get_shape()))

    cnn, early_feat = googlenet_load.model(x, H, reuse)
    p1_cnn, _ = googlenet_load.p1_model(p1_x, H, reuse)
    p2_cnn, _ = googlenet_load.p2_model(p2_x, H, reuse)
    p3_cnn, _ = googlenet_load.p3_model(p3_x, H, reuse)
    p4_cnn, _ = googlenet_load.p4_model(p4_x, H, reuse)
    p5_cnn, _ = googlenet_load.p5_model(p5_x, H, reuse)

    # Channel-wise stack of the auxiliary feature maps and the main one.
    feature_maps = (p5_cnn, p4_cnn, p3_cnn, p2_cnn, p1_cnn, cnn)
    num_maps = len(feature_maps)
    cnn = tf.concat(3, feature_maps)
    print("p1_cnn: %s" % str(p1_cnn.get_shape()))
    print("cnn: %s" % str(cnn.get_shape()))

    early_feat_channels = H['early_feat_channels']
    early_feat = early_feat[:, :, :, :early_feat_channels]

    decoder_channels = H['later_feat_channels'] * num_maps
    cnn = tf.reshape(cnn, [
        H['batch_size'] * H['grid_width'] * H['grid_height'],
        decoder_channels
    ])

    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        scale_down = 0.01
        lstm_input = tf.reshape(cnn * scale_down,
                                (H['batch_size'] * grid_size,
                                 decoder_channels))
        if H['use_lstm']:
            lstm_outputs = build_lstm_inner(H, lstm_input)
        else:
            lstm_outputs = build_overfeat_inner(H, lstm_input)

        pred_boxes = []
        pred_logits = []
        for k in range(H['rnn_len']):
            output = lstm_outputs[k]
            if training:
                output = tf.nn.dropout(output, 0.5)
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(H['lstm_size'], 4))
            conf_weights = tf.get_variable(
                'conf_ip%d' % k, shape=(H['lstm_size'], H['num_classes']))
            pred_boxes.append(
                tf.reshape(tf.matmul(output, box_weights) * 50,
                           [outer_size, 1, 4]))
            pred_logits.append(
                tf.reshape(tf.matmul(output, conf_weights),
                           [outer_size, 1, H['num_classes']]))

        pred_boxes = tf.concat(1, pred_boxes)
        pred_logits = tf.concat(1, pred_logits)
        # Softmax runs on a flattened 2-D view, then is folded back.
        pred_logits_squash = tf.reshape(
            pred_logits, [outer_size * H['rnn_len'], H['num_classes']])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(
            pred_confidences_squash,
            [outer_size, H['rnn_len'], H['num_classes']])

        if not H['use_rezoom']:
            return pred_boxes, pred_logits, pred_confidences

        pred_confs_deltas = []
        pred_boxes_deltas = []
        w_offsets = H['rezoom_w_coords']
        h_offsets = H['rezoom_h_coords']
        num_offsets = len(w_offsets) * len(h_offsets)
        rezoom_features = rezoom(H, pred_boxes, early_feat,
                                 early_feat_channels, w_offsets, h_offsets)
        if training:
            rezoom_features = tf.nn.dropout(rezoom_features, 0.5)

        dim = 128
        scale = H.get('rezoom_conf_scale', 50)
        for k in range(H['rnn_len']):
            delta_features = tf.concat(
                1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
            delta_weights1 = tf.get_variable(
                'delta_ip1%d' % k,
                shape=[H['lstm_size'] + early_feat_channels * num_offsets,
                       dim])
            # TODO: add dropout here ?
            ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
            if training:
                ip1 = tf.nn.dropout(ip1, 0.5)
            delta_confs_weights = tf.get_variable(
                'delta_ip2%d' % k, shape=[dim, H['num_classes']])
            if H['reregress']:
                delta_boxes_weights = tf.get_variable(
                    'delta_ip_boxes%d' % k, shape=[dim, 4])
                pred_boxes_deltas.append(
                    tf.reshape(tf.matmul(ip1, delta_boxes_weights) * 5,
                               [outer_size, 1, 4]))
            pred_confs_deltas.append(
                tf.reshape(tf.matmul(ip1, delta_confs_weights) * scale,
                           [outer_size, 1, H['num_classes']]))

        pred_confs_deltas = tf.concat(1, pred_confs_deltas)
        # Without re-regression pred_boxes_deltas stays an empty list.
        if H['reregress']:
            pred_boxes_deltas = tf.concat(1, pred_boxes_deltas)
        return (pred_boxes, pred_logits, pred_confidences,
                pred_confs_deltas, pred_boxes_deltas)
def build_forward(H, x, googlenet, phase, reuse):
    '''
    Construct the forward model.

    Args:
        H: flat hyper-parameter dict (grid size, batch size, feature switches
            such as 'deconv', 'use_lstm', 'use_rezoom').
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        googlenet: forwarded untouched to ``googlenet_load.model``.
        phase: dropout layers are only added when this equals 'train'.
        reuse: ``reuse`` flag of the variable scopes opened here.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)``; when
        ``H['use_rezoom']`` is truthy the tuple additionally carries
        ``pred_confs_deltas`` and ``pred_boxes_deltas`` (the latter stays an
        empty list unless ``H['reregress']`` is truthy).
    '''
    cells_per_image = H['grid_width'] * H['grid_height']
    outer_size = cells_per_image * H['batch_size']
    training = phase == 'train'

    x -= 117.  # input mean
    cnn, early_feat, _ = googlenet_load.model(x, googlenet, H)
    early_feat_channels = H['early_feat_channels']
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if H['deconv']:
        with tf.variable_scope("deconv", reuse=reuse):
            # 3x3 stride-2 convolution, 5x5 average pooling of the first 256
            # channels, then ``deconv`` back to the grid resolution.
            w = tf.get_variable(
                'conv_pool_w',
                shape=[3, 3, 1024, 1024],
                initializer=tf.random_normal_initializer(stddev=0.01))
            cnn_s = tf.nn.conv2d(cnn, w, strides=[1, 2, 2, 1], padding='SAME')
            cnn_s_pool = tf.nn.avg_pool(cnn_s[:, :, :, :256],
                                        ksize=[1, 5, 5, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='SAME')
            cnn_s_with_pool = tf.concat(3, [cnn_s_pool, cnn_s[:, :, :, 256:]])
            cnn_deconv = deconv(
                cnn_s_with_pool,
                output_shape=[H['batch_size'], H['grid_height'],
                              H['grid_width'], 256],
                channels=[1024, 256])
            cnn = tf.concat(3, (cnn_deconv, cnn[:, :, :, 256:]))
    elif H['avg_pool_size'] > 1:
        # Only the channels from index 700 onwards are average-pooled.
        window = H['avg_pool_size']
        untouched = cnn[:, :, :, :700]
        pooled = cnn[:, :, :, 700:]
        pooled = tf.nn.avg_pool(pooled,
                                ksize=[1, window, window, 1],
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        cnn = tf.concat(3, [untouched, pooled])

    cnn = tf.reshape(cnn, [outer_size, 1024])

    initializer = tf.random_uniform_initializer(-0.1, 0.1)
    with tf.variable_scope('decoder', reuse=reuse, initializer=initializer):
        # Features are scaled by 0.01 before entering the decoder.
        lstm_input = tf.reshape(cnn * 0.01, (outer_size, 1024))
        inner = build_lstm_inner if H['use_lstm'] else build_overfeat_inner
        lstm_outputs = inner(H, lstm_input)

        rnn_len = H['rnn_len']
        lstm_size = H['lstm_size']
        num_classes = H['num_classes']

        box_steps = []
        logit_steps = []
        for k in range(rnn_len):
            step_out = lstm_outputs[k]
            if training:
                step_out = tf.nn.dropout(step_out, 0.5)
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(lstm_size, 4))
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(lstm_size, num_classes))

            step_boxes = tf.matmul(step_out, box_weights) * 50
            box_steps.append(tf.reshape(step_boxes, [outer_size, 1, 4]))
            step_logits = tf.matmul(step_out, conf_weights)
            logit_steps.append(
                tf.reshape(step_logits, [outer_size, 1, num_classes]))

        pred_boxes = tf.concat(1, box_steps)
        pred_logits = tf.concat(1, logit_steps)
        # Softmax runs on a flattened 2-D view, then is folded back.
        flat_logits = tf.reshape(pred_logits,
                                 [outer_size * rnn_len, num_classes])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [outer_size, rnn_len, num_classes])

        if not H['use_rezoom']:
            return pred_boxes, pred_logits, pred_confidences

        w_offsets = H['rezoom_w_coords']
        h_offsets = H['rezoom_h_coords']
        num_offsets = len(w_offsets) * len(h_offsets)
        rezoom_features = rezoom(H, pred_boxes, early_feat,
                                 early_feat_channels, w_offsets, h_offsets)
        if training:
            rezoom_features = tf.nn.dropout(rezoom_features, 0.5)

        hidden_dim = 128
        reregress = H['reregress']
        conf_scale = H.get('rezoom_conf_scale', 50)
        conf_delta_steps = []
        box_delta_steps = []
        for k in range(rnn_len):
            delta_features = tf.concat(
                1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
            delta_weights1 = tf.get_variable(
                'delta_ip1%d' % k,
                shape=[lstm_size + early_feat_channels * num_offsets,
                       hidden_dim])
            # TODO: add dropout here ?
            ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
            if training:
                ip1 = tf.nn.dropout(ip1, 0.5)
            delta_confs_weights = tf.get_variable(
                'delta_ip2%d' % k, shape=[hidden_dim, num_classes])
            if reregress:
                delta_boxes_weights = tf.get_variable(
                    'delta_ip_boxes%d' % k, shape=[hidden_dim, 4])
                box_delta = tf.matmul(ip1, delta_boxes_weights) * 5
                box_delta_steps.append(
                    tf.reshape(box_delta, [outer_size, 1, 4]))
            conf_delta = tf.matmul(ip1, delta_confs_weights) * conf_scale
            conf_delta_steps.append(
                tf.reshape(conf_delta, [outer_size, 1, num_classes]))

        pred_confs_deltas = tf.concat(1, conf_delta_steps)
        # Without re-regression the (empty) list is handed back unchanged.
        pred_boxes_deltas = (tf.concat(1, box_delta_steps)
                             if reregress else box_delta_steps)
        return (pred_boxes, pred_logits, pred_confidences,
                pred_confs_deltas, pred_boxes_deltas)
def build_forward(H, x, googlenet, phase, reuse):
    '''
    Construct the forward model.

    Side effect: the sliced early feature map is stored in the module-level
    global ``early_feat``.

    Args:
        H: hyper-parameter dict; the ``H['arch']`` entries are read here and
            ``H`` is passed to ``googlenet_load.model``, the inner decoder
            builder and ``rezoom``.
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        googlenet: forwarded untouched to ``googlenet_load.model``.
        phase: dropout layers are only added when this equals 'train'.
        reuse: ``reuse`` flag of the 'decoder' variable scope.

    Returns:
        With ``H['arch']['use_rezoom']`` truthy:
        ``(pred_boxes, pred_logits, pred_confidences, pred_confs_deltas,
        pred_boxes_deltas)``, where ``pred_boxes_deltas`` stays an empty list
        unless ``H['arch']['reregress']`` is truthy.  Otherwise
        ``(pred_boxes, pred_logits, pred_confidences)`` with ``pred_logits``
        wrapped in a ``tf.Print`` of its shape.
    '''
    global early_feat

    arch = H['arch']
    cells_per_image = arch['grid_width'] * arch['grid_height']
    outer_size = cells_per_image * arch['batch_size']
    training = phase == 'train'

    x -= 117.  # input mean
    Z, early_feat, _ = googlenet_load.model(x, googlenet, H)
    early_feat_channels = arch['early_feat_channels']
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if arch['avg_pool_size'] > 1:
        # Only the channels from index 700 onwards are average-pooled.
        window = arch['avg_pool_size']
        untouched = Z[:, :, :, :700]
        pooled = Z[:, :, :, 700:]
        pooled = tf.nn.avg_pool(pooled,
                                ksize=[1, window, window, 1],
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        Z = tf.concat(3, [untouched, pooled])
    Z = tf.reshape(Z, [outer_size, 1024])

    with tf.variable_scope('decoder', reuse=reuse):
        # Features are scaled by 0.01 before entering the decoder.
        lstm_input = tf.reshape(Z * 0.01, (outer_size, 1024))
        inner = build_lstm_inner if arch['use_lstm'] else build_overfeat_inner
        lstm_outputs = inner(lstm_input, H)

        uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
        rnn_len = arch['rnn_len']
        lstm_size = arch['lstm_size']
        num_classes = arch['num_classes']

        box_steps = []
        logit_steps = []
        for k in range(rnn_len):
            step_out = lstm_outputs[k]
            if training:
                step_out = tf.nn.dropout(step_out, 0.5)
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(lstm_size, 4),
                                          initializer=uniform_init)
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(lstm_size, num_classes),
                                           initializer=uniform_init)

            step_boxes = tf.matmul(step_out, box_weights) * 50
            box_steps.append(tf.reshape(step_boxes, [outer_size, 1, 4]))
            step_logits = tf.matmul(step_out, conf_weights)
            logit_steps.append(
                tf.reshape(step_logits, [outer_size, 1, num_classes]))

        pred_boxes = tf.concat(1, box_steps)
        pred_logits = tf.concat(1, logit_steps)
        # Softmax runs on a flattened 2-D view, then is folded back.
        flat_logits = tf.reshape(pred_logits,
                                 [outer_size * rnn_len, num_classes])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [outer_size, rnn_len, num_classes])

        if arch['use_rezoom']:
            w_offsets = arch['rezoom_w_coords']
            h_offsets = arch['rezoom_h_coords']
            num_offsets = len(w_offsets) * len(h_offsets)
            rezoom_features = rezoom(H, pred_boxes, early_feat,
                                     early_feat_channels,
                                     w_offsets, h_offsets)
            if training:
                rezoom_features = tf.nn.dropout(rezoom_features, 0.5)

            hidden_dim = 128
            reregress = arch['reregress']
            conf_scale = arch.get('rezoom_conf_scale', 50)
            conf_delta_steps = []
            box_delta_steps = []
            for k in range(rnn_len):
                delta_features = tf.concat(
                    1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
                delta_weights1 = tf.get_variable(
                    'delta_ip1%d' % k,
                    shape=[lstm_size + early_feat_channels * num_offsets,
                           hidden_dim],
                    initializer=uniform_init)
                # TODO: add dropout here ?
                ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
                if training:
                    ip1 = tf.nn.dropout(ip1, 0.5)
                delta_confs_weights = tf.get_variable(
                    'delta_ip2%d' % k,
                    shape=[hidden_dim, num_classes],
                    initializer=uniform_init)
                if reregress:
                    delta_boxes_weights = tf.get_variable(
                        'delta_ip_boxes%d' % k,
                        shape=[hidden_dim, 4],
                        initializer=uniform_init)
                    box_delta = tf.matmul(ip1, delta_boxes_weights) * 5
                    box_delta_steps.append(
                        tf.reshape(box_delta, [outer_size, 1, 4]))
                conf_delta = tf.matmul(ip1, delta_confs_weights) * conf_scale
                conf_delta_steps.append(
                    tf.reshape(conf_delta, [outer_size, 1, num_classes]))

            pred_confs_deltas = tf.concat(1, conf_delta_steps)
            # Without re-regression the (empty) list is handed back unchanged.
            pred_boxes_deltas = (tf.concat(1, box_delta_steps)
                                 if reregress else box_delta_steps)
            return (pred_boxes, pred_logits, pred_confidences,
                    pred_confs_deltas, pred_boxes_deltas)

    # Non-rezoom path: print the logits' shape whenever they are evaluated.
    pred_logits = tf.Print(pred_logits, [tf.shape(pred_logits)])
    return pred_boxes, pred_logits, pred_confidences
def build_forward(H, x, googlenet, phase, reuse):
    '''
    Construct the forward model (two-class confidence head).

    Side effect: the sliced early feature map is stored in the module-level
    global ``early_feat``.

    Args:
        H: hyper-parameter dict; the ``H['arch']`` entries are read here and
            ``H`` is passed to ``googlenet_load.model``, the inner decoder
            builder and ``rezoom``.
        x: input batch; the input mean (117.) is subtracted from it via ``-=``.
        googlenet: forwarded untouched to ``googlenet_load.model``.
        phase: dropout layers are only added when this equals 'train'.
        reuse: ``reuse`` flag of the 'decoder' variable scope.

    Returns:
        ``(pred_boxes, pred_logits, pred_confidences)``; when
        ``H['arch']['use_rezoom']`` is truthy the tuple additionally carries
        ``pred_confs_deltas`` and ``pred_boxes_deltas`` (the latter stays an
        empty list unless ``H['arch']['reregress']`` is truthy).
    '''
    global early_feat

    arch = H['arch']
    cells_per_image = arch['grid_width'] * arch['grid_height']
    outer_size = cells_per_image * arch['batch_size']
    training = phase == 'train'

    x -= 117.  # input mean
    Z, early_feat, _ = googlenet_load.model(x, googlenet, H)
    early_feat_channels = arch['early_feat_channels']
    early_feat = early_feat[:, :, :, :early_feat_channels]

    if arch['avg_pool_size'] > 1:
        # Only the channels from index 700 onwards are average-pooled.
        window = arch['avg_pool_size']
        untouched = Z[:, :, :, :700]
        pooled = Z[:, :, :, 700:]
        pooled = tf.nn.avg_pool(pooled,
                                ksize=[1, window, window, 1],
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        Z = tf.concat(3, [untouched, pooled])
    Z = tf.reshape(Z, [outer_size, 1024])

    with tf.variable_scope('decoder', reuse=reuse):
        # Features are scaled by 0.01 before entering the decoder.
        lstm_input = tf.reshape(Z * 0.01, (outer_size, 1024))
        inner = build_lstm_inner if arch['use_lstm'] else build_overfeat_inner
        lstm_outputs = inner(lstm_input, H)

        uniform_init = tf.random_uniform_initializer(-0.1, 0.1)
        rnn_len = arch['rnn_len']
        lstm_size = arch['lstm_size']

        box_steps = []
        logit_steps = []
        for k in range(rnn_len):
            step_out = lstm_outputs[k]
            if training:
                step_out = tf.nn.dropout(step_out, 0.5)
            box_weights = tf.get_variable('box_ip%d' % k,
                                          shape=(lstm_size, 4),
                                          initializer=uniform_init)
            conf_weights = tf.get_variable('conf_ip%d' % k,
                                           shape=(lstm_size, 2),
                                           initializer=uniform_init)

            step_boxes = tf.matmul(step_out, box_weights) * 50
            box_steps.append(tf.reshape(step_boxes, [outer_size, 1, 4]))
            step_logits = tf.matmul(step_out, conf_weights)
            logit_steps.append(tf.reshape(step_logits, [outer_size, 1, 2]))

        pred_boxes = tf.concat(1, box_steps)
        pred_logits = tf.concat(1, logit_steps)
        # Softmax runs on a flattened [N, 2] view, then is folded back.
        flat_logits = tf.reshape(pred_logits, [outer_size * rnn_len, 2])
        flat_confidences = tf.nn.softmax(flat_logits)
        pred_confidences = tf.reshape(flat_confidences,
                                      [outer_size, rnn_len, 2])

        if not arch['use_rezoom']:
            return pred_boxes, pred_logits, pred_confidences

        w_offsets = arch['rezoom_w_coords']
        h_offsets = arch['rezoom_h_coords']
        num_offsets = len(w_offsets) * len(h_offsets)
        rezoom_features = rezoom(H, pred_boxes, early_feat,
                                 early_feat_channels, w_offsets, h_offsets)
        if training:
            rezoom_features = tf.nn.dropout(rezoom_features, 0.5)

        hidden_dim = 128
        reregress = arch['reregress']
        conf_scale = arch.get('rezoom_conf_scale', 50)
        conf_delta_steps = []
        box_delta_steps = []
        for k in range(rnn_len):
            delta_features = tf.concat(
                1, [lstm_outputs[k], rezoom_features[:, k, :] / 1000.])
            delta_weights1 = tf.get_variable(
                'delta_ip1%d' % k,
                shape=[lstm_size + early_feat_channels * num_offsets,
                       hidden_dim],
                initializer=uniform_init)
            # TODO: add dropout here ?
            ip1 = tf.nn.relu(tf.matmul(delta_features, delta_weights1))
            if training:
                ip1 = tf.nn.dropout(ip1, 0.5)
            delta_confs_weights = tf.get_variable('delta_ip2%d' % k,
                                                  shape=[hidden_dim, 2],
                                                  initializer=uniform_init)
            if reregress:
                delta_boxes_weights = tf.get_variable(
                    'delta_ip_boxes%d' % k,
                    shape=[hidden_dim, 4],
                    initializer=uniform_init)
                box_delta = tf.matmul(ip1, delta_boxes_weights) * 5
                box_delta_steps.append(
                    tf.reshape(box_delta, [outer_size, 1, 4]))
            conf_delta = tf.matmul(ip1, delta_confs_weights) * conf_scale
            conf_delta_steps.append(
                tf.reshape(conf_delta, [outer_size, 1, 2]))

        pred_confs_deltas = tf.concat(1, conf_delta_steps)
        # Without re-regression the (empty) list is handed back unchanged.
        pred_boxes_deltas = (tf.concat(1, box_delta_steps)
                             if reregress else box_delta_steps)
        return (pred_boxes, pred_logits, pred_confidences,
                pred_confs_deltas, pred_boxes_deltas)