def pre_trained_net(weights, image):
    """Push `image` through the pre-trained VGG-style stack, keeping every output.

    Layers are applied in a fixed order (conv/relu pairs separated by 2x2
    max-pooling, stages 1 through 5, with no pooling after stage 5).

    Args:
        weights: mapping indexed by conv layer name; each entry is indexed
            with "weights" and "bias", and the arrays are unwrapped with
            ``[0][0]``.  (Looks like a loaded matconvnet model -- confirm
            against the caller.)
        image: input tensor handed to the first convolution.

    Returns:
        dict mapping each layer name to the tensor produced by that layer.
    """
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3',
    )

    outputs = {}  # layer name -> output tensor of that layer
    activation = image
    for name in layer_names:
        if name.startswith('conv'):
            raw_kernels = weights[name]["weights"][0][0]
            raw_bias = weights[name]["bias"][0][0]
            # Layout conversion for the stored kernels:
            #   matconvnet: [out_channels, in_channels, height, width]
            #   tensorflow: [height, width, in_channels, out_channels]
            kernel_var = net_utils.get_variable(
                np.transpose(raw_kernels, (2, 3, 1, 0)), name=name + "_w")
            bias_var = net_utils.get_variable(
                raw_bias.reshape(-1), name=name + "_b")
            activation = net_utils.conv2d_basic(activation, kernel_var, bias_var)
        elif name.startswith('relu'):
            activation = tf.nn.relu(activation, name=name)
        elif name.startswith('pool'):
            activation = net_utils.max_pool_2x2(activation)
        outputs[name] = activation

    return outputs
def _apply_pretrained_conv(inputs, weights, key, var_prefix):
    """Apply the convolution stored under ``weights[key]`` to `inputs`.

    The kernel and bias are unwrapped with ``[0][0]``, the kernel is
    transposed from the matconvnet layout
    ([out_channels, in_channels, height, width]) to the tensorflow layout
    ([height, width, in_channels, out_channels]), and both are registered
    through ``net_utils.get_variable`` as ``<var_prefix>_w`` and
    ``<var_prefix>_b``.
    """
    raw_kernels = weights[key]['weights'][0][0]
    raw_bias = weights[key]['bias'][0][0]
    kernel_var = net_utils.get_variable(
        np.transpose(raw_kernels, (2, 3, 1, 0)), name=var_prefix + "_w")
    bias_var = net_utils.get_variable(
        raw_bias.reshape(-1), name=var_prefix + "_b")
    return net_utils.conv2d_basic(inputs, kernel_var, bias_var)


def inference(image, weights):
    """Build the multi-stage fusion network on top of the pre-trained stack.

    The last conv output of stages 2-5 is passed through a "prep"
    convolution, upsampled to a common shape with a transposed convolution,
    concatenated along axis 3 and combined by a final convolution.

    Args:
        image: input tensor, forwarded to ``pre_trained_net``.
        weights: mapping indexed by layer name holding the pre-trained
            'weights' / 'bias' arrays (each unwrapped with ``[0][0]``).

    Returns:
        The tensor produced by the final ``new_score_weighting`` convolution.
    """
    # (stage output layer, prep weight key, prep variable prefix,
    #  upsample weight key, upsample stride)
    # NOTE: the conv outputs of stage 1 are not used.
    stages = (
        ('conv2_2', 'conv2_2_16', 'prep2', 'upsample2_', 2),
        ('conv3_3', 'conv3_3_16', 'prep3', 'upsample4_', 4),
        ('conv4_3', 'conv4_3_16', 'prep4', 'upsample8_', 8),
        ('conv5_3', 'conv5_3_16', 'prep5', 'upsample16_', 16),
    )

    # start a new variable scope "inference"
    with tf.variable_scope("inference"):
        # Forward result of every layer of the pre-trained network.
        image_net = pre_trained_net(weights, image)

        ##############################
        # Preparation for upsampling
        ##############################
        # All prep convolutions are built before any upsampling op so the
        # graph construction order matches the previous implementation.
        prepped = [
            _apply_pretrained_conv(image_net[layer], weights, prep_key, prefix)
            for layer, prep_key, prefix, _, _ in stages
        ]

        #############################
        # Upsampling for each stage
        #############################
        # Transposed-convolution kernel layouts:
        #   matconvnet: [out_channels, in_channels, height, width]
        #   tensorflow: [height, width, out_channels, in_channels]
        # NOTE(review): the target size 480x854 with 16 channels is
        # hard-coded; inputs of another resolution are not supported here.
        upsample_shape = tf.stack([FLAGS.batch_size, 480, 854, 16])

        upsampled = []
        for prep, (_, _, _, upsample_key, stride) in zip(prepped, stages):
            # The stored kernel is used directly as a constant filter rather
            # than being wrapped in a variable (the original comments
            # describe it as a bilinear kernel).
            upsample_kernel = np.transpose(
                weights[upsample_key]['weights'][0][0], (2, 3, 0, 1))
            upsampled.append(
                net_utils.conv2d_transpose_strided(
                    prep, upsample_kernel,
                    output_shape=upsample_shape, stride=stride))

        ########################################
        # Concatenation and Weighted Summation
        ########################################
        fuse = tf.concat(upsampled, 3)
        output_fuse = _apply_pretrained_conv(
            fuse, weights, 'new_score_weighting', 'fuse')

    return output_fuse