def make_model(self, inputs, is_training):
    """Build a VGG-style encoder followed by an FCN decoder with skip additions.

    Five conv/conv/max-pool stages feed a three-layer convolutional head whose
    last layer has ``self.N_CLASS`` channels.  The head is upsampled with
    transposed convolutions; the ``layer4_3`` and ``layer3_3`` pooling outputs
    are added in on the way up to ``self.RESIZE`` x ``self.RESIZE``.

    Args:
        inputs: Image batch tensor fed to the first convolution.  The
            hard-coded 14x14 / 28x28 decoder shapes presumably assume a
            224x224 input -- TODO confirm against the caller.
        is_training: Flag forwarded to every ``bn`` call.

    Returns:
        Tuple ``(logits, prediction)``: the output of the last transposed
        convolution, and its argmax over axis 3 with that axis re-added.
    """
    # Stage 1 is the only stage without batch normalisation.
    layer1_1 = relu(conv2d(inputs, 64, [3, 3], name='layer1_1'))
    layer1_2 = relu(conv2d(layer1_1, 64, [3, 3], name='layer1_2'))
    layer1_3 = max_pool(layer1_2, name='layer1_3')  # 1/2 of the input size

    layer2_1 = relu(bn(conv2d(layer1_3, 128, [3, 3], name='layer2_1'), is_training))
    layer2_2 = relu(bn(conv2d(layer2_1, 128, [3, 3], name='layer2_2'), is_training))
    layer2_3 = max_pool(layer2_2, name='layer2_3')  # 1/4 of the input size

    layer3_1 = relu(bn(conv2d(layer2_3, 256, [3, 3], name='layer3_1'), is_training))
    layer3_2 = relu(bn(conv2d(layer3_1, 256, [3, 3], name='layer3_2'), is_training))
    layer3_3 = max_pool(layer3_2, name='layer3_3')  # 1/8 of the input size

    layer4_1 = relu(bn(conv2d(layer3_3, 512, [3, 3], name='layer4_1'), is_training))
    layer4_2 = relu(bn(conv2d(layer4_1, 512, [3, 3], name='layer4_2'), is_training))
    layer4_3 = max_pool(layer4_2, name='layer4_3')  # 1/16 of the input size

    layer5_1 = relu(bn(conv2d(layer4_3, 512, [3, 3], name='layer5_1'), is_training))
    layer5_2 = relu(bn(conv2d(layer5_1, 512, [3, 3], name='layer5_2'), is_training))
    layer5_3 = max_pool(layer5_2, name='layer5_3')  # 1/32 of the input size

    # Convolutional head; the last layer emits one channel per class.
    layer6_1 = relu(bn(conv2d(layer5_3, 2048, [7, 7], name='layer6_1'), is_training))
    layer6_2 = relu(bn(conv2d(layer6_1, 2048, [1, 1], name='layer6_2'), is_training))
    layer6_3 = relu(
        bn(conv2d(layer6_2, self.N_CLASS, [1, 1], name='layer6_3'), is_training))

    # Decoder: upsample, add the matching pooling output, repeat.
    layer7_1 = conv2d_t(layer6_3, [None, 14, 14, 512], [4, 4], name='layer7_1')
    # The add op was previously labelled 'layer7_3', duplicating the name of
    # the transposed convolution below; it now matches its own variable.
    layer7_2 = tf.add(layer7_1, layer4_3, name='layer7_2')
    layer7_3 = conv2d_t(layer7_2, [None, 28, 28, 256], [4, 4], name='layer7_3')
    layer7_4 = tf.add(layer7_3, layer3_3, name='layer7_4')
    layer7_5 = conv2d_t(
        layer7_4,
        [None, self.RESIZE, self.RESIZE, self.N_CLASS],
        [16, 16],
        strides=[1, 8, 8, 1],
        name='layer7_5')

    annot_pred = tf.argmax(layer7_5, axis=3)
    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    expand_pred = tf.expand_dims(annot_pred, axis=3)
    return layer7_5, expand_pred
def generator_AB(self, inputs, is_training, reuse=False):
    """Build the 'Generator_AB' encoder/decoder graph.

    Three conv stages (64, 128, 256 filters) separated by max-pooling are
    followed by two transposed-convolution stages whose outputs are
    concatenated on axis 3 with the encoder activations of the same stage.
    Two final convolutions reduce the result to a single channel.

    Args:
        inputs: Input tensor fed to the first convolution.
        is_training: Flag forwarded to every ``bn`` call.
        reuse: Passed to the outer ``tf.variable_scope``.

    Returns:
        Tuple ``(sigmoid_output, pre_sigmoid_output)``.
    """
    with tf.variable_scope('Generator_AB', reuse=reuse):
        with tf.variable_scope('g_ab_hidden1'):
            enc1 = conv2d(inputs, 64, [3, 3], initializer='random', name='conv_1')
            enc1 = lrelu(bn(enc1, is_training))

        with tf.variable_scope('g_ab_hidden2'):
            pooled1 = max_pool(enc1, name='pool1')
            enc2 = conv2d(pooled1, 128, [3, 3], initializer='random', name='conv_2')
            enc2 = lrelu(bn(enc2, is_training))

        with tf.variable_scope('g_ab_hidden3'):
            pooled2 = max_pool(enc2, name='pool2')
            enc3 = conv2d(pooled2, 256, [3, 3], initializer='random', name='conv_3')
            enc3 = lrelu(bn(enc3, is_training))

        with tf.variable_scope('g_ab_hidden4'):
            up4 = conv2d_t(enc3, [None, 14, 14, 128], [2, 2],
                           initializer='random', name='convT_4')
            # Upsampled features first, then the stage-2 encoder features.
            merged4 = tf.concat([up4, enc2], axis=3)
            dec4 = conv2d(merged4, 128, [3, 3], initializer='random', name='conv_4')
            dec4 = lrelu(bn(dec4, is_training))

        with tf.variable_scope('g_ab_hidden5'):
            up5 = conv2d_t(dec4, [None, 28, 28, 64], [2, 2],
                           initializer='random', name='convT5')
            # Here the encoder features come first in the concatenation.
            merged5 = tf.concat([enc1, up5], axis=3)
            squeezed = conv2d(merged5, 1, [3, 3], initializer='random', name='conv5')
            raw_out = conv2d(squeezed, 1, [1, 1], initializer='random', name='conv6')

        return tf.nn.sigmoid(raw_out), raw_out
def make_model(self, inputs, is_training):
    """Build a VGG-style encoder with a single x32 transposed-conv upsampling.

    No intermediate pooling outputs are reused: the class-score map produced
    by the convolutional head is upsampled in one step to 224x224.

    Args:
        inputs: Image batch tensor fed to the first convolution.  The
            hard-coded 224x224 output shape presumably matches the input
            size -- TODO confirm against the caller.
        is_training: Flag forwarded to every ``bn`` call.

    Returns:
        Tuple ``(logits, prediction)``: the upsampled score map, and its
        argmax over axis 3 with that axis re-added.
    """
    # Stage 1 is the only stage without batch normalisation.
    layer1_1 = relu(conv2d(inputs, 64, [3, 3], name='layer1_1'))
    layer1_2 = relu(conv2d(layer1_1, 64, [3, 3], name='layer1_2'))
    layer1_3 = max_pool(layer1_2, name='layer1_3')  # 1/2 of the input size

    layer2_1 = relu(bn(conv2d(layer1_3, 128, [3, 3], name='layer2_1'), is_training))
    layer2_2 = relu(bn(conv2d(layer2_1, 128, [3, 3], name='layer2_2'), is_training))
    layer2_3 = max_pool(layer2_2, name='layer2_3')  # 1/4 of the input size

    layer3_1 = relu(bn(conv2d(layer2_3, 256, [3, 3], name='layer3_1'), is_training))
    layer3_2 = relu(bn(conv2d(layer3_1, 256, [3, 3], name='layer3_2'), is_training))
    layer3_3 = max_pool(layer3_2, name='layer3_3')  # 1/8 of the input size

    layer4_1 = relu(bn(conv2d(layer3_3, 512, [3, 3], name='layer4_1'), is_training))
    layer4_2 = relu(bn(conv2d(layer4_1, 512, [3, 3], name='layer4_2'), is_training))
    layer4_3 = max_pool(layer4_2, name='layer4_3')  # 1/16 of the input size

    layer5_1 = relu(bn(conv2d(layer4_3, 512, [3, 3], name='layer5_1'), is_training))
    layer5_2 = relu(bn(conv2d(layer5_1, 512, [3, 3], name='layer5_2'), is_training))
    layer5_3 = max_pool(layer5_2, name='layer5_3')  # 1/32 of the input size

    # 7x7 convolution standing in for the first fully connected layer.
    # NOTE(review): an earlier comment claimed a [batch, 1, 1, 2048] output,
    # but the x32 upsampling to 224 below implies the map stays 7x7, so
    # `conv2d` presumably pads -- TODO confirm.
    layer6_1 = relu(bn(conv2d(layer5_3, 2048, [7, 7], name='layer6_1'), is_training))
    layer6_2 = relu(bn(conv2d(layer6_1, 2048, [1, 1], name='layer6_2'), is_training))
    layer6_3 = relu(
        bn(conv2d(layer6_2, self.N_CLASS, [1, 1], name='layer6_3'), is_training))

    # Previous pooling outputs are not used here; the last layer is simply
    # enlarged 32 times.  With a transposed convolution the scale factor is
    # set by the strides: strides=[1, 32, 32, 1] enlarges 32x, and
    # strides=[1, 16, 16, 1] would enlarge 16x.
    layer7_1 = conv2d_t(
        layer6_3,
        [None, 224, 224, self.N_CLASS],
        [4, 4],
        strides=[1, 32, 32, 1],
        name='layer7_1')

    annot_pred = tf.argmax(layer7_1, axis=3)
    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    expand_pred = tf.expand_dims(annot_pred, axis=3)
    return layer7_1, expand_pred
def make_model(self, inputs, is_training):
    """Build an encoder/decoder segmentation graph with concatenated skips.

    The encoder halves the resolution four times; the decoder doubles it back
    with transposed convolutions, concatenating on axis 3 the pre-pooling
    encoder activations of the matching stage before each pair of convs.

    Args:
        inputs: Image batch tensor.  The hard-coded decoder shapes
            (28, 56, 112, 224) presumably assume a 224x224 input -- TODO
            confirm against the caller.
        is_training: Flag forwarded to every ``bn`` call.

    Returns:
        Tuple ``(logits, prediction, bottleneck)``: the 1x1-conv class scores,
        their argmax over axis 3 with that axis re-added, and the deepest
        encoder activation ``layer5_2``.
    """
    with tf.variable_scope('ENCODER'):
        # The first two convolutions have neither batch norm nor relu here.
        layer1_1 = conv2d(inputs, 32, [3, 3], name='layer1_1')
        layer1_2 = conv2d(layer1_1, 32, [3, 3], name='layer1_2')
        layer1_3 = max_pool(layer1_2, name='layer1_3')  # 1/2 size, (112, 112)

        layer2_1 = relu(bn(conv2d(layer1_3, 64, [3, 3], name='layer2_1'), is_training))
        layer2_2 = relu(bn(conv2d(layer2_1, 64, [3, 3], name='layer2_2'), is_training))
        layer2_3 = max_pool(layer2_2, name='layer2_3')  # 1/4 size, (56, 56)

        layer3_1 = relu(bn(conv2d(layer2_3, 128, [3, 3], name='layer3_1'), is_training))
        layer3_2 = relu(bn(conv2d(layer3_1, 128, [3, 3], name='layer3_2'), is_training))
        layer3_3 = max_pool(layer3_2, name='layer3_3')  # 1/8 size, (28, 28)

        layer4_1 = relu(bn(conv2d(layer3_3, 256, [3, 3], name='layer4_1'), is_training))
        layer4_2 = relu(bn(conv2d(layer4_1, 256, [3, 3], name='layer4_2'), is_training))
        layer4_3 = max_pool(layer4_2, name='layer4_3')  # 1/16 size, (14, 14)

        layer5_1 = relu(bn(conv2d(layer4_3, 512, [3, 3], name='layer5_1'), is_training))
        layer5_2 = relu(bn(conv2d(layer5_1, 512, [3, 3], name='layer5_2'), is_training))

    with tf.variable_scope('DECODER'):
        layer6_1 = relu(
            bn(conv2d_t(layer5_2, [None, 28, 28, 256], [2, 2], name='layer6_1'),
               is_training))
        layer6_2 = tf.concat([layer4_2, layer6_1], axis=3, name='layer6_2')
        layer6_3 = relu(bn(conv2d(layer6_2, 256, [3, 3], name='layer6_3'), is_training))
        layer6_4 = relu(bn(conv2d(layer6_3, 256, [3, 3], name='layer6_4'), is_training))

        layer7_1 = relu(
            bn(conv2d_t(layer6_4, [None, 56, 56, 128], [2, 2], name='layer7_1'),
               is_training))
        layer7_2 = tf.concat([layer3_2, layer7_1], axis=3, name='layer7_2')
        # NOTE(review): these two conv names are one behind their variables
        # ('layer7_2'/'layer7_3' instead of 'layer7_3'/'layer7_4').  They are
        # kept as-is because renaming would presumably change the variable
        # names stored in existing checkpoints -- confirm before changing.
        layer7_3 = relu(bn(conv2d(layer7_2, 128, [3, 3], name='layer7_2'), is_training))
        layer7_4 = relu(bn(conv2d(layer7_3, 128, [3, 3], name='layer7_3'), is_training))

        layer8_1 = relu(
            bn(conv2d_t(layer7_4, [None, 112, 112, 64], [2, 2], name='layer8_1'),
               is_training))
        layer8_2 = tf.concat([layer2_2, layer8_1], axis=3, name='layer8_2')
        layer8_3 = relu(bn(conv2d(layer8_2, 64, [3, 3], name='layer8_3'), is_training))
        layer8_4 = relu(bn(conv2d(layer8_3, 64, [3, 3], name='layer8_4'), is_training))

        layer9_1 = relu(
            bn(conv2d_t(layer8_4, [None, 224, 224, 32], [2, 2], name='layer9_1'),
               is_training))
        layer9_2 = tf.concat([layer1_2, layer9_1], axis=3, name='layer9_2')
        layer9_3 = relu(
            bn(conv2d(layer9_2, self.N_CLASS, [3, 3], name='layer9_3'), is_training))
        layer9_4 = relu(
            bn(conv2d(layer9_3, self.N_CLASS, [3, 3], name='layer9_4'), is_training))

        # NOTE(review): the source indentation was lost; confirm that the
        # 'logits' conv belongs inside the 'DECODER' scope.
        logits = conv2d(layer9_4, self.N_CLASS, [1, 1], name='logits')

    annot_pred = tf.argmax(logits, axis=3)
    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    expand_pred = tf.expand_dims(annot_pred, axis=3)
    return logits, expand_pred, layer5_2
def make_model(self, inputs, is_training):
    """Build a Darknet19 backbone with a passthrough detection head.

    Args:
        inputs: Image batch tensor fed to the first convolution.
        is_training: Flag forwarded to every ``bn`` call.

    Returns:
        Output of the final 1x1 convolution, with
        ``self.N_ANCHORS * (5 + self.N_CLASSES)`` channels.
    """
    with tf.variable_scope('Darknet19'):
        net = lrelu(bn(conv2d(inputs, 32, [3, 3], name='conv1'), is_training))
        net = max_pool(net, name='pool1')

        net = lrelu(bn(conv2d(net, 64, [3, 3], name='conv2'), is_training))
        net = max_pool(net, name='pool2')

        net = lrelu(bn(conv2d(net, 128, [3, 3], name='conv3'), is_training))
        net = lrelu(bn(conv2d(net, 64, [1, 1], name='conv4'), is_training))
        net = lrelu(bn(conv2d(net, 128, [3, 3], name='conv5'), is_training))
        net = max_pool(net, name='pool3')

        net = lrelu(bn(conv2d(net, 256, [3, 3], name='conv6'), is_training))
        net = lrelu(bn(conv2d(net, 128, [1, 1], name='conv7'), is_training))
        net = lrelu(bn(conv2d(net, 256, [3, 3], name='conv8'), is_training))
        net = max_pool(net, name='pool4')

        net = lrelu(bn(conv2d(net, 512, [3, 3], name='conv9'), is_training))
        net = lrelu(bn(conv2d(net, 256, [1, 1], name='conv10'), is_training))
        net = lrelu(bn(conv2d(net, 512, [3, 3], name='conv11'), is_training))
        # NOTE(review): every other 256/512-filter reduction layer in this
        # backbone uses a 1x1 kernel; the 3x3 here may be a typo -- confirm.
        net = lrelu(bn(conv2d(net, 256, [3, 3], name='conv12'), is_training))
        net = lrelu(bn(conv2d(net, 512, [3, 3], name='conv13'), is_training))
        # Keep the pre-pooling activation for the passthrough branch below.
        skip = net
        # Fix: pool5 used to be applied to the conv12 output, so conv13 was
        # bypassed on the main path and only fed the passthrough branch.
        net = max_pool(net, name='pool5')

        net = lrelu(bn(conv2d(net, 1024, [3, 3], name='conv14'), is_training))
        net = lrelu(bn(conv2d(net, 512, [1, 1], name='conv15'), is_training))
        net = lrelu(bn(conv2d(net, 1024, [3, 3], name='conv16'), is_training))
        net = lrelu(bn(conv2d(net, 512, [1, 1], name='conv17'), is_training))
        net = lrelu(bn(conv2d(net, 1024, [3, 3], name='conv18'), is_training))

    # NOTE(review): the source indentation was lost; confirm that 'Detection'
    # is a sibling of 'Darknet19' rather than nested inside it.
    with tf.variable_scope('Detection'):
        net = lrelu(bn(conv2d(net, 1024, [3, 3], name='conv19'), is_training))
        net = lrelu(bn(conv2d(net, 1024, [3, 3], name='conv20'), is_training))

        # Passthrough: squeeze the higher-resolution features to 64 channels,
        # then fold each 2x2 spatial block into channels so the result can be
        # concatenated with the pooled main path.
        passthrough = lrelu(
            bn(conv2d(skip, 64, [1, 1], name='conv21_passthrough'), is_training))
        passthrough = tf.space_to_depth(passthrough, block_size=2)
        concated = tf.concat([passthrough, net], axis=3)

        net = lrelu(bn(conv2d(concated, 1024, [3, 3], name='conv22'), is_training))
        out_depth = self.N_ANCHORS * (5 + self.N_CLASSES)
        net = conv2d(net, out_depth, [1, 1], name='conv23')

    return net
def initial_block(self, inputs, is_training):
    """Build the two-branch stem and concatenate its branches on axis 3.

    One branch is a stride-2 3x3 convolution with 13 filters followed by
    ``bn`` and ``prelu``; the other is a max-pool of the raw input.

    Args:
        inputs: Input tensor fed to both branches.
        is_training: Flag forwarded to ``bn``.

    Returns:
        The concatenation ``[conv_branch, pool_branch]`` along axis 3.
    """
    strided = conv2d(inputs, 13, [3, 3], name='init_conv', strides=[1, 2, 2, 1])
    normalized = bn(strided, is_training)
    conv_branch = prelu(normalized, name='init_conv')

    pool_branch = max_pool(inputs, name='init_pool')

    return tf.concat([conv_branch, pool_branch], axis=3, name='init_concat')
def make_model(self, inputs, is_training):
    """Build a residual classification graph and return the class scores.

    A strided 7x7 stem with max-pooling is followed by four residual stages,
    each made of one ``conv_block`` ('a') and several ``identity_block`` calls.
    The result is average-pooled, flattened and passed to a fully connected
    layer with ``self.N_CLASS`` outputs.

    Args:
        inputs: Image batch tensor fed to the stem convolution.
        is_training: Flag forwarded to ``bn`` and to every residual block.

    Returns:
        Output of the final ``fully_connect`` layer.
    """
    # (scope, bottleneck depths, identity-block labels, extra conv_block kwargs)
    stage_table = (
        ('STAGE_2', (64, 64, 256), ('b', 'c'), {'s': 1}),
        ('STAGE_3', (128, 128, 512), ('b', 'c'), {}),
        ('STAGE_4', (256, 256, 1024), ('b', 'c', 'd', 'e', 'f'), {}),
        ('STAGE_5', (512, 512, 2048), ('b', 'c'), {}),
    )

    with tf.variable_scope('STAGE_1'):
        stem = conv2d(inputs, 64, [7, 7], strides=[1, 2, 2, 1], name='initial_block')
        net = max_pool(relu(bn(stem, is_training)))

    for scope_name, depths, identity_labels, conv_kwargs in stage_table:
        with tf.variable_scope(scope_name):
            # A fresh list per call, exactly as a list literal would provide.
            net = self.conv_block(net, list(depths), is_training, 'a', **conv_kwargs)
            for label in identity_labels:
                net = self.identity_block(net, list(depths), is_training, label)

    with tf.variable_scope('FINAL_STAGE'):
        pooled = avg_pool(net, [1, 7, 7, 1], [1, 1, 1, 1], padding='VALID')
        _, height, width, depth = pooled.get_shape().as_list()
        flat = tf.reshape(pooled, [-1, height * width * depth])
        scores = fully_connect(flat, self.N_CLASS, 'fc')

    return scores
def make_model(self, inputs, keep_prob):
    """Build a VGG-style classifier and return its logits.

    Thirteen 3x3 convolutions in five groups, with a max-pool after each of
    the first four groups, are followed by a flatten and three fully
    connected layers; dropout is applied after the first two.

    Args:
        inputs: Image batch tensor fed to the first convolution.
        keep_prob: Keep probability passed to both ``tf.nn.dropout`` calls.

    Returns:
        Output of the last fully connected layer (``self.N_CLASS`` units).
    """
    # (layer name, filters) -- ``None`` filters marks a max-pool layer.
    layer_plan = (
        ('conv1_1', 64), ('conv1_2', 64), ('pool1', None),
        ('conv2_1', 128), ('conv2_2', 128), ('pool2', None),
        ('conv3_1', 256), ('conv3_2', 256), ('conv3_3', 256), ('pool3', None),
        ('conv4_1', 512), ('conv4_2', 512), ('conv4_3', 512), ('pool4', None),
        ('conv5_1', 512), ('conv5_2', 512), ('conv5_3', 512),
    )

    net = inputs
    for layer_name, filters in layer_plan:
        if filters is None:
            net = max_pool(net, name=layer_name)
        else:
            net = conv2d(net, filters, [3, 3], name=layer_name)

    # Flatten everything but the batch axis using the static shape.
    _, height, width, depth = net.get_shape().as_list()
    flatten = tf.reshape(net, shape=[-1, height * width * depth], name='flatten')

    hidden = fully_connect(flatten, 4096, name='fc1')
    hidden = tf.nn.dropout(hidden, keep_prob=keep_prob, name='fc1_dropout')
    hidden = fully_connect(hidden, 4096, name='fc2')
    hidden = tf.nn.dropout(hidden, keep_prob=keep_prob, name='fc2_dropout')

    return fully_connect(hidden, self.N_CLASS, name='fc3')
def make_model(self, inputs, is_training):
    """Build a residual encoder with a summed atrous-pyramid decoder.

    The encoder is a residual backbone whose last two stages use atrous
    blocks (rates 2 and 4) with stride 1.  The decoder runs four parallel
    atrous branches (rates 6, 12, 18, 24), each followed by two 1x1
    convolutions, sums them, and bilinearly resizes the sum to
    ``self.RESIZE`` x ``self.RESIZE``.

    Args:
        inputs: Image batch tensor fed to the first convolution.
        is_training: Flag forwarded to ``bn`` and to every residual block.

    Returns:
        Tuple ``(logits, prediction)``: the resized score map, and its argmax
        over axis 3 with that axis re-added.
    """
    # Encoder: feature extraction with the residual backbone.
    with tf.variable_scope('ResNet50'):
        x = conv2d(inputs, 64, [7, 7], strides=[1, 2, 2, 1], name='conv1')  # size 1/2
        x = bn(x, is_training)
        x = relu(x)
        x = max_pool(x, ksize=[1, 3, 3, 1], name='pool1')  # size 1/4

        x = self.conv_block(x, [64, 64, 256], '2_1', is_training, s=1)
        x = self.identity_block(x, [64, 64, 256], '2_2', is_training)
        x = self.identity_block(x, [64, 64, 256], '2_3', is_training)

        x = self.conv_block(x, [128, 128, 512], '3_1', is_training)
        x = self.identity_block(x, [128, 128, 512], '3_2', is_training)
        x = self.identity_block(x, [128, 128, 512], '3_3', is_training)

        x = self.atrous_conv_block(x, [256, 256, 1024], '4_1', 2, is_training, s=1)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_2', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_3', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_4', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_5', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_6', 2, is_training)

        x = self.atrous_conv_block(x, [512, 512, 2048], '5_1', 4, is_training, s=1)
        x = self.atrous_identity_block(x, [512, 512, 2048], '5_2', 4, is_training)
        x = self.atrous_identity_block(x, [512, 512, 2048], '5_3', 4, is_training)

    # Decoder: atrous pyramid pooling.
    with tf.variable_scope('ASPP'):
        branches = []
        for rate in (6, 12, 18, 24):
            # Layer names come out as 'rate6', 'rate6_conv1', 'rate6_conv2', ...
            prefix = 'rate%d' % rate
            branch = atrous_conv2d(x, self.N_CLASS, [3, 3], rate, name=prefix)
            branch = conv2d(branch, self.N_CLASS, [1, 1], name=prefix + '_conv1')
            branch = conv2d(branch, self.N_CLASS, [1, 1], name=prefix + '_conv2')
            branches.append(branch)

        add_aspp = tf.add_n(branches)
        logits = tf.image.resize_bilinear(add_aspp, size=[self.RESIZE, self.RESIZE])

    pred = tf.argmax(logits, axis=3)
    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    pred = tf.expand_dims(pred, axis=3)
    return logits, pred
def make_model(self, inputs, is_training):
    """Build a residual encoder with a pyramid-pooling decoder.

    The backbone's final feature map is max-pooled at four window sizes; each
    pooled map is convolved to 512 channels, resized to 6x6 and concatenated
    with the feature map.  Four transposed convolutions then upsample the
    fused features to ``self.RESIZE`` x ``self.RESIZE``.

    Args:
        inputs: Image batch tensor.  The hard-coded 6x6 / 12 / 24 / 48 sizes
            presumably assume a 192x192 input (48 * 4) -- TODO confirm.
        is_training: Flag forwarded to ``bn`` and to every residual block.

    Returns:
        Tuple ``(logits, prediction)``: the output of the last transposed
        convolution, and its argmax over axis 3 with that axis re-added.
    """
    with tf.variable_scope('ResNet50'):
        x = conv2d(inputs, 64, [7, 7], strides=[1, 2, 2, 1], name='conv1')  # size 1/2
        x = bn(x, is_training)
        x = relu(x)
        x = max_pool(x, ksize=[1, 3, 3, 1], name='max_pool1')  # size 1/4

        x = self.conv_block(x, [64, 64, 256], is_training, '2_1', s=1)
        x = self.identity_block(x, [64, 64, 256], is_training, '2_2')
        x = self.identity_block(x, [64, 64, 256], is_training, '2_3')

        x = self.conv_block(x, [128, 128, 512], is_training, '3_1')
        x = self.identity_block(x, [128, 128, 512], is_training, '3_2')
        x = self.identity_block(x, [128, 128, 512], is_training, '3_3')

        x = self.conv_block(x, [256, 256, 1024], is_training, '4_1')
        x = self.identity_block(x, [256, 256, 1024], is_training, '4_2')
        x = self.identity_block(x, [256, 256, 1024], is_training, '4_3')
        x = self.identity_block(x, [256, 256, 1024], is_training, '4_4')
        x = self.identity_block(x, [256, 256, 1024], is_training, '4_5')
        x = self.identity_block(x, [256, 256, 1024], is_training, '4_6')

        x = self.conv_block(x, [512, 512, 2048], is_training, '5_1')
        x = self.identity_block(x, [512, 512, 2048], is_training, '5_2')
        feature_map = self.identity_block(
            x, [512, 512, 2048], is_training, '5_3')  # size: (6, 6)

    with tf.variable_scope('Pyramid_Pool'):
        # Pool the 6x6 map down to 1x1, 2x2 and 3x3 grids; the 1x1-window
        # pool with stride 1 leaves the 6x6 map unchanged.
        pool_1x1 = max_pool(feature_map, ksize=[1, 6, 6, 1],
                            strides=[1, 6, 6, 1], name='pool_1x1')
        pool_2x2 = max_pool(feature_map, ksize=[1, 3, 3, 1],
                            strides=[1, 3, 3, 1], name='pool_2x2')
        pool_3x3 = max_pool(feature_map, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], name='pool_3x3')
        pool_6x6 = max_pool(feature_map, ksize=[1, 1, 1, 1],
                            strides=[1, 1, 1, 1], name='pool_6x6')

        # Reduce each pooled map to 512 channels.
        conv_1x1 = relu(bn(conv2d(pool_1x1, 512, [3, 3], name='conv_1x1'), is_training))
        conv_2x2 = relu(bn(conv2d(pool_2x2, 512, [3, 3], name='conv_2x2'), is_training))
        conv_3x3 = relu(bn(conv2d(pool_3x3, 512, [3, 3], name='conv_3x3'), is_training))
        conv_6x6 = relu(bn(conv2d(pool_6x6, 512, [3, 3], name='conv_6x6'), is_training))

        # Bring every pyramid level back to the feature-map resolution.
        upconv_1x1 = tf.image.resize_bilinear(conv_1x1, [6, 6])
        upconv_2x2 = tf.image.resize_bilinear(conv_2x2, [6, 6])
        upconv_3x3 = tf.image.resize_bilinear(conv_3x3, [6, 6])
        upconv_6x6 = tf.image.resize_bilinear(conv_6x6, [6, 6])

        concated = tf.concat(
            [feature_map, upconv_1x1, upconv_2x2, upconv_3x3, upconv_6x6], axis=3)

        # NOTE(review): the source indentation was lost; confirm that the
        # 'out1'..'out5' layers belong inside the 'Pyramid_Pool' scope.
        out = relu(bn(conv2d(concated, 512, [3, 3], name='out1'), is_training))
        out = conv2d_t(out, [None, 12, 12, 256], [3, 3], name='out2')  # (12, 12)
        out = conv2d_t(out, [None, 24, 24, self.N_CLASS], [3, 3], name='out3')  # (24, 24)
        out = conv2d_t(out, [None, 48, 48, self.N_CLASS], [3, 3], name='out4')  # (48, 48)
        out = conv2d_t(
            out,
            [None, self.RESIZE, self.RESIZE, self.N_CLASS],
            [3, 3],
            name='out5',
            strides=[1, 4, 4, 1])  # (RESIZE, RESIZE)

    pred = tf.argmax(out, axis=3)
    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    pred = tf.expand_dims(pred, axis=3)
    return out, pred
def bottleneck(self, inputs, out_depth, f_h, f_w, is_training, keep_prob,
               dilated_rate=None, mode=None, scope=None):
    """Build one two-branch bottleneck module inside variable scope ``scope``.

    Every mode builds a main branch and a sub branch.  The sub branch is
    passed through ``spatial_dropout`` and ``prelu``, added to the main
    branch, and the sum goes through a final ``prelu``.

    Args:
        inputs: Input tensor; its static size on axis 3 must be known.
        out_depth: Number of output channels of the sub branch.
        f_h: Kernel height of the middle convolution.
        f_w: Kernel width of the middle convolution.
        is_training: Flag forwarded to every ``bn`` call.
        keep_prob: Forwarded to ``spatial_dropout``.
        dilated_rate: Rate passed to ``atrous_conv2d``; used only by the
            ``'dilated'`` mode.
        mode: One of ``'downsampling'``, ``'dilated'``, ``'asymmetric'``,
            ``'upsampling'`` or ``'normal'``.
        scope: Name of the enclosing ``tf.variable_scope``.

    Returns:
        The activated sum of the main and sub branches.

    Raises:
        ValueError: If ``mode`` is not one of the supported values
            (previously this silently returned ``None``).
    """
    known_modes = ('downsampling', 'dilated', 'asymmetric', 'upsampling', 'normal')
    if mode not in known_modes:
        raise ValueError(
            'Unknown bottleneck mode %r; expected one of %s'
            % (mode, ', '.join(known_modes)))

    in_depth = inputs.get_shape().as_list()[3]
    # The sub branch works on a quarter of the input channels.
    reduce_depth = in_depth // 4

    with tf.variable_scope(scope):
        if mode == 'downsampling':
            # Main branch: max-pool, then zero-pad axis 3 by the channel
            # difference so it can be added to the sub branch.
            main_branch = max_pool(inputs, name='_pool')
            depth_to_pad = abs(in_depth - out_depth)
            paddings = tf.convert_to_tensor(
                [[0, 0], [0, 0], [0, 0], [0, depth_to_pad]])
            main_branch = tf.pad(main_branch, paddings=paddings, name='_main_padding')

            sub_branch = prelu(
                bn(conv2d(inputs, reduce_depth, [2, 2], name='_conv1',
                          strides=[1, 2, 2, 1]),
                   is_training),
                name='prelu_conv1')
            sub_branch = prelu(
                bn(conv2d(sub_branch, reduce_depth, [f_h, f_w], name='_conv2',
                          strides=[1, 1, 1, 1]),
                   is_training),
                name='prelu_conv2')
            sub_branch = prelu(
                bn(conv2d(sub_branch, out_depth, [1, 1], name='_conv3',
                          strides=[1, 1, 1, 1]),
                   is_training),
                name='prelu_conv3')

        elif mode == 'dilated':
            main_branch = inputs

            sub_branch = prelu(
                bn(conv2d(inputs, reduce_depth, [1, 1], name='_conv1'), is_training),
                name='prelu_conv1')
            sub_branch = prelu(
                bn(atrous_conv2d(sub_branch, reduce_depth, [f_h, f_w], dilated_rate,
                                 name='_conv2'),
                   is_training),
                name='prelu_conv2')
            # Fix: this layer used to read `inputs`, which discarded the 1x1
            # reduction and the atrous convolution built just above.
            sub_branch = prelu(
                bn(conv2d(sub_branch, out_depth, [1, 1], name='_conv3'), is_training),
                name='prelu_conv3')

        elif mode == 'asymmetric':
            main_branch = inputs

            sub_branch = prelu(
                bn(conv2d(inputs, reduce_depth, [1, 1], name='_conv1'), is_training),
                name='prelu_conv1')
            # The f_h x f_w kernel is split into an (f_h x 1) and a (1 x f_w) pass.
            sub_branch = prelu(
                bn(conv2d(sub_branch, reduce_depth, [f_h, 1], name='_conv2'),
                   is_training),
                name='prelu_conv2')
            sub_branch = prelu(
                bn(conv2d(sub_branch, reduce_depth, [1, f_w], name='_conv3'),
                   is_training),
                name='prelu_conv3')
            sub_branch = prelu(
                bn(conv2d(sub_branch, out_depth, [1, 1], name='_conv4'), is_training),
                name='prelu_conv4')

        elif mode == 'upsampling':
            # Bilinear interpolation is used instead of the unpooling
            # described in the paper.
            in_shape = inputs.get_shape().as_list()
            doubled_size = [in_shape[1] * 2, in_shape[2] * 2]

            main_branch = tf.image.resize_bilinear(inputs, size=doubled_size)
            main_branch = prelu(
                bn(conv2d(main_branch, out_depth, [3, 3], name='_conv0'), is_training),
                name='prelu_conv1')

            sub_branch = prelu(
                bn(conv2d(inputs, reduce_depth, [1, 1], name='_conv1'), is_training),
                name='prelu_conv2')
            sub_branch = prelu(
                bn(conv2d_t(sub_branch,
                            [in_shape[0], doubled_size[0], doubled_size[1],
                             reduce_depth],
                            [3, 3], name='_conv2'),
                   is_training),
                name='prelu_conv3')
            sub_branch = prelu(
                bn(conv2d(sub_branch, out_depth, [1, 1], name='_conv3'), is_training),
                name='prelu_conv4')

        else:  # mode == 'normal'
            main_branch = inputs

            sub_branch = prelu(
                bn(conv2d(inputs, reduce_depth, [1, 1], name='_conv1',
                          strides=[1, 1, 1, 1]),
                   is_training),
                name='prelu_conv1')
            sub_branch = prelu(
                bn(conv2d(sub_branch, reduce_depth, [f_h, f_w], name='_conv2',
                          strides=[1, 1, 1, 1]),
                   is_training),
                name='prelu_conv2')
            sub_branch = prelu(
                bn(conv2d(sub_branch, out_depth, [1, 1], name='_conv3',
                          strides=[1, 1, 1, 1]),
                   is_training),
                name='prelu_conv3')

        # Shared tail: regularise the sub branch, then merge with the main one.
        sub_branch = prelu(spatial_dropout(sub_branch, keep_prob), name='prelu_dropout')
        return prelu(tf.add(main_branch, sub_branch), name='prelu_add')
def make_model(self, inputs, is_training):
    """Build a residual encoder with a concatenating atrous-pyramid decoder.

    The encoder is a residual backbone whose last two stages use atrous
    blocks (rates 2 and 4) with stride 1.  The decoder concatenates a
    globally averaged branch, a 1x1 branch and three atrous branches (rates
    6, 12, 18), fuses them with 1x1 convolutions into ``self.N_CLASS``
    channels, and bilinearly resizes to ``self.RESIZE`` x ``self.RESIZE``.

    Args:
        inputs: Image batch tensor fed to the first convolution.
        is_training: Flag forwarded to ``bn`` and to every residual block.

    Returns:
        Tuple ``(logits, prediction)``: the resized score map, and its argmax
        over axis 3 with that axis re-added.
    """
    # Encoder: feature extraction with the residual backbone.
    with tf.variable_scope('ResNet50'):
        x = conv2d(inputs, 64, [7, 7], strides=[1, 2, 2, 1], name='conv1')  # size 1/2
        x = bn(x, is_training)
        x = relu(x)
        x = max_pool(x, ksize=[1, 3, 3, 1], name='pool1')  # size 1/4

        x = self.conv_block(x, [64, 64, 256], '2_1', is_training, s=1)
        x = self.identity_block(x, [64, 64, 256], '2_2', is_training)
        x = self.identity_block(x, [64, 64, 256], '2_3', is_training)

        x = self.conv_block(x, [128, 128, 512], '3_1', is_training)
        x = self.identity_block(x, [128, 128, 512], '3_2', is_training)
        x = self.identity_block(x, [128, 128, 512], '3_3', is_training)

        x = self.atrous_conv_block(x, [256, 256, 1024], '4_1', 2, is_training, s=1)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_2', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_3', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_4', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_5', 2, is_training)
        x = self.atrous_identity_block(x, [256, 256, 1024], '4_6', 2, is_training)

        x = self.atrous_conv_block(x, [512, 512, 2048], '5_1', 4, is_training, s=1)
        x = self.atrous_identity_block(x, [512, 512, 2048], '5_2', 4, is_training)
        x = self.atrous_identity_block(x, [512, 512, 2048], '5_3', 4, is_training)

    # Decoder: atrous pyramid pooling.
    with tf.variable_scope('ASPP'):
        feature_map_shape = x.get_shape().as_list()

        # Global average pooling: average the feature map over its height
        # and width, then resize the result back to the feature-map size.
        feature_map = tf.reduce_mean(x, [1, 2], keepdims=True)
        feature_map = conv2d(feature_map, 256, [1, 1], name='gap_feature_map')
        feature_map = tf.image.resize_bilinear(
            feature_map, [feature_map_shape[1], feature_map_shape[2]])

        rate1 = conv2d(x, 256, [1, 1], name='rate1')
        rate6 = atrous_conv2d(x, 256, [3, 3], rate=6, name='rate6')
        rate12 = atrous_conv2d(x, 256, [3, 3], rate=12, name='rate12')
        rate18 = atrous_conv2d(x, 256, [3, 3], rate=18, name='rate18')

        concated = tf.concat([feature_map, rate1, rate6, rate12, rate18], axis=3)

        # NOTE(review): the source indentation was lost; confirm that the
        # 'net' and 'logits' layers belong inside the 'ASPP' scope.
        net = conv2d(concated, 256, [1, 1], name='net')
        logits = conv2d(net, self.N_CLASS, [1, 1], name='logits')
        logits = tf.image.resize_bilinear(
            logits, size=[self.RESIZE, self.RESIZE], name='out')

    pred = tf.argmax(logits, axis=3)
    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    pred = tf.expand_dims(pred, axis=3)
    return logits, pred