def vgg16_backbone(image):
    """Build a VGG16-style convolutional trunk.

    Five stages are created. Each stage is a run of ``convnormrelu`` layers
    followed by a 2x2, stride-2 ``MaxPooling`` with VALID padding. Conv2D,
    MaxPooling and BatchNorm layers are created with
    ``data_format='channels_first'``.

    Args:
        image: input tensor fed to ``conv1_1``.

    Returns:
        The output tensor of ``pool5``.
    """
    # (stage number, output channels, number of conv layers in the stage)
    stage_specs = (
        (1, 64, 2),
        (2, 128, 2),
        (3, 256, 3),
        (4, 512, 3),
        (5, 512, 3),
    )

    with argscope(Conv2D,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm],
                     data_format='channels_first'):
        net = image
        for stage, channels, depth in stage_specs:
            for layer in range(1, depth + 1):
                net = convnormrelu(
                    net, 'conv{}_{}'.format(stage, layer), channels)
            net = MaxPooling('pool{}'.format(stage), net, 2,
                             strides=2, padding='VALID')
        return net
def encoder(name, i, basis_filter_list, rot_matrix_list, nr_orients, filter_type, is_training):
    """Dense Steerable Filter Encoder.

    Two 7x7 ``GConv2D`` layers form the stem. Four stages follow, each made
    of a max-pool, a ``g_dense_blk`` and a 5x5 ``GConv2D``.

    Args:
        name: variable scope that wraps all layers.
        i: input tensor.
        basis_filter_list: indexable with 0 and 1; entry 1 is passed to the
            7x7 convolutions and entry 0 to the 5x5 convolutions.
        rot_matrix_list: indexed the same way as ``basis_filter_list``.
        nr_orients: forwarded to every ``GConv2D`` / ``g_dense_blk``.
        filter_type: forwarded to every ``GConv2D`` / ``g_dense_blk``.
        is_training: not used by this function.

    Returns:
        ``[c2, c3, c4, c5, c6]``: the output of ``ds_conv2`` followed by the
        outputs of ``ds_conv3`` .. ``ds_conv6``.
    """
    # The dense blocks receive the filter banks in (index 1, index 0) order.
    dense_basis_list = [basis_filter_list[1], basis_filter_list[0]]
    dense_rot_list = [rot_matrix_list[1], rot_matrix_list[0]]

    # (number passed to g_dense_blk, channels of the conv that follows it)
    stage_specs = ((3, 16), (4, 32), (5, 32), (6, 32))

    with tf.variable_scope(name):

        def gconv(layer_name, inputs, channels, ksize, bank, **kwargs):
            # Thin wrapper that fills in the arguments shared by all GConv2D calls.
            return GConv2D(layer_name, inputs, channels, ksize, nr_orients,
                           filter_type, basis_filter_list[bank],
                           rot_matrix_list[bank], **kwargs)

        stem = gconv('ds_conv1', i, 10, 7, 1, input_layer=True)
        feat = gconv('ds_conv2', stem, 10, 7, 1, activation='identity')
        skips = [feat]

        for stage, (dense_arg, channels) in enumerate(stage_specs, start=1):
            if stage == 1:
                # The first pooling layer is created without a padding argument.
                pooled = MaxPooling('max_pool1', feat, 2)
            else:
                pooled = MaxPooling('max_pool%d' % stage, feat, 2,
                                    padding='valid')
            dense = g_dense_blk('dense%d' % stage, pooled, [14, 6], [7, 5],
                                dense_arg, nr_orients, filter_type,
                                dense_basis_list, dense_rot_list)
            feat = gconv('ds_conv%d' % (stage + 2), dense, channels, 5, 0,
                         activation='identity')
            skips.append(feat)

    return skips
def vgg_gap(image, option, importance=False):
    """Build a VGG16-style classifier with global average pooling and optional ADL.

    The network is five stages of ``convnormrelu`` layers (max-pooling after
    stages 1-4), a final 1024-channel ``convnormrelu`` named ``'new'``,
    global average pooling and a fully connected layer. An ``ADL`` layer is
    inserted after a conv or pool whenever the matching entry of
    ``option.attdrop`` is truthy. Position ids are ``10 * stage + layer``
    after a conv, ``stage`` after a pool, and ``6`` after ``'new'``.

    Fixes relative to the previous version:
      * position 22 now calls ``ADL(22, ...)`` (it used to pass 21);
      * position 6 now applies ``ADL`` to ``convmaps``; previously the result
        was assigned to an unused variable, so the switch had no effect;
      * the unused tower-context lookup was dropped.

    Args:
        image: input tensor fed to ``conv1_1``.
        option: configuration object. ``option.attdrop`` is indexed by the
            position ids above, ``option.classnum`` is the number of output
            units, and the object itself is forwarded to ``convnormrelu``
            and ``ADL``.
        importance: unused; kept so existing callers keep working.

    Returns:
        Tuple ``(logits, convmaps)``: the output of the ``'linear'`` layer
        and the feature map fed to global average pooling.
    """
    # (output channels, number of conv layers) for stages 1..5
    stage_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    last_stage = len(stage_specs)

    def maybe_adl(position, tensor):
        # Deriving the id once avoids the copy-paste mismatch between the
        # attdrop key and the id handed to ADL.
        if option.attdrop[position]:
            return ADL(position, tensor, option)
        return tensor

    with argscope(Conv2D,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                     data_format='channels_first'):
        l = image
        for stage, (channels, depth) in enumerate(stage_specs, start=1):
            for layer in range(1, depth + 1):
                l = convnormrelu(
                    l, 'conv{}_{}'.format(stage, layer), channels, option)
                l = maybe_adl(10 * stage + layer, l)
            if stage < last_stage:
                # There is no pooling after the last stage.
                l = MaxPooling('pool{}'.format(stage), l, 2)
                l = maybe_adl(stage, l)

        convmaps = convnormrelu(l, 'new', 1024, option)
        convmaps = maybe_adl(6, convmaps)

        pre_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear', pre_logits, option.classnum,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    return logits, convmaps
def Fusion2DBlock(
    prevIn: Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor]],
    filters: int,
    kernel_size: int,
    stride: int,
    downscale: bool = True,
    activation=INReLU,
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """Run one fusion step over a (main, aux, mix) triple of feature maps.

    The main and aux inputs each pass through their own convolution. Their
    outputs, together with the previous mix output when it is not ``None``,
    are concatenated along the last axis and convolved to produce the new
    mix branch, which is then summed with the aux and main conv outputs.
    When ``stride > 1`` all three results are resized by ``stride``:
    max-pooled if ``downscale`` is true, otherwise passed to ``upsample``.

    Args:
        prevIn: ``(main, aux, mix)``; ``mix`` may be ``None``.
        filters: number of output channels of each convolution.
        kernel_size: kernel size of each convolution.
        stride: resize factor; values ``<= 1`` leave the outputs unresized.
        downscale: selects pooling (true) or upsampling (false).
        activation: activation handed to every ``Conv2D``.

    Returns:
        ``(main, aux, mix)`` after convolution, fusion and optional resizing.
    """
    main_feat = Conv2D("main_conv", prevIn[0], filters, kernel_size,
                       activation=activation)
    aux_feat = Conv2D("aux_conv", prevIn[1], filters, kernel_size,
                      activation=activation)

    carried_mix = prevIn[2]
    if carried_mix is None:
        to_fuse = [main_feat, aux_feat]
    else:
        to_fuse = [main_feat, aux_feat, carried_mix]

    fused = tf.concat(to_fuse, -1, "mix_input")
    mix_feat = Conv2D("mix_conv", fused, filters, kernel_size,
                      activation=activation)
    mix_feat = tf.add_n([aux_feat, main_feat, mix_feat], "mix_summation")

    if stride > 1:
        branches = (("main", main_feat), ("aux", aux_feat), ("mix", mix_feat))
        if downscale:
            resized = [
                MaxPooling(tag + "_pool", tensor, 3, strides=stride,
                           padding="SAME")
                for tag, tensor in branches
            ]
        else:
            resized = [
                upsample(tag + "_upsample", tensor, factor=stride)
                for tag, tensor in branches
            ]
        main_feat, aux_feat, mix_feat = resized

    return (main_feat, aux_feat, mix_feat)
def vgg_gap(image, option):
    """Build a VGG16-style classifier with global average pooling and optional gating.

    The network is five stages of ``convnormrelu`` layers (max-pooling after
    stages 1-4), a final 1024-channel ``convnormrelu`` named ``'new'``,
    global average pooling and a fully connected layer. ``gating_op`` is
    inserted after a conv or pool whenever the matching entry of
    ``option.gating_position`` is truthy. Position ids are
    ``10 * stage + layer`` after a conv, ``stage`` after a pool, and ``6``
    after ``'new'``.

    Fix relative to the previous version: at position 6 the gate is applied
    to ``convmaps`` (the output of ``'new'``). It used to be applied to the
    input of ``'new'``, which threw away that layer's output.

    Args:
        image: input tensor fed to ``conv1_1``.
        option: configuration object. ``option.gating_position`` is indexed
            by the position ids above, ``option.number_of_class`` is the
            number of output units, and the object itself is forwarded to
            ``gating_op``.

    Returns:
        Tuple ``(logits, convmaps)``: the output of the ``'linear'`` layer
        and the feature map fed to global average pooling.
    """
    # (output channels, number of conv layers) for stages 1..5
    stage_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    last_stage = len(stage_specs)

    def maybe_gate(position, tensor):
        # Apply the gate only where the configuration enables it.
        if option.gating_position[position]:
            return gating_op(tensor, option)
        return tensor

    with argscope(Conv2D, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                     data_format='channels_first'):
        l = image
        for stage, (channels, depth) in enumerate(stage_specs, start=1):
            for layer in range(1, depth + 1):
                l = convnormrelu(l, 'conv{}_{}'.format(stage, layer), channels)
                l = maybe_gate(10 * stage + layer, l)
            if stage < last_stage:
                # There is no pooling after the last stage.
                l = MaxPooling('pool{}'.format(stage), l, 2)
                l = maybe_gate(stage, l)

        convmaps = convnormrelu(l, 'new', 1024)
        convmaps = maybe_gate(6, convmaps)

        p_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear', p_logits, option.number_of_class,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    return logits, convmaps
def resnet_c4_backbone(image, num_blocks):
    """Build a ResNet trunk up to the third residual group (C4).

    The whole graph is built inside
    ``varreplace.freeze_variables(stop_gradient=False, skip_collection=True)``.
    The stem and the first group are additionally wrapped in
    ``backbone_scope`` with a freeze flag derived from
    ``cfg.BACKBONE.FREEZE_AT``.

    Args:
        image: input tensor; padding is applied to its last two axes.
        num_blocks: sequence of three block counts, one per residual group.

    Returns:
        The output tensor of ``group2``.
    """
    no_pad = [0, 0]

    with varreplace.freeze_variables(stop_gradient=False, skip_collection=True):
        assert len(num_blocks) == 3
        freeze_at = cfg.BACKBONE.FREEZE_AT

        # Stem: explicit padding followed by VALID conv and VALID pooling.
        with backbone_scope(freeze=freeze_at > 0):
            stem_pad = maybe_reverse_pad(2, 3)
            net = tf.pad(image, [no_pad, no_pad, stem_pad, stem_pad])
            net = Conv2D('conv0', net, 64, 7, strides=2, padding='VALID')
            pool_pad = maybe_reverse_pad(0, 1)
            net = tf.pad(net, [no_pad, no_pad, pool_pad, pool_pad])
            net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

        with backbone_scope(freeze=freeze_at > 1):
            c2 = resnet_group('group0', net, resnet_bottleneck,
                              64, num_blocks[0], 1)

        with backbone_scope(freeze=False):
            c3 = resnet_group('group1', c2, resnet_bottleneck,
                              128, num_blocks[1], 2)
            c4 = resnet_group('group2', c3, resnet_bottleneck,
                              256, num_blocks[2], 2)

        # 16x downsampling up to now
        return c4
def resnet_backbone(image, num_blocks, group_func, block_func, scope=None):
    """Build a four-group ResNet trunk and also return the first group's side output.

    Args:
        image: input tensor fed to ``conv0``.
        num_blocks: indexable with 0..3; the block count of each group.
        group_func: callable invoked as
            ``group_func(tensor, name, block_func, channels, count, a, b)``
            and expected to return a 2-tuple. The meaning of the last two
            integers is defined by ``group_func`` and is not visible here.
        block_func: forwarded to ``group_func``.
        scope: not used by this function.

    Returns:
        Tuple ``(logits, low_level_f)``: the first element returned for
        ``group3`` and the second element returned for ``group0``.
    """
    # (channels, first trailing int, second trailing int) per group
    group_specs = ((64, 2, 1), (128, 2, 1), (256, 1, 1), (512, 1, 2))

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0,
                                                         mode='fan_out')):
        net = Conv2D('conv0', image, 64, 7, stride=2, nl=BNReLU)
        net = MaxPooling('pool0', net, shape=3, stride=2, padding='SAME')

        low_level_f = None
        for idx, (channels, arg_a, arg_b) in enumerate(group_specs):
            net, side = group_func(net, 'group{}'.format(idx), block_func,
                                   channels, num_blocks[idx], arg_a, arg_b)
            if idx == 0:
                # Only the first group's secondary output is handed back.
                low_level_f = side

    return net, low_level_f
def pretrained_resnet_conv4(image, num_blocks, freeze_c2=True):
    """Build a ResNet trunk up to the third residual group and return it in two layouts.

    The input is first transposed with permutation ``[0, 2, 3, 1]``, and the
    padding is applied to axes 1 and 2 of the transposed tensor.

    Args:
        image: input tensor (presumably NCHW given the transpose; confirm
            against the caller).
        num_blocks: sequence of three block counts, one per residual group.
        freeze_c2: when true, ``tf.stop_gradient`` is applied to the output
            of ``group0``.

    Returns:
        Tuple ``(c4, c4_trans)``: the output of ``group2`` and the same
        tensor transposed with permutation ``[0, 3, 1, 2]``.
    """
    assert len(num_blocks) == 3
    no_pad = [0, 0]
    transposed = tf.transpose(image, [0, 2, 3, 1])

    with resnet_argscope():
        net = tf.pad(transposed, [no_pad, [2, 3], [2, 3], no_pad])
        net = Conv2D('conv0', net, 64, 7, strides=2,
                     activation=BNReLU, padding='VALID')
        net = tf.pad(net, [no_pad, [0, 1], [0, 1], no_pad])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

        c2 = resnet_group(net, 'group0', resnet_bottleneck,
                          64, num_blocks[0], 1)
        # TODO replace var by const to enable optimization
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
        c3 = resnet_group(c2, 'group1', resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group(c3, 'group2', resnet_bottleneck,
                          256, num_blocks[2], 2)

    # 16x downsampling up to now
    c4_trans = tf.transpose(c4, [0, 3, 1, 2])
    return c4, c4_trans
def inception(name, x, nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool, pooltype, freeze):
    """Inception block.

    Branches, concatenated along axis 1 in this order:
      1. a 1x1 conv (skipped when ``nr1x1 == 0``);
      2. a 1x1 reduce followed by a 3x3 conv;
      3. a 1x1 reduce followed by two 3x3 convs;
      4. 3x3 max or average pooling, followed by a 1x1 projection unless
         ``nrpool == 0``.

    The last layer of branches 2-4 uses stride 2 when ``nr1x1 == 0``,
    otherwise stride 1.

    Args:
        name: variable scope wrapping the block.
        x: input tensor.
        nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool: channel counts of the
            respective conv layers.
        pooltype: ``'max'`` or ``'avg'``; anything else fails an assertion.
        freeze: when truthy, ``tf.stop_gradient`` is applied to the output.

    Returns:
        The concatenated branch outputs.
    """
    stride = 1
    if nr1x1 == 0:
        stride = 2

    with tf.variable_scope(name):
        branches = []

        if nr1x1 != 0:
            branches.append(Conv2D('conv1x1', x, nr1x1, 1))

        reduce3 = Conv2D('conv3x3r', x, nr3x3r, 1)
        branches.append(Conv2D('conv3x3', reduce3, nr3x3, 3, strides=stride))

        double3 = Conv2D('conv233r', x, nr233r, 1)
        double3 = Conv2D('conv233a', double3, nr233, 3)
        branches.append(Conv2D('conv233b', double3, nr233, 3, strides=stride))

        if pooltype == 'max':
            pooled = MaxPooling('mpool', x, 3, stride, padding='SAME')
        else:
            assert pooltype == 'avg'
            pooled = AvgPooling('apool', x, 3, stride, padding='SAME')
        # With nrpool == 0 the pooled tensor is passed through unprojected.
        if nrpool != 0:
            pooled = Conv2D('poolproj', pooled, nrpool, 1)
        branches.append(pooled)

        merged = tf.concat(branches, 1, name='concat')
        if freeze:
            merged = tf.stop_gradient(merged)
        return merged
def resnet_fpn_backbone(image, num_blocks):
    """Build a ResNet trunk with four residual groups and attach FPN (optionally PAN).

    The stem and the first group are built inside ``backbone_scope`` with
    ``freeze=True`` (``freeze_at`` is fixed to 2); the remaining groups use
    ``freeze=False``.

    Changes relative to the previous version:
      * the ``shape2d`` / ``new_shape2d`` / ``pad_shape2d`` / ``chan``
        computations were removed. Their only consumer, the dynamic padding,
        was commented out, so they just added unused graph ops via the
        deprecated ``tf.to_int32`` / ``tf.to_float``;
      * ``fpn_model`` is now called from one place instead of two.

    NOTE(review): the input is not padded to a multiple of 32 here. Confirm
    that ``fpn_model`` tolerates arbitrary spatial sizes.

    Args:
        image: input tensor; padding is applied to its last two axes.
        num_blocks: sequence of four block counts, one per residual group.

    Returns:
        The result of ``fpn_model('fpn', [c2, c3, c4, c5])``, passed through
        ``pan_model('pan', ...)`` when ``config.PAN`` is truthy.
    """
    freeze_at = 2
    assert len(num_blocks) == 4, num_blocks

    with backbone_scope(freeze=freeze_at > 0):
        l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
        l = Conv2D('conv0', l, 64, 7, strides=2, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0],
                       maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')

    with backbone_scope(freeze=freeze_at > 1):
        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)

    with backbone_scope(freeze=False):
        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
        c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)

    # 32x downsampling up to now
    feat = fpn_model('fpn', [c2, c3, c4, c5])
    if config.PAN:
        feat = pan_model('pan', feat)
    return feat
def resnet_c4_backbone(image, num_blocks):
    """Build a ResNet trunk up to the third residual group (C4).

    The stem and the first group are wrapped in ``backbone_scope`` with a
    freeze flag derived from ``cfg.BACKBONE.FREEZE_AT``; the last two groups
    always use ``freeze=False``.

    Args:
        image: input tensor; padding is applied to its last two axes.
        num_blocks: sequence of three block counts, one per residual group.

    Returns:
        The output tensor of ``group2``.
    """
    assert len(num_blocks) == 3
    freeze_at = cfg.BACKBONE.FREEZE_AT
    no_pad = [0, 0]

    # Stem: explicit padding followed by VALID conv and VALID pooling.
    with backbone_scope(freeze=freeze_at > 0):
        stem_pad = maybe_reverse_pad(2, 3)
        net = tf.pad(image, [no_pad, no_pad, stem_pad, stem_pad])
        net = Conv2D('conv0', net, 64, 7, strides=2, padding='VALID')
        pool_pad = maybe_reverse_pad(0, 1)
        net = tf.pad(net, [no_pad, no_pad, pool_pad, pool_pad])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

    with backbone_scope(freeze=freeze_at > 1):
        c2 = resnet_group('group0', net, resnet_bottleneck,
                          64, num_blocks[0], 1)

    with backbone_scope(freeze=False):
        c3 = resnet_group('group1', c2, resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck,
                          256, num_blocks[2], 2)

    # 16x downsampling up to now
    return c4
def get_logits(image, num_classes=1000):
    """Build the classification network and return its logits.

    Layers are created in this order: a stride-2 ``conv1`` whose output is
    concatenated with its negation along the last axis and passed through
    ``BNReLU``; a max-pool; two ``LinearBottleneck`` layers (the second is
    added residually); three groups of ``inception`` blocks, each followed
    by a ``DropBlock``; a 1x1 ``convf`` with batch norm and ReLU; global
    average pooling; and two fully connected layers with dropout between.

    NOTE(review): the source text was whitespace-collapsed. The nesting
    below (``DropBlock`` once per group, ``ssdnet_argscope`` commented out)
    is reconstructed from the layer names - confirm against the original.

    Args:
        image: input tensor fed to ``conv1``.
        num_classes: number of output units of the final ``'linear'`` layer.

    Returns:
        The output tensor of the ``'linear'`` fully connected layer.
    """
    # with ssdnet_argscope():
    # dropblock
    # The keep probability is read from a non-trainable scalar variable
    # during training; outside training None is passed to DropBlock.
    if get_current_tower_context().is_training:
        dropblock_keep_prob = tf.get_variable('dropblock_keep_prob', (), dtype=tf.float32, trainable=False)
    else:
        dropblock_keep_prob = None

    l = image  # tf.transpose(image, perm=[0, 2, 3, 1])

    # conv1
    l = Conv2D('conv1', l, 16, 4, strides=2, activation=None, padding='SAME')
    with tf.variable_scope('conv1'):
        # Concatenate the response with its negation before BN + ReLU.
        l = BNReLU(tf.concat([l, -l], axis=-1))
    l = MaxPooling('pool1', l, 2)

    # conv2
    l = LinearBottleneck('conv2', l, 48, 24, 5, t=1, use_ab=True)
    l = l + LinearBottleneck('conv3', l, 24, 24, 5, t=2, use_ab=True)

    # Per-group settings: channel argument, number of blocks, ``t`` value
    # and DropBlock block size.
    ch_all = [48, 72, 96]
    iters = [2, 4, 4]
    mults = [3, 4, 6]
    bsize = [3, 3, 3]

    hlist = []  # never read in this function
    for ii, (ch, it, mu, bs) in enumerate(zip(ch_all, iters, mults, bsize)):
        use_ab = (ii < 2)  # only the first two groups pass use_ab=True
        for jj in range(it):
            name = 'inc{}/{}'.format(ii, jj)
            stride = 2 if jj == 0 else 1  # the first block of each group uses stride 2
            swap_block = True if jj % 2 == 1 else False  # true for odd-indexed blocks
            l = inception(name, l, ch, stride, t=mu, swap_block=swap_block, use_ab=use_ab)
        l = DropBlock('inc{}/drop'.format(ii), l, keep_prob=dropblock_keep_prob, block_size=bs)

    l = Conv2D('convf', l, 96 * 6, 1, activation=None)
    l = BatchNorm('convf/bn', l)
    l = tf.nn.relu(l)
    l = GlobalAvgPooling('poolf', l)

    fc = FullyConnected('fc', l, 1280, activation=BNReLU)
    fc = Dropout(fc, keep_prob=0.9)
    logits = FullyConnected('linear', fc, num_classes, use_bias=True)
    return logits
def down_branch(name, main_in, aux_in, ch):
    """Process a main and an auxiliary input and concatenate the results.

    Each input goes through two 3x3 ``'valid'`` convolutions: the first
    without bias and with ``BNReLU``, the second with bias and plain ReLU.
    The main branch is additionally max-pooled (2x2, stride 2, ``'same'``).
    The two results are concatenated along axis 1.

    Args:
        name: variable scope wrapping the block.
        main_in: tensor for the pooled branch.
        aux_in: tensor for the unpooled branch.
        ch: number of output channels of every convolution.

    Returns:
        The concatenation ``[main_branch, aux_branch]`` along axis 1.
    """
    def conv_pair(first_name, second_name, inputs):
        # Two stacked 3x3 valid convs: BN+ReLU first, biased ReLU second.
        hidden = Conv2D(first_name, inputs, ch, 3, padding='valid',
                        use_bias=False, activation=BNReLU)
        return Conv2D(second_name, hidden, ch, 3, padding='valid',
                      use_bias=True, activation=tf.nn.relu)

    with tf.variable_scope(name):
        main_feat = conv_pair('conv1', 'conv2', main_in)
        main_feat = MaxPooling('pool', main_feat, 2, strides=2, padding='same')

        aux_feat = conv_pair('conv3', 'conv4', aux_in)

        return tf.concat([main_feat, aux_feat], axis=1)
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    """Build a ResNet trunk with four residual groups for FPN.

    Before the stem, the last two axes of ``image`` are padded so that, on
    top of the base stem padding, their sizes are rounded up to a multiple
    of ``cfg.FPN.RESOLUTION_REQUIREMENT``. The extra amount is added to the
    second entry of each padding pair.

    Args:
        image: input tensor; axis 1 is taken as the static channel count and
            axes 2 onward as the spatial shape.
        num_blocks: sequence of four block counts, one per residual group.
        freeze_c2: when true, ``tf.stop_gradient`` is applied to the output
            of ``group0``.

    Returns:
        Tuple ``(c2, c3, c4, c5)`` with the outputs of ``group0`` ..
        ``group3``.
    """
    spatial = tf.shape(image)[2:]
    multiple = float(cfg.FPN.RESOLUTION_REQUIREMENT)
    rounded_up = tf.to_int32(tf.ceil(tf.to_float(spatial) / multiple) * multiple)
    extra = rounded_up - spatial
    assert len(num_blocks) == 4, num_blocks

    no_pad = [0, 0]
    with resnet_argscope():
        channels = image.shape[1]
        base = maybe_reverse_pad(2, 3)
        spatial_pads = [[base[0], base[1] + extra[axis]] for axis in range(2)]
        net = tf.pad(image, tf.stack([no_pad, no_pad] + spatial_pads))
        # tf.pad with dynamic paddings loses the static channel dimension.
        net.set_shape([None, channels, None, None])

        net = Conv2D('conv0', net, 64, 7, strides=2,
                     activation=BNReLU, padding='VALID')
        pool_pad = maybe_reverse_pad(0, 1)
        net = tf.pad(net, [no_pad, no_pad, pool_pad, pool_pad])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

        c2 = resnet_group('group0', net, resnet_bottleneck,
                          64, num_blocks[0], 1)
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
        c3 = resnet_group('group1', c2, resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck,
                          256, num_blocks[2], 2)
        c5 = resnet_group('group3', c4, resnet_bottleneck,
                          512, num_blocks[3], 2)

    # 32x downsampling up to now
    # size of c5: ceil(input/32)
    return c2, c3, c4, c5
def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
    """Build a ResNet trunk up to the third residual group (C4).

    Args:
        image: input tensor; padding is applied to its last two axes.
        num_blocks: sequence of three block counts, one per residual group.
        freeze_c2: when true, ``tf.stop_gradient`` is applied to the output
            of ``group0``.

    Returns:
        The output tensor of ``group2``.
    """
    assert len(num_blocks) == 3
    no_pad = [0, 0]

    with resnet_argscope():
        # Stem: explicit padding followed by VALID conv and VALID pooling.
        stem_pad = maybe_reverse_pad(2, 3)
        net = tf.pad(image, [no_pad, no_pad, stem_pad, stem_pad])
        net = Conv2D('conv0', net, 64, 7, strides=2,
                     activation=BNReLU, padding='VALID')
        pool_pad = maybe_reverse_pad(0, 1)
        net = tf.pad(net, [no_pad, no_pad, pool_pad, pool_pad])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

        c2 = resnet_group('group0', net, resnet_bottleneck,
                          64, num_blocks[0], 1)
        # TODO replace var by const to enable optimization
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
        c3 = resnet_group('group1', c2, resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck,
                          256, num_blocks[2], 2)

    # 16x downsampling up to now
    return c4
def pretrained_resnet_conv4(image, num_blocks, prefix=''):
    """Build a ResNet trunk up to the third residual group, with prefixed layer names.

    Conv2D, MaxPooling and BatchNorm are created with
    ``data_format='NCHW'``, and BatchNorm with ``use_local_stat=None``.

    Args:
        image: input tensor; padding is applied to its last two axes.
        num_blocks: sequence of three block counts, one per residual group.
        prefix: string prepended to every layer and group name.

    Returns:
        The output tensor of ``<prefix>group2``.
    """
    assert len(num_blocks) == 3
    no_pad = [0, 0]
    # (group suffix, channels, stride argument)
    group_specs = (('group0', 64, 1), ('group1', 128, 2), ('group2', 256, 2))

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, nl=tf.identity,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out'),
                     use_bias=False), \
            argscope(BatchNorm, use_local_stat=None):
        net = tf.pad(image, [no_pad, no_pad, [2, 3], [2, 3]])
        net = Conv2D(prefix + 'conv0', net, 64, 7, stride=2,
                     nl=BNReLU, padding='VALID')
        net = tf.pad(net, [no_pad, no_pad, [0, 1], [0, 1]])
        net = MaxPooling(prefix + 'pool0', net, shape=3, stride=2,
                         padding='VALID')

        # TODO replace var by const to enable folding
        # (a stop_gradient after group0 is currently disabled)
        for idx, (suffix, channels, stride) in enumerate(group_specs):
            net = resnet_group(net, prefix + suffix, resnet_bottleneck,
                               channels, num_blocks[idx], stride)

    # 16x downsampling up to now
    return net
def ssdnet_backbone(image, **kwargs):
    """Build the backbone and return one feature map per inception group.

    The first stage (``conv1`` + pooling) is built with BatchNorm
    ``training=False`` and followed by ``tf.stop_gradient``. The remaining
    layers are built with BatchNorm ``training=None``.

    NOTE(review): the source text was whitespace-collapsed. The nesting
    below (features appended once per group, the two ``argscope`` blocks as
    siblings, ``ssdnet_argscope`` commented out) is reconstructed - confirm
    against the original.

    Args:
        image: input tensor fed to ``conv1``.
        **kwargs: accepted but not used by this function.

    Returns:
        List with the tensor produced at the end of each inception group.
    """
    # with ssdnet_argscope():
    l = image  # tf.transpose(image, perm=[0, 2, 3, 1])

    with argscope([BatchNorm], training=False):
        # conv1
        l = Conv2D('conv1', l, 24, 4, strides=2, activation=None, padding='SAME')
        with tf.variable_scope('conv1'):
            # Concatenate the response with its negation before BN + ReLU.
            l = BNReLU(tf.concat([l, -l], axis=-1))
        l = MaxPooling('pool1', l, 2)

    # No gradient flows back into the first stage.
    l = tf.stop_gradient(l)

    with argscope([BatchNorm], training=None):
        # conv2
        l = LinearBottleneck('conv2', l, 48, 24, 3, t=1, use_ab=True)
        l = l + LinearBottleneck('conv3', l, 24, 24, 5, t=2, use_ab=True)

        # Per-group settings: channel argument, number of blocks, ``t`` value.
        ch_all = [48, 72, 96]
        iters = [2, 4, 4]
        mults = [3, 4, 6]

        hlist = []
        for ii, (ch, it, mu) in enumerate(zip(ch_all, iters, mults)):
            use_ab = (ii < 2)  # only the first two groups pass use_ab=True
            for jj in range(it):
                name = 'inc{}/{}'.format(ii, jj)
                stride = 2 if jj == 0 else 1  # the first block of each group uses stride 2
                k = 3 if jj < (it // 2) else 5  # first half of a group passes 3, second half 5
                swap_block = True if jj % 2 == 1 else False  # true for odd-indexed blocks
                l = inception(name, l, ch, k, stride, t=mu, swap_block=swap_block, use_ab=use_ab)
            hlist.append(l)

    return hlist
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    """Build a ResNet trunk with four residual groups for FPN.

    Before the stem, the last two axes of ``image`` are padded by
    ``[2, 3 + extra]``, where ``extra`` rounds the axis size up to a
    multiple of ``config.FPN_RESOLUTION_REQUIREMENT``.

    Args:
        image: input tensor; axis 1 is taken as the static channel count and
            axes 2 onward as the spatial shape.
        num_blocks: sequence of four block counts, one per residual group.
        freeze_c2: when true, ``tf.stop_gradient`` is applied to the output
            of ``group0``.

    Returns:
        Tuple ``(c2, c3, c4, c5)`` with the outputs of ``group0`` ..
        ``group3``.
    """
    spatial = tf.shape(image)[2:]
    multiple = config.FPN_RESOLUTION_REQUIREMENT * 1.
    rounded_up = tf.to_int32(tf.ceil(tf.to_float(spatial) / multiple) * multiple)
    extra = rounded_up - spatial
    assert len(num_blocks) == 4

    no_pad = [0, 0]
    # TODO pad 1 at each stage
    with resnet_argscope():
        channels = image.shape[1]
        spatial_pads = [[2, 3 + extra[axis]] for axis in range(2)]
        net = tf.pad(image, tf.stack([no_pad, no_pad] + spatial_pads))
        # tf.pad with dynamic paddings loses the static channel dimension.
        net.set_shape([None, channels, None, None])

        net = Conv2D('conv0', net, 64, 7, strides=2,
                     activation=BNReLU, padding='VALID')
        net = tf.pad(net, [no_pad, no_pad, [0, 1], [0, 1]])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

        c2 = resnet_group(net, 'group0', resnet_bottleneck,
                          64, num_blocks[0], 1)
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
        c3 = resnet_group(c2, 'group1', resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group(c3, 'group2', resnet_bottleneck,
                          256, num_blocks[2], 2)
        c5 = resnet_group(c4, 'group3', resnet_bottleneck,
                          512, num_blocks[3], 2)

    # 32x downsampling up to now
    return c2, c3, c4, c5
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a four-group ResNet classifier with 1000 outputs.

    Sec 5.1: We adopt the initialization of [15] for all convolutional
    layers. TensorFlow does not have the true "MSRA init", so
    ``variance_scaling`` is used as an approximation.

    Args:
        image: input tensor fed to ``conv0``.
        num_blocks: indexable with 0..3; the block count of each group.
        group_func: callable invoked as
            ``group_func(name, tensor, block_func, channels, count, stride)``.
        block_func: forwarded to ``group_func``.

    Returns:
        The output tensor of the ``'linear'`` fully connected layer.
    """
    # (channels, stride argument) for group0..group3
    group_specs = ((64, 1), (128, 2), (256, 2), (512, 2))

    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        net = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        net = MaxPooling('pool0', net, pool_size=3, strides=2, padding='SAME')
        for idx, (channels, stride) in enumerate(group_specs):
            net = group_func('group{}'.format(idx), net, block_func,
                             channels, num_blocks[idx], stride)
        net = GlobalAvgPooling('gap', net)

        # Sec 5.1: The 1000-way fully-connected layer is initialized by
        # drawing weights from a zero-mean Gaussian with standard deviation
        # of 0.01.
        fc_init = tf.random_normal_initializer(stddev=0.01)
        logits = FullyConnected('linear', net, 1000,
                                kernel_initializer=fc_init)

    return logits
def resnet_backbone(images, num_blocks, grp_fun, blck_fun, nfeatures, bn=True):
    """Build a two-input ResNet that outputs an ``nfeatures``-dimensional vector.

    Each input gets its own 7x7 stride-2 convolution. The two results are
    concatenated along axis 3 and passed through max-pooling, four residual
    groups, global average pooling and three fully connected layers.

    Args:
        images: indexable with 0 and 1; the two input tensors.
        num_blocks: indexable with 0..3; the block count of each group.
        grp_fun: callable invoked as
            ``grp_fun(tensor, name, blck_fun, channels, count, stride, bn=bn)``.
        blck_fun: forwarded to ``grp_fun``.
        nfeatures: number of output units of the final ``'linear'`` layer.
        bn: selects ``BNReLU`` (true) or ``tf.nn.relu`` (false) for the two
            stem convolutions; also forwarded to ``grp_fun``.

    Returns:
        The output tensor of the ``'linear'`` fully connected layer.
    """
    # (channels, stride argument) for group0..group3
    group_specs = ((64, 1), (128, 2), (256, 2), (256, 1))

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0,
                                                         mode='fan_out')):
        first_input, second_input = images[0], images[1]
        stem_act = BNReLU if bn else tf.nn.relu

        stem_a = Conv2D('conv0', first_input, 64, 7, stride=2, nl=stem_act)
        stem_b = Conv2D('conv1', second_input, 64, 7, stride=2, nl=stem_act)

        # stack second_input into the channel dimension of the conv0 output
        net = tf.concat([stem_a, stem_b], axis=3, name='stack_second_input')
        net = MaxPooling('pool0', net, shape=3, stride=2, padding='SAME')

        for idx, (channels, stride) in enumerate(group_specs):
            net = grp_fun(net, 'group{}'.format(idx), blck_fun,
                          channels, num_blocks[idx], stride, bn=bn)

        net = GlobalAvgPooling('gap', net)
        # NOTE: are linear activations intended for fc0 and fc1?
        net = FullyConnected('fc0', net, 1000)
        net = FullyConnected('fc1', net, 500)
        # NOTE: it looks like Fabi assumed a non-linear activation was the
        # default.
        # NOTE: the last 3 FC layers are linearly activated (see the graph in
        # TensorBoard), so a single FC layer should suffice (possibly with
        # better runtime).
        net = FullyConnected('linear', net, nfeatures, nl=tf.identity)

    return net
def resnet_fpn_backbone(image, num_blocks):
    """Build a ResNet trunk with four residual groups for FPN.

    Before the stem, the last two axes of ``image`` are padded so that, on
    top of the base stem padding, their sizes are rounded up to a multiple
    of ``cfg.FPN.RESOLUTION_REQUIREMENT``. The extra amount is added to the
    second entry of each padding pair. The stem and the first group are
    wrapped in ``backbone_scope`` with a freeze flag derived from
    ``cfg.BACKBONE.FREEZE_AT``.

    Args:
        image: input tensor; axis 1 is taken as the static channel count and
            axes 2 onward as the spatial shape.
        num_blocks: sequence of four block counts, one per residual group.

    Returns:
        Tuple ``(c2, c3, c4, c5)`` with the outputs of ``group0`` ..
        ``group3``.
    """
    freeze_at = cfg.BACKBONE.FREEZE_AT

    spatial = tf.shape(image)[2:]
    multiple = float(cfg.FPN.RESOLUTION_REQUIREMENT)
    rounded_up = tf.cast(
        tf.ceil(tf.cast(spatial, tf.float32) / multiple) * multiple, tf.int32)
    extra = rounded_up - spatial
    assert len(num_blocks) == 4, num_blocks

    no_pad = [0, 0]
    with backbone_scope(freeze=freeze_at > 0):
        channels = image.shape[1]
        base = maybe_reverse_pad(2, 3)
        spatial_pads = [[base[0], base[1] + extra[axis]] for axis in range(2)]
        net = tf.pad(image, tf.stack([no_pad, no_pad] + spatial_pads))
        # tf.pad with dynamic paddings loses the static channel dimension.
        net.set_shape([None, channels, None, None])

        net = Conv2D('conv0', net, 64, 7, strides=2, padding='VALID')
        pool_pad = maybe_reverse_pad(0, 1)
        net = tf.pad(net, [no_pad, no_pad, pool_pad, pool_pad])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

    with backbone_scope(freeze=freeze_at > 1):
        c2 = resnet_group('group0', net, resnet_bottleneck,
                          64, num_blocks[0], 1)

    with backbone_scope(freeze=False):
        c3 = resnet_group('group1', c2, resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck,
                          256, num_blocks[2], 2)
        c5 = resnet_group('group3', c4, resnet_bottleneck,
                          512, num_blocks[3], 2)

    # 32x downsampling up to now
    # size of c5: ceil(input/32)
    return c2, c3, c4, c5
def fpn_model(features):
    """Build FPN levels on top of four backbone feature maps.

    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    def upsample2x(name, x):
        # tf.image.resize is not aligned, so a fixed 2x2 all-ones unpooling
        # is used instead.
        ones = np.ones((2, 2), dtype='float32')
        return FixedUnPooling(name, x, 2, unpool_mat=ones,
                              data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        # 1x1 lateral convolutions, ordered c2..c5.
        laterals = []
        for level, feat in enumerate(features, start=2):
            laterals.append(
                Conv2D('lateral_1x1_c{}'.format(level), feat, num_channel, 1))

        # Top-down pathway, ordered c5..c2: each lateral is summed with the
        # upsampled result of the level above it.
        top_down = [laterals[-1]]
        for step, lateral in enumerate(reversed(laterals[:-1]), start=1):
            upsampled = upsample2x('upsample_lat{}'.format(6 - step),
                                   top_down[-1])
            top_down.append(lateral + upsampled)

        # 3x3 output convolutions, ordered p2..p5.
        outputs = []
        for level, merged in enumerate(reversed(top_down), start=2):
            outputs.append(
                Conv2D('posthoc_3x3_p{}'.format(level), merged, num_channel, 3))

        # p6 is a stride-2 subsampling of p5.
        outputs.append(
            MaxPooling('maxpool_p6', outputs[-1], pool_size=1, strides=2,
                       data_format='channels_first', padding='VALID'))
        return outputs
def net(name, i, basis_filter_list, rot_matrix_list, nr_orients, filter_type, is_training):
    """Dense Steerable Filter CNN.

    Two 7x7 ``GConv2D`` layers form the stem. Four stages follow, each made
    of a max-pool, a ``g_dense_blk`` and a 5x5 ``GConv2D``. The result is
    average-pooled, pooled over orientations with ``GroupPool`` and passed
    through two 1x1 convolutions, each followed by dropout.

    Args:
        name: variable scope that wraps all layers.
        i: input tensor.
        basis_filter_list: indexable with 0 and 1; entry 1 is passed to the
            7x7 convolutions and entry 0 to the 5x5 convolutions.
        rot_matrix_list: indexed the same way as ``basis_filter_list``.
        nr_orients: forwarded to the group-convolution layers and to
            ``GroupPool``.
        filter_type: forwarded to every ``GConv2D`` / ``g_dense_blk``.
        is_training: ``training`` flag of the two dropout layers.

    Returns:
        The output of the second dropout layer.
    """
    dense_basis_list = [basis_filter_list[0], basis_filter_list[1]]
    dense_rot_list = [rot_matrix_list[0], rot_matrix_list[1]]

    # Number passed to g_dense_blk for dense1..dense4.
    dense_args = (2, 2, 3, 3)

    with tf.variable_scope(name):

        def gconv(layer_name, inputs, channels, ksize, bank, **kwargs):
            # Thin wrapper that fills in the arguments shared by all GConv2D calls.
            return GConv2D(layer_name, inputs, channels, ksize, nr_orients,
                           filter_type, basis_filter_list[bank],
                           rot_matrix_list[bank], **kwargs)

        feat = gconv('ds_conv1', i, 8, 7, 1, input_layer=True)
        feat = gconv('ds_conv2', feat, 8, 7, 1)

        for stage, dense_arg in enumerate(dense_args, start=1):
            if stage == 1:
                # The first pooling layer is created without a padding argument.
                pooled = MaxPooling('max_pool1', feat, 2)
            else:
                pooled = MaxPooling('max_pool%d' % stage, feat, 2,
                                    padding='valid')
            dense = g_dense_blk('dense%d' % stage, pooled, [32, 8], [5, 7],
                                dense_arg, nr_orients, filter_type,
                                dense_basis_list, dense_rot_list,
                                bn_init=False)
            feat = gconv('ds_conv%d' % (stage + 2), dense, 32, 5, 0)

        pooled = AvgPooling('glb_avg_pool', feat, 6, padding='valid')
        pooled = GroupPool('orient_pool', pooled, nr_orients, pool_type='max')

        head = pooled
        for conv_name in ('conv3', 'conv4'):
            head = Conv2D(conv_name, head, 96, 1, use_bias=True, nl=BNReLU)
            head = tf.layers.dropout(head, rate=0.3, seed=5,
                                     training=is_training)

    return head
def fpn_model(features, seed_gen, fp16=False):
    """Build FPN levels on top of four backbone feature maps, with seeded initializers.

    Args:
        features ([tf.Tensor]): ResNet features c2-c5
        seed_gen: object whose ``next()`` method supplies seeds; it is called
            once for the shared kernel initializer and once per convolution.
        fp16: when true, the layers are built under
            ``mixed_precision_scope(mixed=True)``, the unpooling matrix is
            float16, and the outputs are cast to float32.

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL
    use_gn = cfg.FPN.NORM == 'GN'
    layout = 'channels_first' if cfg.TRAIN.FPN_NCHW else 'channels_last'

    def upsample2x(name, x):
        # tf.image.resize is not aligned, so a fixed 2x2 all-ones unpooling
        # is used instead.
        ones = np.ones((2, 2), dtype='float16' if fp16 else 'float32')
        return FixedUnPooling(name, x, 2, unpool_mat=ones, data_format=layout)

    with mixed_precision_scope(mixed=fp16):
        # The first seed goes to the initializer shared through the argscope.
        shared_init = tf.variance_scaling_initializer(
            scale=1., seed=seed_gen.next())
        with argscope(Conv2D, data_format=layout, activation=tf.identity,
                      use_bias=True, kernel_initializer=shared_init):
            # 1x1 lateral convolutions, ordered c2..c5.
            laterals = []
            for level, feat in enumerate(features, start=2):
                laterals.append(
                    Conv2D('lateral_1x1_c{}'.format(level), feat,
                           num_channel, 1, seed=seed_gen.next()))
            if use_gn:
                laterals = [
                    GroupNorm('gn_c{}'.format(level), lateral)
                    for level, lateral in enumerate(laterals, start=2)
                ]

            # Top-down pathway, ordered c5..c2.
            top_down = [laterals[-1]]
            for step, lateral in enumerate(reversed(laterals[:-1]), start=1):
                upsampled = upsample2x('upsample_lat{}'.format(6 - step),
                                       top_down[-1])
                top_down.append(lateral + upsampled)

            # 3x3 output convolutions, ordered p2..p5.
            outputs = []
            for level, merged in enumerate(reversed(top_down), start=2):
                outputs.append(
                    Conv2D('posthoc_3x3_p{}'.format(level), merged,
                           num_channel, 3, seed=seed_gen.next()))
            if use_gn:
                outputs = [
                    GroupNorm('gn_p{}'.format(level), out)
                    for level, out in enumerate(outputs, start=2)
                ]

            # p6 is a stride-2 subsampling of p5.
            p6 = MaxPooling('maxpool_p6', outputs[-1], pool_size=1, strides=2,
                            data_format=layout, padding='VALID')

            if fp16:
                return ([tf.cast(out, tf.float32) for out in outputs]
                        + [tf.cast(p6, tf.float32)])
            return outputs + [p6]
def resnet(input_, DEPTH, option):
    """Build a ResNet classifier with optional ADL layers.

    An ``ADL`` layer is inserted after ``conv0`` (position 0), ``pool0``
    (position 1) and each of the four residual groups (positions 2-5)
    whenever the matching entry of ``option.attdrop`` is truthy.

    Args:
        input_: input tensor fed to ``conv0``.
        DEPTH: one of 18, 34, 50, 101, 152; selects block counts and block
            type.
        option: configuration object. Reads ``option.mode`` (``'resnet'``,
            ``'preact'`` or ``'se'``), ``option.attdrop``,
            ``option.laststride`` (stride argument of the last group) and
            ``option.classnum``; it is also forwarded to the group function
            and to ``ADL``.

    Returns:
        Tuple ``(logits, l)``: the output of the ``'linearnew'`` layer and
        the feature map fed to global average pooling.
    """
    # Looked up as in the original code; the value is not used below.
    tower_is_training = get_current_tower_context().is_training

    mode = option.mode
    is_preact = mode == 'preact'
    basicblock = preresnet_basicblock if is_preact else resnet_basicblock
    bottleneck = {
        'resnet': resnet_bottleneck,
        'preact': preresnet_bottleneck,
        'se': se_resnet_bottleneck
    }[mode]

    depth_table = {
        18: ([2, 2, 2, 2], basicblock),
        34: ([3, 4, 6, 3], basicblock),
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
        152: ([3, 8, 36, 3], bottleneck)
    }
    defs, block_func = depth_table[DEPTH]
    group_func = preresnet_group if is_preact else resnet_group

    def maybe_adl(position, tensor):
        # Insert ADL only where the configuration enables it.
        if option.attdrop[position]:
            return ADL(position, tensor, option)
        return tensor

    # (channels, stride argument) for group0..group3
    group_specs = ((64, 1), (128, 2), (256, 2), (512, option.laststride))

    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out')), \
            argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                     data_format='channels_first'):
        l = Conv2D('conv0', input_, 64, 7, strides=2, activation=BNReLU)
        l = maybe_adl(0, l)

        l = MaxPooling('pool0', l, 3, strides=2, padding='SAME')
        l = maybe_adl(1, l)

        for idx, (channels, stride) in enumerate(group_specs):
            l = group_func('group{}'.format(idx), l, block_func,
                           channels, defs[idx], stride, option)
            l = maybe_adl(idx + 2, l)

        prelogits = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linearnew', prelogits, option.classnum)

    return logits, l
def down_conv_block(name, l, channel, nr_blks, stride=1):
    """Optionally max-pool the input, then apply a stack of 3x3 valid convolutions.

    Args:
        name: variable scope wrapping the block.
        l: input tensor.
        channel: number of output channels of every convolution.
        nr_blks: number of convolutions to stack.
        stride: 1 for no pooling, 2 for a 2x2 stride-2 max-pool first; any
            other value fails an assertion.

    Returns:
        The output of the last convolution (or the possibly pooled input
        when ``nr_blks`` is 0).
    """
    with tf.variable_scope(name):
        needs_pool = stride != 1
        if needs_pool:
            assert stride == 2, 'U-Net supports stride 2 down-sample only'
            l = MaxPooling('max_pool', l, 2, strides=2)

        for blk in range(nr_blks):
            l = Conv2D('conv_%d' % blk, l, channel, 3,
                       padding='valid', strides=1, activation=BNReLU)
    return l
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a four-group ResNet classifier with 1000 outputs from ``mpusim`` layers.

    Args:
        image: input tensor fed to ``conv0``.
        num_blocks: indexable with 0..3; the block count of each group.
        group_func: callable invoked as
            ``group_func(name, tensor, block_func, channels, count, stride)``.
        block_func: forwarded to ``group_func``.

    Returns:
        The output tensor of the ``'linear'`` fully connected layer.
    """
    # (channels, stride argument) for group0..group3
    group_specs = ((64, 1), (128, 2), (256, 2), (512, 2))

    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')
    with argscope(mpusim_conv2d, use_bias=False, kernel_initializer=conv_init):
        # Note that TF pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        net = mpusim_conv2d('conv0', image, 64, 7, strides=2,
                            activation=BNReLU)
        net = MaxPooling('pool0', net, pool_size=3, strides=2, padding='SAME')

        for idx, (channels, stride) in enumerate(group_specs):
            net = group_func('group{}'.format(idx), net, block_func,
                             channels, num_blocks[idx], stride)

        net = GlobalAvgPooling('gap', net)
        fc_init = tf.random_normal_initializer(stddev=0.01)
        logits = mpusim_fully_connected('linear', net, 1000,
                                        kernel_initializer=fc_init)

    return logits
def resnet_c4_backbone(image, num_blocks):
    """Build a ResNet trunk up to the third residual group (C4).

    ``freeze_at`` is fixed to 2, so both the stem and the first group are
    built inside ``backbone_scope(freeze=True)``; the last two groups use
    ``freeze=False``.

    Args:
        image: input tensor; padding is applied to its last two axes.
        num_blocks: sequence of three block counts, one per residual group.

    Returns:
        The output tensor of ``group2``.
    """
    assert len(num_blocks) == 3
    freeze_at = 2
    no_pad = [0, 0]

    # Stem: explicit padding followed by VALID conv and VALID pooling.
    with backbone_scope(freeze=freeze_at > 0):
        stem_pad = maybe_reverse_pad(2, 3)
        net = tf.pad(image, [no_pad, no_pad, stem_pad, stem_pad])
        net = Conv2D('conv0', net, 64, 7, strides=2, padding='VALID')
        pool_pad = maybe_reverse_pad(0, 1)
        net = tf.pad(net, [no_pad, no_pad, pool_pad, pool_pad])
        net = MaxPooling('pool0', net, 3, strides=2, padding='VALID')

    with backbone_scope(freeze=freeze_at > 1):
        c2 = resnet_group('group0', net, resnet_bottleneck,
                          64, num_blocks[0], 1)

    with backbone_scope(freeze=False):
        c3 = resnet_group('group1', c2, resnet_bottleneck,
                          128, num_blocks[1], 2)
        c4 = resnet_group('group2', c3, resnet_bottleneck,
                          256, num_blocks[2], 2)

    # 16x downsampling up to now
    return c4
def fpn_model(features):
    """Build FPN levels (optionally group-normalized) on top of four backbone feature maps.

    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL
    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        # Try the v2 nearest-neighbour resize first. If an AttributeError is
        # raised, fall back to a fixed 2x2 all-ones unpooling.
        try:
            resize = tf.compat.v2.image.resize_images
            with tf.name_scope(name):
                doubled = tf.shape(x)[2:] * 2
                # The resize is applied after moving axis 1 to the end.
                nhwc = tf.transpose(x, [0, 2, 3, 1])
                nhwc = resize(nhwc, doubled, 'nearest')
                return tf.transpose(nhwc, [0, 3, 1, 2])
        except AttributeError:
            ones = np.ones((2, 2), dtype='float32')
            return FixedUnPooling(name, x, 2, unpool_mat=ones,
                                  data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tfv1.variance_scaling_initializer(scale=1.)):
        # 1x1 lateral convolutions, ordered c2..c5.
        laterals = []
        for level, feat in enumerate(features, start=2):
            laterals.append(
                Conv2D('lateral_1x1_c{}'.format(level), feat, num_channel, 1))
        if use_gn:
            laterals = [
                GroupNorm('gn_c{}'.format(level), lateral)
                for level, lateral in enumerate(laterals, start=2)
            ]

        # Top-down pathway, ordered c5..c2.
        top_down = [laterals[-1]]
        for step, lateral in enumerate(reversed(laterals[:-1]), start=1):
            upsampled = upsample2x('upsample_lat{}'.format(6 - step),
                                   top_down[-1])
            top_down.append(lateral + upsampled)

        # 3x3 output convolutions, ordered p2..p5.
        outputs = []
        for level, merged in enumerate(reversed(top_down), start=2):
            outputs.append(
                Conv2D('posthoc_3x3_p{}'.format(level), merged, num_channel, 3))
        if use_gn:
            outputs = [
                GroupNorm('gn_p{}'.format(level), out)
                for level, out in enumerate(outputs, start=2)
            ]

        # p6 is a stride-2 subsampling of p5.
        p6 = MaxPooling('maxpool_p6', outputs[-1], pool_size=1, strides=2,
                        data_format='channels_first', padding='VALID')
        return outputs + [p6]
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a four-group ResNet classifier with 1000 outputs in NCHW layout.

    Args:
        image: input tensor fed to ``conv0``.
        num_blocks: indexable with 0..3; the block count of each group.
        group_func: callable invoked as
            ``group_func(name, tensor, block_func, channels, count, stride)``.
        block_func: forwarded to ``group_func``.

    Returns:
        The output tensor of the ``'linear'`` fully connected layer.
    """
    # (channels, stride argument) for group0..group3
    group_specs = ((64, 1), (128, 2), (256, 2), (512, 2))

    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm],
                  data_format='NCHW'), \
            argscope(Conv2D, use_bias=False,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')):
        net = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        net = MaxPooling('pool0', net, pool_size=3, strides=2, padding='SAME')

        for idx, (channels, stride) in enumerate(group_specs):
            net = group_func('group{}'.format(idx), net, block_func,
                             channels, num_blocks[idx], stride)

        net = GlobalAvgPooling('gap', net)
        fc_init = tf.random_normal_initializer(stddev=0.01)
        logits = FullyConnected('linear', net, 1000,
                                kernel_initializer=fc_init)

    return logits