def DenseBlock(inputs, inter_channel, growth_rate, data_format, training, momentum, name, mode):
    with tf.variable_scope(name, reuse=False):
        channel_axis = 1 if data_format == 'channels_first' else -1
        x1 = CBR(inputs, inter_channel, 1, 1, training=training, momentum=momentum,
                 name="1x1CBR_R", data_format=data_format, mode=mode)
        x1 = CBR(x1, growth_rate, 3, 1, training=training, momentum=momentum,
                 name="3x3CBR_R", data_format=data_format, mode=mode)
        out = tf.concat([inputs, x1], axis=channel_axis)
    return out
def StemBlock(inputs, num_init_features, data_format, training, momentum, mode):
    channel_axis = 1 if data_format == 'channels_first' else -1
    x = CBR(inputs, num_init_features, 3, 2, training=training, momentum=momentum,
            name="CBR1", data_format=data_format, mode=mode)
    # Left branch: 1x1 reduction followed by a strided 3x3 conv.
    x1 = CBR(x, num_init_features // 2, 1, 1, training=training, momentum=momentum,
             name="1x1CBR_R", data_format=data_format, mode=mode)
    x1 = CBR(x1, num_init_features, 3, 2, training=training, momentum=momentum,
             name="3x3CBR_R", data_format=data_format, mode=mode)
    # Right branch: max pooling of the stem output; both branches halve the resolution.
    x2 = tf.layers.max_pooling2d(x, (2, 2), (2, 2), data_format=data_format, padding='same')
    out = tf.concat([x1, x2], axis=channel_axis)
    out = CBR(out, num_init_features, 1, 1, training=training, momentum=momentum,
              name="CBR2", data_format=data_format, mode=mode)
    return out
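# A minimal usage sketch for the two blocks above (illustrative only, not part of the
# model): it assumes the CBR helper defined elsewhere in this file and a 4-D NHWC input.
# With a 224x224 input, StemBlock reduces the spatial size 4x, and each DenseBlock
# appends `growth_rate` channels to its input.
def _stem_dense_example():
    inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    x = StemBlock(inputs, num_init_features=32, data_format='channels_last',
                  training=False, momentum=0.9, mode=None)  # -> (?, 56, 56, 32)
    x = DenseBlock(x, inter_channel=16, growth_rate=16, data_format='channels_last',
                   training=False, momentum=0.9, name='db0', mode=None)  # -> (?, 56, 56, 48)
    return x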
def GhostModule(name, x, filters, kernel_size, dw_size, ratio, mode, padding='SAME',
                strides=1, data_format='channels_first', use_bias=False,
                is_training=False, activation='relu', momentum=0.9):
    axis = 1 if data_format == 'channels_first' else -1
    with tf.variable_scope(name):
        # Primary conv produces ceil(filters / ratio) "intrinsic" channels.
        init_channels = math.ceil(filters / ratio)
        x = CBR(x, init_channels, kernel_size, strides=strides, training=is_training,
                momentum=momentum, mode=mode, name=name, padding='same',
                data_format=data_format, activation='relu', bn=True, use_bias=use_bias)
        if ratio == 1:
            return x
        # Cheap depthwise operation generates the remaining "ghost" feature maps.
        dw1 = MyDepthConv(x, [dw_size, dw_size], channel_mult=ratio - 1, stride=1,
                          data_format=data_format, name=name)
        dw1 = tf.layers.batch_normalization(dw1, training=is_training,
                                            name=name + 'BN_2', axis=axis)
        if activation == 'relu':
            dw1 = tf.nn.relu(dw1, name=name + 'Relu_2')
        elif activation == 'mish':
            dw1 = mish(dw1)
        elif activation == 'swish':
            dw1 = swish(dw1, name=name + 'swish_2')
        # Keep only as many ghost channels as needed to reach `filters` after concat.
        if data_format == 'channels_first':
            dw1 = dw1[:, :filters - init_channels, :, :]
        else:
            dw1 = dw1[:, :, :, :filters - init_channels]
        x = tf.concat([x, dw1], axis=axis)
    return x
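# Illustrative sketch of the ghost-module channel math (assumed usage, not part of the
# model): with filters=64 and ratio=2, the primary conv emits ceil(64/2) = 32 intrinsic
# channels and the depthwise branch contributes the remaining 64 - 32 = 32 ghost
# channels before the concat.
def _ghost_module_example():
    x = tf.placeholder(tf.float32, [None, 32, 56, 56])  # NCHW
    y = GhostModule('ghost_demo', x, filters=64, kernel_size=1, dw_size=3, ratio=2,
                    mode=None, strides=1, data_format='channels_first',
                    is_training=False)  # -> (?, 64, 56, 56)
    return y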
def decision_head(x, y, class_num, scope, keep_dropout_head, training, data_format,
                  momentum, mode, reuse=None, drop_rate=0.2, activation='relu'):
    with tf.variable_scope(scope, reuse=reuse):
        channel_axis = 1 if data_format == 'channels_first' else -1
        x = tf.concat([x, y], axis=channel_axis)
        # Three stride-2 stages shrink the fused features before global pooling.
        x = CBR(x, 16, 3, 2, training, momentum, mode, name='CBR1', padding='same',
                data_format=data_format, activation=activation, bn=True)
        x = CBR(x, 16, 3, 1, training, momentum, mode, name='CBR2', padding='same',
                data_format=data_format, activation=activation, bn=True)
        x = CBR(x, 32, 3, 2, training, momentum, mode, name='CBR3', padding='same',
                data_format=data_format, activation=activation, bn=True)
        x = CBR(x, 32, 3, 1, training, momentum, mode, name='CBR4', padding='same',
                data_format=data_format, activation=activation, bn=True)
        x = CBR(x, 32, 3, 2, training, momentum, mode, name='CBR5', padding='same',
                data_format=data_format, activation=None, bn=False)
        reduction_indices = [1, 2] if data_format == 'channels_last' else [2, 3]
        # Global mean/max statistics of both the head features and the input map y.
        vector1 = math_ops.reduce_mean(x, reduction_indices, name='pool4', keepdims=True)
        vector2 = math_ops.reduce_max(x, reduction_indices, name='pool5', keepdims=True)
        vector3 = math_ops.reduce_mean(y, reduction_indices, name='pool6', keepdims=True)
        vector4 = math_ops.reduce_max(y, reduction_indices, name='pool7', keepdims=True)
        vector = tf.concat([vector1, vector2, vector3, vector4], axis=channel_axis)
        vector = tf.squeeze(vector, axis=reduction_indices)
        if keep_dropout_head:
            vector = tf.nn.dropout(vector, keep_prob=1 - drop_rate)
        logits = slim.fully_connected(vector, class_num, activation_fn=None)
    return logits
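# Hedged usage sketch for the decision head (illustrative only): `x` is assumed to be a
# backbone feature map and `y` the per-pixel segmentation logits, both NHWC. The head
# pools both to global mean/max vectors and emits one logit vector per image.
def _decision_head_example():
    feats = tf.placeholder(tf.float32, [None, 56, 56, 128])
    seg_logits = tf.placeholder(tf.float32, [None, 56, 56, 2])
    logits = decision_head(feats, seg_logits, class_num=2, scope='decision',
                           keep_dropout_head=False, training=False,
                           data_format='channels_last', momentum=0.9, mode=None)
    return logits  # shape (?, 2)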
def pyramid_pooling_block(input_tensor, nOut, bin_sizes, training, momentum,
                          data_format='channels_first', name=None, mode=None):
    concat_list = [input_tensor]
    if data_format == 'channels_last':
        h = input_tensor.get_shape().as_list()[1]
        w = input_tensor.get_shape().as_list()[2]
        axis = -1
    else:
        h = input_tensor.get_shape().as_list()[2]
        w = input_tensor.get_shape().as_list()[3]
        axis = 1
    # Split nOut evenly across the bins; the first bin absorbs the remainder.
    nbin = len(bin_sizes)
    laynOut = nOut // nbin
    outlist = nbin * [laynOut]
    outlist[0] += nOut - laynOut * nbin
    for n, bin_size in enumerate(bin_sizes, start=1):
        # Emulate adaptive average pooling onto a bin_size x bin_size grid.
        x = tf.layers.average_pooling2d(
            input_tensor,
            pool_size=(h - (bin_size - 1) * (h // bin_size),
                       w - (bin_size - 1) * (w // bin_size)),
            strides=(h // bin_size, w // bin_size),
            data_format=data_format, name=name + '_' + str(n) + '_agp2d')
        x = CBR(x, outlist[n - 1], (1, 1), strides=(1, 1), padding='valid',
                name=name + '_' + str(n) + 'conv', training=training, momentum=momentum,
                data_format=data_format, mode=mode)
        # tf.image.resize_images expects NHWC, so transpose around it for NCHW inputs.
        if data_format == 'channels_last':
            x = tf.image.resize_images(x, (h, w), align_corners=True)
        else:
            x = tf.transpose(x, [0, 2, 3, 1])  # NCHW -> NHWC
            x = tf.image.resize_images(x, (h, w), align_corners=True)
            x = tf.transpose(x, [0, 3, 1, 2])  # NHWC -> NCHW
        concat_list.append(x)
    x = tf.concat(concat_list, axis=axis)
    x = CBR(x, nOut, (1, 1), strides=(1, 1), training=training, momentum=momentum,
            name=name + 'conv', padding='valid', data_format=data_format, mode=mode)
    return x
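# Sketch of the adaptive-pooling arithmetic used above (illustrative): for h = w = 32
# and bin_size = 4, stride = 32 // 4 = 8 and pool_size = 32 - 3 * 8 = 8, which tiles the
# map into an exact 4x4 grid; each pooled grid is 1x1-convolved and resized back to
# (h, w). Static spatial dimensions are required.
def _ppb_example():
    x = tf.placeholder(tf.float32, [None, 128, 32, 32])  # NCHW
    y = pyramid_pooling_block(x, nOut=128, bin_sizes=[1, 2, 4, 8], training=False,
                              momentum=0.9, data_format='channels_first',
                              name='ppb_demo', mode=None)  # -> (?, 128, 32, 32)
    return y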
def CSPPeleeNet(inputs, data_format, drop_rate, training, momentum, name,
                pelee_cfg=pelee_cfg, mode=None, activation='relu'):
    with tf.variable_scope(name, reuse=False):
        if data_format == 'channels_first':
            inputs = tf.transpose(inputs, [0, 3, 1, 2])  # NHWC -> NCHW
        channel_axis = 1 if data_format == 'channels_first' else -1
        num_init_features = pelee_cfg["num_init_features"]
        growthRate = pelee_cfg["growthRate"]
        nDenseBlocks = pelee_cfg["nDenseBlocks"]
        bottleneck_width = pelee_cfg["bottleneck_width"]
        x = StemBlock(inputs, num_init_features, data_format, training, momentum, mode=mode)
        inter_channel = []
        total_filter = []
        dense_inp = []
        for i, b_w in enumerate(bottleneck_width):
            # Round the bottleneck width to a multiple of 4 channels.
            inter_channel.append(int(growthRate * b_w / 4) * 4)
            if i == 0:
                total_filter.append(num_init_features + growthRate * nDenseBlocks[i])
                dense_inp.append(num_init_features)
            else:
                total_filter.append(total_filter[i - 1] + growthRate * nDenseBlocks[i])
                dense_inp.append(total_filter[i - 1])
            # CSP split: the left path runs through the dense blocks, the right bypasses them.
            act = activation if i == 0 else None
            x1 = CBR(x, dense_inp[i], 1, 1, training=training, momentum=momentum,
                     name="split_conv_L" + str(i), data_format=data_format,
                     activation=act, mode=mode)
            x2 = CBR(x, dense_inp[i], 1, 1, training=training, momentum=momentum,
                     name="split_conv_R" + str(i), data_format=data_format,
                     activation=act, mode=mode)
            x = x1
            for n in range(nDenseBlocks[i]):
                x = DenseBlock(x, inter_channel[i], growthRate, data_format, training,
                               momentum, name="Denseblock" + str(i) + "_" + str(n), mode=mode)
            # Transition layer 1, merge the CSP branches, then transition layer 2.
            x = CBR(x, total_filter[i], 1, 1, training=training, momentum=momentum,
                    name="transition_1_" + str(i), data_format=data_format, mode=mode)
            x = tf.concat([x, x2], axis=channel_axis)
            x = CBR(x, total_filter[i], 1, 1, training=training, momentum=momentum,
                    name="transition_2_" + str(i), data_format=data_format, mode=mode)
            if i != len(nDenseBlocks) - 1:
                x = tf.layers.AveragePooling2D(pool_size=2, strides=2, name='agp' + str(i),
                                               data_format=data_format)(x)
            if i == 0:
                hi_res = x  # keep the high-resolution map for the later skip fusion
        x = pyramid_pooling_block(x, total_filter[-1], BIN_SIZE, training=training,
                                  momentum=momentum, data_format=data_format,
                                  name='ppb', mode=mode)
        x = CBR(x, total_filter[-1], 1, 1, training=training, momentum=momentum,
                name="low_res_conv", data_format=data_format, activation=None, mode=mode)
        x = tf.keras.layers.UpSampling2D((4, 4), data_format=data_format)(x)
        hi_res = CBR(hi_res, total_filter[0], 1, 1, training=training, momentum=momentum,
                     name="hi_res_conv", data_format=data_format, activation=None, mode=mode)
        x = tf.concat([x, hi_res], axis=channel_axis)
        x = CBR(x, 128, 1, 1, training=training, momentum=momentum, name="mix_conv",
                data_format=data_format, activation=activation, mode=mode)
        features = tf.layers.dropout(x, drop_rate, training=training, name='dropout')
        logits = CBR(features, CLASS_NUM, 1, 1, training=training, momentum=momentum,
                     name="classify_conv", data_format=data_format, activation=None, mode=mode)
    return [features, logits]
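# Minimal sketch of wiring the backbone into a segmentation graph (assumed usage;
# pelee_cfg, BIN_SIZE and CLASS_NUM are module-level constants defined elsewhere in
# this file). Inputs are always fed as NHWC; the function transposes internally when
# data_format is 'channels_first'.
def _csp_peleenet_example():
    images = tf.placeholder(tf.float32, [None, 256, 256, 3])
    features, logits = CSPPeleeNet(images, data_format='channels_first', drop_rate=0.2,
                                   training=False, momentum=0.9, name='backbone', mode=None)
    # `logits` is a per-pixel CLASS_NUM-channel map at reduced resolution (the last
    # stage upsampled 4x and fused with the stage-0 features); upsample and argmax
    # to obtain the segmentation mask.
    return features, logits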
def bifpn_neck(features, num_channels, scope, momentum, mode, data_format,
               is_training=False, reuse=None):
    """BiFPN: one top-down pass followed by one bottom-up pass with skip fusion."""
    with tf.variable_scope(scope, reuse=reuse):
        P3_in, P4_in, P5_in, P6_in, P7_in = features
        # Project every input level to a common channel count.
        P3_in = CBR(P3_in, num_channels, 1, 1, is_training, momentum=momentum, mode=mode,
                    name='BiFPN_P3', padding='same', data_format=data_format,
                    activation=ACTIVATION, bn=True)
        P4_in = CBR(P4_in, num_channels, 1, 1, is_training, momentum=momentum, mode=mode,
                    name='BiFPN_P4', padding='same', data_format=data_format,
                    activation=ACTIVATION, bn=True)
        P5_in = CBR(P5_in, num_channels, 1, 1, is_training, momentum=momentum, mode=mode,
                    name='BiFPN_P5', padding='same', data_format=data_format,
                    activation=ACTIVATION, bn=True)
        P6_in = CBR(P6_in, num_channels, 1, 1, is_training, momentum=momentum, mode=mode,
                    name='BiFPN_P6', padding='same', data_format=data_format,
                    activation=ACTIVATION, bn=True)
        P7_in = CBR(P7_in, num_channels, 1, 1, is_training, momentum=momentum, mode=mode,
                    name='BiFPN_P7', padding='same', data_format=data_format,
                    activation=ACTIVATION, bn=True)
        # Top-down path: upsample the coarser level, add the same-scale input,
        # and fuse with a depthwise conv block.
        P7_U = keras.layers.UpSampling2D(interpolation='bilinear', data_format=data_format)(P7_in)
        P6_td = keras.layers.Add()([P7_U, P6_in])
        P6_td = DepthwiseConvBlock(P6_td, kernel_size=3, strides=1, data_format=data_format,
                                   is_training=is_training, name='BiFPN_U_P6')
        P6_U = keras.layers.UpSampling2D(interpolation='bilinear', data_format=data_format)(P6_td)
        P5_td = keras.layers.Add()([P6_U, P5_in])
        P5_td = DepthwiseConvBlock(P5_td, kernel_size=3, strides=1, data_format=data_format,
                                   is_training=is_training, name='BiFPN_U_P5')
        P5_U = keras.layers.UpSampling2D(interpolation='bilinear', data_format=data_format)(P5_td)
        P4_td = keras.layers.Add()([P5_U, P4_in])
        P4_td = DepthwiseConvBlock(P4_td, kernel_size=3, strides=1, data_format=data_format,
                                   is_training=is_training, name='BiFPN_U_P4')
        P4_U = keras.layers.UpSampling2D(interpolation='bilinear', data_format=data_format)(P4_td)
        P3_out = keras.layers.Add()([P4_U, P3_in])
        P3_out = DepthwiseConvBlock(P3_out, kernel_size=3, strides=1, data_format=data_format,
                                    is_training=is_training, name='BiFPN_U_P3')
        # Bottom-up path: downsample and fuse with both the top-down and input features.
        P3_D = keras.layers.MaxPooling2D(strides=(2, 2), data_format=data_format)(P3_out)
        P4_out = keras.layers.Add()([P3_D, P4_td, P4_in])
        P4_out = DepthwiseConvBlock(P4_out, kernel_size=3, strides=1, data_format=data_format,
                                    is_training=is_training, name='BiFPN_D_P4')
        P4_D = keras.layers.MaxPooling2D(strides=(2, 2), data_format=data_format)(P4_out)
        P5_out = keras.layers.Add()([P4_D, P5_td, P5_in])
        P5_out = DepthwiseConvBlock(P5_out, kernel_size=3, strides=1, data_format=data_format,
                                    is_training=is_training, name='BiFPN_D_P5')
        P5_D = keras.layers.MaxPooling2D(strides=(2, 2), data_format=data_format)(P5_out)
        P6_out = keras.layers.Add()([P5_D, P6_td, P6_in])
        P6_out = DepthwiseConvBlock(P6_out, kernel_size=3, strides=1, data_format=data_format,
                                    is_training=is_training, name='BiFPN_D_P6')
        P6_D = keras.layers.MaxPooling2D(strides=(2, 2), data_format=data_format)(P6_out)
        P7_out = keras.layers.Add()([P6_D, P7_in])
        P7_out = DepthwiseConvBlock(P7_out, kernel_size=3, strides=1, data_format=data_format,
                                    is_training=is_training, name='BiFPN_D_P7')
    return P3_out, P4_out, P5_out, P6_out, P7_out
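# Hedged sketch of feeding five pyramid levels (strides 4..64, channel counts chosen
# arbitrarily here) through the BiFPN neck; ACTIVATION and DepthwiseConvBlock come from
# elsewhere in this file. Each level must halve the spatial size of the previous one so
# the upsample/maxpool paths line up.
def _bifpn_example():
    feats = [tf.placeholder(tf.float32, [None, 256 // s, 256 // s, c])
             for s, c in zip([4, 8, 16, 32, 64], [40, 80, 112, 192, 320])]
    outs = bifpn_neck(feats, num_channels=64, scope='bifpn', momentum=0.9, mode=None,
                      data_format='channels_last', is_training=False)
    return outs  # five maps, each with 64 channels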
def ghostnet_base(inputs, mode, data_format, min_depth=8, depth_multiplier=1.0, depth=1.0,
                  conv_defs=None, output_stride=None, dw_code=None, ratio_code=None,
                  se=1, scope=None, is_training=False, momentum=0.9):
    """Adjust depth_multiplier to change the width (channel count) of the network."""
    if data_format == 'channels_first':
        axis = 1
        inputs = tf.transpose(inputs, [0, 3, 1, 2])  # NHWC -> NCHW
    else:
        axis = -1
    output_layers = []

    def depth(d):  # note: shadows the unused `depth` argument above
        # Scale the channel count and round to a multiple of 4, respecting min_depth.
        d = max(int(d * depth_multiplier), min_depth)
        d = round(d / 4) * 4
        return d

    end_points = {}  # used to find thinned depths for each layer
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')
    if conv_defs is None:
        conv_defs = _CONV_DEFS_0
    if dw_code is None or len(dw_code) < len(conv_defs):
        dw_code = [3] * len(conv_defs)
    print('dw_code', dw_code)
    if ratio_code is None or len(ratio_code) < len(conv_defs):
        ratio_code = [2] * len(conv_defs)
    print('ratio_code', ratio_code)
    se_code = [x.se for x in conv_defs]
    print('se_code', se_code)
    if output_stride is not None and output_stride not in [8, 16, 32]:
        raise ValueError('Only allowed output_stride values are 8, 16, 32.')
    with tf.variable_scope(scope, 'MobilenetV2', [inputs]):
        # current_stride tracks the running product of convolution strides, i.e. the
        # output stride of the activations, so atrous convolution could take over once
        # the next convolution would exceed the target output_stride.
        current_stride = 1
        rate = 1  # atrous convolution rate
        net = inputs
        in_depth = 3
        gi = 0
        for i, conv_def in enumerate(conv_defs):
            layer_stride = conv_def.stride
            current_stride *= conv_def.stride
            if layer_stride != 1:
                output_layers.append(net)
            if isinstance(conv_def, Conv):
                net = CBR(net, depth(conv_def.depth), conv_def.kernel[0],
                          strides=conv_def.stride, training=is_training, momentum=momentum,
                          mode=mode, name='ConvBlock_{}'.format(i), padding='same',
                          data_format=data_format, activation=ACTIVATION, bn=True,
                          use_bias=False)
            elif isinstance(conv_def, Bottleneck):
                # Identity shortcut only when stride is 1 and the depth is unchanged;
                # otherwise project the shortcut with depthwise conv + BN + 1x1 CBR.
                if layer_stride == 1 and in_depth == conv_def.depth:
                    res = net
                else:
                    res = DepthConv(net, conv_def.kernel, stride=layer_stride,
                                    data_format=data_format,
                                    name='Bottleneck_block_{}_shortcut_dw'.format(i))
                    res = tf.layers.batch_normalization(
                        res, training=is_training,
                        name='Bottleneck_block_{}_shortcut_dw_BN'.format(i), axis=axis)
                    res = CBR(res, depth(conv_def.depth), 1, 1, training=is_training,
                              momentum=momentum, mode=mode,
                              name='Bottleneck_block_{}_shortcut_1x1'.format(i),
                              padding='same', data_format=data_format,
                              activation=ACTIVATION, bn=True, use_bias=False)
                # Expansion: 1x1 ghost conv increases depth by conv_def.factor.
                net = MyConv('Bottleneck_block_{}_up_pointwise'.format(i), net,
                             depth(in_depth * conv_def.factor), 1, dw_code[gi],
                             ratio_code[gi], mode=mode, strides=1, data_format=data_format,
                             use_bias=False, is_training=is_training,
                             activation=ACTIVATION, momentum=momentum)
                # Depthwise conv2d for strided blocks.
                if layer_stride > 1:
                    net = DepthConv(net, conv_def.kernel, stride=layer_stride,
                                    data_format=data_format,
                                    name='Bottleneck_block_{}_depthwise'.format(i))
                    net = tf.layers.batch_normalization(
                        net, training=is_training,
                        name='Bottleneck_block_{}_depthwise_BN'.format(i), axis=axis)
                # Optional squeeze-and-excitation / CBAM attention.
                if se_code[i] > 0 and se > 0:
                    if ATTENTION == 'se':
                        net = SElayer(net, depth(in_depth * conv_def.factor),
                                      depth(in_depth * conv_def.factor) // 4,
                                      "se_{}".format(i), data_format=data_format)
                    elif ATTENTION == 'cbma':  # (sic) spelling matches the config constant
                        net = moduleCBAM(net, depth(in_depth * conv_def.factor),
                                         depth(in_depth * conv_def.factor) // 4, str(i))
                # Projection: 1x1 ghost conv back down to the block's output depth.
                net = MyConv('Bottleneck_block_{}_down_pointwise'.format(i), net,
                             depth(conv_def.depth), 1, dw_code[gi], ratio_code[gi],
                             mode=mode, strides=1, data_format=data_format,
                             use_bias=False, is_training=is_training,
                             activation=ACTIVATION, momentum=momentum)
                net = tf.layers.batch_normalization(
                    net, training=is_training,
                    name='Bottleneck_block_{}_down_pointwise_BN'.format(i), axis=axis)
                gi += 1
                # Residual connection.
                net = tf.add(res, net, name='Bottleneck_block_{}_Add'.format(i)) \
                    if res is not None else net
                in_depth = conv_def.depth
        # Replace the first (stem input) entry with the final feature map.
        output_layers.pop(0)
        output_layers.append(net)
    return output_layers
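# Usage sketch for the GhostNet backbone (assumed: _CONV_DEFS_0 and the Conv/Bottleneck
# namedtuples are defined earlier in this file). depth_multiplier scales every layer's
# channel count, e.g. depth_multiplier=0.5 roughly halves the width of the network.
def _ghostnet_example():
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])  # fed as NHWC
    layers = ghostnet_base(images, mode=None, data_format='channels_first',
                           depth_multiplier=1.0, scope='GhostNet', is_training=False)
    # `layers` holds one feature map per stride-2 stage, ending with the final features.
    return layers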