def _se_block(inputs, se_ratio=16, name=None):
    """Squeeze-and-Excitation gate: reweight `inputs` per channel.

    Squeezes spatial information into one value per channel, runs a small
    bottleneck MLP (reduction factor `se_ratio`) ending in a sigmoid, and
    multiplies the resulting [0, 1] gate back onto the input feature map.
    """
    channel = K.int_shape(inputs)[-1]
    # Squeeze: global average over the spatial dimensions.
    gate = GlobalAveragePooling2D(name=name + '_gap')(inputs)
    # Excite: reduce -> relu -> restore -> sigmoid.
    gate = Dense(channel // se_ratio, name=name + '_fc1')(gate)
    gate = _activation(gate, activation='relu', name=name + '_relu')
    gate = Dense(channel, name=name + '_fc2')(gate)
    gate = _activation(gate, activation='sigmoid', name=name + '_sigmoid')
    # Broadcast the per-channel gate over H and W, then apply it.
    gate = Reshape([1, 1, channel], name=name + '_reshape')(gate)
    return Multiply(name=name + '_multiply')([inputs, gate])
def _channel_attention(_inputs, cbam_ratio=8, name=None):
    """CBAM channel attention: gate channels using avg- and max-pooled stats.

    Both pooled descriptors pass through the same shared two-layer MLP
    (reduction factor `cbam_ratio`); their sum is squashed by a sigmoid and
    multiplied back onto the input.

    Fix: `name` was previously read as a free variable that is not defined
    anywhere in scope, so every call raised NameError when building layer
    names. It is now an explicit keyword parameter (backward-compatible).
    """
    channel = K.int_shape(_inputs)[-1]
    # The two Dense layers are shared between the avg- and max-pool branches.
    shared_layer_one = Dense(channel // cbam_ratio, activation='relu',
                             kernel_initializer='he_normal', use_bias=True,
                             bias_initializer='zeros', name=name + '_sl1')
    shared_layer_two = Dense(channel, kernel_initializer='he_normal',
                             use_bias=True, bias_initializer='zeros',
                             name=name + '_sl2')
    avg_pool = GlobalAveragePooling2D(name=name + '_gap')(_inputs)
    avg_pool = Reshape((1, 1, channel))(avg_pool)
    avg_pool = shared_layer_one(avg_pool)
    avg_pool = shared_layer_two(avg_pool)
    max_pool = GlobalMaxPooling2D(name=name + '_gmp')(_inputs)
    max_pool = Reshape((1, 1, channel))(max_pool)
    max_pool = shared_layer_one(max_pool)
    max_pool = shared_layer_two(max_pool)
    cbam_feature = Add()([avg_pool, max_pool])
    cbam_feature = _activation(cbam_feature, activation='sigmoid',
                               name=name + '_sigmoid')
    return Multiply()([_inputs, cbam_feature])
def conv_block(args, x, growth_rate, name):
    """DenseNet composite layer: BN-act 1x1 bottleneck, BN-act 3x3 conv,
    optional attention, then concatenate the `growth_rate` new feature maps
    onto the block input.

    Fix: the first activation previously ran on the raw input `x`, silently
    discarding the `'_0_norm'` normalization output; the reference DenseNet
    activates the normalized tensor.
    """
    x1 = _normalization(x, norm=args.norm, name=name + '_0_norm')
    # Bug fix: activate x1 (the normalized tensor), not the raw input x.
    x1 = _activation(x1, activation=args.activation, name=name + '_0_acti')
    x1 = Conv2D(4 * growth_rate, 1, use_bias=False, name=name + '_1_conv')(x1)
    x1 = _normalization(x1, norm=args.norm, name=name + '_1_norm')
    x1 = _activation(x1, activation=args.activation, name=name + '_1_acti')
    if args.attention == 'se':
        x1 = _se_block(x1, name=name + '_1_se')
    elif args.attention == 'cbam':
        x1 = _cbam_block(x1, name=name + '_1_cbam')
    x1 = Conv2D(growth_rate, 3, padding='same', use_bias=False,
                name=name + '_2_conv')(x1)
    # Dense connectivity: new features are appended, never replace the input.
    x = Concatenate(name=name + '_concat')([x, x1])
    return x
def transition_block(args, x, reduction, name):
    """DenseNet transition: norm-act, 1x1 channel compression by `reduction`,
    then 2x spatial downsampling with average pooling."""
    in_channels = K.int_shape(x)[-1]
    out = _normalization(x, norm=args.norm, name=name + '_norm')
    out = _activation(out, activation=args.activation, name=name + '_acti')
    # Compress the channel count before halving the spatial resolution.
    out = Conv2D(int(in_channels * reduction), 1, use_bias=False,
                 name=name + '_conv')(out)
    return AveragePooling2D(2, strides=2, name=name + '_pool')(out)
def block2(args, x, filters, kernel_size=3, stride=1, conv_shortcut=False, attention='no', name=None):
    """Pre-activation (ResNetV2-style) bottleneck residual block.

    Normalization + activation run before any convolution; the shortcut is a
    1x1 projection of the pre-activated tensor when `conv_shortcut` is True,
    otherwise the (possibly subsampled) raw input.

    NOTE(review): the `attention` parameter is never read — the branch below
    consults args.attention instead; confirm whether that is intentional.
    """
    preact = _normalization(x, norm=args.norm, name=name + '_pre_norm')
    preact = _activation(preact, activation=args.activation, name=name + '_pre_acti')
    if conv_shortcut is True:
        # Projection shortcut to 4*filters channels, from the pre-activated tensor.
        shortcut = Conv2D(4 * filters, 1, strides=stride, name=name + '_0_conv')(preact)
    else:
        # Identity shortcut; 1x1 max-pool only subsamples when stride > 1.
        shortcut = MaxPooling2D(1, strides=stride)(x) if stride > 1 else x
    x = Conv2D(filters, 1, strides=1, use_bias=False, name=name + '_1_conv')(preact)
    x = _normalization(x, norm=args.norm, name=name + '_1_norm')
    x = _activation(x, activation=args.activation, name=name + '_1_acti')
    # Stride is applied in the 3x3 conv; explicit padding replaces 'same'.
    x = ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
    x = Conv2D(filters, kernel_size, strides=stride, use_bias=False, name=name + '_2_conv')(x)
    x = _normalization(x, norm=args.norm, name=name + '_2_norm')
    x = _activation(x, activation=args.activation, name=name + '_2_acti')
    x = Conv2D(4 * filters, 1, name=name + '_3_conv')(x)
    if args.attention == 'se':
        x = _se_block(x, name=name + '_3_se')
    elif args.attention == 'cbam':
        x = _cbam_block(x, name=name + '_3_cbam')
    # V2 blocks end at the addition — no post-add activation.
    x = Add(name=name + '_out')([shortcut, x])
    return x
def block1(args, x, filters, kernel_size=3, stride=1, conv_shortcut=True, attention='no', name=None):
    """Post-activation (ResNetV1-style) bottleneck residual block:
    1x1 reduce, KxK, 1x1 expand (4*filters), optional attention, add, act.

    NOTE(review): the `attention` parameter is ignored; args.attention is
    consulted instead — confirm whether that is intentional.
    """
    # Shortcut: 1x1 projection (with norm) when requested, identity otherwise.
    if conv_shortcut is True:
        residual = Conv2D(4 * filters, 1, strides=stride, name=name + '_0_conv')(x)
        residual = _normalization(residual, norm=args.norm, name=name + '_0_norm')
    else:
        residual = x
    # Main path: reduce -> spatial conv -> expand, each followed by norm
    # (activation after the first two only).
    out = Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x)
    out = _normalization(out, norm=args.norm, name=name + '_1_norm')
    out = _activation(out, activation=args.activation, name=name + '_1_acti')
    out = Conv2D(filters, kernel_size, padding='same', name=name + '_2_conv')(out)
    out = _normalization(out, norm=args.norm, name=name + '_2_norm')
    out = _activation(out, activation=args.activation, name=name + '_2_acti')
    out = Conv2D(4 * filters, 1, name=name + '_3_conv')(out)
    out = _normalization(out, norm=args.norm, name=name + '_3_norm')
    if args.attention == 'se':
        out = _se_block(out, name=name + '_3_se')
    elif args.attention == 'cbam':
        out = _cbam_block(out, name=name + '_3_cbam')
    # V1 blocks activate after the residual addition.
    out = Add(name=name + '_add')([residual, out])
    return _activation(out, activation=args.activation, name=name + '_3_acti')
def DenseNet(blocks, args, **kwargs):
    """Build a DenseNet backbone with a configurable classification head.

    Args:
        blocks: sequence of four ints — layer count of each dense block.
        args: namespace providing img_size, img_channel, norm, activation,
            classes, embedding ('softmax' | 'arcface' | 'dual'), embd_shape,
            margin, logist_scale and backbone.

    Returns:
        A Keras Model. For 'arcface' and 'dual' embeddings the model takes a
        second input ('arcface_input') carrying the one-hot labels.

    Raises:
        ValueError: if args.embedding is not one of the supported modes.
    """
    img_input = x = Input(shape=(args.img_size, args.img_size, args.img_channel), name='main_input')
    # Stem: 7x7/2 conv followed by 3x3/2 max-pool, as in reference DenseNet.
    x = ZeroPadding2D(padding=((3, 3), (3, 3)))(x)
    x = Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x)
    x = _normalization(x, norm=args.norm, name='conv1/norm')
    x = _activation(x, activation=args.activation, name='conv1/acti')
    x = ZeroPadding2D(padding=((1, 1), (1, 1)))(x)
    x = MaxPooling2D(3, strides=2, name='pool1')(x)
    # Four dense blocks; each of the first three is followed by a transition
    # with 0.5 channel compression and 2x downsampling.
    x = dense_block(args, x, blocks[0], name='conv2')
    x = transition_block(args, x, 0.5, name='pool2')
    x = dense_block(args, x, blocks[1], name='conv3')
    x = transition_block(args, x, 0.5, name='pool3')
    x = dense_block(args, x, blocks[2], name='conv4')
    x = transition_block(args, x, 0.5, name='pool4')
    x = dense_block(args, x, blocks[3], name='conv5')
    x = _normalization(x, norm=args.norm, name='norm')
    x = _activation(x, activation=args.activation, name='acti')
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    if args.embedding == 'softmax':
        # Plain classifier head (sigmoid for the single-class case).
        x = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x)
        model_input = [img_input]
        model_output = [x]
    elif args.embedding == 'arcface':
        # Embedding + ArcFace margin head; labels arrive as a second input.
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        label = Input(shape=(args.classes, ), name='arcface_input')
        x = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin,
                                    logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x]
    elif args.embedding == 'dual':
        # Two heads over a shared embedding: softmax and ArcFace.
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        x1 = _activation(x, activation=args.activation, name='fc2_acti')
        x1 = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x1)
        label = Input(shape=(args.classes, ), name='arcface_input')
        x2 = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin,
                                     logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x1, x2]
    else:
        # Previously a bare ValueError(); include the offending value.
        raise ValueError('unsupported embedding: {}'.format(args.embedding))
    model = Model(model_input, model_output, name='{}_{}'.format(args.backbone, args.embedding))
    return model
def ResNet(args, embd_shape, logist_scale, stack_fn, preact, use_bias, **kwargs):
    """Build a ResNet backbone (v1 or v2 via `preact`) with a configurable head.

    Args:
        args: namespace providing img_size, img_channel, norm, activation,
            classes, embedding, embd_shape, margin, logist_scale, backbone.
        embd_shape, logist_scale: NOTE(review) — currently unused; the body
            reads args.embd_shape / args.logist_scale instead. Kept in the
            signature for call-site compatibility.
        stack_fn: callable that applies the residual stacks to the stem output.
        preact: True for pre-activation (v2) nets — norm/act move from after
            the stem conv to after the stacks.
        use_bias: whether the stem conv carries a bias term.

    Raises:
        ValueError: if args.embedding is not 'softmax', 'arcface' or 'dual'.
    """
    img_input = x = Input(shape=(args.img_size, args.img_size, args.img_channel), name='main_input')
    x = ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(x)
    x = Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x)
    if preact is False:
        # V1: normalize/activate right after the stem conv.
        x = _normalization(x, norm=args.norm, name='conv1_norm')
        x = _activation(x, activation=args.activation, name='conv1_acti')
    x = ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = MaxPooling2D(3, strides=2, name='pool1_pool')(x)
    x = stack_fn(x)
    if preact is True:
        # V2: final norm/act after all residual stacks.
        x = _normalization(x, norm=args.norm, name='post_norm')
        x = _activation(x, activation=args.activation, name='post_acti')
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    if args.embedding == 'softmax':
        x = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x)
        model_input = [img_input]
        model_output = [x]
    elif args.embedding == 'arcface':
        x = _normalization(x, norm=args.norm, name='avg_pool_norm')
        x = Flatten(name='flatten')(x)
        # L2-regularized embedding layer feeding the ArcFace margin head.
        x = Dense(args.embd_shape, kernel_regularizer=tf.keras.regularizers.l2(5e-4), name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        label = Input(shape=(args.classes, ), name='arcface_input')
        x = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin,
                                    logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x]
    elif args.embedding == 'dual':  # bengali 8th model
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _activation(x, activation=args.activation, name='avg_pool_acti')
        x1 = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x)
        label = Input(shape=(args.classes, ), name='arcface_input')
        x2 = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin,
                                     logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x1, x2]
    else:
        # Previously a bare ValueError(); include the offending value.
        raise ValueError('unsupported embedding: {}'.format(args.embedding))
    model = Model(model_input, model_output, name='{}_{}'.format(args.backbone, args.embedding))
    return model
def block3(args, x, filters, kernel_size=3, stride=1, groups=32, conv_shortcut=True, attention='no', name=None):
    """ResNeXt-style bottleneck block with a grouped 3x3 convolution.

    The grouped convolution is emulated by a DepthwiseConv2D with
    depth_multiplier = filters // groups, followed by a frozen 1x1 conv whose
    0/1 kernel sums the correct channels back down to `filters` maps.

    NOTE(review): the `attention` parameter is ignored; args.attention decides.
    """
    if conv_shortcut is True:
        shortcut = Conv2D((64 // groups) * filters, 1, strides=stride, use_bias=False, name=name + '_0_conv')(x)
        shortcut = _normalization(shortcut, norm=args.norm, name=name + '_0_norm')
    else:
        shortcut = x
    x = Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x)
    x = _normalization(x, norm=args.norm, name=name + '_1_norm')
    x = _activation(x, activation=args.activation, name=name + '_1_acti')
    # c = channels per group.
    c = filters // groups
    x = ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
    # Depthwise conv expands each input channel into c outputs.
    x = DepthwiseConv2D(kernel_size, strides=stride, depth_multiplier=c, use_bias=False, name=name + '_2_conv')(x)
    # Fixed binary kernel: output i sums the c depthwise outputs belonging to
    # its group (stride-c slice), reducing filters*c channels back to filters.
    kernel = np.zeros((1, 1, filters * c, filters), dtype=np.float32)
    for i in range(filters):
        start = (i // c) * c * c + i % c
        end = start + c * c
        kernel[:, :, start:end:c, i] = 1.
    # trainable=False keeps the reduction kernel frozen at its constant value.
    x = Conv2D(filters, 1, use_bias=False, trainable=False,
               kernel_initializer={'class_name': 'Constant', 'config': {'value': kernel}},
               name=name + '_2_gconv')(x)
    x = _normalization(x, norm=args.norm, name=name + '_2_norm')
    x = _activation(x, activation=args.activation, name=name + '_2_acti')
    x = Conv2D((64 // groups) * filters, 1, use_bias=False, name=name + '_3_conv')(x)
    x = _normalization(x, norm=args.norm, name=name + '_3_norm')
    if args.attention == 'se':
        x = _se_block(x, name=name + '_3_se')
    elif args.attention == 'cbam':
        x = _cbam_block(x, name=name + '_3_cbam')
    # Post-activation: add the shortcut, then activate.
    x = Add(name=name + '_add')([shortcut, x])
    x = _activation(x, activation=args.activation, name=name + '_3_acti')
    return x
def EfficientNet(args, width_coefficient, depth_coefficient, default_size, dropout_rate, drop_connect_rate=0.2, depth_divisor=8, blocks_args=DEFAULT_BLOCKS_ARGS, **kwargs):
    """Build an EfficientNet backbone with a configurable classification head.

    width_coefficient scales channel counts, depth_coefficient scales block
    repeats; drop_connect_rate is ramped linearly over the blocks.
    `default_size` is unused in this body (kept for the standard signature).

    Raises:
        ValueError: if args.embedding is not 'softmax', 'arcface' or 'dual'.
    """
    img_input = x = Input(shape=(args.img_size, args.img_size, args.img_channel), name='main_input')

    def round_filters(filters, divisor=depth_divisor):
        """Round number of filters based on depth multiplier."""
        filters *= width_coefficient
        new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_filters < 0.9 * filters:
            new_filters += divisor
        return int(new_filters)

    def round_repeats(repeats):
        """Round number of repeats based on depth multiplier."""
        return int(math.ceil(depth_coefficient * repeats))

    # Build stem
    x = ZeroPadding2D(padding=correct_pad(x, 3), name='stem_conv_pad')(x)
    x = Conv2D(round_filters(32), 3, strides=2, padding='valid', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='stem_conv')(x)
    x = _normalization(x, norm=args.norm, name='stem_norm')
    x = _activation(x, activation=args.activation, name='stem_acti')

    # Build blocks
    from copy import deepcopy
    # Deep-copied so the pop/updates below don't mutate the shared defaults.
    blocks_args = deepcopy(blocks_args)
    b = 0
    # Total block count, used to ramp drop_connect_rate from 0 to its maximum.
    blocks = float(sum(ba['repeats'] for ba in blocks_args))
    for (i, ba) in enumerate(blocks_args):
        assert ba['repeats'] > 0
        # Update block input and output filters based on depth multiplier.
        ba['filters_in'] = round_filters(ba['filters_in'])
        ba['filters_out'] = round_filters(ba['filters_out'])
        for j in range(round_repeats(ba.pop('repeats'))):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                ba['strides'] = 1
                ba['filters_in'] = ba['filters_out']
            # Block names are e.g. 'block1a_', 'block1b_', ... (chr(97) == 'a').
            x = block(args, x, drop_connect_rate * b / blocks, name='block{}{}_'.format(i + 1, chr(j + 97)), **ba)
            b += 1

    # Build top
    x = Conv2D(round_filters(1280), 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='top_conv')(x)
    x = _normalization(x, norm=args.norm, name='top_norm')
    x = _activation(x, activation=args.activation, name='top_acti')
    x = GlobalAveragePooling2D(name='avg_pool')(x)
    if dropout_rate > 0:
        x = Dropout(dropout_rate, name='top_dropout')(x)
    if args.embedding == 'softmax':
        # Plain classifier head (sigmoid for the single-class case).
        x = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x)
        model_input = [img_input]
        model_output = [x]
    elif args.embedding == 'arcface':
        # x = _normalization(x, norm=args.norm, name='avg_pool_norm')
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        label = Input(shape=(args.classes, ), name='arcface_input')
        x = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin, logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x]
    elif args.embedding == 'dual':  # bengali 8th model
        # x = _normalization(x, norm=args.norm, name='avg_pool_norm')
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        x1 = _activation(x, activation=args.activation, name='fc2_acti')
        x1 = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x1)
        label = Input(shape=(args.classes, ), name='arcface_input')
        x2 = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin, logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x1, x2]
    else:
        raise ValueError()
    # Create model.
    model = Model(model_input, model_output, name='{}_{}'.format(args.backbone, args.embedding))
    return model
def block(args, inputs, drop_rate=0., name='', filters_in=32, filters_out=16, kernel_size=3, strides=1, expand_ratio=1, se_ratio=0., id_skip=True):
    """EfficientNet MBConv block: expand, depthwise conv, optional attention,
    project, and (when shapes allow) a stochastic-depth residual connection.

    NOTE(review): `se_ratio` is unused here — attention comes from
    args.attention and uses the attention helpers' own defaults; confirm.
    """
    # Expansion phase
    filters = filters_in * expand_ratio
    if expand_ratio != 1:
        x = Conv2D(filters, 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + 'expand_conv')(inputs)
        x = _normalization(x, norm=args.norm, name=name + 'expand_norm')
        x = _activation(x, activation=args.activation, name=name + 'expand_acti')
    else:
        # No expansion: feed the input straight to the depthwise conv.
        x = inputs
    # Depthwise Convolution
    if strides == 2:
        # Explicit padding so the strided conv can use 'valid'.
        x = ZeroPadding2D(padding=correct_pad(x, kernel_size), name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = DepthwiseConv2D(kernel_size, strides=strides, padding=conv_pad, use_bias=False, depthwise_initializer=CONV_KERNEL_INITIALIZER, name=name + 'dwconv')(x)
    x = _normalization(x, norm=args.norm, name=name + 'norm')
    x = _activation(x, activation=args.activation, name=name + 'acti')
    if args.attention == 'se':
        x = _se_block(x, name=name + 'se')
    elif args.attention == 'cbam':
        x = _cbam_block(x, name=name + 'cbam')
    # Output phase
    x = Conv2D(filters_out, 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + 'project_conv')(x)
    # No activation after projection (linear bottleneck).
    x = _normalization(x, norm=args.norm, name=name + 'project_norm')
    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if drop_rate > 0:
            # Stochastic depth: noise_shape drops the whole residual per sample.
            x = Dropout(drop_rate, noise_shape=(None, 1, 1, 1), name=name + 'drop')(x)
        x = Add(name=name + 'add')([x, inputs])
    return x
def VGG(args, **kwargs):
    """Build a VGG-style backbone (depth parsed from args.backbone's last two
    digits: 11/13/16/19) with a configurable classification head.

    Raises:
        ValueError: if args.embedding is not 'softmax', 'arcface' or 'dual'.
    """
    # e.g. 'vgg16' -> 16; selects the per-stage conv counts below.
    total_layers = int(args.backbone[-2:])
    num_layers = {11: [1, 1, 2, 2, 2], 13: [2, 2, 2, 2, 2], 16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]}
    # Channel multipliers per stage (base width 64): 64, 128, 256, 512, 512.
    filters = [1, 2, 4, 8, 8]
    img_input = x = Input(shape=(args.img_size, args.img_size, args.img_channel), name='main_input')
    for i, layers in enumerate(num_layers[total_layers]):
        for layer in range(layers):
            x = Conv2D(64 * filters[i], (3, 3), padding='same', name='block{}_conv{}'.format(i + 1, layer + 1))(x)
            x = _normalization(x, norm=args.norm, name='block{}_norm{}'.format(i + 1, layer + 1))
            # Attention is applied only after the last conv of each stage.
            if layer == layers - 1:
                if args.attention == 'se':
                    x = _se_block(x, name='block{}_se{}'.format(i + 1, layer + 1))
                elif args.attention == 'cbam':
                    x = _cbam_block(x, name='block{}_cbam{}'.format(i + 1, layer + 1))
            x = _activation(x, activation=args.activation, name='block{}_acti{}'.format(i + 1, layer + 1))
        # 2x downsampling at the end of every stage.
        x = MaxPooling2D((2, 2), name='block{}_pool'.format(i + 1))(x)
    x = Flatten(name='flatten')(x)
    x = Dense(4096, name='fc1')(x)
    x = _normalization(x, norm=args.norm, name='fc1_norm')
    x = _activation(x, activation=args.activation, name='fc1_acti')
    if args.embedding == 'softmax':
        # Classic VGG fc2 + classifier (sigmoid for the single-class case).
        x = Dense(4096, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        x = _activation(x, activation=args.activation, name='fc2_acti')
        x = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x)
        model_input = [img_input]
        model_output = [x]
    elif args.embedding == 'arcface':
        # Embedding + ArcFace margin head; labels arrive as a second input.
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        label = Input(shape=(args.classes, ), name='arcface_input')
        x = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin, logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x]
    elif args.embedding == 'dual':
        # Two heads over a shared embedding: softmax and ArcFace.
        x = Dense(args.embd_shape, name='fc2')(x)
        x = _normalization(x, norm=args.norm, name='fc2_norm')
        x1 = _activation(x, activation=args.activation, name='fc2_acti')
        x1 = Dense(args.classes, activation='softmax' if args.classes > 1 else 'sigmoid', name='main_output')(x1)
        label = Input(shape=(args.classes, ), name='arcface_input')
        x2 = ArcMarginPenaltyLogists(num_classes=args.classes, margin=args.margin, logist_scale=args.logist_scale, name='arcface_output')(x, label)
        model_input = [img_input, label]
        model_output = [x1, x2]
    else:
        raise ValueError()
    model = Model(model_input, model_output, name='{}_{}'.format(args.backbone, args.embedding))
    return model