def build_class_head(width, depth, num_classes=20, num_anchors=9):
    """Build the classification sub-model applied to one feature map.

    The model is a stack of ``depth`` 3x3 ReLU convolutions with ``width``
    filters, followed by a 3x3 prediction convolution with
    ``num_classes * num_anchors`` filters, a reshape to
    ``(-1, num_classes)`` and a sigmoid.

    Args:
        width: Channel count of the input feature map and of every
            intermediate convolution.
        depth: Number of intermediate convolutions.
        num_classes: Size of the last output axis.
        num_anchors: Multiplier on the prediction conv's filter count.

    Returns:
        A ``models.Model`` named ``'class_head'``.
    """

    def fresh_normal():
        # Every convolution receives its own initializer object.
        return initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)

    conv_kwargs = dict(kernel_size=3, strides=1, padding='same')

    head_input = layers.Input(shape=(None, None, width))
    x = head_input

    # Intermediate tower: `depth` identical conv + ReLU layers.
    for _ in range(depth):
        tower_conv = layers.Conv2D(
            filters=width,
            activation='relu',
            kernel_initializer=fresh_normal(),
            bias_initializer='zeros',
            **conv_kwargs
        )
        x = tower_conv(x)

    # Prediction layer: one output channel per (class, anchor) pair.
    predict_conv = layers.Conv2D(
        filters=num_classes * num_anchors,
        kernel_initializer=fresh_normal(),
        bias_initializer=PriorProbability(probability=0.01),
        name='pyramid_classification',
        **conv_kwargs
    )
    x = predict_conv(x)

    # Flatten the spatial and anchor dimensions: (batch, -1, num_classes).
    x = layers.Reshape((-1, num_classes))(x)
    x = layers.Activation('sigmoid')(x)

    return models.Model(inputs=head_input, outputs=x, name='class_head')
def build_class_head(width, depth, num_classes=20):
    """Build a per-location classification sub-model.

    ``depth`` 3x3 ReLU convolutions with ``width`` filters are followed by a
    3x3 sigmoid convolution with ``num_classes`` filters; the result is
    reshaped to ``(-1, num_classes)``.

    Args:
        width: Channel count of the input and of each intermediate conv.
        depth: Number of intermediate convolutions.
        num_classes: Filter count of the final conv and size of the last
            output axis.

    Returns:
        A ``models.Model`` named ``'class_head'``.
    """
    # One initializer object is shared by every convolution below.
    # NOTE(review): some Keras releases warn when an unseeded initializer
    # instance is reused across layers - confirm against the version in use.
    shared_conv_kwargs = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
    )

    head_input = layers.Input(shape=(None, None, width))

    x = head_input
    for _ in range(depth):
        hidden = layers.Conv2D(
            filters=width,
            activation='relu',
            bias_initializer='zeros',
            **shared_conv_kwargs
        )
        x = hidden(x)

    # The sigmoid is fused into the prediction convolution itself.
    scorer = layers.Conv2D(
        filters=num_classes,
        bias_initializer=PriorProbability(probability=0.01),
        activation='sigmoid',
        **shared_conv_kwargs
    )
    x = scorer(x)

    x = layers.Reshape((-1, num_classes), name='class_head_reshape')(x)
    return models.Model(inputs=head_input, outputs=x, name='class_head')
def __init__(self, width, depth, num_classes=20, num_anchors=9, separable_conv=True, freeze_bn=False, **kwargs):
    """Create the layers of the classification head.

    Args:
        width: Number of filters in each intermediate convolution.
        depth: Number of intermediate convolutions.
        num_classes: Number of classes; the prediction conv has
            ``num_classes * num_anchors`` filters and its output is later
            reshaped to ``(-1, num_classes)``.
        num_anchors: Number of anchors per location.
        separable_conv: If true, use ``SeparableConv2D`` layers with
            ``VarianceScaling`` initializers; otherwise use ``Conv2D`` layers
            with ``RandomNormal(mean=0.0, stddev=0.01)`` kernels.
        freeze_bn: Accepted for interface compatibility; not used by this
            constructor.
        **kwargs: Forwarded to the parent class constructor.
    """
    super(ClassNet, self).__init__(**kwargs)
    self.width = width
    self.depth = depth
    self.num_classes = num_classes
    self.num_anchors = num_anchors
    self.separable_conv = separable_conv

    options = {
        'kernel_size': 3,
        'strides': 1,
        'padding': 'same',
    }

    if self.separable_conv:
        options.update({
            'depthwise_initializer': initializers.VarianceScaling(),
            'pointwise_initializer': initializers.VarianceScaling(),
        })
        self.convs = [
            layers.SeparableConv2D(filters=width, bias_initializer='zeros',
                                   name=f'{self.name}/class-{i}', **options)
            for i in range(depth)
        ]
        self.head = layers.SeparableConv2D(filters=num_classes * num_anchors,
                                           bias_initializer=PriorProbability(probability=0.01),
                                           name=f'{self.name}/class-predict', **options)
    else:
        options.update({
            'kernel_initializer': initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
        })
        self.convs = [
            layers.Conv2D(filters=width, bias_initializer='zeros',
                          name=f'{self.name}/class-{i}', **options)
            for i in range(depth)
        ]
        # One output channel per (class, anchor) pair, matching the separable
        # branch and the Reshape((-1, num_classes)) below. The previous
        # num_classes * num_classes * num_anchors produced num_classes times
        # too many rows after the reshape.
        # NOTE(review): unlike the separable branch, this layer name carries
        # no f'{self.name}/' prefix; left unchanged so existing by-name
        # lookups keep working - confirm whether that is intended.
        self.head = layers.Conv2D(filters=num_classes * num_anchors,
                                  bias_initializer=PriorProbability(probability=0.01),
                                  name='class-predict', **options)

    # Five BatchNormalization layers (suffixes 3..7) per intermediate conv.
    # Presumably one per feature-pyramid level - TODO confirm against call().
    self.bns = [
        [
            layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                      name=f'{self.name}/class-{i}-bn-{j}')
            for j in range(3, 8)
        ]
        for i in range(depth)
    ]

    # Despite the attribute name, the activation applied is swish.
    self.relu = layers.Lambda(lambda x: tf.nn.swish(x))
    self.reshape = layers.Reshape((-1, num_classes))
    self.activation = layers.Activation('sigmoid')
    # Presumably selects which entry of each self.bns row to use - verify in call().
    self.level = 0
def classification_coco(fpn_features, w_head, d_head, num_anchors, num_classes):
    """Apply a weight-shared classification head to every pyramid feature.

    The separable convolutions and the prediction layer are shared by all
    features, while each (conv, feature index) pair has its own
    BatchNormalization layer. Five BatchNormalization layers (suffixes
    3..7) are created per convolution, so at most five features can be
    processed.

    Args:
        fpn_features: Iterable of feature tensors, one per pyramid level.
        w_head: Filter count of each intermediate separable convolution.
        d_head: Number of intermediate separable convolutions.
        num_anchors: Multiplier on the prediction layer's filter count.
        num_classes: Size of the last axis of the result.

    Returns:
        The per-feature sigmoid scores, each reshaped to
        ``(-1, num_classes)`` and concatenated along axis 1 by a layer
        named ``'classification'``.
    """
    sep_conv_kwargs = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        depthwise_initializer=initializers.VarianceScaling(),
        pointwise_initializer=initializers.VarianceScaling(),
    )

    # Layers are created once, up front, then reused for every feature.
    tower_convs = []
    for conv_idx in range(d_head):
        tower_convs.append(
            layers.SeparableConv2D(filters=w_head,
                                   bias_initializer='zeros',
                                   name=f'class_net/class-{conv_idx}',
                                   **sep_conv_kwargs))

    predict_conv = layers.SeparableConv2D(
        filters=num_classes * num_anchors,
        bias_initializer=PriorProbability(probability=3e-4),
        name='class_net/class-predict',
        **sep_conv_kwargs)

    tower_bns = []
    for conv_idx in range(d_head):
        per_level = [
            layers.BatchNormalization(momentum=MOMENTUM,
                                      epsilon=EPSILON,
                                      name=f'class_net/class-{conv_idx}-bn-{level_id}')
            for level_id in range(3, 8)
        ]
        tower_bns.append(per_level)

    swish = layers.Lambda(lambda x: tf.nn.swish(x))
    to_class_rows = layers.Reshape((-1, num_classes))
    sigmoid = layers.Activation('sigmoid')

    def run_head(level, tensor):
        # Shared conv -> BN picked by feature index -> swish, d_head times.
        for conv, bns_for_conv in zip(tower_convs, tower_bns):
            tensor = conv(tensor)
            tensor = bns_for_conv[level](tensor)
            tensor = swish(tensor)
        tensor = predict_conv(tensor)
        tensor = to_class_rows(tensor)
        return sigmoid(tensor)

    per_level_scores = [run_head(level, feature)
                        for level, feature in enumerate(fpn_features)]

    return layers.Concatenate(axis=1, name='classification')(per_level_scores)
def retinanet(self, num_classes, backbone='resnet50', **kwargs):
    """Build a RetinaNet model on an ImageNet-initialised ResNet50.

    The function wires together, in this order: the ResNet50 feature
    extractor, a box-regression sub-model, a classification sub-model and a
    five-level feature pyramid (P3..P7). Each sub-model is applied to every
    pyramid level and the per-level results are concatenated along axis 1.

    Args:
        num_classes: Number of classes scored per anchor. It is also passed
            to ``ResNet50`` as ``classes``.
        backbone: Accepted but not used; the backbone is always ResNet50.
        **kwargs: Accepted but not used.

    Returns:
        A ``keras.models.Model`` named ``'retinanet'`` with two outputs:
        ``'regression'`` (last axis 4) and ``'classification'`` (last axis
        ``num_classes``, sigmoid-activated).
    """
    num_anchors = 9
    pyramid_channels = 256  # channels of every pyramid level / head input
    head_channels = 256     # filters of the intermediate head convolutions

    # Backbone
    image_input = keras.layers.Input(shape=(None, None, 3))
    resnet = keras.applications.ResNet50(
        weights="imagenet",
        include_top=False,
        input_shape=(None, None, 3),
        classes=num_classes,
        input_tensor=image_input,
    )
    C3, C4, C5 = [
        resnet.get_layer(layer_name).output
        for layer_name in ("conv3_block4_out", "conv4_block6_out", "conv5_block3_out")
    ]

    # Box-regression sub-model
    # A single initializer object is shared by all five convolutions here.
    box_conv_kwargs = dict(
        kernel_size=3,
        strides=1,
        padding='same',
        kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
        bias_initializer='zeros',
    )
    box_input = keras.layers.Input(shape=(None, None, pyramid_channels))
    box = box_input
    for i in range(4):
        box = keras.layers.Conv2D(
            filters=head_channels,
            activation='relu',
            name='pyramid_regression_{}'.format(i),
            **box_conv_kwargs
        )(box)
    box = keras.layers.Conv2D(num_anchors * 4, name='pyramid_regression', **box_conv_kwargs)(box)
    box = keras.layers.Reshape((-1, 4), name='pyramid_regression_reshape')(box)
    regression_model = keras.models.Model(inputs=box_input, outputs=box, name='regression_submodel')

    # Classification sub-model
    cls_conv_kwargs = dict(kernel_size=3, strides=1, padding='same')
    cls_input = keras.layers.Input(shape=(None, None, pyramid_channels))
    cls = cls_input
    for i in range(4):
        cls = keras.layers.Conv2D(
            filters=head_channels,
            activation='relu',
            name='pyramid_classification_{}'.format(i),
            kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None),
            bias_initializer='zeros',
            **cls_conv_kwargs
        )(cls)
    # Final scoring layer: zero kernel, prior-probability bias.
    cls = keras.layers.Conv2D(
        filters=num_classes * num_anchors,
        kernel_initializer=keras.initializers.zeros(),
        bias_initializer=PriorProbability(probability=0.01),
        name='pyramid_classification',
        **cls_conv_kwargs
    )(cls)
    cls = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(cls)
    cls = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(cls)
    classification_model = keras.models.Model(inputs=cls_input, outputs=cls, name='classification_submodel')

    # Feature pyramid
    def pyramid_conv(layer_name, kernel_size, strides=1):
        # Every pyramid convolution has the same width and 'same' padding.
        return keras.layers.Conv2D(pyramid_channels, kernel_size=kernel_size, strides=strides,
                                   padding='same', name=layer_name)

    # Reduce C5, keep an upsampled copy for the level below, then smooth.
    p5_reduced = pyramid_conv('C5_reduced', kernel_size=1)(C5)
    p5_up = UpsampleLike(name='P5_upsampled')([p5_reduced, C4])
    P5 = pyramid_conv('P5', kernel_size=3)(p5_reduced)

    # Merge upsampled P5 into reduced C4.
    p4_reduced = pyramid_conv('C4_reduced', kernel_size=1)(C4)
    p4_merged = keras.layers.Add(name='P4_merged')([p5_up, p4_reduced])
    p4_up = UpsampleLike(name='P4_upsampled')([p4_merged, C3])
    P4 = pyramid_conv('P4', kernel_size=3)(p4_merged)

    # Merge upsampled P4 into reduced C3.
    p3_reduced = pyramid_conv('C3_reduced', kernel_size=1)(C3)
    p3_merged = keras.layers.Add(name='P3_merged')([p4_up, p3_reduced])
    P3 = pyramid_conv('P3', kernel_size=3)(p3_merged)

    # P6: 3x3 stride-2 conv on C5. P7: ReLU then 3x3 stride-2 conv on P6.
    P6 = pyramid_conv('P6', kernel_size=3, strides=2)(C5)
    p6_relu = keras.layers.Activation('relu', name='C6_relu')(P6)
    P7 = pyramid_conv('P7', kernel_size=3, strides=2)(p6_relu)

    pyramid_levels = [P3, P4, P5, P6, P7]

    # Run both heads over every level
    heads = [
        ('regression', regression_model),
        ('classification', classification_model),
    ]
    model_outputs = []
    for head_name, head_model in heads:
        per_level = [head_model(level) for level in pyramid_levels]
        model_outputs.append(keras.layers.Concatenate(axis=1, name=head_name)(per_level))

    return keras.models.Model(inputs=image_input, outputs=model_outputs, name='retinanet')