def __init__(self, model_file, num_classes, input_image_size):
     # NOTE: any input size other than (224, 224, 3) forces a re-download on
     # every call, since the cached file is only reused for the default size.
     if not gfile.Exists(model_file) or input_image_size != (224, 224, 3):
         print("need to download the model")
         mobile_net = MobileNet(weights='imagenet',
                                input_shape=input_image_size)
         self.mobile_net_model = models.Model(
             inputs=mobile_net.input,
             outputs=mobile_net.get_layer(
                 'global_average_pooling2d').output)
         print("save the downloaded model for reuse")
         mobile_net.save(model_file)
     else:
         self.mobile_net_model = models.load_model(model_file)
     classes = num_classes
     self.inputs = layers.Input(shape=(1024, ))
     self.outputs = layers.Dense(classes,
                                 activation='softmax',
                                 name='final_output')(self.inputs)
     self.one_layer_model = models.Model(inputs=[self.inputs],
                                         outputs=[self.outputs])
     final_output = layers.Dense(classes,
                                 activation='softmax',
                                 name='final_output')(
                                     self.mobile_net_model.output)
     self.final_model = models.Model(inputs=self.mobile_net_model.inputs,
                                     outputs=final_output)
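# Usage sketch for the constructor above (the enclosing class name, `images`,
# and `labels` are assumptions, not shown in the source). Note that the two
# Dense layers named 'final_output' are distinct instances, so weights trained
# in one_layer_model must be copied into final_model explicitly.
#
# clf = <TheClass>('mobilenet_gap.h5', num_classes=5, input_image_size=(224, 224, 3))
# features = clf.mobile_net_model.predict(images)      # (N, 1024) pooled features
# clf.one_layer_model.compile('adam', 'sparse_categorical_crossentropy')
# clf.one_layer_model.fit(features, labels, epochs=5)  # train the cheap head only
# clf.final_model.get_layer('final_output').set_weights(
#     clf.one_layer_model.get_layer('final_output').get_weights())
# preds = clf.final_model.predict(images)              # end-to-end inference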
Example #2
    def build():
        # Encoder: MobileNet (feature extractor)
        mobNet = MobileNet(
            input_shape=(224, 224,
                         3),  # Use 224 by 224 images with 3 channels (RGB)
            alpha=1.0,
            depth_multiplier=1,
            dropout=1e-3,
            include_top=False,  # Remove the last classifier
            weights='imagenet',  # Pretrained on ImageNet
            input_tensor=None,
            pooling=None)

        decIn = mobNet.layers[-1].output

        # Decoder
        # Upsample 1
        conv1Out = Conv2D(512, (5, 5), padding="same")(decIn)
        up1Out = UpSampling2D(size=(2, 2), interpolation="nearest")(conv1Out)
        # Upsample 2
        conv2Out = Conv2D(256, (5, 5), padding="same")(up1Out)
        up2Out = UpSampling2D(size=(2, 2), interpolation="nearest")(conv2Out)
        # Skip connection 1
        skip1 = mobNet.get_layer("conv_pw_5_relu").output
        skip1Out = Add()([up2Out, skip1])
        # Upsample 3
        conv3Out = Conv2D(128, (5, 5), padding="same")(skip1Out)
        up3Out = UpSampling2D(size=(2, 2), interpolation="nearest")(conv3Out)
        # Skip connection 2
        skip2 = mobNet.get_layer("conv_pw_3_relu").output
        skip2Out = Add()([up3Out, skip2])
        # Upsample 4
        conv4Out = Conv2D(64, (5, 5), padding="same")(skip2Out)
        up4Out = UpSampling2D(size=(2, 2), interpolation="nearest")(conv4Out)
        # Skip connection 3
        skip3 = mobNet.get_layer("conv_pw_1_relu").output
        skip3Out = Add()([up4Out, skip3])
        # Upsample 5
        conv5Out = Conv2D(32, (5, 5), padding="same")(skip3Out)
        up5Out = UpSampling2D(size=(2, 2), interpolation="nearest")(conv5Out)
        # Pointwise conv
        decOut = Conv2D(1, (1, 1), padding="same")(up5Out)

        # Combine full model
        model = Model(inputs=mobNet.input, outputs=decOut)
        return model
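# Minimal usage sketch for build() above, assuming it is reachable at module
# scope; the loss is a placeholder, since the source does not say whether the
# single 224x224 output channel is a depth map or a segmentation mask.
#
# import tensorflow as tf
# model = build()
# model.compile(optimizer='adam',
#               loss=tf.keras.losses.BinaryCrossentropy(from_logits=True))
# model.summary()  # 224x224x3 in, 224x224x1 out, with three additive skips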
Example #3
def create_object_basic_model():
    MobileNet_model = MobileNet(weights='imagenet',
                                include_top=False,
                                input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    MobileNet_model_out = MobileNet_model.get_layer('conv_pw_13_relu').output
    MobileNet_model_out = GlobalAveragePooling2D()(MobileNet_model_out)
    MobileNet_model_out = Dense(8, activation='softmax')(MobileNet_model_out)
    model = Model(inputs=MobileNet_model.input, outputs=MobileNet_model_out)
    return model
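# Usage sketch; IMAGE_SIZE is a module-level constant in the source file, and
# 224 is an assumption here. The 8-way softmax suggests an 8-class classifier.
#
# IMAGE_SIZE = 224
# model = create_object_basic_model()
# model.compile(optimizer='adam', loss='categorical_crossentropy',
#               metrics=['accuracy'])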
Example #4
def mobilenet_v1(num_classes, inputs, modifier=None):
	# NOTE: num_classes is unused; the backbone only exposes feature maps.
	from tensorflow import keras
	from tensorflow.keras.applications import MobileNet
	backbone = MobileNet(input_tensor=inputs, include_top=False, pooling=None)
	layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu']
	layer_outputs = [backbone.get_layer(name).output for name in layer_names]
	backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name)
	if modifier:
		backbone = modifier(backbone)
	return backbone
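# Usage sketch: the three taps are the stride-8/16/32 feature maps typically
# fed to an FPN. The shapes in the comment assume a 224x224 input.
#
# import tensorflow as tf
# inputs = tf.keras.Input(shape=(224, 224, 3))
# backbone = mobilenet_v1(num_classes=20, inputs=inputs)
# for t in backbone.outputs:
#     print(t.shape)  # (None,28,28,256), (None,14,14,512), (None,7,7,1024)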
Example #5
 def _get_encoder(self):
     mobile_net = MobileNet(weights='imagenet',
                            include_top=False,
                            input_shape=self.input_shape)
     layer_names = [
         'conv_pw_1_relu',
         'conv_pw_3_relu',
         'conv_pw_5_relu',
         'conv_pw_11_relu',
         'conv_pw_13_relu',
     ]
     layers = [mobile_net.get_layer(name).output for name in layer_names]
     down_stack = tf.keras.Model(inputs=mobile_net.input, outputs=layers)
     down_stack.trainable = False
     return down_stack
Example #6
def mobilenet_encoder(input_shape=[224, 224, 3]):
    mn = MobileNet(weights='imagenet',
                   include_top=False,
                   input_shape=input_shape)
    mn.trainable = False
    layer_names = [
        'conv_pw_1_relu',
        'conv_pw_3_relu',
        'conv_pw_5_relu',
        'conv_pw_11_relu',
        'conv_pw_13_relu',
    ]
    layers = [mn.get_layer(name).output for name in layer_names]
    down_stack = tf.keras.Model(inputs=mn.input, outputs=layers)
    down_stack.trainable = False
    return down_stack
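# A sketch of how the five taps from the encoder above (and the near-identical
# _get_encoder in Example #5) pair with an upsampling decoder, in the style of
# the TensorFlow segmentation tutorial this pattern comes from. The decoder
# below is an assumption, not part of the source.
def unet_from_encoder(num_classes=2):
    inputs = tf.keras.Input(shape=(224, 224, 3))
    skips = mobilenet_encoder()(inputs)  # five feature maps, fine -> coarse
    x = skips[-1]                        # 7x7x1024 bottleneck
    for skip, filters in zip(reversed(skips[:-1]), [512, 256, 128, 64]):
        x = tf.keras.layers.Conv2DTranspose(
            filters, 3, strides=2, padding='same', activation='relu')(x)
        x = tf.keras.layers.Concatenate()([x, skip])  # resolutions now match
    x = tf.keras.layers.Conv2DTranspose(
        num_classes, 3, strides=2, padding='same')(x)  # back to 224x224
    return tf.keras.Model(inputs, x)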
Example #7
def get_mobilenet_SSD(image_size, num_classes):
    mobilenet = MobileNet(input_shape=image_size,
                          include_top=False,
                          weights="imagenet")
    for layer in mobilenet.layers:
        layer._name = layer.name + '_base'

    x = layers.BatchNormalization(
        beta_initializer='glorot_uniform', gamma_initializer='glorot_uniform')(
            mobilenet.get_layer(name='conv_pad_6_base').output)
    conf1 = layers.Conv2D(4 * num_classes, kernel_size=3, padding='same')(x)
    conf1 = layers.Reshape(
        (conf1.shape[1] * conf1.shape[2] * conf1.shape[3] // num_classes,
         num_classes))(conf1)
    loc1 = layers.Conv2D(4 * 4, kernel_size=3, padding='same')(x)
    loc1 = layers.Reshape(
        (loc1.shape[1] * loc1.shape[2] * loc1.shape[3] // 4, 4))(loc1)

    x = layers.MaxPool2D(3, 1, padding='same')(
        mobilenet.get_layer(name='conv_pad_12_base').output)
    x = layers.Conv2D(1024,
                      3,
                      padding='same',
                      dilation_rate=6,
                      activation='relu')(x)
    x = layers.Conv2D(1024, 1, padding='same', activation='relu')(x)
    conf2 = layers.Conv2D(6 * num_classes, kernel_size=3, padding='same')(x)
    conf2 = layers.Reshape(
        (conf2.shape[1] * conf2.shape[2] * conf2.shape[3] // num_classes,
         num_classes))(conf2)
    loc2 = layers.Conv2D(6 * 4, kernel_size=3, padding='same')(x)
    loc2 = layers.Reshape(
        (loc2.shape[1] * loc2.shape[2] * loc2.shape[3] // 4, 4))(loc2)

    x = layers.Conv2D(256, 1, activation='relu')(x)
    x = layers.Conv2D(512, 3, strides=2, padding='same', activation='relu')(x)
    conf3 = layers.Conv2D(6 * num_classes, kernel_size=3, padding='same')(x)
    conf3 = layers.Reshape(
        (conf3.shape[1] * conf3.shape[2] * conf3.shape[3] // num_classes,
         num_classes))(conf3)
    loc3 = layers.Conv2D(6 * 4, kernel_size=3, padding='same')(x)
    loc3 = layers.Reshape(
        (loc3.shape[1] * loc3.shape[2] * loc3.shape[3] // 4, 4))(loc3)

    x = layers.Conv2D(128, 1, activation='relu')(x)
    x = layers.Conv2D(256, 3, strides=2, padding='same', activation='relu')(x)
    conf4 = layers.Conv2D(6 * num_classes, kernel_size=3, padding='same')(x)
    conf4 = layers.Reshape(
        (conf4.shape[1] * conf4.shape[2] * conf4.shape[3] // num_classes,
         num_classes))(conf4)
    loc4 = layers.Conv2D(6 * 4, kernel_size=3, padding='same')(x)
    loc4 = layers.Reshape(
        (loc4.shape[1] * loc4.shape[2] * loc4.shape[3] // 4, 4))(loc4)

    x = layers.Conv2D(128, 1, activation='relu')(x)
    x = layers.Conv2D(256, 3, activation='relu')(x)
    conf5 = layers.Conv2D(4 * num_classes, kernel_size=3, padding='same')(x)
    conf5 = layers.Reshape(
        (conf5.shape[1] * conf5.shape[2] * conf5.shape[3] // num_classes,
         num_classes))(conf5)
    loc5 = layers.Conv2D(4 * 4, kernel_size=3, padding='same')(x)
    loc5 = layers.Reshape(
        (loc5.shape[1] * loc5.shape[2] * loc5.shape[3] // 4, 4))(loc5)

    x = layers.Conv2D(128, 1, activation='relu')(x)
    x = layers.Conv2D(256, 3, activation='relu')(x)
    conf6 = layers.Conv2D(4 * num_classes, kernel_size=3, padding='same')(x)
    conf6 = layers.Reshape(
        (conf6.shape[1] * conf6.shape[2] * conf6.shape[3] // num_classes,
         num_classes))(conf6)
    loc6 = layers.Conv2D(4 * 4, kernel_size=3, padding='same')(x)
    loc6 = layers.Reshape(
        (loc6.shape[1] * loc6.shape[2] * loc6.shape[3] // 4, 4))(loc6)

    confs = layers.concatenate([conf1, conf2, conf3, conf4, conf5, conf6],
                               axis=1)
    locs = layers.concatenate([loc1, loc2, loc3, loc4, loc5, loc6], axis=1)
    model = tf.keras.Model(inputs=mobilenet.input,
                           outputs=[confs, locs])

    return model
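# Usage sketch (a 300x300 input is an assumption; any size the MobileNet
# backbone accepts should work, since the Reshape layers are computed from
# the actual feature-map shapes).
#
# model = get_mobilenet_SSD(image_size=(300, 300, 3), num_classes=21)
# confs, locs = model.outputs
# print(confs.shape, locs.shape)  # (None, n_boxes, 21) and (None, n_boxes, 4)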
Example #8
def create_model(opt,
                 metrics,
                 loss,
                 trainable_pretrained=True,
                 input_shape=(224, 224, 3)):
    old_model = MobileNet(input_shape=input_shape,
                          weights='imagenet',
                          include_top=False)
    old_model.trainable = trainable_pretrained

    original_image = Lambda(
        lambda x: x,
        name='original_image',
        # trainable=True
    )(old_model.input)

    x = old_model.output
    y_names = [
        "conv_pw_11_relu", "conv_pw_5_relu", "conv_pw_3_relu", "conv_pw_1_relu"
    ]
    f_nums = [1024, 64, 64, 64]
    ys = [
        Conv2D(f_num, kernel_size=1, name=f'skip_hair_conv_{i}')(
            old_model.get_layer(name=name).output)
        for i, (name, f_num) in enumerate(zip(y_names, f_nums))
    ] + [None]

    for i in range(5):
        y = ys[i]
        x = UpSampling2D(name=f'upsampling_hair_{i}')(x)
        if y is not None:
            x = Add(name=f'skip_hair_add_{i}')([x, y])
        x = DepthwiseConv2D(
            kernel_size=3,
            padding='same',
            name=f'depth_conv2d_hair_{i}',
            kernel_initializer=GlorotNormal(seed=(i + 1)),
        )(x)
        x = Conv2D(
            64,
            kernel_size=1,
            padding='same',
            name=f'conv2d_hair_{i}',
            kernel_regularizer=L2(2e-5),
            kernel_initializer=GlorotNormal(seed=11 * (i + 1)),
        )(x)
        x = ReLU(name=f'relu_hair_{i}')(x)
    x = Conv2D(
        # 1,
        2,
        kernel_size=1,
        padding='same',
        name='conv2d_hair_final',
        kernel_regularizer=L2(2e-5),
        kernel_initializer=GlorotNormal(seed=0))(x)
    # Despite the layer name, this is a softmax over the two mask channels;
    # the earlier single-channel sigmoid variant is kept below for reference.
    x = Softmax(name='sigmoid_hair_final')(x)
    x = Concatenate()([x, original_image])
    # x = Activation('sigmoid', name='sigmoid_hair_final')(x)

    model = Model(old_model.input, x)
    if opt:
        model.compile(
            optimizer=opt,
            loss=loss,
            metrics=metrics,
        )
    return model
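# Usage sketch: the output is a 2-channel softmax hair mask concatenated with
# the original RGB image (5 channels total), which implies a custom loss that
# slices the mask back out. Passing opt=None skips compilation.
#
# model = create_model(opt=None, metrics=None, loss=None,
#                      trainable_pretrained=False)
# print(model.output_shape)  # (None, 224, 224, 5): 2 mask + 3 image channels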
Example #9
def SSD_MOBILENET(
    config,
    label_maps,
    num_predictions=10,
    is_training=True,
):
    """ Construct an SSD network that uses MobileNetV1 backbone.

    Args:
        - config: python dict as read from the config file
        - label_maps: A python list containing the classes
        - num_predictions: The number of predictions to produce as final output
        - is_training: whether the model is constructed for training or for inference

    Returns:
        - A keras version of SSD300 with MobileNetV1 as backbone network.

    Code References:
        - https://github.com/chuanqi305/MobileNet-SSD
    """
    model_config = config["model"]
    input_shape = (model_config["input_size"], model_config["input_size"], 3)
    num_classes = len(label_maps) + 1  # for background class
    l2_reg = model_config["l2_regularization"]
    kernel_initializer = model_config["kernel_initializer"]
    default_boxes_config = model_config["default_boxes"]
    extra_box_for_ar_1 = model_config["extra_box_for_ar_1"]
    # construct the base network and extra feature layers
    base_network = MobileNet(
        input_shape=input_shape,
        alpha=config["model"]["width_multiplier"],
        depth_multiplier=config["model"]["depth_multiplier"],
        classes=num_classes,
        weights='imagenet',
        include_top=False)
    base_network.get_layer("input_1")._name = "input"
    for layer in base_network.layers:
        base_network.get_layer(layer.name)._kernel_initializer = "he_normal"
        base_network.get_layer(layer.name)._kernel_regularizer = l2(l2_reg)
        layer.trainable = False  # each layer of the base network should not be trainable

    conv11 = base_network.get_layer("conv_pw_11_relu").output
    conv13 = base_network.get_layer("conv_pw_13_relu").output

    def conv_block_1(x, filters, name):
        x = Conv2D(filters=filters,
                   kernel_size=(1, 1),
                   padding="valid",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name=name,
                   use_bias=False)(x)
        x = BatchNormalization(name=f"{name}/bn")(x)
        x = ReLU(name=f"{name}/relu")(x)
        return x

    def conv_block_2(x, filters, name):
        x = Conv2D(filters=filters,
                   kernel_size=(3, 3),
                   padding="same",
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(l2_reg),
                   name=name,
                   use_bias=False,
                   strides=(2, 2))(x)
        x = BatchNormalization(name=f"{name}/bn")(x)
        x = ReLU(name=f"{name}/relu")(x)
        return x

    conv14_1 = conv_block_1(x=conv13, filters=256, name="conv14_1")
    conv14_2 = conv_block_2(x=conv14_1, filters=512, name="conv14_2")
    conv15_1 = conv_block_1(x=conv14_2, filters=128, name="conv15_1")
    conv15_2 = conv_block_2(x=conv15_1, filters=256, name="conv15_2")
    conv16_1 = conv_block_1(x=conv15_2, filters=128, name="conv16_1")
    conv16_2 = conv_block_2(x=conv16_1, filters=256, name="conv16_2")
    conv17_1 = conv_block_1(x=conv16_2, filters=128, name="conv17_1")
    conv17_2 = conv_block_2(x=conv17_1, filters=256, name="conv17_2")
    model = Model(inputs=base_network.input, outputs=conv17_2)
    # construct the prediction layers (conf, loc, & default_boxes)
    scales = np.linspace(default_boxes_config["min_scale"],
                         default_boxes_config["max_scale"],
                         len(default_boxes_config["layers"]))
    mbox_conf_layers = []
    mbox_loc_layers = []
    mbox_default_boxes_layers = []
    for i, layer in enumerate(default_boxes_config["layers"]):
        num_default_boxes = get_number_default_boxes(
            layer["aspect_ratios"], extra_box_for_ar_1=extra_box_for_ar_1)
        x = model.get_layer(layer["name"]).output
        layer_name = layer["name"]

        layer_mbox_conf = Conv2D(filters=num_default_boxes * num_classes,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 kernel_initializer=kernel_initializer,
                                 kernel_regularizer=l2(l2_reg),
                                 name=f"{layer_name}_mbox_conf")(x)
        layer_mbox_conf_reshape = Reshape(
            (-1, num_classes),
            name=f"{layer_name}_mbox_conf_reshape")(layer_mbox_conf)
        layer_mbox_loc = Conv2D(filters=num_default_boxes * 4,
                                kernel_size=(3, 3),
                                padding='same',
                                kernel_initializer=kernel_initializer,
                                kernel_regularizer=l2(l2_reg),
                                name=f"{layer_name}_mbox_loc")(x)
        layer_mbox_loc_reshape = Reshape(
            (-1, 4), name=f"{layer_name}_mbox_loc_reshape")(layer_mbox_loc)
        layer_default_boxes = DefaultBoxes(
            image_shape=input_shape,
            scale=scales[i],
            next_scale=scales[i + 1]
            if i + 1 <= len(default_boxes_config["layers"]) - 1 else 1,
            aspect_ratios=layer["aspect_ratios"],
            variances=default_boxes_config["variances"],
            extra_box_for_ar_1=extra_box_for_ar_1,
            name=f"{layer_name}_default_boxes")(x)
        layer_default_boxes_reshape = Reshape(
            (-1, 8),
            name=f"{layer_name}_default_boxes_reshape")(layer_default_boxes)
        mbox_conf_layers.append(layer_mbox_conf_reshape)
        mbox_loc_layers.append(layer_mbox_loc_reshape)
        mbox_default_boxes_layers.append(layer_default_boxes_reshape)

    # concatenate class confidence predictions from different feature map layers
    mbox_conf = Concatenate(axis=-2, name="mbox_conf")(mbox_conf_layers)
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)
    # concatenate object location predictions from different feature map layers
    mbox_loc = Concatenate(axis=-2, name="mbox_loc")(mbox_loc_layers)
    # concatenate default boxes from different feature map layers
    mbox_default_boxes = Concatenate(
        axis=-2, name="mbox_default_boxes")(mbox_default_boxes_layers)
    # concatenate confidence score predictions, bounding box predictions, and default boxes
    predictions = Concatenate(axis=-1, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_default_boxes])

    if is_training:
        return Model(inputs=base_network.input, outputs=predictions)

    decoded_predictions = DecodeSSDPredictions(
        input_size=model_config["input_size"],
        num_predictions=num_predictions,
        name="decoded_predictions")(predictions)

    return Model(inputs=base_network.input, outputs=decoded_predictions)
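# A hedged sample of the config dict this function reads. The key names come
# from the function body above; all values are illustrative only.
sample_config = {
    "model": {
        "input_size": 300,
        "width_multiplier": 1.0,
        "depth_multiplier": 1,
        "l2_regularization": 5e-4,
        "kernel_initializer": "glorot_normal",
        "extra_box_for_ar_1": True,
        "default_boxes": {
            "min_scale": 0.2,
            "max_scale": 0.9,
            "variances": [0.1, 0.1, 0.2, 0.2],
            "layers": [
                {"name": "conv_pw_11_relu", "aspect_ratios": [1.0, 2.0, 0.5]},
                {"name": "conv_pw_13_relu", "aspect_ratios": [1.0, 2.0, 0.5]},
                # ... one entry per feature layer: conv14_2, conv15_2,
                # conv16_2, conv17_2
            ],
        },
    }
}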
Example #10
# ...what the neural network has learned.

from tensorflow.keras.applications import MobileNet, mobilenet
from tensorflow.keras import Model
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, save_img, img_to_array
import tensorflow as tf

img_w = 300
img_h = 300
crop_img = 5

# instantiate the model
model = MobileNet(weights="imagenet", include_top=False)

layer = model.get_layer(index=-3)
feature_extractor = Model(inputs=model.inputs, outputs=layer.output)
"""
La siguiente función se la copié a Chollet sin asco,
es la única cosa que no terminé de entender
"""


def compute_loss(input_image, layer_filter):
    # This is the output activation, using the model as a function
    activation = feature_extractor(input_image)
    # The 2s trim the borders to avoid edge artifacts
    filter_activation = activation[:, 2:-2, 2:-2, layer_filter]
    return tf.reduce_mean(filter_activation)
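# A minimal gradient-ascent loop that puts compute_loss to work; this is the
# rest of Chollet's filter-visualization recipe. The step count, learning
# rate, and filter index below are assumptions.
@tf.function
def gradient_ascent_step(img, filter_index, learning_rate):
    with tf.GradientTape() as tape:
        tape.watch(img)
        loss = compute_loss(img, filter_index)
    grads = tape.gradient(loss, img)
    grads = tf.math.l2_normalize(grads)  # normalized step for stability
    return loss, img + learning_rate * grads


img = tf.random.uniform((1, img_w, img_h, 3), minval=0.4, maxval=0.6)
for _ in range(30):
    loss, img = gradient_ascent_step(img, filter_index=0, learning_rate=10.0)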
Example #11
def load_backbone(backbone_type="resnet50",
                  backbone_outputs=('C3', 'C4', 'C5', 'P6', 'P7'),
                  num_features=256):
    global BACKBONE_LAYERS
    inputs = Input((None, None, 3), name='images')
    if backbone_type.lower() == 'resnet50':
        preprocess = BackBonePreProcess(rgb=False,
                                        mean_shift=True,
                                        normalize=0)(inputs)
        model = ResNet50(input_tensor=preprocess, include_top=False)
    elif backbone_type.lower() == 'resnet50v2':
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=2)(inputs)
        resnet50v2, _ = Classifiers.get('resnet50v2')
        model = resnet50v2(input_tensor=preprocess,
                           include_top=False,
                           weights='imagenet')
    elif backbone_type.lower() == "resnet101v2":
        preprocess = BackBonePreProcess(rgb=True,
                                        mean_shift=False,
                                        normalize=2)(inputs)
        model = ResNet101V2(input_tensor=preprocess,
                            include_top=False,
                            backend=tf.keras.backend,
                            layers=tf.keras.layers,
                            models=tf.keras.models,
                            utils=tf.keras.utils)
    elif backbone_type.lower() == 'resnext50':
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=2)(inputs)
        model = ResNeXt50(input_tensor=preprocess, include_top=False)
    elif backbone_type.lower() == "seresnet50":
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=3)(inputs)
        seresnet50, _ = Classifiers.get('seresnet50')
        model = seresnet50(input_tensor=preprocess,
                           original_input=inputs,
                           include_top=False,
                           weights='imagenet')
    elif backbone_type.lower() == "seresnet34":
        preprocess = BackBonePreProcess(rgb=True,
                                        mean_shift=False,
                                        normalize=0)(inputs)
        seresnet34, _ = Classifiers.get('seresnet34')
        model = seresnet34(input_tensor=preprocess,
                           original_input=inputs,
                           include_top=False,
                           weights='imagenet')
    elif backbone_type.lower() == "seresnext50":
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=3)(inputs)
        seresnext50, _ = Classifiers.get('seresnext50')
        model = seresnext50(input_tensor=preprocess,
                            original_input=inputs,
                            include_top=False,
                            weights='imagenet')
    elif backbone_type.lower() == "vgg16":
        preprocess = BackBonePreProcess(rgb=False,
                                        mean_shift=True,
                                        normalize=0)(inputs)
        model = VGG16(input_tensor=preprocess, include_top=False)
    elif backbone_type.lower() == "mobilenet":
        preprocess = BackBonePreProcess(rgb=False,
                                        mean_shift=False,
                                        normalize=2)(inputs)
        model = MobileNet(input_tensor=preprocess,
                          include_top=False,
                          alpha=1.0)
    elif backbone_type.lower() == 'efficientnetb2':
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=3)(inputs)
        model = efn.EfficientNetB2(input_tensor=preprocess,
                                   include_top=False,
                                   weights='imagenet')
    elif backbone_type.lower() == 'efficientnetb3':
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=3)(inputs)
        model = efn.EfficientNetB3(input_tensor=preprocess,
                                   include_top=False,
                                   weights='imagenet')
    elif backbone_type.lower() == 'efficientnetb4':
        preprocess = BackBonePreProcess(rgb=True, mean_shift=True,
                                        normalize=3)(inputs)
        model = efn.EfficientNetB4(input_tensor=preprocess,
                                   include_top=False,
                                   weights='imagenet')
    else:
        raise NotImplementedError(
            f"backbone_type must be one of {BACKBONE_LAYERS.keys()}.")
    model.trainable = False

    # Fetch the block layers
    features = []
    for key, layer_name in BACKBONE_LAYERS[backbone_type.lower()].items():
        if key in backbone_outputs:
            layer_tensor = model.get_layer(layer_name).output
            features.append(Identity(name=key)(layer_tensor))

    if backbone_type.lower() == "mobilenet":
        # Extra Layer for Feature Extracting
        Z6 = ZeroPadding2D(((0, 1), (0, 1)),
                           name=f'P6_zeropadding')(features[-1])
        P6 = Conv2D(num_features, (3, 3),
                    strides=(2, 2),
                    padding='valid',
                    activation='relu',
                    name=f'P6_conv')(Z6)
        if 'P6' in backbone_outputs:
            features.append(Identity(name='P6')(P6))
        G6 = GroupNormalization(name=f'P6_norm')(P6)
        Z7 = ZeroPadding2D(((0, 1), (0, 1)), name=f'P7_zeropadding')(G6)
        P7 = Conv2D(num_features, (3, 3),
                    strides=(2, 2),
                    padding='valid',
                    activation='relu',
                    name=f'P7_conv')(Z7)
        if 'P7' in backbone_outputs:
            features.append(Identity(name=f'P7')(P7))
    else:
        P6 = Conv2D(num_features, (3, 3),
                    strides=(2, 2),
                    padding='same',
                    activation='relu',
                    name=f'P6_conv')(features[-1])
        if 'P6' in backbone_outputs:
            features.append(Identity(name=f'P6')(P6))
        G6 = GroupNormalization(name=f'P6_norm')(P6)
        P7 = Conv2D(num_features, (3, 3),
                    strides=(2, 2),
                    padding='same',
                    activation='relu',
                    name=f'P7_conv')(G6)
        if 'P7' in backbone_outputs:
            features.append(Identity(name=f'P7')(P7))

    return Model(inputs, features, name=backbone_type)
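# BACKBONE_LAYERS is defined elsewhere in the source file. A plausible entry
# for the MobileNet branch (an assumption, consistent with the conv_pw_* taps
# used throughout these examples):
#
# BACKBONE_LAYERS = {
#     "mobilenet": {
#         "C3": "conv_pw_5_relu",   # stride 8
#         "C4": "conv_pw_11_relu",  # stride 16
#         "C5": "conv_pw_13_relu",  # stride 32
#     },
#     # ... entries for the other backbone_type values
# }
# backbone = load_backbone("mobilenet",
#                          backbone_outputs=('C3', 'C4', 'C5', 'P6', 'P7'))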
Example #12
def ssd_300(mode,
            image_size,
            n_classes,
            l2_regularization=0.0005,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                     [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]],
            two_boxes_for_ar1=True,
            steps=[8, 16, 32, 64, 100, 300],
            offsets=None,
            clip_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=False,
            subtract_mean=[123, 117, 104],
            divide_by_stddev=None,
            swap_channels=True,
            # The four parameters below were missing from the original
            # signature but are used in the inference branches; the defaults
            # follow the upstream ssd_keras implementation.
            confidence_thresh=0.01,
            iou_threshold=0.45,
            top_k=200,
            nms_max_output_size=400,
            return_predictor_sizes=False):

    n_predictor_layers = 6  # The number of predictor conv layers in the network is 6 for the original SSD300.
    n_classes += 1  # Account for the background class.
    l2_reg = l2_regularization  # Make the internal name shorter.
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]

    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    if scales:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:  # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide one offset value per predictor layer.")

    ############################################################################
    # Compute the anchor box parameters.
    ############################################################################

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    if aspect_ratios_per_layer:
        n_boxes = []
        for ar in aspect_ratios_per_layer:
            if (1 in ar) & two_boxes_for_ar1:
                n_boxes.append(len(ar) +
                               1)  # +1 for the second box for aspect ratio 1
            else:
                n_boxes.append(len(ar))
    else:  # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
        if (1 in aspect_ratios_global) & two_boxes_for_ar1:
            n_boxes = len(aspect_ratios_global) + 1
        else:
            n_boxes = len(aspect_ratios_global)
        n_boxes = [n_boxes] * n_predictor_layers

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    x = Input(shape=(img_height, img_width, img_channels))

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    x1 = Lambda(lambda z: z,
                output_shape=(img_height, img_width, img_channels),
                name='identity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(lambda z: z - np.array(subtract_mean),
                    output_shape=(img_height, img_width, img_channels),
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(lambda z: z / np.array(divide_by_stddev),
                    output_shape=(img_height, img_width, img_channels),
                    name='input_stddev_normalization')(x1)
    #if swap_channels and (img_channels == 3):
    #    x1 = Lambda(lambda z: z[..., ::-1], output_shape=(img_height, img_width, img_channels),
    #                name='input_channel_swap')(x1)

    #conv4_3_norm , fc7 ,test= mobilenet(input_tensor=x1)
    # NOTE: the backbone input is fixed at 224x224 here, so `image_size`
    # must be (224, 224, 3) for FeatureExtractor(x1) below to be valid.
    mobilenet = MobileNet(input_shape=(224, 224, 3),
                          include_top=False,
                          weights='imagenet')
    FeatureExtractor = Model(inputs=mobilenet.input,
                             outputs=[
                                 mobilenet.get_layer('conv_pw_11_relu').output,
                                 mobilenet.get_layer('conv_pw_13_relu').output
                             ])

    conv4_3_norm, fc7 = FeatureExtractor(x1)

    print("conv11 shape: ", conv4_3_norm.shape)
    print("conv13 shape: ", fc7.shape)

    conv6_1 = Conv2D(256, (1, 1),
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv14_1',
                     use_bias=False)(fc7)
    conv6_1 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv14_1/bn')(conv6_1)
    conv6_1 = Activation('relu', name='relu_conv6_1')(conv6_1)

    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = Conv2D(512, (3, 3),
                     strides=(2, 2),
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv14_2',
                     use_bias=False)(conv6_1)
    conv6_2 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv14_2/bn')(conv6_2)
    conv6_2 = Activation('relu', name='relu_conv6_2')(conv6_2)

    print('conv14 shape', conv6_2.shape)

    conv7_1 = Conv2D(128, (1, 1),
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv15_1',
                     use_bias=False)(conv6_2)
    conv7_1 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv15_1/bn')(conv7_1)
    conv7_1 = Activation('relu', name='relu_conv7_1')(conv7_1)

    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv15_2',
                     use_bias=False)(conv7_1)
    conv7_2 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv15_2/bn')(conv7_2)
    conv7_2 = Activation('relu', name='relu_conv7_2')(conv7_2)

    print('conv15 shape', conv7_2.shape)

    conv8_1 = Conv2D(128, (1, 1),
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv16_1',
                     use_bias=False)(conv7_2)
    conv8_1 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv16_1/bn')(conv8_1)
    conv8_1 = Activation('relu', name='relu_conv8_1')(conv8_1)
    conv8_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv8_padding')(conv8_1)
    conv8_2 = Conv2D(256, (3, 3),
                     strides=(2, 2),
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv16_2',
                     use_bias=False)(conv8_1)
    conv8_2 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv16_2/bn')(conv8_2)
    conv8_2 = Activation('relu', name='relu_conv8_2')(conv8_2)

    print('conv16 shape', conv8_2.shape)

    conv9_1 = Conv2D(64, (1, 1),
                     padding='same',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv17_1',
                     use_bias=False)(conv8_2)
    conv9_1 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv17_1/bn')(conv9_1)
    conv9_1 = Activation('relu', name='relu_conv9_1')(conv9_1)
    conv9_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv9_padding')(conv9_1)
    conv9_2 = Conv2D(128, (3, 3),
                     strides=(2, 2),
                     padding='valid',
                     kernel_initializer='he_normal',
                     kernel_regularizer=l2(l2_reg),
                     name='conv17_2',
                     use_bias=False)(conv9_1)
    conv9_2 = BatchNormalization(momentum=0.99,
                                 epsilon=0.00001,
                                 name='conv17_2/bn')(conv9_2)
    conv9_2 = Activation('relu', name='relu_conv9_2')(conv9_2)

    print('conv17 shape', conv9_2.shape)

    # Feed conv4_3 into the L2 normalization layer
    # conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3_norm)

    conv4_3_norm_mbox_conf = Conv2D(n_boxes[0] * n_classes, (1, 1),
                                    padding='same',
                                    kernel_initializer='he_normal',
                                    kernel_regularizer=l2(l2_reg),
                                    name='conv11_mbox_conf')(conv4_3_norm)
    fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (1, 1),
                           padding='same',
                           kernel_initializer='he_normal',
                           kernel_regularizer=l2(l2_reg),
                           name='conv13_mbox_conf')(fc7)
    conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (1, 1),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv14_2_mbox_conf')(conv6_2)
    conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (1, 1),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv15_2_mbox_conf')(conv7_2)
    conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (1, 1),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv16_2_mbox_conf')(conv8_2)
    conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (1, 1),
                               padding='same',
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(l2_reg),
                               name='conv17_2_mbox_conf')(conv9_2)
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (1, 1),
                                   padding='same',
                                   kernel_initializer='he_normal',
                                   kernel_regularizer=l2(l2_reg),
                                   name='conv11_mbox_loc')(conv4_3_norm)
    fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (1, 1),
                          padding='same',
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(l2_reg),
                          name='conv13_mbox_loc')(fc7)
    conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (1, 1),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv14_2_mbox_loc')(conv6_2)
    conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (1, 1),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv15_2_mbox_loc')(conv7_2)
    conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (1, 1),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv16_2_mbox_loc')(conv8_2)
    conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (1, 1),
                              padding='same',
                              kernel_initializer='he_normal',
                              kernel_regularizer=l2(l2_reg),
                              name='conv17_2_mbox_loc')(conv9_2)

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    conv4_3_norm_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[0],
        next_scale=scales[1],
        aspect_ratios=aspect_ratios[0],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[0],
        this_offsets=offsets[0],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc)
    fc7_mbox_priorbox = AnchorBoxes(img_height,
                                    img_width,
                                    this_scale=scales[1],
                                    next_scale=scales[2],
                                    aspect_ratios=aspect_ratios[1],
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    this_steps=steps[1],
                                    this_offsets=offsets[1],
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    coords=coords,
                                    normalize_coords=normalize_coords,
                                    name='fc7_mbox_priorbox')(fc7_mbox_loc)
    conv6_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[2],
        next_scale=scales[3],
        aspect_ratios=aspect_ratios[2],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[2],
        this_offsets=offsets[2],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc)
    conv7_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[3],
        next_scale=scales[4],
        aspect_ratios=aspect_ratios[3],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[3],
        this_offsets=offsets[3],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc)
    conv8_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[4],
        next_scale=scales[5],
        aspect_ratios=aspect_ratios[4],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[4],
        this_offsets=offsets[4],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc)
    conv9_2_mbox_priorbox = AnchorBoxes(
        img_height,
        img_width,
        this_scale=scales[5],
        next_scale=scales[6],
        aspect_ratios=aspect_ratios[5],
        two_boxes_for_ar1=two_boxes_for_ar1,
        this_steps=steps[5],
        this_offsets=offsets[5],
        clip_boxes=clip_boxes,
        variances=variances,
        coords=coords,
        normalize_coords=normalize_coords,
        name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc)

    ### Reshape

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    conv4_3_norm_mbox_conf_reshape = Reshape(
        (-1, n_classes),
        name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf)
    fc7_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf)
    conv6_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf)
    conv7_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf)
    conv8_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf)
    conv9_2_mbox_conf_reshape = Reshape(
        (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf)
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    conv4_3_norm_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc)
    fc7_mbox_loc_reshape = Reshape((-1, 4),
                                   name='fc7_mbox_loc_reshape')(fc7_mbox_loc)
    conv6_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc)
    conv7_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc)
    conv8_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc)
    conv9_2_mbox_loc_reshape = Reshape(
        (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc)
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    conv4_3_norm_mbox_priorbox_reshape = Reshape(
        (-1, 8),
        name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox)
    fc7_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox)
    conv6_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox)
    conv7_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox)
    conv8_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox)
    conv9_2_mbox_priorbox_reshape = Reshape(
        (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox)

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')([
        conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape,
        conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape,
        conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape
    ])

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')([
        conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape,
        conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape,
        conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape
    ])

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([
        conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape,
        conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape,
        conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape
    ])

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    if mode == 'training':
        model = Model(inputs=x, outputs=predictions)
    elif mode == 'inference':
        decoded_predictions = DecodeDetections(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    elif mode == 'inference_fast':
        decoded_predictions = DecodeDetectionsFast(
            confidence_thresh=confidence_thresh,
            iou_threshold=iou_threshold,
            top_k=top_k,
            nms_max_output_size=nms_max_output_size,
            coords=coords,
            normalize_coords=normalize_coords,
            img_height=img_height,
            img_width=img_width,
            name='decoded_predictions')(predictions)
        model = Model(inputs=x, outputs=decoded_predictions)
    else:
        raise ValueError(
            "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'."
            .format(mode))

    if return_predictor_sizes:
        # `_keras_shape` is the legacy standalone-Keras attribute; under
        # tf.keras, use `.shape[1:3]` instead.
        predictor_sizes = np.array([
            conv4_3_norm_mbox_conf._keras_shape[1:3],
            fc7_mbox_conf._keras_shape[1:3],
            conv6_2_mbox_conf._keras_shape[1:3],
            conv7_2_mbox_conf._keras_shape[1:3],
            conv8_2_mbox_conf._keras_shape[1:3],
            conv9_2_mbox_conf._keras_shape[1:3]
        ])
        return model, predictor_sizes
    else:
        return model
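# Training-mode usage sketch (the scale values are illustrative; note the
# backbone above is fixed at 224x224, so image_size must be (224, 224, 3)).
#
# model = ssd_300(mode='training',
#                 image_size=(224, 224, 3),
#                 n_classes=20,
#                 min_scale=0.1,
#                 max_scale=0.9)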
Example #13
# Imports assumed by this snippet (the source omits them):
from time import time
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator


def create_lightweight_mobilenet(layer_name, number):  # `number` is unused below
    base_model = MobileNet(weights='imagenet',
                           include_top=False)  # imports the MobileNet base

    #layer_name = 'conv_dw_12_relu'
    # intermediate_layer_model = Model(inputs=base_model.input,
    #                                        outputs=base_model.get_layer(layer_name).output)

    # add a global spatial average pooling layer
    #x = base_model.output
    x = base_model.get_layer(layer_name).output
    x = GlobalAveragePooling2D()(x)
    # add fully-connected layers so the model can learn more complex
    # functions and classify with better results
    x = Dense(1024, activation='relu')(x)  # dense layer 1
    x = Dense(1024, activation='relu')(x)  # dense layer 2
    x = Dense(1024, activation='relu')(x)  # dense layer 3
    x = Dense(512, activation='relu')(x)  # dense layer 4
    # and a logistic layer -- let's say we have 20 voc classes
    preds = Dense(20, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=preds
                  )  ##now a model has been created based on our architecture

    for i, layer in enumerate(base_model.layers):
        print('Original Model*****', i, layer.name)

    for i, layer in enumerate(model.layers):
        print('Final Model*****', i, layer.name)
    print(len(model.layers))  # print the layer count once, outside the loop

    # first: train only the top layers (which were randomly initialized),
    # i.e. freeze all convolutional MobileNet layers
    for layer in base_model.layers:
        layer.trainable = False

    # the first 249 layers and unfreeze the rest:
    # for layer in model.layers[:20]:
    #    layer.trainable = False
    # for layer in model.layers[20:]:
    #    layer.trainable = True

    #opt = SGD(learning_rate=0.01, momentum=0.0, nesterov=False, name='SGD')
    model.compile(optimizer=SGD(learning_rate=0.01,
                                momentum=0.0,
                                nesterov=False,
                                name='SGD'),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # Adam optimizer
    # loss function will be categorical cross entropy
    # evaluation metric will be accuracy

    # call the dataset

    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input)  #included in our dependencies

    train_generator = train_datagen.flow_from_directory(
        '/home/dhaval/piyush/Usecases_dataset/voc_dataset_created/training_data',
        target_size=(224, 224),
        color_mode='rgb',
        batch_size=32,
        class_mode='categorical',
        shuffle=True)

    validation_generator = train_datagen.flow_from_directory(
        '/home/dhaval/piyush/Usecases_dataset/voc_dataset_created/validation_data',
        target_size=(224, 224),
        color_mode='rgb',
        batch_size=32,
        class_mode='categorical',
        shuffle=True)

    step_size_train = train_generator.n // train_generator.batch_size
    step_size_val = validation_generator.n // validation_generator.batch_size

    tensorboard = TensorBoard(log_dir="logs/{}".format(time()),
                              update_freq='epoch',
                              profile_batch=0)

    #fit the model
    model.fit(train_generator,
              steps_per_epoch=step_size_train,
              epochs=200,
              validation_data=validation_generator,
              validation_steps=step_size_val,
              callbacks=[tensorboard])

    #model.fit(train_generator,steps_per_epoch=step_size_train,epochs=12)

    model.save('mobilenet_model_voc_20class_ep_200_sgd_layer_' +
               str(len(model.layers)) + '.h5')
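# Inference sketch for the model saved above (the file name depends on
# len(model.layers), and 'test.jpg' is a placeholder path).
#
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing.image import load_img, img_to_array
# import numpy as np
# model = load_model('mobilenet_model_voc_20class_ep_200_sgd_layer_<N>.h5')
# img = img_to_array(load_img('test.jpg', target_size=(224, 224)))
# img = preprocess_input(img[np.newaxis])  # same preprocessing as training
# print(np.argmax(model.predict(img), axis=-1))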