Example #1
def VGG19(include_top=True,
          weights='imagenet',
          input_tensor=None,
          input_shape=None,
          pooling=None,
          classes=1000,
          **kwargs):
    """Instantiates the VGG19 architecture.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)`
            (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format='channels_last',
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor
    # Block 1
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv1')(img_input)
    x = layers.Conv2D(64, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block1_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block2_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv2')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv3')(x)
    x = layers.Conv2D(256, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block3_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv2')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv3')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block4_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv2')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv3')(x)
    x = layers.Conv2D(512, (3, 3),
                      activation='relu',
                      padding='same',
                      name='block5_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        x = layers.Dense(4096, activation='relu', name='fc1')(x)
        x = layers.Dense(4096, activation='relu', name='fc2')(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = models.Model(inputs, x, name='vgg19')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='cbe5617147190e668d6c5d5026f83318')
        else:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='253f8cb515780f3b799900260a226db6')
        model.load_weights(weights_path)
        if backend.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        model.load_weights(weights)

    return model
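
# A minimal usage sketch (not part of the original snippet): build the no-top
# VGG19 as a fixed feature extractor. It assumes this module's own imports
# (backend, layers, models, keras_utils) are in scope, as in keras_applications.
feature_extractor = VGG19(weights='imagenet', include_top=False, pooling='avg')
# each batch of 224x224 RGB images then maps to a (batch_size, 512) feature matrix:
# features = feature_extractor.predict(preprocessed_images)
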
Example #2
import numpy as np
import os
from keras.applications import vgg16
from keras.preprocessing.image import ImageDataGenerator
from keras import models
from keras import layers

conv_base = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

base_dir = '..../DME_NORMAL'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

x = conv_base.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)
predictions = layers.Dense(2, activation='softmax')(x)
model = models.Model(inputs=conv_base.input, outputs=predictions)

from keras import optimizers
test_datagen = ImageDataGenerator(rescale=1./255)


model.load_weights("vgg16_weights_dme_normal.best.hdf5")
Example #3
from keras.models import Sequential
from keras import layers

height = 64
width = 64
channels = 3
num_classes = 10

model = Sequential()
model.add(
    layers.SeparableConv2D(32,
                           3,
                           activation='relu',
                           input_shape=(
                               height,
                               width,
                               channels,
                           )))
model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))

model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.SeparableConv2D(128, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))

model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.SeparableConv2D(128, 3, activation='relu'))
model.add(layers.GlobalAveragePooling2D())

model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
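
# A hedged training sketch: random placeholder tensors stand in for a real
# dataset purely to demonstrate the expected input/label shapes.
import numpy as np
x_train = np.random.random((100, height, width, channels))
labels = np.random.randint(0, num_classes, size=(100,))
y_train = np.eye(num_classes)[labels]  # one-hot targets for categorical_crossentropy
model.fit(x_train, y_train, epochs=1, batch_size=32)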
Example #4
def EfficientNet(input_shape,
                 block_args_list: List[BlockArgs],
                 width_coefficient: float,
                 depth_coefficient: float,
                 include_top=True,
                 weights=None,
                 input_tensor=None,
                 pooling=None,
                 classes=1000,
                 dropout_rate=0.,
                 drop_connect_rate=0.,
                 batch_norm_momentum=0.99,
                 batch_norm_epsilon=1e-3,
                 depth_divisor=8,
                 min_depth=None,
                 data_format=None,
                 default_size=None,
                 **kwargs):
    """
    Builder model for EfficientNets.

    # Arguments:
        input_shape: Optional shape tuple, the input shape
            depends on the configuration, with a minimum
            decided by the number of stride 2 operations.
            When None is provided, it defaults to 224.
            Considered the "Resolution" parameter from
            the paper (inherently Resolution coefficient).
        block_args_list: Optional List of BlockArgs, each
            of which detail the arguments of the MBConvBlock.
            If left as None, it defaults to the blocks
            from the paper.
        width_coefficient: Determines the number of channels
            available per layer. Compound Coefficient that
            needs to be found using grid search on a base
            configuration model.
        depth_coefficient: Determines the number of layers
            available to the model. Compound Coefficient that
            needs to be found using grid search on a base
            configuration model.
        include_top: Whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: Optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: Optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        dropout_rate: Float, percentage of random dropout.
        drop_connect_rate: Float, percentage of randomly dropped
            connections.
        batch_norm_momentum: Float, default batch normalization
            momentum. Obtained from the paper.
        batch_norm_epsilon: Float, default batch normalization
            epsilon. Obtained from the paper.
        depth_divisor: Optional. Used when rounding off the coefficient
             scaled channels and depth of the layers.
        min_depth: Optional. Minimum depth value in order to
            avoid blocks with 0 layers.
        data_format: "channels_first" or "channels_last". If left
            as None, defaults to the value set in ~/.keras.
        default_size: Specifies the default image size of the model

    # Raises:
        - ValueError: If `weights` is neither `'imagenet'`, `None`,
            nor a path to an existing weights file.
        - ValueError: If `weights` is `'imagenet'` and `classes` is
            not 1000.

    # Returns:
        A Keras Model.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    if data_format is None:
        data_format = K.image_data_format()

    if data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    if default_size is None:
        default_size = 224

    if block_args_list is None:
        block_args_list = get_default_block_list()

    # count number of strides to compute min size
    stride_count = 1
    for block_args in block_args_list:
        if block_args.strides is not None and block_args.strides[0] > 1:
            stride_count += 1

    min_size = int(2**stride_count)

    # Determine proper input shape and default size.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=min_size,
                                      data_format=data_format,
                                      require_flatten=include_top,
                                      weights=weights)

    # Stem part
    if input_tensor is None:
        inputs = layers.Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            inputs = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            inputs = input_tensor

    x = inputs
    x = layers.Conv2D(filters=round_filters(32, width_coefficient,
                                            depth_divisor, min_depth),
                      kernel_size=[3, 3],
                      strides=[2, 2],
                      kernel_initializer=EfficientNetConvInitializer(),
                      padding='same',
                      use_bias=False)(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  momentum=batch_norm_momentum,
                                  epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    num_blocks = sum([block_args.num_repeat for block_args in block_args_list])
    drop_connect_rate_per_block = drop_connect_rate / float(num_blocks)

    # Blocks part
    for block_idx, block_args in enumerate(block_args_list):
        assert block_args.num_repeat > 0

        # Update block input and output filters based on depth multiplier.
        block_args.input_filters = round_filters(block_args.input_filters,
                                                 width_coefficient,
                                                 depth_divisor, min_depth)
        block_args.output_filters = round_filters(block_args.output_filters,
                                                  width_coefficient,
                                                  depth_divisor, min_depth)
        block_args.num_repeat = round_repeats(block_args.num_repeat,
                                              depth_coefficient)

        # The first block needs to take care of stride and filter size increase.
        x = MBConvBlock(block_args.input_filters, block_args.output_filters,
                        block_args.kernel_size, block_args.strides,
                        block_args.expand_ratio, block_args.se_ratio,
                        block_args.identity_skip,
                        drop_connect_rate_per_block * block_idx,
                        batch_norm_momentum, batch_norm_epsilon,
                        data_format)(x)

        if block_args.num_repeat > 1:
            block_args.input_filters = block_args.output_filters
            block_args.strides = [1, 1]

        for _ in range(block_args.num_repeat - 1):
            x = MBConvBlock(block_args.input_filters,
                            block_args.output_filters, block_args.kernel_size,
                            block_args.strides, block_args.expand_ratio,
                            block_args.se_ratio, block_args.identity_skip,
                            drop_connect_rate_per_block * block_idx,
                            batch_norm_momentum, batch_norm_epsilon,
                            data_format)(x)

    # Head part
    x = layers.Conv2D(filters=round_filters(1280, width_coefficient,
                                            depth_divisor, min_depth),
                      kernel_size=[1, 1],
                      strides=[1, 1],
                      kernel_initializer=EfficientNetConvInitializer(),
                      padding='same',
                      use_bias=False)(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  momentum=batch_norm_momentum,
                                  epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(data_format=data_format)(x)

        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)

        x = layers.Dense(classes,
                         kernel_initializer=EfficientNetDenseInitializer())(x)
        x = layers.Activation('softmax')(x)

    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    outputs = x

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)

    model = Model(inputs, outputs)

    # Load weights
    if weights == 'imagenet':
        if default_size == 224:
            if include_top:
                weights_path = get_file(
                    'efficientnet-b0.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b0.h5",
                    cache_subdir='models')
            else:
                weights_path = get_file(
                    'efficientnet-b0_notop.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b0_notop.h5",
                    cache_subdir='models')
            model.load_weights(weights_path)

        elif default_size == 240:
            if include_top:
                weights_path = get_file(
                    'efficientnet-b1.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b1.h5",
                    cache_subdir='models')
            else:
                weights_path = get_file(
                    'efficientnet-b1_notop.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b1_notop.h5",
                    cache_subdir='models')
            model.load_weights(weights_path)

        elif default_size == 260:
            if include_top:
                weights_path = get_file(
                    'efficientnet-b2.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b2.h5",
                    cache_subdir='models')
            else:
                weights_path = get_file(
                    'efficientnet-b2_notop.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b2_notop.h5",
                    cache_subdir='models')
            model.load_weights(weights_path)

        elif default_size == 300:
            if include_top:
                weights_path = get_file(
                    'efficientnet-b3.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b3.h5",
                    cache_subdir='models')
            else:
                weights_path = get_file(
                    'efficientnet-b3_notop.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b3_notop.h5",
                    cache_subdir='models')
            model.load_weights(weights_path)

        elif default_size == 380:
            if include_top:
                weights_path = get_file(
                    'efficientnet-b4.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b4.h5",
                    cache_subdir='models')
            else:
                weights_path = get_file(
                    'efficientnet-b4_notop.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b4_notop.h5",
                    cache_subdir='models')
            model.load_weights(weights_path)

        elif default_size == 456:
            if include_top:
                weights_path = get_file(
                    'efficientnet-b5.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b5.h5",
                    cache_subdir='models')
            else:
                weights_path = get_file(
                    'efficientnet-b5_notop.h5',
                    "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b5_notop.h5",
                    cache_subdir='models')
            model.load_weights(weights_path)

    # TODO: when weights for efficientnet-b6 and efficientnet-b7 become
    # available, uncomment this section and update the ValueError message below.
        # elif default_size == 528:
        #     if include_top:
        #         weights_path = get_file(
        #             'efficientnet-b6.h5',
        #             "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b6.h5",
        #             cache_subdir='models')
        #     else:
        #         weights_path = get_file(
        #             'efficientnet-b6_notop.h5',
        #             "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b6_notop.h5",
        #             cache_subdir='models')
        #     model.load_weights(weights_path)
        #
        # elif default_size == 600:
        #     if include_top:
        #         weights_path = get_file(
        #             'efficientnet-b7.h5',
        #             "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b7.h5",
        #             cache_subdir='models')
        #     else:
        #         weights_path = get_file(
        #             'efficientnet-b7_notop.h5',
        #             "https://github.com/titu1994/keras-efficientnets/releases/download/v0.1/efficientnet-b7_notop.h5",
        #             cache_subdir='models')
        #     model.load_weights(weights_path)

        else:
            raise ValueError(
                'ImageNet weights can only be loaded with EfficientNetB0-5')

    elif weights is not None:
        model.load_weights(weights)

    return model
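
# A minimal usage sketch (not in the original snippet): width and depth
# coefficients of 1.0 correspond to the B0 configuration, and
# get_default_block_list() is the same helper used in the builder above.
model = EfficientNet(input_shape=(224, 224, 3),
                     block_args_list=get_default_block_list(),
                     width_coefficient=1.0,
                     depth_coefficient=1.0,
                     weights=None)  # random init; skips the weight download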
Example #5
def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    model_name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
):
    """Instantiates the EfficientNetV2 architecture using given scaling coefficients.

  Args:
    width_coefficient: float, scaling coefficient for network width.
    depth_coefficient: float, scaling coefficient for network depth.
    default_size: integer, default input image size.
    dropout_rate: float, dropout rate before final classifier layer.
    drop_connect_rate: float, dropout rate at skip connections.
    depth_divisor: integer, a unit of network width.
    min_depth: integer, minimum number of filters.
    bn_momentum: float. Momentum parameter for Batch Normalization layers.
    activation: activation function.
    blocks_args: list of dicts, parameters to construct block modules.
    model_name: string, model name.
    include_top: whether to include the fully-connected layer at the top of the
      network.
    weights: one of `None` (random initialization), `"imagenet"` (pre-training
      on ImageNet), or the path to the weights file to be loaded.
    input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) or
      numpy array to use as image input for the model.
    input_shape: optional shape tuple, only to be specified if `include_top` is
      False. It should have exactly 3 input channels.
    pooling: optional pooling mode for feature extraction when `include_top` is
      `False`. - `None` means that the output of the model will be the 4D tensor
      output of the last convolutional layer. - "avg" means that global average
      pooling will be applied to the output of the last convolutional layer, and
      thus the output of the model will be a 2D tensor. - `"max"` means that
      global max pooling will be applied.
    classes: optional number of classes to classify images into, only to be
      specified if `include_top` is True, and if no `weights` argument is
      specified.
    classifier_activation: A string or callable. The activation function to use
      on the `"top"` layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the `"top"` layer.
    include_preprocessing: Boolean, whether to include the preprocessing layer
      (`Rescaling`) at the bottom of the network. Defaults to `True`.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `"softmax"` or `None` when
      using a pretrained top layer.
  """

    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[model_name]

    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError("The `weights` argument should be either "
                         "`None` (random initialization), `imagenet` "
                         "(pre-training on ImageNet), "
                         "or the path to the weights file to be loaded."
                         f"Received: weights={weights}")

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000. "
            f"Received: classes={classes}")

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if model_name.split("-")[-1].startswith("b") and num_channels == 3:
            x = layers.Rescaling(scale=1. / 255)(x)
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)
        else:
            x = layers.Rescaling(scale=1. / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=stem_filters,
        kernel_size=3,
        strides=2,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        use_bias=False,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="stem_bn",
    )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for (i, args) in enumerate(blocks_args):
        assert args["num_repeat"] > 0

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor)

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(repeats=args.pop("num_repeat"),
                                depth_coefficient=depth_coefficient)
        for j in range(repeats):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name="block{}{}_".format(i + 1, chr(j + 97)),
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(filters=1280,
                                width_coefficient=width_coefficient,
                                min_depth=min_depth,
                                depth_divisor=depth_divisor)
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format="channels_last",
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         bias_initializer=tf.constant_initializer(0),
                         name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suffix
        weights_path = data_utils.get_file(file_name,
                                           BASE_WEIGHTS_PATH + file_name,
                                           cache_subdir="models",
                                           file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
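
# A hedged usage sketch: the coefficients and name below mirror the public
# EfficientNetV2-B0 configuration; `model_name` is assumed to be a key of
# DEFAULT_BLOCKS_ARGS, as required by the `blocks_args == "default"` branch.
model = EfficientNetV2(width_coefficient=1.0,
                       depth_coefficient=1.0,
                       default_size=224,
                       model_name="efficientnetv2-b0",
                       weights=None)  # random init; skips the weight download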
Example #6
def create_model(input_shape,
                 classes,
                 pooling=None,
                 include_top=True,
                 **kwargs):
    """Instantiates the ResNet50 architecture.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: optional, read from `**kwargs`; one of `None`
              (random initialization), 'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    # Determine proper input shape. `weights` is not a formal parameter of this
    # wrapper, so read it from **kwargs (defaulting to None) for the shape check;
    # the original referenced an undefined `weights` name here.
    weights = kwargs.get('weights')
    if not input_shape:
        input_shape = _obtain_input_shape(
            input_shape,
            default_size=224,
            min_size=32,
            data_format=backend.image_data_format(),
            require_flatten=include_top,
            weights=weights)

    img_input = layers.Input(shape=input_shape)

    if backend.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1

    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        x = layers.Dense(classes, activation='softmax', name='fc1000')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)
        else:
            warnings.warn('The output shape of `ResNet50(include_top=False)` '
                          'has been changed since Keras 2.2.0.')

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='resnet50')

    return model
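
# A minimal usage sketch: passing an explicit input_shape skips the
# _obtain_input_shape branch, yielding a randomly initialized ResNet50.
model = create_model(input_shape=(224, 224, 3), classes=10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])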
Example #7
def CapsNet(input_shape, n_class, routings):
    # Note: despite the signature, `n_class` and `routings` are unused here;
    # this variant is a plain dilated CNN whose output width is hard-coded
    # to 8 sigmoid units below.
    x = layers.Input(shape=input_shape)
    conv1 = layers.Conv2D(filters=64,
                          kernel_size=(1, 12),
                          strides=(1, 1),
                          padding='same',
                          dilation_rate=5)(x)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.Conv2D(filters=64,
                          kernel_size=(1, 12),
                          strides=(1, 2),
                          padding='same',
                          dilation_rate=1)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.MaxPooling2D((1, 2), strides=(1, 2))(conv1)

    conv1 = layers.Conv2D(filters=96,
                          kernel_size=(1, 9),
                          strides=1,
                          padding='same',
                          dilation_rate=4)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.Conv2D(filters=96,
                          kernel_size=(1, 9),
                          strides=1,
                          padding='same',
                          dilation_rate=4)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.MaxPooling2D((1, 2), strides=(1, 2))(conv1)

    conv1 = layers.Conv2D(filters=128,
                          kernel_size=(1, 6),
                          strides=1,
                          padding='same',
                          dilation_rate=3)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.Conv2D(filters=128,
                          kernel_size=(1, 6),
                          strides=1,
                          padding='same',
                          dilation_rate=3)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.MaxPooling2D((1, 2), strides=(1, 2))(conv1)

    conv1 = layers.Conv2D(filters=192,
                          kernel_size=(1, 3),
                          strides=1,
                          padding='same',
                          dilation_rate=2)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.Conv2D(filters=192,
                          kernel_size=(1, 3),
                          strides=1,
                          padding='same',
                          dilation_rate=2)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    #conv1 = layers.Conv2D(filters=192, kernel_size=(1,3), strides=1, padding='same',dilation_rate = 2)(conv1)
    #conv1 = ELU(alpha=0.5)(conv1)
    #conv1 = BN()(conv1)
    conv1 = layers.MaxPooling2D((1, 2), strides=(1, 2))(conv1)

    conv1 = layers.Conv2D(filters=256,
                          kernel_size=(1, 3),
                          strides=1,
                          padding='same',
                          dilation_rate=2)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)
    conv1 = layers.Conv2D(filters=256,
                          kernel_size=(1, 3),
                          strides=1,
                          padding='same',
                          dilation_rate=2)(conv1)
    conv1 = ELU(alpha=0.5)(conv1)
    conv1 = BN()(conv1)

    conv1 = layers.GlobalAveragePooling2D(data_format='channels_first')(conv1)
    output = layers.Dense(8, activation='sigmoid')(conv1)

    model = models.Model(x, output)
    return model
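
# A hedged usage sketch: the (1, k) kernels and (1, 2) pooling suggest 1-D
# signals laid out as (1, time, channels); the shape below is an assumption.
model = CapsNet(input_shape=(1, 1024, 1), n_class=8, routings=3)
model.summary()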
Example #8
def res_Net50(input, classes=51, attention_module=None):
    #global backend, layers, models, keras_utils
    #backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
    #x = layers.Lambda(finite_difference)(input)
    #print(x.get_shape())
    #exit()
    x = layers.BatchNormalization()(input)
    if attention_module is not None:
        # apply attention to the normalized tensor (the original passed `input`
        # here, which silently discarded the BatchNormalization above)
        x = attach_attention_module(x, 'fcbam_block')
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
    x = layers.SeparableConv2D(128, (7, 7),
                               strides=(2, 2),
                               padding='valid',
                               kernel_initializer='he_normal',
                               name='conv1_he_normal')(x)
    x = layers.BatchNormalization(name='bn_conv1_he_normal')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad_he_normal')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    if attention_module is not None:
        x = attach_attention_module(x, attention_module)

    x = conv_block(x,
                   3, [64, 64, 256],
                   stage=2,
                   block='a_he_normal',
                   strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b_he_normal')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c_he_normal')

    if attention_module is not None:
        x = attach_attention_module(x, attention_module)

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    if attention_module is not None:
        x = attach_attention_module(x, attention_module)

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    if attention_module is not None:
        x = attach_attention_module(x, attention_module)

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    # linear = layers.Dense(units=512,activation='sigmoid',name='dense_layer_1')(x)
    # linear = layers.Dropout(rate=0.75)(linear)

    linear = layers.Dense(units=classes,
                          activation='softmax',
                          name='dense_layer')(x)

    model = Model(inputs=input, outputs=linear)

    # weights_path = utils.get_file(
    #     'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
    #     WEIGHTS_PATH_NO_TOP,
    #     cache_subdir='models',
    #     md5_hash='a268eb855778b3df3c7506639542a6af')
    #model.load_weights(weights_path,by_name=True)
    return model
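
# A minimal usage sketch (assumptions: 224x224 RGB input, and that
# attach_attention_module / conv_block / identity_block are defined in this
# module; 'cbam_block' is a placeholder attention-module name).
inputs = layers.Input(shape=(224, 224, 3))
model = res_Net50(inputs, classes=51, attention_module='cbam_block')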
Example #9
def MobileNetV2(input_shape, alpha=1, pooling=None, nb_classes=10,
                input_tensor="no", type_freq=None, low_freq=0):

    channel_axis = 1 if K.image_data_format() == "channels_first" else -1

    if (input_tensor == "no"):
        img_input = layers.Input(shape=input_shape)
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = layers.ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                                 name='Conv1_pad')(img_input)
    if (input_tensor != "no") & (type_freq == None):        
        img_input = input_tensor
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = layers.ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                                 name='Conv1_pad')(img_input)
    if (input_tensor != "no") & (type_freq != None):
        img_input = input_tensor
        if (type_freq == "low"):
            j = Lambda(fft_low_pass, output_shape=input_shape, arguments={'lim_freq': low_freq})(img_input)
        if (type_freq == "high"):
            j = Lambda(fft_high_pass, output_shape=input_shape, arguments={'lim_freq': low_freq})(img_input)
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = layers.ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                                 name='Conv1_pad')(j)

    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    x = layers.ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                            expansion=1, block_id=0)

    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                            expansion=6, block_id=1)
    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                            expansion=6, block_id=2)

    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                            expansion=6, block_id=3)
    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                            expansion=6, block_id=4)
    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                            expansion=6, block_id=5)

    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2,
                            expansion=6, block_id=6)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
                            expansion=6, block_id=7)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
                            expansion=6, block_id=8)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
                            expansion=6, block_id=9)

    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
                            expansion=6, block_id=10)
    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
                            expansion=6, block_id=11)
    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1,
                            expansion=6, block_id=12)

    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2,
                            expansion=6, block_id=13)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
                            expansion=6, block_id=14)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
                            expansion=6, block_id=15)

    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1,
                            expansion=6, block_id=16)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1_bn')(x)
    x = layers.ReLU(6., name='out_relu')(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(nb_classes, activation='softmax',
                         use_bias=True)(x)
    # Create model.
    model = Model(img_input, x)
    print("MobileNetV2 created")

    return model
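
# A minimal usage sketch: the default input_tensor="no" path builds the network
# from a fresh Input layer; the CIFAR-10-sized input here is just an assumption.
model = MobileNetV2(input_shape=(32, 32, 3), alpha=1, nb_classes=10)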
Example #10
def build_model(method, boneage_div, lambda_in=15.0, soften=160.0, alpha=0.05):
    """
    Build the model using 'method' loss (piven, qd or only_point)

    @param method: method name
    @param boneage_div: data std
    @param soften: soften parameter for qd loss
    @param lambda_in: lambda parameter for qd loss
    @param alpha: confidence level
    @return: compiled model
    """

    def mae_months(y_true, y_pred):
        if method == 'only_point':
            return mean_absolute_error(boneage_div * y_true, boneage_div * y_pred)

        y_true = y_true[:, 0]
        y_u_pred = y_pred[:, 0]
        y_l_pred = y_pred[:, 1]

        if method == 'piven':
            y_v = y_pred[:, 2]
            y_eli = y_v * y_u_pred + (1 - y_v) * y_l_pred
        if method == 'qd':
            y_eli = 0.5 * y_u_pred + 0.5 * y_l_pred

        return mean_absolute_error(boneage_div * y_true, boneage_div * y_eli)

    def mpiw(y_true, y_pred):
        y_u_pred = y_pred[:, 0]
        y_l_pred = y_pred[:, 1]
        mpiw = tf.reduce_mean(y_u_pred - y_l_pred)
        return mpiw

    def picp(y_true, y_pred):
        y_true = y_true[:, 0]
        y_u_pred = y_pred[:, 0]
        y_l_pred = y_pred[:, 1]
        K_u = tf.cast(y_u_pred > y_true, tf.float32)
        K_l = tf.cast(y_l_pred < y_true, tf.float32)
        picp = tf.reduce_mean(K_l * K_u)
        return picp

    input_shape = (IMG_SIZE, IMG_SIZE, 3)

    input_layer = KL.Input(input_shape)
    base_pretrained_model = VGG16(input_shape=input_shape, include_top=False, weights='imagenet')
    base_pretrained_model.trainable = False
    pt_depth = base_pretrained_model.get_output_shape_at(0)[-1]
    pt_features = base_pretrained_model(input_layer)
    bn_features = KL.BatchNormalization()(pt_features)

    attn_layer = KL.Conv2D(64, kernel_size=(1, 1), padding='same', activation='relu')(bn_features)
    attn_layer = KL.Conv2D(16, kernel_size=(1, 1), padding='same', activation='relu')(attn_layer)
    attn_layer = KL.LocallyConnected2D(1,
                                       kernel_size=(1, 1),
                                       padding='valid',
                                       activation='sigmoid')(attn_layer)
    # fan it out to all of the channels
    up_c2_w = np.ones((1, 1, 1, pt_depth))
    up_c2 = KL.Conv2D(pt_depth, kernel_size=(1, 1), padding='same',
                      activation='linear', use_bias=False, weights=[up_c2_w])
    up_c2.trainable = False
    attn_layer = up_c2(attn_layer)

    mask_features = KL.multiply([attn_layer, bn_features])
    gap_features = KL.GlobalAveragePooling2D()(mask_features)
    gap_mask = KL.GlobalAveragePooling2D()(attn_layer)
    # to account for missing values from the attention model
    gap = KL.Lambda(lambda x: x[0] / x[1], name='RescaleGAP')([gap_features, gap_mask])
    gap_dr = KL.Dropout(0.5)(gap)
    x = KL.Dropout(0.25)(KL.Dense(1024, activation='elu')(gap_dr))

    point = KL.Dense(1, activation='linear')(x)
    pi = KL.Dense(2, activation='linear', kernel_initializer=RandomNormal(stddev=0.1),
                  bias_initializer=Constant(value=[2.0, -2.0]), name='pi')(x)

    v = KL.Dense(1, activation='sigmoid', name='v', bias_initializer=Constant(value=[0.]))(x)
    v_pi = KL.Concatenate(name='v_pi_concat')([pi, v])

    if method == 'piven':
        out = v_pi
        metrics = [picp, mpiw, mae_months]
        loss = piven_loss(True, lambda_in, soften, alpha)
    elif method == 'qd':
        out = pi
        metrics = [picp, mpiw, mae_months]
        loss = piven_loss(False, lambda_in, soften, alpha)
    elif method == 'only_point':
        out = point
        metrics = [mae_months]
        loss = 'mse'

    bone_age_model = Model(inputs=[input_layer], outputs=[out])

    # compile model
    opt = Adam(lr=0.001)
    bone_age_model.compile(loss=loss, optimizer=opt, metrics=metrics)

    # model summary
    bone_age_model.summary()

    return bone_age_model
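
# A hedged usage sketch: IMG_SIZE is a module-level constant in this file, and
# the boneage_div value (the training-set standard deviation) is a placeholder.
model = build_model(method='piven', boneage_div=41.2)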
Example #11
def DiagnosisCapsules(input_shape,
                      n_class=2,
                      k_size=5,
                      output_atoms=16,
                      routings1=3,
                      routings2=3):
    """
    A Capsule Network on Medical Image Diagnosis.
    :param input_shape: data shape
    :param n_class: number of classes
    :param k_size: kernel size for convolutional capsules
    :param output_atoms: number of atoms in D-Caps layer
    :param routings1: number of routing iterations when stride is 1
    :param routings2: number of routing iterations when stride is > 1
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
            `eval_model` can also be used for training.
    """
    if n_class == 2:
        n_class = 1  # binary output

    x = layers.Input(shape=input_shape)

    # Layer 1: Just a conventional Conv2D layer
    conv1 = layers.Conv2D(filters=16,
                          kernel_size=k_size,
                          strides=2,
                          padding='same',
                          activation='relu',
                          name='conv1')(x)

    # Reshape layer to be 1 capsule x [filters] atoms
    conv1_reshaped = ExpandDim(name='expand_dim')(conv1)

    # Layer 1: Primary Capsule: Conv cap with routing 1
    primary_caps = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=2,
                                    num_atoms=16,
                                    strides=2,
                                    padding='same',
                                    routings=1,
                                    name='primarycaps')(conv1_reshaped)

    # Layer 2: Convolutional Capsule
    conv_cap_2_1 = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=4,
                                    num_atoms=16,
                                    strides=1,
                                    padding='same',
                                    routings=routings1,
                                    name='conv_cap_2_1')(primary_caps)

    # Layer 2: Convolutional Capsule
    conv_cap_2_2 = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=4,
                                    num_atoms=32,
                                    strides=2,
                                    padding='same',
                                    routings=routings2,
                                    name='conv_cap_2_2')(conv_cap_2_1)

    # Layer 3: Convolutional Capsule
    conv_cap_3_1 = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=8,
                                    num_atoms=32,
                                    strides=1,
                                    padding='same',
                                    routings=routings1,
                                    name='conv_cap_3_1')(conv_cap_2_2)

    # Layer 3: Convolutional Capsule
    conv_cap_3_2 = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=8,
                                    num_atoms=64,
                                    strides=2,
                                    padding='same',
                                    routings=routings2,
                                    name='conv_cap_3_2')(conv_cap_3_1)

    # Layer 4: Convolutional Capsule
    conv_cap_4_1 = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=8,
                                    num_atoms=32,
                                    strides=1,
                                    padding='same',
                                    routings=routings1,
                                    name='conv_cap_4_1')(conv_cap_3_2)

    # Layer 4: Convolutional Capsule (class capsules)
    conv_cap_4_2 = ConvCapsuleLayer(kernel_size=k_size,
                                    num_capsule=n_class,
                                    num_atoms=output_atoms,
                                    strides=2,
                                    padding='same',
                                    routings=routings2,
                                    name='conv_cap_4_2')(conv_cap_4_1)

    if n_class > 1:
        # Perform GAP on each capsule type.
        class_caps_list = []
        for i in range(n_class):
            in_shape = conv_cap_4_2.get_shape().as_list()
            # Bind `i` as a default argument so each Lambda slices its own
            # capsule type; a bare closure over `i` would capture the loop
            # variable by reference and every branch would use its last value.
            one_class_capsule = layers.Lambda(lambda x, idx=i: x[:, :, :, idx, :],
                                              output_shape=in_shape[1:3] +
                                              in_shape[4:])(conv_cap_4_2)
            gap = layers.GlobalAveragePooling2D(
                name='gap_{}'.format(i))(one_class_capsule)

            # Put capsule dimension back for length and recon
            class_caps_list.append(
                ExpandDim(name='expand_gap_{}'.format(i))(gap))

        class_caps = layers.Concatenate(axis=-2,
                                        name='class_caps')(class_caps_list)
    else:
        # Remove capsule dim, perform GAP, put capsule dim back
        conv_cap_4_2_reshaped = RemoveDim(
            name='conv_cap_4_2_reshaped')(conv_cap_4_2)
        gap = layers.GlobalAveragePooling2D(name='gap')(conv_cap_4_2_reshaped)
        class_caps = ExpandDim(name='expand_gap')(gap)

    # Output layer which predicts classes
    out_caps = Length(num_classes=n_class, name='out_caps')(class_caps)

    # Decoder network.
    _, C, A = class_caps.get_shape()
    y = layers.Input(shape=(n_class, ))
    masked_by_y = Mask()(
        [class_caps, y]
    )  # The true label is used to mask the output of capsule layer. For training
    masked = Mask(
    )(class_caps)  # Mask using the capsule with maximal length. For prediction

    def shared_reconstructor(mask_layer):
        recon_1 = layers.Dense(input_shape[0] // (2**6) * input_shape[1] //
                               (2**6),
                               kernel_initializer='he_normal',
                               activation='relu',
                               name='recon_1',
                               input_shape=(A.value, ))(mask_layer)

        recon_1a = layers.Reshape(
            (input_shape[0] // (2**6), input_shape[1] // (2**6), 1),
            name='recon_1a')(recon_1)

        recon_2 = layers.Conv2DTranspose(filters=128,
                                         kernel_size=5,
                                         strides=(8, 8),
                                         padding='same',
                                         kernel_initializer='he_normal',
                                         activation='relu',
                                         name='recon_2')(recon_1a)

        recon_3 = layers.Conv2DTranspose(filters=64,
                                         kernel_size=5,
                                         strides=(8, 8),
                                         padding='same',
                                         kernel_initializer='he_normal',
                                         activation='relu',
                                         name='recon_3')(recon_2)

        out_recon = layers.Conv2D(filters=3,
                                  kernel_size=3,
                                  padding='same',
                                  kernel_initializer='he_normal',
                                  activation='tanh',
                                  name='out_recon')(recon_3)

        return out_recon

    # Models for training and evaluation (prediction)
    train_model = models.Model(
        inputs=[x, y], outputs=[out_caps,
                                shared_reconstructor(masked_by_y)])
    eval_model = models.Model(inputs=x,
                              outputs=[out_caps,
                                       shared_reconstructor(masked)])

    # manipulate model
    noise = layers.Input(shape=(C.value, A.value))
    noised_class_caps = layers.Add()([class_caps, noise])
    masked_noised_y = Mask()([noised_class_caps, y])
    manipulate_model = models.Model(
        inputs=[x, y, noise], outputs=shared_reconstructor(masked_noised_y))

    return train_model, eval_model, manipulate_model
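A hypothetical usage sketch for the three returned models; the optimizer, losses, and loss weights below are illustrative assumptions, not taken from this repository:

# Binary case: n_class=2 collapses to a single sigmoid-like Length output.
# 512 is chosen so the decoder's Reshape to input_shape // 2**6 works cleanly.
train_model, eval_model, manipulate_model = DiagnosisCapsules(
    input_shape=(512, 512, 3), n_class=2)
train_model.compile(optimizer='adam',
                    loss=['binary_crossentropy', 'mse'],  # class + reconstruction
                    loss_weights=[1.0, 0.0005])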
Exemplo n.º 12
0
def EfficientNet(input_shape,
                 block_args_list,
                 global_params,
                 include_top=True):
    batch_norm_momentum = global_params.batch_norm_momentum
    batch_norm_epsilon = global_params.batch_norm_epsilon
    if global_params.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    # Stem part
    inputs = KL.Input(shape=input_shape)
    x = inputs
    x = KL.Conv2D(filters=round_filters(32, global_params),
                  kernel_size=[3, 3],
                  strides=[2, 2],
                  kernel_initializer=conv_kernel_initializer,
                  padding='same',
                  use_bias=False)(x)
    x = KL.BatchNormalization(axis=channel_axis,
                              momentum=batch_norm_momentum,
                              epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    # Blocks part
    for block_args in block_args_list:
        assert block_args.num_repeat > 0
        # Update block input and output filters based on depth multiplier.
        block_args = block_args._replace(
            input_filters=round_filters(block_args.input_filters,
                                        global_params),
            output_filters=round_filters(block_args.output_filters,
                                         global_params),
            num_repeat=round_repeats(block_args.num_repeat, global_params))

        # The first block needs to take care of stride and filter size increase.
        x = MBConvBlock(block_args, global_params)(x)

        if block_args.num_repeat > 1:
            block_args = block_args._replace(
                input_filters=block_args.output_filters, strides=[1, 1])

        for _ in range(block_args.num_repeat - 1):
            x = MBConvBlock(block_args, global_params)(x)

    # Head part
    x = KL.Conv2D(filters=round_filters(1280, global_params),
                  kernel_size=[1, 1],
                  strides=[1, 1],
                  kernel_initializer=conv_kernel_initializer,
                  padding='same',
                  use_bias=False)(x)
    x = KL.BatchNormalization(axis=channel_axis,
                              momentum=batch_norm_momentum,
                              epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    if include_top:
        x = KL.GlobalAveragePooling2D(data_format=global_params.data_format)(x)
        if global_params.dropout_rate > 0:
            x = KL.Dropout(global_params.dropout_rate)(x)
        x = KL.Dense(global_params.num_classes,
                     kernel_initializer=dense_kernel_initializer)(x)
        x = KL.Activation('softmax')(x)

    outputs = x
    model = KM.Model(inputs, outputs)

    return model
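A hypothetical invocation; `get_default_block_list` and `GlobalParams` stand in for whatever configuration helpers this repository actually provides:

# block_args_list = get_default_block_list()
# global_params = GlobalParams(batch_norm_momentum=0.99,
#                              batch_norm_epsilon=1e-3,
#                              data_format='channels_last',
#                              num_classes=1000,
#                              dropout_rate=0.2)
# model = EfficientNet((224, 224, 3), block_args_list, global_params,
#                      include_top=True)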
Exemplo n.º 13
0
def efficientnet_model(channels,
                       init_block_channels,
                       final_block_channels,
                       kernel_sizes,
                       strides_per_stage,
                       expansion_factors,
                       dropout_rate=0.2,
                       tf_mode=False,
                       bn_epsilon=1e-5,
                       in_channels=3,
                       in_size=(224, 224),
                       classes=1000):
    """
    EfficientNet(-B0) model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters
    ----------
    channels : list of list of int
        Number of output channels for each unit.
    init_block_channels : list of 2 int
        Numbers of output channels for the initial unit.
    final_block_channels : int
        Number of output channels for the final block of the feature extractor.
    kernel_sizes : list of list of int
        Kernel size for each unit.
    strides_per_stage : list of int
        Stride value for the first unit of each stage.
    expansion_factors : list of list of int
        Expansion factor for each unit.
    dropout_rate : float, default 0.2
        Fraction of the input units to drop. Must be a number between 0 and 1.
    tf_mode : bool, default False
        Whether to use TF-like mode.
    bn_epsilon : float, default 1e-5
        Small float added to variance in Batch norm.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    classes : int, default 1000
        Number of classification classes.
    """
    input_shape = (in_channels, in_size[0], in_size[1]) if is_channels_first() else\
        (in_size[0], in_size[1], in_channels)
    input = nn.Input(shape=input_shape)
    activation = "swish"

    x = effi_init_block(x=input,
                        in_channels=in_channels,
                        out_channels=init_block_channels,
                        bn_epsilon=bn_epsilon,
                        activation=activation,
                        tf_mode=tf_mode,
                        name="features/init_block")
    in_channels = init_block_channels
    for i, channels_per_stage in enumerate(channels):
        kernel_sizes_per_stage = kernel_sizes[i]
        expansion_factors_per_stage = expansion_factors[i]
        for j, out_channels in enumerate(channels_per_stage):
            kernel_size = kernel_sizes_per_stage[j]
            expansion_factor = expansion_factors_per_stage[j]
            strides = strides_per_stage[i] if (j == 0) else 1
            if i == 0:
                x = effi_dws_conv_unit(x=x,
                                       in_channels=in_channels,
                                       out_channels=out_channels,
                                       strides=strides,
                                       bn_epsilon=bn_epsilon,
                                       activation=activation,
                                       tf_mode=tf_mode,
                                       name="features/stage{}/unit{}".format(
                                           i + 1, j + 1))
            else:
                x = effi_inv_res_unit(x=x,
                                      in_channels=in_channels,
                                      out_channels=out_channels,
                                      kernel_size=kernel_size,
                                      strides=strides,
                                      expansion_factor=expansion_factor,
                                      bn_epsilon=bn_epsilon,
                                      activation=activation,
                                      tf_mode=tf_mode,
                                      name="features/stage{}/unit{}".format(
                                          i + 1, j + 1))
            in_channels = out_channels
    x = conv1x1_block(x=x,
                      in_channels=in_channels,
                      out_channels=final_block_channels,
                      bn_epsilon=bn_epsilon,
                      activation=activation,
                      name="features/final_block")
    in_channels = final_block_channels
    x = nn.GlobalAveragePooling2D(name="features/final_pool")(x)

    if dropout_rate > 0.0:
        x = nn.Dropout(rate=dropout_rate, name="output/dropout")(x)
    x = nn.Dense(units=classes, input_dim=in_channels, name="output/fc")(x)

    model = Model(inputs=input, outputs=x)
    model.in_size = in_size
    model.classes = classes
    return model
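For orientation, a B0-style call might look like the sketch below; the per-stage lists only approximate the published B0 layout and are illustrative, not authoritative:

# Approximate EfficientNet-B0 configuration (illustrative values):
channels = [[16], [24, 24], [40, 40], [80, 80, 80], [112, 112, 112],
            [192, 192, 192, 192], [320]]
kernel_sizes = [[3], [3, 3], [5, 5], [3, 3, 3], [5, 5, 5], [5, 5, 5, 5], [3]]
strides_per_stage = [1, 2, 2, 2, 1, 2, 1]
expansion_factors = [[1], [6, 6], [6, 6], [6, 6, 6], [6, 6, 6],
                     [6, 6, 6, 6], [6]]
net = efficientnet_model(channels, 32, 1280, kernel_sizes, strides_per_stage,
                         expansion_factors, dropout_rate=0.2)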
Exemplo n.º 14
0
def EfficientNet(input_shape,
                 block_args_list: List[BlockArgs],
                 width_coefficient: float,
                 depth_coefficient: float,
                 include_top=True,
                 weights=None,
                 input_tensor=None,
                 pooling=None,
                 classes=1000,
                 dropout_rate=0.,
                 drop_connect_rate=0.,
                 batch_norm_momentum=0.99,
                 batch_norm_epsilon=1e-3,
                 depth_divisor=8,
                 min_depth=None,
                 data_format=None,
                 default_size=None,
                 **kwargs):


    if data_format is None:
        data_format = K.image_data_format()

    # `channel_axis` feeds the BatchNormalization layers below, so it must be
    # computed here rather than left commented out.
    if data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    if default_size is None:
        default_size = 224

    if block_args_list is None:
        block_args_list = get_default_block_list()

    # count number of strides to compute min size
    stride_count = 1
    for block_args in block_args_list:
        if block_args.strides is not None and block_args.strides[0] > 1:
            stride_count += 1

    min_size = int(2 ** stride_count)

    # Determine proper input shape and default size.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=min_size,
                                      data_format=data_format,
                                      require_flatten=include_top,
                                      weights=weights)

    # Stem part
    if input_tensor is None:
        inputs = layers.Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            inputs = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            inputs = input_tensor

    x = inputs
    x = layers.Conv2D(
        filters=round_filters(32, width_coefficient,
                              depth_divisor, min_depth),
        kernel_size=[3, 3],
        strides=[2, 2],
        kernel_initializer=EfficientNetConvInitializer(),
        padding='same',
        use_bias=False)(x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        momentum=batch_norm_momentum,
        epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    num_blocks = sum([block_args.num_repeat for block_args in block_args_list])
    drop_connect_rate_per_block = drop_connect_rate / float(num_blocks)

    # Blocks part
    for block_idx, block_args in enumerate(block_args_list):
        assert block_args.num_repeat > 0

        # Update block input and output filters based on depth multiplier.
        block_args.input_filters = round_filters(block_args.input_filters, width_coefficient, depth_divisor, min_depth)
        block_args.output_filters = round_filters(block_args.output_filters, width_coefficient, depth_divisor, min_depth)
        block_args.num_repeat = round_repeats(block_args.num_repeat, depth_coefficient)

        # The first block needs to take care of stride and filter size increase.
        x = MBConvBlock(block_args.input_filters, block_args.output_filters,
                        block_args.kernel_size, block_args.strides,
                        block_args.expand_ratio, block_args.se_ratio,
                        block_args.identity_skip, drop_connect_rate_per_block * block_idx,
                        batch_norm_momentum, batch_norm_epsilon, data_format)(x)

        if block_args.num_repeat > 1:
            block_args.input_filters = block_args.output_filters
            block_args.strides = [1, 1]

        for _ in range(block_args.num_repeat - 1):
            x = MBConvBlock(block_args.input_filters, block_args.output_filters,
                            block_args.kernel_size, block_args.strides,
                            block_args.expand_ratio, block_args.se_ratio,
                            block_args.identity_skip, drop_connect_rate_per_block * block_idx,
                            batch_norm_momentum, batch_norm_epsilon, data_format)(x)

    # Head part
    x = layers.Conv2D(
        filters=round_filters(1280, width_coefficient, depth_divisor, min_depth),
        kernel_size=[1, 1],
        strides=[1, 1],
        kernel_initializer=EfficientNetConvInitializer(),
        padding='same',
        use_bias=False)(x)
    x = layers.BatchNormalization(
        axis=channel_axis,
        momentum=batch_norm_momentum,
        epsilon=batch_norm_epsilon)(x)
    x = Swish()(x)

    x = layers.GlobalAveragePooling2D(data_format=data_format)(x)

    if dropout_rate > 0:
        x = layers.Dropout(dropout_rate)(x)

    x = layers.Dense(classes, kernel_initializer=EfficientNetDenseInitializer())(x)
    x = layers.Activation('softmax')(x)

    outputs = x

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)

    model = Model(inputs, outputs)

    # Load weights from a user-supplied path, if given.
    if weights is not None:
        model.load_weights(weights)

    return model
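Since `block_args_list=None` falls back to `get_default_block_list()`, a baseline model can be built from just the scaling coefficients (a sketch, assuming the module-level helpers are importable):

model = EfficientNet((224, 224, 3), None,
                     width_coefficient=1.0, depth_coefficient=1.0,
                     dropout_rate=0.2)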
Exemplo n.º 15
0
def ResNet(stack_fn,
           preact,
           use_bias,
           model_name='resnet',
           include_top=True,
           weights=None,
           input_tensor=None,
           input_shape=None,
           pooling=None,
           nclass=1000,
           **kwargs):
    """Instantiates the ResNet, ResNetV2, and ResNeXt architecture.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    # Arguments
        stack_fn: a function that returns output tensor for the
            stacked residual blocks.
        preact: whether to use pre-activation or not
            (True for ResNetV2, False for ResNet and ResNeXt).
        use_bias: whether to use biases for convolutional layers or not
            (True for ResNet and ResNetV2, False for ResNeXt).
        model_name: string, model name.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        nclass: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)),
                             name='conv1_pad')(img_input)
    x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias,
                      name='conv1_conv')(x)

    if preact is False:
        x = layers.BatchNormalization(axis=bn_axis,
                                      epsilon=1.001e-5,
                                      name='conv1_bn')(x)
        x = layers.Activation('relu', name='conv1_relu')(x)

    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x)
    x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x)

    x = stack_fn(x)

    if preact is True:
        x = layers.BatchNormalization(axis=bn_axis,
                                      epsilon=1.001e-5,
                                      name='post_bn')(x)
        x = layers.Activation('relu', name='post_relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        x = layers.Dense(nclass, activation='softmax', name='probs')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name=model_name)

    # Load weights.
    if weights is not None:
        model.load_weights(weights)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
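A hypothetical usage; `stack1` stands in for this codebase's residual-stack helper, whose real name and signature may differ:

# def stack_fn(x):
#     x = stack1(x, 64, 3, stride1=1, name='conv2')
#     x = stack1(x, 128, 4, name='conv3')
#     x = stack1(x, 256, 6, name='conv4')
#     return stack1(x, 512, 3, name='conv5')
#
# model = ResNet(stack_fn, preact=False, use_bias=True, model_name='resnet50',
#                input_shape=(224, 224, 3), nclass=1000)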
Exemplo n.º 16
0
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              **kwargs):
    """Instantiates the MobileNet architecture.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)`
            (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution
            (also called the resolution multiplier)
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if depth_multiplier != 1:
            raise ValueError('If imagenet weights are being loaded, '
                             'depth multiplier must be 1')

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of '
                             '`0.25`, `0.50`, `0.75` or `1.0` only.')

        if rows != cols or rows not in [128, 160, 192, 224]:
            if rows is None:
                rows = 224
                warnings.warn('MobileNet shape is undefined.'
                              ' Weights for input shape '
                              '(224, 224) will be loaded.')
            else:
                raise ValueError('If imagenet weights are being loaded, '
                                 'input must have a static square shape '
                                 '(one of (128, 128), (160, 160), '
                                 '(192, 192), or (224, 224)). '
                                 'Input shape provided = %s' % (input_shape, ))

    if backend.image_data_format() != 'channels_last':
        warnings.warn('The MobileNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

    x = _depthwise_conv_block(x,
                              128,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(x,
                              256,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    x = _depthwise_conv_block(x,
                              512,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=6)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(x,
                              1024,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=12)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

    if include_top:
        if backend.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Reshape(shape, name='reshape_1')(x)
        x = layers.Dropout(dropout, name='dropout')(x)
        x = layers.Conv2D(classes, (1, 1), padding='same',
                          name='conv_preds')(x)
        x = layers.Activation('softmax', name='act_softmax')(x)
        x = layers.Reshape((classes, ), name='reshape_2')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        if include_top:
            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
        else:
            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = keras_utils.get_file(model_name,
                                            weight_path,
                                            cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    if old_data_format:
        backend.set_image_data_format(old_data_format)
    return model
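Following the validation logic above, a custom class count requires random initialization, while ImageNet weights constrain `alpha` and the input size:

model = MobileNet(input_shape=(224, 224, 3), alpha=0.5, weights=None, classes=10)
# model = MobileNet(input_shape=(160, 160, 3), alpha=0.75, weights='imagenet')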
Exemplo n.º 17
0
def ResNet(model_params, input_shape=None, input_tensor=None, include_top=True,
           classes=1000, weights='imagenet', create_encoder=False):
    """Instantiates the ResNet, SEResNet architecture.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    Args:
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 input channels.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    Returns:
        A Keras model instance.
    Raises:
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape, name='data')
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # choose residual block type
    ResidualBlock = model_params.residual_block
    Attention = None

    # collect intermediate scale outputs (for encoder mode)
    scales = []

    # get parameters for model layers
    no_scale_bn_params = get_bn_params(scale=False)
    bn_params = get_bn_params()
    conv_params = get_conv_params()
    init_filters = 64

    # resnet bottom
    x = layers.BatchNormalization(name='bn_data', **no_scale_bn_params)(img_input)
    x = layers.ZeroPadding2D(padding=(3, 3))(x)
    x = layers.Conv2D(init_filters, (7, 7), strides=(2, 2), name='conv0', **conv_params)(x)
    x = layers.BatchNormalization(name='bn0', **bn_params)(x)
    x = layers.Activation('relu', name='relu0')(x)
    x = layers.ZeroPadding2D(padding=(1, 1))(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='valid', name='pooling0')(x)

    # resnet body
    for stage, rep in enumerate(model_params.repetitions):
        for block in range(rep):

            filters = init_filters * (2 ** stage)
            
            # first block of first stage without strides because we have maxpooling before
            if block == 0 and stage == 0:
                x = ResidualBlock(filters, stage, block, strides=(1, 1),
                                  cut='post', attention=Attention)(x)

            elif block == 0:
                x = ResidualBlock(filters, stage, block, strides=(2, 2),
                                  cut='post', attention=Attention)(x)

            else:
                x = ResidualBlock(filters, stage, block, strides=(1, 1),
                                  cut='pre', attention=Attention)(x)
                
            if block == rep - 1:
                scales.append(x)

    x = layers.BatchNormalization(name='bn1', **bn_params)(x)
    x = layers.Activation('relu', name='relu1')(x)

    # resnet top
    if include_top:
        x = layers.GlobalAveragePooling2D(name='pool1')(x)
        x = layers.Dense(classes, name='fc1')(x)
        x = layers.Activation('softmax', name='softmax')(x)

    # Ensure that the model takes into account any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    if not create_encoder:
        model = Model(inputs, x)

        return model
    else:
        return inputs, x, scales
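In encoder mode the builder returns the graph endpoints rather than a finished `Model`, so a decoder can consume the per-stage `scales`; `model_params` is assumed to come from this repository's own configuration objects:

inputs, x, scales = ResNet(model_params, input_shape=(224, 224, 3),
                           include_top=False, create_encoder=True)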
Exemplo n.º 18
0
        rpn_bbox[i][0] = temp_gt_boxes[i][0] / image_shape[0]
        rpn_bbox[i][1] = temp_gt_boxes[i][1] / image_shape[1]
        rpn_bbox[i][2] = (temp_gt_boxes[i][2] - temp_gt_boxes[i][0]) / image_shape[0]
        rpn_bbox[i][3] = (temp_gt_boxes[i][3] - temp_gt_boxes[i][1]) / image_shape[1]
    # Encode each box (from index 1 on) as the delta from the previous
    # normalized box.
    for i in range(1, gt_boxes.shape[0]):
        for j in range(4):
            rpn_out[i, j] = rpn_bbox[i][j] - rpn_bbox[i - 1][j]
    return rpn_out
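The loop above emits each normalized (y, x, h, w) box as a first difference from its predecessor; a minimal NumPy illustration of that delta encoding:

import numpy as np

rpn_bbox = np.array([[0.1, 0.1, 0.2, 0.2],
                     [0.3, 0.2, 0.2, 0.1]])
deltas = np.diff(rpn_bbox, axis=0)   # row i >= 1: box_i - box_{i-1}
# deltas == [[0.2, 0.1, 0.0, -0.1]]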

input_image = KL.Input(
            shape=[None, None, 512], name="input_feature")

decoder_inputs = KL.Input(
        shape=[1,4], name="input_rpn_bbox", dtype=tf.float32)
# gap
encoder_inputs = KL.GlobalAveragePooling2D(data_format=None)(input_image)
# calculate rpn_bbox here


encoder1 = KL.Dense(4, activation='relu', use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros',name = "encode1")
# encoder2 = KL.Dense(4, activation='relu', use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros',name = "encode2")

decoder = KL.GRU(4,activation = 'sigmoid', return_sequences=True, return_state=True, input_shape = (None,4),name = "decode")

state_h = encoder1(encoder_inputs)
# state_c = encoder2(encoder_inputs)
# encoded = [state_h, state_c]

encoder_model = KM.Model(input_image, state_h)
decoder_states_input_h = KL.Input(shape=[4], name="input_state_h", dtype=tf.float32)
# decoder_states_input_c = KL.Input(shape=[4], name="input_state_c", dtype=tf.float32)
Exemplo n.º 19
0
def ResNetRS(
    depth: int,
    input_shape=None,
    bn_momentum=0.0,
    bn_epsilon=1e-5,
    activation: str = "relu",
    se_ratio=0.25,
    dropout_rate=0.25,
    drop_connect_rate=0.2,
    include_top=True,
    block_args: List[Dict[str, int]] = None,
    model_name="resnet-rs",
    pooling=None,
    weights="imagenet",
    input_tensor=None,
    classes=1000,
    classifier_activation: Union[str, Callable] = "softmax",
    include_preprocessing=True,
):
    """Build Resnet-RS model, given provided parameters.

    Args:
        depth: Depth of ResNet network.
        input_shape: optional shape tuple. It should have exactly 3 input
          channels, and width and height should be no smaller than 32. E.g.
          (200, 200, 3) would be one valid value.
        bn_momentum: Momentum parameter for Batch Normalization layers.
        bn_epsilon: Epsilon parameter for Batch Normalization layers.
        activation: activation function.
        se_ratio: Squeeze and Excitation layer ratio.
        dropout_rate: dropout rate before final classifier layer.
        drop_connect_rate: dropout rate at skip connections.
        include_top: whether to include the fully-connected layer at the top of
          the network.
        block_args: list of dicts, parameters to construct block modules.
        model_name: name of the model.
        pooling: optional pooling mode for feature extraction when `include_top`
          is `False`.
          - `None` means that the output of the model will be the 4D tensor
            output of the last convolutional layer.
          - `avg` means that global average pooling will be applied to the
            output of the last convolutional layer, and thus the output of the
            model will be a 2D tensor.
          - `max` means that global max pooling will be applied.
        weights: one of `None` (random initialization), `'imagenet'`
          (pre-training on ImageNet), or the path to the weights file to be
          loaded. Note: one model can have multiple ImageNet variants, depending
          on the input shape it was trained with. For input_shape 224x224 pass
          `imagenet-i224` as argument. By default, the weights for the highest
          available input shape are downloaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to
          use as image input for the model.
        classes: optional number of classes to classify images into, only to be
          specified if `include_top` is True, and if no `weights` argument is
          specified.
        classifier_activation: A `str` or callable. The activation function to
          use on the "top" layer. Ignored unless `include_top=True`. Set
          `classifier_activation=None` to return the logits of the "top" layer.
        include_preprocessing: Boolean, whether to include the preprocessing
          layer (`Rescaling`) at the bottom of the network. Defaults to `True`.
          Note: the input image is normalized by the ImageNet mean and standard
          deviation.

    Returns:
        A `tf.keras.Model` instance.

    Raises:
        ValueError: in case of invalid argument for `weights`, or invalid input
            shape.
        ValueError: if `classifier_activation` is not `softmax` or `None` when
            using a pretrained top layer.
    """
    # Validate parameters
    available_weight_variants = DEPTH_TO_WEIGHT_VARIANTS[depth]
    if weights == "imagenet":
        max_input_shape = max(available_weight_variants)
        # `imagenet` argument without explicit weights input size.
        # Picking weights trained with biggest available shape
        weights = f"{weights}-i{max_input_shape}"

    weights_allow_list = [f"imagenet-i{x}" for x in available_weight_variants]
    if not (weights in {*weights_allow_list, None}
            or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `'imagenet'` "
            "(pre-training on ImageNet, with highest available input shape),"
            " or the path to the weights file to be loaded. "
            f"For ResNetRS{depth} the following weight variants are "
            f"available {weights_allow_list} (default=highest)."
            f" Received weights={weights}")

    if weights in weights_allow_list and include_top and classes != 1000:
        raise ValueError(
            f"If using `weights` as `'imagenet'` or any "
            f"of {weights_allow_list} "
            f"with `include_top` as true, `classes` should be 1000. "
            f"Received classes={classes}")

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )
    # Define input tensor
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        num_channels = input_shape[bn_axis - 1]
        x = layers.Rescaling(scale=1.0 / 255)(x)
        if num_channels == 3:
            x = layers.Normalization(
                mean=[0.485, 0.456, 0.406],
                variance=[0.229**2, 0.224**2, 0.225**2],
                axis=bn_axis,
            )(x)

    # Build stem
    x = STEM(bn_momentum=bn_momentum,
             bn_epsilon=bn_epsilon,
             activation=activation)(x)

    # Build blocks
    if block_args is None:
        block_args = BLOCK_ARGS[depth]

    for i, args in enumerate(block_args):
        survival_probability = get_survival_probability(
            init_rate=drop_connect_rate,
            block_num=i + 2,
            total_blocks=len(block_args) + 1,
        )

        x = BlockGroup(
            filters=args["input_filters"],
            activation=activation,
            strides=(1 if i == 0 else 2),
            num_repeats=args["num_repeats"],
            se_ratio=se_ratio,
            bn_momentum=bn_momentum,
            bn_epsilon=bn_epsilon,
            survival_probability=survival_probability,
            name=f"BlockGroup{i + 2}_",
        )(x)

    # Build head:
    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)

        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes,
                         activation=classifier_activation,
                         name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = training.Model(inputs, x, name=model_name)

    # Download weights
    if weights in weights_allow_list:
        weights_input_shape = weights.split("-")[-1]  # e. g. "i160"
        weights_name = f"{model_name}-{weights_input_shape}"
        if not include_top:
            weights_name += "_notop"

        filename = f"{weights_name}.h5"
        download_url = BASE_WEIGHTS_URL + filename
        weights_path = data_utils.get_file(
            fname=filename,
            origin=download_url,
            cache_subdir="models",
            file_hash=WEIGHT_HASHES[filename],
        )
        model.load_weights(weights_path)

    elif weights is not None:
        model.load_weights(weights)

    return model
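A random-initialization call for a custom head (the checks above restrict pretrained variants to 1000 classes):

model = ResNetRS(depth=50, weights=None, classes=10,
                 input_shape=(224, 224, 3))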
Exemplo n.º 20
0
def DenseNet(blocks,
             include_top=True,
             input_shape=None,
             pooling=None,
             classes=1000
             ):

    # BatchNorm/channel axis for rank-3 (batch, steps, channels) inputs
    bn_axis = 2

    img_input = layers.Input(shape=input_shape)

    x = img_input

    x = layers.Conv1D(64, 10, strides=3, use_bias=False, name='conv1/conv2', padding='same')(x)
    x = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')(x)
    x = layers.Activation('relu', name='conv1/relu')(x)

    x = self_Att_channel(x,x_att= x, r=4, name='1')

    x1 = dense_block(x, blocks[0], name='conv2')
    x1 = transition_block(x1, 0.5, name='pool2')
    x = self_Att_channel(x1, x_att=x1, r=4, name='2')

    x2 = dense_block(x, blocks[1], name='conv3')
    x2 = transition_block(x2, 0.5, name='pool3')
    x = self_Att_channel(x2, x_att=x2, r=4, name='3')

    x3 = dense_block(x, blocks[2], name='conv4')
    x3 = transition_block(x3, 0.5, name='pool4')
    x = self_Att_channel(x3, x_att=x3, r=4, name='4')

    x4 = dense_block(x, blocks[3], name='conv5')
    x4 = transition_block(x4, 0.5, name='pool5')
    x = self_Att_channel(x4, x_att=x4, r=4, name='5')

    if include_top:

        x = layers.GlobalAveragePooling1D(name='avg_pool')(x)
        x = layers.Dense(classes, activation='sigmoid', name=str(classes))(x)
    else:
        # The backbone is 1-D, so the feature map is rank 3; use 1-D pooling
        # (the 2-D pooling layers would fail on this tensor).
        if pooling == 'avg':
            x = layers.GlobalAveragePooling1D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling1D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input

    # Create model.
    if blocks == [6, 12, 24, 16]:
        model = models.Model(inputs, x, name='densenet121')
    elif blocks == [6, 12, 32, 32]:
        model = models.Model(inputs, x, name='densenet169')
    elif blocks == [6, 12, 48, 32]:
        model = models.Model(inputs, x, name='densenet201')
    else:
        model = models.Model(inputs, x, name='densenet')

    return model
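The backbone is Conv1D-based, so the expected input is a (timesteps, channels) signal; the shape below is illustrative:

model = DenseNet([6, 12, 24, 16], include_top=True,
                 input_shape=(5000, 1), classes=5)   # named 'densenet121'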
Exemplo n.º 21
0
def detect_model(classcnt):
    model_input = Input(shape=(INPUT_HEIGHT, INPUT_WIDTH, 3))

    x = model_input
    x05 = Conv2D(64, (5, 5),
                 strides=(3, 4),
                 activation=ACTFUNC,
                 padding='same',
                 name='block0_conv1')(x)
    x07 = Conv2D(64, (7, 7),
                 strides=(3, 4),
                 activation=ACTFUNC,
                 padding='same',
                 name='block0_conv2')(x)
    x09 = Conv2D(64, (9, 9),
                 strides=(3, 4),
                 activation=ACTFUNC,
                 padding='same',
                 name='block0_conv3')(x)
    x11 = Conv2D(64, (11, 11),
                 strides=(3, 4),
                 activation=ACTFUNC,
                 padding='same',
                 name='block0_conv4')(x)
    x = layers.concatenate([x05, x07, x09, x11])

    x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False,
               name='block1_conv1')(x)
    x = BatchNormalization(name='block1_conv1_bn')(x)
    x = Activation('relu', name='block1_conv1_act')(x)
    x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
    x = BatchNormalization(name='block1_conv2_bn')(x)
    x = Activation('relu', name='block1_conv2_act')(x)

    residual = Conv2D(128, (1, 1),
                      strides=(2, 2),
                      padding='same',
                      use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(128, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block2_sepconv1')(x)
    x = BatchNormalization(name='block2_sepconv1_bn')(x)
    x = Activation('relu', name='block2_sepconv2_act')(x)
    x = SeparableConv2D(128, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block2_sepconv2')(x)
    x = BatchNormalization(name='block2_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3),
                     strides=(2, 2),
                     padding='same',
                     name='block2_pool')(x)
    x = layers.add([x, residual])

    residual = Conv2D(256, (1, 1),
                      strides=(2, 2),
                      padding='same',
                      use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block3_sepconv1_act')(x)
    x = SeparableConv2D(256, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block3_sepconv1')(x)
    x = BatchNormalization(name='block3_sepconv1_bn')(x)
    x = Activation('relu', name='block3_sepconv2_act')(x)
    x = SeparableConv2D(256, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block3_sepconv2')(x)
    x = BatchNormalization(name='block3_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3),
                     strides=(2, 2),
                     padding='same',
                     name='block3_pool')(x)
    x = layers.add([x, residual])

    residual = Conv2D(728, (1, 1),
                      strides=(2, 2),
                      padding='same',
                      use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block4_sepconv1_act')(x)
    x = SeparableConv2D(728, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block4_sepconv1')(x)
    x = BatchNormalization(name='block4_sepconv1_bn')(x)
    x = Activation('relu', name='block4_sepconv2_act')(x)
    x = SeparableConv2D(728, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block4_sepconv2')(x)
    x = BatchNormalization(name='block4_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3),
                     strides=(2, 2),
                     padding='same',
                     name='block4_pool')(x)
    x = layers.add([x, residual])

    for i in range(8):
        residual = x
        prefix = 'block' + str(i + 5)

        x = Activation('relu', name=prefix + '_sepconv1_act')(x)
        x = SeparableConv2D(728, (3, 3),
                            padding='same',
                            use_bias=False,
                            name=prefix + '_sepconv1')(x)
        x = BatchNormalization(name=prefix + '_sepconv1_bn')(x)
        x = Activation('relu', name=prefix + '_sepconv2_act')(x)
        x = SeparableConv2D(728, (3, 3),
                            padding='same',
                            use_bias=False,
                            name=prefix + '_sepconv2')(x)
        x = BatchNormalization(name=prefix + '_sepconv2_bn')(x)
        x = Activation('relu', name=prefix + '_sepconv3_act')(x)
        x = SeparableConv2D(728, (3, 3),
                            padding='same',
                            use_bias=False,
                            name=prefix + '_sepconv3')(x)
        x = BatchNormalization(name=prefix + '_sepconv3_bn')(x)

        x = layers.add([x, residual])

    residual = Conv2D(1024, (1, 1),
                      strides=(2, 2),
                      padding='same',
                      use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block13_sepconv1_act')(x)
    x = SeparableConv2D(728, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block13_sepconv1')(x)
    x = BatchNormalization(name='block13_sepconv1_bn')(x)
    x = Activation('relu', name='block13_sepconv2_act')(x)
    x = SeparableConv2D(1024, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block13_sepconv2')(x)
    x = BatchNormalization(name='block13_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3),
                     strides=(2, 2),
                     padding='same',
                     name='block13_pool')(x)
    x = layers.add([x, residual])

    x = SeparableConv2D(1536, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block14_sepconv1')(x)
    x = BatchNormalization(name='block14_sepconv1_bn')(x)
    x = Activation('relu', name='block14_sepconv1_act')(x)

    x = SeparableConv2D(2048, (3, 3),
                        padding='same',
                        use_bias=False,
                        name='block14_sepconv2')(x)
    x = BatchNormalization(name='block14_sepconv2_bn')(x)
    x = Activation('relu', name='block14_sepconv2_act')(x)

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    # Two softmax heads: a fixed 3-way auxiliary head (`pred`) and the main
    # head (`out`). `classcnt` and `model_input` are assumed to be defined
    # earlier in the source module; they are not part of this excerpt.
    pred = Dense(3, activation='softmax', name='pred')(x)
    out = Dense(classcnt, activation='softmax', name='out')(x)

    model = Model(inputs=model_input, outputs=[pred, out])
    return model
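# A minimal sketch of training the two-headed model built above.
# Assumptions (not in the source): the builder has been called and its
# result bound to `model`, and the two softmax heads are weighted equally.
model.compile(optimizer='adam',
              loss={'pred': 'categorical_crossentropy',
                    'out': 'categorical_crossentropy'},
              loss_weights={'pred': 1.0, 'out': 1.0},
              metrics=['accuracy'])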
Example No. 22
def InceptionV3(include_top=True,
                input_tensor=None,
                input_shape=None,
                pooling=None,
                classes=3,
                train_backbone=True,
                num_init_filters=8,
                **kwargs):
    """Instantiates the Inception v3 architecture.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(299, 299, 3)` (with `channels_last` data format)
            or `(3, 299, 299)` (with `channels_first` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 75.
            E.g. `(150, 150, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True.
        train_backbone: whether the convolutional backbone layers
            should be trainable.
        num_init_filters: number of filters in the first convolution;
            the widths of all later layers are scaled from this value.
    # Returns
        The output tensor of the network. Unlike the stock Keras version,
        this variant does not wrap the graph in a `Model` itself (see the
        usage sketch after the function).
    # Raises
        ValueError: in case of an invalid input shape.
    """
    # This variant is hard-wired to the 'channels_last' data format.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    channel_axis = 3
    x = conv2d_bn(img_input,
                  num_init_filters,
                  3,
                  3,
                  strides=(2, 2),
                  padding='valid',
                  trainable=train_backbone)
    x = conv2d_bn(x,
                  num_init_filters,
                  3,
                  3,
                  padding='valid',
                  trainable=train_backbone)
    x = conv2d_bn(x, num_init_filters * 2, 3, 3)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = conv2d_bn(x,
                  int(num_init_filters * 2.5),
                  1,
                  1,
                  padding='valid',
                  trainable=train_backbone)
    x = conv2d_bn(x,
                  num_init_filters * 6,
                  3,
                  3,
                  padding='valid',
                  trainable=train_backbone)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # mixed 0: 35 x 35 x 256
    branch1x1 = conv2d_bn(x, num_init_filters * 2, 1, 1)

    branch5x5 = conv2d_bn(x, int(num_init_filters * 1.5), 1, 1)
    branch5x5 = conv2d_bn(branch5x5, num_init_filters * 2, 5, 5)

    branch3x3dbl = conv2d_bn(x, num_init_filters * 2, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)

    branch_pool = layers.AveragePooling2D((3, 3),
                                          strides=(1, 1),
                                          padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, num_init_filters, 1, 1)
    x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed0')

    # mixed 1: 35 x 35 x 288
    branch1x1 = conv2d_bn(x, num_init_filters * 2, 1, 1)

    branch5x5 = conv2d_bn(x, int(num_init_filters * 1.5), 1, 1)
    branch5x5 = conv2d_bn(branch5x5, num_init_filters * 2, 5, 5)

    branch3x3dbl = conv2d_bn(x, num_init_filters * 2, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)

    branch_pool = layers.AveragePooling2D((3, 3),
                                          strides=(1, 1),
                                          padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, num_init_filters * 2, 1, 1)
    x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed1')

    # mixed 2: 35 x 35 x 288
    branch1x1 = conv2d_bn(x, num_init_filters * 2, 1, 1)

    branch5x5 = conv2d_bn(x, int(num_init_filters * 1.5), 1, 1)
    branch5x5 = conv2d_bn(branch5x5, num_init_filters * 2, 5, 5)

    branch3x3dbl = conv2d_bn(x, num_init_filters * 2, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)

    branch_pool = layers.AveragePooling2D((3, 3),
                                          strides=(1, 1),
                                          padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, num_init_filters * 2, 1, 1)
    x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed2')

    # mixed 3: 17 x 17 x 768
    branch3x3 = conv2d_bn(x,
                          num_init_filters * 12,
                          3,
                          3,
                          strides=(2, 2),
                          padding='valid',
                          trainable=train_backbone)

    branch3x3dbl = conv2d_bn(x, num_init_filters * 2, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, num_init_filters * 3, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl,
                             num_init_filters * 3,
                             3,
                             3,
                             strides=(2, 2),
                             padding='valid',
                             trainable=train_backbone)

    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = layers.concatenate([branch3x3, branch3x3dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed3')

    # mixed 4: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, num_init_filters * 6, 1, 1)

    branch7x7 = conv2d_bn(x, num_init_filters * 4, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, num_init_filters * 4, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, num_init_filters * 6, 7, 1)

    branch7x7dbl = conv2d_bn(x, num_init_filters * 4, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 4, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 4, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 4, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 6, 1, 7)

    branch_pool = layers.AveragePooling2D((3, 3),
                                          strides=(1, 1),
                                          padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, num_init_filters * 6, 1, 1)
    x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed4')

    # mixed 5, 6: 17 x 17 x 768
    for i in range(2):
        branch1x1 = conv2d_bn(x, num_init_filters * 6, 1, 1)

        branch7x7 = conv2d_bn(x, num_init_filters * 5, 1, 1)
        branch7x7 = conv2d_bn(branch7x7, num_init_filters * 5, 1, 7)
        branch7x7 = conv2d_bn(branch7x7, num_init_filters * 6, 7, 1)

        branch7x7dbl = conv2d_bn(x, num_init_filters * 5, 1, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 5, 7, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 5, 1, 7)
        branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 5, 7, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 6, 1, 7)

        branch_pool = layers.AveragePooling2D((3, 3),
                                              strides=(1, 1),
                                              padding='same')(x)
        branch_pool = conv2d_bn(branch_pool, num_init_filters * 6, 1, 1)
        x = layers.concatenate(
            [branch1x1, branch7x7, branch7x7dbl, branch_pool],
            axis=channel_axis,
            name='mixed' + str(5 + i))

    # mixed 7: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, num_init_filters * 6, 1, 1)

    branch7x7 = conv2d_bn(x, num_init_filters * 6, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, num_init_filters * 6, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, num_init_filters * 6, 7, 1)

    branch7x7dbl = conv2d_bn(x, num_init_filters * 6, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 6, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 6, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 6, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, num_init_filters * 6, 1, 7)

    branch_pool = layers.AveragePooling2D((3, 3),
                                          strides=(1, 1),
                                          padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, num_init_filters * 6, 1, 1)
    x = layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed7')

    # mixed 8: 8 x 8 x 1280
    branch3x3 = conv2d_bn(x, num_init_filters * 6, 1, 1, trainable=True)
    branch3x3 = conv2d_bn(branch3x3,
                          num_init_filters * 10,
                          3,
                          3,
                          strides=(2, 2),
                          padding='valid',
                          trainable=True)

    branch7x7x3 = conv2d_bn(x, num_init_filters * 6, 1, 1)
    branch7x7x3 = conv2d_bn(branch7x7x3, num_init_filters * 6, 1, 7)
    branch7x7x3 = conv2d_bn(branch7x7x3, num_init_filters * 6, 7, 1)
    branch7x7x3 = conv2d_bn(branch7x7x3,
                            num_init_filters * 6,
                            3,
                            3,
                            strides=(2, 2),
                            padding='valid',
                            trainable=True)

    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = layers.concatenate([branch3x3, branch7x7x3, branch_pool],
                           axis=channel_axis,
                           name='mixed8')

    # mixed 9: 8 x 8 x 2048
    for i in range(2):
        branch1x1 = conv2d_bn(x, num_init_filters * 10, 1, 1, trainable=True)

        branch3x3 = conv2d_bn(x, num_init_filters * 12, 1, 1, trainable=True)
        branch3x3_1 = conv2d_bn(branch3x3,
                                num_init_filters * 12,
                                1,
                                3,
                                trainable=True)
        branch3x3_2 = conv2d_bn(branch3x3,
                                num_init_filters * 12,
                                3,
                                1,
                                trainable=True)
        branch3x3 = layers.concatenate([branch3x3_1, branch3x3_2],
                                       axis=channel_axis,
                                       name='mixed9_' + str(i))

        branch3x3dbl = conv2d_bn(x,
                                 num_init_filters * 14,
                                 1,
                                 1,
                                 trainable=True)
        branch3x3dbl = conv2d_bn(branch3x3dbl,
                                 num_init_filters * 12,
                                 3,
                                 3,
                                 trainable=True)
        branch3x3dbl_1 = conv2d_bn(branch3x3dbl,
                                   num_init_filters * 12,
                                   1,
                                   3,
                                   trainable=True)
        branch3x3dbl_2 = conv2d_bn(branch3x3dbl,
                                   num_init_filters * 12,
                                   3,
                                   1,
                                   trainable=True)
        branch3x3dbl = layers.concatenate([branch3x3dbl_1, branch3x3dbl_2],
                                          axis=channel_axis)

        branch_pool = layers.AveragePooling2D((3, 3),
                                              strides=(1, 1),
                                              padding='same')(x)
        branch_pool = conv2d_bn(branch_pool,
                                num_init_filters * 6,
                                1,
                                1,
                                trainable=True)
        x = layers.concatenate(
            [branch1x1, branch3x3, branch3x3dbl, branch_pool],
            axis=channel_axis,
            name='mixed' + str(9 + i))
    if include_top:
        # Classification block
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # No `Model` is created here: the raw output tensor is returned and the
    # caller wraps the input and output tensors itself.
    return x
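# A minimal usage sketch for this variant. Assumptions (not in the source):
# `Input` and `Model` are imported from tensorflow.keras as elsewhere in
# this file. Because the function returns a tensor rather than a model,
# the caller builds the `Model` itself:
example_input = Input(shape=(299, 299, 3))
example_output = InceptionV3(include_top=True,
                             input_tensor=example_input,
                             classes=3,
                             train_backbone=False,
                             num_init_filters=8)
example_model = Model(inputs=example_input, outputs=example_output)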
Example No. 23
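    # Note: `apply` below is a nested block builder (an EfficientNetV2-style
    # fused MBConv block). Its free variables (input_filters, output_filters,
    # expand_ratio, kernel_size, strides, se_ratio, bn_axis, bn_momentum,
    # activation, survival_probability, name) are bound by an enclosing
    # factory function that is not part of this excerpt.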
    def apply(inputs):
        filters = input_filters * expand_ratio
        if expand_ratio != 1:
            x = layers.Conv2D(
                filters,
                kernel_size=kernel_size,
                strides=strides,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                data_format="channels_last",
                padding="same",
                use_bias=False,
                name=name + "expand_conv",
            )(inputs)
            x = layers.BatchNormalization(axis=bn_axis,
                                          momentum=bn_momentum,
                                          name=name + "expand_bn")(x)
            x = layers.Activation(activation=activation,
                                  name=name + "expand_activation")(x)
        else:
            x = inputs

        # Squeeze and excite
        if 0 < se_ratio <= 1:
            filters_se = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=name + "se_squeeze")(x)
            if bn_axis == 1:
                se_shape = (filters, 1, 1)
            else:
                se_shape = (1, 1, filters)

            se = layers.Reshape(se_shape, name=name + "se_reshape")(se)

            se = layers.Conv2D(
                filters_se,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_reduce",
            )(se)
            se = layers.Conv2D(
                filters,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=name + "se_expand",
            )(se)

            x = layers.multiply([x, se], name=name + "se_excite")

        # Output phase:
        x = layers.Conv2D(
            output_filters,
            kernel_size=1 if expand_ratio != 1 else kernel_size,
            strides=1 if expand_ratio != 1 else strides,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name=name + "project_conv",
        )(x)
        x = layers.BatchNormalization(axis=bn_axis,
                                      momentum=bn_momentum,
                                      name=name + "project_bn")(x)
        if expand_ratio == 1:
            x = layers.Activation(activation=activation,
                                  name=name + "project_activation")(x)

        # Residual:
        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=name + "drop",
                )(x)
            x = layers.add([x, inputs], name=name + "add")
        return x
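# A minimal sketch (an assumption about the surrounding file, which is not
# shown here) of the closure pattern used by `apply` above: an outer factory
# captures the hyperparameters, and the returned inner function reads them
# as free variables when called on a tensor.
def conv_block(filters, name):
    def apply(inputs):
        x = layers.Conv2D(filters, 3, padding="same", use_bias=False,
                          name=name + "conv")(inputs)
        x = layers.BatchNormalization(name=name + "bn")(x)
        return layers.Activation("relu", name=name + "act")(x)
    return apply

# Usage: x = conv_block(64, "block1_")(x)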
Example No. 24
def InceptionResNetV2(include_top=True,
                      weights='imagenet',
                      input_tensor=None,
                      input_shape=None,
                      pooling=None,
                      classes=1000,
                      **kwargs):
    """Instantiates the Inception-ResNet v2 architecture.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is `False` (otherwise the input shape
            has to be `(299, 299, 3)` (with `'channels_last'` data format)
            or `(3, 299, 299)` (with `'channels_first'` data format).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 75.
            E.g. `(150, 150, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the last convolutional layer.
            - `'avg'` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `'max'` means that global max pooling will be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is `True`, and
            if no `weights` argument is specified.

    # Returns
        A Keras `Model` instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=299,
                                      min_size=75,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Stem block: 35 x 35 x 192
    x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid')
    x = conv2d_bn(x, 32, 3, padding='valid')
    x = conv2d_bn(x, 64, 3)
    x = layers.MaxPooling2D(3, strides=2)(x)
    x = conv2d_bn(x, 80, 1, padding='valid')
    x = conv2d_bn(x, 192, 3, padding='valid')
    x = layers.MaxPooling2D(3, strides=2)(x)

    # Mixed 5b (Inception-A block): 35 x 35 x 320
    branch_0 = conv2d_bn(x, 96, 1)
    branch_1 = conv2d_bn(x, 48, 1)
    branch_1 = conv2d_bn(branch_1, 64, 5)
    branch_2 = conv2d_bn(x, 64, 1)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3
    x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches)

    # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320
    for block_idx in range(1, 11):
        x = inception_resnet_block(x,
                                   scale=0.17,
                                   block_type='block35',
                                   block_idx=block_idx)

    # Mixed 6a (Reduction-A block): 17 x 17 x 1088
    branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 256, 3)
    branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid')
    branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x)
    branches = [branch_0, branch_1, branch_pool]
    x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches)

    # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088
    for block_idx in range(1, 21):
        x = inception_resnet_block(x,
                                   scale=0.1,
                                   block_type='block17',
                                   block_idx=block_idx)

    # Mixed 7a (Reduction-B block): 8 x 8 x 2080
    branch_0 = conv2d_bn(x, 256, 1)
    branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid')
    branch_2 = conv2d_bn(x, 256, 1)
    branch_2 = conv2d_bn(branch_2, 288, 3)
    branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid')
    branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches)

    # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080
    for block_idx in range(1, 10):
        x = inception_resnet_block(x,
                                   scale=0.2,
                                   block_type='block8',
                                   block_idx=block_idx)
    x = inception_resnet_block(x,
                               scale=1.,
                               activation=None,
                               block_type='block8',
                               block_idx=10)

    # Final convolution block: 8 x 8 x 1536
    x = conv2d_bn(x, 1536, 1, name='conv_7b')

    if include_top:
        # Classification block
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name='inception_resnet_v2')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5'
            weights_path = keras_utils.get_file(
                fname,
                BASE_WEIGHT_URL + fname,
                cache_subdir='models',
                file_hash='e693bd0210a403b3192acc6073ad2e96')
        else:
            fname = ('inception_resnet_v2_weights_'
                     'tf_dim_ordering_tf_kernels_notop.h5')
            weights_path = keras_utils.get_file(
                fname,
                BASE_WEIGHT_URL + fname,
                cache_subdir='models',
                file_hash='d19885ff4a710c122648d3b5c3b684e4')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
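# A hedged usage sketch: extracting pooled features with ImageNet weights.
# (`images` is a hypothetical, already-preprocessed array of shape
# (n, 299, 299, 3); the pooled features have 1536 channels, matching the
# final conv_7b block.)
feature_extractor = InceptionResNetV2(weights='imagenet',
                                      include_top=False,
                                      pooling='avg')
# features = feature_extractor.predict(images)  # shape: (n, 1536)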
Example No. 25
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
    """Instantiates the MobileNetV2 architecture.
    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input image resolution that is not
            (224, 224, 3).
            It should have exactly 3 input channels.
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you include both input_tensor and input_shape,
            input_shape will be used if they match; if the shapes
            do not match, an error is raised.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
            width multiplier in the MobileNetV2 paper, but the name is
            kept for consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            invalid input shape, or an invalid `alpha` or input size
            when `weights='imagenet'`.
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    keras_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not a valid Keras tensor')
        if is_input_t_tensor:
            if backend.image_data_format() == 'channels_first':
                # (batch, channels, rows, cols) vs (channels, rows, cols):
                # compare the spatial `rows` dimension
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                # (batch, rows, cols, channels) vs (rows, cols, channels):
                # compare the spatial `cols` dimension
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if input_shape is None and not backend.is_keras_tensor(input_tensor):
            default_size = 224
        elif input_shape is None and backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]

            if rows == cols and rows in [96, 128, 160, 192, 224]:
                default_size = rows
            else:
                default_size = 224

    # If input_shape is None and no input_tensor
    elif input_shape is None:
        default_size = 224

    # If input_shape is not None, assume default size
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224
            warnings.warn('MobileNet shape is undefined. '
                          'Weights for input shape '
                          '(224, 224) will be loaded.')

    if backend.image_data_format() != 'channels_last':
        warnings.warn('The MobileNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv1_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    x = layers.ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5)

    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=6)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12)

    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=13)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1_bn')(x)
    x = layers.ReLU(6., name='out_relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(classes,
                         activation='softmax',
                         use_bias=True,
                         name='Logits')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs,
                         x,
                         name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        if include_top:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '.h5')
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
                                                cache_subdir='models')
        else:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    if old_data_format:
        backend.set_image_data_format(old_data_format)
    return model
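# A minimal usage sketch: a width-reduced feature extractor at a
# non-default resolution. (alpha=0.6 is not among the published ImageNet
# alpha values, so random initialization is used here.)
backbone = MobileNetV2(input_shape=(160, 160, 3),
                       alpha=0.6,
                       include_top=False,
                       pooling='avg',
                       weights=None)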
Example No. 26
def NASNet(input_shape=None,
           penultimate_filters=4032,
           num_blocks=6,
           stem_block_filters=96,
           skip_reduction=True,
           filter_multiplier=2,
           include_top=True,
           weights=None,
           input_tensor=None,
           pooling=None,
           classes=1000,
           default_size=None,
           **kwargs):
    '''Instantiates a NASNet model.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        input_shape: Optional shape tuple, the input shape
            is by default `(331, 331, 3)` for NASNetLarge and
            `(224, 224, 3)` for NASNetMobile.
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        penultimate_filters: Number of filters in the penultimate layer.
            NASNet models use the notation `NASNet (N @ P)`, where:
                -   N is the number of blocks
                -   P is the number of penultimate filters
        num_blocks: Number of repeated blocks of the NASNet model.
            NASNet models use the notation `NASNet (N @ P)`, where:
                -   N is the number of blocks
                -   P is the number of penultimate filters
        stem_block_filters: Number of filters in the initial stem block
        skip_reduction: Whether to skip the reduction step at the tail
            end of the network.
        filter_multiplier: Controls the width of the network.
            - If `filter_multiplier` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `filter_multiplier` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `filter_multiplier` = 1, default number of filters from the
                 paper are used at each layer.
        include_top: Whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: Optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: Optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        default_size: Specifies the default image size of the model

    # Returns
        A Keras model instance.

    # Raises
        ValueError: In case of invalid argument for `weights`,
            invalid input shape or invalid `penultimate_filters` value.
    '''

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    if (isinstance(input_shape, tuple) and None in input_shape
            and weights == 'imagenet'):
        raise ValueError('When specifying the input shape of a NASNet'
                         ' and loading `ImageNet` weights, '
                         'the input_shape argument must be static '
                         '(no None entries). Got: `input_shape=' +
                         str(input_shape) + '`.')

    if default_size is None:
        default_size = 331

    # Determine proper input shape and default size.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() != 'channels_last':
        warnings.warn('The NASNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    if penultimate_filters % 24 != 0:
        raise ValueError(
            'For NASNet-A models, the value of `penultimate_filters` '
            'needs to be divisible by 24. Current value: %d' %
            penultimate_filters)

    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    filters = penultimate_filters // 24

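    # Note: `weight_decay`, `use_bn` and `bn_momentum` below are module-level
    # configuration globals of the source file; they are not parameters of
    # this function and are assumed to be defined elsewhere.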
    x = layers.Conv2D(stem_block_filters, (3, 3),
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      kernel_regularizer=l2(weight_decay),
                      name='stem_conv1',
                      kernel_initializer='he_normal')(img_input)

    if use_bn:
        x = layers.BatchNormalization(axis=channel_dim,
                                      momentum=bn_momentum,
                                      epsilon=1e-3,
                                      name='stem_bn1')(x)

    p = None
    x, p = _reduction_a_cell(x,
                             p,
                             filters // (filter_multiplier**2),
                             block_id='stem_1')
    x, p = _reduction_a_cell(x,
                             p,
                             filters // filter_multiplier,
                             block_id='stem_2')

    for i in range(num_blocks):
        x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i))

    x, p0 = _reduction_a_cell(x,
                              p,
                              filters * filter_multiplier,
                              block_id='reduce_%d' % (num_blocks))

    p = p0 if not skip_reduction else p

    for i in range(num_blocks):
        x, p = _normal_a_cell(x,
                              p,
                              filters * filter_multiplier,
                              block_id='%d' % (num_blocks + i + 1))

    x, p0 = _reduction_a_cell(x,
                              p,
                              filters * filter_multiplier**2,
                              block_id='reduce_%d' % (2 * num_blocks))

    p = p0 if not skip_reduction else p

    for i in range(num_blocks):
        x, p = _normal_a_cell(x,
                              p,
                              filters * filter_multiplier**2,
                              block_id='%d' % (2 * num_blocks + i + 1))

    x = layers.Activation('relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = models.Model(inputs, x, name='NASNet')

    # Load weights.
    if weights == 'imagenet':
        if default_size == 224:  # mobile version
            if include_top:
                weights_path = keras_utils.get_file(
                    'nasnet_mobile.h5',
                    NASNET_MOBILE_WEIGHT_PATH,
                    cache_subdir='models',
                    file_hash='020fb642bf7360b370c678b08e0adf61')
            else:
                weights_path = keras_utils.get_file(
                    'nasnet_mobile_no_top.h5',
                    NASNET_MOBILE_WEIGHT_PATH_NO_TOP,
                    cache_subdir='models',
                    file_hash='1ed92395b5b598bdda52abe5c0dbfd63')
            model.load_weights(weights_path)
        elif default_size == 331:  # large version
            if include_top:
                weights_path = keras_utils.get_file(
                    'nasnet_large.h5',
                    NASNET_LARGE_WEIGHT_PATH,
                    cache_subdir='models',
                    file_hash='11577c9a518f0070763c2b964a382f17')
            else:
                weights_path = keras_utils.get_file(
                    'nasnet_large_no_top.h5',
                    NASNET_LARGE_WEIGHT_PATH_NO_TOP,
                    cache_subdir='models',
                    file_hash='d81d89dc07e6e56530c4e77faddd61b5')
            model.load_weights(weights_path)
        else:
            raise ValueError(
                'ImageNet weights can only be loaded with NASNetLarge'
                ' or NASNetMobile')
    elif weights is not None:
        model.load_weights(weights)

    if old_data_format:
        backend.set_image_data_format(old_data_format)

    return model
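# A hedged usage sketch reproducing the mobile configuration (these
# constants match the NASNetMobile wrapper in keras-applications;
# weights=None avoids any download):
nasnet_mobile = NASNet(input_shape=(224, 224, 3),
                       penultimate_filters=1056,
                       num_blocks=4,
                       stem_block_filters=32,
                       skip_reduction=False,
                       default_size=224,
                       weights=None)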
Example No. 27
def residual_network(x):
    """
    ResNeXt by default. For ResNet set `cardinality` = 1 above.

    """
    def add_common_layers(y):
        y = layers.BatchNormalization()(y)
        y = layers.LeakyReLU()(y)

        return y

    def grouped_convolution(y, nb_channels, _strides):
        # when `cardinality` == 1 this is just a standard convolution
        if cardinality == 1:
            return layers.Conv2D(nb_channels,
                                 kernel_size=(3, 3),
                                 strides=_strides,
                                 padding='same')(y)

        assert not nb_channels % cardinality
        _d = nb_channels // cardinality

        # in a grouped convolution layer, input and output channels are divided into `cardinality` groups,
        # and convolutions are separately performed within each group
        groups = []
        for j in range(cardinality):
            # bind `j` and `_d` as default arguments so each Lambda layer
            # slices its own channel group even if the function is re-traced
            group = layers.Lambda(
                lambda z, j=j, _d=_d: z[:, :, :, j * _d:j * _d + _d])(y)
            groups.append(
                layers.Conv2D(_d,
                              kernel_size=(3, 3),
                              strides=_strides,
                              padding='same')(group))

        # the grouped convolutional layer concatenates them as the outputs of the layer
        y = layers.concatenate(groups)

        return y

    def residual_block(y,
                       nb_channels_in,
                       nb_channels_out,
                       _strides=(1, 1),
                       _project_shortcut=False):
        """
        Our network consists of a stack of residual blocks. These blocks have the same topology,
        and are subject to two simple rules:
        - If producing spatial maps of the same size, the blocks share the same hyper-parameters (width and filter sizes).
        - Each time the spatial map is down-sampled by a factor of 2, the width of the blocks is multiplied by a factor of 2.
        """
        shortcut = y

        # we modify the residual building block as a bottleneck design to make the network more economical
        y = layers.Conv2D(nb_channels_in,
                          kernel_size=(1, 1),
                          strides=(1, 1),
                          padding='same')(y)
        y = add_common_layers(y)

        # ResNeXt (identical to ResNet when `cardinality` == 1)
        y = grouped_convolution(y, nb_channels_in, _strides=_strides)
        y = add_common_layers(y)

        y = layers.Conv2D(nb_channels_out,
                          kernel_size=(1, 1),
                          strides=(1, 1),
                          padding='same')(y)
        # batch normalization is employed after aggregating the transformations and before adding to the shortcut
        y = layers.BatchNormalization()(y)

        # identity shortcuts used directly when the input and output are of the same dimensions
        if _project_shortcut or _strides != (1, 1):
            # when the dimensions increase projection shortcut is used to match dimensions (done by 1×1 convolutions)
            # when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2
            shortcut = layers.Conv2D(nb_channels_out,
                                     kernel_size=(1, 1),
                                     strides=_strides,
                                     padding='same')(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        y = layers.add([shortcut, y])

        # ReLU is performed right after each batch normalization,
        # except for the output of the block, where ReLU is applied after adding the shortcut
        y = layers.LeakyReLU()(y)

        return y

    ## Stream 1
    # conv1
    x = layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2),
                      padding='same')(x)
    x = add_common_layers(x)

    # conv2
    x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    for i in range(3):
        project_shortcut = (i == 0)
        x = residual_block(x, 128, 256, _project_shortcut=project_shortcut)

    # conv3
    for i in range(4):
        # down-sampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 256, 512, _strides=strides)

    ## Stream 2
    # conv1
    x = layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2),
                      padding='same')(x)
    x = add_common_layers(x)

    # conv2
    x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    for i in range(3):
        project_shortcut = (i == 0)
        x = residual_block(x, 128, 256, _project_shortcut=project_shortcut)

    # conv3
    for i in range(4):
        # down-sampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 256, 512, _strides=strides)

    ## Stream 3
    # conv1
    x = layers.Conv2D(64, kernel_size=(7, 7), strides=(2, 2),
                      padding='same')(x)
    x = add_common_layers(x)

    # conv2
    x = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)
    for i in range(3):
        project_shortcut = (i == 0)
        x = residual_block(x, 128, 256, _project_shortcut=project_shortcut)

    # conv3
    for i in range(4):
        # down-sampling is performed by conv3_1, conv4_1, and conv5_1 with a stride of 2
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 256, 512, _strides=strides)

    # conv4
    for i in range(6):
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 512, 1024, _strides=strides)

    # conv5
    for i in range(3):
        strides = (2, 2) if i == 0 else (1, 1)
        x = residual_block(x, 1024, 2048, _strides=strides)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1)(x)

    return x
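
# A minimal usage sketch (assumes `cardinality` is set at module level, e.g.
# cardinality = 32 for ResNeXt or cardinality = 1 for a plain ResNet):
from keras import layers, models

image_tensor = layers.Input(shape=(224, 224, 3))
network_output = residual_network(image_tensor)
model = models.Model(inputs=[image_tensor], outputs=[network_output])
model.summary()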
Example No. 28
# For block2_pool
block2_pool_norm = layers.normalization.BatchNormalization(
    axis=-1, momentum=0.99, epsilon=0.001)(block2_pool_input)
block2_pool_norm = layers.AveragePooling2D(pool_size=(8, 8))(block2_pool_norm)

# For block1_pool
# block1_pool_norm = layers.normalization.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(block1_pool_input)
# block1_pool_norm = layers.AveragePooling2D(pool_size=(16, 16))(block1_pool_norm)

output_fusion_norm = layers.concatenate(
    [block5_pool_norm, block3_pool_norm, block2_pool_norm], axis=-1)

# output_fusion_conc = layers.Lambda(expand_dim_backend)(output_fusion_norm)
output_fusion_norm = layers.normalization.BatchNormalization(
    axis=-1, momentum=0.99, epsilon=0.001)(output_fusion_norm)

# Configure the fully-connected layers
FC_output = layers.GlobalAveragePooling2D()(output_fusion_norm)
FC_output = layers.Dense(4096,
                         activation='relu',
                         kernel_regularizer=regularizers.l2(0.001))(FC_output)
FC_output = layers.Dense(4096,
                         activation='relu',
                         kernel_regularizer=regularizers.l2(0.001))(FC_output)
FC_output = layers.Dense(num_classes, activation='sigmoid')(FC_output)

model = Model(inputs=conv_base.input, outputs=FC_output)
model.summary()

for layer_index, layer in enumerate(model.layers):
    print([layer_index, layer.name])

# freeze the conv_base
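# A hedged sketch of the freeze step announced above, using the standard Keras
# idiom of marking the convolutional base non-trainable before compiling:
for layer in conv_base.layers:
    layer.trainable = False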
Example No. 29
def VGG16(input_shape,
          include_top=True,
          weights='imagenet',
          pooling=None,
          classes=1000,
          final_activation='sigmoid'):

    img_input = Input(input_shape)  # avoid shadowing the built-in `input`
    # Block 1
    x = Conv2D_Initialize(64, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block1_conv1',
                          bias_initializer='zero')(img_input)
    x = Conv2D_Initialize(64, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block1_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D_Initialize(128, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block2_conv1')(x)
    x = Conv2D_Initialize(128, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block2_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D_Initialize(256, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block3_conv1')(x)
    x = Conv2D_Initialize(256, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block3_conv2')(x)
    x = Conv2D_Initialize(256, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block3_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D_Initialize(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block4_conv1')(x)
    x = Conv2D_Initialize(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block4_conv2')(x)
    x = Conv2D_Initialize(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block4_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D_Initialize(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block5_conv1')(x)
    x = Conv2D_Initialize(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block5_conv2')(x)
    x = Conv2D_Initialize(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name='block5_conv3')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        x = Dense_Initialize(4096, activation='relu', name='fc1')(x)
        x = Dense_Initialize(4096, activation='relu', name='fc2')(x)
        x = Dense_Initialize(classes,
                             activation=final_activation,
                             name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Create model.
    weights_path = None
    if weights == 'imagenet':
        if include_top:
            weights_path = keras_utils.get_file(
                'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='64373286793e3c8b2b4e3219cbf3544b')
        else:
            weights_path = keras_utils.get_file(
                'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='6d6bbae143d832006294945121d1f1fc')

    model = Model(img_input, x, name='vgg16')
    if weights_path:
        model.load_weights(weights_path, by_name=True, skip_mismatch=True)
    elif weights is not None:
        # also accept a path to a custom weights file
        model.load_weights(weights)
    return model
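
# A minimal usage sketch (hypothetical arguments; assumes the custom
# Conv2D_Initialize / Dense_Initialize helpers used above are defined in this
# module):
vgg = VGG16((224, 224, 3), include_top=True, weights=None,
            classes=10, final_activation='softmax')
vgg.summary()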
Example No. 30
    def define_graph(self, mode, options):
        assert mode in ['training', 'inference']

        box_pred_method = options['box_pred_method']
        print(f'box_pred_method: {box_pred_method}')
        assert box_pred_method in [
            'lbf_guided', 'regress_landmark', 'regress_segbox', 'gt_segbox']

        batch_size = options['images_per_gpu']

        heads = options['heads']
        num_heads = len(heads)
        print(f'num_heads={num_heads}')
        num_masks = 0
        for class_ids in heads:
            num_masks += len(class_ids)
        assert num_masks == len(options['class_names'])
        print(f'num_masks={num_masks}')

        head_label_names = []
        for class_ids in heads:
            names_this_head = []
            for class_id in class_ids:
                names_this_head.append(options['class_names'][class_id])
            head_label_names.append(names_this_head)
        assert len(head_label_names) == len(heads)

        h = w = options['image_size']

        # assert h > 0 and w > 0 and h % 2**6 == 0 and w % 2**6 == 0
        if 'landmark_box_paddings448' in options:
            delta = options.get('landmark_box_padding_additional_ratio', 0.0)
            molded_padding_dict = {
                name:
                np.array(padding, np.float32) / 448.0 +
                    np.array([-delta, -delta, +delta, +delta], np.float32)
                for name, padding in options['landmark_box_paddings448'].items()
            }
        else:
            raise RuntimeError('padding information required')

        pprint(molded_padding_dict)

        # mean landmark68 pts
        mean_molded_landmark68_pts = tf.stack(
            [utils.MEAN_MOLDED_LANDMARK68_PTS],
            name='mean_molded_landmark68_pts')
        # mean head boxes
        mean_molded_head_boxes = utils.extract_landmark68_boxes_graph(
            mean_molded_landmark68_pts,
            head_label_names,
            molded_padding_dict)

        dropout_rate = options.get('dropout_rate', 0.0)
        print(f'dropout_rate={dropout_rate}')

        # Inputs
        input_molded_image = KL.Input(
            shape=[h, w, 3], name="input_molded_image")  # molded
        input_molded_image_exist = KL.Input(
            shape=[1], name='input_molded_image_exist', dtype=tf.uint8)
        print('input: %s' % input_molded_image.name)
        print('input_molded_image_exist.shape: {}, {}'.format(
            input_molded_image_exist.shape,
            input_molded_image_exist._keras_shape))

        if mode == 'training':
            input_gt_masks = KL.Input(
                shape=[num_masks, h, w], name="input_gt_masks")
            input_gt_masks_exist = KL.Input(
                shape=[1], name='input_gt_masks_exist', dtype=tf.uint8)
            print('input_gt_masks_exist.shape: {}, {}'.format(
                input_gt_masks_exist.shape, input_gt_masks_exist._keras_shape))
            molded_gt_masks = KL.Lambda(lambda xx: tf.cast(xx, tf.float32))(
                input_gt_masks)

        if box_pred_method == 'lbf_guided':
            input_molded_lbf_landmark68_pts = KL.Input(
                shape=[68, 2],
                dtype=tf.float32,
                name="input_molded_lbf_landmark68_pts")
            input_molded_lbf_landmark68_pts_exist = KL.Input(
                shape=[1],
                name='input_molded_lbf_landmark68_pts_exist',
                dtype=tf.uint8)
            print('input_molded_lbf_landmark68_pts_exist.shape: {}, {}'.format(
                input_molded_lbf_landmark68_pts_exist.shape,
                input_molded_lbf_landmark68_pts_exist._keras_shape))

        elif box_pred_method == 'regress_landmark':
            if mode == 'training':
                input_gt_molded_landmark68_pts = KL.Input(
                    shape=[68, 2],
                    dtype=tf.float32,
                    name='input_gt_molded_landmark68_pts')
                input_gt_molded_landmark68_pts_exist = KL.Input(
                    shape=[1],
                    name='input_gt_molded_landmark68_pts_exist', dtype=tf.uint8)

        elif box_pred_method == 'regress_segbox':
            def _box_to_std_deform(box):
                return utils.compute_box_deform(mean_molded_head_boxes, box)

            def _std_deform_to_box(deform):
                return utils.apply_box_deform(mean_molded_head_boxes, deform)

            if mode == 'training':
                input_gt_molded_head_boxes = KL.Input(
                    shape=[num_heads, 4],
                    dtype=tf.float32,
                    name='input_gt_molded_head_boxes')
                input_gt_molded_head_boxes_exist = KL.Input(
                    shape=[1],
                    name='input_gt_molded_head_boxes_exist', dtype=tf.uint8)

                # get box deforms
                input_gt_head_box_deforms = KL.Lambda(
                    _box_to_std_deform,
                    name='input_gt_head_box_deforms')(
                        input_gt_molded_head_boxes)

        elif box_pred_method == 'gt_segbox':
            input_gt_molded_head_boxes = KL.Input(
                shape=[num_heads, 4],
                dtype=tf.float32,
                name='input_gt_molded_head_boxes')
            input_gt_molded_head_boxes_exist = KL.Input(
                shape=[1],
                name='input_gt_molded_head_boxes_exist', dtype=tf.uint8)

        # Construct Backbone Network
        box_from = options.get('box_from', 'P2')

        def _expand_boxes_by_ratio(boxes, rel_ratio):
            y1, x1, y2, x2 = tf.split(boxes, 4, axis=-1)
            cy = (y1 + y2) / 2.0
            cx = (x1 + x2) / 2.0
            h2 = (y2 - y1) / 2.0
            w2 = (x2 - x1) / 2.0
            yy1 = cy - h2 * (1 + rel_ratio)
            xx1 = cx - w2 * (1 + rel_ratio)
            yy2 = cy + h2 * (1 + rel_ratio)
            xx2 = cx + w2 * (1 + rel_ratio)
            return tf.concat([yy1, xx1, yy2, xx2], axis=-1)
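        # e.g. with rel_ratio=0.1, a box [0.2, 0.2, 0.8, 0.8] (center 0.5/0.5,
        # half-extents 0.3) expands to [0.17, 0.17, 0.83, 0.83]: each
        # half-extent grows by 10%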

        if options['backbone'] == 'vgg16':
            print('making vgg16 backbone')
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)
            assert box_from == 'C5'
            box_feature = C5
            mrcnn_feature_maps = [C5]
        elif options['backbone'] == 'vgg16fpn':
            print('making vgg16fpn backbone')
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2, P3, P4, P5]
        elif options['backbone'] == 'vgg16fpnP2':
            print('making vgg16fpnP2 backbone')
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2]
        elif options['backbone'] == 'resnet50':
            C1, C2, C3, C4, _ = resnet_graph(
                input_molded_image, 'resnet50', False)
            assert box_from == 'C4'
            box_feature = C4
            mrcnn_feature_maps = [C4]
        elif options['backbone'] == 'resnet50fpn':
            print('making resnet50fpn backbone')
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2, P3, P4, P5]
        elif options['backbone'] == 'resnet50fpnP2':
            print('making resnet50fpnP2 backbone')
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [P2]
        elif options['backbone'] == 'resnet50fpnC4':
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)

            P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
            if box_from == 'P2':
                box_feature = P2
            elif box_from == 'C5':
                box_feature = C5
            elif box_from == 'C4':
                box_feature = C4
            mrcnn_feature_maps = [C4]
        else:
            raise NotImplementedError()

        if box_pred_method in ['regress_landmark', 'regress_segbox']:
            # get box and optionally landmarks
            with tf.name_scope('box_neck'):
                x = box_feature
                box_neck_conv_num = options['box_neck_conv_num']
                for k in range(box_neck_conv_num):
                    x = KL.Conv2D(320, (3, 3), strides=(1, 1),
                                  padding='same', name=f'box_conv{k}')(x)
                    x = KL.BatchNormalization(name=f'box_convbn{k}')(x)
                    x = KL.Activation('relu')(x)

                x = KL.Conv2D(1280, (1, 1), name='box_conv_last')(x)
                x = KL.BatchNormalization(name='box_convbn_last')(x)

                x = KL.GlobalAveragePooling2D()(x)
                x = KL.Dropout(dropout_rate)(x)
            box_feature = x
            print(f'box_feature.shape={box_feature.shape}')

        if box_pred_method == 'lbf_guided':
            molded_head_boxes = KL.Lambda(
                lambda xx: utils.extract_landmark68_boxes_graph(
                    xx, head_label_names, molded_padding_dict),
                name='molded_head_boxes')(input_molded_lbf_landmark68_pts)

        elif box_pred_method == 'regress_landmark':
            x = box_feature
            x = KL.Dense(68 * 2, name='box_landmark_fc')(x)
            x = KL.Reshape((68, 2))(x)  # landmark68 offsets

            pred_molded_landmark68_pts = KL.Lambda(
                lambda xx: xx + mean_molded_landmark68_pts,
                name='pred_molded_landmark68_pts')(x)
            molded_head_boxes = KL.Lambda(
                lambda xx: utils.extract_landmark68_boxes_graph(
                    xx, head_label_names, molded_padding_dict),
                name='molded_head_boxes')(pred_molded_landmark68_pts)

            # compute landmark loss
            if mode == 'training':
                # Point loss
                def _l2_loss(pts1, pts2):
                    # (batch, 68, 2)
                    return tf.reduce_mean(
                        tf.norm(pts1 - pts2, axis=-1), axis=-1)
                landmark68_loss = KL.Lambda(lambda xx: _l2_loss(xx[0], xx[1]))(
                    [pred_molded_landmark68_pts, input_gt_molded_landmark68_pts])
                landmark68_loss = KL.Lambda(
                    lambda xx: tf.where(
                        tf.reshape(xx[0] > 0, tf.shape(xx[1])),
                        xx[1], tf.zeros_like(xx[1])),
                    name='landmark68_loss')([
                        input_gt_molded_landmark68_pts_exist, landmark68_loss])
                print('landmark68_loss.shape={}, {}'.format(
                    landmark68_loss.shape, landmark68_loss._keras_shape))

        elif box_pred_method == 'regress_segbox':
            x = box_feature
            x = KL.Dense(num_heads * 4, name='box_fc')(x)

            use_rpn_box_loss = options.get('use_rpn_box_loss', True)
            print(f'use_rpn_box_loss={use_rpn_box_loss}')

            if use_rpn_box_loss:
                pred_head_box_deforms = KL.Reshape(
                    (num_heads, 4))(x)  # box deforms

                pred_molded_head_boxes = KL.Lambda(
                    _std_deform_to_box, name='pred_molded_head_boxes')(
                    pred_head_box_deforms)
                head_box_padding_ratio = options['head_box_padding_ratio']
                molded_head_boxes = KL.Lambda(lambda xx: tf.stop_gradient(
                    xx + tf.constant([
                        - head_box_padding_ratio,
                        - head_box_padding_ratio,
                        head_box_padding_ratio,
                        head_box_padding_ratio
                    ], tf.float32)), name='molded_head_boxes')(pred_molded_head_boxes)

                # compute segbox loss
                if mode == 'training':
                    # Box loss
                    use_soft_l1_loss = options.get('use_soft_l1_loss', True)

                    def _l1_loss(box_deform1, box_deform2):
                        # (batch, num_heads, 4)
                        if use_soft_l1_loss:
                            return tf.reduce_mean(
                                tf.sqrt(tf.square(box_deform1 -
                                                  box_deform2) + K.epsilon()),
                                axis=[1, 2])
                        else:
                            return tf.reduce_mean(tf.abs(box_deform1 - box_deform2), axis=[1, 2])
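                    # the soft variant, sqrt(x^2 + eps) with eps = K.epsilon()
                    # (1e-7 by default), tracks |x| for large errors but stays
                    # differentiable at 0: x=0.3 gives ~0.3000002, while x=0
                    # gives ~3.2e-4 rather than exactly 0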
                    box_loss = KL.Lambda(lambda xx: _l1_loss(xx[0], xx[1]))(
                        [input_gt_head_box_deforms, pred_head_box_deforms])
                    box_loss = KL.Lambda(
                        lambda xx: tf.where(tf.reshape(
                            xx[0] > 0, tf.shape(xx[1])), xx[1], tf.zeros_like(xx[1])),
                        name='box_loss')([
                            input_gt_molded_head_boxes_exist,
                            box_loss])

                    print('box_loss.shape={}, {}'.format(
                        box_loss.shape, box_loss._keras_shape))
            else:
                pred_molded_head_boxes = KL.Reshape((num_heads, 4))(x)

                head_box_padding_ratio = options['head_box_padding_ratio']
                molded_head_boxes = KL.Lambda(lambda xx: tf.stop_gradient(
                    xx + tf.constant([
                        - head_box_padding_ratio,
                        - head_box_padding_ratio,
                        head_box_padding_ratio,
                        head_box_padding_ratio
                    ], tf.float32)), name='molded_head_boxes')(pred_molded_head_boxes)

                # compute segbox loss
                if mode == 'training':
                    # Box loss
                    use_soft_l1_loss = options.get('use_soft_l1_loss', True)

                    def _l1_loss(box_deform1, box_deform2):
                        # (batch, num_heads, 4)
                        if use_soft_l1_loss:
                            return tf.reduce_mean(
                                tf.sqrt(tf.square(box_deform1 -
                                                  box_deform2) + K.epsilon()),
                                axis=[1, 2])
                        else:
                            return tf.reduce_mean(tf.abs(box_deform1 - box_deform2), axis=[1, 2])
                    box_loss = KL.Lambda(lambda xx: _l1_loss(xx[0], xx[1]))(
                        [input_gt_molded_head_boxes, pred_molded_head_boxes])
                    box_loss = KL.Lambda(
                        lambda xx: tf.where(tf.reshape(
                            xx[0] > 0, tf.shape(xx[1])), xx[1], tf.zeros_like(xx[1])),
                        name='box_loss')([
                            input_gt_molded_head_boxes_exist,
                            box_loss])

                    print('box_loss.shape={}, {}'.format(
                        box_loss.shape, box_loss._keras_shape))

        elif box_pred_method == 'gt_segbox':
            head_box_padding_ratio = options['head_box_padding_ratio']
            molded_head_boxes = KL.Lambda(lambda xx: tf.stop_gradient(
                xx + tf.constant([
                    - head_box_padding_ratio,
                    - head_box_padding_ratio,
                    head_box_padding_ratio,
                    head_box_padding_ratio
                ], tf.float32)), name='molded_head_boxes')(input_gt_molded_head_boxes)

        if 'fixed_head_box' in options:
            # replace certain molded_head_boxes with assigned ones
            fixed_head_box = options['fixed_head_box']
            fixed_head_box_flags = np.zeros((num_heads), np.uint8)
            fixed_head_box_values = np.zeros((num_heads, 4), np.float32)
            for head_id, box in fixed_head_box.items():
                fixed_head_box_flags[head_id] = 1
                fixed_head_box_values[head_id, :] = np.array(box, np.float32)
            print(f'fixed_head_box_flags={fixed_head_box_flags}')
            print(f'fixed_head_box_values={fixed_head_box_values}')

            fixed_head_box_flags = tf.tile(
                tf.expand_dims(tf.expand_dims(
                    tf.constant(fixed_head_box_flags), 0), -1),
                [tf.shape(molded_head_boxes)[0], 1, 4])
            fixed_head_box_values = tf.tile(
                tf.expand_dims(tf.constant(fixed_head_box_values), 0),
                [tf.shape(molded_head_boxes)[0], 1, 1])
            molded_head_boxes = KL.Lambda(lambda xx: tf.where(
                tf.cast(fixed_head_box_flags, tf.bool),
                fixed_head_box_values, xx))(molded_head_boxes)

        # visualize pts and boxes
        # with tf.name_scope('boxes_pts'):

        #     def _show_boxes_pts(im, boxes, pts=None):
        #         return visualize.tf_display_boxes_pts(
        #             im, boxes, pts, utils.MEAN_PIXEL)

        #     show_num = min(batch_size, 3)
        #     if box_pred_method == 'regress_landmark':
        #         label_pts = [('pred_molded_landmark68_pts',
        #                       pred_molded_landmark68_pts)]
        #         if mode == 'training':
        #             label_pts.append(
        #                 ('input_gt_molded_landmark68_pts', input_gt_molded_landmark68_pts))
        #         for label, pts in label_pts:
        #             plot_ims = []
        #             for k in range(show_num):
        #                 im = tfplot.ops.plot(_show_boxes_pts, [
        #                     input_molded_image[k, :, :, :],
        #                     molded_head_boxes[k, :, :],
        #                     pts[k, :, :]])
        #                 plot_ims.append(im)
        #             plot_ims = tf.stack(plot_ims, axis=0)
        #             tf.summary.image(
        #                 name=label, tensor=plot_ims)
        #     else:
        #         plot_ims = []
        #         for k in range(show_num):
        #             im = tfplot.ops.plot(_show_boxes_pts, [
        #                 input_molded_image[k, :, :, :],
        #                 molded_head_boxes[k, :, :]])
        #             plot_ims.append(im)
        #         plot_ims = tf.stack(plot_ims, axis=0)
        #         tf.summary.image(
        #             name='molded_head_boxes', tensor=plot_ims)

        # Construct Head Networks
        head_class_nums = [len(class_ids) for class_ids in heads]

        # ROI Pooling
        pool_size = options.get('pool_size', 56)
        deconv_num = options.get('deconv_num', 2)
        conv_num = options.get('conv_num', 1)

        molded_head_boxes = KL.Lambda(tf.stop_gradient)(molded_head_boxes)

        aligned = PyramidROIAlignAll(
            [pool_size, pool_size], name="roi_align_mask")(
                [molded_head_boxes] + mrcnn_feature_maps)
        # print(aligned._keras_shape)

        fg_masks = [None] * num_heads
        bg_masks = [None] * num_heads

        def _slice_lambda(index):
            return lambda xx: xx[:, index, :, :, :]
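        # the factory binds `index` per layer; an inline `lambda xx: xx[:, i]`
        # would close over the loop variable and see its final value if the
        # function were ever re-evaluated later (e.g. after deserialization)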

        head_mask_features = [None] * num_heads
        for i in range(num_heads):
            x = KL.Lambda(_slice_lambda(i))(aligned)

            for k in range(conv_num):
                x = KL.Conv2D(
                    256, (3, 3),
                    padding="same",
                    name=f"mrcnn_mask_conv{k+1}_{i}")(x)
                x = BatchNorm(axis=-1, name=f'mrcnn_mask_bn{k+1}_{i}')(x)
                x = KL.Activation('relu')(x)
                if dropout_rate > 0:
                    x = KL.Dropout(dropout_rate)(x)

            if deconv_num == 1:  # to be compatible with previous trained models
                x = KL.Conv2DTranspose(
                    256, (2, 2),
                    strides=2,
                    activation="relu",
                    name="mrcnn_mask_deconv_%d" % i)(x)
            else:
                for k in range(deconv_num):
                    x = KL.Conv2DTranspose(
                        256, (2, 2),
                        strides=2,
                        activation="relu",
                        name="mrcnn_mask_deconv%d_%d" % (k + 1, i))(x)
            # [batch, h, w, 256]
            head_mask_features[i] = x

        mask_feature_size = pool_size * 2**deconv_num

        for i in range(num_heads):
            x = head_mask_features[i]
            num_classes_this_head = head_class_nums[i]
            assert num_classes_this_head > 0

            x = KL.Conv2D(
                1 + num_classes_this_head, (1, 1), strides=1,
                name='mrcnn_mask_conv_last_%d' % i,
                activation='linear')(x)
            x = KL.Lambda(
                lambda xx: tf.nn.softmax(xx, axis=-1),
                name="mrcnn_fullmask_%d" % i)(x)

            # [batch, height, width, num_classes]
            # [batch, num_classes, height, width]
            fg_masks[i] = KL.Lambda(
                lambda xx: tf.transpose(xx[:, :, :, 1:], [0, 3, 1, 2]),
                name='mrcnn_fg_mask_%d' % i)(x)

            # [batch, height, width]
            bg_masks[i] = KL.Lambda(
                lambda xx: xx[:, :, :, 0], name='mrcnn_bg_mask_%d' % i)(x)

            print(fg_masks[i]._keras_shape, fg_masks[i].shape,
                  bg_masks[i]._keras_shape, bg_masks[i].shape)

        if len(fg_masks) > 1:
            mrcnn_fg_masks = KL.Lambda(
                lambda xx: tf.concat(xx, axis=1), name='mrcnn_fg_masks')(fg_masks)
        else:
            mrcnn_fg_masks = KL.Lambda(
                lambda xx: xx, name='mrcnn_fg_masks')(fg_masks[0])

        if len(bg_masks) > 1:
            mrcnn_bg_masks = KL.Lambda(
                lambda xx: tf.stack(xx, axis=1), name='mrcnn_bg_masks')(bg_masks)
        else:
            mrcnn_bg_masks = KL.Lambda(
                lambda xx: tf.expand_dims(xx, axis=1),
                name='mrcnn_bg_masks')(bg_masks[0])

        # [batch, num_masks+num_heads, height, width]
        mrcnn_masks = KL.Concatenate(
            axis=1, name='mrcnn_masks')([mrcnn_fg_masks, mrcnn_bg_masks])
        print('mrcnn_masks.shape={}, {}'.format(mrcnn_masks.shape,
                                                mrcnn_masks._keras_shape))

        def _tile_by_head_classes(data):
            tiled = [None] * num_masks
            for i, class_ids in enumerate(heads):
                for class_id in class_ids:
                    tiled[class_id] = data[:, i]
            assert None not in tiled
            return tf.stack(tiled, axis=1)
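        # e.g. with heads=[[0, 1], [2]], per-head data of shape (batch, 2, ...)
        # tiles to per-mask shape (batch, 3, ...): masks 0 and 1 both receive
        # head 0's entry, and mask 2 receives head 1's entry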

        # Unmold masks back to image view
        def _unmold_mask(masks, boxes):
            # masks: (batch, num_masks, h, w)
            # boxes: (batch, num_heads, 4)
            mask_h, mask_w = tf.shape(masks)[2], tf.shape(masks)[3]

            # (batch, num_masks, 4)
            boxes = _tile_by_head_classes(boxes)

            masks = tf.reshape(masks, (-1, mask_h, mask_w))
            boxes = tf.reshape(boxes, (-1, 4))

            unmolded_masks = inverse_box_crop(masks, boxes, [h, w])
            unmolded_masks = tf.reshape(unmolded_masks, (-1, num_masks, h, w))
            return unmolded_masks

        output_masks = KL.Lambda(
            lambda xx: _unmold_mask(xx[0], xx[1]),
            name='output_masks')([mrcnn_fg_masks, molded_head_boxes])
        print('output_masks.shape={}, {}'.format(
            output_masks.shape, output_masks._keras_shape))

        # if options.get('full_view_mask_loss', False):

        if mode == "training":
            head_mask_shape = [mask_feature_size, mask_feature_size]
            print('head_mask_shape={}'.format(head_mask_shape))

            # mask loss
            # extract target gt fg masks
            def _extract_gt_fg_batched(gt_masks, boxes):
                # gt_masks: [batch, num_masks, h, w]
                # boxes: [batch, num_heads, 4]

                # [batch * num_masks, h, w, 1]
                gt_masks = tf.reshape(gt_masks, [-1, h, w, 1])

                # [batch, num_masks, 4]
                boxes = _tile_by_head_classes(boxes)
                # [batch * num_masks, 4]
                boxes = tf.reshape(boxes, [-1, 4])

                # [batch * num_masks, mask_h, mask_w, 1]
                target_masks = tf.image.crop_and_resize(
                    gt_masks, boxes, tf.range(tf.shape(gt_masks)[0]),
                    head_mask_shape)
                target_masks = tf.reshape(target_masks,
                                          [-1, num_masks] + head_mask_shape)
                return target_masks

            target_gt_fg_masks = KL.Lambda(
                lambda xx: _extract_gt_fg_batched(xx[0], xx[1]))(
                    [molded_gt_masks, molded_head_boxes])

            # extract target gt bg masks
            def _extract_gt_bg_batched(gt_masks, boxes):
                # gt_masks: [batch, num_masks, h, w]
                # boxes: [batch, num_heads, 4]

                gt_bg_masks = [None] * num_heads
                for i, class_ids in enumerate(heads):
                    gt_masks_this_head = [None] * len(class_ids)
                    for j, class_id in enumerate(class_ids):
                        # each of [batch, h, w]
                        gt_masks_this_head[j] = gt_masks[:, class_id, :, :]
                    # [batch, len(class_ids), h, w]
                    gt_masks_this_head = tf.stack(gt_masks_this_head, axis=1)
                    # [batch, h, w]
                    gt_bg_masks[i] = 1.0 - tf.reduce_max(
                        gt_masks_this_head, axis=1)

                # [batch, num_heads, h, w]
                gt_bg_masks = tf.stack(gt_bg_masks, axis=1)
                # [batch * num_heads, h, w, 1]
                gt_bg_masks = tf.reshape(gt_bg_masks, [-1, h, w, 1])

                # [batch * num_heads, 4]
                boxes = tf.reshape(boxes, [-1, 4])

                # [batch * num_heads, mask_h, mask_w, 1]
                target_masks = tf.image.crop_and_resize(
                    gt_bg_masks, boxes, tf.range(tf.shape(gt_bg_masks)[0]),
                    head_mask_shape, extrapolation_value=1)  # !!!
                target_masks = tf.reshape(target_masks,
                                          [-1, num_heads] + head_mask_shape)
                return target_masks

            target_gt_bg_masks = KL.Lambda(
                lambda xx: _extract_gt_bg_batched(xx[0], xx[1]))(
                    [molded_gt_masks, molded_head_boxes])

            target_gt_masks = KL.Concatenate(
                axis=1, name='target_gt_masks')(
                    [target_gt_fg_masks, target_gt_bg_masks])
            print('target_gt_masks.shape={}, {}'.format(
                target_gt_masks.shape, target_gt_masks._keras_shape))

            mask_loss_im = KL.Lambda(
                lambda xx: K.binary_crossentropy(target=xx[0], output=xx[1]),
                name="mask_ls_im")([target_gt_masks, mrcnn_masks])
            print('mask_loss_im.shape: {} {}'.format(mask_loss_im._keras_shape,
                                                     mask_loss_im.shape))

            mask_loss_im_reduced = KL.Lambda(
                lambda xx: tf.reduce_mean(xx, axis=[2, 3]),
                name='mask_loss_im_reduced')(mask_loss_im)

            def _get_individual_losses(loss_im, name, index):
                return KL.Lambda(
                    lambda xx: tf.reduce_mean(xx[:, index], axis=[1, 2]),
                    name=name)(loss_im)

            # visualization
            with tf.name_scope('original_masks'):
                for i, class_ids in enumerate(heads):
                    for j, class_id in enumerate(class_ids):
                        name = head_label_names[i][j]
                        fg_target_pred_original_view = tf.expand_dims(tf.concat([
                            tf.cast(
                                input_gt_masks[:, class_id, :, :], tf.float32),
                            output_masks[:, class_id, :, :]], axis=-1), axis=-1)
                        tf.summary.image(
                            f'fg_target_pred_original_view_{i}_{name}',
                            fg_target_pred_original_view)

            with tf.name_scope('cropped_masks'):
                for i, class_ids in enumerate(heads):
                    for j, class_id in enumerate(class_ids):
                        name = head_label_names[i][j]
                        fg_target_pred_loss = tf.expand_dims(tf.concat([
                            target_gt_fg_masks[:, class_id, :, :],
                            mrcnn_fg_masks[:, class_id, :, :],
                            mask_loss_im[:, class_id]], axis=-1), axis=-1)
                        tf.summary.image(
                            f'fg_target_pred_loss_{name}', fg_target_pred_loss)
                    bg_target_pred_loss = tf.expand_dims(tf.concat([
                        target_gt_bg_masks[:, i, :, :],
                        mrcnn_bg_masks[:, i, :, :],
                        mask_loss_im[:, i + num_masks]], axis=-1), axis=-1)
                    tf.summary.image(
                        f'bg_target_pred_loss_{i}', bg_target_pred_loss)

            mask_loss = KL.Lambda(
                lambda xx: tf.reduce_mean(xx, axis=[1]))(mask_loss_im_reduced)
            mask_loss = KL.Lambda(
                lambda xx: tf.where(tf.reshape(
                    xx[0] > 0, tf.shape(xx[1])), xx[1], tf.zeros_like(xx[1])),
                name='mask_loss')([input_gt_masks_exist, mask_loss])
            print('mask_loss.shape={}, {}'.format(mask_loss.shape,
                                                  mask_loss._keras_shape))

            if box_pred_method == 'lbf_guided':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_molded_lbf_landmark68_pts_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_molded_lbf_landmark68_pts
                ]
                outputs = [mask_loss]
            elif box_pred_method == 'regress_landmark':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_gt_molded_landmark68_pts_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_gt_molded_landmark68_pts
                ]
                outputs = [mask_loss, landmark68_loss]
            elif box_pred_method == 'regress_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_gt_molded_head_boxes_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_gt_molded_head_boxes,
                ]
                outputs = [mask_loss, box_loss]
            elif box_pred_method == 'gt_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_gt_masks_exist,
                    input_gt_molded_head_boxes_exist,
                    input_molded_image,
                    input_gt_masks,
                    input_gt_molded_head_boxes
                ]
                outputs = [mask_loss]
        else:
            if box_pred_method == 'lbf_guided':
                inputs = [
                    input_molded_image_exist,
                    input_molded_lbf_landmark68_pts_exist,
                    input_molded_image,
                    input_molded_lbf_landmark68_pts
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes
                ]
            elif box_pred_method == 'regress_landmark':
                inputs = [
                    input_molded_image_exist,
                    input_molded_image
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes,
                    pred_molded_landmark68_pts
                ]
            elif box_pred_method == 'regress_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_molded_image
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes
                ]
            elif box_pred_method == 'gt_segbox':
                inputs = [
                    input_molded_image_exist,
                    input_gt_molded_head_boxes_exist,
                    input_molded_image,
                    input_gt_molded_head_boxes
                ]
                outputs = [
                    output_masks,
                    molded_head_boxes
                ]
        return [inputs, outputs]
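
    # A hedged usage sketch (hypothetical `options` values and hypothetical
    # `model_builder` instance; the real dict is assembled by the surrounding
    # training code):
    #
    #   inputs, outputs = model_builder.define_graph('inference', options)
    #   keras_model = keras.models.Model(inputs=inputs, outputs=outputs)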