def ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the ResNet50 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 197. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=224, min_size=197, data_format=K.image_data_format(), include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: img_input = Input(tensor=input_tensor, shape=input_shape) if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = ZeroPadding2D((3, 3))(img_input) x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x0 = x x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, [x, x0], name='resnet50') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'resnet50_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', md5_hash='a7b3fe01876f51b976af0dea6bc144eb') else: weights_path = "./saved_model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5" model.load_weights(weights_path) return model
def ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the ResNet50 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. # Arguments include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 244)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 197. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=197, data_format=K.image_data_format(), include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = ZeroPadding2D((3, 3))(img_input) x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') x = AveragePooling2D((7, 7), name='avg_pool')(x) if include_top: x = Flatten()(x) x = Dense(classes, activation='softmax', name='fc1000')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='resnet50') # load weights if weights == 'imagenet': if include_top: weights_path = WEIGHTS_PATH else: weights_path = WEIGHTS_PATH_NO_TOP model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='avg_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1000') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') return model
def nn_base(input_tensor=None, trainable=False): # Determine proper input shape input_shape = (None, None, 3) if input_tensor is None: img_input = Input(shape=input_shape) else: if not is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 x = ZeroPadding2D((3, 3))(img_input) x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable=trainable)(x) x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable=trainable) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable=trainable) x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable=trainable) x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable=trainable) x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable=trainable) x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable=trainable) x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable=trainable) x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable=trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable=trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable=trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable=trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable=trainable) x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable=trainable) return x
def SSD300(input_shape, num_classes=21): """SSD300 architecture. # Arguments input_shape: Shape of the input image, expected to be either (300, 300, 3) or (3, 300, 300)(not tested). num_classes: Number of classes including background. # References https://arxiv.org/abs/1512.02325 """ net = {} # Block 1 input_tensor = input_tensor = Input(shape=input_shape) img_size = (input_shape[1], input_shape[0]) net['input'] = input_tensor net['conv1_1'] = Convolution2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(net['input']) net['conv1_2'] = Convolution2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(net['conv1_1']) net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool1')(net['conv1_2']) # Block 2 net['conv2_1'] = Convolution2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(net['pool1']) net['conv2_2'] = Convolution2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(net['conv2_1']) net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool2')(net['conv2_2']) # Block 3 net['conv3_1'] = Convolution2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(net['pool2']) net['conv3_2'] = Convolution2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(net['conv3_1']) net['conv3_3'] = Convolution2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(net['conv3_2']) net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool3')(net['conv3_3']) # Block 4 net['conv4_1'] = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(net['pool3']) net['conv4_2'] = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(net['conv4_1']) net['conv4_3'] = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(net['conv4_2']) net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same', name='pool4')(net['conv4_3']) # Block 5 net['conv5_1'] = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(net['pool4']) net['conv5_2'] = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(net['conv5_1']) net['conv5_3'] = Convolution2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(net['conv5_2']) net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), padding='same', name='pool5')(net['conv5_3']) # FC6 net['fc6'] = Convolution2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu', padding='same', name='fc6')(net['pool5']) # x = Dropout(0.5, name='drop6')(x) # FC7 net['fc7'] = Convolution2D(1024, (1, 1), activation='relu', padding='same', name='fc7')(net['fc6']) # x = Dropout(0.5, name='drop7')(x) # Block 6 net['conv6_1'] = Convolution2D(256, (1, 1), activation='relu', padding='same', name='conv6_1')(net['fc7']) net['conv6_2'] = Convolution2D(512, (3, 3), strides=(2, 2), activation='relu', padding='same', name='conv6_2')(net['conv6_1']) # Block 7 net['conv7_1'] = Convolution2D(128, (1, 1), activation='relu', padding='same', name='conv7_1')(net['conv6_2']) net['conv7_2'] = ZeroPadding2D()(net['conv7_1']) net['conv7_2'] = Convolution2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid', name='conv7_2')(net['conv7_2']) # Block 8 net['conv8_1'] = Convolution2D(128, (1, 1), activation='relu', padding='same', name='conv8_1')(net['conv7_2']) net['conv8_2'] = Convolution2D(256, (3, 3), strides=(2, 2), activation='relu', padding='same', name='conv8_2')(net['conv8_1']) # Last Pool net['pool6'] = GlobalAveragePooling2D(name='pool6')(net['conv8_2']) # Prediction from conv4_3 net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3']) num_priors = 3 x = Convolution2D(num_priors * 4, (3, 3), padding='same', name='conv4_3_norm_mbox_loc')(net['conv4_3_norm']) net['conv4_3_norm_mbox_loc'] = x flatten = Flatten(name='conv4_3_norm_mbox_loc_flat') net['conv4_3_norm_mbox_loc_flat'] = flatten(net['conv4_3_norm_mbox_loc']) name = 'conv4_3_norm_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) x = Convolution2D(num_priors * num_classes, (3, 3), padding='same', name=name)(net['conv4_3_norm']) net['conv4_3_norm_mbox_conf'] = x flatten = Flatten(name='conv4_3_norm_mbox_conf_flat') net['conv4_3_norm_mbox_conf_flat'] = flatten(net['conv4_3_norm_mbox_conf']) priorbox = PriorBox(img_size, 30.0, aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2], name='conv4_3_norm_mbox_priorbox') net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm']) # Prediction from fc7 num_priors = 6 net['fc7_mbox_loc'] = Convolution2D(num_priors * 4, (3, 3), padding='same', name='fc7_mbox_loc')(net['fc7']) flatten = Flatten(name='fc7_mbox_loc_flat') net['fc7_mbox_loc_flat'] = flatten(net['fc7_mbox_loc']) name = 'fc7_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) net['fc7_mbox_conf'] = Convolution2D(num_priors * num_classes, (3, 3), padding='same', name=name)(net['fc7']) flatten = Flatten(name='fc7_mbox_conf_flat') net['fc7_mbox_conf_flat'] = flatten(net['fc7_mbox_conf']) priorbox = PriorBox(img_size, 60.0, max_size=114.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='fc7_mbox_priorbox') net['fc7_mbox_priorbox'] = priorbox(net['fc7']) # Prediction from conv6_2 num_priors = 6 x = Convolution2D(num_priors * 4, (3, 3), padding='same', name='conv6_2_mbox_loc')(net['conv6_2']) net['conv6_2_mbox_loc'] = x flatten = Flatten(name='conv6_2_mbox_loc_flat') net['conv6_2_mbox_loc_flat'] = flatten(net['conv6_2_mbox_loc']) name = 'conv6_2_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) x = Convolution2D(num_priors * num_classes, (3, 3), padding='same', name=name)(net['conv6_2']) net['conv6_2_mbox_conf'] = x flatten = Flatten(name='conv6_2_mbox_conf_flat') net['conv6_2_mbox_conf_flat'] = flatten(net['conv6_2_mbox_conf']) priorbox = PriorBox(img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv6_2_mbox_priorbox') net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2']) # Prediction from conv7_2 num_priors = 6 x = Convolution2D(num_priors * 4, (3, 3), padding='same', name='conv7_2_mbox_loc')(net['conv7_2']) net['conv7_2_mbox_loc'] = x flatten = Flatten(name='conv7_2_mbox_loc_flat') net['conv7_2_mbox_loc_flat'] = flatten(net['conv7_2_mbox_loc']) name = 'conv7_2_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) x = Convolution2D(num_priors * num_classes, (3, 3), padding='same', name=name)(net['conv7_2']) net['conv7_2_mbox_conf'] = x flatten = Flatten(name='conv7_2_mbox_conf_flat') net['conv7_2_mbox_conf_flat'] = flatten(net['conv7_2_mbox_conf']) priorbox = PriorBox(img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv7_2_mbox_priorbox') net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2']) # Prediction from conv8_2 num_priors = 6 x = Convolution2D(num_priors * 4, (3, 3), padding='same', name='conv8_2_mbox_loc')(net['conv8_2']) net['conv8_2_mbox_loc'] = x flatten = Flatten(name='conv8_2_mbox_loc_flat') net['conv8_2_mbox_loc_flat'] = flatten(net['conv8_2_mbox_loc']) name = 'conv8_2_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) x = Convolution2D(num_priors * num_classes, (3, 3), padding='same', name=name)(net['conv8_2']) net['conv8_2_mbox_conf'] = x flatten = Flatten(name='conv8_2_mbox_conf_flat') net['conv8_2_mbox_conf_flat'] = flatten(net['conv8_2_mbox_conf']) priorbox = PriorBox(img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv8_2_mbox_priorbox') net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2']) # Prediction from pool6 num_priors = 6 x = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(net['pool6']) net['pool6_mbox_loc_flat'] = x name = 'pool6_mbox_conf_flat' if num_classes != 21: name += '_{}'.format(num_classes) x = Dense(num_priors * num_classes, name=name)(net['pool6']) net['pool6_mbox_conf_flat'] = x priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='pool6_mbox_priorbox') target_shape = (1, 1, 256) net['pool6_reshaped'] = Reshape(target_shape, name='pool6_reshaped')(net['pool6']) net['pool6_mbox_priorbox'] = priorbox(net['pool6_reshaped']) # Gather all predictions net['mbox_loc'] = concatenate([ net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'], net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'], net['conv8_2_mbox_loc_flat'], net['pool6_mbox_loc_flat'] ], axis=1, name='mbox_loc') net['mbox_conf'] = concatenate([ net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'], net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'], net['conv8_2_mbox_conf_flat'], net['pool6_mbox_conf_flat'] ], axis=1, name='mbox_conf') net['mbox_priorbox'] = concatenate([ net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'], net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'], net['conv8_2_mbox_priorbox'], net['pool6_mbox_priorbox'] ], axis=1, name='mbox_priorbox') num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4 net['mbox_loc'] = Reshape((num_boxes, 4), name='mbox_loc_final')(net['mbox_loc']) net['mbox_conf'] = Reshape((num_boxes, num_classes), name='mbox_conf_logits')(net['mbox_conf']) net['mbox_conf'] = Activation('softmax', name='mbox_conf_final')(net['mbox_conf']) net['predictions'] = concatenate( [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']], axis=2, #axis = 0, name='predictions') model = Model(net['input'], net['predictions']) return model