def conv_block(x, growth_rate, name): """A building block for a dense block. Arguments: x: input tensor. growth_rate: float, growth rate at dense layers. name: string, block label. Returns: output tensor for the block. """ bn_axis = 3 if K.image_data_format() == 'channels_last' else 1 x1 = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(x) x1 = Activation('relu', name=name + '_0_relu')(x1) x1 = Conv2D(4 * growth_rate, 1, use_bias=False, name=name + '_1_conv')(x1) x1 = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x1) x1 = Activation('relu', name=name + '_1_relu')(x1) x1 = Conv2D(growth_rate, 3, padding='same', use_bias=False, name=name + '_2_conv')(x1) x = Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) return x
def load_data(): dirname = 'data/' path = dirname num_train_samples = 50000 x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.zeros((num_train_samples, ), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path, 'data_batch_' + str(i)) data, labels = load_batch(fpath) x_train[(i - 1) * 10000:i * 10000, :, :, :] = data y_train[(i - 1) * 10000:i * 10000] = labels fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) x_test = 1.0 - x_test / 255.0 x_train = 1.0 - x_train / 255.0 return (x_train, y_train), (x_test, y_test) - x_train / 255.0 return (x_train, y_train), (x_test, y_test)
def load_data(): """Loads CIFAR10 dataset. Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ dirname = 'data/' # origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' path = dirname # get_file(dirname, origin=origin, untar=True) num_train_samples = 50000 x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.zeros((num_train_samples,), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path, 'data_batch_' + str(i)) data, labels = load_batch(fpath) x_train[(i - 1) * 10000:i * 10000, :, :, :] = data y_train[(i - 1) * 10000:i * 10000] = labels fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) x_test = 1.0 - x_test / 255.0 x_train = 1.0 - x_train / 255.0 return (x_train, y_train), (x_test, y_test)
def load_data(label_mode='fine'): """Loads CIFAR100 dataset. Arguments: label_mode: one of "fine", "coarse". Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. Raises: ValueError: in case of invalid `label_mode`. """ if label_mode not in ['fine', 'coarse']: raise ValueError('label_mode must be one of "fine" "coarse".') dirname = 'cifar-100-python' origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) fpath = os.path.join(path, 'train') x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') fpath = os.path.join(path, 'test') x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def preprocess_input(x, data_format=None, mode='caffe'): """Preprocesses a tensor or Numpy array encoding a batch of images. Arguments: x: Input Numpy or symbolic tensor, 3D or 4D. data_format: Data format of the image tensor/array. mode: One of "caffe", "tf". - caffe: will convert the images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling. - tf: will scale pixels between -1 and 1, sample-wise. Returns: Preprocessed tensor or Numpy array. Raises: ValueError: In case of unknown `data_format` argument. """ if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Unknown data_format ' + str(data_format)) if isinstance(x, np.ndarray): return _preprocess_numpy_input(x, data_format=data_format, mode=mode) else: return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)
def img_to_array(img, data_format=None): """Converts a PIL Image instance to a Numpy array. Arguments: img: PIL Image instance. data_format: Image data format. Returns: A 3D Numpy array. Raises: ValueError: if invalid `img` or `data_format` is passed. """ if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Unknown data_format: ', data_format) # Numpy array x has format (height, width, channel) # or (channel, height, width) # but original PIL image has format (width, height, channel) x = np.asarray(img, dtype=K.floatx()) if len(x.shape) == 3: if data_format == 'channels_first': x = x.transpose(2, 0, 1) elif len(x.shape) == 2: if data_format == 'channels_first': x = x.reshape((1, x.shape[0], x.shape[1])) else: x = x.reshape((x.shape[0], x.shape[1], 1)) else: raise ValueError('Unsupported image shape: ', x.shape) return x
def load_data(): """Loads CIFAR10 dataset. Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ dirname = 'cifar-10-batches-py' origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) num_train_samples = 50000 x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.zeros((num_train_samples,), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path, 'data_batch_' + str(i)) data, labels = load_batch(fpath) x_train[(i - 1) * 10000:i * 10000, :, :, :] = data y_train[(i - 1) * 10000:i * 10000] = labels fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def preprocess_input(x, data_format=None, mode='caffe'): """Preprocesses a tensor or Numpy array encoding a batch of images. Arguments: x: Input Numpy or symbolic tensor, 3D or 4D. data_format: Data format of the image tensor/array. mode: One of "caffe", "tf". - caffe: will convert the images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling. - tf: will scale pixels between -1 and 1, sample-wise. Returns: Preprocessed tensor or Numpy array. Raises: ValueError: In case of unknown `data_format` argument. """ if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Unknown data_format ' + str(data_format)) if isinstance(x, np.ndarray): return _preprocess_numpy_input(x, data_format=data_format, mode=mode) else: return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)
def load_weights(net, model, include_top=True, cache_dir=None): if net['WEIGHTS'] == 'imagenet': if include_top: weights_path = get_file( 'vgg16_weights_tf_dim_ordering_tf_kernels.h5', net['WEIGHTS_PATH'], cache_subdir='pretrained_models', file_hash='64373286793e3c8b2b4e3219cbf3544b', cache_dir=cache_dir) else: weights_path = get_file( 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', net['WEIGHTS_PATH_NO_TOP'], cache_subdir='pretrained_models', file_hash='6d6bbae143d832006294945121d1f1fc', cache_dir=cache_dir) model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='block5_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') elif net['WEIGHTS'] is not None: model.load_weights(net['WEIGHTS']) return model
def conv_block(x, growth_rate, name): """A building block for a dense block. Arguments: x: input tensor. growth_rate: float, growth rate at dense layers. name: string, block label. Returns: output tensor for the block. """ bn_axis = 3 if K.image_data_format() == 'channels_last' else 1 x1 = BatchNormalization( axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')( x) x1 = Activation('relu', name=name + '_0_relu')(x1) x1 = Conv2D(4 * growth_rate, 1, use_bias=False, name=name + '_1_conv')(x1) x1 = BatchNormalization( axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')( x1) x1 = Activation('relu', name=name + '_1_relu')(x1) x1 = Conv2D( growth_rate, 3, padding='same', use_bias=False, name=name + '_2_conv')( x1) x = Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) return x
def preprocess_input(x, data_format=None): """Preprocesses a tensor encoding a batch of images. Arguments: x: input Numpy tensor, 4D. data_format: data format of the image tensor. Returns: Preprocessed tensor. """ if data_format is None: data_format = K.image_data_format() assert data_format in {'channels_last', 'channels_first'} if data_format == 'channels_first': if x.ndim == 3: # 'RGB'->'BGR' x = x[::-1, ...] # Zero-center by mean pixel x[0, :, :] -= 103.939 x[1, :, :] -= 116.779 x[2, :, :] -= 123.68 else: x = x[:, ::-1, ...] x[:, 0, :, :] -= 103.939 x[:, 1, :, :] -= 116.779 x[:, 2, :, :] -= 123.68 else: # 'RGB'->'BGR' x = x[..., ::-1] # Zero-center by mean pixel x[..., 0] -= 103.939 x[..., 1] -= 116.779 x[..., 2] -= 123.68 return x
def load_data(label_mode='fine'): """Loads CIFAR100 dataset. Arguments: label_mode: one of "fine", "coarse". Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. Raises: ValueError: in case of invalid `label_mode`. """ if label_mode not in ['fine', 'coarse']: raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.') dirname = 'cifar-100-python' origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) fpath = os.path.join(path, 'train') x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') fpath = os.path.join(path, 'test') x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def load_cifar100_data(label_mode='fine'): """loads cifar100 dataset. arguments: label_mode: one of "fine", "coarse". returns: tuple of numpy arrays: `(x_train, y_train), (x_test, y_test)`. raises: valueerror: in case of invalid `label_mode`. """ if label_mode not in ['fine', 'coarse']: raise valueerror('label_mode must be one of "fine" "coarse".') print('load cifar100 dataset') dirname = 'cifar-100-python' origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) fpath = os.path.join(path, 'train') x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') fpath = os.path.join(path, 'test') x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) y_test = map(one_hot_vec_100, y_test) y_train = map(one_hot_vec_100, y_train) return (x_train, y_train, x_test, y_test)
def img_to_array(img, data_format=None): """Converts a PIL Image instance to a Numpy array. Arguments: img: PIL Image instance. data_format: Image data format. Returns: A 3D Numpy array. Raises: ValueError: if invalid `img` or `data_format` is passed. """ if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Unknown data_format: ', data_format) # Numpy array x has format (height, width, channel) # or (channel, height, width) # but original PIL image has format (width, height, channel) x = np.asarray(img, dtype=K.floatx()) if len(x.shape) == 3: if data_format == 'channels_first': x = x.transpose(2, 0, 1) elif len(x.shape) == 2: if data_format == 'channels_first': x = x.reshape((1, x.shape[0], x.shape[1])) else: x = x.reshape((x.shape[0], x.shape[1], 1)) else: raise ValueError('Unsupported image shape: ', x.shape) return x
def set_model(self, model): self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_grads: grads = model.optimizer.get_gradients( model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads ] tf_summary.histogram( '{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf_summary.FileWriter(self.log_dir)
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): """Adds an initial convolution layer (with batch normalization and relu6). Arguments: inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last` data format) or (3, rows, cols) (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. kernel: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. Input shape: 4D tensor with shape: `(samples, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, rows, cols, channels)` if data_format='channels_last'. Output shape: 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to stride. Returns: Output tensor of block. """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 filters = int(filters * alpha) x = Conv2D( filters, kernel, padding='same', use_bias=False, strides=strides, name='conv1')( inputs) x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x) return Activation(relu6, name='conv1_relu')(x)
def normalize_data_format(value): if value is None: value = K.image_data_format() data_format = value.lower() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('The `data_format` argument must be one of ' '"channels_first", "channels_last". Received: ' + str(value)) return data_format
def normalize_data_format(value): if value is None: value = backend.image_data_format() data_format = value.lower() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('The `data_format` argument must be one of ' '"channels_first", "channels_last". Received: ' + str(value)) return data_format
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): """Adds an initial convolution layer (with batch normalization and relu6). Arguments: inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last` data format) or (3, rows, cols) (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. kernel: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. Input shape: 4D tensor with shape: `(samples, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, rows, cols, channels)` if data_format='channels_last'. Output shape: 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to stride. Returns: Output tensor of block. """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 filters = int(filters * alpha) x = ZeroPadding2D(padding=(1, 1), name='conv1_pad')(inputs) x = Conv2D(filters, kernel, padding='valid', use_bias=False, strides=strides, name='conv1')(x) x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x) return Activation(relu6, name='conv1_relu')(x)
def __init__(self, rate, data_format=None, **kwargs): super(SpatialDropout3D, self).__init__(rate, **kwargs) if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_last', 'channels_first'}: raise ValueError('data_format must be in ' '{"channels_last", "channels_first"}') self.data_format = data_format self.input_spec = InputSpec(ndim=5)
def __init__(self, rate, data_format=None, **kwargs): super(SpatialDropout3D, self).__init__(rate, **kwargs) if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_last', 'channels_first'}: raise ValueError('data_format must be in ' '{"channels_last", "channels_first"}') self.data_format = data_format self.input_spec = InputSpec(ndim=5)
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): """A block that has a conv layer at shortcut. Arguments: input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names strides: Strides for the first conv layer in the block. Returns: Output tensor for the block. Note that from stage 3, the first conv layer at main path is with strides=(2, 2) And the shortcut should have strides=(2, 2) as well """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D(filters1, (1, 1), strides=strides, name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) shortcut = Conv2D(filters3, (1, 1), strides=strides, name=conv_name_base + '1')(input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) x = layers.add([x, shortcut]) x = Activation('relu')(x) return x
def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid', data_format=None, **kwargs): if data_format is None: data_format = K.image_data_format() if strides is None: strides = pool_size super(AveragePooling3D, self).__init__(pool_size, strides, padding, data_format, **kwargs)
def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid', data_format=None, **kwargs): if data_format is None: data_format = K.image_data_format() if strides is None: strides = pool_size super(AveragePooling3D, self).__init__(pool_size, strides, padding, data_format, **kwargs)
def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), block_id=None): """Adds 2 blocks of [relu-separable conv-batchnorm]. Arguments: ip: Input tensor filters: Number of output filters per layer kernel_size: Kernel size of separable convolutions strides: Strided convolution for downsampling block_id: String block_id Returns: A Keras tensor """ channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 with K.name_scope('separable_conv_block_%s' % block_id): x = Activation('relu')(ip) x = SeparableConv2D( filters, kernel_size, strides=strides, name='separable_conv_1_%s' % block_id, padding='same', use_bias=False, kernel_initializer='he_normal')( x) x = BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_1_bn_%s' % (block_id))( x) x = Activation('relu')(x) x = SeparableConv2D( filters, kernel_size, name='separable_conv_2_%s' % block_id, padding='same', use_bias=False, kernel_initializer='he_normal')( x) x = BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_2_bn_%s' % (block_id))( x) return x
def set_model(self, model): self.model = model self.sess = K.get_session() if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_grads: grads = model.optimizer.get_gradients(model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [grad.values if is_indexed_slices(grad) else grad for grad in grads] tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = tf_summary.FileWriter(self.log_dir, self.sess.graph) else: self.writer = tf_summary.FileWriter(self.log_dir)
def __init__(self, pool_function, pool_size, strides, padding='valid', data_format=None, name=None, **kwargs): super(Pooling1D, self).__init__(name=name, **kwargs) if data_format is None: data_format = backend.image_data_format() if strides is None: strides = pool_size self.pool_function = pool_function self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') self.padding = conv_utils.normalize_padding(padding) self.data_format = conv_utils.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=3)
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): """A block that has a conv layer at shortcut. Arguments: input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names strides: Strides for the first conv layer in the block. Returns: Output tensor for the block. Note that from stage 3, the first conv layer at main path is with strides=(2, 2) And the shortcut should have strides=(2, 2) as well """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D( filters1, (1, 1), strides=strides, name=conv_name_base + '2a')( input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D( filters2, kernel_size, padding='same', name=conv_name_base + '2b')( x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) shortcut = Conv2D( filters3, (1, 1), strides=strides, name=conv_name_base + '1')( input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) x = layers.add([x, shortcut]) x = Activation('relu')(x) return x
def array_to_img(x, data_format=None, scale=True): """Converts a 3D Numpy array to a PIL Image instance. Arguments: x: Input Numpy array. data_format: Image data format. scale: Whether to rescale image values to be within [0, 255]. Returns: A PIL Image instance. Raises: ImportError: if PIL is not available. ValueError: if invalid `x` or `data_format` is passed. """ if pil_image is None: raise ImportError('Could not import PIL.Image. ' 'The use of `array_to_img` requires PIL.') x = np.asarray(x, dtype=K.floatx()) if x.ndim != 3: raise ValueError( 'Expected image array to have rank 3 (single image). ' 'Got array with shape:', x.shape) if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Invalid data_format:', data_format) # Original Numpy array x has format (height, width, channel) # or (channel, height, width) # but target PIL image has format (width, height, channel) if data_format == 'channels_first': x = x.transpose(1, 2, 0) if scale: x = x + max(-np.min(x), 0) # pylint: disable=g-no-augmented-assignment x_max = np.max(x) if x_max != 0: x /= x_max x *= 255 if x.shape[2] == 3: # RGB return pil_image.fromarray(x.astype('uint8'), 'RGB') elif x.shape[2] == 1: # grayscale return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L') else: raise ValueError('Unsupported channel number: ', x.shape[2])
def preprocess_input(x, data_format=None, mode='caffe'): """Preprocesses a tensor encoding a batch of images. Arguments: x: input Numpy tensor, 4D. data_format: data format of the image tensor. mode: One of "caffe", "tf". - caffe: will convert the images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling. - tf: will scale pixels between -1 and 1, sample-wise. Returns: Preprocessed tensor. """ if data_format is None: data_format = K.image_data_format() assert data_format in {'channels_last', 'channels_first'} if mode == 'tf': x /= 255. x -= 0.5 x *= 2. return x if data_format == 'channels_first': if x.ndim == 3: # 'RGB'->'BGR' x = x[::-1, ...] # Zero-center by mean pixel x[0, :, :] -= 103.939 x[1, :, :] -= 116.779 x[2, :, :] -= 123.68 else: x = x[:, ::-1, ...] x[:, 0, :, :] -= 103.939 x[:, 1, :, :] -= 116.779 x[:, 2, :, :] -= 123.68 else: # 'RGB'->'BGR' x = x[..., ::-1] # Zero-center by mean pixel x[..., 0] -= 103.939 x[..., 1] -= 116.779 x[..., 2] -= 123.68 return x
def __init__(self, x, y, image_data_generator, batch_size=32, shuffle=False, seed=None, data_format=None, save_to_dir=None, save_prefix='', save_format='png'): if y is not None and len(x) != len(y): raise ValueError('X (images tensor) and y (labels) ' 'should have the same length. ' 'Found: X.shape = %s, y.shape = %s' % (np.asarray(x).shape, np.asarray(y).shape)) if data_format is None: data_format = K.image_data_format() self.x = np.asarray(x, dtype=K.floatx()) if self.x.ndim != 4: raise ValueError( 'Input data in `NumpyArrayIterator` ' 'should have rank 4. You passed an array ' 'with shape', self.x.shape) channels_axis = 3 if data_format == 'channels_last' else 1 if self.x.shape[channels_axis] not in {1, 3, 4}: logging.warning('NumpyArrayIterator is set to use the ' 'data format convention "' + data_format + '" ' '(channels on axis ' + str(channels_axis) + '), i.e. expected ' 'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. ' 'However, it was passed an array with shape ' + str(self.x.shape) + ' (' + str(self.x.shape[channels_axis]) + ' channels).') if y is not None: self.y = np.asarray(y) else: self.y = None self.image_data_generator = image_data_generator self.data_format = data_format self.save_to_dir = save_to_dir self.save_prefix = save_prefix self.save_format = save_format super(NumpyArrayIterator, self).__init__(x.shape[0], batch_size, shuffle, seed)
def array_to_img(x, data_format=None, scale=True): """Converts a 3D Numpy array to a PIL Image instance. Arguments: x: Input Numpy array. data_format: Image data format. scale: Whether to rescale image values to be within [0, 255]. Returns: A PIL Image instance. Raises: ImportError: if PIL is not available. ValueError: if invalid `x` or `data_format` is passed. """ if pil_image is None: raise ImportError('Could not import PIL.Image. ' 'The use of `array_to_img` requires PIL.') x = np.asarray(x, dtype=K.floatx()) if x.ndim != 3: raise ValueError('Expected image array to have rank 3 (single image). ' 'Got array with shape:', x.shape) if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Invalid data_format:', data_format) # Original Numpy array x has format (height, width, channel) # or (channel, height, width) # but target PIL image has format (width, height, channel) if data_format == 'channels_first': x = x.transpose(1, 2, 0) if scale: x = x + max(-np.min(x), 0) # pylint: disable=g-no-augmented-assignment x_max = np.max(x) if x_max != 0: x /= x_max x *= 255 if x.shape[2] == 3: # RGB return pil_image.fromarray(x.astype('uint8'), 'RGB') elif x.shape[2] == 1: # grayscale return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L') else: raise ValueError('Unsupported channel number: ', x.shape[2])
def __init__(self, x, y, image_data_generator, batch_size=32, shuffle=False, seed=None, data_format=None, save_to_dir=None, save_prefix='', save_format='png'): if y is not None and len(x) != len(y): raise ValueError('X (images tensor) and y (labels) ' 'should have the same length. ' 'Found: X.shape = %s, y.shape = %s' % (np.asarray(x).shape, np.asarray(y).shape)) if data_format is None: data_format = K.image_data_format() self.x = np.asarray(x, dtype=K.floatx()) if self.x.ndim != 4: raise ValueError('Input data in `NumpyArrayIterator` ' 'should have rank 4. You passed an array ' 'with shape', self.x.shape) channels_axis = 3 if data_format == 'channels_last' else 1 if self.x.shape[channels_axis] not in {1, 3, 4}: logging.warning( 'NumpyArrayIterator is set to use the ' 'data format convention "' + data_format + '" ' '(channels on axis ' + str(channels_axis) + '), i.e. expected ' 'either 1, 3 or 4 channels on axis ' + str(channels_axis) + '. ' 'However, it was passed an array with shape ' + str(self.x.shape) + ' (' + str(self.x.shape[channels_axis]) + ' channels).') if y is not None: self.y = np.asarray(y) else: self.y = None self.image_data_generator = image_data_generator self.data_format = data_format self.save_to_dir = save_to_dir self.save_prefix = save_prefix self.save_format = save_format super(NumpyArrayIterator, self).__init__(x.shape[0], batch_size, shuffle, seed)
def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, p=1.0, Lambda=0.5, name="conv2d", is_training=True, **kwargs): if data_format is None: data_format = K.image_data_format() self.filters = filters self.kernel_size = kernel_size self.strides = strides self.padding = padding.upper() self.data_format = data_format self.dilation_rate = dilation_rate self.use_bias = use_bias self.activation = activation self.kernel_initializer = kernel_initializer self.bias_initializer = bias_initializer self.kernel_regularizer = kernel_regularizer self.bias_regularizer = bias_regularizer self.activity_regularizer = activity_regularizer self.kernel_constraint = kernel_constraint self.bias_constraint = bias_constraint self.p = p self.Lambda = Lambda self.is_training = is_training self.layername = name super(AULMConv2d, self).__init__(name=self.layername, **kwargs)
def fire_module(x, fire_id, squeeze=16, expand=64): s_id = 'fire' + str(fire_id) + '/' if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 3 x = Convolution2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x) x = Activation('relu', name=s_id + relu + sq1x1)(x) left = Convolution2D(expand, (1, 1), padding='valid', name=s_id + exp1x1)(x) left = Activation('relu', name=s_id + relu + exp1x1)(left) right = Convolution2D(expand, (3, 3), padding='same', name=s_id + exp3x3)(x) right = Activation('relu', name=s_id + relu + exp3x3)(right) x = concatenate([left, right], axis=channel_axis, name=s_id + 'concat') return x
def __init__(self, pool_function, pool_size, strides, padding='valid', data_format=None, name=None, **kwargs): super(Pooling1D, self).__init__(name=name, **kwargs) if data_format is None: data_format = backend.image_data_format() if strides is None: strides = pool_size self.pool_function = pool_function self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') self.padding = conv_utils.normalize_padding(padding) self.data_format = conv_utils.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=3)
def conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(1, 1), name=None): """Utility function to apply conv + BN. Arguments: x: input tensor. filters: filters in `Conv2D`. num_row: height of the convolution kernel. num_col: width of the convolution kernel. padding: padding mode in `Conv2D`. strides: strides in `Conv2D`. name: name of the ops; will become `name + '_conv'` for the convolution and `name + '_bn'` for the batch norm layer. Returns: Output tensor after applying `Conv2D` and `BatchNormalization`. """ if name is not None: bn_name = name + '_bn' conv_name = name + '_conv' else: bn_name = None conv_name = None if K.image_data_format() == 'channels_first': bn_axis = 1 else: bn_axis = 3 x = Conv2D( filters, (num_row, num_col), strides=strides, padding=padding, use_bias=False, name=conv_name)( x) x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) x = Activation('relu', name=name)(x) return x
def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, spectral_normalization=True, bias_constraint=None, **kwargs): if data_format is None: data_format = K.image_data_format() super(Conv2D, self).__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activations.get(activation), use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), bias_initializer=initializers.get(bias_initializer), kernel_regularizer=regularizers.get(kernel_regularizer), bias_regularizer=regularizers.get(bias_regularizer), activity_regularizer=regularizers.get(activity_regularizer), kernel_constraint=constraints.get(kernel_constraint), bias_constraint=constraints.get(bias_constraint), **kwargs) self.u = K.random_normal_variable( [1, filters], 0, 1, dtype=self.dtype, name="sn_estimate") # [1, out_channels] self.spectral_normalization = spectral_normalization
def transition_block(x, reduction, name): """A transition block. Arguments: x: input tensor. reduction: float, compression rate at transition layers. name: string, block label. Returns: output tensor for the block. """ bn_axis = 3 if K.image_data_format() == 'channels_last' else 1 x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')(x) x = Activation('relu', name=name + '_relu')(x) x = Conv2D(int(K.int_shape(x)[bn_axis] * reduction), 1, use_bias=False, name=name + '_conv')(x) x = AveragePooling2D(2, strides=2, name=name + '_pool')(x) return x
def conv2d_bn(x, filters, kernel_size, strides=1, padding='same', activation='relu', use_bias=False, name=None): """Utility function to apply conv + BN. Arguments: x: input tensor. filters: filters in `Conv2D`. kernel_size: kernel size as in `Conv2D`. strides: strides in `Conv2D`. padding: padding mode in `Conv2D`. activation: activation in `Conv2D`. use_bias: whether to use a bias in `Conv2D`. name: name of the ops; will become `name + '_ac'` for the activation and `name + '_bn'` for the batch norm layer. Returns: Output tensor after applying `Conv2D` and `BatchNormalization`. """ x = Conv2D( filters, kernel_size, strides=strides, padding=padding, use_bias=use_bias, name=name)( x) if not use_bias: bn_axis = 1 if K.image_data_format() == 'channels_first' else 3 bn_name = None if name is None else name + '_bn' x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) if activation is not None: ac_name = None if name is None else name + '_ac' x = Activation(activation, name=ac_name)(x) return x
def transition_block(x, reduction, name): """A transition block. Arguments: x: input tensor. reduction: float, compression rate at transition layers. name: string, block label. Returns: output tensor for the block. """ bn_axis = 3 if K.image_data_format() == 'channels_last' else 1 x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')(x) x = Activation('relu', name=name + '_relu')(x) x = Conv2D( int(K.int_shape(x)[bn_axis] * reduction), 1, use_bias=False, name=name + '_conv')( x) x = AveragePooling2D(2, strides=2, name=name + '_pool')(x) return x
def identity_block(input_tensor, kernel_size, filters, stage, block): """The identity block is the block that has no conv layer at shortcut. # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names # Returns Output tensor for the block. """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' relu_name_base = 'relu' + str(stage) + block + '_branch' x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu', name=relu_name_base + '2a')(x) x = Conv2D(filters2, kernel_size, padding='same', name=conv_name_base + '2b')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu', name=relu_name_base + '2b')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) x = Activation('relu', name='relu' + str(stage) + block)(x) return x
def identity_block(input_tensor, kernel_size, filters, stage, block): """The identity block is the block that has no conv layer at shortcut. Arguments: input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names Returns: Output tensor for the block. """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D( filters2, kernel_size, padding='same', name=conv_name_base + '2b')( x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) x = Activation('relu')(x) return x
def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs): if data_format is None: data_format = K.image_data_format() super(Conv2D_circular, self).__init__( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activations.get(activation), use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), bias_initializer=initializers.get(bias_initializer), kernel_regularizer=regularizers.get(kernel_regularizer), bias_regularizer=regularizers.get(bias_regularizer), activity_regularizer=regularizers.get(activity_regularizer), kernel_constraint=constraints.get(kernel_constraint), bias_constraint=constraints.get(bias_constraint), **kwargs)
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1), block_id=1): """Adds a depthwise convolution block. A depthwise convolution block consists of a depthwise conv, batch normalization, relu6, pointwise convolution, batch normalization and relu6 activation. Arguments: inputs: Input tensor of shape `(rows, cols, channels)` (with `channels_last` data format) or (channels, rows, cols) (with `channels_first` data format). pointwise_conv_filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the pointwise convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. block_id: Integer, a unique identification designating the block number. Input shape: 4D tensor with shape: `(batch, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(batch, rows, cols, channels)` if data_format='channels_last'. Output shape: 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to stride. Returns: Output tensor of block. """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 pointwise_conv_filters = int(pointwise_conv_filters * alpha) x = DepthwiseConv2D( # pylint: disable=not-callable (3, 3), padding='same', depth_multiplier=depth_multiplier, strides=strides, use_bias=False, name='conv_dw_%d' % block_id)( inputs) x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) x = Conv2D( pointwise_conv_filters, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%d' % block_id)( x) x = BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x) return Activation(relu6, name='conv_pw_%d_relu' % block_id)(x)
def Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the Xception architecture. Optionally loads weights pre-trained on ImageNet. This model is available for TensorFlow only, and can only be used with inputs following the TensorFlow data format `(width, height, channels)`. You should set `image_data_format="channels_last"` in your Keras config located at ~/.keras/keras.json. Note that the default input image size for this model is 299x299. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)`. It should have exactly 3 input channels, and width and height should be no smaller than 71. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. RuntimeError: If attempting to run this model with a backend that does not support separable convolutions. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') if K.backend() != 'tensorflow': raise RuntimeError('The Xception model is only available with ' 'the TensorFlow backend.') if K.image_data_format() != 'channels_last': logging.warning( 'The Xception model is only available for the ' 'input data format "channels_last" ' '(width, height, channels). ' 'However your settings specify the default ' 'data format "channels_first" (channels, width, height). ' 'You should set `image_data_format="channels_last"` in your Keras ' 'config located at ~/.keras/keras.json. ' 'The model being returned right now will expect inputs ' 'to follow the "channels_last" data format.') K.set_image_data_format('channels_last') old_data_format = 'channels_first' else: old_data_format = None # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=299, min_size=71, data_format=K.image_data_format(), require_flatten=False, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: img_input = Input(tensor=input_tensor, shape=input_shape) x = Conv2D( 32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input) x = BatchNormalization(name='block1_conv1_bn')(x) x = Activation('relu', name='block1_conv1_act')(x) x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) x = BatchNormalization(name='block1_conv2_bn')(x) x = Activation('relu', name='block1_conv2_act')(x) residual = Conv2D( 128, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = SeparableConv2D( 128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x) x = BatchNormalization(name='block2_sepconv1_bn')(x) x = Activation('relu', name='block2_sepconv2_act')(x) x = SeparableConv2D( 128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x) x = BatchNormalization(name='block2_sepconv2_bn')(x) x = MaxPooling2D( (3, 3), strides=(2, 2), padding='same', name='block2_pool')(x) x = layers.add([x, residual]) residual = Conv2D( 256, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block3_sepconv1_act')(x) x = SeparableConv2D( 256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x) x = BatchNormalization(name='block3_sepconv1_bn')(x) x = Activation('relu', name='block3_sepconv2_act')(x) x = SeparableConv2D( 256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x) x = BatchNormalization(name='block3_sepconv2_bn')(x) x = MaxPooling2D( (3, 3), strides=(2, 2), padding='same', name='block3_pool')(x) x = layers.add([x, residual]) residual = Conv2D( 728, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block4_sepconv1_act')(x) x = SeparableConv2D( 728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x) x = BatchNormalization(name='block4_sepconv1_bn')(x) x = Activation('relu', name='block4_sepconv2_act')(x) x = SeparableConv2D( 728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x) x = BatchNormalization(name='block4_sepconv2_bn')(x) x = MaxPooling2D( (3, 3), strides=(2, 2), padding='same', name='block4_pool')(x) x = layers.add([x, residual]) for i in range(8): residual = x prefix = 'block' + str(i + 5) x = Activation('relu', name=prefix + '_sepconv1_act')(x) x = SeparableConv2D( 728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x) x = BatchNormalization(name=prefix + '_sepconv1_bn')(x) x = Activation('relu', name=prefix + '_sepconv2_act')(x) x = SeparableConv2D( 728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x) x = BatchNormalization(name=prefix + '_sepconv2_bn')(x) x = Activation('relu', name=prefix + '_sepconv3_act')(x) x = SeparableConv2D( 728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x) x = BatchNormalization(name=prefix + '_sepconv3_bn')(x) x = layers.add([x, residual]) residual = Conv2D( 1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block13_sepconv1_act')(x) x = SeparableConv2D( 728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x) x = BatchNormalization(name='block13_sepconv1_bn')(x) x = Activation('relu', name='block13_sepconv2_act')(x) x = SeparableConv2D( 1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x) x = BatchNormalization(name='block13_sepconv2_bn')(x) x = MaxPooling2D( (3, 3), strides=(2, 2), padding='same', name='block13_pool')(x) x = layers.add([x, residual]) x = SeparableConv2D( 1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x) x = BatchNormalization(name='block14_sepconv1_bn')(x) x = Activation('relu', name='block14_sepconv1_act')(x) x = SeparableConv2D( 2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x) x = BatchNormalization(name='block14_sepconv2_bn')(x) x = Activation('relu', name='block14_sepconv2_act')(x) if include_top: x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='xception') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'xception_weights_tf_dim_ordering_tf_kernels.h5', TF_WEIGHTS_PATH, cache_subdir='models', file_hash='0a58e3b7378bc2990ea3b43d5981f1f6') else: weights_path = get_file( 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5', TF_WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='b0042744bf5b25fce3cb969f33bebb97') model.load_weights(weights_path) if old_data_format: K.set_image_data_format(old_data_format) elif weights is not None: model.load_weights(weights) return model
def __init__(self, directory, image_data_generator, target_size=(256, 256), color_mode='rgb', classes=None, class_mode='categorical', batch_size=32, shuffle=True, seed=None, data_format=None, save_to_dir=None, save_prefix='', save_format='png', follow_links=False, interpolation='nearest'): if data_format is None: data_format = K.image_data_format() self.directory = directory self.image_data_generator = image_data_generator self.target_size = tuple(target_size) if color_mode not in {'rgb', 'grayscale'}: raise ValueError('Invalid color mode:', color_mode, '; expected "rgb" or "grayscale".') self.color_mode = color_mode self.data_format = data_format if self.color_mode == 'rgb': if self.data_format == 'channels_last': self.image_shape = self.target_size + (3,) else: self.image_shape = (3,) + self.target_size else: if self.data_format == 'channels_last': self.image_shape = self.target_size + (1,) else: self.image_shape = (1,) + self.target_size self.classes = classes if class_mode not in {'categorical', 'binary', 'sparse', 'input', None}: raise ValueError('Invalid class_mode:', class_mode, '; expected one of "categorical", ' '"binary", "sparse", "input"' ' or None.') self.class_mode = class_mode self.save_to_dir = save_to_dir self.save_prefix = save_prefix self.save_format = save_format self.interpolation = interpolation white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm'} # first, count the number of samples and classes self.samples = 0 if not classes: classes = [] for subdir in sorted(os.listdir(directory)): if os.path.isdir(os.path.join(directory, subdir)): classes.append(subdir) self.num_classes = len(classes) self.class_indices = dict(zip(classes, range(len(classes)))) pool = multiprocessing.pool.ThreadPool() function_partial = partial( _count_valid_files_in_directory, white_list_formats=white_list_formats, follow_links=follow_links) self.samples = sum( pool.map(function_partial, (os.path.join(directory, subdir) for subdir in classes))) print('Found %d images belonging to %d classes.' % (self.samples, self.num_classes)) # second, build an index of the images in the different class subfolders results = [] self.filenames = [] self.classes = np.zeros((self.samples,), dtype='int32') i = 0 for dirpath in (os.path.join(directory, subdir) for subdir in classes): results.append( pool.apply_async( _list_valid_filenames_in_directory, (dirpath, white_list_formats, self.class_indices, follow_links))) for res in results: classes, filenames = res.get() self.classes[i:i + len(classes)] = classes self.filenames += filenames i += len(classes) pool.close() pool.join() super(DirectoryIterator, self).__init__(self.samples, batch_size, shuffle, seed)
def Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the Xception architecture. Optionally loads weights pre-trained on ImageNet. This model is available for TensorFlow only, and can only be used with inputs following the TensorFlow data format `(width, height, channels)`. You should set `image_data_format='channels_last'` in your Keras config located at ~/.keras/keras.json. Note that the default input image size for this model is 299x299. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)`. It should have exactly 3 inputs channels, and width and height should be no smaller than 71. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. RuntimeError: If attempting to run this model with a backend that does not support separable convolutions. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') if K.image_data_format() != 'channels_last': logging.warning( 'The Xception model is only available for the ' 'input data format "channels_last" ' '(width, height, channels). ' 'However your settings specify the default ' 'data format "channels_first" (channels, width, height). ' 'You should set `image_data_format="channels_last"` in your Keras ' 'config located at ~/.keras/keras.json. ' 'The model being returned right now will expect inputs ' 'to follow the "channels_last" data format.') K.set_image_data_format('channels_last') old_data_format = 'channels_first' else: old_data_format = None # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=299, min_size=71, data_format=K.image_data_format(), require_flatten=False, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input) x = BatchNormalization(name='block1_conv1_bn')(x) x = Activation('relu', name='block1_conv1_act')(x) x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) x = BatchNormalization(name='block1_conv2_bn')(x) x = Activation('relu', name='block1_conv2_act')(x) residual = Conv2D(128, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x) x = BatchNormalization(name='block2_sepconv1_bn')(x) x = Activation('relu', name='block2_sepconv2_act')(x) x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x) x = BatchNormalization(name='block2_sepconv2_bn')(x) x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x) x = layers.add([x, residual]) residual = Conv2D(256, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block3_sepconv1_act')(x) x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x) x = BatchNormalization(name='block3_sepconv1_bn')(x) x = Activation('relu', name='block3_sepconv2_act')(x) x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x) x = BatchNormalization(name='block3_sepconv2_bn')(x) x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x) x = layers.add([x, residual]) residual = Conv2D(728, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block4_sepconv1_act')(x) x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x) x = BatchNormalization(name='block4_sepconv1_bn')(x) x = Activation('relu', name='block4_sepconv2_act')(x) x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x) x = BatchNormalization(name='block4_sepconv2_bn')(x) x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x) x = layers.add([x, residual]) for i in range(8): residual = x prefix = 'block' + str(i + 5) x = Activation('relu', name=prefix + '_sepconv1_act')(x) x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x) x = BatchNormalization(name=prefix + '_sepconv1_bn')(x) x = Activation('relu', name=prefix + '_sepconv2_act')(x) x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x) x = BatchNormalization(name=prefix + '_sepconv2_bn')(x) x = Activation('relu', name=prefix + '_sepconv3_act')(x) x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x) x = BatchNormalization(name=prefix + '_sepconv3_bn')(x) x = layers.add([x, residual]) residual = Conv2D(1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block13_sepconv1_act')(x) x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x) x = BatchNormalization(name='block13_sepconv1_bn')(x) x = Activation('relu', name='block13_sepconv2_act')(x) x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x) x = BatchNormalization(name='block13_sepconv2_bn')(x) x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x) x = layers.add([x, residual]) x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x) x = BatchNormalization(name='block14_sepconv1_bn')(x) x = Activation('relu', name='block14_sepconv1_act')(x) x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x) x = BatchNormalization(name='block14_sepconv2_bn')(x) x = Activation('relu', name='block14_sepconv2_act')(x) if include_top: x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='xception') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'xception_weights_tf_dim_ordering_tf_kernels.h5', TF_WEIGHTS_PATH, cache_subdir='models', file_hash='0a58e3b7378bc2990ea3b43d5981f1f6') else: weights_path = get_file( 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5', TF_WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='b0042744bf5b25fce3cb969f33bebb97') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) if old_data_format: K.set_image_data_format(old_data_format) return model
def ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the ResNet50 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. # Arguments include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization) or 'imagenet' (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 197. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=48, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = Conv2D( #64, (7, 7), strides=(2, 2), padding='same', name='conv1')(img_input) 64, (7, 7), strides=(1, 1), padding='same', name='conv1')(img_input) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') #x = AveragePooling2D((7, 7), name='avg_pool')(x) #x = AveragePooling2D((2, 2), name='avg_pool')(x) x = AveragePooling2D((4, 4), name='avg_pool')(x) if include_top: x = Flatten()(x) x = Dense(classes, activation='softmax', name='fc1000')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='resnet50') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'resnet50_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', md5_hash='a7b3fe01876f51b976af0dea6bc144eb') else: weights_path = get_file( 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', md5_hash='a268eb855778b3df3c7506639542a6af') model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if include_top: maxpool = model.get_layer(name='avg_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1000') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') if K.image_data_format() == 'channels_first' and K.backend( ) == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') return model
def InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the Inception v3 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Note that the default input image size for this model is 299x299. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)` (with `channels_last` data format) or `(3, 299, 299)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 139. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=299, min_size=139, data_format=K.image_data_format(), require_flatten=False, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 3 x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') x = conv2d_bn(x, 32, 3, 3, padding='valid') x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv2d_bn(x, 80, 1, 1, padding='valid') x = conv2d_bn(x, 192, 3, 3, padding='valid') x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 branch1x1 = conv2d_bn(x, 64, 1, 1) branch5x5 = conv2d_bn(x, 48, 1, 1) branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 32, 1, 1) x = layers.concatenate( [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed0') # mixed 1: 35 x 35 x 256 branch1x1 = conv2d_bn(x, 64, 1, 1) branch5x5 = conv2d_bn(x, 48, 1, 1) branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1, 1) x = layers.concatenate( [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed1') # mixed 2: 35 x 35 x 256 branch1x1 = conv2d_bn(x, 64, 1, 1) branch5x5 = conv2d_bn(x, 48, 1, 1) branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1, 1) x = layers.concatenate( [branch1x1, branch5x5, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed2') # mixed 3: 17 x 17 x 768 branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) branch3x3dbl = conv2d_bn( branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) x = layers.concatenate( [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') # mixed 4: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(x, 128, 1, 1) branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = conv2d_bn(x, 128, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed4') # mixed 5, 6: 17 x 17 x 768 for i in range(2): branch1x1 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(x, 160, 1, 1) branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = conv2d_bn(x, 160, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed' + str(5 + i)) # mixed 7: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(x, 192, 1, 1) branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) branch7x7dbl = conv2d_bn(x, 192, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch7x7, branch7x7dbl, branch_pool], axis=channel_axis, name='mixed7') # mixed 8: 8 x 8 x 1280 branch3x3 = conv2d_bn(x, 192, 1, 1) branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, strides=(2, 2), padding='valid') branch7x7x3 = conv2d_bn(x, 192, 1, 1) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) branch7x7x3 = conv2d_bn( branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) x = layers.concatenate( [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') # mixed 9: 8 x 8 x 2048 for i in range(2): branch1x1 = conv2d_bn(x, 320, 1, 1) branch3x3 = conv2d_bn(x, 384, 1, 1) branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) branch3x3 = layers.concatenate( [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) branch3x3dbl = conv2d_bn(x, 448, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) branch3x3dbl = layers.concatenate( [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) x = layers.concatenate( [branch1x1, branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed' + str(9 + i)) if include_top: # Classification block x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='inception_v3') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', file_hash='9a0d58056eeedaa3f26cb7ebd46da564') else: weights_path = get_file( 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='bcbd6486424b2319ff4ef7d526e38f63') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): """Adds a Inception-ResNet block. This function builds 3 types of Inception-ResNet blocks mentioned in the paper, controlled by the `block_type` argument (which is the block name used in the official TF-slim implementation): - Inception-ResNet-A: `block_type='block35'` - Inception-ResNet-B: `block_type='block17'` - Inception-ResNet-C: `block_type='block8'` Arguments: x: input tensor. scale: scaling factor to scale the residuals (i.e., the output of passing `x` through an inception module) before adding them to the shortcut branch. Let `r` be the output from the residual branch, the output of this block will be `x + scale * r`. block_type: `'block35'`, `'block17'` or `'block8'`, determines the network structure in the residual branch. block_idx: an `int` used for generating layer names. The Inception-ResNet blocks are repeated many times in this network. We use `block_idx` to identify each of the repetitions. For example, the first Inception-ResNet-A block will have `block_type='block35', block_idx=0`, ane the layer names will have a common prefix `'block35_0'`. activation: activation function to use at the end of the block. When `activation=None`, no activation is applied (i.e., "linear" activation: `a(x) = x`). Returns: Output tensor for the block. Raises: ValueError: if `block_type` is not one of `'block35'`, `'block17'` or `'block8'`. """ if block_type == 'block35': branch_0 = conv2d_bn(x, 32, 1) branch_1 = conv2d_bn(x, 32, 1) branch_1 = conv2d_bn(branch_1, 32, 3) branch_2 = conv2d_bn(x, 32, 1) branch_2 = conv2d_bn(branch_2, 48, 3) branch_2 = conv2d_bn(branch_2, 64, 3) branches = [branch_0, branch_1, branch_2] elif block_type == 'block17': branch_0 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(x, 128, 1) branch_1 = conv2d_bn(branch_1, 160, [1, 7]) branch_1 = conv2d_bn(branch_1, 192, [7, 1]) branches = [branch_0, branch_1] elif block_type == 'block8': branch_0 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(branch_1, 224, [1, 3]) branch_1 = conv2d_bn(branch_1, 256, [3, 1]) branches = [branch_0, branch_1] else: raise ValueError('Unknown Inception-ResNet block type. ' 'Expects "block35", "block17" or "block8", ' 'but got: ' + str(block_type)) block_name = block_type + '_' + str(block_idx) channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 mixed = Concatenate(axis=channel_axis, name=block_name + '_mixed')(branches) up = conv2d_bn( mixed, K.int_shape(x)[channel_axis], 1, activation=None, use_bias=True, name=block_name + '_conv') x = Lambda( lambda inputs, scale: inputs[0] + inputs[1] * scale, output_shape=K.int_shape(x)[1:], arguments={'scale': scale}, name=block_name)([x, up]) if activation is not None: x = Activation(activation, name=block_name + '_ac')(x) return x
def VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the VGG19 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Arguments: include_top: whether to include the 3 fully-connected layers at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 48. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=224, min_size=48, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 x = Conv2D( 64, (3, 3), activation='relu', padding='same', name='block1_conv1')( img_input) x = Conv2D( 64, (3, 3), activation='relu', padding='same', name='block1_conv2')( x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 x = Conv2D( 128, (3, 3), activation='relu', padding='same', name='block2_conv1')( x) x = Conv2D( 128, (3, 3), activation='relu', padding='same', name='block2_conv2')( x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 x = Conv2D( 256, (3, 3), activation='relu', padding='same', name='block3_conv1')( x) x = Conv2D( 256, (3, 3), activation='relu', padding='same', name='block3_conv2')( x) x = Conv2D( 256, (3, 3), activation='relu', padding='same', name='block3_conv3')( x) x = Conv2D( 256, (3, 3), activation='relu', padding='same', name='block3_conv4')( x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block4_conv1')( x) x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block4_conv2')( x) x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block4_conv3')( x) x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block4_conv4')( x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block5_conv1')( x) x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block5_conv2')( x) x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block5_conv3')( x) x = Conv2D( 512, (3, 3), activation='relu', padding='same', name='block5_conv4')( x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) if include_top: # Classification block x = Flatten(name='flatten')(x) x = Dense(4096, activation='relu', name='fc1')(x) x = Dense(4096, activation='relu', name='fc2')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='vgg19') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'vgg19_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', file_hash='cbe5617147190e668d6c5d5026f83318') else: weights_path = get_file( 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='253f8cb515780f3b799900260a226db6') model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='block5_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1') layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') elif weights is not None: model.load_weights(weights) return model
def InceptionResNetV2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the Inception-ResNet v2 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `"image_data_format": "channels_last"` in your Keras config at `~/.keras/keras.json`. The model and the weights are compatible with TensorFlow, Theano and CNTK backends. The data format convention used by the model is the one specified in your Keras config file. Note that the default input image size for this model is 299x299, instead of 224x224 as in the VGG16 and ResNet models. Also, the input preprocessing function is different (i.e., do not use `imagenet_utils.preprocess_input()` with this model. Use `preprocess_input()` defined in this module instead). Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(299, 299, 3)` (with `'channels_last'` data format) or `(3, 299, 299)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 139. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `'avg'` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. Returns: A Keras `Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=299, min_size=139, data_format=K.image_data_format(), require_flatten=False, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Stem block: 35 x 35 x 192 x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') x = conv2d_bn(x, 32, 3, padding='valid') x = conv2d_bn(x, 64, 3) x = MaxPooling2D(3, strides=2)(x) x = conv2d_bn(x, 80, 1, padding='valid') x = conv2d_bn(x, 192, 3, padding='valid') x = MaxPooling2D(3, strides=2)(x) # Mixed 5b (Inception-A block): 35 x 35 x 320 branch_0 = conv2d_bn(x, 96, 1) branch_1 = conv2d_bn(x, 48, 1) branch_1 = conv2d_bn(branch_1, 64, 5) branch_2 = conv2d_bn(x, 64, 1) branch_2 = conv2d_bn(branch_2, 96, 3) branch_2 = conv2d_bn(branch_2, 96, 3) branch_pool = AveragePooling2D(3, strides=1, padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1) branches = [branch_0, branch_1, branch_2, branch_pool] channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 x = Concatenate(axis=channel_axis, name='mixed_5b')(branches) # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 for block_idx in range(1, 11): x = inception_resnet_block( x, scale=0.17, block_type='block35', block_idx=block_idx) # Mixed 6a (Reduction-A block): 17 x 17 x 1088 branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') branch_1 = conv2d_bn(x, 256, 1) branch_1 = conv2d_bn(branch_1, 256, 3) branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x) branches = [branch_0, branch_1, branch_pool] x = Concatenate(axis=channel_axis, name='mixed_6a')(branches) # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 for block_idx in range(1, 21): x = inception_resnet_block( x, scale=0.1, block_type='block17', block_idx=block_idx) # Mixed 7a (Reduction-B block): 8 x 8 x 2080 branch_0 = conv2d_bn(x, 256, 1) branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') branch_1 = conv2d_bn(x, 256, 1) branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') branch_2 = conv2d_bn(x, 256, 1) branch_2 = conv2d_bn(branch_2, 288, 3) branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x) branches = [branch_0, branch_1, branch_2, branch_pool] x = Concatenate(axis=channel_axis, name='mixed_7a')(branches) # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 for block_idx in range(1, 10): x = inception_resnet_block( x, scale=0.2, block_type='block8', block_idx=block_idx) x = inception_resnet_block( x, scale=1., activation=None, block_type='block8', block_idx=10) # Final convolution block: 8 x 8 x 1536 x = conv2d_bn(x, 1536, 1, name='conv_7b') if include_top: # Classification block x = GlobalAveragePooling2D(name='avg_pool')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor` if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model model = Model(inputs, x, name='inception_resnet_v2') # Load weights if weights == 'imagenet': if include_top: fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' weights_path = get_file( fname, BASE_WEIGHT_URL + fname, cache_subdir='models', file_hash='e693bd0210a403b3192acc6073ad2e96') else: fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5' weights_path = get_file( fname, BASE_WEIGHT_URL + fname, cache_subdir='models', file_hash='d19885ff4a710c122648d3b5c3b684e4') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def SqueezeNet(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the SqueezeNet architecture. """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') if weights == 'imagenet' and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') input_shape = _obtain_input_shape(input_shape, default_size=227, min_size=48, data_format=K.image_data_format(), require_flatten=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input) x = Activation('relu', name='relu_conv1')(x) x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x) x = fire_module(x, fire_id=2, squeeze=16, expand=64) x = fire_module(x, fire_id=3, squeeze=16, expand=64) x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x) x = fire_module(x, fire_id=4, squeeze=32, expand=128) x = fire_module(x, fire_id=5, squeeze=32, expand=128) x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x) x = fire_module(x, fire_id=6, squeeze=48, expand=192) x = fire_module(x, fire_id=7, squeeze=48, expand=192) x = fire_module(x, fire_id=8, squeeze=64, expand=256) x = fire_module(x, fire_id=9, squeeze=64, expand=256) if include_top: # It's not obvious where to cut the network... # Could do the 8th or 9th layer... some work recommends cutting earlier layers. x = Dropout(0.5, name='drop9')(x) x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x) x = Activation('relu', name='relu_conv10')(x) x = GlobalAveragePooling2D()(x) x = Activation('softmax', name='loss')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalAveragePooling2D()(x) elif pooling == None: pass else: raise ValueError("Unknown argument for 'ppoling'=" + pooling) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input model = Model(inputs, x, name='squeezenet') # load weights if weights == 'imagenet': if include_top: weights_path = '/tmp/squeezenet_weights_tf_dim_ordering_tf_kernels.h5' else: weights_path = get_file( 'squeezenet_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_dir='/tmp/') model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_daata_format() == 'channels_first': pass return model
def __init__(self, featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, zca_epsilon=1e-6, rotation_range=0., width_shift_range=0., height_shift_range=0., shear_range=0., zoom_range=0., channel_shift_range=0., fill_mode='nearest', cval=0., horizontal_flip=False, vertical_flip=False, rescale=None, preprocessing_function=None, data_format=None): if data_format is None: data_format = K.image_data_format() self.featurewise_center = featurewise_center self.samplewise_center = samplewise_center self.featurewise_std_normalization = featurewise_std_normalization self.samplewise_std_normalization = samplewise_std_normalization self.zca_whitening = zca_whitening self.zca_epsilon = zca_epsilon self.rotation_range = rotation_range self.width_shift_range = width_shift_range self.height_shift_range = height_shift_range self.shear_range = shear_range self.zoom_range = zoom_range self.channel_shift_range = channel_shift_range self.fill_mode = fill_mode self.cval = cval self.horizontal_flip = horizontal_flip self.vertical_flip = vertical_flip self.rescale = rescale self.preprocessing_function = preprocessing_function if data_format not in {'channels_last', 'channels_first'}: raise ValueError( '`data_format` should be `"channels_last"` (channel after row and ' 'column) or `"channels_first"` (channel before row and column). ' 'Received arg: ', data_format) self.data_format = data_format if data_format == 'channels_first': self.channel_axis = 1 self.row_axis = 2 self.col_axis = 3 if data_format == 'channels_last': self.channel_axis = 3 self.row_axis = 1 self.col_axis = 2 self.mean = None self.std = None self.principal_components = None if np.isscalar(zoom_range): self.zoom_range = [1 - zoom_range, 1 + zoom_range] elif len(zoom_range) == 2: self.zoom_range = [zoom_range[0], zoom_range[1]] else: raise ValueError('`zoom_range` should be a float or ' 'a tuple or list of two floats. ' 'Received arg: ', zoom_range) if zca_whitening: if not featurewise_center: self.featurewise_center = True logging.warning('This ImageDataGenerator specifies ' '`zca_whitening`, which overrides ' 'setting of `featurewise_center`.') if featurewise_std_normalization: self.featurewise_std_normalization = False logging.warning('This ImageDataGenerator specifies ' '`zca_whitening` ' 'which overrides setting of' '`featurewise_std_normalization`.') if featurewise_std_normalization: if not featurewise_center: self.featurewise_center = True logging.warning('This ImageDataGenerator specifies ' '`featurewise_std_normalization`, ' 'which overrides setting of ' '`featurewise_center`.') if samplewise_std_normalization: if not samplewise_center: self.samplewise_center = True logging.warning('This ImageDataGenerator specifies ' '`samplewise_std_normalization`, ' 'which overrides setting of ' '`samplewise_center`.')
def VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the VGG19 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Arguments: include_top: whether to include the 3 fully-connected layers at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 48. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=48, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input) x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv4')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) if include_top: # Classification block x = Flatten(name='flatten')(x) x = Dense(4096, activation='relu', name='fc1')(x) x = Dense(4096, activation='relu', name='fc2')(x) x = Dense(classes, activation='softmax', name='predictions')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='vgg19') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'vgg19_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', file_hash='cbe5617147190e668d6c5d5026f83318') else: weights_path = get_file( 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='253f8cb515780f3b799900260a226db6') model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='block5_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') elif weights is not None: model.load_weights(weights) return model
def ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000): """Instantiates the ResNet50 architecture. Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set `image_data_format='channels_last'` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 197. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns: A Keras model instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as imagenet with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape( input_shape, default_size=224, min_size=197, data_format=K.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = Conv2D( 64, (7, 7), strides=(2, 2), padding='same', name='conv1')( img_input) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') x = AveragePooling2D((7, 7), name='avg_pool')(x) if include_top: x = Flatten()(x) x = Dense(classes, activation='softmax', name='fc1000')(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='resnet50') # load weights if weights == 'imagenet': if include_top: weights_path = get_file( 'resnet50_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', md5_hash='a7b3fe01876f51b976af0dea6bc144eb') else: weights_path = get_file( 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', md5_hash='a268eb855778b3df3c7506639542a6af') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model