def ResAttentionNet56(shape=(512, 512, 5), n_channels=64, n_classes=21, dropout=0.3):
    """Build a truncated residual-attention feature extractor.

    The network is a convolutional stem, two residual/attention stages and
    three further residual blocks. The result is average-pooled with a
    window equal to its own static height and width. No classification head
    is attached: the model output is the pooled (optionally dropped-out)
    feature tensor.

    Args:
        shape: Input shape handed to ``Input``.
        n_channels: Base channel count; the stages use 4x, 8x and 32x of it.
        n_classes: Accepted for interface compatibility only; it is not read
            because the dense classification head is disabled here.
        dropout: Dropout rate applied after pooling; skipped unless > 0.

    Returns:
        A ``Model`` mapping the input to the pooled feature tensor.
    """
    image_in = Input(shape=shape)

    # Stem: strided 7x7 convolution followed by a strided max-pool.
    features = Conv2D(n_channels, (7, 7), strides=(2, 2), padding='same')(image_in)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(features)

    # Stage 1.
    features = residual_block(features, output_channels=n_channels * 4)
    features = attention_block(features, encoder_depth=3)

    # Stage 2 (the residual block downsamples with stride 2).
    features = residual_block(features, output_channels=n_channels * 8, stride=2)
    features = attention_block(features, encoder_depth=2)

    # Tail: three residual blocks at 32x the base width, default stride.
    for _ in range(3):
        features = residual_block(features, output_channels=n_channels * 32)

    # Average over the whole remaining feature map.
    static_shape = features.get_shape()
    features = AveragePooling2D(
        pool_size=(static_shape[1], static_shape[2]),
        strides=(1, 1))(features)

    if dropout > 0:
        features = Dropout(dropout)(features)

    return Model(image_in, features)
def UNet(img_shape=(640, 640, 1), net_layers=(16, 32, 64), act='relu',
         pool_size=(2, 2), final_pool=(1, 1), dropout=0.50,
         final_act='softmax'):
    """Build a small U-Net style encoder/decoder model.

    Each encoder level applies a 1x1 convolution (half width), a 3x3
    convolution (full width), batch normalization and dropout, stores the
    result as a skip tensor and then max-pools. After a single-filter 1x1
    bottleneck convolution the decoder walks the levels in reverse: the same
    convolution stack, then upsampling and concatenation with the matching
    skip tensor. A final single-filter convolution produces the output.

    Args:
        img_shape: Input shape handed to ``Input``.
        net_layers: Sequence of filter counts, one per encoder level. It is
            copied and never modified, so the same sequence (or the default)
            can be reused across calls.
        act: Currently has no effect; the bottleneck activation is fixed to
            ``'relu'`` (kept for interface compatibility).
        pool_size: Window of the max-pooling and of the matching upsampling.
        final_pool: Kernel size of the output convolution.
        dropout: Dropout rate used at every level.
        final_act: Activation of the output convolution.

    Returns:
        A ``Model`` mapping the input image to the single-channel output.
    """
    # Private copy: reversing the caller's list (or the shared default) in
    # place would make a second call build a differently-shaped network.
    layer_widths = list(net_layers)

    # INPUT
    img_input = Input(shape=img_shape)

    # ENCODING LAYERS
    skip_tensors = []
    x = img_input
    for width in layer_widths:
        print('Encode: ', width)
        x = Conv2D(int(width / 2), (1, 1), padding='same')(x)
        x = Conv2D(width, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout)(x)
        print('Conv: ', list(x.get_shape()))
        skip_tensors.append(x)
        x = MaxPooling2D(pool_size)(x)
        print('Pool: ', list(x.get_shape()))

    # MIDDLE LAYER
    # The bottleneck activation has always been hard-wired to ReLU,
    # regardless of the `act` argument; that behavior is preserved.
    middle_act = 'relu'
    x = Conv2D(1, (1, 1), activation=middle_act, padding='same')(x)
    print('Conv: ', list(x.get_shape()))

    # DECODING LAYERS (deepest level first, paired with its skip tensor)
    for width, skip in zip(reversed(layer_widths), reversed(skip_tensors)):
        print('Decode: ', width)
        x = Conv2D(int(width / 2), (1, 1), padding='same')(x)
        x = Conv2D(width, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout)(x)
        print('Conv: ', list(x.get_shape()))
        x = concatenate([UpSampling2D(pool_size)(x), skip])
        print('Up2D: ', list(x.get_shape()))

    # OUTPUT
    # NOTE(review): the output has a single filter; with the default
    # 'softmax' over one channel every value is 1.0 -- confirm the intended
    # final activation with the callers.
    out = Conv2D(1, final_pool, activation=final_act, padding='same')(x)
    print('Conv: ', list(out.get_shape()))

    return Model(img_input, out)
def build_model(self, input_shape=(None, cfg.INPUT_SIZE), use_lstm=False, use_dropout=True):
    """Build the convolutional (optionally recurrent) model and store it.

    The input is reshaped to add a trailing channel axis, passed through
    three 3x3 convolutions with batch normalization and two max-pools that
    only reduce axis 2, flattened per step of axis 1, and fed to a dense
    layer, an optional bidirectional LSTM and a sigmoid output layer of
    ``cfg.PITCH_NUM`` units.

    Args:
        input_shape: Shape handed to ``Input``.
        use_lstm: Insert a bidirectional LSTM before the output layer.
        use_dropout: Insert dropout after each pooling stage and after the
            256-unit dense layer.

    Returns:
        None. The resulting ``Model`` is stored in ``self.model``.
    """
    def dropout_if_enabled(tensor, rate):
        # Dropout layers are only created when requested.
        return Dropout(rate)(tensor) if use_dropout else tensor

    def conv_bn(tensor, n_filters):
        tensor = Conv2D(n_filters, (3, 3), padding='SAME', activation='relu')(tensor)
        return BatchNormalization()(tensor)

    def halve_axis_two(tensor):
        return MaxPooling2D((1, 2), strides=(1, 2))(tensor)

    net_in = Input(shape=input_shape, dtype='float32')
    with_channel = Reshape((-1, cfg.INPUT_SIZE, 1))(net_in)

    # First stage: two 16-filter convolutions.
    feats = conv_bn(with_channel, 16)
    feats = conv_bn(feats, 16)
    feats = halve_axis_two(feats)
    feats = dropout_if_enabled(feats, 0.25)

    # Second stage: one 32-filter convolution.
    feats = conv_bn(feats, 32)
    feats = halve_axis_two(feats)
    feats = dropout_if_enabled(feats, 0.25)

    # Merge the two trailing axes into one feature vector per step.
    static_dims = feats.get_shape()
    feats = Reshape((-1, int(static_dims[2] * static_dims[3])))(feats)

    feats = Dense(256, activation='relu')(feats)
    feats = dropout_if_enabled(feats, 0.5)

    if use_lstm:
        feats = Bidirectional(LSTM(64, return_sequences=True))(feats)

    predictions = Dense(cfg.PITCH_NUM, activation='sigmoid')(feats)
    self.model = Model(net_in, predictions)
def mobile_net_block(inputs, expand_to, strided, num_outputs):
    """Apply an expand / separable-conv / project block to ``inputs``.

    The block is a 1x1 expansion convolution, a 3x3 separable convolution
    (stride 2 when ``strided``) and a 1x1 projection, each followed by batch
    normalization; the first two are also followed by ReLU capped at 6.
    Every convolution is L2-regularized with ``weights_decay``, which is
    read from the enclosing scope.

    Args:
        inputs: Input tensor.
        expand_to: Filter count of the expansion and separable convolutions.
        strided: When truthy, the separable convolution uses stride 2.
        num_outputs: Filter count of the projection convolution.

    Returns:
        ``inputs + block(inputs)`` when the block is not strided and the
        channel counts match, otherwise just the block output.
    """
    def l2_reg():
        # A fresh regularizer instance per layer.
        return tf.keras.regularizers.l2(weights_decay)

    def bn_relu6(tensor):
        tensor = BatchNormalization()(tensor)
        return ReLU(max_value=6)(tensor)

    # Following expand layer should be only if input_filters < output_filters
    expanded = Conv2D(
        filters=expand_to,
        kernel_size=1,
        padding='same',
        kernel_regularizer=l2_reg())(inputs)
    expanded = bn_relu6(expanded)

    depthwise_stride = 2 if strided else 1
    filtered = SeparableConv2D(
        filters=expand_to,
        kernel_size=3,
        strides=depthwise_stride,
        padding='same',
        kernel_regularizer=l2_reg())(expanded)
    filtered = bn_relu6(filtered)

    projected = Conv2D(
        filters=num_outputs,
        kernel_size=1,
        padding='same',
        kernel_regularizer=l2_reg())(filtered)
    projected = BatchNormalization()(projected)

    # A strided block never gets a skip connection.
    if strided:
        return projected

    out_channels = projected.get_shape().as_list()[-1]
    in_channels = inputs.get_shape().as_list()[-1]
    if out_channels != in_channels:
        return projected

    return tf.keras.layers.Add()([inputs, projected])
def sanscript(input_size, d_model):
    """Build a CNN + self-attention network and return its end tensors.

    A five-block convolutional front end (16/32/48/64/80 filters, three 2x2
    max-pools) is followed by a stack of self-attention encoder blocks and a
    softmax output layer.

    ``MultiHeadAttention`` and ``FeedForwardNetwork`` are used with the
    call signatures seen here (``MultiHeadAttention(dim, heads)(q, k, v)``
    returning an ``(output, weights)`` pair); they are assumed to be the
    project's own layers -- TODO confirm.

    Args:
        input_size: Input shape handed to ``Input``.
        d_model: Number of units of the final softmax layer.

    Returns:
        Tuple ``(input_data, output_data)`` of the input and output tensors.
    """
    num_blocks = 4     # encoder blocks
    num_heads = 4      # attention heads per block
    model_dim = 192    # width of the attention stack
    ff_dim = 512       # hidden width of the feed-forward sub-layer
    drop_rate = 0.1    # dropout rate used throughout the attention stack

    input_data = Input(name="input", shape=input_size)

    # (filters, max-pool after activation?, dropout rate after the block)
    conv_plan = (
        (16, True, None),
        (32, True, 0.2),
        (48, True, 0.2),
        (64, False, 0.2),
        (80, False, None),
    )
    cnn = input_data
    for n_filters, with_pool, block_dropout in conv_plan:
        cnn = Conv2D(filters=n_filters, kernel_size=(3, 3), strides=(1, 1),
                     padding="same")(cnn)
        cnn = BatchNormalization()(cnn)
        cnn = LeakyReLU(alpha=0.01)(cnn)
        if with_pool:
            cnn = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                               padding="valid")(cnn)
        if block_dropout is not None:
            cnn = Dropout(rate=block_dropout)(cnn)

    # Collapse the two trailing axes so axis 1 becomes the sequence axis.
    shape = cnn.get_shape()
    san = Reshape((shape[1], shape[2] * shape[3]))(cnn)

    san = Dense(model_dim)(san)
    # The dropout rate must be a fraction in [0, 1]; the layer width (192)
    # was previously passed here by mistake.
    san = Dropout(drop_rate)(san)

    for _ in range(num_blocks):
        # Self-attention sub-layer with residual connection.
        attn_output, _attn_weights = MultiHeadAttention(model_dim, num_heads)(san, san, san)
        attn_output = Dropout(drop_rate)(attn_output)
        san = LayerNormalization(epsilon=1e-6)(san + attn_output)

        # Feed-forward sub-layer with residual connection.
        ffn_output = FeedForwardNetwork(model_dim, ff_dim)(san)
        ffn_output = Dropout(drop_rate)(ffn_output)
        san = LayerNormalization(epsilon=1e-6)(san + ffn_output)

    san = Dropout(drop_rate)(san)
    output_data = Dense(d_model, activation="softmax")(san)

    return (input_data, output_data)
def get_encoder(
        input_dim,
        encoder_conv_filters,
        encoder_conv_kernel_size,
        encoder_conv_strides,
        z_dim,
        use_batch_norm=False,
        use_dropout=False):
    """Build a convolutional encoder that samples a latent vector.

    One ``Conv2D`` is created per entry of ``encoder_conv_filters`` (kernel
    size and stride are taken from the same index of the other two lists),
    each followed by optional batch normalization, ``LeakyReLU`` and
    optional dropout. The flattened features feed two dense layers, ``mu``
    and ``log_var``, from which the output is sampled as
    ``mu + exp(log_var / 2) * epsilon`` with standard-normal ``epsilon``.

    Args:
        input_dim: Input shape handed to ``Input``.
        encoder_conv_filters: Filter count per convolution.
        encoder_conv_kernel_size: Kernel size per convolution.
        encoder_conv_strides: Stride per convolution.
        z_dim: Size of the latent vector.
        use_batch_norm: Add batch normalization after each convolution.
        use_dropout: Add dropout (rate 0.25) after each activation.

    Returns:
        Tuple ``(encoder_model, encoder_input, encoder_output,
        shape_before_flattening, mu, log_var)``.
    """
    encoder_input = Input(shape=input_dim, name='encoder_input')

    hidden = encoder_input
    for idx, n_filters in enumerate(encoder_conv_filters):
        hidden = Conv2D(
            filters=n_filters,
            kernel_size=encoder_conv_kernel_size[idx],
            strides=encoder_conv_strides[idx],
            padding='same',
            name=f'encoder_conv_{idx}')(hidden)
        if use_batch_norm:
            hidden = BatchNormalization()(hidden)
        hidden = LeakyReLU()(hidden)
        if use_dropout:
            hidden = Dropout(rate=0.25)(hidden)

    # Remember the per-sample feature-map shape (batch axis dropped).
    shape_before_flattening = hidden.get_shape().as_list()[1:]
    flat = Flatten()(hidden)

    mu = Dense(z_dim, name='mu')(flat)
    log_var = Dense(z_dim, name='log_var')(flat)

    # This intermediate model is built but not returned or used here.
    encoder_mu_log_var = Model(encoder_input, (mu, log_var))

    def sampling(args):
        """Draw one sample from N(mean, exp(log-variance))."""
        mean, log_variance = args
        noise = tf.random.normal(shape=tf.shape(mean), mean=0, stddev=1.0)
        return mean + tf.exp(log_variance / 2) * noise

    encoder_output = Lambda(sampling, name='encoder_output')([mu, log_var])
    encoder_model = Model(encoder_input, encoder_output, name='Encoder')

    return (encoder_model, encoder_input, encoder_output,
            shape_before_flattening, mu, log_var)
def puigcerver(input_size, d_model):
    """Convolutional recurrent network after Puigcerver et al.

    Five convolutional blocks (16/32/48/64/80 filters; only the first two
    are max-pooled in this variant) are followed by five bidirectional
    LSTM layers and a softmax output layer.

    References:
        Joan Puigcerver. Are multidimensional recurrent layers really
        necessary for handwritten text recognition? In: Document Analysis
        and Recognition (ICDAR), 2017 14th IAPR International Conference
        on, vol. 1, pp. 67–72. IEEE (2017)

        Carlos Mocholí Calvo and Enrique Vidal Ruiz. Development and
        experimentation of a deep learning system for convolutional and
        recurrent neural networks. Escola Tècnica Superior d’Enginyeria
        Informàtica, Universitat Politècnica de València, 2018

    Args:
        input_size: Input shape handed to ``Input``.
        d_model: Number of units of the final softmax layer.

    Returns:
        Tuple ``(input_data, output_data)`` of the input and output tensors.
    """
    input_data = Input(name="input", shape=input_size)

    # (filters, max-pool after activation?, dropout rate after the block)
    conv_plan = (
        (16, True, None),
        (32, True, 0.2),
        (48, False, 0.2),   # pooling is disabled for this block
        (64, False, 0.2),
        (80, False, None),
    )
    features = input_data
    for n_filters, with_pool, block_dropout in conv_plan:
        features = Conv2D(filters=n_filters, kernel_size=(3, 3),
                          strides=(1, 1), padding="same")(features)
        features = BatchNormalization()(features)
        features = LeakyReLU(alpha=0.01)(features)
        if with_pool:
            features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                    padding="valid")(features)
        if block_dropout is not None:
            features = Dropout(rate=block_dropout)(features)

    # Collapse the two trailing axes so axis 1 becomes the sequence axis.
    static_shape = features.get_shape()
    sequence = Reshape(
        (static_shape[1], static_shape[2] * static_shape[3]))(features)

    # Input dropout of each recurrent layer, from first to last.
    for lstm_dropout in (0.5, 0.5, 0.5, 0.2, 0.2):
        sequence = Bidirectional(
            LSTM(units=256, return_sequences=True,
                 dropout=lstm_dropout))(sequence)

    sequence = Dropout(rate=0.2)(sequence)
    output_data = Dense(units=d_model, activation="softmax")(sequence)

    return (input_data, output_data)
def transition_layer(self, x, scope):
    """Apply BN, ReLU, a 1x1 convolution, dropout and 2x2 average pooling.

    The 1x1 convolution is asked for half as many filters as the input has
    channels. ``conv_layer``, ``Drop_out`` and ``Average_pooling`` are
    helpers defined outside this block.

    Args:
        x: Input tensor.
        scope: Name scope for the ops; also the prefix of the convolution's
            layer name.

    Returns:
        The transformed tensor.
    """
    with tf.compat.v1.name_scope(scope):
        normalized = BatchNormalization()(x)
        activated = ReLU()(normalized)

        channels_in = activated.get_shape().as_list()[-1]
        # NOTE(review): this is a float (e.g. 12.0); confirm `conv_layer`
        # accepts a non-integer filter count.
        halved = channels_in*0.5

        squeezed = conv_layer(activated, filter=halved, kernel=[1, 1],
                              layer_name=scope + '_conv1')
        squeezed = Drop_out(squeezed, rate=self.dropout_rate,
                            training=self.training)
        pooled = Average_pooling(squeezed, pool_size=[2, 2], stride=2)
    return pooled
def AttentionResNet92(shape=(256, 256, 3), n_channels=64, n_classes=100,
                      dropout=0, regularization=0.01):
    """Build the Attention-92 residual attention classifier.

    Reference: https://arxiv.org/abs/1704.06904

    Three residual/attention stages (with 1, 2 and 3 attention blocks) are
    followed by three residual blocks, global average pooling, optional
    dropout and an L2-regularized dense output layer with sigmoid
    activation.

    Args:
        shape: Input shape handed to ``Input``.
        n_channels: Base channel count; stages use 4x, 8x, 16x and 32x of it.
        n_classes: Number of units of the output layer.
        dropout: Dropout rate before the output layer; skipped when falsy.
        regularization: L2 factor for the output layer's kernel.

    Returns:
        The assembled ``Model``.
    """
    kernel_reg = l2(regularization)

    image_in = Input(shape=shape)

    # Stem: 7x7/2 convolution and 3x3/2 max-pool.
    net = Conv2D(n_channels, (7, 7), strides=(2, 2), padding='same')(image_in)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)
    net = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(net)

    # (channel multiplier, extra residual kwargs, encoder depth, #attention)
    stage_plan = (
        (4, {}, 3, 1),
        (8, {'stride': 2}, 2, 2),
        (16, {'stride': 2}, 1, 3),
    )
    for multiplier, residual_kwargs, depth, n_attention in stage_plan:
        net = residual_block(net, output_channels=n_channels * multiplier,
                             **residual_kwargs)
        for _ in range(n_attention):
            net = attention_block(net, encoder_depth=depth)

    # Tail: one downsampling residual block, then two at default stride.
    net = residual_block(net, output_channels=n_channels * 32, stride=2)
    for _ in range(2):
        net = residual_block(net, output_channels=n_channels * 32)

    # Average over the whole remaining feature map.
    static_shape = net.get_shape()
    net = AveragePooling2D(pool_size=(static_shape[1], static_shape[2]),
                           strides=(1, 1))(net)
    net = Flatten()(net)
    if dropout:
        net = Dropout(dropout)(net)

    # Sigmoid output; the original author noted softmax as the alternative.
    scores = Dense(n_classes, kernel_regularizer=kernel_reg,
                   activation='sigmoid')(net)

    return Model(image_in, scores)
def _building_block(self, x, channel_out=256):
    """Apply a 1x1 -> 3x3 -> 1x1 bottleneck block with a shortcut.

    The two inner convolutions use ``channel_out // 4`` filters, the last
    one ``channel_out``. Each convolution is batch-normalized; ReLU follows
    the first two. The result is added to ``self._shortcut(x, ...)`` and
    passed through a final ReLU.

    Args:
        x: Input tensor.
        channel_out: Number of output channels of the block.

    Returns:
        The activated sum of the bottleneck path and the shortcut.
    """
    bottleneck_width = channel_out // 4

    # (filters, kernel size, ReLU after batch norm?)
    conv_plan = (
        (bottleneck_width, (1, 1), True),
        (bottleneck_width, (3, 3), True),
        (channel_out, (1, 1), False),
    )
    branch = x
    for n_filters, kernel, with_relu in conv_plan:
        branch = Conv2D(n_filters, kernel_size=kernel, padding='same')(branch)
        branch = BatchNormalization()(branch)
        if with_relu:
            branch = Activation('relu')(branch)

    # The shortcut is matched to the static shape of the bottleneck path.
    branch_shape = branch.get_shape().as_list()
    shortcut = self._shortcut(x, output_shape=branch_shape)

    merged = Add()([branch, shortcut])
    return Activation('relu')(merged)
def build_model():
    """Build the CTC training model and its greedy-decoding companion.

    A five-stage convolutional stack (two conv/BN/ReLU layers per stage) is
    followed by two bidirectional GRU layers and a per-step softmax over
    ``n_class`` classes. ``height``, ``width``, ``n_class`` and
    ``ctc_loss_wrap`` are read from the enclosing scope.

    Returns:
        Tuple ``(model, model_infer)``: ``model`` takes the image plus the
        ``input_length`` and ``label_length`` inputs and is compiled with
        the CTC loss; ``model_infer`` takes only the image and outputs the
        first element of ``ctc_decode``'s result.
    """
    input_tensor = Input((height, width, 3))

    # (filters, pooling window) per stage; the last two stages only pool
    # along axis 1.
    stage_plan = (
        (32, 2),
        (64, 2),
        (128, 2),
        (256, (2, 1)),
        (256, (2, 1)),
    )
    convs_per_stage = 2

    feats = input_tensor
    for n_filters, pool_window in stage_plan:
        for _ in range(convs_per_stage):
            feats = Conv2D(n_filters, kernel_size=3, padding='same',
                           kernel_initializer='he_uniform')(feats)
            feats = BatchNormalization()(feats)
            feats = Activation('relu')(feats)
        feats = MaxPooling2D(pool_window)(feats)

    # Swap axes 1 and 2, then flatten each step into one feature vector.
    feats = Permute((2, 1, 3))(feats)
    feats = TimeDistributed(Flatten())(feats)
    print("=====TimeDistributed:", feats.get_shape())

    rnn_size = 128
    for _ in range(2):
        feats = Bidirectional(GRU(rnn_size, return_sequences=True))(feats)
    y_pred = Dense(n_class, activation='softmax')(feats)
    print("========x:", y_pred)

    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    model = Model(inputs=[input_tensor, input_length, label_length],
                  outputs=y_pred)

    def func(probs):
        """Decode with CTC, using the full static sequence length."""
        print("========decode:", probs)
        batch_dims = tf.reshape(tf.shape(probs)[0], [-1])
        n_steps = tf.constant(int(probs.shape[1]))
        # One length entry per batch element, all equal to the step count.
        seq_lengths = tf.fill(batch_dims, n_steps)
        print("========decode2:", seq_lengths)
        return tf.keras.backend.ctc_decode(probs, seq_lengths)

    output_pred = tf.keras.layers.Lambda(func)(y_pred)
    print("======ctc_decode:", output_pred)
    model_infer = Model(inputs=input_tensor, outputs=output_pred[0])

    model.compile(loss=ctc_loss_wrap(input_length, label_length),
                  optimizer=Adam(1e-3, amsgrad=True))

    return model, model_infer
def conv_bgru(input_shape, output_size):
    """Build a small CNN + bidirectional-GRU network.

    Two conv/BN/ReLU/max-pool blocks are followed by a per-step dense
    layer, two bidirectional GRU layers (merged by sum, then by
    concatenation), each batch-normalized, and a softmax output.

    Args:
        input_shape: Input shape handed to ``Input``.
        output_size: Number of units of the output layer.

    Returns:
        Tuple ``(input_data, output_data)`` of the input and output tensors.
    """
    n_conv_filters = 16
    conv_kernel = (3, 3)
    pool_edge = 2
    step_dense_units = 32
    gru_units = 512

    input_data = Input(name="input", shape=input_shape)

    features = input_data
    for _ in range(2):
        features = Conv2D(n_conv_filters, conv_kernel, padding='same',
                          kernel_initializer='he_normal')(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = MaxPooling2D(pool_size=(pool_edge, pool_edge))(features)

    # CNN to RNN: merge the two trailing axes into one vector per step.
    static_shape = features.get_shape()
    steps = Reshape(
        (static_shape[1], static_shape[2] * static_shape[3]))(features)
    steps = Dense(step_dense_units, activation='relu',
                  kernel_initializer='he_normal')(steps)

    # RNN layers: the first sums both directions, the second concatenates.
    recurrent = steps
    for merge_mode in ("sum", "concat"):
        recurrent = Bidirectional(
            GRU(units=gru_units, return_sequences=True),
            merge_mode=merge_mode)(recurrent)
        recurrent = BatchNormalization()(recurrent)

    # Transform the RNN output into per-step activations.
    logits = Dense(output_size, kernel_initializer='he_normal')(recurrent)
    output_data = Activation("softmax", name="output")(logits)

    return input_data, output_data
def _transmit_block(x, is_last):
    """Apply a transition block configured through ``PARAMS``.

    The input is batch-normalized and activated. The last block then
    applies global average pooling; every other block applies a 1x1x1
    convolution with ``channels // compression`` filters followed by a
    2x2x2 average pool.

    Args:
        x: Input tensor.
        is_last: Whether this is the final transition of the network.

    Returns:
        The transformed tensor.
    """
    # All settings are looked up first, whichever branch is taken below.
    bn_scale, activation, kernel_initializer, weight_decay, compression = (
        PARAMS[key] for key in ('bn_scale', 'activation',
                                'kernel_initializer', 'weight_decay',
                                'compression'))

    out = BatchNormalization(scale=bn_scale, axis=-1)(x)
    # `activation` is a layer factory: it is called to create the layer.
    out = activation()(out)

    if is_last:
        return GlobalAvgPool3D()(out)

    n_channels = out.get_shape().as_list()[-1]
    out = Conv3D(n_channels // compression,
                 kernel_size=(1, 1, 1),
                 padding='same',
                 use_bias=True,
                 kernel_initializer=kernel_initializer,
                 kernel_regularizer=l2_penalty(weight_decay))(out)
    return AveragePooling3D((2, 2, 2), padding='valid')(out)
def build_model(input_width, input_height, input_channels, class_size):
    """Build a six-block CNN + two-layer BLSTM network.

    Args:
        input_width: Size of input axis 1.
        input_height: Size of input axis 2.
        input_channels: Number of input channels.
        class_size: Number of units of the softmax output layer.

    Returns:
        Tuple ``(input_data, output_tensor, optimizer)`` where the optimizer
        is Adam with a learning rate of 1e-4.
    """
    input_data = Input((input_width, input_height, input_channels),
                       name="input")

    # (filters, max-pool after activation?, dropout rate after the block)
    conv_plan = (
        (16, True, None),
        (32, True, 0.2),
        (48, False, 0.2),
        (64, False, 0.2),
        (80, False, 0.2),
        (96, False, None),
    )
    features = input_data
    for n_filters, with_pool, block_dropout in conv_plan:
        features = Conv2D(filters=n_filters, kernel_size=(3, 3),
                          strides=(1, 1), padding="same")(features)
        features = BatchNormalization()(features)
        features = LeakyReLU(alpha=0.01)(features)
        if with_pool:
            features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                    padding="valid")(features)
        if block_dropout is not None:
            features = Dropout(rate=block_dropout)(features)

    static_shape = features.get_shape()
    # NOTE(review): the target shape uses axis 2 as the sequence axis, but
    # no Permute precedes the Reshape -- confirm this is intended.
    sequence = Reshape(
        (static_shape[2], static_shape[1] * static_shape[3]))(features)

    for _ in range(2):
        sequence = Bidirectional(
            LSTM(units=256, return_sequences=True, dropout=0.5))(sequence)
    sequence = Dropout(rate=0.5)(sequence)

    model = Dense(units=class_size, activation="softmax")(sequence)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

    return input_data, model, optimizer
def create_encoder(in_, depth, filters, kernel_size, activation, dilation,
                   padding, complexity_factor, regularizer=None,
                   name="encoder", name_prefix=""):
    """Build the encoder path and bottom layer of a U-shaped network.

    Each of the ``depth`` levels applies a ``(kernel_size, 1)`` convolution
    and batch normalization, zero-pads axis 1 by one row at the start when
    its length is odd, stores that tensor as a residual connection and
    max-pools axis 1 by two. After every level the filter count is
    multiplied by sqrt(2) and truncated to an int. The bottom layer is one
    more convolution (dilation 1) plus batch normalization.

    Args:
        in_: Input tensor.
        depth: Number of encoder levels.
        filters: Filter count of the first level (before scaling).
        kernel_size: Kernel length along axis 1.
        activation: Activation of every convolution.
        dilation: Dilation rate of the encoder convolutions.
        padding: Padding mode of every convolution.
        complexity_factor: Multiplier applied to every filter count.
        regularizer: Kernel and bias regularizer of every convolution.
        name: Base name of the encoder layers.
        name_prefix: Prefix for all layer names.

    Returns:
        Tuple ``(encoded, residual_connections, filters)`` with the bottom
        tensor, the per-level tensors and the final filter count.
    """
    encoder_name = f"{name_prefix}{name}"
    shared_conv_kwargs = dict(activation=activation,
                              padding=padding,
                              kernel_regularizer=regularizer,
                              bias_regularizer=regularizer)

    current = in_
    residual_connections = []
    for level in range(depth):
        level_name = encoder_name + "_L%i" % level

        conv_out = Conv2D(int(filters*complexity_factor), (kernel_size, 1),
                          dilation_rate=dilation,
                          name=level_name + "_conv1",
                          **shared_conv_kwargs)(current)
        normed = BatchNormalization(name=level_name + "_BN1")(conv_out)

        # Make axis 1 even so it can be halved by the pooling below.
        axis_len = normed.get_shape()[1]
        if axis_len % 2:
            normed = ZeroPadding2D(padding=[[1, 0], [0, 0]],
                                   name=level_name + "_padding")(normed)

        current = MaxPooling2D(pool_size=(2, 1),
                               name=level_name + "_pool")(normed)

        # The (possibly padded) pre-pool tensor feeds the residual link.
        residual_connections.append(normed)
        filters = int(filters * np.sqrt(2))

    # Bottom
    bottom_name = f"{name_prefix}bottom"
    bottom_conv = Conv2D(int(filters*complexity_factor), (kernel_size, 1),
                         dilation_rate=1,
                         name=bottom_name + "_conv1",
                         **shared_conv_kwargs)(current)
    encoded = BatchNormalization(name=bottom_name + "_BN1")(bottom_conv)

    return encoded, residual_connections, filters
def build_model():
    """Build the CRNN model (the original architecture from the CRNN paper).

    ``IM_WIDTH``, ``IM_HEIGHT``, ``CHANNELS`` and ``NR_CHARACTERS`` are read
    from the enclosing scope.

    Returns:
        A ``keras.Model`` named ``'CRNN'`` whose output is the linear
        per-step score over ``NR_CHARACTERS`` units.
    """
    def conv_relu(tensor, n_filters):
        # 3x3 convolution with a fused ReLU.
        return Conv2D(n_filters, 3, padding='same', activation='relu')(tensor)

    def conv_bn_relu(tensor, n_filters, kernel):
        # Bias-free convolution, batch normalization, then ReLU.
        tensor = Conv2D(n_filters, kernel, padding='same', use_bias=False)(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    def pool_both(tensor):
        return MaxPool2D(pool_size=2, padding='same')(tensor)

    def pool_axis_two(tensor):
        # Stride 1 on axis 1 keeps its length; only axis 2 is halved.
        return MaxPool2D(pool_size=2, strides=(1, 2), padding='same')(tensor)

    # note that the height and width are flipped
    input_image = keras.Input(shape=(IM_WIDTH, IM_HEIGHT, CHANNELS))

    net = pool_both(conv_relu(input_image, 64))
    net = pool_both(conv_relu(net, 128))

    net = conv_bn_relu(net, 256, 3)
    net = conv_relu(net, 256)
    net = pool_axis_two(net)

    net = conv_bn_relu(net, 512, 3)
    net = conv_relu(net, 512)
    net = pool_axis_two(net)

    net = conv_bn_relu(net, 512, 2)

    # Merge the two trailing axes into one feature vector per step.
    conv_shape = net.get_shape()
    n_steps = int(conv_shape[1])
    n_features = int(conv_shape[2] * conv_shape[3])
    net = Reshape((n_steps, n_features))(net)

    net = Dense(64, activation='relu')(net)
    for _ in range(2):
        net = Bidirectional(LSTM(units=256, return_sequences=True),
                            merge_mode='sum')(net)
    net = Dense(units=NR_CHARACTERS)(net)

    return keras.Model(inputs=input_image, outputs=net, name='CRNN')
def entry_flow(self, inputs):
    """Apply the entry convolution and the residual blocks, then flatten.

    The entry stage is a separable convolution, batch normalization and
    SELU. Each residual block applies SELU, a separable convolution, batch
    normalization and a 3x3/2 max-pool, and adds a strided 1x1 separable
    convolution of the block's input. Layer sizes come from the
    ``firstConv_*`` and ``entry_residual_*`` attributes of ``self``.

    Args:
        inputs: Input tensor.

    Returns:
        The flattened output of the last residual block.
    """
    # entry convolutional layers
    stem = SeparableConv2D(self.firstConv_filters,
                           self.firstConv_filterSize,
                           strides=self.firstConv_filterStride,
                           padding='same')(inputs)
    stem = BatchNormalization()(stem)
    stem = Activation('selu')(stem)

    block_input = stem
    print(" first conv layer ", block_input.get_shape().as_list())

    for block_idx in range(self.entry_residual_blocks):
        print(" residual block at ", block_idx, " ",
              block_input.get_shape().as_list())

        main = Activation('selu')(block_input)
        main = SeparableConv2D(self.entry_residual_filters,
                               self.entry_residual_filterSize,
                               strides=self.entry_residual_filterStride,
                               padding='same')(main)
        main = BatchNormalization()(main)
        # max pooling layer that we may potentially get rid of
        main = MaxPooling2D(3, strides=2, padding='same')(main)

        # the residual connection as described in the architecture diagram:
        # a strided 1x1 projection of the block's input
        skip = SeparableConv2D(self.entry_residual_filters, 1,
                               strides=2, padding='same')(block_input)

        block_input = Add()([main, skip])

    return Flatten()(block_input)
def add_unet_layer(model, network_config, remaining_layers, output_shape,
                   n_channels=None):
    """Recursively add one level of a 3D U-Net to ``model``.

    A level consists of a first convolution module, a cropped "forward
    link", and -- unless this is the terminal level -- a strided
    downsampling convolution, the recursively built lower levels, a
    transposed-convolution upsampling, a crop, concatenation with the
    forward link and a second convolution module.

    Args:
        model: Input tensor of this level.
        network_config: Configuration object; the attributes read here are
            ``unet_downsample_rate``, ``convolution_padding``,
            ``convolution_dim``, ``num_layers_per_module``,
            ``initialization``, ``convolution_activation``,
            ``batch_normalization`` and ``dropout_probability``.
        remaining_layers: Number of levels still to add below this one.
        output_shape: Spatial shape this level must produce.
        n_channels: Channel count of the first module; defaults to the
            channel count of ``model``.

    Returns:
        The output tensor of this level.
    """
    cfg = network_config

    if n_channels is None:
        n_channels = model.get_shape().as_list()[-1]

    # Per-axis flag: downsample when the axis' rate divides the level index.
    downsample = np.array([
        rate != 0 and remaining_layers % rate == 0
        for rate in cfg.unet_downsample_rate
    ])
    stride_steps = downsample + 1

    # Per-axis shrinkage caused by one convolution.
    if cfg.convolution_padding == 'same':
        conv_contract = np.zeros(3, dtype=np.int32)
    else:
        conv_contract = cfg.convolution_dim - 1

    kernel_dims = tuple(cfg.convolution_dim)
    shared_conv_kwargs = dict(kernel_initializer=cfg.initialization,
                              activation=cfg.convolution_activation)

    def batch_norm_if_enabled(tensor):
        if cfg.batch_normalization:
            return BatchNormalization()(tensor)
        return tensor

    # First U convolution module.
    final_idx = cfg.num_layers_per_module - 1
    for layer_idx in range(cfg.num_layers_per_module):
        if layer_idx == final_idx:
            # Increase the number of channels before downsampling to avoid
            # bottleneck (identical to 3D U-Net paper).
            n_channels = 2 * n_channels
        model = Conv3D(n_channels, kernel_dims,
                       padding=cfg.convolution_padding,
                       **shared_conv_kwargs)(model)
        model = batch_norm_if_enabled(model)

    # Crop and pass forward to upsampling.
    forward_link_shape = (
        output_shape + cfg.num_layers_per_module * conv_contract
        if remaining_layers > 0 else output_shape)
    spatial_dims = np.array(model.get_shape().as_list()[1:4])
    contraction = (spatial_dims - forward_link_shape) // 2
    symmetric_crop = list(zip(list(contraction), list(contraction)))
    forward = Cropping3D(symmetric_crop)(model)
    if cfg.dropout_probability > 0.0:
        forward = Dropout(cfg.dropout_probability)(forward)

    # Terminal layer of the U.
    if remaining_layers <= 0:
        return forward

    # Downsample and recurse.
    model = Conv3D(n_channels, kernel_dims,
                   strides=list(stride_steps),
                   padding='same',
                   **shared_conv_kwargs)(model)
    model = batch_norm_if_enabled(model)

    divisor = downsample.astype(np.float32) + 1.0
    next_output_shape = np.ceil(
        np.divide(forward_link_shape, divisor)).astype(np.int32)
    model = add_unet_layer(model, cfg, remaining_layers - 1,
                           next_output_shape.astype(np.int32))

    # Upsample output of previous layer and merge with forward link.
    model = Conv3DTranspose(n_channels * 2, kernel_dims,
                            strides=list(stride_steps),
                            padding='same',
                            **shared_conv_kwargs)(model)
    model = batch_norm_if_enabled(model)

    # Must crop output because Keras wrongly pads the output shape for odd
    # array sizes.
    stride_pad = ((cfg.convolution_dim // 2) * np.array(downsample)
                  + (1 - np.mod(forward_link_shape, 2)))
    tf_pad_start = stride_pad // 2  # Tensorflow puts odd padding at end.
    tf_pad_end = stride_pad - tf_pad_start
    model = Cropping3D(list(zip(list(tf_pad_start), list(tf_pad_end))))(model)

    model = concatenate([forward, model])

    # Second U convolution module.
    for _ in range(cfg.num_layers_per_module):
        model = Conv3D(n_channels, kernel_dims,
                       padding=cfg.convolution_padding,
                       **shared_conv_kwargs)(model)
        model = batch_norm_if_enabled(model)

    return model
def YOLOv3Net(cfgfile, model_size, num_classes):
    """Build a YOLOv3 Keras model from a parsed configuration file.

    The blocks returned by ``parse_cfg(cfgfile)`` are interpreted one by
    one (the first block is skipped). Supported block types are
    ``convolutional``, ``upsample``, ``route``, ``shortcut`` and ``yolo``.
    The predictions of all ``yolo`` blocks are concatenated along axis 1.

    Args:
        cfgfile: Passed to ``parse_cfg``.
        model_size: Input shape handed to ``Input``.
        num_classes: Number of class scores per box.

    Returns:
        A ``Model`` mapping the input image to the concatenated
        predictions; each prediction row is laid out as
        ``[box centers (2), box shapes (2), confidence (1), classes]``.
        ``model.summary()`` is printed as a side effect.
    """
    blocks = parse_cfg(cfgfile)
    # Per-block bookkeeping: output tensor and filter count of every block
    # processed so far, indexed by block position.
    outputs = {}
    output_filters = []
    filters = []
    out_pred = []
    # Becomes 1 once the first yolo block has produced a prediction.
    scale = 0
    inputs = input_image = Input(shape=model_size)
    # Scale the raw input by 1/255 before the first layer.
    inputs = inputs / 255.0
    for i, block in enumerate(blocks[1:]):
        # If it is a convolutional layer
        if (block["type"] == "convolutional"):
            activation = block["activation"]
            filters = int(block["filters"])
            kernel_size = int(block["size"])
            strides = int(block["stride"])
            # Strided convolutions use explicit top/left zero padding and
            # 'valid' padding instead of 'same'.
            if strides > 1:
                inputs = ZeroPadding2D(((1, 0), (1, 0)))(inputs)
            # The bias is dropped when batch normalization follows.
            inputs = Conv2D(filters, kernel_size, strides=strides, padding='valid' if strides > 1 else 'same', name='conv_' + str(i), use_bias=False if ("batch_normalize" in block) else True)(inputs)
            if "batch_normalize" in block:
                inputs = BatchNormalization(name='bnorm_' + str(i))(inputs)
            if activation == "leaky":
                inputs = LeakyReLU(alpha=0.1, name='leaky_' + str(i))(inputs)
        elif (block["type"] == "upsample"):
            stride = int(block["stride"])
            inputs = UpSampling2D(stride)(inputs)
        # If it is a route layer
        elif (block["type"] == "route"):
            block["layers"] = block["layers"].split(',')
            # `start` is used as an offset relative to the current index i.
            start = int(block["layers"][0])
            if len(block["layers"]) > 1:
                # The second entry is turned into an offset relative to i.
                end = int(block["layers"][1]) - i
                filters = output_filters[i + start] + output_filters[end]  # Negative index: end - index
                inputs = tf.concat([outputs[i + start], outputs[i + end]], axis=-1)
            else:
                filters = output_filters[i + start]
                inputs = outputs[i + start]
        elif block["type"] == "shortcut":
            # Element-wise sum of the previous output and an earlier one.
            from_ = int(block["from"])
            inputs = outputs[i - 1] + outputs[i + from_]
        # Yolo detection layer
        elif block["type"] == "yolo":
            mask = block["mask"].split(",")
            mask = [int(x) for x in mask]
            anchors = block["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            # Pair the flat anchor list into (first, second) tuples, then
            # keep only the anchors selected by the mask.
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in mask]
            n_anchors = len(anchors)
            out_shape = inputs.get_shape().as_list()
            # One row of (5 + num_classes) values per anchor per grid cell.
            inputs = tf.reshape(inputs, [-1, n_anchors * out_shape[1] * out_shape[2], \
                                         5 + num_classes])
            box_centers = inputs[:, :, 0:2]
            box_shapes = inputs[:, :, 2:4]
            confidence = inputs[:, :, 4:5]
            classes = inputs[:, :, 5:num_classes + 5]
            box_centers = tf.sigmoid(box_centers)
            confidence = tf.sigmoid(confidence)
            classes = tf.sigmoid(classes)
            # Repeat the anchors once per grid cell and scale them by the
            # exponentiated shape predictions.
            anchors = tf.tile(anchors, [out_shape[1] * out_shape[2], 1])
            box_shapes = tf.exp(box_shapes) * tf.cast(anchors, dtype=tf.float32)
            # Grid-cell offsets, repeated once per anchor.
            # NOTE(review): x spans axis 1 and y spans axis 2 of the feature
            # map -- confirm the ordering for non-square inputs.
            x = tf.range(out_shape[1], dtype=tf.float32)
            y = tf.range(out_shape[2], dtype=tf.float32)
            cx, cy = tf.meshgrid(x, y)
            cx = tf.reshape(cx, (-1, 1))
            cy = tf.reshape(cy, (-1, 1))
            cxy = tf.concat([cx, cy], axis=-1)
            cxy = tf.tile(cxy, [1, n_anchors])
            cxy = tf.reshape(cxy, [1, -1, 2])
            # Ratio between input size and feature-map size, per axis.
            strides = (input_image.shape[1] // out_shape[1], \
                       input_image.shape[2] // out_shape[2])
            box_centers = (box_centers + cxy) * strides
            prediction = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)
            # Append to the predictions of earlier yolo blocks, if any.
            if scale:
                out_pred = tf.concat([out_pred, prediction], axis=1)
            else:
                out_pred = prediction
                scale = 1
        # Record this block's result for later route/shortcut blocks.
        outputs[i] = inputs
        output_filters.append(filters)
    model = Model(input_image, out_pred)
    model.summary()
    return model
def yolov3_net(cfg_file, num_classes):
    """Build a YOLOv3 Keras model from a configuration file.

    The first block returned by ``parse_cfg(cfg_file)`` supplies the input
    size (``width``, ``height``, ``channels``, in that order); the remaining
    blocks are interpreted one by one. Supported block types are
    ``convolutional``, ``upsample``, ``route``, ``shortcut`` and ``yolo``.
    The predictions of all ``yolo`` blocks are concatenated along axis 1.

    Args:
        cfg_file: Passed to ``parse_cfg``.
        num_classes: Number of class scores per box.

    Returns:
        A ``Model`` mapping the input image to the concatenated
        predictions; each prediction row is laid out as
        ``[box centers (2), box shapes (2), confidence (1), classes]``.
        ``model.outputs`` is printed as a side effect.
    """
    blocks = parse_cfg(cfg_file)

    model_size = int(blocks[0]['width']), int(blocks[0]['height']), int(
        blocks[0]['channels'])

    # Per-block bookkeeping: output tensor and filter count of every block
    # processed so far, indexed by block position.
    outputs = {}
    output_filters = []
    filters = []
    out_pred = []
    # Counts the yolo blocks that have produced a prediction so far.
    scale = 0

    inputs = input_image = Input(shape=model_size)
    # Scale the raw input by 1/255 before the first layer.
    inputs = inputs / 255.0

    for i, block in enumerate(blocks[1:]):
        if block['type'] == 'convolutional':
            activation = block['activation']
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            strides = int(block['stride'])
            has_batch_norm = "batch_normalize" in block

            if strides > 1:
                # downsampling is performed, so we need to adjust the padding
                inputs = ZeroPadding2D(((1, 0), (1, 0)))(inputs)

            inputs = Conv2D(
                filters,
                kernel_size,
                strides=strides,
                padding='valid' if strides > 1 else 'same',
                name='conv_' + str(i),
                # The bias is dropped when batch normalization follows.
                use_bias=not has_batch_norm)(inputs)

            if has_batch_norm:
                inputs = BatchNormalization(name="batch_normalize_" +
                                            str(i))(inputs)
            if activation == 'leaky':
                inputs = LeakyReLU(alpha=0.1, name="leaky_" + str(i))(inputs)

        elif block['type'] == 'upsample':
            stride = int(block['stride'])
            inputs = UpSampling2D(stride)(inputs)

        elif block['type'] == 'route':
            block['layers'] = block['layers'].split(',')
            # `start` is used as an offset relative to the current index i.
            start = int(block['layers'][0])

            if len(block['layers']) > 1:
                # The second entry is turned into an offset relative to i.
                end = int(block['layers'][1]) - i
                # Negative index: end - index
                filters = output_filters[i + start] + output_filters[end]
                inputs = tf.concat([outputs[i + start], outputs[i + end]],
                                   axis=-1)
            else:
                filters = output_filters[i + start]
                inputs = outputs[i + start]

        elif block['type'] == 'shortcut':
            # Element-wise sum of the previous output and an earlier one.
            from_ = int(block['from'])
            inputs = outputs[i - 1] + outputs[i + from_]

        elif block['type'] == 'yolo':
            mask = [int(m) for m in block['mask'].split(',')]
            anchors = [int(a) for a in block['anchors'].split(',')]
            # Pair the flat anchor list into tuples, then keep only the
            # anchors selected by the mask.
            anchors = [(anchors[k], anchors[k + 1])
                       for k in range(0, len(anchors), 2)]
            anchors = [anchors[k] for k in mask]
            n_anchors = len(anchors)

            out_shape = inputs.get_shape().as_list()

            # One row of (5 + num_classes) values per anchor per grid cell.
            inputs = tf.reshape(
                inputs,
                [-1, n_anchors * out_shape[1] * out_shape[2],
                 5 + num_classes])

            box_centers = inputs[:, :, 0:2]
            box_shapes = inputs[:, :, 2:4]
            confidence = inputs[:, :, 4:5]
            classes = inputs[:, :, 5:5 + num_classes]

            # Refine bounding boxes
            box_centers = tf.sigmoid(box_centers)
            confidence = tf.sigmoid(confidence)
            classes = tf.sigmoid(classes)

            # Repeat the anchors once per grid cell and scale them by the
            # exponentiated shape predictions.
            anchors = tf.tile(anchors, [out_shape[1] * out_shape[2], 1])
            box_shapes = tf.exp(box_shapes) * tf.cast(anchors,
                                                      dtype=tf.float32)

            # Grid-cell offsets, repeated once per anchor.
            # NOTE(review): x spans axis 1 and y spans axis 2 of the feature
            # map -- confirm the ordering for non-square inputs.
            x = tf.range(out_shape[1], dtype=tf.float32)
            y = tf.range(out_shape[2], dtype=tf.float32)
            cx, cy = tf.meshgrid(x, y)
            cx = tf.reshape(cx, (-1, 1))
            cy = tf.reshape(cy, (-1, 1))
            cxy = tf.concat([cx, cy], axis=-1)
            cxy = tf.tile(cxy, [1, n_anchors])
            cxy = tf.reshape(cxy, [1, -1, 2])

            # Ratio between input size and feature-map size, per axis. Each
            # component must use its own input axis; the second one used to
            # read axis 1, which is only correct for square inputs.
            in_shape = input_image.get_shape().as_list()
            strides = (in_shape[1] // out_shape[1],
                       in_shape[2] // out_shape[2])
            box_centers = (box_centers + cxy) * strides

            prediction = tf.concat(
                [box_centers, box_shapes, confidence, classes], axis=-1)

            # Append to the predictions of earlier yolo blocks, if any.
            if scale:
                out_pred = tf.concat([out_pred, prediction], axis=1)
                scale += 1
            else:
                out_pred = prediction
                scale = 1

        # Record this block's result for later route/shortcut blocks.
        outputs[i] = inputs
        output_filters.append(filters)

    model = Model(input_image, out_pred)
    # model.summary()
    print(model.outputs)
    return model
def puigcerver(input_size, output_size, learning_rate=3e-4):
    """Assemble the Puigcerver convolutional-recurrent network.

    Five Conv2D/BatchNorm/LeakyReLU stages (16, 32, 48, 64, 80 filters)
    feed five bidirectional LSTM layers and a softmax Dense head.

    Reference:
        Puigcerver, J.: Are multidimensional recurrent layers really
        necessary for handwritten text recognition? In: Document Analysis
        and Recognition (ICDAR), 2017 14th IAPR International Conference on,
        vol. 1, pp. 67-72. IEEE (2017)

    :param input_size: shape passed to the ``Input`` layer.
    :param output_size: number of units of the final softmax layer.
    :param learning_rate: learning rate handed to ``RMSprop``.
    :return: tuple ``(input_data, output_data, optimizer)``.
    """
    input_data = Input(name="input", shape=input_size)

    # (filters, dropout before the conv, max-pool after the activation)
    conv_stages = (
        (16, False, True),
        (32, False, True),
        (48, True, True),
        (64, True, False),
        (80, True, False),
    )

    features = input_data
    for n_filters, drop_first, pool_after in conv_stages:
        if drop_first:
            features = Dropout(rate=0.2)(features)
        features = Conv2D(filters=n_filters,
                          kernel_size=(3, 3),
                          strides=(1, 1),
                          padding="same")(features)
        features = BatchNormalization()(features)
        features = LeakyReLU(alpha=0.01)(features)
        if pool_after:
            features = MaxPooling2D(pool_size=(2, 2),
                                    strides=(2, 2),
                                    padding="valid")(features)

    # Merge the last two axes so axis 1 becomes the sequence axis.
    feat_shape = features.get_shape()
    sequence = Reshape(
        (feat_shape[1], feat_shape[2] * feat_shape[3]))(features)

    for _ in range(5):
        sequence = Bidirectional(
            LSTM(units=256, return_sequences=True, dropout=0.5))(sequence)

    sequence = Dropout(rate=0.5)(sequence)
    output_data = Dense(units=output_size, activation="softmax")(sequence)

    optimizer = RMSprop(learning_rate=learning_rate)
    return (input_data, output_data, optimizer)
def identity_block(input_tensor,
                   kernel_size,
                   filters,
                   stage,
                   block,
                   squeeze=False,
                   squeeze_type='normal'):
    """The identity block is the block that has no conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at
            main path
        filters: list of integers, the filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer
            names
        squeeze: when true, a ``Squeeze_and_Excite`` layer is inserted at the
            position selected by ``squeeze_type``.
        squeeze_type: one of
            'pre'      - applied to the input of the main path,
            'normal'   - applied to the main path before the residual add,
            'post'     - applied after the residual add and ReLU,
            'identity' - applied to ``input_tensor`` and added to the
                         activated main path (the plain residual add is
                         skipped in this mode).

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = input_tensor
    if squeeze and squeeze_type == 'pre':
        squeeze_block = Squeeze_and_Excite(input_tensor.get_shape()[bn_axis])
        x = squeeze_block(x)

    # The first conv consumes `x`; it used to be applied to `input_tensor`,
    # which silently discarded the 'pre' squeeze output.
    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters2, kernel_size, padding='same',
               name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    if squeeze and squeeze_type == 'normal':
        squeeze_block = Squeeze_and_Excite(x.get_shape()[bn_axis])
        x = squeeze_block(x)

    if squeeze_type != 'identity':
        # Never have squeeze = False and squeeze_type = 'identity':
        # that combination produces a block without any shortcut.
        x = layers.add([x, input_tensor])
    x = Activation('relu')(x)

    if squeeze and squeeze_type == 'post':
        squeeze_block = Squeeze_and_Excite(x.get_shape()[bn_axis])
        x = squeeze_block(x)

    if squeeze and squeeze_type == 'identity':
        squeeze_block = Squeeze_and_Excite(x.get_shape()[bn_axis])
        y = squeeze_block(input_tensor)
        x = layers.add([y, x])

    return x
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2),
               squeeze=False,
               squeeze_type='normal'):
    """A block that has a conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of middle conv layer at
            main path
        filters: list of integers, the filters of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer
            names
        strides: Strides for the first conv layer in the block.
        squeeze: when true, a ``Squeeze_and_Excite`` layer is inserted at the
            position selected by ``squeeze_type``.
        squeeze_type: 'pre' (input of the main path), 'normal' (main path
            before the residual add) or 'post' (after the add and ReLU).

    # Returns
        Output tensor for the block.

    Note that from stage 3, the first conv layer at main path is with
    strides=(2, 2) and the shortcut should have strides=(2, 2) as well.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = input_tensor
    if squeeze and squeeze_type == 'pre':
        squeeze_block = Squeeze_and_Excite(input_tensor.get_shape()[bn_axis])
        x = squeeze_block(x)

    # Main path. The first conv consumes `x`; it used to be applied to
    # `input_tensor`, which silently discarded the 'pre' squeeze output.
    x = Conv2D(filters1, (1, 1), strides=strides,
               name=conv_name_base + '2a')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters2, kernel_size, padding='same',
               name=conv_name_base + '2b')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    # Projection shortcut; always taken from the raw block input.
    shortcut = Conv2D(filters3, (1, 1), strides=strides,
                      name=conv_name_base + '1')(input_tensor)
    shortcut = BatchNormalization(axis=bn_axis,
                                  name=bn_name_base + '1')(shortcut)

    if squeeze and squeeze_type == 'normal':
        squeeze_block = Squeeze_and_Excite(x.get_shape()[bn_axis])
        x = squeeze_block(x)

    x = layers.add([x, shortcut])
    x = Activation('relu')(x)

    if squeeze and squeeze_type == 'post':
        squeeze_block = Squeeze_and_Excite(x.get_shape()[bn_axis])
        x = squeeze_block(x)

    # The 'identity' squeeze mode is intentionally not implemented here.
    return x
def __init__(self,
             num_classes=19,
             output_stride=16,
             backbonetype='mobilenetv2',
             weights='imagenet',
             dl_input_shape=(None, 483, 769, 3),
             weight_decay=0.00004,
             pooling='global',
             residual_shortcut=False):
    """Build the CMSNet segmentation graph on top of a backbone.

    :param num_classes: (Default value = 19) number of filters of the final
        'segmentation' convolution.
    :param output_stride: (Default value = 16)
        if stride count is 4 remove stride from block 13 and insert atrous
        in 14, 15 and 16;
        if stride count is 3 remove stride from block 6/13 and insert atrous
        rate 2 in 7-13 and rate 4 in 14-16.
        With pooling='aspp' only 8 and 16 are supported.
    :param backbonetype: (Default value = 'mobilenetv2') forwarded to
        ``self._createBackbone``.
    :param weights: (Default value = 'imagenet') accepted for interface
        compatibility; it is not read anywhere in this method.
    :param dl_input_shape: (Default value = (None, 483, 769, 3)) input shape
        including the batch axis; axes 1:3 are the size the output is
        resized to.
    :param weight_decay: L2 factor of every convolution added here. Use
        0.00004 for MobileNet-V2 or Xception backbones, 0.0001 for ResNet.
    :param pooling: 'aspp', 'spp' or 'global'.
    :param residual_shortcut: when true, add the backbone layer named
        ``self.strideOutput8LayerName`` to the upsampled pooled features
        (requires output_stride == 16).
    :raises ValueError: pooling='aspp' with an output_stride other than
        8 or 16.
    """
    super(CMSNet, self).__init__(name='cmsnet')

    self.logger = logging.getLogger('perception.models.CMSNet')
    self.logger.info('creating an instance of CMSNet with backbone ' +
                     backbonetype + ', OS' + str(output_stride) +
                     ', nclass=' + str(num_classes) + ', input=' +
                     str(dl_input_shape) + ', pooling=' + pooling +
                     ', residual=' + str(residual_shortcut))

    self.num_classes = num_classes
    self.output_stride = output_stride
    self.dl_input_shape = dl_input_shape
    self._createBackbone(backbonetype=backbonetype,
                         output_stride=output_stride)

    tf_major_is_1 = tf.__version__.split('.')[0] == '1'

    def _conv_bn_relu(tensor, filters, kernel_size, name, **conv_kwargs):
        """Conv2D + BatchNormalization + ReLU named name / _BN / _relu."""
        tensor = Conv2D(filters=filters,
                        kernel_size=kernel_size,
                        name=name,
                        kernel_regularizer=l2(weight_decay),
                        **conv_kwargs)(tensor)
        tensor = BatchNormalization(name=name + '_BN')(tensor)  # epsilon=1e-5
        return ReLU(name=name + '_relu')(tensor)

    def _resize_bilinear(tensor, size, name):
        """Bilinear resize wrapped in a Lambda, for TF 1.x and later."""
        if tf_major_is_1:
            return Lambda(lambda t: tf.image.resize_bilinear(
                t, size, align_corners=True), name=name)(tensor)
        return Lambda(lambda t: tf.image.resize(
            t, size, method=tf.image.ResizeMethod.BILINEAR),
            name=name)(tensor)

    # All with 256 filters and batch normalization.
    # one 1x1 convolution and three 3x3 convolutions with
    # rates = (6, 12, 18) when output stride = 16.
    # Rates are doubled when output stride = 8.

    # Create Spatial Pyramid Pooling
    x = self.backbone.output
    pooling_shape = self.backbone.compute_output_shape(self.dl_input_shape)
    pooling_shape_float = tf.cast(pooling_shape[1:3], tf.float32)

    assert pooling in [
        'aspp', 'spp', 'global'
    ], "Only suported pooling= 'aspp', 'spp' or 'global'."

    if pooling == 'aspp':
        if output_stride == 16:
            rates = (6, 12, 18)
        elif output_stride == 8:
            rates = (12, 24, 36)
        else:
            # Previously fell through to an unbound `rates`.
            raise ValueError(
                "pooling='aspp' supports output_stride 8 or 16, got " +
                str(output_stride))

        # Grid level: pooling
        branches = [
            _conv_bn_relu(x, 256, 3, 'aspp_' + str(idx) + '_expand',
                          padding="same", dilation_rate=rate)
            for idx, rate in enumerate(rates)
        ]
        # Grid level: all
        branches.append(_conv_bn_relu(x, 256, 1, 'aspp_n_expand'))

        # Concatenate spatial pyramid pooling
        for branch in branches:
            branch.set_shape(
                pooling_shape[0:3].concatenate(branch.get_shape()[-1]))
        x = Concatenate(name='aspp_concatenate')(branches)

    elif pooling == 'spp':
        rates = (1, 2, 3, 6)
        branch_filters = int(pooling_shape[-1] / len(rates))

        # Grid level: pooling
        branches = []
        for idx, rate in enumerate(rates):
            prefix = 'spp_' + str(idx)
            branch = AvgPool2D(
                pool_size=tf.cast(pooling_shape_float / rate, tf.int32),
                padding="valid",
                name=prefix + '_average_pooling2d')(x)
            branch = _conv_bn_relu(branch, branch_filters, 1,
                                   prefix + '_expand')
            branch = _resize_bilinear(branch, pooling_shape[1:3],
                                      prefix + '_resize_bilinear')
            branches.append(branch)

        # Grid level: all
        xn = _conv_bn_relu(x, branch_filters, 1, 'spp_n_expand')

        # Concatenate spatial pyramid pooling
        xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
        # NOTE(review): the rate-6 branch (index 3) is built but has never
        # been part of this concatenation. Adding it would change the input
        # width of 'spp_concat_project', so it is left as-is here.
        x = Concatenate(name='spp_concatenate')(branches[:3] + [xn])

    elif pooling == 'global':
        # Grid level: pooling
        x0 = AvgPool2D(pool_size=pooling_shape[1:3],
                       padding="valid",
                       name='spp_0_average_pooling2d')(x)
        x0 = _conv_bn_relu(x0, 256, 1, 'spp_0_expand')
        x0 = _resize_bilinear(x0, pooling_shape[1:3],
                              'spp_0_resize_bilinear')

        # Grid level: all
        xn = _conv_bn_relu(x, 256, 1, 'spp_1_expand')

        # Concatenate spatial pyramid pooling
        xn.set_shape(pooling_shape[0:3].concatenate(xn.get_shape()[-1]))
        x = Concatenate(name='spp_concatenate')([x0, xn])

    # Concat projection
    x = _conv_bn_relu(x, 256, 1, 'spp_concat_project')

    if residual_shortcut:
        assert output_stride == 16, "For while residual shotcut is available for atous with os16."

        # e.g. block_6_project_BN (BatchNormal (None, 61, 97, 64)
        os8_layer = self.backbone.get_layer(self.strideOutput8LayerName)
        os8_shape = os8_layer.output_shape
        os8_output = os8_layer.output

        x = Conv2D(filters=os8_shape[-1],
                   kernel_size=1,
                   name='shotcut_2x_conv',
                   kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(name='shotcut_2x_BN')(x)  # epsilon=1e-5
        x = _resize_bilinear(x, os8_shape[1:3], 'shotcut_2x_bilinear')
        x = ReLU(name='shotcut_2x_relu')(x)
        x = Add(name='shotcut_2x_add')([x, os8_output])

    x = Dropout(rate=0.1, name='dropout')(x)

    # Semantic Segmentation
    x = Conv2D(filters=num_classes,
               kernel_size=1,
               name='segmentation',
               kernel_regularizer=l2(weight_decay))(x)
    x = _resize_bilinear(x, self.dl_input_shape[1:3],
                         'segmentation_bilinear')
    x = Softmax(name='logistic_softmax')(x)

    # Re-initialise as a functional model now that the graph exists.
    super(CMSNet, self).__init__(inputs=self.backbone.input,
                                 outputs=x,
                                 name='cmsnet')
# Encoder level 4: two 3x3 convs (256 filters) applied to `p3`, then 2x2 pool.
c4 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p3)
c4 = BatchNormalization()(c4)
c4 = Dropout(0.2) (c4)
c4 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c4)
c4 = BatchNormalization()(c4)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)
# Level 5: two 3x3 convs (512 filters) on the pooled features.
c5 = Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p4)
c5 = BatchNormalization()(c5)
c5 = Dropout(0.3) (c5)
c5 = Conv2D(512, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c5)
c5 = BatchNormalization()(c5)
# Decoder level 6: transpose-conv upsample of c5, gate the c4 skip with
# attention_block_2d, then concatenate the two.
u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c5)
# Axis 3 of c5; presumably the channel axis given data_format='channels_last'.
in_channel = c5.get_shape().as_list()[3]
c4 = attention_block_2d(x=c4, g=u6 ,inter_channel=in_channel // 4, data_format='channels_last')
u6 = concatenate([u6, c4])
c6 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u6)
c6 = BatchNormalization()(c6)
c6 = Dropout(0.2) (c6)
c6 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c6)
c6 = BatchNormalization()(c6)
# Decoder level 7: same pattern with the c3 skip connection.
u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c6)
# NOTE(review): in_channel is read from c5 again here, not c6, so both
# attention blocks get the same inter_channel - confirm this is intended.
in_channel = c5.get_shape().as_list()[3]
c3 = attention_block_2d(x=c3, g=u7,inter_channel=in_channel // 4, data_format='channels_last')
u7 = concatenate([u7, c3])
c7 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u7)
c7 = BatchNormalization()(c7)
c7 = Dropout(0.2) (c7)
def build_encoder_decoder():
    """Build the encoder/decoder model for a fixed 320x320x4 input.

    Encoder: three blocks of zero-padded 3x3 convolutions, an atrous
    pyramid (1x1 conv, three dilated 3x3 convs using ``ATROUS_RATES`` and an
    image-level pooled branch), then a fifth conv block. Each block output
    is kept before its 2x2 max-pool as a skip tensor.

    Decoder: five stages. Each stage upsamples by 2, stacks the result with
    the matching skip tensor along a new axis 1, passes the stack through
    ``Unpooling`` and applies 3x3 conv + batch-norm layers.

    :return: ``Model`` from the input tensor to a 1-channel sigmoid map.
    """

    def _padded_conv(tensor, filters, name):
        """ZeroPadding2D((1, 1)) followed by an unpadded 3x3 ReLU conv."""
        tensor = ZeroPadding2D((1, 1))(tensor)
        return Conv2D(filters, (3, 3), activation='relu', name=name)(tensor)

    def _unpool(tensor, skip):
        """Stack `skip` and `tensor` on a new axis 1 and apply Unpooling."""
        skip_shape = K.int_shape(skip)
        target = (1, skip_shape[1], skip_shape[2], skip_shape[3])
        skip_reshaped = Reshape(target)(skip)
        tensor_reshaped = Reshape(target)(tensor)
        stacked = Concatenate(axis=1)([skip_reshaped, tensor_reshaped])
        return Unpooling()(stacked)

    def _deconv(tensor, filters, name):
        """3x3 'same' ReLU conv (he_normal) followed by batch-norm."""
        tensor = Conv2D(filters, (3, 3),
                        activation='relu',
                        padding='same',
                        name=name,
                        kernel_initializer='he_normal',
                        bias_initializer='zeros')(tensor)
        return BatchNormalization()(tensor)

    # Encoder
    input_tensor = Input(shape=(320, 320, 4))
    x = _padded_conv(input_tensor, 64, 'conv1_1')
    x = BatchNormalization()(x)
    x = _padded_conv(x, 64, 'conv1_2')
    x = BatchNormalization()(x)
    orig_1 = x
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = _padded_conv(x, 128, 'conv2_1')
    x = _padded_conv(x, 128, 'conv2_2')
    orig_2 = x
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = _padded_conv(x, 256, 'conv3_1')
    x = _padded_conv(x, 256, 'conv3_2')
    x = _padded_conv(x, 256, 'conv3_3')
    orig_3 = x
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    # Atrous pyramid
    inputs_size = x.get_shape()[1:3]
    conv_4_1x1 = Conv2D(512, (1, 1), activation='relu', padding='same',
                        name='conv4_1x1')(x)
    conv_4_3x3_1 = Conv2D(512, (3, 3), activation='relu', padding='same',
                          dilation_rate=ATROUS_RATES[0],
                          name='conv4_3x3_1')(x)
    conv_4_3x3_2 = Conv2D(512, (3, 3), activation='relu', padding='same',
                          dilation_rate=ATROUS_RATES[1],
                          name='conv4_3x3_2')(x)
    conv_4_3x3_3 = Conv2D(512, (3, 3), activation='relu', padding='same',
                          dilation_rate=ATROUS_RATES[2],
                          name='conv4_3x3_3')(x)

    # Image average pooling
    image_level_features = Lambda(
        lambda t: tf.reduce_mean(t, [1, 2], keepdims=True),
        name='global_average_pooling')(x)
    image_level_features = Conv2D(
        512, (1, 1), activation='relu', padding='same',
        name='image_level_features_conv_1x1')(image_level_features)
    image_level_features = Lambda(
        lambda t: tf.image.resize(t, inputs_size),
        name='upsample_1')(image_level_features)

    # Concat
    x = Concatenate(axis=3)([
        conv_4_1x1, conv_4_3x3_1, conv_4_3x3_2, conv_4_3x3_3,
        image_level_features
    ])
    x = Conv2D(512, (1, 1), activation='relu', padding='same',
               name='conv_1x1_1_concat')(x)
    x = Conv2D(512, (1, 1), activation='relu', padding='same',
               name='conv_1x1_2_concat')(x)
    orig_4 = x
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = _padded_conv(x, 512, 'conv5_1')
    x = _padded_conv(x, 512, 'conv5_2')
    x = _padded_conv(x, 512, 'conv5_3')
    orig_5 = x
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    # Decoder
    # Every stage, including the first, must upsample before unpooling:
    # the pooled map has half the spatial size of its skip tensor and could
    # not be reshaped to the skip's shape otherwise.
    decoder_stages = (
        (orig_5, 512, 'deconv5', 3),
        (orig_4, 256, 'deconv4', 3),
        (orig_3, 128, 'deconv3', 3),
        (orig_2, 64, 'deconv2', 2),
        (orig_1, 64, 'deconv1', 2),
    )
    for skip, n_filters, prefix, n_convs in decoder_stages:
        x = UpSampling2D(size=(2, 2))(x)
        x = _unpool(x, skip)
        for k in range(1, n_convs + 1):
            x = _deconv(x, n_filters, prefix + '_' + str(k))

    x = Conv2D(1, (3, 3),
               activation='sigmoid',
               padding='same',
               name='pred',
               kernel_initializer='he_normal',
               bias_initializer='zeros')(x)

    model = Model(inputs=input_tensor, outputs=x)
    return model
def build_encoder_decoder():
    """Build the Xception-style encoder/decoder for a 320x320x4 input.

    Encoder: an entry flow (three convs plus three
    ``res_downsample_xception_block`` calls, each also returning a skip
    tensor), eight ``res_xception_block`` middle-flow blocks, an exit flow
    of depthwise/pointwise units and an atrous pyramid of separable convs
    using ``ATROUS_RATES`` plus an image-level pooled branch.

    Decoder: four stages. Each stage upsamples by 2, stacks the result with
    the matching skip tensor along a new axis 1, passes the stack through
    ``Unpooling`` and applies 3x3 conv + batch-norm layers.

    :return: ``Model`` from the input tensor to a 1-channel sigmoid map.
    """
    kernel = 3

    def _depthwise_unit(tensor, filters, pre_relu=True):
        """[ReLU] -> depthwise 3x3 -> BN -> pointwise 1x1 conv -> BN."""
        if pre_relu:
            tensor = Activation("relu")(tensor)
        tensor = DepthwiseConv2D((3, 3), padding="same")(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = Conv2D(filters, (1, 1), padding="same")(tensor)
        return BatchNormalization()(tensor)

    def _unpool(tensor, skip):
        """Stack `skip` and `tensor` on a new axis 1 and apply Unpooling."""
        skip_shape = K.int_shape(skip)
        target = (1, skip_shape[1], skip_shape[2], skip_shape[3])
        skip_reshaped = Reshape(target)(skip)
        tensor_reshaped = Reshape(target)(tensor)
        stacked = Concatenate(axis=1)([skip_reshaped, tensor_reshaped])
        return Unpooling()(stacked)

    def _deconv(tensor, filters, name):
        """3x3 'same' ReLU conv (he_normal) followed by batch-norm."""
        tensor = Conv2D(filters, (3, 3),
                        activation='relu',
                        padding='same',
                        name=name,
                        kernel_initializer='he_normal',
                        bias_initializer='zeros')(tensor)
        return BatchNormalization()(tensor)

    # Encoder
    input_tensor = Input(shape=(320, 320, 4))

    # Entry flow
    x = Conv2D(32, (3, 3), padding="same")(input_tensor)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(64, (3, 3), padding="same")(x)
    orig_1 = BatchNormalization()(x)  # full-resolution skip tensor
    x = Activation("relu")(orig_1)
    x = Conv2D(64, (3, 3), strides=(2, 2), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x, orig_2 = res_downsample_xception_block(x, 128)
    x, orig_3 = res_downsample_xception_block(x, 256, top_relu=True)
    x, orig_4 = res_downsample_xception_block(x, 728, top_relu=True)

    # Middle flow
    for _ in range(8):
        x = res_xception_block(x, 728)

    # Exit flow
    res = Conv2D(1024, (1, 1), padding="same")(x)
    res = BatchNormalization()(res)
    for n_filters in (728, 1024, 1024):
        x = _depthwise_unit(x, n_filters)
    x = Add()([x, res])

    x = _depthwise_unit(x, 1536, pre_relu=False)
    x = _depthwise_unit(x, 1536)
    x = _depthwise_unit(x, 2048)
    x = Activation("relu")(x)

    inputs_size = x.get_shape()[1:3]

    # Atrous convolution
    conv_4_1x1 = SeparableConv2D(256, (1, 1), activation='relu',
                                 padding='same', name='conv4_1x1')(x)
    conv_4_3x3_1 = SeparableConv2D(256, (kernel, kernel), activation='relu',
                                   padding='same',
                                   dilation_rate=ATROUS_RATES[0],
                                   name='conv4_3x3_1')(x)
    conv_4_3x3_2 = SeparableConv2D(256, (kernel, kernel), activation='relu',
                                   padding='same',
                                   dilation_rate=ATROUS_RATES[1],
                                   name='conv4_3x3_2')(x)
    conv_4_3x3_3 = SeparableConv2D(256, (kernel, kernel), activation='relu',
                                   padding='same',
                                   dilation_rate=ATROUS_RATES[2],
                                   name='conv4_3x3_3')(x)

    # Image average pooling
    image_level_features = Lambda(
        lambda t: tf.reduce_mean(t, [1, 2], keepdims=True),
        name='global_average_pooling')(x)
    image_level_features = Conv2D(
        256, (1, 1), activation='relu', padding='same',
        name='image_level_features_conv_1x1')(image_level_features)
    image_level_features = Lambda(
        lambda t: tf.image.resize_bilinear(t, inputs_size),
        name='upsample_1')(image_level_features)

    # Concat
    x = Concatenate(axis=3)([
        conv_4_1x1, conv_4_3x3_1, conv_4_3x3_2, conv_4_3x3_3,
        image_level_features
    ])
    x = Conv2D(256, (1, 1), activation='relu', padding='same',
               name='conv_1x1_1_concat')(x)
    x = Conv2D(728, (1, 1), activation='relu', padding='same',
               name='conv_1x1_2_concat')(x)

    # Decoder
    # NOTE(review): assumes each res_downsample_xception_block returns its
    # skip tensor at twice the resolution of its output, so every stage
    # (including the first) upsamples before unpooling - confirm against
    # that helper.
    decoder_stages = (
        (orig_4, 256, 'deconv4', 3),
        (orig_3, 128, 'deconv3', 3),
        (orig_2, 64, 'deconv2', 2),
        (orig_1, 64, 'deconv1', 2),
    )
    for skip, n_filters, prefix, n_convs in decoder_stages:
        x = UpSampling2D(size=(2, 2))(x)
        x = _unpool(x, skip)
        for k in range(1, n_convs + 1):
            x = _deconv(x, n_filters, prefix + '_' + str(k))

    x = Conv2D(1, (3, 3),
               activation='sigmoid',
               padding='same',
               name='pred',
               kernel_initializer='he_normal',
               bias_initializer='zeros')(x)

    model = Model(inputs=input_tensor, outputs=x)
    return model
# Pre-activation residual block (BN -> ReLU -> Conv, twice) plus a shortcut
# produced by `_short`, wired into a Keras model.
input_shape = (28, 28, 1)
# NOTE(review): this takes the width (28), not the channel axis, as the
# number of conv filters - confirm that is intended.
channel = input_shape[1]

inputs = Input(shape=input_shape)
h = BatchNormalization(name='ResidualBlock_bn1')(inputs)
h = tf.nn.relu(h)
h = Conv2D(channel, kernel_size=(3, 3), padding='same',
           name='ResidualBlock_Conv2')(h)
h = BatchNormalization(name='ResidualBlock_bn2')(h)
h = tf.nn.relu(h)
h = Conv2D(channel, kernel_size=(3, 3), padding='same',
           name='ResidualBlock_Conv3')(h)
print(h)

# Called with the input and output channel counts.
# Presumably returns the shortcut tensor; verify against `_short`.
sc = _short(inputs.shape[-1], h.get_shape()[-1])
print(h, sc)

# Instantiate the layer once, then call it on the tensors. The previous
# `Add()()([h, sc])` called the layer with no inputs and raised TypeError.
outputs = tf.keras.layers.Add()([h, sc])
model = Model(inputs=inputs, outputs=outputs)

# Reference usage of the Add layer:
# input1 = tf.keras.layers.Input(shape=(16,))
# x1 = tf.keras.layers.Dense(8, activation='relu')(input1)
# input2 = tf.keras.layers.Input(shape=(32,))
# x2 = tf.keras.layers.Dense(8, activation='relu')(input2)
# # equivalent to `added = tf.keras.layers.add([x1, x2])`
# added = tf.keras.layers.Add()([x1, x2])
# out = tf.keras.layers.Dense(4)(added)
# model = tf.keras.models.Model(inputs=[input1, input2], outputs=out)
def CRNN_STN(cfg):
    """Build the convolutional-recurrent network with a spatial transformer.

    Args:
        cfg: configuration object. The attributes read here are ``width``,
            ``height``, ``nb_channels``, ``conv_filter_size`` (indices 0-6),
            ``lstm_nb_units`` (indices 0-1), ``dropout_rate``, ``characters``
            and ``label_len``.

    Returns:
        A ``(training_model, prediction_model)`` tuple. The prediction model
        maps the image input to the softmax output of ``fc_12``. The training
        model additionally takes ``labels``, ``input_length`` and
        ``label_length`` inputs and outputs the ``ctc`` Lambda layer.
    """

    def conv_relu(number, tensor):
        # 3x3 same-padded ReLU convolution; filter count comes from cfg.
        layer = Conv2D(cfg.conv_filter_size[number - 1], (3, 3),
                       activation='relu', padding='same',
                       name='conv_{}'.format(number))
        return layer(tensor)

    image = Input((cfg.width, cfg.height, cfg.nb_channels))

    # Convolutional feature extractor: three stages, the first two followed
    # by 2x2 max pooling.
    feat = conv_relu(1, image)
    feat = conv_relu(2, feat)
    feat = conv_relu(3, feat)
    feat = BatchNormalization(name='bn_3')(feat)
    feat = MaxPooling2D(pool_size=(2, 2), name='maxpool_3')(feat)

    feat = conv_relu(4, feat)
    feat = conv_relu(5, feat)
    feat = BatchNormalization(name='bn_5')(feat)
    feat = MaxPooling2D(pool_size=(2, 2), name='maxpool_5')(feat)

    feat = conv_relu(6, feat)
    feat = conv_relu(7, feat)
    feat = BatchNormalization(name='bn_7')(feat)

    # Spatial transformer operating on (and preserving the size of) the
    # final feature map.
    feat_shape = feat.get_shape()
    loc_shape = (feat_shape[1], feat_shape[2], feat_shape[3])
    localization = loc_net(loc_shape)
    warped = SpatialTransformer(
        localization_net=localization,
        output_size=(loc_shape[0], loc_shape[1]),
    )(feat)

    # Collapse the last two axes so that axis 1 becomes the sequence axis.
    steps = int(feat_shape[1])
    step_width = int(feat_shape[2] * feat_shape[3])
    sequence = Reshape(target_shape=(steps, step_width), name='reshape')(warped)
    sequence = Dense(cfg.lstm_nb_units[0], activation='relu', name='fc_9')(sequence)

    # Two hand-rolled bidirectional stages: summed first, concatenated second.
    # NOTE(review): the go_backwards=True outputs are merged without being
    # re-reversed along time -- confirm this alignment is intended.
    shared = dict(kernel_initializer="he_normal", return_sequences=True)
    fwd_a = LSTM(cfg.lstm_nb_units[0], name='lstm_10', **shared)(sequence)
    bwd_a = LSTM(cfg.lstm_nb_units[0], go_backwards=True,
                 name='lstm_10_back', **shared)(sequence)
    summed = add([fwd_a, bwd_a])

    fwd_b = LSTM(cfg.lstm_nb_units[1], name='lstm_11', **shared)(summed)
    bwd_b = LSTM(cfg.lstm_nb_units[1], go_backwards=True,
                 name='lstm_11_back', **shared)(summed)
    merged = concatenate([fwd_b, bwd_b])

    dropped = Dropout(cfg.dropout_rate, name='dropout')(merged)
    char_probs = Dense(len(cfg.characters), kernel_initializer='he_normal',
                       activation='softmax', name='fc_12')(dropped)

    prediction_model = Model(inputs=image, outputs=char_probs)

    # Extra inputs consumed only by the CTC loss layer.
    labels = Input(name='labels', shape=[cfg.label_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    ctc_inputs = [char_probs, labels, input_length, label_length]
    ctc_loss = Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')(ctc_inputs)

    training_model = Model(
        inputs=[image, labels, input_length, label_length],
        outputs=[ctc_loss],
    )
    return training_model, prediction_model
def build_encoder_decoder():
    """Build an Xception-style encoder with an ASPP head and a small decoder.

    The model takes a fixed ``(320, 320, 4)`` input and produces a
    single-channel sigmoid map that is bilinearly resized back to the
    input's spatial size before the final ``pred`` convolution.

    Returns:
        A Keras ``Model`` from the 4-channel input tensor to the ``pred``
        output.
    """
    # Encoder
    input_tensor = Input(shape=(320, 320, 4))
    input_tensor_shape = input_tensor.get_shape()[1:3]

    # Entry flow
    x = Conv2D(32, (3, 3), strides=(2, 2), padding="same")(input_tensor)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(64, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    # The second value returned by the 256-filter block is kept as the
    # low-level feature map that the decoder concatenates with later.
    x, _ = res_downsample_xception_block(x, 128)
    x, low_level_feature = res_downsample_xception_block(x, 256, top_relu=True)
    x, _ = res_downsample_xception_block(x, 728, top_relu=True)

    # Middle flow
    for _ in range(8):
        x = res_xception_block(x, 728)

    # Exit flow: residual unit with a 1x1 projection shortcut.
    res = Conv2D(1024, (1, 1), padding="same")(x)
    res = BatchNormalization()(res)
    for filters in (728, 1024, 1024):
        x = Activation("relu")(x)
        x = DepthwiseConv2D((3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Conv2D(filters, (1, 1), padding="same")(x)
        x = BatchNormalization()(x)
    x = Add()([x, res])

    # Exit flow: three depthwise + pointwise stages, each ending in ReLU.
    for filters in (1536, 1536, 2048):
        x = DepthwiseConv2D((3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Conv2D(filters, (1, 1), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)

    inputs_size = x.get_shape()[1:3]

    # Atrous convolution: one 1x1 branch plus three dilated branches.
    b0 = Conv2D(256, (1, 1), padding="same")(x)
    b0 = BatchNormalization()(b0)
    b0 = Activation("relu")(b0)

    # The third rate was previously a duplicate of the second (12); using
    # 6 / 12 / 18 gives each branch a distinct receptive field.
    # NOTE(review): weight shapes are unchanged, but checkpoints trained with
    # the duplicated rate will produce different activations -- retrain or
    # fine-tune as needed.
    atrous_branches = []
    for rate in (6, 12, 18):
        branch = DepthwiseConv2D((3, 3), dilation_rate=(rate, rate),
                                 padding="same")(x)
        branch = BatchNormalization()(branch)
        branch = Activation("relu")(branch)
        branch = Conv2D(256, (1, 1), padding="same")(branch)
        branch = BatchNormalization()(branch)
        branch = Activation("relu")(branch)
        atrous_branches.append(branch)

    # Image average pooling: global mean, 1x1 conv, resize back to the
    # encoder output's spatial size.
    image_level_features = Lambda(
        lambda t: tf.reduce_mean(t, [1, 2], keepdims=True),
        name='global_average_pooling')(x)
    image_level_features = Conv2D(
        256, (1, 1), activation='relu', padding='same',
        name='image_level_features_conv_1x1')(image_level_features)
    image_level_features = Lambda(
        lambda t: tf.image.resize_bilinear(t, inputs_size),
        name='upsample_1')(image_level_features)

    # Concat
    x = Concatenate(axis=3)([b0] + atrous_branches + [image_level_features])
    x = Conv2D(256, (1, 1), activation='relu', padding='same',
               name='conv_1x1_concat')(x)

    # Decoder
    low_level_feature_shape = low_level_feature.get_shape()[1:3]
    x = Lambda(lambda t: tf.image.resize_bilinear(t, low_level_feature_shape),
               name="upsample_2")(x)
    x = Concatenate(axis=3)([x, low_level_feature])
    x = Conv2D(256, (3, 3), activation='relu', padding='same',
               kernel_initializer='he_normal', bias_initializer='zeros',
               name="dev_conv3_1")(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same',
               kernel_initializer='he_normal', bias_initializer='zeros',
               name="dev_conv3_2")(x)
    x = Lambda(lambda t: tf.image.resize_bilinear(t, input_tensor_shape),
               name="upsample_3")(x)
    x = Conv2D(1, (3, 3), activation='sigmoid', padding='same',
               kernel_initializer='he_normal', bias_initializer='zeros',
               name='pred')(x)

    model = Model(inputs=input_tensor, outputs=x)
    return model