def network(outputs, units, depth, n_labels, direction, dropout, init_filters, lstm=False):
    # NOTE: `cudnn` is read as a module-level flag; `n_labels` and
    # `init_filters` are accepted but unused in this body.
    for n in range(depth):
        if direction == 'bi':
            if lstm is True:
                if cudnn is True:
                    outputs = Bidirectional(CuDNNLSTM(units, return_sequences=True))(outputs)
                else:
                    outputs = Bidirectional(LSTM(units,
                                                 kernel_initializer='glorot_uniform',
                                                 return_sequences=True,
                                                 unit_forget_bias=True,  # the original's `use_forget_bias` is not a Keras kwarg
                                                 dropout=dropout,
                                                 unroll=False))(outputs)
            else:
                if cudnn is False:
                    outputs = Bidirectional(GRU(units,
                                                kernel_initializer='glorot_uniform',
                                                return_sequences=True,
                                                dropout=dropout,
                                                unroll=False))(outputs)
                else:
                    outputs = Bidirectional(CuDNNGRU(units, return_sequences=True))(outputs)
        else:
            if lstm is True:
                if cudnn is True:
                    outputs = CuDNNLSTM(units, return_sequences=True)(outputs)
                else:
                    outputs = LSTM(units,
                                   kernel_initializer='glorot_uniform',
                                   return_sequences=True,
                                   unit_forget_bias=True,
                                   dropout=dropout,
                                   unroll=False)(outputs)
            else:
                if cudnn is True:
                    outputs = CuDNNGRU(units, return_sequences=True)(outputs)
                else:
                    outputs = GRU(units,
                                  kernel_initializer='glorot_uniform',
                                  return_sequences=True,
                                  dropout=dropout,
                                  unroll=False)(outputs)
        outputs = layer_normalization.LayerNormalization()(outputs)
    return outputs
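# Usage sketch for network() above -- a minimal, assumption-laden example.
# network() reads a module-level `cudnn` flag that it never receives as an
# argument, so the flag must exist before the call; the input shape and
# hyper-parameters here are illustrative, not taken from the original code.
def demo_network():
    seq_in = Input(shape=(100, 40))  # (timesteps, features), hypothetical
    seq_out = network(seq_in, units=128, depth=2, n_labels=None,
                      direction='bi', dropout=0.2, init_filters=None, lstm=False)
    return Model(inputs=seq_in, outputs=seq_out)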
def build_model():
    seq_input1 = Input(shape=(seq_size, dim), name='seq1')
    seq_input2 = Input(shape=(seq_size, dim), name='seq2')
    # Shared convolutional and recurrent layers applied to both inputs (siamese setup).
    l1 = Conv1D(hidden_dim, 3)
    r1 = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))
    l2 = Conv1D(hidden_dim, 3)
    r2 = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))
    l3 = Conv1D(hidden_dim, 3)
    r3 = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))
    l4 = Conv1D(hidden_dim, 3)
    r4 = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))
    l5 = Conv1D(hidden_dim, 3)
    r5 = Bidirectional(CuDNNGRU(hidden_dim, return_sequences=True))
    l6 = Conv1D(hidden_dim, 3)

    s1 = MaxPooling1D(3)(l1(seq_input1))
    s1 = concatenate([r1(s1), s1])
    s1 = MaxPooling1D(3)(l2(s1))
    s1 = concatenate([r2(s1), s1])
    s1 = MaxPooling1D(3)(l3(s1))
    s1 = concatenate([r3(s1), s1])
    s1 = MaxPooling1D(3)(l4(s1))
    s1 = concatenate([r4(s1), s1])
    s1 = MaxPooling1D(3)(l5(s1))
    s1 = concatenate([r5(s1), s1])
    s1 = l6(s1)
    s1 = GlobalAveragePooling1D()(s1)

    s2 = MaxPooling1D(3)(l1(seq_input2))
    s2 = concatenate([r1(s2), s2])
    s2 = MaxPooling1D(3)(l2(s2))
    s2 = concatenate([r2(s2), s2])
    s2 = MaxPooling1D(3)(l3(s2))
    s2 = concatenate([r3(s2), s2])
    s2 = MaxPooling1D(3)(l4(s2))
    s2 = concatenate([r4(s2), s2])
    s2 = MaxPooling1D(3)(l5(s2))
    s2 = concatenate([r5(s2), s2])
    s2 = l6(s2)
    s2 = GlobalAveragePooling1D()(s2)

    # Element-wise interaction of the two sequence embeddings.
    merge_text = multiply([s1, s2])
    x = Dense(100, activation='linear')(merge_text)
    x = keras.layers.LeakyReLU(alpha=0.3)(x)
    x = Dense(int((hidden_dim + 7) / 2), activation='linear')(x)
    x = keras.layers.LeakyReLU(alpha=0.3)(x)
    main_output = Dense(2, activation='softmax')(x)
    merge_model = Model(inputs=[seq_input1, seq_input2], outputs=[main_output])
    return merge_model
def BiGRU_base(length, out_length, para):
    ed = para['embedding_dimension']
    ps = para['pool_size']
    fd = para['fully_dimension']
    dp = para['drop_out']
    lr = para['learning_rate']
    l2value = 0.001
    main_input = Input(shape=(length,), dtype='int64', name='main_input')
    x = Embedding(output_dim=ed, input_dim=21, input_length=length)(main_input)
    # Parallel convolutions with different kernel sizes (Keras 2 argument names;
    # the original used the deprecated border_mode/W_regularizer/pool_length spellings).
    a = Conv1D(64, 2, activation='relu', padding='same', kernel_regularizer=l2(l2value))(x)
    apool = MaxPooling1D(pool_size=ps, strides=1, padding='same')(a)
    b = Conv1D(64, 3, activation='relu', padding='same', kernel_regularizer=l2(l2value))(x)
    bpool = MaxPooling1D(pool_size=ps, strides=1, padding='same')(b)
    c = Conv1D(64, 8, activation='relu', padding='same', kernel_regularizer=l2(l2value))(x)
    cpool = MaxPooling1D(pool_size=ps, strides=1, padding='same')(c)
    merge = Concatenate(axis=-1)([apool, bpool, cpool])
    merge = Dropout(dp)(merge)
    x = Bidirectional(CuDNNGRU(50, return_sequences=True))(merge)
    x = Flatten()(x)
    x = Dense(fd, activation='relu', name='FC1', kernel_regularizer=l2(l2value))(x)
    # output = Dense(out_length, activation='sigmoid', name='output')(x)
    output = Dense(out_length, activation='sigmoid', name='output', kernel_regularizer=l2(l2value))(x)
    model = Model(inputs=main_input, outputs=output)
    adam = Adam(lr=lr)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    return model
def gru_keras(max_features, maxlen, bidirectional, dropout_rate, embed_dim,
              rec_units, mtype='GRU', reduction=None):
    if K.backend() == 'tensorflow':  # K.backend is a function; the original forgot the call
        K.clear_session()

    input_layer = Input(shape=(maxlen,))
    embedding_layer = Embedding(max_features, output_dim=embed_dim, trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)

    if reduction:
        if mtype == 'GRU':
            if bidirectional:
                x = Bidirectional(CuDNNGRU(units=rec_units, return_sequences=True))(x)
            else:
                x = CuDNNGRU(units=rec_units, return_sequences=True)(x)
        elif mtype == 'LSTM':
            if bidirectional:
                x = Bidirectional(CuDNNLSTM(units=rec_units, return_sequences=True))(x)
            else:
                x = CuDNNLSTM(units=rec_units, return_sequences=True)(x)

        if reduction == 'average':
            x = GlobalAveragePooling1D()(x)
        elif reduction == 'maximum':
            x = GlobalMaxPool1D()(x)
        elif reduction == 'attention':
            x = AttentionWithContext()(x)
    else:
        if mtype == 'GRU':
            if bidirectional:
                x = Bidirectional(CuDNNGRU(units=rec_units, return_sequences=False))(x)
            else:
                x = CuDNNGRU(units=rec_units, return_sequences=False)(x)
        elif mtype == 'LSTM':
            if bidirectional:
                x = Bidirectional(CuDNNLSTM(units=rec_units, return_sequences=False))(x)
            else:
                x = CuDNNLSTM(units=rec_units, return_sequences=False)(x)

    output_layer = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model
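# Usage sketch for gru_keras() above; the vocabulary size, sequence length and
# unit counts are illustrative assumptions, not values from the original code.
def demo_gru_keras():
    return gru_keras(max_features=20000, maxlen=100, bidirectional=True,
                     dropout_rate=0.2, embed_dim=128, rec_units=64,
                     mtype='GRU', reduction='average')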
def create_model(self, vocab_size, embedding_matrix, input_length=5000, embed_dim=200):
    rnn_input = Input(shape=(input_length,))
    embedding = self.embedding_layers(rnn_input, vocab_size, embedding_matrix,
                                      dropout=0.5, noise=0.0,
                                      input_length=input_length,
                                      embed_dim=embed_dim)
    # Word-level encoder with attention.
    word_encoder = Bidirectional(
        CuDNNGRU(50, return_sequences=True,
                 recurrent_regularizer=l2(0.0001),
                 kernel_regularizer=l2(0.0001)))(embedding)
    word_encoder = SpatialDropout1D(0.1)(word_encoder)
    word_attention = FeedForwardAttention()(word_encoder)
    word_attention = Dropout(0.5)(word_attention)
    word_attention = Reshape((1, 50 * 2))(word_attention)
    # Sentence-level encoder with attention.
    sentence_encoder = Bidirectional(
        CuDNNGRU(50, return_sequences=True,
                 recurrent_regularizer=l2(0.0001),
                 kernel_regularizer=l2(0.0001)))(word_attention)
    sentence_encoder = SpatialDropout1D(0.1)(sentence_encoder)
    sent_attention = FeedForwardAttention()(sentence_encoder)
    sent_attention = Dropout(0.5)(sent_attention)
    outputs = Dense(self.num_classes, activation='sigmoid')(sent_attention)
    model = Model(inputs=rnn_input, outputs=outputs)
    return model
def model_fun(input_pci_model):
    # NOTE: `gpu` is read as a module-level string flag ("-1" selects the CPU
    # path); `varirate` is the attribute name used by the surrounding code.
    pars = input_pci_model.hyper_pars.varirate
    input_title = Input(shape=(pars['lstm1_max_len'],))
    embedding_matrix = input_pci_model.load_embedding_matrix()
    net_title = Embedding(embedding_matrix.shape[0],
                          embedding_matrix.shape[1],
                          weights=[embedding_matrix],
                          input_length=pars['lstm1_max_len'],
                          trainable=False)(input_title)
    for i in range(1, pars['lstm1_layer']):
        if gpu != "-1":
            net_title = CuDNNGRU(pars['lstm1_neurons'], return_sequences=True)(net_title)
        else:
            net_title = GRU(pars['lstm1_neurons'], return_sequences=True)(net_title)
    if gpu != "-1":
        net_title = CuDNNGRU(pars['lstm1_neurons'])(net_title)
    else:
        net_title = GRU(pars['lstm1_neurons'])(net_title)
    net_title = Dropout(pars['lstm1_dropout'])(net_title)

    input_meta = Input(shape=(input_pci_model.X_train[1].shape[1],))
    net_meta = Dense(pars['meta_neurons'], activation='relu')(input_meta)
    net_meta = Dropout(pars['meta_dropout'])(net_meta)
    for i in range(1, pars['meta_layer']):
        net_meta = Dense(pars['meta_neurons'], activation='relu')(net_meta)
        net_meta = Dropout(pars['meta_dropout'])(net_meta)

    net_combined = keras.layers.concatenate([net_title, net_meta])
    for i in range(1, pars['fc_layer'] + 1):
        net_combined = Dense(pars['fc_neurons'], activation='relu')(net_combined)
        net_combined = Dropout(pars['fc_dropout'])(net_combined)
    net_combined = Dense(1, activation='sigmoid')(net_combined)
    out = keras.models.Model(inputs=[input_title, input_meta], outputs=[net_combined])
    return out
def GRUConvdeep3V2(params):
    Embedding_layer = Embedding(params['nb_words'],
                                params['embedding_dim'],
                                weights=[params['embedding_matrix']],
                                input_length=params['sequence_length'],
                                trainable=False)
    input_ = Input(shape=(params['sequence_length'],))
    embed_input_ = Embedding_layer(input_)
    x = Activation('tanh')(embed_input_)
    x = SpatialDropout1D(0.1, name='embed_drop')(x)
    if params['bidirectional']:
        x_g1 = Bidirectional(CuDNNGRU(params['lstm_units'], return_sequences=True))(x)
        x_l1 = Bidirectional(CuDNNLSTM(params['lstm_units'], return_sequences=True))(x_g1)
        x_g2 = Bidirectional(CuDNNGRU(params['lstm_units'], return_sequences=True))(x_l1)
        x1 = GlobalMaxPooling1D()(x_g2)
        x2 = GlobalAveragePooling1D()(x_g2)
        x3 = AttentionWithContext()(x_g2)
        merge_layer = concatenate([x1, x2, x3])  # RNN-only pooling; superseded by merge_layer2 below
        x_conv = Conv1D(64, kernel_size=3, padding='valid',
                        kernel_initializer='glorot_uniform')(x_g2)
        x1_conv = GlobalMaxPooling1D()(x_conv)
        x2_conv = GlobalAveragePooling1D()(x_conv)
        x3_conv = AttentionWithContext()(x_conv)
        merge_layer2 = concatenate([x1, x2, x3, x1_conv, x2_conv, x3_conv])
    x = Dropout(params['dropout_rate'])(merge_layer2)
    x = Dense(256, activation='relu')(x)
    x = Dropout(params['dropout_rate'])(x)
    x = Dense(6, activation='sigmoid')(x)
    model = Model(inputs=input_, outputs=x)
    model.compile(loss=params['loss'],
                  optimizer=params['optimizer'],
                  metrics=['accuracy'])
    return model
def buildmodel():
    comment_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)
    embedded_sequences = embedding_layer(comment_input)
    # embedded_sequences = Dropout(0.1)(embedded_sequences)
    embedded_sequences = SpatialDropout1D(0.1)(embedded_sequences)
    x = Bidirectional(CuDNNGRU(128, return_sequences=True, go_backwards=True))(embedded_sequences)
    # The original fed embedded_sequences to the second GRU as well, which
    # silently discarded the first layer's output; stack the layers instead.
    x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
    x = Dropout(0.1)(x)
    x = Conv1D(64, kernel_size=3, padding="same", kernel_initializer="he_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)

    char_input = Input(shape=(MAX_CHAR_SEQUENCE_LENGTH,))
    char_embedding_layer = Embedding(nb_chars, 256)  # renamed to avoid shadowing the word embedding
    char_emb_sequences = char_embedding_layer(char_input)
    convs = []
    for i in range(1, 8):
        conv = Conv1D(64, kernel_size=i, padding='valid')(char_emb_sequences)
        conv = PReLU()(conv)
        conv = Dropout(0.1)(conv)
        conv = GlobalMaxPooling1D()(conv)
        convs.append(conv)
    char_merged = concatenate(convs)

    merged = concatenate([avg_pool, max_pool, char_merged])
    merged = Dropout(0.1)(merged)
    preds = Dense(6, activation='sigmoid')(merged)

    ########################################
    ## train the model
    ########################################
    model = Model(inputs=[comment_input, char_input], outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer=keras.optimizers.Adam(lr=1e-3),
                  metrics=['accuracy'])
    # print(model.summary())
    return model
def build_discriminator(self):
    model = Sequential()
    model.add(CuDNNGRU(512, input_shape=self.seq_shape, return_sequences=True))
    model.add(Bidirectional(CuDNNGRU(512)))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()

    seq = Input(shape=self.seq_shape)
    validity = model(seq)
    # model.save('gru_gan_discriminator_arch.h5')
    return Model(seq, validity)
def TimeDistributed_CuDNNGRU(inputs, output_size, name, mode, sequences=True):
    x = TimeDistributed(
        Bidirectional(CuDNNGRU(output_size,
                               return_sequences=sequences,
                               kernel_initializer='he_normal',
                               kernel_regularizer=l2(1e-4),
                               name=name),
                      merge_mode=mode))(inputs)
    return x
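# Usage sketch for TimeDistributed_CuDNNGRU() above: the wrapper expects a 4-D
# tensor (batch, groups, timesteps, features) and runs the shared BiGRU over
# each group independently. All shapes and names here are illustrative assumptions.
def demo_timedistributed_gru():
    frames = Input(shape=(8, 50, 32))  # 8 groups of 50 timesteps, 32 features each
    encoded = TimeDistributed_CuDNNGRU(frames, output_size=64,
                                       name='frame_gru', mode='concat')
    return Model(inputs=frames, outputs=encoded)  # output: (batch, 8, 50, 128)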
def baseline(self, config):
    inp = Input(shape=(config.strmaxlen,), name='input')
    # Identity-initialised trainable embedding (a one-hot-like lookup).
    emb = Embedding(config.max_features, config.max_features,
                    embeddings_initializer='identity', trainable=True)(inp)
    emb = SpatialDropout1D(config.prob_dropout)(emb)
    x = Bidirectional(CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb)
    x = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=False))(x)
    outp = Dense(2, activation='softmax')(x)
    model = Model(inputs=inp, outputs=outp)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.001, decay=0.0001),
                  metrics=['categorical_crossentropy', 'accuracy'])
    return model
def get_word_attention(emb_matrix, word_length, optimizer, nclasses, gru_output_dim=128):
    input = Input(shape=(word_length,), dtype='int32')
    embedding = Embedding(input_dim=emb_matrix.shape[0],
                          output_dim=emb_matrix.shape[1],
                          weights=[emb_matrix],
                          input_length=word_length,
                          trainable=True,
                          mask_zero=False)
    sequence_input = embedding(input)
    print('embedding: ', sequence_input.shape)
    x = Bidirectional(CuDNNGRU(gru_output_dim, return_sequences=True))(sequence_input)
    print('Shape after BiGRU', x.shape)  # the layer is a GRU, not an LSTM as the original print claimed
    model = Model(input, x)
    return model
def make_pred_model(self, batch_size):
    model = Sequential()
    # model.add(InputLayer(batch_input_shape=(batch_size, self.sequence_length, self.vocab_size)))
    model.add(Embedding(batch_input_shape=(batch_size, 1),
                        input_dim=self.vocab_size,
                        output_dim=256))
    for i in np.arange(0, 3):
        model.add(CuDNNGRU(512, return_sequences=True, stateful=True))
    model.add(TimeDistributed(Dense(self.vocab_size)))
    model.add(Activation('softmax'))
    return model
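# Sampling sketch for make_pred_model() above (demo_sample_step is a
# hypothetical helper): with stateful=True and batch_input_shape=(batch_size, 1),
# the model is fed one token per call and carries its recurrent state across
# calls, which is the usual token-by-token sampling setup.
def demo_sample_step(obj, seed_token_id):
    pred_model = obj.make_pred_model(batch_size=1)
    pred_model.reset_states()
    probs = pred_model.predict(np.array([[seed_token_id]]))  # shape (1, 1, vocab_size)
    return int(np.argmax(probs[0, -1]))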
def cudnn_gru(maxlen, nb_words, embed_dim, embedding_matrix, trainable_flag, comp=True):
    inp = Input(shape=(maxlen,))
    if embedding_matrix is None:
        x = Embedding(nb_words, embed_dim)(inp)
    else:
        x = Embedding(nb_words, embed_dim,
                      weights=[embedding_matrix],
                      trainable=trainable_flag)(inp)
    x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
    x = Dropout(0.1)(x)
    x = Bidirectional(CuDNNGRU(128, return_sequences=False))(x)
    x = Dense(32, activation="relu")(x)
    x = Dropout(0.1)(x)
    x = Dense(6, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    if comp:
        model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model
def get_GRU_GlobalMaxAve(embedding_matrix, sequence_length, dropout_rate,
                         recurrent_units, dense_size):
    input_layer = Input(shape=(sequence_length,))
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                trainable=False)(input_layer)
    x = Bidirectional(CuDNNGRU(recurrent_units, return_sequences=True))(embedding_layer)
    # The original never applied the second layer to a tensor (missing `(x)`),
    # leaving `x` as a bare layer object and breaking the pooling below.
    x = Bidirectional(CuDNNGRU(recurrent_units, return_sequences=True))(x)
    x1 = GlobalMaxPooling1D()(x)
    x2 = GlobalAveragePooling1D()(x)
    x = Concatenate(axis=1)([x1, x2])
    x = Dense(dense_size, activation="relu")(x)
    output_layer = Dense(6, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(clipvalue=1, clipnorm=1),
                  metrics=['accuracy'])
    return model
def create_network(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(CuDNNGRU(256,
                       input_shape=(network_input.shape[1], network_input.shape[2]),
                       return_sequences=True))
    model.add(Dropout(0.3))
    model.add(CuDNNGRU(128, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(CuDNNGRU(64))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model
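# Usage sketch for create_network() above; assumes network_input is a 3-D array
# of shape (samples, timesteps, features) and network_output is one-hot with
# n_vocab columns -- both assumptions, since the original trains elsewhere.
def demo_create_network(network_input, network_output):
    model = create_network(network_input, n_vocab=network_output.shape[1])
    model.fit(network_input, network_output, epochs=1, batch_size=64)
    return model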
def model(batch_size, time_steps, num_neurons):
    x_in = Input(batch_shape=(batch_size, time_steps, 1))
    x = Dense(num_neurons, activation='relu')(x_in)
    x = BatchNormalization()(x)
    rnn_in1 = concatenate([x_in, x])
    x = CuDNNGRU(num_neurons, return_sequences=True, stateful=False)(rnn_in1)
    x = BatchNormalization()(x)
    rnn_in2 = concatenate([x_in, x])
    # x = Dropout(0.4)(x)
    x = CuDNNGRU(num_neurons, return_sequences=False, stateful=False)(rnn_in2)
    x = BatchNormalization()(x)
    # x = Dropout(0.4)(x)
    x = Dense(256, activation='softmax')(x)
    model = Model(inputs=[x_in], outputs=[x])
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
def encoder(self):
    # Defines the encoder model: a single GRU layer with the configured number
    # of units. BatchNormalization is added for improved training performance
    # and speed. CuDNNGRU is much faster than GRU but does not provide dropout
    # within the cell.
    model = Sequential()
    model.add(CuDNNGRU(self.GRU_units, input_shape=self.input_shape))
    model.add(BatchNormalization())
    model.add(Dense(2, activation='softmax'))
    model.summary()
    return model
def create_model(self):
    con_input = Input(shape=(self.conlen,))
    ans_input = Input(shape=(self.anslen,))
    ques_input = Input(shape=(self.queslen,))

    # Encode the question, then encode the context initialised from the
    # question encoder's final state.
    ee = Embedding(output_dim=self.embdims, input_dim=self.convocabsize, mask_zero=False)(con_input)
    se = Embedding(output_dim=self.quesdims, input_dim=self.quesvocabsize, mask_zero=False)(ques_input)
    se_enc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
    seout, state_ques = se_enc(se)
    enc = CuDNNGRU(self.recdims, return_state=True, return_sequences=True)
    encout, state_h = enc(ee, initial_state=state_ques)

    # Decode the answer, attending over both encoders.
    de = Embedding(output_dim=self.embdims, input_dim=self.ansvocabsize, mask_zero=False)(ans_input)
    dec = CuDNNGRU(self.recdims, return_sequences=True)
    decout = dec(de, initial_state=state_h)

    attn = dot([decout, encout], axes=[2, 2])
    attn = Activation('softmax')(attn)
    ast_attn = dot([decout, seout], axes=[2, 2])
    ast_attn = Activation('softmax')(ast_attn)
    context = dot([attn, encout], axes=[2, 1])
    ast_context = dot([ast_attn, seout], axes=[2, 1])
    context = concatenate([context, decout, ast_context])

    out = TimeDistributed(Dense(self.findims, activation="relu"))(context)
    out = Flatten()(out)
    out = Dense(self.ansvocabsize, activation="softmax")(out)

    model = Model(inputs=[con_input, ans_input, ques_input], outputs=out)
    if self.config['multigpu']:
        model = keras.utils.multi_gpu_model(model, gpus=2)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return self.config, model
def build_cell(self, modeltype):
    """ build lstm or gru cell """
    if modeltype == 'lstm':
        # Defaults: kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros'  # not zeros
        # rnn_out_f = LSTM(self.hidden_layer_size, input_shape=(self.length, self.n_features), use_bias=True, activation=self.activation, dropout=self.output_dropout, recurrent_dropout=self.input_dropout)(self.feature)
        # rnn_out_b = LSTM(self.hidden_layer_size, input_shape=(self.length, self.n_features), use_bias=True, activation=self.activation, dropout=self.output_dropout, recurrent_dropout=self.input_dropout, go_backwards=True)(self.feature)
        # zeros failing spontaneously.
        rnn_out_f = CuDNNLSTM(self.hidden_layer_size,
                              input_shape=(self.length, self.n_features))(self.feature)
        rnn_out_b = CuDNNLSTM(self.hidden_layer_size,
                              input_shape=(self.length, self.n_features),
                              go_backwards=True)(self.feature)
        # zeros
        # rnn_cell = tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(self.hidden_layer_size)
        # rnn_cell_bw = tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(self.hidden_layer_size)
        # zeros
        # rnn_cell = tf.contrib.cudnn_rnn.CudnnLSTM(1, self.hidden_layer_size, dropout=self.output_dropout, kernel_initializer=tf.glorot_uniform_initializer(), bias_initializer='zeros', direction='bidirectional')
        # rnn_cell_bw = tf.contrib.cudnn_rnn.CudnnLSTM(1, self.hidden_layer_size, dropout=self.output_dropout, kernel_initializer=tf.glorot_uniform_initializer(), bias_initializer='zeros', direction='bidirectional')
        # rnn_cell = tf.keras.layers.LSTMCell(self.hidden_layer_size, activation=self.activation)
    elif modeltype == 'gru':
        rnn_out_f = CuDNNGRU(self.hidden_layer_size,
                             input_shape=(self.length, self.n_features))(self.feature)
        rnn_out_b = CuDNNGRU(self.hidden_layer_size,
                             input_shape=(self.length, self.n_features),
                             go_backwards=True)(self.feature)
    else:
        # raising a bare string is invalid in Python 3; raise a proper exception
        raise ValueError('Unknown modeltype for RNN')

    # self.rnn_cell = tf.nn.rnn_cell.DropoutWrapper(rnn_cell, input_keep_prob=1.0 - self.input_dropout, output_keep_prob=1.0 - self.output_dropout)
    # self.rnn_cell_bw = tf.nn.rnn_cell.DropoutWrapper(rnn_cell_bw, input_keep_prob=1.0 - self.input_dropout, output_keep_prob=1.0 - self.output_dropout)
    self.rnn_cell = merge.Concatenate(axis=-1)([rnn_out_f, rnn_out_b])
    self.rnn_cell = Dropout(self.output_dropout)(self.rnn_cell)
def final_model(input_dim=161):
    """ Build a deep network for speech """
    filters = 200
    kernel_size = 11
    conv_stride = 2
    conv_border_mode = 'same'
    units = 200
    output_dim = 29
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))
    conv_1d = Conv1D(filters, kernel_size,
                     strides=conv_stride,
                     padding=conv_border_mode,
                     activation='relu',
                     name='conv1d')(input_data)
    conv_drop = Dropout(0.2)(conv_1d)  # dropout, despite the misleading `b_norm` name in the original
    bidir_rnn = Bidirectional(CuDNNGRU(units, return_sequences=True), merge_mode='concat')(conv_drop)
    drp_out = Dropout(0.2)(bidir_rnn)
    bidir_rnn = Bidirectional(CuDNNGRU(units, return_sequences=True), merge_mode='concat')(drp_out)
    drp_out = Dropout(0.3)(bidir_rnn)
    bidir_rnn = Bidirectional(CuDNNGRU(units, return_sequences=True), merge_mode='concat')(drp_out)
    drp_out = Dropout(0.3)(bidir_rnn)
    bidir_rnn = Bidirectional(CuDNNGRU(units, return_sequences=True), merge_mode='concat')(drp_out)
    drp_out = Dropout(0.4)(bidir_rnn)
    drp_out = Dense(output_dim * 2)(drp_out)
    drp_out = BatchNormalization()(drp_out)
    time_dense = TimeDistributed(Dense(output_dim))(drp_out)
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: cnn_output_length(
        x, kernel_size, conv_border_mode, conv_stride)
    print(model.summary())
    return model
def create_rnn_model(rnnModel, type, inputSize):
    """
    Function to create my rnn neural network
    Arguments:
        rnnModel: keras Sequential model to add layers to
        type: string, which cell to use: 'GRU' or 'LSTM'
        inputSize: training input size with shape (time_length, features)
    Return:
        model after set up
    """
    # https://machinelearningmastery.com/return-sequences-and-return-states-for-lstms-in-keras/
    # explains return_sequences & return_state:
    # - return_sequences=True gives access to the hidden state output for each
    #   input time step, so set it when stacking LSTM or GRU layers, and when
    #   predicting a sequence of outputs with a Dense output layer wrapped in a
    #   TimeDistributed layer.
    if type == 'GRU':
        rnnModel.add(CuDNNGRU(units=32,
                              kernel_initializer='glorot_uniform',
                              recurrent_initializer='orthogonal',
                              bias_initializer='zeros',
                              kernel_regularizer=None,
                              recurrent_regularizer=None,
                              bias_regularizer=None,
                              activity_regularizer=None,
                              kernel_constraint=None,
                              recurrent_constraint=None,
                              bias_constraint=None,
                              return_sequences=True,
                              return_state=False,
                              stateful=False,
                              input_shape=inputSize))
    if type == 'LSTM':
        rnnModel.add(CuDNNLSTM(units=32,
                               kernel_initializer='glorot_uniform',
                               recurrent_initializer='orthogonal',
                               bias_initializer='zeros',
                               unit_forget_bias=True,
                               kernel_regularizer=None,
                               recurrent_regularizer=None,
                               bias_regularizer=None,
                               activity_regularizer=None,
                               kernel_constraint=None,
                               recurrent_constraint=None,
                               bias_constraint=None,
                               return_sequences=True,
                               return_state=False,
                               stateful=False,
                               input_shape=inputSize))
    rnnModel.add(Dense(3, activation='softmax'))
    rnnModel.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    return rnnModel
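# Usage sketch for create_rnn_model() above; the fresh Sequential container and
# the (time_length, features) shape are illustrative assumptions.
def demo_create_rnn_model():
    base = Sequential()
    return create_rnn_model(base, type='GRU', inputSize=(100, 8))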
def build_rnn_mixer(self, input_shape, num_output):
    model_input = Input(input_shape, name='mixer_input')
    x = Dropout(0.25)(model_input)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    x = Dropout(0.25)(x)
    x = Bidirectional(CuDNNGRU(64, return_sequences=False))(x)
    x = Dropout(0.5)(x)
    x = Dense(32)(x)
    x = BatchNormalization()(x)
    x = ELU()(x)
    x = Dense(num_output)(x)
    model_output = Activation('softmax', name='mixture_weights')(x)
    model = Model(inputs=model_input, outputs=model_output, name='mixer_rnn')
    opt = Adam(lr=5e-3, decay=1e-2)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
def get_GRU_Attn(embedding_matrix, sequence_length, dropout_rate, recurrent_units, dense_size):
    input_layer = Input(shape=(sequence_length,))
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                trainable=False)(input_layer)
    x = SpatialDropout1D(rate=dropout_rate)(embedding_layer)
    x = Bidirectional(CuDNNGRU(recurrent_units, return_sequences=True))(x)
    x = Bidirectional(CuDNNGRU(recurrent_units, return_sequences=True))(x)
    x = Attention(sequence_length)(x)
    x = Dense(dense_size, activation="relu")(x)
    x = Dropout(dropout_rate)(x)
    output_layer = Dense(6, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy', optimizer=Nadam(), metrics=['accuracy'])
    return model
def final_model_2(units, input_dim, filters, kernel_size, conv_stride,
                  conv_border_mode, drop_out_rate, output_dim=29):
    """ Build a deep network for speech """
    # NOTE: the recurrent layers below are unidirectional despite the original
    # `bidir_rnn_*` variable names, and they use output_dim (not `units`) as
    # their width; `units` and `drop_out_rate` are accepted but unused.
    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))
    # Convolutional layer followed by batch normalization
    conv_1d = Conv1D(filters, kernel_size,
                     strides=conv_stride,
                     padding=conv_border_mode,
                     activation='relu',
                     name='conv1d')(input_data)
    bn_cnn = BatchNormalization(name='bn_conv_1d')(conv_1d)
    # Three recurrent layers, each followed by batch normalization
    rnn_1 = CuDNNGRU(output_dim, return_sequences=True, name='rnn_1')(bn_cnn)
    bn_rnn_1 = BatchNormalization(name='bn_rnn_1')(rnn_1)
    rnn_2 = CuDNNGRU(output_dim, return_sequences=True, name='rnn_2')(bn_rnn_1)
    bn_rnn_2 = BatchNormalization(name='bn_rnn_2')(rnn_2)
    rnn_3 = CuDNNGRU(output_dim, return_sequences=True, name='rnn_3')(bn_rnn_2)
    bn_rnn_3 = BatchNormalization(name='bn_rnn_3')(rnn_3)
    # Per-timestep classifier and softmax
    time_dense = TimeDistributed(Dense(output_dim))(bn_rnn_3)
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    model.output_length = lambda x: cnn_output_length(
        x, kernel_size, conv_border_mode, conv_stride)
    print(model.summary())
    return model
def NN_huaweivv1(maxlen, embedding_matrix=None, class_num=17):
    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen,))
    x1 = emb_layer(seq1)
    sdrop = SpatialDropout1D(rate=0.2)
    # Both recurrent layers are BiGRUs; the first was misleadingly named
    # `lstm_layer` in the original.
    gru_layer1 = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer2 = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64, kernel_size=12, padding="same", kernel_initializer="he_uniform")
    x1 = TimestepDropout(0.2)(x1)
    x1 = sdrop(x1)
    gru1 = gru_layer1(x1)
    gru2 = gru_layer2(gru1)
    att_1 = Attention(maxlen)(gru1)
    att_2 = Attention(maxlen)(gru2)
    cnn1 = cnn1d_layer(gru1)
    avg_pool = GlobalAveragePooling1D()
    max_pool = GlobalMaxPooling1D()
    x1 = concatenate([att_1, att_2, Attention(maxlen)(cnn1), avg_pool(cnn1), max_pool(cnn1)])
    x = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(Dense(256)(x1))))
    x = Activation(activation="relu")(BatchNormalization()(Dense(128)(x)))
    pred = Dense(class_num, activation='sigmoid')(x)
    model = Model(inputs=seq1, outputs=pred)
    return model
def get_stacked_model():
    # AA Sequence
    aa_seq = Input(shape=(None,))
    embed_1 = Embedding(22, 32)(aa_seq)
    lstm_1 = CuDNNLSTM(units=75, return_sequences=True)(embed_1)
    bilstm_1 = Bidirectional(CuDNNLSTM(units=50, return_sequences=True))(lstm_1)
    bigru_1 = Bidirectional(CuDNNGRU(units=50, return_sequences=True))(bilstm_1)
    output_1 = CuDNNLSTM(units=64, return_sequences=True)(bigru_1)

    # Q8 Sequence
    ss_seq = Input(shape=(None,))
    embed_2 = Embedding(9, 32)(ss_seq)
    bigru_2_1 = Bidirectional(CuDNNGRU(units=100, return_sequences=True))(embed_2)
    bigru_2_2 = Bidirectional(CuDNNGRU(units=50, return_sequences=True))(bigru_2_1)
    output_2 = Dense(units=64, activation='relu')(bigru_2_2)

    # PSSM
    pssm = Input(shape=(None, 21))
    bigru_3_1 = Bidirectional(CuDNNGRU(units=100, return_sequences=True))(pssm)
    # bigru_3_2 = Bidirectional(CuDNNGRU(units=64, return_sequences=True))(bigru_3_1)
    output_3 = Dense(units=64, activation='relu')(bigru_3_1)

    # Concatenation
    concat = concatenate([output_1, output_2, output_3], axis=-1)

    # Torsion Angle and Distance Matrix Prediction.
    # Keras layer names must be unique; the original reused '3d_output' for
    # both heads, so the angle head is renamed here.
    angles = TimeDistributed(Dense(3), name='angle_output')(concat)
    angles_dihedral = Lambda(slice, name='dihedrals')(angles)
    # pts = Lambda(dihedral_to_point, name='pts_from_angle')(angles)
    # cc = Lambda(point_to_coordinate, name='coords_from_pts')(pts)
    # dcalpha = Lambda(pairwise_distance, name='3d_dcalpha_output')(cc)
    angles_dihedral_2d = Lambda(dihedral_to_2D, name='ang2d')(angles_dihedral)
    coords = TimeDistributed(Dense(3), name='3d_output')(concat)
    dcalpha = Lambda(pairwise_distance, name='3d_dcalpha_output')(coords)
    stacked = concatenate([angles_dihedral_2d, dcalpha], axis=1)  # was misspelled `concateate`
    # return Model(inputs=[aa_seq, ss_seq, pssm], outputs=[angles_dihedral, dcalpha])
    return Model(inputs=[aa_seq, ss_seq, pssm], outputs=stacked)
def get_GRU_model(embedding_matrix, train_labels, train_tokens, test_tokens, call_backs):
    dropout_rate = 0.3
    recurrent_units = 64
    dense_size = 256
    input_layer = Input(shape=(MAX_SENTENCE_LEN,))
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                trainable=False)(input_layer)
    x = Bidirectional(CuDNNGRU(recurrent_units, return_sequences=True))(embedding_layer)
    x = Bidirectional(CuDNNGRU(recurrent_units, return_sequences=True))(x)
    x = Dropout(dropout_rate)(x)
    x1 = MaxPooling1D(4)(x)
    x2 = AveragePooling1D(4)(x)
    x = Concatenate(axis=1)([x1, x2])
    x = Flatten()(x)
    x = Dense(dense_size, activation="relu")(x)
    output_layer = Dense(6, activation="sigmoid")(x)
    # output_layer = Dense(6, activation="rmsprop")(x)  # "rmsprop" is an optimizer, not an activation
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy',
                  optimizer=RMSprop(clipvalue=1, clipnorm=1),
                  metrics=['accuracy'])
    try:
        model.load_weights(MODEL_DIR + model_name)
        print("Train from existing models")
    except Exception:
        print("Train from scratch...")
    model.fit(train_tokens, train_labels,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=call_backs,
              validation_split=0.1)
    # load the current best model
    model.load_weights(MODEL_DIR + model_name)
    return model
def StackedLSTMmodel(n_outputs, vocab_sz, emb_sz, embedding_matrix, n_timesteps):
    print('running stacked LSTM')
    hid_sz = 100
    x = Input(shape=(n_timesteps,))
    # embedding layer feeding a two-layer bidirectional recurrent stack
    # (GRUs, despite the function name)
    e = Embedding(vocab_sz, emb_sz,
                  weights=[embedding_matrix],
                  input_length=n_timesteps,
                  trainable=True)(x)
    if DEVICE == 'CPU':
        l_1 = Bidirectional(GRU(hid_sz, return_sequences=True, activation='relu'))(e)
        l_2 = Bidirectional(GRU(hid_sz, return_sequences=True, activation='relu'))(l_1)
    else:
        l_1 = Bidirectional(CuDNNGRU(hid_sz, return_sequences=True))(e)
        l_2 = Bidirectional(CuDNNGRU(hid_sz, return_sequences=True))(l_1)
    l_3 = TimeDistributed(Dense(100, activation='relu'))(l_2)
    l_4 = Flatten()(l_3)
    l_5 = Dense(n_outputs, activation='sigmoid')(l_4)
    model = Model(inputs=x, outputs=l_5)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    plot_model(model, to_file='stackedLSTM_figure.png', show_shapes=True, show_layer_names=True)
    return model
def build_model():
    inp = Input(shape=(max_len,))
    embedding = Embedding(max_features + 1, dim)(inp)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(embedding)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)
    x = all_pool(x)
    # x = BatchNormalization()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    out = Dense(cate_num, activation='softmax')(x)
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=Adam(lr=0.002), loss="categorical_crossentropy")
    return model
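# Training sketch for build_model() above; the globals it reads (max_len,
# max_features, dim, cate_num, all_pool) must already be defined, and the
# fit() arguments here are illustrative assumptions.
def demo_train(X_train, y_train):
    model = build_model()
    model.fit(X_train, y_train, batch_size=256, epochs=3, validation_split=0.1)
    return model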