def make_model(self):
    input = Input(shape=(self.maxlen, self.num_chars))
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    x = recurrent.LSTM(self.hidden_size)(input)
    # For the decoder's input, we repeat the encoded input for each time step
    x = RepeatVector(self.max_digits + 1)(x)
    # The decoder RNN could be multiple layers stacked or a single layer
    x = recurrent.LSTM(self.hidden_size, return_sequences=True)(x)
    # For each step of the output sequence, decide which character should be chosen
    x = TimeDistributed(Dense(self.num_chars, activation='softmax'))(x)

    def full_number_accuracy(y_true, y_pred):
        y_true_argmax = K.argmax(y_true)
        y_pred_argmax = K.argmax(y_pred)
        tfd = K.equal(y_true_argmax, y_pred_argmax)
        tfn = K.all(tfd, axis=1)
        tfc = K.cast(tfn, dtype='float32')
        tfm = K.mean(tfc)
        return tfm

    self.model = Model(inputs=input, outputs=x)
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=Adam(lr=self.optimizer_lr, clipnorm=self.clipnorm),
                       metrics=['accuracy', full_number_accuracy])
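# Minimal standalone sketch of the same encode -> RepeatVector -> decode pattern
# used above. The sizes (maxlen=7, num_chars=12, hidden_size=128, out_steps=5)
# are made-up placeholders, not values from the original class; the real model
# takes them from self.maxlen, self.num_chars, etc.
from keras.layers import Input, Dense, RepeatVector, TimeDistributed, recurrent
from keras.models import Model

maxlen, num_chars, hidden_size, out_steps = 7, 12, 128, 5

inp = Input(shape=(maxlen, num_chars))             # one-hot encoded input sequence
encoded = recurrent.LSTM(hidden_size)(inp)         # single fixed-size encoding of the sequence
repeated = RepeatVector(out_steps)(encoded)        # feed that encoding to every decoder step
decoded = recurrent.LSTM(hidden_size, return_sequences=True)(repeated)
probs = TimeDistributed(Dense(num_chars, activation='softmax'))(decoded)

sketch = Model(inputs=inp, outputs=probs)
sketch.compile(loss='categorical_crossentropy', optimizer='adam')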
def get_model(word_index):
    nb_words = min(MAX_NB_WORDS, len(word_index))
    if os.path.exists(embedding_matrix_path):
        embedding_matrix = np.load(embedding_matrix_path)["arr_0"]
    else:
        embedding_matrix = get_embedding_matrix(word_index)
        # embedding_matrix, nb_words = get_embedding_matrix(word_index)
    nb_words = min(MAX_NB_WORDS, len(word_index))

    input1 = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                input_length=MAX_SEQUENCE_LENGTH,
                                weights=[embedding_matrix],
                                trainable=True)
    x = embedding_layer(input1)
    x = Dropout(rate_drop_dense)(x)
    x = recurrent.LSTM(lstm_output_size)(x)
    x = RepeatVector(MAX_SEQUENCE_LENGTH)(x)
    x = recurrent.LSTM(lstm_output_size)(x)
    x = Dropout(rate_drop_dense)(x)
    out = Dense(6, activation='sigmoid')(x)

    model = Model(inputs=input1, outputs=out)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['mse'])
    model.summary()
    return model
def generate_model(output_len, chars=None):
    """Generate the model"""
    print('Build model...')
    chars = chars or CHARS
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of hidden_size
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    for layer_number in range(CONFIG.input_layers):
        model.add(recurrent.LSTM(CONFIG.hidden_size,
                                 input_shape=(None, len(chars)),
                                 kernel_initializer=CONFIG.initialization,
                                 return_sequences=layer_number + 1 < CONFIG.input_layers))
        model.add(Dropout(CONFIG.amount_of_dropout))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(output_len))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(CONFIG.output_layers):
        model.add(recurrent.LSTM(CONFIG.hidden_size,
                                 return_sequences=True,
                                 kernel_initializer=CONFIG.initialization))
        model.add(Dropout(CONFIG.amount_of_dropout))
    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(len(chars), kernel_initializer=CONFIG.initialization)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def get_rnn_model():
    model = Sequential()
    model.add(embed_1)
    model.add(conv_1)
    model.add(conv_2)
    model.add(pool_1)
    # model.add(conv_3)
    # model.add(conv_4)
    # model.add(pool_2)
    # model.add(conv_5)
    model.add(Attention(recurrent.LSTM(256, input_dim=EMBEDDING_DIM, consume_less='mem', return_sequences=True)))
    model.add(Attention(recurrent.LSTM(128, input_dim=EMBEDDING_DIM, consume_less='mem', return_sequences=True)))
    model.add(Attention(recurrent.LSTM(64, input_dim=EMBEDDING_DIM, consume_less='mem', return_sequences=False)))
    # model.add(bi_lstm_1)
    # model.add(bi_lstm_2)
    # model.add(bi_lstm_3)
    model.add(Dense(256))
    model.add(drop_1)
    model.add(dense_2)
    model.compile(loss='mean_squared_error', optimizer="Adam")
    return model
def generate_model(output_len, chars=None):
    """Generate the model"""
    print('Build model...')
    chars = chars or CHARS
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    for layer_number in range(INPUT_LAYERS):
        model.add(recurrent.LSTM(HIDDEN_SIZE,
                                 input_shape=(None, len(chars)),
                                 init=INITIALIZATION,
                                 return_sequences=layer_number + 1 < INPUT_LAYERS))
        model.add(Dropout(AMOUNT_OF_DROPOUT))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(output_len))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(OUTPUT_LAYERS):
        model.add(recurrent.LSTM(HIDDEN_SIZE,
                                 return_sequences=True,
                                 init=INITIALIZATION))
        model.add(Dropout(AMOUNT_OF_DROPOUT))
    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(len(chars), init=INITIALIZATION)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def create_model(computed_params, model_params):
    max_inputseq_len = computed_params['max_inputseq_len']
    max_outputseq_len = computed_params['max_outputseq_len']
    nb_labels = computed_params['nb_labels']
    word_dims = computed_params['word_dims']

    if model_params['arch'] == 'crf':
        max_len = max(max_inputseq_len, max_outputseq_len) + 2
        input1 = Input(shape=(max_len, word_dims,), dtype='float32', name='input1')
        input2 = Input(shape=(max_len, word_dims,), dtype='float32', name='input2')
    else:
        input1 = Input(shape=(max_inputseq_len, word_dims,), dtype='float32', name='input1')
        input2 = Input(shape=(max_inputseq_len, word_dims,), dtype='float32', name='input2')

    optimizer = model_params['optimizer']
    arch = model_params['arch']

    if arch == 'bilstm':
        net1 = Bidirectional(recurrent.LSTM(units=model_params['rnn_size'],
                                            dropout=model_params['dropout_rate'],
                                            return_sequences=False))(input1)
        net2 = Bidirectional(recurrent.LSTM(units=model_params['rnn_size'],
                                            dropout=model_params['dropout_rate'],
                                            return_sequences=False))(input2)
        net = concatenate([net1, net2])
    elif arch in ('crf', 'rnn_seq'):
        net1 = Bidirectional(recurrent.LSTM(units=model_params['rnn_size'],
                                            dropout=model_params['dropout_rate'],
                                            return_sequences=False))(input1)
        net2 = Bidirectional(recurrent.LSTM(units=model_params['rnn_size'],
                                            dropout=model_params['dropout_rate'],
                                            return_sequences=False))(input2)
        net = concatenate([net1, net2])
    else:
        raise NotImplementedError()

    if model_params['dense1'] > 0:
        net = Dense(units=model_params['dense1'], activation='sigmoid')(net)

    if arch == 'crf':
        net = RepeatVector(max_len)(net)
        net = recurrent.LSTM(model_params['rnn_size'], return_sequences=True)(net)
        net = CRF(units=computed_params['nb_terms'], sparse_target=False)(net)
        model = Model(inputs=[input1, input2], outputs=net)
        model.compile(loss=crf_loss, optimizer=optimizer, metrics=[crf_viterbi_accuracy])
    elif arch == 'bilstm':
        net = Dense(units=nb_labels, activation='softmax')(net)
        model = Model(inputs=[input1, input2], outputs=net)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model
def _create_model(self, units, lr, hypothesis_max_length, sentence_max_length, embeddings, seed):
    hyp_inpt = Input([hypothesis_max_length], name="hypothesis_input", dtype="int32")
    hyp_emb = Dropout(0.5, seed=seed)(Embedding(embeddings.shape[0],
                                                embeddings.shape[1],
                                                weights=[embeddings],
                                                mask_zero=True,
                                                name="hyp_emb")(hyp_inpt))
    hyp_bilstm = Bidirectional(
        recurrent.LSTM(units[0], return_sequences=False, name="hyp_lstm"))(hyp_emb)

    content_inpt = Input([sentence_max_length], name="evidence_input", dtype="int32")
    content_emb = Dropout(0.5, seed=seed)(Embedding(embeddings.shape[0],
                                                    embeddings.shape[1],
                                                    weights=[embeddings],
                                                    mask_zero=True,
                                                    name="content_emb")(content_inpt))
    evidence_bilstm = Bidirectional(
        recurrent.LSTM(units[0], return_sequences=False, name="evidence_lstm"))(content_emb)

    hidden = Dropout(0.5, seed=seed)(Concatenate()([hyp_bilstm, evidence_bilstm]))
    if len(units) > 1:
        # add dense layer if required
        hidden = Dropout(0.5, seed=seed)(Dense(units=units[1], activation="relu",
                                               name="hidden_dense")(hidden))
    classifier = Dense(units=2, activation="softmax", name="dense")(hidden)

    model = Model(inputs=[hyp_inpt, content_inpt], outputs=classifier)
    optimizer = Adam(lr=lr)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    print(model.summary())
    return model
def create_model(params, computed_params):
    input = Input(shape=(computed_params['max_text_len'], computed_params['nb_features'],),
                  dtype='float32', name='input')

    if params['optimizer'] == 'ftml':
        opt = keras_contrib.optimizers.FTML()
    else:
        opt = params['optimizer']

    if params['net_arch'] == 'crf':
        net = input
        for _ in range(params['nb_rnn']):
            net = Bidirectional(recurrent.LSTM(units=params['rnn_units1'],
                                               dropout=params['dropout_rate'],
                                               return_sequences=True))(net)
        net = CRF(units=computed_params['nb_labels'], sparse_target=False)(net)
        model = Model(inputs=[input], outputs=net)
        model.compile(loss=crf_loss, optimizer=opt, metrics=[crf_viterbi_accuracy])
        model.summary()
    elif params['net_arch'] == 'lstm':
        net = Bidirectional(recurrent.LSTM(units=params['rnn_units1'],
                                           dropout=params['dropout_rate'],
                                           return_sequences=False))(input)
        for _ in range(params['nb_dense1']):
            net = Dense(units=params['rnn_units1'], activation=params['activation1'])(net)

        decoder = RepeatVector(computed_params['max_text_len'])(net)
        decoder = recurrent.LSTM(params['rnn_units2'], return_sequences=True)(decoder)
        decoder = TimeDistributed(Dense(units=computed_params['nb_labels'], activation='softmax'),
                                  name='output')(decoder)

        model = Model(inputs=[input], outputs=decoder)
        model.compile(loss='categorical_crossentropy', optimizer=opt)
        model.summary()

    return model
def model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.
    """
    CONFIG = Configuration()
    chars = read_top_chars()
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of hidden_size
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    for layer_number in range(CONFIG.input_layers):
        model.add(recurrent.LSTM(CONFIG.hidden_size,
                                 input_shape=(None, len(chars)),
                                 kernel_initializer=CONFIG.initialization,
                                 return_sequences=layer_number + 1 < CONFIG.input_layers))
        model.add(Dropout(CONFIG.amount_of_dropout))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(CONFIG.max_input_len))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(CONFIG.output_layers):
        model.add(recurrent.LSTM(CONFIG.hidden_size,
                                 return_sequences=True,
                                 kernel_initializer=CONFIG.initialization))
        model.add(Dropout(CONFIG.amount_of_dropout))
    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(len(chars), kernel_initializer=CONFIG.initialization)))
    model.add(Activation('softmax'))

    model.compile(loss=CONFIG.loss,
                  optimizer={{choice(['rmsprop', 'adam', 'sgd'])}},
                  metrics=['accuracy'])

    model.fit(x_train, y_train,
              batch_size={{choice([100, 200, 300, 400])}},
              epochs={{choice([100, 200, 250, 300])}},
              verbose=2,
              validation_data=(x_test, y_test))
    score, acc = model.evaluate(x_test, y_test, verbose=0)
    print('Test accuracy:', acc)
    print('Random Predictions:')
    ctable = CharacterTable(read_top_chars())
    print_random_predictions(model, ctable, x_test, y_test)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}
def test_explicit_device_with_go_backward_and_mask(self):
    batch_size = 8
    timestep = 7
    masksteps = 5
    units = 4

    inputs = np.random.randn(batch_size, timestep, units).astype(np.float32)
    mask = np.ones((batch_size, timestep)).astype(np.bool)
    mask[:, masksteps:] = 0

    # Test for V1 behavior.
    lstm_v1 = rnn_v1.LSTM(units, return_sequences=True, go_backwards=True)
    with testing_utils.device(should_use_gpu=True):
        outputs_masked_v1 = lstm_v1(inputs, mask=tf.constant(mask))
        outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
    self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1)

    # Test for V2 behavior.
    lstm = rnn.LSTM(units, return_sequences=True, go_backwards=True)
    with testing_utils.device(should_use_gpu=True):
        outputs_masked = lstm(inputs, mask=tf.constant(mask))
        outputs_trimmed = lstm(inputs[:, :masksteps])
    self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
def _create_model(self, units, max_length):
    model = Sequential()
    model.add(Embedding(self.embeddings.shape[0],
                        self.embeddings.shape[1],
                        weights=[self.embeddings],
                        input_length=max_length,
                        mask_zero=True,
                        name="emb"))
    model.add(Dropout(0.5, name="dropoutemb"))
    for i, unit in enumerate(units):
        model.add(recurrent.LSTM(unit, return_sequences=False, name="lstm" + str(i)))
        # model.add(recurrent.LSTM(5, return_sequences=False, name="lstm", activation="relu"))
        model.add(Dropout(0.5, name="dropout" + str(i)))

    model.add(Dense(units=2, activation="softmax", name="dense"))
    optimizer = Adam(lr=0.01)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    print(model.summary())
    return model
def test_explicit_device_with_go_backward_and_mask(self):
    if tf.test.is_built_with_rocm():
        self.skipTest('Skipping the test as ROCm MIOpen does not '
                      'support padded input yet.')
    batch_size = 8
    timestep = 7
    masksteps = 5
    units = 4

    inputs = np.random.randn(batch_size, timestep, units).astype(np.float32)
    mask = np.ones((batch_size, timestep)).astype(np.bool)
    mask[:, masksteps:] = 0

    # Test for V1 behavior.
    lstm_v1 = rnn_v1.LSTM(units, return_sequences=True, go_backwards=True)
    with testing_utils.device(should_use_gpu=True):
        outputs_masked_v1 = lstm_v1(inputs, mask=tf.constant(mask))
        outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
    self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1)

    # Test for V2 behavior.
    lstm = rnn.LSTM(units, return_sequences=True, go_backwards=True)
    with testing_utils.device(should_use_gpu=True):
        outputs_masked = lstm(inputs, mask=tf.constant(mask))
        outputs_trimmed = lstm(inputs[:, :masksteps])
    self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
def create_model(embeddings, units, max_length, seed):
    tf.set_random_seed(seed)
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    K.set_session(sess)

    model = Sequential()
    emb = Embedding(embeddings.shape[0],
                    embeddings.shape[1],
                    weights=[embeddings],
                    input_length=max_length,
                    mask_zero=True,
                    name="foo")
    model.add(emb)
    model.add(Dropout(0.5, name="dropoutemb"))
    for i, unit in enumerate(units):
        model.add(Bidirectional(recurrent.LSTM(unit, return_sequences=False, name="lstm" + str(i))))
        # model.add(recurrent.LSTM(5, return_sequences=False, name="lstm", activation="relu"))
        model.add(Dropout(0.5, name="dropout" + str(i)))

    model.add(Dense(units=2, activation="softmax", name="dense"))
    optimizer = Adam(lr=0.01)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    print(model.summary())
    return model
def rnn_att_model(embed, MAX_LEN, SENT_HIDDEN_SIZE, ACTIVATION, DP, L2, LABEL_NUM, OPTIMIZER,
                  MLP_LAYER, LAYERS, RNN_Cell='LSTM'):
    print('Build model...')
    RNN = recurrent.LSTM
    if RNN_Cell == 'BiLSTM':
        RNN = lambda *args, **kwargs: Bidirectional(recurrent.LSTM(*args, **kwargs))
    elif RNN_Cell == 'GRU':
        RNN = recurrent.GRU
    elif RNN_Cell == 'BiGRU':
        RNN = lambda *args, **kwargs: Bidirectional(recurrent.GRU(*args, **kwargs))

    rnn_kwargs = dict(units=SENT_HIDDEN_SIZE, dropout=DP, recurrent_dropout=DP)
    translate = TimeDistributed(Dense(SENT_HIDDEN_SIZE, activation=ACTIVATION))

    premise = Input(shape=(MAX_LEN,), dtype='int32')
    prem = embed(premise)
    # prem = translate(prem)

    if LAYERS > 1:
        for l in range(LAYERS - 1):
            rnn = RNN(return_sequences=True, **rnn_kwargs)
            prem = BatchNormalization()(rnn(prem))
    rnn = RNN(return_sequences=True, **rnn_kwargs)
    prem = rnn(prem)
    prem = Attention(MAX_LEN)(prem)

    joint = Dropout(DP)(prem)
    for i in range(MLP_LAYER):
        joint = Dense(2 * SENT_HIDDEN_SIZE, activation=ACTIVATION,
                      kernel_regularizer=l2(L2) if L2 else None)(joint)
        joint = Dropout(DP)(joint)
        # joint = BatchNormalization()(joint)

    pred = Dense(LABEL_NUM, activation='softmax')(joint)
    model = Model(inputs=premise, outputs=pred)
    model.compile(optimizer=OPTIMIZER, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model
def make_model(len_feature):
    ########################## Parameters for the model and dataset
    # TRAINING_SIZE = len(inputs)
    # Try replacing JZS1 with LSTM, GRU, or SimpleRNN
    HIDDEN_SIZE = node0
    RNN = recurrent.LSTM(HIDDEN_SIZE,
                         input_shape=(None, len(chars)),
                         return_sequences=False,
                         kernel_regularizer=l2(l2_c),
                         bias_regularizer=l2(l2_c),
                         recurrent_dropout=drop_out_c,
                         dropout=drop_out_c,
                         kernel_constraint=maxnorm(constrain_max))
    # len0_hla = 34
    # ratio_t = 1
    ### class number = binder or non-binder (1 = binder, 0 = non-binder)
    # classes = [0, 1]

    ########################## start a model ##########################
    ########## fixed part
    model_fixed = Sequential()
    model_fixed.add(Dense(help_nn,
                          input_dim=len_feature,
                          activation=act_fun,
                          kernel_regularizer=l2(l2_c),
                          bias_regularizer=l2(l2_c)))
    model_fixed.add(Dropout(drop_out_c))

    ########## recurrent part
    model_r = Sequential()
    if mask0:
        model_r.add(Masking(mask_value=0., input_shape=(MAXLEN, len(dict_aa['A']))))
    model_r.add(RNN)

    #### merge
    merged = Merge([model_fixed, model_r], mode='concat')

    ### final
    final_model = Sequential()
    final_model.add(merged)
    # , kernel_constraint=maxnorm(constrain_max)
    for _ in range(0, help_layer0):
        final_model.add(Dense(help_nn, kernel_regularizer=l2(l2_c), bias_regularizer=l2(l2_c)))
        final_model.add(Activation(act_fun))
        final_model.add(Dropout(drop_out_c))
    final_model.add(Dense(1))
    final_model.compile(loss=loss_function0, optimizer="adam")

    model = final_model
    json_string = model.to_json()
    open(path_save + file_name0 + out_name + '_model.json', 'w').write(json_string)
    return model
def test_masking_layer():
    '''
    This test based on a previously failing issue here:
    https://github.com/fchollet/keras/issues/1567
    '''
    inputs = np.random.random((6, 3, 4))
    targets = np.abs(np.random.random((6, 3, 5)))
    targets /= targets.sum(axis=-1, keepdims=True)

    model = Sequential()
    model.add(Masking(input_shape=(3, 4)))
    model.add(recurrent.LSTM(units=5, return_sequences=True, unroll=False))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1)

    model = Sequential()
    model.add(Masking(input_shape=(3, 4)))
    model.add(recurrent.LSTM(units=5, return_sequences=True, unroll=True))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1)
def get_model0(embedding_matrix):
    input1 = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    # x = Dropout(rate_drop_dense)(input1)
    embedding_layer = Embedding(MAX_NB_WORDS,
                                EMBEDDING_DIM,
                                input_length=MAX_SEQUENCE_LENGTH,
                                weights=[embedding_matrix],
                                trainable=True)
    x = embedding_layer(input1)
    x = Dropout(rate_drop_dense)(x)
    x = recurrent.LSTM(lstm_output_size)(x)
    x = RepeatVector(MAX_SEQUENCE_LENGTH)(x)
    x = recurrent.LSTM(lstm_output_size)(x)
    x = Dropout(rate_drop_dense)(x)
    out = Dense(6, activation='sigmoid')(x)

    model = Model(inputs=input1, outputs=out)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['mse'])
    # model.summary()
    return model
def create_model(units, lr, hypothesis_max_length, sentence_max_length, embeddings, seed):
    # set random seed to Keras and TensorFlow
    tf.set_random_seed(seed)
    # session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    # sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    # K.set_session(sess)

    hyp_inpt = Input([hypothesis_max_length], name="hypothesis_input", dtype="int32")
    hyp_emb = Dropout(0.5, seed=seed)(Embedding(embeddings.shape[0],
                                                embeddings.shape[1],
                                                weights=[embeddings],
                                                mask_zero=True,
                                                name="hyp_emb")(hyp_inpt))
    hyp_bilstm = Bidirectional(recurrent.LSTM(units[0],
                                              return_sequences=False,
                                              name="hyp_lstm"))(hyp_emb)

    content_inpt = Input([sentence_max_length], name="evidence_input", dtype="int32")
    content_emb = Dropout(0.5, seed=seed)(Embedding(embeddings.shape[0],
                                                    embeddings.shape[1],
                                                    weights=[embeddings],
                                                    mask_zero=True,
                                                    name="content_emb")(content_inpt))
    evidence_bilstm = Bidirectional(recurrent.LSTM(units[0],
                                                   return_sequences=False,
                                                   name="evidence_lstm"))(content_emb)

    hidden = Dropout(0.5, seed=seed)(Concatenate()([hyp_bilstm, evidence_bilstm]))
    if len(units) > 1:
        # add dense layer if required
        hidden = Dropout(0.5, seed=seed)(Dense(units=units[1], activation="relu",
                                               name="hidden_dense")(hidden))
    classifier = Dense(units=2, activation="softmax", name="dense")(hidden)

    model = Model(inputs=[hyp_inpt, content_inpt], outputs=classifier)
    optimizer = Adam(lr=lr)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    print(model.summary())
    return model
def get_model(self, max_size=EMBEDDING_SIZE):
    # Defining model
    print('Define model')
    q1 = Input(shape=(MAX_LEN,), dtype='int32')
    q2 = Input(shape=(MAX_LEN,), dtype='int32')

    if max_size < EMBEDDING_SIZE:
        print('Reduce EMBEDDING_SIZE to %d due to pretrained embedding' % max_size)
    if max_size < SENT_HIDDEN_SIZE:
        print('Reduce SENT_HIDDEN_SIZE to %d due to pretrained embedding' % max_size)

    if self.word_embedding_matrix is None:
        embed = Embedding(self.VOCAB_SIZE + 1,
                          min(max_size, EMBEDDING_SIZE),
                          input_length=MAX_LEN,
                          trainable=TRAIN_EMBED)
    else:
        embed = Embedding(self.VOCAB_SIZE + 1,
                          min(max_size, EMBEDDING_SIZE),
                          weights=[self.word_embedding_matrix],
                          input_length=MAX_LEN,
                          trainable=TRAIN_EMBED)
    embed_q1 = embed(q1)
    embed_q2 = embed(q2)

    translate = TimeDistributed(Dense(min(max_size, SENT_HIDDEN_SIZE), activation=ACTIVATION))
    sent_q1 = translate(embed_q1)
    sent_q2 = translate(embed_q2)

    if self.sen_type == 'lstm':
        sent_embed = Bidirectional(recurrent.LSTM(units=min(max_size, SENT_HIDDEN_SIZE),
                                                  recurrent_dropout=DROPOUT_RATE,
                                                  dropout=DROPOUT_RATE,
                                                  return_sequences=False))
    else:
        sent_embed = Lambda(lambda x: keras.backend.sum(x, axis=1),
                            output_shape=(min(max_size, SENT_HIDDEN_SIZE),))
    sent_q1 = BatchNormalization()(sent_embed(sent_q1))
    sent_q2 = BatchNormalization()(sent_embed(sent_q2))

    joint = concatenate([sent_q1, sent_q2])
    joint = Dropout(DROPOUT_RATE)(joint)
    for i in range(3):
        joint = Dense(2 * SENT_HIDDEN_SIZE, activation=ACTIVATION,
                      kernel_regularizer=l2(L2) if L2 else None)(joint)
        joint = Dropout(DROPOUT_RATE)(joint)
        joint = BatchNormalization()(joint)

    pred = Dense(1, activation='sigmoid')(joint)
    model = Model(inputs=[q1, q2], outputs=pred)
    if self.model_type == 'compare':
        model.compile(optimizer=OPTIMIZER, loss='binary_crossentropy', metrics=['accuracy'])
    else:
        model.compile(optimizer=OPTIMIZER, loss='mean_squared_error', metrics=['accuracy'])
    return model
def create_seq2seq_model(model_filename, params_filename):
    """Create the model"""
    (input_token_index, target_token_index,
     input_characters, target_characters,
     max_encoder_seq_length, num_encoder_tokens,
     max_decoder_seq_length, num_decoder_tokens) = get_parameters_from_file(params_filename)
    initialization = "he_normal"

    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of hidden_size
    # note: in a situation where your input sequences have a variable length,
    # use input_shape=(None, nb_feature).
    model.add(Masking(input_shape=(None, num_encoder_tokens)))
    for layer_number in range(LSTM_LAYERS):
        model.add(recurrent.LSTM(latent_dim,
                                 kernel_initializer=initialization,
                                 return_sequences=layer_number + 1 < LSTM_LAYERS))
        model.add(Dropout(DROPOUT))
    # For the decoder's input, we repeat the encoded input for each time step
    model.add(RepeatVector(max_decoder_seq_length))
    # The decoder RNN could be multiple layers stacked or a single layer
    for _ in range(LSTM_LAYERS):
        model.add(recurrent.LSTM(latent_dim,
                                 return_sequences=True,
                                 kernel_initializer=initialization))
        model.add(Dropout(DROPOUT))
    # For each step of the output sequence, decide which character should be chosen
    model.add(TimeDistributed(Dense(num_decoder_tokens, kernel_initializer=initialization)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.save(model_filename)
def test_masking_layer():
    '''
    This test based on a previously failing issue here:
    https://github.com/fchollet/keras/issues/1567
    '''
    model = Sequential()
    model.add(Masking(input_shape=(3, 4)))
    model.add(recurrent.LSTM(output_dim=5, return_sequences=True))
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    I = np.random.random((6, 3, 4))
    V = np.abs(np.random.random((6, 3, 5)))
    V /= V.sum(axis=-1, keepdims=True)
    model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1)
def make_model(self, model_type='class2'):
    ########################## Parameters for the model and dataset
    # input layers
    seq_input = Input(shape=(self.MAXLEN, len(self.dict_aa['A'])))
    fix_input = Input(shape=(self.fix_len,))

    # set RNN layer
    if self.mask0:
        seq_input0 = Masking(mask_value=0.0)(seq_input)
    else:
        seq_input0 = seq_input
    rnn0 = recurrent.LSTM(self.node0,
                          activation=self.act_fun,
                          # recurrent_activation=self.act_fun,
                          use_bias=True,
                          kernel_initializer='glorot_uniform',
                          recurrent_initializer='orthogonal',
                          bias_initializer='zeros',
                          unit_forget_bias=True,
                          kernel_regularizer=None,
                          recurrent_regularizer=None,
                          bias_regularizer=None,
                          activity_regularizer=None,
                          kernel_constraint=None,
                          recurrent_constraint=None,
                          bias_constraint=None,
                          dropout=self.drop_out_c,
                          recurrent_dropout=self.drop_r)(seq_input0)

    fix0 = Dense(self.fix_node, activation=self.act_fun)(fix_input)
    fix0_d = Dropout(self.drop_out_c)(fix0)

    merge_layer = concatenate([rnn0, fix0_d])
    combine1 = Dense(self.help_nn, activation=self.act_fun)(merge_layer)
    combine1_d = Dropout(self.drop_out_c)(combine1)
    combine2 = Dense(self.help_nn, activation=self.act_fun)(combine1_d)
    combine2_d = Dropout(self.drop_out_c)(combine2)

    if model_type == 'regression':
        dense0 = Dense(1)(combine2_d)
        final_model = Model(inputs=[seq_input, fix_input], outputs=[dense0])
        final_model.compile(loss=self.loss_function0, optimizer='adam')
    elif 'class' in model_type:
        class_n = int(model_type[-1])
        dense0 = Dense(class_n, activation='softmax')(combine2_d)
        final_model = Model(inputs=[fix_input, seq_input], outputs=[dense0])
        final_model.compile(loss=self.loss_function0, optimizer="RMSprop")

    json_string = final_model.to_json()
    open(self.path_save + self.file_name0 + self.out_name + '_model.json', 'w').write(json_string)
    return final_model
def test_lstm_v2_feature_parity_with_canonical_lstm(self):
    if tf.test.is_built_with_rocm():
        self.skipTest('Skipping the test as ROCm MIOpen does not '
                      'support padded input yet.')
    input_shape = 10
    rnn_state_size = 8
    timestep = 4
    batch = 20

    (x_train, y_train), _ = testing_utils.get_test_data(train_samples=batch,
                                                        test_samples=0,
                                                        input_shape=(timestep, input_shape),
                                                        num_classes=rnn_state_size,
                                                        random_seed=87654321)
    y_train = np_utils.to_categorical(y_train, rnn_state_size)
    # For the last batch item of the test data, we filter out the last
    # timestep to simulate the variable length sequence and masking test.
    x_train[-2:, -1, :] = 0.0
    y_train[-2:] = 0

    inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=tf.float32)
    masked_input = keras.layers.Masking()(inputs)
    lstm_layer = rnn_v1.LSTM(rnn_state_size, recurrent_activation='sigmoid')
    output = lstm_layer(masked_input)
    lstm_model = keras.models.Model(inputs, output)
    weights = lstm_model.get_weights()
    y_1 = lstm_model.predict(x_train)
    lstm_model.compile('rmsprop', 'mse')
    lstm_model.fit(x_train, y_train)
    y_2 = lstm_model.predict(x_train)

    with testing_utils.device(should_use_gpu=True):
        cudnn_layer = rnn.LSTM(rnn_state_size)
        cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input))
        cudnn_model.set_weights(weights)
        y_3 = cudnn_model.predict(x_train)
        cudnn_model.compile('rmsprop', 'mse')
        cudnn_model.fit(x_train, y_train)
        y_4 = cudnn_model.predict(x_train)

    self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5)
    self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5)
def _time_performance_run_normal_lstm(self, test_config, x_train, y_train):
    # Get performance number for standard LSTM on GPU.
    input_shape = test_config['input_shape']
    rnn_state_size = test_config['rnn_state_size']
    timestep = test_config['timestep']

    layer = rnn_v1.LSTM(rnn_state_size)
    inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=tf.float32)
    outputs = layer(inputs)
    model = keras.models.Model(inputs, outputs)
    model.compile('sgd', 'mse')

    sec_per_epoch = self._measure_performance(test_config, model, x_train, y_train)
    logging.info('Average performance for %s per epoch is: %s',
                 'Normal LSTM', sec_per_epoch)
    return sec_per_epoch
def make_rnn(MAXLEN):
    ########################## Parameters for the model and dataset
    # TRAINING_SIZE = len(inputs)
    # Try replacing JZS1 with LSTM, GRU, or SimpleRNN

    def build_lstm(return_sequences):
        # builds a fresh LSTM layer for each position in the stack
        return recurrent.LSTM(HIDDEN_SIZE,
                              input_shape=(None, len(chars)),
                              return_sequences=return_sequences,
                              W_regularizer=l2(l2_c),
                              b_regularizer=l2(l2_c),
                              dropout_W=drop_out_c,
                              dropout_U=drop_out_c)

    ########################## start a model
    model = Sequential()
    # masking
    if mask0:
        model.add(Masking(mask_value=0., input_shape=(MAXLEN, len(dict_aa['A']))))
    # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
    # model.add(Masking())
    # keras.layers.core.ActivityRegularization(l2=0.0, l2=0.0)
    if LAYERS > 1:
        model.add(build_lstm(return_sequences=True))
    else:
        model.add(build_lstm(return_sequences=False))
    if help_nn > 0:
        model.add(Dense(help_nn))
        model.add(Activation('tanh'))
    if LAYERS > 2:
        for _ in xrange(LAYERS - 2):
            model.add(build_lstm(return_sequences=True))
            # model.add(Dropout(0.5))
    if LAYERS > 1:
        model.add(build_lstm(return_sequences=False))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss=loss_function0, optimizer='adam')

    # save the model
    json_string = model.to_json()
    open(path_save + file_name0 + out_name + '_model.json', 'w+').write(json_string)
    return model
def generateModel(X_train, Y_train, X_test, Y_test):
    model = Sequential()
    model.add(recurrent.LSTM(32,
                             input_dim=1,
                             input_length=99,
                             activation='sigmoid',
                             inner_activation='hard_sigmoid'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    X_train = np.array(X_train).reshape(-1, 99, 1)
    X_test = np.array(X_test).reshape(-1, 99, 1)
    Y_train = to_categorical(Y_train, 10)
    Y_test = to_categorical(Y_test, 10)

    # Fit the model
    model.fit(X_train, Y_train, nb_epoch=150, batch_size=4)
    # model.predict(X_test, batch_size=4, verbose=0)
    # model.predict_on_batch(self, x)

    model.save('my_model.h5')  # creates a HDF5 file 'my_model.h5'
    # del model  # deletes the existing model
    # returns a compiled model identical to the previous one
    # model = load_model('my_model.h5')

    scores = model.evaluate(X_test, Y_test, batch_size=4)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    return model
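# Hedged follow-up sketch (not part of the original snippet): the commented
# lines above hint at reloading the saved HDF5 file; keras.models.load_model
# rebuilds the architecture, weights, and optimizer state in one call. The
# evaluate() call assumes X_test/Y_test shaped as prepared in generateModel.
from keras.models import load_model

restored = load_model('my_model.h5')
# scores = restored.evaluate(X_test, Y_test, batch_size=4)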
def __init__(self, load=None):
    self.model_path = "../../data/lstm_model.json"
    self.params_path = "../../data/lstm_params.h5"

    self.vocab, self.max_len = Loader.build_vocab()
    print("data loaded!")
    print("vocab size: " + str(len(self.vocab)))
    print("max sentence length: " + str(self.max_len))

    self.w2v = Loader.load_word_vec(self.vocab)
    print("word2vec loaded!")
    print("num words already in word2vec: " + str(len(self.w2v)))
    Loader.add_unknown_words(self.w2v, self.vocab)
    self.W, self.word_idx_map = Loader.get_W(self.w2v)
    self.c2id, self.id2c = Loader.build_class()
    print(self.c2id)

    if load:
        self.model = Loader.load_model(self.model_path, "lstm", self.params_path)
        return

    self.model = Sequential()
    self.model.add(Embedding(len(self.word_idx_map) + 1, 300, weights=[self.W]))
    # return_sequences=True so GlobalMaxPooling1D receives a 3D tensor
    self.model.add(recurrent.LSTM(output_dim=100, activation='tanh',
                                  dropout_W=0, dropout_U=0,
                                  return_sequences=True))
    # self.model.add(convolutional.Convolution1D(100, 3, activation='tanh', border_mode='same'))
    self.model.add(pooling.GlobalMaxPooling1D())
    # self.model.add(Dropout(0.2))
    self.model.add(Dense(7))
    self.model.add(Activation('softmax'))
    print(self.model.summary())

    rmsprop = keras.optimizers.rmsprop(lr=0.002)
    self.model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=["accuracy"])
def build_model(self):
    assert self.seq_len > 1
    assert len(self.alphabet.alphabet) > 0

    bits_per_char = self.alphabet.nb_chars
    rnn_size = bits_per_char

    model = Sequential()
    model.add(Masking(mask_value=0,
                      input_shape=(self.seq_len, bits_per_char),
                      name='input_layer'))
    model.add(recurrent.LSTM(rnn_size,
                             input_shape=(self.seq_len, bits_per_char),
                             return_sequences=False))
    model.add(Dense(units=rnn_size, activation='sigmoid'))
    model.add(Dense(units=bits_per_char, activation='softmax', name='output_layer'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
def model_init(arguments, prem, hypo, sent1, sent2):
    rnn_kwargs = dict(output_dim=300, dropout_W=0.2, dropout_U=0.2)
    RNN = lambda *args, **kwargs: Bidirectional(recurrent.LSTM(*args, **kwargs))

    # if RNN and 1 > 1:
    #     for l in range(1 - 1):
    #         rnn = RNN(return_sequences=True, **arguments)
    #         prem = BatchNormalization()(rnn(prem))
    #         hypo = BatchNormalization()(rnn(hypo))
    # rnn = emnd_sum if not RNN else RNN(return_sequences=False, **arguments)

    rnn = RNN(return_sequences=False, **rnn_kwargs)
    prem = rnn(prem)
    prem = BatchNormalization()(prem)
    hypo = rnn(hypo)
    hypo = BatchNormalization()(hypo)

    s1_s2_combined = concatenate([prem, hypo])
    s1_s2_combined = Dropout(dropoutt)(s1_s2_combined)
    for i in range(3):
        s1_s2_combined = Dense(2 * 300, activation='relu',
                               W_regularizer=l2(L2) if L2 else None)(s1_s2_combined)
        s1_s2_combined = Dropout(dropoutt)(s1_s2_combined)
        s1_s2_combined = BatchNormalization()(s1_s2_combined)

    pred = Dense(len(B), activation='softmax')(s1_s2_combined)
    model = Model(input=[sent1, sent2], output=pred)
    model.compile(optimizer=OPTIMIZER, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model
def test_lstm_output_on_multiple_kernel(self):
    input_shape = 10
    rnn_state_size = 8
    timestep = 4
    batch = 100

    x_train = np.random.random((batch, timestep, input_shape))
    inputs = keras.layers.Input(shape=[timestep, input_shape], dtype=tf.float32)

    with testing_utils.device(should_use_gpu=False):
        layer = rnn.LSTM(rnn_state_size)
        output = layer(inputs)
        cpu_model = keras.models.Model(inputs, output)
        weights = cpu_model.get_weights()
        y_1 = cpu_model.predict(x_train)

    with testing_utils.device(should_use_gpu=True):
        layer = rnn.LSTM(rnn_state_size)
        output = layer(inputs)
        gpu_model = keras.models.Model(inputs, output)
        gpu_model.set_weights(weights)
        y_2 = gpu_model.predict(x_train)

    # Note that CuDNN uses 'sigmoid' as activation, so the LSTM V2 uses
    # 'sigmoid' as default. Construct the canonical LSTM with sigmoid to achieve
    # the same output.
    with testing_utils.device(should_use_gpu=True):
        layer = rnn_v1.LSTM(rnn_state_size, recurrent_activation='sigmoid')
        output = layer(inputs)
        canonical_model = keras.models.Model(inputs, output)
        # Remove the extra cudnn bias since canonical lstm will not use it.
        canonical_model.set_weights(weights[:3])
        y_3 = canonical_model.predict(x_train)

    self.assertAllClose(y_1, y_2)
    self.assertAllClose(y_2, y_3)