def _bilstm_embedding_vector(word, wordembeddings, dim):
    """Return the embedding for *word*, or a random vector if none is usable.

    When *word* is present but maps to an empty vector, the title-cased and
    upper-cased spellings are tried in that order. A spelling that is missing
    from *wordembeddings* is treated like an empty vector instead of raising
    ``KeyError``.
    """
    known_words = wordembeddings.keys()
    if word in known_words:
        for variant in (word, word.title(), word.upper()):
            if variant not in known_words:
                continue
            vector = wordembeddings[variant]
            if len(vector) != 0:
                return vector
    # Unknown word, or every spelling was empty: draw a random vector.
    return np.array([round(np.random.rand(), 8) for _ in range(dim)])


def bilstm(X_train, X_test, Y_train, Y_test, wordembeddings):
    """Train a bidirectional-GRU classifier and evaluate it on the test split.

    The function tokenizes the texts, builds an embedding matrix from
    *wordembeddings*, trains the network with early stopping on validation
    loss, saves the model, the loss plot and an architecture diagram, and
    prints a classification report.

    Args:
        X_train: training texts; must support the pandas ``.str`` accessor.
        X_test: validation texts, tokenized with the tokenizer fitted on
            ``X_train``.
        Y_train: training targets, converted with ``np.asarray``.
            Presumably one-hot rows with 204 columns to match the softmax
            output -- TODO confirm against the caller.
        Y_test: validation targets, same layout as ``Y_train``.
        wordembeddings: mapping from word to embedding vector; must provide
            ``keys()`` and item lookup. Vectors are expected to have 300
            components or be empty.

    Returns:
        Tuple ``(y_pred, y_val_class, y_pred_class, y_val_class_argmax,
        y_pred_class_argmax)``: the raw predictions plus the true and
        predicted class indices, each as a pandas Series (``idxmax``) and as
        a NumPy array (``argmax``).
    """
    # Fix every RNG that is seeded here so runs are repeatable.
    np.random.seed(1234)
    tf.random.set_seed(1234)
    random.seed(1234)

    # Longest training sentence (in whitespace tokens) sets the padded length.
    max_length_sentence = X_train.str.split().str.len().max()

    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'',
                          lower=True)
    tokenizer.fit_on_texts(X_train)
    word_index = tokenizer.word_index
    EMBEDDING_DIM = 300
    vocabulary_size = len(word_index) + 1  # index 0 is left for padding
    print('Found %s unique tokens.' % len(word_index))

    sequences_train = tokenizer.texts_to_sequences(X_train)
    sequences_valid = tokenizer.texts_to_sequences(X_test)
    X_train = pad_sequences(sequences_train, maxlen=max_length_sentence)
    X_val = pad_sequences(sequences_valid, maxlen=X_train.shape[1])
    y_train = np.asarray(Y_train)
    y_val = np.asarray(Y_test)

    # Row i of the matrix holds the vector of the word with tokenizer index i.
    embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_matrix[i] = _bilstm_embedding_vector(
            word, wordembeddings, EMBEDDING_DIM)

    # NOTE: the embedding below is left trainable (Keras default). An extra,
    # frozen Embedding layer used to be constructed here but was never
    # connected to the graph, so it has been dropped.
    inputs = Input(shape=(X_train.shape[1], ))
    x = Embedding(vocabulary_size,
                  EMBEDDING_DIM,
                  input_length=max_length_sentence,
                  weights=[embedding_matrix])(inputs)
    x = Bidirectional(GRU(64))(x)  # NOTE: change this for other models
    x = Dense(900, activation='relu')(x)
    x = Dense(400, activation='relu')(x)
    x = Dense(250, activation='relu')(x)
    outputs = Dense(204, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    callbacks = [EarlyStopping(monitor='val_loss')]
    hist_adam = model.fit(X_train,
                          y_train,
                          batch_size=1000,
                          epochs=200,
                          verbose=1,
                          validation_data=(X_val, y_val),
                          callbacks=callbacks)
    # NOTE: change the target path for other models
    model.save(config.bigru_prepocessed_dataset1_chai)

    y_pred = model.predict(X_val)
    print(y_pred)
    y_val_class = pd.DataFrame(y_val).idxmax(axis=1)
    print(y_val_class)
    y_val_class_argmax = np.argmax(y_val, axis=1)
    y_pred_class_argmax = np.argmax(y_pred, axis=1)
    y_pred_class = pd.DataFrame(y_pred).idxmax(axis=1)
    print(y_pred_class)
    print(classification_report(y_val_class, y_pred_class))

    # Training vs. validation loss per epoch.
    plt.suptitle('Optimizer : Adam', fontsize=10)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.plot(hist_adam.history['loss'], color='b', label='Training Loss')
    plt.plot(hist_adam.history['val_loss'],
             color='r',
             label='Validation Loss')
    plt.legend(loc='upper right')
    # NOTE: change the target path for other models
    plt.savefig(
        '/home/ubuntu/asset_classification/results/bigru_model_dataset1_preprocessed_chai.png'
    )
    # NOTE: change the target path for other models
    tf.keras.utils.plot_model(model,
                              to_file=config.bigru_architecture,
                              show_shapes=True)

    return (y_pred, y_val_class, y_pred_class, y_val_class_argmax,
            y_pred_class_argmax)
# Sentence autoencoder: a summed bidirectional LSTM encodes the input into one
# vector, which is repeated max(sent_lens) times and decoded back to a sequence.
encoder = Bidirectional(
    LSTM(HIDDEN_SIZE),
    merge_mode='sum',
    name="encoder_lstm",
)(inputs)
decoder = RepeatVector(max(sent_lens))(encoder)
decoder = Bidirectional(
    LSTM(EMBED_SIZE, return_sequences=True),
    merge_mode='sum',
)(decoder)

autoencoder = Model(inputs, decoder)
autoencoder.compile(optimizer='sgd', loss='mse')

# Whole batches only; a trailing partial batch is not counted.
num_train_step = len(X_train) // BATCH_SIZE
num_test_step = len(X_test) // BATCH_SIZE

hist = autoencoder.fit_generator(
    train_gen,
    steps_per_epoch=num_train_step,
    epochs=20,
    validation_data=test_gen,
    validation_steps=num_test_step,
)

# Stand-alone encoder that shares the trained "encoder_lstm" layer.
encoder = Model(autoencoder.input,
                autoencoder.get_layer("encoder_lstm").output)


def compare_cosine_similarity(x, y):
    """Return the cosine of the angle between vectors *x* and *y*."""
    norm_product = np.linalg.norm(x, 2) * np.linalg.norm(y, 2)
    return np.dot(x, y) / norm_product


# For ten test batches, compare the encoding of each input with the encoding
# of its reconstruction.
for i in range(10):
    Xtest, Ytest = next(test_gen)
    Ytest_ = autoencoder.predict(Xtest)
    Xvec = encoder.predict(Xtest)
    Yvec = encoder.predict(Ytest_)
    for source_vec, rebuilt_vec in zip(Xvec, Yvec):
        print(compare_cosine_similarity(source_vec, rebuilt_vec))
def train(is_debug=False):
    """Build the joint intent/slot model, load weights, and decode one sample.

    The function reads the ATIS train/dev files, rebuilds the
    encoder-decoder graph, loads weights from ``nlu.hdf5`` (the ``fit`` call
    itself is currently commented out), and prints the predicted intent and
    the greedily decoded slot tags for the first dev example.

    Args:
        is_debug: accepted for interface compatibility; not read by this
            function.

    Returns:
        None. Results are printed.
    """
    # Context managers close the dataset files as soon as they are read.
    with open("dataset/atis-2.train.w-intent.iob", "r") as train_file:
        train_data = train_file.readlines()
    with open("dataset/atis-2.dev.w-intent.iob", "r") as test_file:
        test_data = test_file.readlines()

    train_data_ed = data_pipeline(train_data)
    test_data_ed = data_pipeline(test_data)
    word2index, index2word, slot2index, index2slot, intent2index, index2intent = \
        get_info_from_training_data(train_data_ed)
    index_train = to_index(train_data_ed, word2index, slot2index, intent2index)
    index_test = to_index(test_data_ed, word2index, slot2index, intent2index)

    # Training tensors; only consumed by the disabled fit() call further down.
    intents = [item[3] for item in index_train]
    intent_labels = np.eye(intent_size)[np.array(intents)]
    intent_train = np.array([item[0] for item in index_train])
    slot_train = np.array([item[2] for item in index_train])
    # Decoder target = slot sequence shifted left by one step: insert a 0
    # before the last column, then drop the first column.
    slot_train_target = np.insert(slot_train,
                                  slot_train.shape[1] - 1,
                                  values=0,
                                  axis=1)
    slot_train_target = np.delete(slot_train_target, 0, axis=1)
    slot_train_target = np.eye(slot_size)[slot_train_target]

    # First dev example, used for the inference demo at the end.
    seq = np.array(index_test[0][0])

    import tensorflow as tf
    from keras.layers import Lambda
    # Unused debugging layer. It is still constructed so that layer creation
    # order is unchanged -- NOTE(review): presumably safe to delete; confirm
    # that nothing depends on auto-generated layer names first.
    print_func = Lambda(lambda x: tf.Print(x, [tf.shape(x)]))
    # Embedding of a (batch, steps, 1) input yields a 4-D tensor; drop axis 2.
    squeeze = Lambda(lambda x: tf.squeeze(x, axis=2))

    # ---- encoder definition ----
    input_voc = Input(shape=(None, 1))
    embedding_voc = Embedding(input_dim=vocab_size,
                              output_dim=embedding_size,
                              mask_zero=True)
    embedding_voc_out = embedding_voc(input_voc)
    encoder_lstm1 = Bidirectional(LSTM(units=hidden_size,
                                       dropout=0.7,
                                       return_sequences=True),
                                  merge_mode="concat")
    embedding_voc_out = squeeze(embedding_voc_out)
    encoder_lstm1_out = encoder_lstm1(embedding_voc_out)
    encoder = Bidirectional(LSTM(units=hidden_size,
                                 dropout=0.7,
                                 return_sequences=False,
                                 return_state=True),
                            merge_mode="concat")
    # Encoder output plus the final states of both directions.
    encoder_out, forward_h, forward_c, backward_h, backward_c = encoder(
        encoder_lstm1_out)

    # ---- intent head ----
    intent = Dense(intent_size, activation="linear")(encoder_out)
    intent = Dense(intent_size, activation="softmax")(intent)

    # Concatenate forward/backward states so they fit the decoder, whose
    # LSTMs have hidden_size * 2 units.
    forward_h = K.layers.concatenate([forward_h, backward_h], 1)
    forward_c = K.layers.concatenate([forward_c, backward_c], 1)
    encoder_state = [forward_h, forward_c]

    # ---- decoder definition ----
    input_slot = Input(shape=(None, 1))
    embedding_slot = Embedding(input_dim=slot_size,
                               output_dim=embedding_size,
                               mask_zero=True)
    embedding_slot_out = embedding_slot(input_slot)
    embedding_slot_out = squeeze(embedding_slot_out)
    decoder_lstm1 = LSTM(units=hidden_size * 2,
                         dropout=0.7,
                         return_state=True,
                         return_sequences=True)
    decoder_lstm1_out, forward_h, forward_c = decoder_lstm1(
        embedding_slot_out, initial_state=[forward_h, forward_c])
    decoder = LSTM(units=hidden_size * 2,
                   dropout=0.7,
                   return_sequences=True,
                   return_state=False)
    decoder_output = decoder(decoder_lstm1_out)
    dense1 = Dense(slot_size, activation="linear")
    dense1_out = dense1(decoder_output)
    dense2 = Dense(slot_size, activation="softmax")
    dense2_out = dense2(dense1_out)

    model = Model(inputs=[input_voc, input_slot],
                  outputs=[intent, dense2_out])
    model.compile(optimizer="adam",
                  loss=categorical_crossentropy,
                  metrics=["acc"])
    # Training is disabled; previously trained weights are loaded instead.
    # acc = model.fit([np.expand_dims(intent_train, 2), np.expand_dims(slot_train, 2)],
    #                 [intent_labels, slot_train_target], batch_size=batch_size, epochs=1)
    # print(acc)
    # model.save_weights("nlu.hdf5")
    model.load_weights("nlu.hdf5")

    # ---- inference models (reuse the layers defined above) ----
    encoder = Model(input_voc, encoder_state)
    intenter = Model(input_voc, intent)

    # One-step decoder: takes a slot index plus (h, c) and returns the slot
    # distribution plus the updated (h, c).
    decoder_in = Input(shape=(1, ))
    decoder_state_in_h = Input(shape=(hidden_size * 2, ))
    decoder_state_in_c = Input(shape=(hidden_size * 2, ))
    decoder_state_in = [decoder_state_in_h, decoder_state_in_c]
    decoder_out, decoder_h, decoder_c = decoder_lstm1(
        embedding_slot(decoder_in), initial_state=decoder_state_in)
    decoder_state_out = [decoder_h, decoder_c]
    decoder_output = decoder(decoder_out)
    dense1_out = dense1(decoder_output)
    dense2_out = dense2(dense1_out)
    decoder_model = Model([decoder_in] + decoder_state_in,
                          [dense2_out] + decoder_state_out)
    print(decoder_model.summary())

    # ---- run the demo on the first dev example ----
    print(seq)
    # Shape the 1-D token sequence to (1, steps, 1) as input_voc expects.
    seq = np.expand_dims(np.array(seq), 0)
    seq = np.expand_dims(np.array(seq), 2)
    encoder_state = encoder.predict(seq)
    intent_index = np.argmax(intenter.predict(seq))
    print(index2intent[intent_index])

    # Greedy decoding: feed the arg-max slot back in for input_steps steps.
    index = slot2index["<STR>"]
    for _ in range(input_steps):
        decoder_out, state_h, state_c = decoder_model.predict(
            [np.array([index])] + encoder_state)
        index = np.argmax(decoder_out)
        encoder_state = [state_h, state_c]
        print(index2slot[index])
history = model.fit(X_train,
                    np.array(y_train),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.1,
                    verbose=2)


def pred2label(pred):
    """Convert per-token score vectors into tag strings.

    Args:
        pred: iterable of sequences; each element of a sequence is a score
            vector whose arg-max position is looked up in ``idx2tag``.

    Returns:
        A list of lists of tag names, with "PADword" replaced by "O".
    """
    return [[idx2tag[np.argmax(p)].replace("PADword", "O") for p in pred_i]
            for pred_i in pred]


pred_y = model.predict(X_test)
pred_y = np.argmax(pred_y, axis=-1)
# Map predicted indices to tags and cut each prediction to the length of its
# reference sequence.
pred_y = [[index_to_tag[i].replace("PADword", "O")
           for i in pred_row][0:len(true_row)]
          for pred_row, true_row in zip(pred_y, y_test)]
y_test_true = [[index_to_tag[i] for i in row] for row in y_test]
# flat_classification_report expects (y_true, y_pred); passing them the other
# way round swaps precision with recall and takes support from predictions.
print('LSTM Classification Report\n',
      metrics.flat_classification_report(y_test_true, pred_y))
# Display the figure built so far.
plt.show()

# Word-level and character-level test inputs; the character ids are laid out
# as (samples, max_len, max_len_char).
test_inputs = [
    X_w_te,
    np.array(X_c_te).reshape((len(X_c_te), max_len, max_len_char)),
]

score = model.evaluate(test_inputs,
                       np.array(y_te),
                       batch_size=batch_size,
                       verbose=1)
print(model.metrics_names)
print("Score:")
print(score)

# ---- Prediction on the test set ----
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

test_pred = model.predict(test_inputs, verbose=1)

# Reverse lookup index -> tag, plus one extra index that maps to 'O'.
idx2tag = dict((index, tag) for tag, index in tags2idx.items())
tags_size = len(idx2tag)
idx2tag[tags_size] = 'O'


def pred2label(pred):
    """Turn per-token score vectors into tag names, mapping "PAD" to "O"."""
    labelled = []
    for sentence_scores in pred:
        labelled.append([
            idx2tag[np.argmax(token_scores)].replace("PAD", "O")
            for token_scores in sentence_scores
        ])
    return labelled
def _dense_labels(scores):
    """Return the arg-max class index per row after reshaping to 2-D."""
    flat = np.reshape(scores, (scores.shape[0], scores.shape[1]))
    return np.argmax(flat, axis=-1)


# First 80% of the rows are used for training, the remaining 20% for testing.
split_at = int(0.8 * all.shape[0])

all_train = all[:split_at, ...]
Y_train = Y[:split_at, ...]
Y_train_dense = _dense_labels(Y_train)

all_test = all[split_at:, ...]
Y_test = Y[split_at:, ...]
Y_test_dense = _dense_labels(Y_test)

# range/print() are used instead of xrange/print-statement so the loop runs
# on Python 3 as well as Python 2.
for i in range(100):
    print(i)
    model.fit(all_train, Y_train, batch_size=1000, epochs=5, verbose=1)

    Y_pred_train = model.predict(all_train, batch_size=1000)
    Y_pred_test = model.predict(all_test, batch_size=1000)
    Y_pred_train_dense = _dense_labels(Y_pred_train)
    Y_pred_test_dense = _dense_labels(Y_pred_test)

    # Accuracy in percent on each split after this round of training.
    train_acc = np.sum(
        Y_pred_train_dense == Y_train_dense) * 100.0 / len(Y_pred_train_dense)
    val_acc = np.sum(
        Y_pred_test_dense == Y_test_dense) * 100.0 / len(Y_pred_test_dense)