def get_data():
    """Load the emoji training set and the GloVe word vectors.

    Returns:
        tuple: ``(word_to_index, index_to_word, word_to_vec_map, X_train,
        Y_train)``. The first three values are whatever ``read_glove_vecs``
        returns for the 50-d GloVe file; the last two are whatever
        ``read_csv`` returns for the training CSV.
    """
    X_train, Y_train = read_csv('data/train_emoji.csv')

    # Only the training split and the GloVe lookups are part of the return
    # value, so nothing else (test split, one-hot labels, max sentence
    # length) is loaded or computed here.
    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs(
        '../word_vectors/data/glove.6B.50d.txt')

    return word_to_index, index_to_word, word_to_vec_map, X_train, Y_train
def main():
    """Train the ``Emojify_V2`` model and report how it does on the test set.

    Steps performed:
      1. Load the train/test CSV files and the 50-d GloVe vectors.
      2. Build the model with ``Emojify_V2``, print its summary and compile
         it (categorical cross-entropy, Adam, accuracy metric).
      3. Fit for 50 epochs (batch size 32, shuffled), then evaluate on the
         test set and print the accuracy.
      4. Print every test sentence whose predicted label differs from the
         expected one.
      5. Print the predicted emoji for one hand-written sentence.
    """
    num_classes = 5

    X_train, Y_train = read_csv('../data/train_emoji.csv')
    X_test, Y_test = read_csv('../data/tesss.csv')

    # Sequence length is the largest *word* count in the training set.
    # Selecting the longest sentence by character count and then counting
    # its words can under-estimate this when a shorter string has more words.
    maxLen = max(len(sentence.split()) for sentence in X_train)

    word_to_index, _, word_to_vec_map = read_glove_vecs('../data/glove.6B.50d.txt')

    model = Emojify_V2((maxLen,), word_to_vec_map, word_to_index)
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Training.
    X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
    Y_train_oh = convert_to_one_hot(Y_train, C=num_classes)
    model.fit(X_train_indices, Y_train_oh, epochs=50, batch_size=32, shuffle=True)

    # Evaluation.
    X_test_indices = sentences_to_indices(X_test, word_to_index, max_len=maxLen)
    Y_test_oh = convert_to_one_hot(Y_test, C=num_classes)
    _, acc = model.evaluate(X_test_indices, Y_test_oh)
    print()
    print("Test accuracy = ", acc)

    # This code allows you to see the mislabelled examples.
    # The index matrix built for evaluation is reused; it is the same input.
    pred = model.predict(X_test_indices)
    for sentence, expected, scores in zip(X_test, Y_test, pred):
        predicted = np.argmax(scores)
        if predicted != expected:
            print('Expected emoji:' + label_to_emoji(expected) + ' prediction: '
                  + sentence + label_to_emoji(predicted).strip())

    # Change the sentence below to see your prediction. Make sure all the
    # words are in the Glove embeddings.
    x_test = np.array(['not feeling happy'])
    X_test_indices = sentences_to_indices(x_test, word_to_index, maxLen)
    print(x_test[0] + ' ' + label_to_emoji(np.argmax(model.predict(X_test_indices))))
def main():
    """Train the baseline emoji classifier and print/plot its results.

    Steps performed:
      1. Load the train/test CSV files and print up to ten labelled training
         samples.
      2. Load the 50-d GloVe vectors and print two vocabulary lookups.
      3. Train with ``model(...)`` to obtain ``W`` and ``b``, then call
         ``predict`` on the training set, the test set and a handful of
         hand-written sentences.
      4. Print a crosstab of actual vs. predicted test labels and show the
         confusion-matrix plot.
    """
    X_train, Y_train = read_csv('../data/train_emoji.csv')
    X_test, Y_test = read_csv('../data/tesss.csv')

    # Preview the first samples; slicing keeps this safe when the training
    # set holds fewer than ten rows.
    for sentence, label in zip(X_train[:10], Y_train[:10]):
        print(sentence, label_to_emoji(label))

    word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('../data/glove.6B.50d.txt')

    word = "cucumber"
    index = 289846
    print("")
    print("the index of", word, "in the vocabulary is", word_to_index[word])
    print("the", str(index) + "th word in the vocabulary is", index_to_word[index])

    # Only the learned parameters are needed from training.
    _, W, b = model(X_train, Y_train, word_to_vec_map)

    print("Training set:")
    # Return value is not used for the training split.
    # NOTE(review): presumably predict() reports accuracy itself - confirm.
    predict(X_train, Y_train, W, b, word_to_vec_map)
    print('Test set:')
    pred_test = predict(X_test, Y_test, W, b, word_to_vec_map)

    X_my_sentences = np.array(
        ["i adore you", "i love you", "funny lol", "lets play with a ball",
         "food is ready", "not feeling happy"])
    Y_my_labels = np.array([[0], [0], [2], [1], [4], [3]])

    print('--------- PRINT PREDICTIONS ----------')
    pred = predict(X_my_sentences, Y_my_labels, W, b, word_to_vec_map)
    print_predictions(X_my_sentences, pred)

    print(Y_test.shape)
    # Header row with the emoji of each of the five labels.
    print(' ' + ' '.join(label_to_emoji(label) for label in range(5)))

    # Flatten with -1 so the crosstab works for any test-set size instead of
    # only for exactly 56 examples.
    print(pd.crosstab(Y_test, pred_test.reshape(-1),
                      rownames=['Actual'], colnames=['Predicted'], margins=True))
    plot_confusion_matrix(Y_test, pred_test)
    plt.show()
X = Dropout(0.5)(X) X = Dense(5)(X) X = Activation('softmax')(X) model = Model(inputs=sentence_indices, outputs=X) return model if __name__ == '__main__': os.chdir( r"E:\深度学习\【中英】【吴恩达课后编程作业】Course 5 - 序列模型 - 第二周作业 - 词向量的运算与Emoji生成器") word_to_index, index_to_word, word_to_vec_map = emo_utils.read_glove_vecs( 'data/glove.6B.50d.txt') embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index) print("weights[0][1][3] =", embedding_layer.get_weights()[0][1][3]) max_Len = 10 model = Emojify_V2((max_Len, ), word_to_vec_map, word_to_index) model.summary() model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) X_train, Y_train = emo_utils.read_csv('data/train_emoji.csv') X_test, Y_test = emo_utils.read_csv('data/test.csv') X_train_indices = sentences_to_indices(X_train, word_to_index, max_Len) Y_train_oh = emo_utils.convert_to_one_hot(Y_train, C=5) model.fit(X_train_indices, Y_train_oh,