def main(): G = GloveEmbedding("glove.6B.50d.txt") word_to_idx, idx_to_word, embedding = G.read_embedding() #print("locon: ", word_to_idx["locon"]) s = "I love New York and music locon" s = s.lower() print("Sentence: ", s) S = SentenceToIndices(word_to_idx) sentence = S.map_sentence(s) print("Sentence to indices: ", sentence) print("Padded: ", PadSentences(10).pad(sentence)) SE = SentenceToEmbedding(word_to_idx, idx_to_word, embedding) matrix = SE.map_sentence(s, max_len=10) print("Matrix: ", matrix) print("Matrix.shape: ", matrix.shape) print("Embedding i: ", embedding[word_to_idx["i"]]) sentences = [] sentences.append("I esta malo".lower()) sentences.append("Love la musica salsa.".lower()) sentences.append("Uff, q mal te va nene".lower()) mapped, mlen = S.map_sentence_list(sentences) print("mlen: ", mlen) for s in mapped: print(s)
def main(): G = GloveEmbedding("../test/data/glove.6B.50d.txt") word_to_idx, idx_to_word, embedding = G.read_embedding() #print("locon: ", word_to_idx["locon"]) print("Length dictionary: ", len(word_to_idx)) #s = "I love New York and music locon" s = "The flu is making me sad" s = s.lower() print("Sentence: ", s) S = SentenceToIndices(word_to_idx) sentence = S.map_sentence(s) print("Sentence to indices: ", sentence) print("Padded: ", PadSentences(10).pad(sentence)) SE = SentenceToEmbeddingWithEPSILON(word_to_idx, idx_to_word, embedding) matrix1 = SE.map_sentence(s, max_len=len(s)) s2 = "The flu is making me sad".lower() matrix2 = SE.map_sentence(s2, max_len=len(s2)) print("Matrix 1: ", matrix1) print("Matrix.shape: ", matrix1.shape) print("\n Matrix 2: ", matrix2) print("Matrix.shape: ", matrix2.shape) print("\n Self Similarity: ", matrix_cosine_similary(matrix1, matrix1)) M1 = np.array([-1, 40, 0.04]).reshape((3, 1)) M2 = np.array([100, 2, 3]).reshape((3, 1)) print("M1: \n ", M1) print("M2: \n", M2) SimM = matrix_cosine_similary(M1, M2) print("SimM: \n", SimM) D = distance_similarity_matrix(SimM) print("D: ", D) M3 = np.array([[1, 2, 3, 1], [4, 5, 6, 2], [7, 8, 9, 1]]) M4 = np.array([[1, 2, 3.000001, 1], [4, 5, 6, 2], [7, 8, 9, 1]]) SimM = matrix_cosine_similary(M3, M3) print("SimM: \n", SimM) D = distance_similarity_matrix(SimM) print("D: ", D) SimM = matrix_cosine_similary(M3, M4) print("\nSimM: \n", SimM) Up = np.triu(SimM) D = distance_similarity_matrix(SimM) print("D: ", D) print("Up: ", Up) print("sum Up: ", np.sum(Up)) print("up I: ", np.triu(np.ones(Up.shape))) print("sum I: ", np.sum(np.triu(np.ones(Up.shape))))