def wordsimi353(embeddingDict, average_emb, rand, dim, ML, loader=None, similarity=None):
    """Evaluate word embeddings on the WordSim-353 similarity benchmark.

    Scores every gold word pair with the embedding similarity and reports
    the Spearman rank correlation against the human judgements.

    Parameters:
        embeddingDict: dict mapping word -> embedding vector (list-like).
        average_emb: fallback embedding used for out-of-vocabulary words
            when ``rand`` is false.
        rand: if true, OOV words get a random unit vector of length ``dim``
            instead of ``average_emb``.
        dim: dimensionality of the random fallback vector.
        ML: if true, pairs with any OOV word are skipped entirely
            (no fallback, not counted as unknown).
        loader: optional zero-arg callable returning the gold
            ``{(word1, word2): score}`` dict; defaults to
            ``load_wordsimi353`` (injection point for testing).
        similarity: optional two-arg callable scoring a vector pair;
            defaults to ``dot_prod`` (injection point for testing).

    Returns:
        (spearman_correlation, unknown_pair_count); also prints both.
    """
    if loader is None:
        loader = load_wordsimi353
    if similarity is None:
        similarity = dot_prod
    wordPairs2label = loader()
    wordPair2simi = {}
    labels = []
    predicts = []
    unknown = 0
    for (word1, word2), label in wordPairs2label.items():
        embedding1 = embeddingDict.get(word1)
        embedding2 = embeddingDict.get(word2)
        if embedding1 is not None and embedding2 is not None:
            predict = similarity(embedding1, embedding2)
            wordPair2simi[(word1, word2)] = predict
            predicts.append(predict)
            labels.append(label)
        elif not ML:
            # Back off for OOV words: random unit vector or corpus average.
            if embedding1 is None:
                if rand:
                    vector = numpy.random.uniform(-1, 1, dim)
                    embedding1 = list(vector / numpy.linalg.norm(vector))
                else:
                    embedding1 = average_emb
            if embedding2 is None:
                if rand:
                    vector = numpy.random.uniform(-1, 1, dim)
                    embedding2 = list(vector / numpy.linalg.norm(vector))
                else:
                    embedding2 = average_emb
            predict = similarity(embedding1, embedding2)
            wordPair2simi[(word1, word2)] = predict
            predicts.append(predict)
            labels.append(label)
            unknown += 1
    sp, p_value = spearmanr(labels, predicts)
    print('Spearmanr is: ' + str(sp) + ', unknown pairs: ' + str(unknown))
    return sp, unknown
def wordsimi353(embeddingDict, average_emb, rand, dim, ML, loader=None, similarity=None):
    """Score the WordSim-353 benchmark with the given embeddings.

    Computes a similarity for each gold pair and prints/returns the
    Spearman correlation with the human ratings.

    Parameters:
        embeddingDict: dict of word -> vector.
        average_emb: OOV fallback vector when ``rand`` is false.
        rand: use a random unit vector (length ``dim``) for OOV words.
        dim: dimensionality for the random fallback.
        ML: when true, pairs containing an OOV word are dropped.
        loader: optional callable yielding the gold pair->score dict
            (defaults to ``load_wordsimi353``; exposed for testing).
        similarity: optional pair-scoring callable
            (defaults to ``dot_prod``; exposed for testing).

    Returns:
        (spearman_correlation, unknown_pair_count).
    """
    loader = load_wordsimi353 if loader is None else loader
    similarity = dot_prod if similarity is None else similarity

    def _backoff():
        # OOV replacement: random unit vector, or the average embedding.
        if rand:
            v = numpy.random.uniform(-1, 1, dim)
            return list(v / numpy.linalg.norm(v))
        return average_emb

    gold = loader()
    wordPair2simi = {}
    labels, predicts = [], []
    unknown = 0
    for (w1, w2), gold_score in gold.items():
        e1 = embeddingDict.get(w1)
        e2 = embeddingDict.get(w2)
        if e1 is not None and e2 is not None:
            score = similarity(e1, e2)
        elif ML:
            continue  # skip OOV pairs entirely in ML mode
        else:
            e1 = e1 if e1 is not None else _backoff()
            e2 = e2 if e2 is not None else _backoff()
            score = similarity(e1, e2)
            unknown += 1
        wordPair2simi[(w1, w2)] = score
        predicts.append(score)
        labels.append(gold_score)
    sp, p_value = spearmanr(labels, predicts)
    print('Spearmanr is: ' + str(sp) + ', unknown pairs: ' + str(unknown))
    return sp, unknown
def RW(embeddingDict, average_emb, rand, dim, ML, loader=None, similarity=None):
    """Evaluate word embeddings on the Rare Words (RW) similarity benchmark.

    Scores every gold word pair with the embedding similarity and reports
    the Spearman rank correlation against the human judgements.

    Parameters:
        embeddingDict: dict mapping word -> embedding vector (list-like).
        average_emb: fallback embedding used for out-of-vocabulary words
            when ``rand`` is false.
        rand: if true, OOV words get a random unit vector of length ``dim``
            instead of ``average_emb``.
        dim: dimensionality of the random fallback vector.
        ML: if true, pairs with any OOV word are skipped entirely
            (no fallback, not counted as unknown).
        loader: optional zero-arg callable returning the gold
            ``{(word1, word2): score}`` dict; defaults to ``load_RW``
            (injection point for testing).
        similarity: optional two-arg callable scoring a vector pair;
            defaults to ``dot_prod`` (injection point for testing).

    Returns:
        (spearman_correlation, unknown_pair_count); also prints both.
    """
    if loader is None:
        loader = load_RW
    if similarity is None:
        similarity = dot_prod
    wordPairs2label = loader()
    wordPair2simi = {}
    labels = []
    predicts = []
    unknown = 0
    for (word1, word2), label in wordPairs2label.items():
        embedding1 = embeddingDict.get(word1)
        embedding2 = embeddingDict.get(word2)
        if embedding1 is not None and embedding2 is not None:
            predict = similarity(embedding1, embedding2)
            wordPair2simi[(word1, word2)] = predict
            predicts.append(predict)
            labels.append(label)
        elif not ML:
            # Back off for OOV words: random unit vector or corpus average.
            if embedding1 is None:
                if rand:
                    vector = numpy.random.uniform(-1, 1, dim)
                    embedding1 = list(vector / numpy.linalg.norm(vector))
                else:
                    embedding1 = average_emb
            if embedding2 is None:
                if rand:
                    vector = numpy.random.uniform(-1, 1, dim)
                    embedding2 = list(vector / numpy.linalg.norm(vector))
                else:
                    embedding2 = average_emb
            predict = similarity(embedding1, embedding2)
            wordPair2simi[(word1, word2)] = predict
            predicts.append(predict)
            labels.append(label)
            unknown += 1
    sp, p_value = spearmanr(labels, predicts)
    print('Spearmanr is: ' + str(sp) + ', unknown pairs: ' + str(unknown))
    return sp, unknown
def RW(embeddingDict, average_emb, rand, dim, ML, loader=None, similarity=None):
    """Score the Rare Words (RW) benchmark with the given embeddings.

    Computes a similarity for each gold pair and prints/returns the
    Spearman correlation with the human ratings.

    Parameters:
        embeddingDict: dict of word -> vector.
        average_emb: OOV fallback vector when ``rand`` is false.
        rand: use a random unit vector (length ``dim``) for OOV words.
        dim: dimensionality for the random fallback.
        ML: when true, pairs containing an OOV word are dropped.
        loader: optional callable yielding the gold pair->score dict
            (defaults to ``load_RW``; exposed for testing).
        similarity: optional pair-scoring callable
            (defaults to ``dot_prod``; exposed for testing).

    Returns:
        (spearman_correlation, unknown_pair_count).
    """
    loader = load_RW if loader is None else loader
    similarity = dot_prod if similarity is None else similarity

    def _backoff():
        # OOV replacement: random unit vector, or the average embedding.
        if rand:
            v = numpy.random.uniform(-1, 1, dim)
            return list(v / numpy.linalg.norm(v))
        return average_emb

    gold = loader()
    wordPair2simi = {}
    labels, predicts = [], []
    unknown = 0
    for (w1, w2), gold_score in gold.items():
        e1 = embeddingDict.get(w1)
        e2 = embeddingDict.get(w2)
        if e1 is not None and e2 is not None:
            score = similarity(e1, e2)
        elif ML:
            continue  # skip OOV pairs entirely in ML mode
        else:
            e1 = e1 if e1 is not None else _backoff()
            e2 = e2 if e2 is not None else _backoff()
            score = similarity(e1, e2)
            unknown += 1
        wordPair2simi[(w1, w2)] = score
        predicts.append(score)
        labels.append(gold_score)
    sp, p_value = spearmanr(labels, predicts)
    print('Spearmanr is: ' + str(sp) + ', unknown pairs: ' + str(unknown))
    return sp, unknown