Пример #1
0
def getWordPairVec(w1, w2):
    """Build a single feature vector for the word pair ``(w1, w2)``.

    The position of each word in the list returned by ``getWordList()``
    selects a row of the matrix returned by
    ``getEmbedding.getEmbeddingMat_HLBL()``; the two rows are joined with
    ``hstack``, ``w1`` first.

    Both the word list and the matrix are fetched again on every call.
    If ``getWordList()`` returns a plain list, a word that is not in it
    makes ``list.index`` raise ``ValueError``.
    """
    vocabulary = getWordList()
    embedding = getEmbedding.getEmbeddingMat_HLBL()
    # Rows are looked up in argument order so w1's row comes first.
    pair_rows = [embedding[vocabulary.index(word), :] for word in (w1, w2)]
    return hstack(pair_rows)
Пример #2
0
def getWordPairVec(w1, w2):
    """Return the embedding rows of ``w1`` and ``w2`` joined by ``hstack``.

    A word's position in ``getWordList()`` is used as the row index into
    the matrix from ``getEmbedding.getEmbeddingMat_HLBL()``. Both are
    re-fetched on each call. A word missing from the list propagates
    whatever ``.index`` raises (``ValueError`` for a plain list).
    """
    words = getWordList()
    matrix = getEmbedding.getEmbeddingMat_HLBL()

    def row_of(word):
        # One embedding row, selected by the word's position in the list.
        return matrix[words.index(word), :]

    first_row = row_of(w1)
    second_row = row_of(w2)
    return hstack([first_row, second_row])
Пример #3
0
        if item[1] in wordList and item[2] in wordList and  item[3] in wordList and item[4] in wordList and  item[5] in wordList and item[6] in wordList:
            sampleList.append(line)
    rfile.close()
    wfile = open(u"res/emnlp2013_turk_HLBL.txt","w")
    wfile.writelines(item for item in sampleList)

def getWordList(path=u"I:/数据/embedings/hlbl/words.txt"):
    """Read a one-word-per-line file and return its lines as a list.

    Args:
        path: Location of the word file. Defaults to the path that was
            previously hard-coded, so zero-argument calls are unchanged.

    Returns:
        A list with one entry per line, in file order. Only newline
        characters are stripped from each end; other whitespace is kept
        and blank lines become empty strings.
    """
    # ``with`` guarantees the handle is closed even if reading fails midway.
    with open(path) as rfile:
        return [line.strip("\n") for line in rfile]

# Module-level lookup tables, built once when this module is imported:
# the matrix returned by getEmbedding.getEmbeddingMat_HLBL() and the word
# list whose positions are used to select rows of that matrix.
WordEmbedding = getEmbedding.getEmbeddingMat_HLBL()
wordList = getWordList()

def sentCombMat_verb(w1, w2, w3):
    """Represent a three-word input by the embedding row of ``w2`` alone.

    ``w1`` and ``w3`` are accepted but not used. Presumably the arguments
    are (subject, verb, object) and ``w2`` is the verb -- confirm against
    the callers.

    The previous body stored the result in ``w2Mat`` but looked up ``w1``,
    so it returned the first word's row instead of the middle word's.

    Raises whatever ``wordList.index`` raises when ``w2`` is not present
    (``ValueError`` for a plain list).
    """
    w2Mat = WordEmbedding[wordList.index(w2), :]
    return w2Mat



def vecSim(x, y):
    # result1 = x.T.dot(y)
    # result1 =  result1.tolist()[0][0]           #svd 之前的稀疏原始矩阵用这个
    # x = x*wArr
    # y = y*wArr
    result1 = np.dot(x,y)
    result2 = np.linalg.norm(x)
Пример #4
0
    rfile.close()
    wfile = open(u"res/emnlp2013_turk_HLBL.txt", "w")
    wfile.writelines(item for item in sampleList)


def getWordList(path=u"I:/数据/embedings/hlbl/words.txt"):
    """Load the word list: one entry per line of the file at ``path``.

    Args:
        path: File to read. The default is the formerly hard-coded
            location, so existing ``getWordList()`` calls keep working.

    Returns:
        The file's lines in order, each with leading/trailing newline
        characters removed (spaces and tabs are preserved).
    """
    # Context manager closes the file on every exit path, including errors.
    with open(path) as rfile:
        return [line.strip("\n") for line in rfile]


# Loaded once at import time and shared by the functions in this module:
# the matrix from getEmbedding.getEmbeddingMat_HLBL() and the word list
# whose positions index that matrix's rows.
WordEmbedding = getEmbedding.getEmbeddingMat_HLBL()
wordList = getWordList()


def sentCombMat_Mult(w1, w2, w3):
    """Combine three words by multiplying their embedding rows with ``*``.

    Each word's position in ``wordList`` selects its row of
    ``WordEmbedding``; the three rows are multiplied together.

    The previous body looked up ``w1`` for all three rows, so it returned
    ``w1 * w1 * w1`` and ignored ``w2`` and ``w3``.

    NOTE(review): ``*`` is element-wise only if ``WordEmbedding`` is a
    NumPy ndarray (not ``numpy.matrix``) -- confirm.

    Raises whatever ``wordList.index`` raises when a word is not present
    (``ValueError`` for a plain list).
    """
    w1Mat = WordEmbedding[wordList.index(w1), :]
    w2Mat = WordEmbedding[wordList.index(w2), :]
    w3Mat = WordEmbedding[wordList.index(w3), :]

    return w1Mat * w2Mat * w3Mat


def vecSim(x, y):
    # result1 = x.T.dot(y)
    # result1 =  result1.tolist()[0][0]           #svd 之前的稀疏原始矩阵用这个