def getDelRefMap():
    """Pair entries 100000..119999 of getdelRef() with their best match in getRefList().

    For every word at those positions that is present in getSennaList(), the
    candidate from getRefList() with the highest vecSim() score against the
    word's embedding row is selected, and the pair is recorded as
    "word<TAB>candidate".  Candidates missing from the SENNA word list score 0.

    The accumulated pairs are written to "res/sennaMap_4739_E.txt" whenever the
    position is a multiple of 100 (a progress checkpoint) and once more when the
    loop finishes.  Nothing is returned.

    Raises:
        IndexError: if getdelRef() has fewer than 120000 entries.
        ValueError: from np.argmax if getRefList() is empty.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()
    out_path = "res/sennaMap_4739_E.txt"

    # Build the word -> row lookup once instead of scanning SennaList for every
    # comparison.  setdefault keeps the first occurrence, like list.index().
    senna_row = {}
    for row, word in enumerate(SennaList):
        senna_row.setdefault(word, row)
    # Row of each candidate (None when the candidate has no embedding).
    ref_rows = [senna_row.get(word) for word in chosedRef]

    def save(entries):
        # "with" guarantees the handle is flushed and closed on every write.
        with open(out_path, "w") as wfile:
            wfile.writelines(entry + '\n' for entry in entries)

    refmap = []
    for i in range(100000, 120000):
        word1 = delRefList[i]
        print(i)

        if i % 100 == 0:
            save(refmap)

        row1 = senna_row.get(word1)
        if row1 is None:
            continue
        w1vec = WordEmbedding[row1, :]

        sim = [
            0 if row is None else vecSim(w1vec, WordEmbedding[row, :])
            for row in ref_rows
        ]
        chosed = chosedRef[np.argmax(sim)]

        item = word1 + "\t" + chosed
        print(item)
        refmap.append(item)

    save(refmap)
def getDelRefMap():
    """Extend a saved mapping with entries 50000..99999 of getdelRef().

    The mapping starts from the lines of "res/sennaMap_4739_1-20000.txt".  For
    each word at the given positions that occurs in getSennaList(), the entry of
    getRefList() with the highest vecSim() score is chosen (entries without an
    embedding score 0) and "word<TAB>chosen" is appended.

    The whole mapping is rewritten to "res/sennaMap_4739_D.txt" at every
    position divisible by 100 and again after the loop.  Nothing is returned.

    Raises:
        IndexError: if getdelRef() has fewer than 100000 entries.
        ValueError: from np.argmax if getRefList() is empty.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # Drop the trailing newline of each seed line: every entry gets '\n'
    # appended when written, so keeping it would emit a blank line after each
    # previously saved pair.
    with open("res/sennaMap_4739_1-20000.txt") as rfile:
        refmap = [line.rstrip("\n") for line in rfile]

    # One pass to index the vocabulary (first occurrence wins, as with
    # list.index) replaces a linear scan per lookup.
    row_of = {}
    for position, token in enumerate(SennaList):
        row_of.setdefault(token, position)
    candidate_rows = [row_of.get(candidate) for candidate in chosedRef]

    def dump():
        # Context manager closes the file deterministically after each write.
        with open("res/sennaMap_4739_D.txt", "w") as wfile:
            wfile.writelines(item + '\n' for item in refmap)

    for i in range(50000, 100000):
        word1 = delRefList[i]
        print(i)

        if i % 100 == 0:
            dump()

        if word1 not in row_of:
            continue
        w1vec = WordEmbedding[row_of[word1], :]

        sim = []
        for row in candidate_rows:
            if row is None:
                sim.append(0)
            else:
                sim.append(vecSim(w1vec, WordEmbedding[row, :]))
        chosed = chosedRef[np.argmax(sim)]

        item = word1 + "\t" + chosed
        print(item)
        refmap.append(item)

    dump()
Exemplo n.º 3
0
def getWordPairVec(w1,w2):
    """Return the embedding rows of w1 and w2 joined by hstack.

    The word list and the embedding matrix are fetched on every call; each
    word's position in the word list selects its row of the matrix.
    list.index raises ValueError when a word is not in the list.
    """
    vocabulary = getWordList()
    embedding = getEmbedding.getEmbeddingMat_senna()

    first_row = vocabulary.index(w1)
    first_vec = embedding[first_row, :]
    second_row = vocabulary.index(w2)
    second_vec = embedding[second_row, :]

    return hstack([first_vec, second_vec])
Exemplo n.º 4
0
def getWordPairVec(w1, w2):
    """Build one vector for a word pair by stacking both embedding rows.

    Looks up w1 and then w2 in the list from getWordList(), takes the matching
    rows of the matrix from getEmbedding.getEmbeddingMat_senna(), and hands
    them to hstack in that order.  A word missing from the list makes
    list.index raise ValueError.
    """
    words = getWordList()
    matrix = getEmbedding.getEmbeddingMat_senna()
    return hstack([matrix[words.index(word), :] for word in (w1, w2)])
Exemplo n.º 5
0
def getDelRefMap():
    """Write the best getRefList() match for every getdelRef() word to a file.

    Words absent from getSennaList() are skipped.  For the others, each
    candidate from getRefList() is scored with vecSim() against the word's
    embedding row (candidates without an embedding score 0) and the candidate
    with the highest score is chosen.  Each "word<TAB>chosen" pair is printed
    and finally all pairs are written, one per line, to "res/sennaMap6830.txt".
    Nothing is returned.

    Raises:
        ValueError: from np.argmax if getRefList() is empty and at least one
            word has an embedding.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # Index the vocabulary once; setdefault keeps the first occurrence so the
    # rows match what SennaList.index() would return.
    senna_row = {}
    for row, word in enumerate(SennaList):
        senna_row.setdefault(word, row)
    ref_rows = [senna_row.get(word) for word in chosedRef]

    # Named "pairs" rather than "map" to avoid shadowing the builtin.
    pairs = []
    for word1 in delRefList:
        row1 = senna_row.get(word1)
        if row1 is None:
            continue
        w1vec = WordEmbedding[row1, :]
        sim = [
            0 if row is None else vecSim(w1vec, WordEmbedding[row, :])
            for row in ref_rows
        ]
        chosed = chosedRef[np.argmax(sim)]
        pair = word1 + "\t" + chosed
        print(pair)
        pairs.append(pair)

    # Context manager ensures the output is flushed and the handle closed.
    with open("res/sennaMap6830.txt", "w") as wfile:
        wfile.writelines(pair + '\n' for pair in pairs)
Exemplo n.º 6
0
def getDelRefMap():
    """Map each getdelRef() word to its highest-scoring getRefList() entry.

    Only words found in getSennaList() are mapped.  Scores come from vecSim()
    applied to the two embedding rows; a reference entry that is not in the
    SENNA word list gets score 0.  Every resulting "word<TAB>reference" line is
    printed, and the full list is saved to "res/sennaMap6830.txt" at the end.
    Nothing is returned.

    Raises:
        ValueError: from np.argmax if getRefList() is empty and at least one
            word has an embedding.
    """
    delRefList = getdelRef()
    chosedRef = getRefList()
    WordEmbedding = getEmbedding.getEmbeddingMat_senna()
    SennaList = getSennaList()

    # word -> first row index, built once so lookups are not repeated list
    # scans inside the nested loop.
    row_of = {}
    for position, token in enumerate(SennaList):
        row_of.setdefault(token, position)
    candidate_rows = [row_of.get(candidate) for candidate in chosedRef]

    # "mapping_lines" replaces the original local named "map", which hid the
    # builtin of the same name.
    mapping_lines = []
    for word1 in delRefList:
        if word1 not in row_of:
            continue
        w1vec = WordEmbedding[row_of[word1], :]

        sim = []
        for row in candidate_rows:
            if row is None:
                sim.append(0)
            else:
                sim.append(vecSim(w1vec, WordEmbedding[row, :]))
        chosed = chosedRef[np.argmax(sim)]

        line = word1 + "\t" + chosed
        print(line)
        mapping_lines.append(line)

    # "with" closes the file even if writing fails part-way.
    with open("res/sennaMap6830.txt", "w") as wfile:
        wfile.writelines(line + '\n' for line in mapping_lines)
Exemplo n.º 7
0
def vecSim(x, y):
    """Return the cosine of x and y rescaled from [-1, 1] to [0, 1].

    When the product of the two norms is zero (at least one zero vector) the
    cosine is undefined and the function returns 1.
    """
    dot_xy = np.dot(x, y)
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    if norm_product == 0:
        return 1
    return 0.5 + 0.5 * (dot_xy / norm_product)


# Module-level inputs read by the getDelRefMap(lock, index) definition below.
# Each value comes from a helper defined outside this excerpt, so the notes on
# their contents are assumptions to confirm against those helpers.
delRefList = getdelRef()  # indexed by integer position in getDelRefMap
chosedRef = getRefList()
WordEmbedding = getEmbedding.getEmbeddingMat_senna()  # presumably one row per SennaList entry -- TODO confirm
SennaList = getSennaList()  # words from delRefList are membership-tested against it
refmap = []
# count = 0

def getDelRefMap(lock,index):
     count = 0
     for i in range(index,index*30000):
        word1 = delRefList[i]
     # for word1 in delRefList:
        # print(count)
        # count += 1

        print(count)

        if word1 not in SennaList:
Exemplo n.º 8
0
    rfile.close()
    wfile = open(u"res/GS2011_SENNA.txt", "w")
    wfile.writelines(item for item in sampleList)


def getWordList(path=u"I:/数据/embedings/senna/words.txt"):
    """Read a word list file and return its lines as a list of strings.

    Args:
        path: file to read, one entry per line.  Defaults to the original
            hard-coded SENNA word list location, so existing zero-argument
            callers behave as before.

    Returns:
        A list with one string per line of the file, in file order, with
        newline characters removed.  Other whitespace is kept, and empty
        lines become empty strings.
    """
    # The context manager closes the file even if reading raises.
    with open(path) as rfile:
        return [line.strip("\n") for line in rfile]


# Loaded once at import time; sentCombMat_Mult reads both on every call.
WordEmbedding = getEmbedding.getEmbeddingMat_senna()  # indexed as WordEmbedding[row, :]
wordList = getWordList()  # a word's position here is used as its row in WordEmbedding


def sentCombMat_Mult(w1, w2, w3):
    """Combine three words by multiplying their embedding rows together.

    Each word's position in the module-level wordList selects its row of the
    module-level WordEmbedding; the three rows are multiplied with ``*``
    (element-wise when WordEmbedding is a NumPy ndarray).

    Args:
        w1, w2, w3: words that must all be present in wordList.

    Returns:
        The product of the three embedding rows.

    Raises:
        ValueError: from list.index if any word is not in wordList.
    """
    w1Mat = WordEmbedding[wordList.index(w1), :]
    # Each row must be looked up with its own word; using w1 for all three
    # would just return the first word's vector cubed.
    w2Mat = WordEmbedding[wordList.index(w2), :]
    w3Mat = WordEmbedding[wordList.index(w3), :]

    return w1Mat * w2Mat * w3Mat


def vecSim(x, y):

    result1 = np.dot(x, y)