示例#1
0
def _findSiblingPair(toMerge, sTree):
    """Return the first index i such that toMerge[i] and toMerge[i + 1] share a parent.

    Args:
        toMerge: 1-D sequence of node indices that are still unmerged.
        sTree: indexable mapping from a node index to its parent index.

    Raises:
        ValueError: if no two adjacent entries of ``toMerge`` have the same
            parent in ``sTree`` (previously this surfaced as an out-of-bounds
            IndexError from the scan running past the end of ``toMerge``).
    """
    for i in range(len(toMerge) - 1):
        if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
            return i
    raise ValueError(
        "malformed parse tree: no adjacent pair of unmerged nodes shares a parent")


def writeVectors(nExamples=5, hiddenSize=100):
    """Write one composed vector per training sentence to ``vectors.out``.

    Loads the original word embeddings (``vars.normalized.100.mat``) and the
    trained parameters (``params_rae.mat``) from ``config.corpus_path``, loads
    the training sentences through ``RNNDataCorpus.load_data_srl``, and for
    every sentence merges sibling nodes bottom-up with
    ``p = tanh(W1 . c1 + W2 . c2 + b1)`` until a single node (the root) is
    left.  The root's feature column is written as one space-separated line to
    ``config.results_path + "vectors.out"``.

    Args:
        nExamples: forwarded to ``load_data_srl`` (default 5, the value that
            used to be hard-coded).
        hiddenSize: number of rows of the per-sentence node feature matrix
            (default 100, the value that used to be hard-coded).  The
            embedding columns are copied into that matrix, so it has to match
            the number of rows of the embedding matrices.

    Raises:
        ValueError: if a sentence's tree has no adjacent pair of unmerged
            nodes sharing a parent while more than one node is left.
    """
    vecFileName = config.results_path + "vectors.out"

    mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path + 'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    We = params.get('We')
    # The bias does not change between merges, so flatten it once.
    b1 = params.get('b1').flatten()

    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                nExamples=nExamples)

    # Formatted so the emitted text is the same as the former two-argument
    # print statement.
    print('%s %s' % ('writing vectors to: ', vecFileName))

    # The output file is opened only after all inputs loaded successfully, and
    # the context manager closes it even if a sentence fails mid-way.
    with open(vecFileName, 'w') as vecFile:
        for ii, sNum in enumerate(rnnData_train.allSNum):
            sTree = rnnData_train.allSTree[ii]

            # Positions in sNum with a non-negative entry are treated as words.
            words_indexed = np.where(sNum >= 0)[0]

            # NOTE(review): the embedding columns are selected by node position
            # (words_indexed), not by the values stored in sNum.  If sNum holds
            # vocabulary ids this may need to be sNum[words_indexed] -- confirm
            # against load_data_srl before changing.
            words_embedded = We_orig[:, words_indexed]
            # We is the learned delta on top of the original embeddings; it is
            # skipped when it is absent or has no columns.
            if We is not None and We.shape[1] != 0:
                words_embedded = words_embedded + We[:, words_indexed]

            tree = Tree()
            # NOTE(review): `all` is the Python builtin here; the attribute is
            # never read in this function.
            tree.pp = all
            tree.nodeScores = np.zeros(len(sNum))
            tree.kids = np.zeros((len(sNum), 2))

            # One column per node; the first len(words_indexed) columns are
            # filled with the word embeddings.
            tree.nodeFeatures = np.zeros((hiddenSize, len(sNum)))
            tree.nodeFeatures[:, :len(words_indexed)] = words_embedded

            # Working copy of the unmerged node indices.
            toMerge = words_indexed.astype('int32')
            while len(toMerge) > 1:
                i = _findSiblingPair(toMerge, sTree)
                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                newParent = sTree[kid1]
                tree.kids[newParent, :] = [kid1, kid2]

                # The parent replaces the first kid as a merge candidate and
                # the second kid is dropped.
                toMerge[i] = newParent
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + b1)

            if len(toMerge) == 1:
                # The single remaining node is the root.  Take its column:
                # nodeFeatures[-1] would be the last hidden dimension across
                # all nodes, not a node vector.
                vec = tree.nodeFeatures[:, toMerge[0]]
            else:
                # No word positions at all: emit zeros so the output keeps one
                # line per sentence.
                vec = np.zeros(hiddenSize)
            vecFile.write(" ".join(str(x) for x in vec) + '\n')

    print("finished! ")
示例#2
0
文件: rae.py 项目: 5idaidai/MVRNN
def _findSiblingPair(toMerge, sTree):
    """Locate the first adjacent pair in ``toMerge`` whose nodes share a parent.

    Args:
        toMerge: 1-D sequence of node indices that have not been merged yet.
        sTree: indexable mapping from a node index to its parent index.

    Returns:
        The index ``i`` such that ``sTree[toMerge[i]] == sTree[toMerge[i + 1]]``.

    Raises:
        ValueError: if no such pair exists (previously the scan ran past the
            end of ``toMerge`` and failed with an out-of-bounds IndexError).
    """
    for i in range(len(toMerge) - 1):
        if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
            return i
    raise ValueError(
        "malformed parse tree: no adjacent pair of unmerged nodes shares a parent")


def writeVectors(nExamples=5, hiddenSize=100):
    """Compute one vector per training sentence and write them to ``vectors.out``.

    Reads the original embeddings from ``vars.normalized.100.mat`` and the
    trained parameters from ``params_rae.mat`` (both under
    ``config.corpus_path``), loads sentences with
    ``RNNDataCorpus.load_data_srl`` and, per sentence, repeatedly merges two
    adjacent sibling nodes into their parent using
    ``tanh(W1 . c1 + W2 . c2 + b1)``.  When only the root is left, its feature
    column is written as a space-separated line to
    ``config.results_path + "vectors.out"``.

    Args:
        nExamples: passed through to ``load_data_srl``; defaults to the
            previously hard-coded 5.
        hiddenSize: row count of the node feature matrix; defaults to the
            previously hard-coded 100.  Word embeddings are copied into that
            matrix, so the embedding matrices need the same number of rows.

    Raises:
        ValueError: if, for some sentence, more than one node is still
            unmerged but no adjacent pair shares a parent.
    """
    vecFileName = config.results_path + "vectors.out"

    mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path + 'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    We = params.get('We')
    # Loop-invariant: flatten the bias once instead of on every merge.
    b1 = params.get('b1').flatten()

    # The embedding delta is only applied when it exists and has columns.
    haveDelta = We is not None and We.shape[1] != 0

    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                nExamples=nExamples)

    # Same text as the former two-argument print statement.
    print('%s %s' % ('writing vectors to: ', vecFileName))

    # Open the output only once the inputs are loaded; `with` guarantees the
    # handle is closed if processing a sentence raises.
    with open(vecFileName, 'w') as vecFile:
        for ii, sNum in enumerate(rnnData_train.allSNum):
            sTree = rnnData_train.allSTree[ii]
            nNodes = len(sNum)

            # Node positions whose sNum entry is non-negative count as words.
            words_indexed = np.where(sNum >= 0)[0]
            nWords = len(words_indexed)

            # NOTE(review): columns are picked by node position
            # (words_indexed) rather than by the values in sNum; if sNum
            # stores vocabulary ids this likely should be sNum[words_indexed]
            # -- verify against load_data_srl.
            if haveDelta:
                words_embedded = We_orig[:, words_indexed] + We[:, words_indexed]
            else:
                words_embedded = We_orig[:, words_indexed]

            tree = Tree()
            # NOTE(review): this stores the builtin `all`; nothing in this
            # function reads tree.pp.
            tree.pp = all
            tree.nodeScores = np.zeros(nNodes)
            tree.kids = np.zeros((nNodes, 2))

            # Column j holds the feature vector of node j; word embeddings go
            # into the first nWords columns.
            tree.nodeFeatures = np.zeros((hiddenSize, nNodes))
            tree.nodeFeatures[:, :nWords] = words_embedded

            # Independent int32 copy of the still-unmerged node indices.
            toMerge = words_indexed.astype('int32')
            while len(toMerge) > 1:
                i = _findSiblingPair(toMerge, sTree)
                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                newParent = sTree[kid1]
                tree.kids[newParent, :] = [kid1, kid2]

                # The new parent takes kid1's slot as a merge candidate and
                # kid2's slot is removed.
                toMerge[i] = newParent
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                p = np.tanh(np.dot(W1, c1) + np.dot(W2, c2) + b1)
                tree.nodeFeatures[:, newParent] = p

            if len(toMerge) == 1:
                # The last remaining node is the root; write its column.
                # Indexing nodeFeatures[-1] would instead return the last
                # hidden dimension for every node.
                vec = tree.nodeFeatures[:, toMerge[0]]
            else:
                # Sentence without any word position: keep one output line
                # per sentence by writing zeros.
                vec = np.zeros(hiddenSize)
            vecFile.write(" ".join(str(x) for x in vec) + '\n')

    print("finished! ")