示例#1
0
 def _setInternalFeatureStats(self, data):
     """Estimate the per-feature mean and standard deviation of the internal features.

     One feature vector is computed for every (verb, word) pair of every
     sentence in ``data``; the column-wise mean and standard deviation of
     all those vectors are stored in ``self.features_mean`` and
     ``self.features_std``.

     ``data`` must provide ``allSStr``, ``allSTree``, ``allSNum`` and
     ``verbIndices``, indexable per sentence; the per-sentence entries of
     ``allSStr``, ``allSTree`` and ``verbIndices`` must support
     ``.flatten()``.

     If there are no (verb, word) pairs at all, the neutral statistics
     (mean 0, std 1) are kept instead of producing NaNs.
     """
     # features [path length, path depth1, path depth2, sentence length, length between elements]
     numFeatures = 5

     # Neutral statistics are handed to getInternalFeatures while the raw
     # features are being collected.
     # NOTE(review): presumably getInternalFeatures normalises with these
     # values, so mean 0 / std 1 yields raw features - confirm.
     self.features_mean = 0
     self.features_std = np.ones(numFeatures)

     # One row is needed per (verb, word) pair.
     totaltrees = 0
     for s in range(len(data.allSStr)):
         nverbs = len(data.verbIndices[s].flatten())
         sentLength = len(data.allSStr[s].flatten())
         totaltrees += nverbs * sentLength

     if totaltrees == 0:
         # Nothing to average over; keep the neutral statistics.
         return

     allFeatures = np.zeros((totaltrees, numFeatures))

     thisTree = Tree()
     row = 0  # running index of the current (verb, word) pair
     for s in range(len(data.allSStr)):
         thisTree.pp = data.allSTree[s].flatten()
         verbIndexesList = data.verbIndices[s].flatten()
         # Same length that was used above to size allFeatures.
         sentLength = len(data.allSStr[s].flatten())
         for vid in verbIndexesList:
             for wid in range(sentLength):
                 indices = [vid, wid]
                 fullPath = findPath(thisTree, indices)
                 # Each pair gets its own row; indexing by the sentence
                 # number would overwrite rows and leave most of the
                 # matrix at zero.
                 allFeatures[row, :] = getInternalFeatures(
                     fullPath, data.allSNum[s], indices,
                     self.features_mean, self.features_std)
                 row += 1

     # Find the mean
     self.features_mean = np.mean(allFeatures, axis=0)
     allFeatures = allFeatures - self.features_mean
     # Find the standard deviation
     self.features_std = np.std(allFeatures, axis=0)
示例#2
0
def writeVectors():
    """Write one composed sentence vector per training sentence to a text file.

    Loads the original word embeddings (``We`` in
    ``vars.normalized.100.mat``) and the trained parameters ``W1``, ``W2``,
    ``b1`` and ``We`` (from ``params_rae.mat``) from ``config.corpus_path``,
    loads ``nExamples`` sentences from ``config.train_data_srl``, and for
    each sentence composes node vectors bottom-up along its parse tree with
    ``p = tanh(W1.c1 + W2.c2 + b1)``.  The vector of the root node is written
    as one space-separated line to ``config.results_path + "vectors.out"``.
    """
    vecFileName = config.results_path + "vectors.out"

    mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path + 'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    b1 = params.get('b1').flatten()
    We = params.get('We')

    hiddenSize = 100

    nExamples = 5
    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                nExamples=nExamples)

    print(' '.join(['writing vectors to: ', vecFileName]))
    # The context manager closes the file even if a sentence fails.
    with open(vecFileName, 'w') as vecFile:
        for ii in range(len(rnnData_train.allSNum)):
            sNum = rnnData_train.allSNum[ii]
            sTree = rnnData_train.allSTree[ii]

            # Node positions of the words (leaves); other nodes have sNum < 0.
            words_indexed = np.where(sNum >= 0)[0]
            # Embedding columns are selected by word id, as forwardPropTree
            # does with sNum[words], not by the node's position in the tree.
            # NOTE(review): assumes the ids in sNum are 0-based column
            # indices into We / We_orig - confirm against the data loader.
            word_ids = sNum[words_indexed]
            # We holds the learned delta on top of the original embeddings.
            if We.shape[1] != 0:
                words_embedded = We_orig[:, word_ids] + We[:, word_ids]
            else:
                words_embedded = We_orig[:, word_ids]

            tree = Tree()
            # Parent pointers, as in the other users of Tree.pp (the
            # previous code stored the builtin ``all`` here).
            tree.pp = sTree
            tree.nodeScores = np.zeros(len(sNum))
            tree.kids = np.zeros((len(sNum), 2))

            tree.nodeFeatures = np.zeros((hiddenSize, len(sNum)))
            tree.nodeFeatures[:, words_indexed] = words_embedded

            toMerge = words_indexed.astype('int32')
            while len(toMerge) > 1:
                # find unpaired bottom leaf pairs (initially words) that share parent
                for i in range(len(toMerge) - 1):
                    if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                        break
                else:
                    raise IndexError("no adjacent nodes share a parent; "
                                     "sTree is not a valid binary tree")

                newParent = sTree[toMerge[i]]
                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                tree.kids[newParent, :] = [kid1, kid2]
                # set new parent to be possible merge candidate
                toMerge[i] = newParent
                # delete other kid
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + b1)

            # The single node left in toMerge is the root; its column is the
            # sentence vector.  (nodeFeatures[-1] would be the last feature
            # dimension across all nodes, not a node vector.)
            if len(toMerge) == 0:
                vec = np.zeros(hiddenSize)  # sentence without any word
            else:
                vec = tree.nodeFeatures[:, toMerge[0]]
            vecFile.write(" ".join([str(x) for x in vec]) + '\n')

    print("finished! ")
示例#3
0
文件: rae.py 项目: 5idaidai/MVRNN
def writeVectors():
    """Write one composed sentence vector per training sentence to a text file.

    Loads the original word embeddings (``We`` in
    ``vars.normalized.100.mat``) and the trained parameters ``W1``, ``W2``,
    ``b1`` and ``We`` (from ``params_rae.mat``) from ``config.corpus_path``,
    loads ``nExamples`` sentences from ``config.train_data_srl``, and for
    each sentence composes node vectors bottom-up along its parse tree with
    ``p = tanh(W1.c1 + W2.c2 + b1)``.  The vector of the root node is written
    as one space-separated line to ``config.results_path + "vectors.out"``.
    """
    vecFileName = config.results_path+"vectors.out"

    mats = sio.loadmat(config.corpus_path+'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path+'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    b1 = params.get('b1').flatten()
    We = params.get('We')

    hiddenSize = 100

    nExamples = 5
    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl, nExamples=nExamples)

    print(' '.join(['writing vectors to: ', vecFileName]))
    # The context manager closes the file even if a sentence fails.
    with open(vecFileName, 'w') as vecFile:
        for ii in range(len(rnnData_train.allSNum)):
            sNum = rnnData_train.allSNum[ii]
            sTree = rnnData_train.allSTree[ii]

            # Node positions of the words (leaves); other nodes have sNum < 0.
            words_indexed = np.where(sNum >= 0)[0]
            # Embedding columns are selected by word id, as forwardPropTree
            # does with sNum[words], not by the node's position in the tree.
            # NOTE(review): assumes the ids in sNum are 0-based column
            # indices into We / We_orig - confirm against the data loader.
            word_ids = sNum[words_indexed]
            # We holds the learned delta on top of the original embeddings.
            if We.shape[1] != 0:
                words_embedded = We_orig[:, word_ids] + We[:, word_ids]
            else:
                words_embedded = We_orig[:, word_ids]

            tree = Tree()
            # Parent pointers, as in the other users of Tree.pp (the
            # previous code stored the builtin ``all`` here).
            tree.pp = sTree
            tree.nodeScores = np.zeros(len(sNum))
            tree.kids = np.zeros((len(sNum), 2))

            tree.nodeFeatures = np.zeros((hiddenSize, len(sNum)))
            tree.nodeFeatures[:, words_indexed] = words_embedded

            toMerge = words_indexed.astype('int32')
            while len(toMerge) > 1:
                # find unpaired bottom leaf pairs (initially words) that share parent
                for i in range(len(toMerge) - 1):
                    if sTree[toMerge[i]] == sTree[toMerge[i+1]]:
                        break
                else:
                    raise IndexError("no adjacent nodes share a parent; "
                                     "sTree is not a valid binary tree")

                newParent = sTree[toMerge[i]]
                kid1 = toMerge[i]
                kid2 = toMerge[i+1]
                tree.kids[newParent, :] = [kid1, kid2]
                # set new parent to be possible merge candidate
                toMerge[i] = newParent
                # delete other kid
                toMerge = np.delete(toMerge, i+1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + b1)

            # The single node left in toMerge is the root; its column is the
            # sentence vector.  (nodeFeatures[-1] would be the last feature
            # dimension across all nodes, not a node vector.)
            if len(toMerge) == 0:
                vec = np.zeros(hiddenSize)  # sentence without any word
            else:
                vec = tree.nodeFeatures[:, toMerge[0]]
            vecFile.write(" ".join([str(x) for x in vec])+'\n')

    print("finished! ")
示例#4
0
文件: mvrnn.py 项目: 5idaidai/MVRNN
def forwardPropTree(W, WO, Wcat, Wv, Wo, sNum,sTree, sStr=None, sNN=None, indicies=None, params=None):
    """Forward-propagate one sentence through its parse tree and return the filled Tree.

    Every node gets a vector (``nodeAct_a``) and a flattened ``wsz x wsz``
    operator matrix (``nodeOp_A``).  Leaves are initialised from ``Wv`` /
    ``Wo``; each parent is composed from its two children ``(a, A)`` and
    ``(b, B)`` as::

        vector   = tanh(W . [B.a ; A.b ; 1])
        operator = WO . [A ; B]

    Args:
        W: composition matrix applied to the ``2*wsz + 1`` vector
            ``[B.a ; A.b ; 1]`` (the last entry is the bias input).
        WO: operator-composition matrix applied to the stacked ``[A ; B]``.
        Wcat: unused here.
        Wv: word vectors, one column per word id.
        Wo: word operators, one column per word id, laid out as
            ``[diagonal (wsz) | U (wsz*r) | V (wsz*r)]`` so that the
            operator is ``diag + U.V^T``.
        sNum: per-node word id; entries ``>= 0`` mark leaves.
        sTree: per-node parent index.
        sStr, sNN, indicies: unused here.
        params: object providing ``wordSize`` and ``rankWo``; required
            despite the ``None`` default.

    Returns:
        The populated ``Tree``.  ``ParIn_z`` and ``score`` are allocated but
        left at zero by this function.

    Raises:
        IndexError: if no two adjacent merge candidates share a parent
            (``sTree`` does not describe a binary tree over the words).
    """
    wsz = params.wordSize
    r = params.rankWo

    words = np.where(sNum >= 0)[0]
    numTotalNodes = len(sNum)

    allV = Wv[:, sNum[words]]
    allO = Wo[:, sNum[words]]

    thisTree = Tree()
    # set tree structure of tree
    thisTree.pp = sTree  # to check
    # set which nodes are leaf nodes
    thisTree.isLeafVec = np.zeros(numTotalNodes)
    thisTree.isLeafVec[words] = 1

    thisTree.nodeNames = np.arange(len(sTree))
    thisTree.nodeLabels = sNum

    # the inputs to the parent
    thisTree.ParIn_z = np.zeros((wsz, numTotalNodes))  # empty for leaf nodes
    thisTree.ParIn_a = np.zeros((wsz, numTotalNodes))

    # node vectors
    thisTree.nodeAct_a = np.zeros((wsz, numTotalNodes))
    # the new operators
    thisTree.nodeOp_A = np.zeros((wsz**2, numTotalNodes))
    # the scores for each decision
    thisTree.score = np.zeros(numTotalNodes)
    # the children of each node (for speed)
    thisTree.kids = np.zeros((numTotalNodes, 2), dtype='int32')

    # initialize the vectors and operators of the words (leaf nodes)
    thisTree.nodeAct_a[:, words] = allV

    for thisWordNum, node in enumerate(words):
        # low-rank-plus-diagonal operator: A = diag(d) + U.V^T
        diag_a = np.diag(allO[:wsz, thisWordNum])
        U = allO[wsz:wsz*(1+r), thisWordNum].reshape(wsz, r)
        V = allO[wsz*(1+r):, thisWordNum].reshape(wsz, r)
        A = diag_a + np.dot(U, np.transpose(V))
        # store at the word's node index, matching nodeAct_a[:, words] above
        thisTree.nodeOp_A[:, node] = A.reshape(wsz**2)

    # Constant bias input; np.ndarray([1]) would be one *uninitialised* element.
    bias = np.ones(1)

    toMerge = words.astype('int32')
    while len(toMerge) > 1:
        # find unpaired bottom leaf pairs (initially words) that share parent
        for i in range(len(toMerge) - 1):
            if sTree[toMerge[i]] == sTree[toMerge[i+1]]:
                break
        else:
            raise IndexError("no adjacent nodes share a parent; "
                             "sTree is not a valid binary tree")

        newParent = sTree[toMerge[i]]
        kid1 = toMerge[i]
        kid2 = toMerge[i+1]
        thisTree.kids[newParent, :] = [kid1, kid2]
        # set new parent to be possible merge candidate
        toMerge[i] = newParent
        # delete other kid
        toMerge = np.delete(toMerge, i+1)

        a = thisTree.nodeAct_a[:, kid1]
        A = thisTree.nodeOp_A[:, kid1].reshape(wsz, wsz)
        b = thisTree.nodeAct_a[:, kid2]
        B = thisTree.nodeOp_A[:, kid2].reshape(wsz, wsz)

        # each child's vector is transformed by its sibling's operator
        l_a = np.dot(B, a)
        r_a = np.dot(A, b)
        C = np.concatenate((l_a, r_a, bias))
        thisTree.nodeAct_a[:, newParent] = np.tanh(np.dot(W, C))

        P_A = np.dot(WO, np.vstack((A, B))).reshape(wsz**2)

        # save all this for backprop:
        thisTree.ParIn_a[:, kid1] = l_a
        thisTree.ParIn_a[:, kid2] = r_a
        thisTree.nodeOp_A[:, newParent] = P_A

    return thisTree
示例#5
0
def forwardPropTree(W,
                    WO,
                    Wcat,
                    Wv,
                    Wo,
                    sNum,
                    sTree,
                    sStr=None,
                    sNN=None,
                    indicies=None,
                    params=None):
    """Forward-propagate one sentence through its parse tree and return the filled Tree.

    Every node gets a vector (``nodeAct_a``) and a flattened ``wsz x wsz``
    operator matrix (``nodeOp_A``).  Leaves are initialised from ``Wv`` /
    ``Wo``; each parent is composed from its two children ``(a, A)`` and
    ``(b, B)`` as::

        vector   = tanh(W . [B.a ; A.b ; 1])
        operator = WO . [A ; B]

    Args:
        W: composition matrix applied to the ``2*wsz + 1`` vector
            ``[B.a ; A.b ; 1]`` (the last entry is the bias input).
        WO: operator-composition matrix applied to the stacked ``[A ; B]``.
        Wcat: unused here.
        Wv: word vectors, one column per word id.
        Wo: word operators, one column per word id, laid out as
            ``[diagonal (wsz) | U (wsz*r) | V (wsz*r)]`` so that the
            operator is ``diag + U.V^T``.
        sNum: per-node word id; entries ``>= 0`` mark leaves.
        sTree: per-node parent index.
        sStr, sNN, indicies: unused here.
        params: object providing ``wordSize`` and ``rankWo``; required
            despite the ``None`` default.

    Returns:
        The populated ``Tree``.  ``ParIn_z`` and ``score`` are allocated but
        left at zero by this function.

    Raises:
        IndexError: if no two adjacent merge candidates share a parent
            (``sTree`` does not describe a binary tree over the words).
    """
    wsz = params.wordSize
    r = params.rankWo

    words = np.where(sNum >= 0)[0]
    numTotalNodes = len(sNum)

    allV = Wv[:, sNum[words]]
    allO = Wo[:, sNum[words]]

    thisTree = Tree()
    # set tree structure of tree
    thisTree.pp = sTree  # to check
    # set which nodes are leaf nodes
    thisTree.isLeafVec = np.zeros(numTotalNodes)
    thisTree.isLeafVec[words] = 1

    thisTree.nodeNames = np.arange(len(sTree))
    thisTree.nodeLabels = sNum

    # the inputs to the parent
    thisTree.ParIn_z = np.zeros((wsz, numTotalNodes))  # empty for leaf nodes
    thisTree.ParIn_a = np.zeros((wsz, numTotalNodes))

    # node vectors
    thisTree.nodeAct_a = np.zeros((wsz, numTotalNodes))
    # the new operators
    thisTree.nodeOp_A = np.zeros((wsz**2, numTotalNodes))
    # the scores for each decision
    thisTree.score = np.zeros(numTotalNodes)
    # the children of each node (for speed)
    thisTree.kids = np.zeros((numTotalNodes, 2), dtype='int32')

    # initialize the vectors and operators of the words (leaf nodes)
    thisTree.nodeAct_a[:, words] = allV

    for thisWordNum, node in enumerate(words):
        # low-rank-plus-diagonal operator: A = diag(d) + U.V^T
        diag_a = np.diag(allO[:wsz, thisWordNum])
        U = allO[wsz:wsz * (1 + r), thisWordNum].reshape(wsz, r)
        V = allO[wsz * (1 + r):, thisWordNum].reshape(wsz, r)
        A = diag_a + np.dot(U, np.transpose(V))
        # store at the word's node index, matching nodeAct_a[:, words] above
        thisTree.nodeOp_A[:, node] = A.reshape(wsz**2)

    # Constant bias input; np.ndarray([1]) would be one *uninitialised* element.
    bias = np.ones(1)

    toMerge = words.astype('int32')
    while len(toMerge) > 1:
        # find unpaired bottom leaf pairs (initially words) that share parent
        for i in range(len(toMerge) - 1):
            if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                break
        else:
            raise IndexError("no adjacent nodes share a parent; "
                             "sTree is not a valid binary tree")

        newParent = sTree[toMerge[i]]
        kid1 = toMerge[i]
        kid2 = toMerge[i + 1]
        thisTree.kids[newParent, :] = [kid1, kid2]
        # set new parent to be possible merge candidate
        toMerge[i] = newParent
        # delete other kid
        toMerge = np.delete(toMerge, i + 1)

        a = thisTree.nodeAct_a[:, kid1]
        A = thisTree.nodeOp_A[:, kid1].reshape(wsz, wsz)
        b = thisTree.nodeAct_a[:, kid2]
        B = thisTree.nodeOp_A[:, kid2].reshape(wsz, wsz)

        # each child's vector is transformed by its sibling's operator
        l_a = np.dot(B, a)
        r_a = np.dot(A, b)
        C = np.concatenate((l_a, r_a, bias))
        thisTree.nodeAct_a[:, newParent] = np.tanh(np.dot(W, C))

        P_A = np.dot(WO, np.vstack((A, B))).reshape(wsz**2)

        # save all this for backprop:
        thisTree.ParIn_a[:, kid1] = l_a
        thisTree.ParIn_a[:, kid2] = r_a
        thisTree.nodeOp_A[:, newParent] = P_A

    return thisTree