示例#1
0
 def _setInternalFeatureStats(self, data):
     """Compute and store per-feature mean and standard deviation.

     For every sentence in ``data`` and every (verb index, word index) pair
     in that sentence, the path between the two nodes is looked up with
     ``findPath`` and turned into a feature row by ``getInternalFeatures``.
     The column-wise mean and standard deviation over all rows are stored
     in ``self.features_mean`` and ``self.features_std``.

     Args:
         data: corpus object exposing ``allSStr``, ``allSTree``,
             ``allSNum`` and ``verbIndices``, each indexable by sentence.
             ``allSStr[s]``, ``allSTree[s]`` and ``verbIndices[s]`` must
             support ``.flatten()``.

     Side effects:
         Overwrites ``self.features_mean`` and ``self.features_std``.  If
         the corpus yields no (verb, word) pair, the neutral values
         (mean 0, std of ones) are kept.
     """
     # features [path length, path depth1, path depth2, sentence length, length between elements]
     numFeatures = 5

     # Neutral statistics are handed to getInternalFeatures while the raw
     # features are being collected.
     # NOTE(review): presumably getInternalFeatures normalises with these
     # values, so (0, ones) leaves the features unscaled -- confirm.
     self.features_mean = 0
     self.features_std = np.ones(numFeatures)

     numSentences = len(data.allSStr)
     sentLengths = [len(data.allSStr[s].flatten()) for s in range(numSentences)]

     # One row per (verb, word) pair over the whole corpus.
     totaltrees = 0
     for s in range(numSentences):
         totaltrees += len(data.verbIndices[s].flatten()) * sentLengths[s]

     if totaltrees == 0:
         # Nothing to average; np.mean over zero rows would only give NaN.
         return

     allFeatures = np.zeros((totaltrees, numFeatures))

     thisTree = Tree()
     row = 0  # running row index; the sentence index must NOT be used here
     for s in range(numSentences):
         thisTree.pp = data.allSTree[s].flatten()
         for vid in data.verbIndices[s].flatten():
             # Use the flattened sentence length so the number of rows
             # written matches the number of rows allocated above.
             for wid in range(sentLengths[s]):
                 indices = [vid, wid]
                 fullPath = findPath(thisTree, indices)
                 allFeatures[row, :] = getInternalFeatures(
                     fullPath, data.allSNum[s], indices,
                     self.features_mean, self.features_std)
                 row += 1

     # Find the mean
     self.features_mean = np.mean(allFeatures, axis=0)
     # Find the standard deviation (of the centred features)
     centred = allFeatures - self.features_mean
     self.features_std = np.std(centred, axis=0)
示例#2
0
def writeVectors():
    """Compose a vector for each training sentence and write it to disk.

    Loads the original word embeddings (``vars.normalized.100.mat``) and the
    trained parameters (``params_rae.mat``) from ``config.corpus_path``,
    loads a handful of SRL training sentences, and for every sentence merges
    sibling nodes bottom-up along the given parse tree using
    ``p = tanh(W1 . c1 + W2 . c2 + b1)``.  The vector of the root node (the
    last node produced by the merge loop) is written as one space-separated
    line to ``config.results_path + "vectors.out"``.

    Takes no arguments and returns nothing; its only effects are the output
    file and progress messages on stdout.

    Raises:
        IndexError: if a sentence tree contains no adjacent pair of merge
            candidates sharing a parent (malformed tree).
    """
    vecFileName = config.results_path + "vectors.out"

    mats = sio.loadmat(config.corpus_path + 'vars.normalized.100.mat')
    We_orig = mats.get('We')

    params = sio.loadmat(config.corpus_path + 'params_rae.mat')
    W1 = params.get('W1')
    W2 = params.get('W2')
    bias = params.get('b1').flatten()
    # We holds the learned update (delta) that is added to We_orig
    We = params.get('We')

    hiddenSize = 100

    nExamples = 5
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("loading data..")
    rnnData_train = RNNDataCorpus()
    rnnData_train.load_data_srl(load_file=config.train_data_srl,
                                nExamples=nExamples)

    print('writing vectors to:  ' + vecFileName)
    # The context manager guarantees the file is closed even if a sentence
    # fails half-way through.
    with open(vecFileName, 'w') as vecFile:
        for ii in range(len(rnnData_train.allSNum)):
            sNum = rnnData_train.allSNum[ii]
            sTree = rnnData_train.allSTree[ii]

            words_indexed = np.where(sNum >= 0)[0]
            # Only the part of the embedding matrix relevant for this
            # sentence is used; We (if non-empty) is the learned delta.
            words_embedded = We_orig[:, words_indexed]
            if We.shape[1] != 0:
                words_embedded = words_embedded + We[:, words_indexed]

            tree = Tree()
            tree.pp = sTree
            tree.nodeScores = np.zeros(len(sNum))
            tree.kids = np.zeros((len(sNum), 2))

            tree.nodeFeatures = np.zeros((hiddenSize, len(sNum)))
            tree.nodeFeatures[:, :len(words_indexed)] = words_embedded

            toMerge = np.array(words_indexed, dtype='int32')
            while len(toMerge) > 1:
                # find unpaired bottom leaf pairs (initially words) that
                # share a parent
                for i in range(len(toMerge) - 1):
                    if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                        break
                else:
                    raise IndexError(
                        "no adjacent merge candidates share a parent")

                kid1 = toMerge[i]
                kid2 = toMerge[i + 1]
                newParent = sTree[kid1]
                tree.kids[newParent, :] = [kid1, kid2]
                # the new parent replaces kid1 as a merge candidate and
                # kid2 is dropped
                toMerge[i] = newParent
                toMerge = np.delete(toMerge, i + 1)

                c1 = tree.nodeFeatures[:, kid1]
                c2 = tree.nodeFeatures[:, kid2]
                tree.nodeFeatures[:, newParent] = np.tanh(
                    np.dot(W1, c1) + np.dot(W2, c2) + bias)

            # After merging, the single remaining candidate is the root.
            # Its COLUMN is the sentence vector; indexing nodeFeatures[-1]
            # would instead pick one hidden unit across all nodes.
            if len(toMerge):
                vec = tree.nodeFeatures[:, toMerge[0]]
            else:
                vec = np.zeros(hiddenSize)
            vecFile.write(" ".join([str(x) for x in vec]) + '\n')

    print("finished! ")
示例#3
0
def forwardPropTree(W,
                    WO,
                    Wcat,
                    Wv,
                    Wo,
                    sNum,
                    sTree,
                    sStr=None,
                    sNN=None,
                    indicies=None,
                    params=None):
    """Forward-propagate one sentence through its parse tree.

    Every node gets a vector ``a`` (``nodeAct_a``) and a ``wsz x wsz``
    operator ``A`` (``nodeOp_A``, stored flattened).  Leaves are initialised
    from ``Wv`` and from the low-rank operator parameters in ``Wo``
    (``A = diag(d) + U . V^T``).  Inner nodes are built bottom-up by merging
    adjacent siblings ``(a, A)`` and ``(b, B)``::

        p = tanh(W . [B.a ; A.b ; 1])
        P = WO . [A ; B]

    Args:
        W: composition matrix applied to the ``2*wsz + 1`` vector
            ``[B.a ; A.b ; 1]`` (the trailing 1 is the bias input).
        WO: operator composition matrix applied to ``A`` stacked on ``B``;
            the product is reshaped to ``wsz**2`` values.
        Wcat: unused by this function.
        Wv: word vectors, one column per vocabulary entry.
        Wo: word operator parameters, one column per vocabulary entry,
            laid out as ``wsz`` diagonal values, then ``U`` (``wsz*r``
            values), then ``V``.
        sNum: per-node word index; entries ``>= 0`` mark leaf (word) nodes.
        sTree: per-node parent index.
        sStr, sNN, indicies: unused by this function.
        params: object providing ``wordSize`` and ``rankWo``.  Required
            despite the ``None`` default.

    Returns:
        A ``Tree`` with ``pp``, ``isLeafVec``, ``nodeNames``, ``nodeLabels``,
        ``ParIn_z``, ``ParIn_a``, ``nodeAct_a``, ``nodeOp_A``, ``score`` and
        ``kids`` set.  ``ParIn_z`` and ``score`` are left as zeros here.

    Raises:
        IndexError: if no adjacent pair of merge candidates shares a parent
            (malformed tree).
    """
    wsz = params.wordSize
    r = params.rankWo

    words = np.where(sNum >= 0)[0]
    numTotalNodes = len(sNum)

    allV = Wv[:, sNum[words]]
    allO = Wo[:, sNum[words]]

    thisTree = Tree()
    # set tree structure of tree
    thisTree.pp = sTree
    # set which nodes are leaf nodes
    thisTree.isLeafVec = np.zeros(numTotalNodes)
    thisTree.isLeafVec[words] = 1

    thisTree.nodeNames = np.arange(len(sTree))
    thisTree.nodeLabels = sNum

    # the inputs to the parent (stay empty for the root)
    thisTree.ParIn_z = np.zeros((wsz, numTotalNodes))
    thisTree.ParIn_a = np.zeros((wsz, numTotalNodes))

    # node vectors
    thisTree.nodeAct_a = np.zeros((wsz, numTotalNodes))
    # node operators, each stored as a flattened wsz x wsz matrix
    thisTree.nodeOp_A = np.zeros((wsz**2, numTotalNodes))
    # the scores for each decision
    thisTree.score = np.zeros(numTotalNodes)
    # the children of each node (for speed)
    thisTree.kids = np.zeros((numTotalNodes, 2), dtype='int32')

    # initialize the vectors and operators of the words (leaf nodes)
    thisTree.nodeAct_a[:, words] = allV

    for thisWordNum, nodeIdx in enumerate(words):
        diag_a = np.diag(allO[:wsz, thisWordNum])
        U = allO[wsz:wsz * (1 + r), thisWordNum].reshape(wsz, r)
        V = allO[wsz * (1 + r):, thisWordNum].reshape(wsz, r)
        A = diag_a + np.dot(U, np.transpose(V))
        # Store under the node index, exactly like the leaf vectors above,
        # rather than under the position within ``words``.
        thisTree.nodeOp_A[:, nodeIdx] = A.reshape(wsz**2)

    # Constant bias input appended to every composition.  np.ndarray([1])
    # would allocate an UNINITIALISED one-element array, not the value 1.
    biasInput = np.ones(1)

    toMerge = np.array(words, dtype='int32')
    while len(toMerge) > 1:
        # find unpaired bottom leaf pairs (initially words) that share parent
        for i in range(len(toMerge) - 1):
            if sTree[toMerge[i]] == sTree[toMerge[i + 1]]:
                break
        else:
            raise IndexError("no adjacent merge candidates share a parent")

        kid1 = toMerge[i]
        kid2 = toMerge[i + 1]
        newParent = sTree[kid1]
        thisTree.kids[newParent, :] = [kid1, kid2]
        # the new parent replaces kid1 as a merge candidate, kid2 is dropped
        toMerge[i] = newParent
        toMerge = np.delete(toMerge, i + 1)

        a = thisTree.nodeAct_a[:, kid1]
        A = thisTree.nodeOp_A[:, kid1].reshape(wsz, wsz)
        b = thisTree.nodeAct_a[:, kid2]
        B = thisTree.nodeOp_A[:, kid2].reshape(wsz, wsz)

        # each child's vector is transformed by its sibling's operator
        l_a = np.dot(B, a)
        r_a = np.dot(A, b)
        C = np.concatenate((l_a, r_a, biasInput))
        thisTree.nodeAct_a[:, newParent] = np.tanh(np.dot(W, C))

        P_A = (np.dot(WO, np.vstack((A, B)))).reshape(wsz**2)

        # save all this for backprop:
        thisTree.ParIn_a[:, kid1] = l_a
        thisTree.ParIn_a[:, kid2] = r_a
        thisTree.nodeOp_A[:, newParent] = P_A

    return thisTree