def forwardProp(self, allKids, words_embedded, updateWlab, label, theta, freq):
    """Forward pass of the recursive autoencoder (Socher et al., 2011).

    Two modes:
      * ``updateWlab`` true: the tree structure in ``allKids`` is taken as
        given and only the softmax (label) scores and deltas are computed,
        for both leaves and internal nodes.
      * ``updateWlab`` false: the tree is built greedily, repeatedly merging
        the adjacent pair of columns with the smallest reconstruction error
        (Eq. 4 in the paper) until a single root remains.

    Parameters
    ----------
    allKids : per-node [left, right] child indices (used only when updateWlab)
    words_embedded : d x sl array of word vectors for the sentence
    updateWlab : bool-like; selects label scoring vs. greedy tree building
    label : 1-based class index for the sentence
    theta : flat parameter vector, unpacked via self.getParams
    freq : per-word frequencies, merged by subtree-size-weighted average

    Returns the populated ``rnntree`` instance.
    """
    (W1, W2, W3, W4, Wlab, b1, b2, b3, blab, WL) = self.getParams(theta)
    sl = np.size(words_embedded, 1)  # sentence length (number of words)
    sentree = rnntree.rnntree(self.d, sl, words_embedded)
    # list(...) rather than a bare range: the greedy branch both deletes from
    # and assigns into this sequence, which a range object does not support.
    collapsed_sentence = list(range(sl))
    if updateWlab:
        # One-hot target vector for the (1-based) label.
        temp_label = np.zeros(self.cat)
        temp_label[label - 1] = 1.0
        # nodeUnder[i] = number of leaves under node i (leaves count as 1).
        nodeUnder = np.ones([2 * sl - 1, 1])
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            nodeUnder[i] = nodeUnder[kids[0]] + nodeUnder[kids[1]]
        sentree.catDelta = np.zeros([self.cat, 2 * sl - 1])
        sentree.catDelta_out = np.zeros([self.d, 2 * sl - 1])
        # Classifier on the single words (leaf nodes).
        for i in range(sl):
            sm = softmax(np.dot(Wlab, words_embedded[:, i]) + blab)
            lbl_sm = (1 - self.alpha) * (temp_label - sm)
            sentree.nodeScores[i] = 1.0 / 2.0 * np.dot(lbl_sm, temp_label - sm)
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
        # Internal nodes: compose the children, then score the parent vector.
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            c1 = sentree.nodeFeatures[:, kids[0]]  # left child feature
            c2 = sentree.nodeFeatures[:, kids[1]]  # right child feature
            # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1]),
            # length-normalised per Section 2.3 (last paragraph).
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2) + b1)
            p_norm1 = p / norm(p)
            # Eq. (7) in the paper: softmax label prediction for the node.
            sm = softmax(np.dot(Wlab, p_norm1) + blab)
            beta = 0.5  # extra down-weighting of internal-node label error
            lbl_sm = beta * (1.0 - self.alpha) * (temp_label - sm)
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
            J = 1.0 / 2.0 * np.dot(lbl_sm, temp_label - sm)
            sentree.nodeFeatures[:, i] = p_norm1
            sentree.nodeFeatures_unnormalized[:, i] = p
            sentree.nodeScores[i] = J
        sentree.numkids = nodeUnder
        sentree.kids = allKids
    else:
        # Greedy construction: sl-1 merges, each choosing the adjacent pair
        # with the smallest reconstruction error.
        for j in range(sl - 1):
            size2 = np.size(words_embedded, 1)  # current number of columns
            # All adjacent pairs at once: c1 = left columns, c2 = right columns.
            c1 = words_embedded[:, 0:-1]
            c2 = words_embedded[:, 1:]
            freq1 = freq[0:-1]
            freq2 = freq[1:]
            # b1 reshaped to (d, 1) broadcasts across all candidate pairs;
            # the original's explicit [1]*(size2-1) tiling was redundant.
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2)
                     + np.reshape(b1, [self.d, 1]))
            p_norm1 = p / np.sqrt(np.sum(p ** 2, axis=0))
            # Decode each candidate parent back into two reconstructions.
            y1_unnormalized = tanh(np.dot(W3, p_norm1)
                                   + np.reshape(b2, [self.d, 1]))
            y2_unnormalized = tanh(np.dot(W4, p_norm1)
                                   + np.reshape(b3, [self.d, 1]))
            y1 = y1_unnormalized / np.sqrt(np.sum(y1_unnormalized ** 2, axis=0))
            y2 = y2_unnormalized / np.sqrt(np.sum(y2_unnormalized ** 2, axis=0))
            y1c1 = self.alpha * (y1 - c1)
            y2c2 = self.alpha * (y2 - c2)
            # Eq. (4) in the paper: reconstruction error, one value per pair.
            J = 1.0 / 2.0 * np.sum(y1c1 * (y1 - c1) + y2c2 * (y2 - c2), axis=0)
            J_minpos = np.argmin(J)
            J_min = J[J_minpos]
            sentree.node_y1c1[:, sl + j] = y1c1[:, J_minpos]
            sentree.node_y2c2[:, sl + j] = y2c2[:, J_minpos]
            sentree.nodeDelta_out1[:, sl + j] = np.dot(
                norm1tanh_prime(y1_unnormalized[:, J_minpos]), y1c1[:, J_minpos])
            sentree.nodeDelta_out2[:, sl + j] = np.dot(
                norm1tanh_prime(y2_unnormalized[:, J_minpos]), y2c2[:, J_minpos])
            # Collapse the chosen pair: drop the right column and overwrite
            # the left one with the new parent vector.
            words_embedded = np.delete(words_embedded, J_minpos + 1, 1)
            words_embedded[:, J_minpos] = p_norm1[:, J_minpos]
            sentree.nodeFeatures[:, sl + j] = p_norm1[:, J_minpos]
            sentree.nodeFeatures_unnormalized[:, sl + j] = p[:, J_minpos]
            sentree.nodeScores[sl + j] = J_min
            # Parent pointers and children of the new internal node sl+j.
            sentree.pp[collapsed_sentence[J_minpos]] = sl + j
            sentree.pp[collapsed_sentence[J_minpos + 1]] = sl + j
            sentree.kids[sl + j, :] = [collapsed_sentence[J_minpos],
                                       collapsed_sentence[J_minpos + 1]]
            sentree.numkids[sl + j] = (sentree.numkids[sentree.kids[sl + j, 0]]
                                       + sentree.numkids[sentree.kids[sl + j, 1]])
            # Merged frequency: subtree-size-weighted average of the pair.
            freq = np.delete(freq, J_minpos + 1)
            freq[J_minpos] = (
                (sentree.numkids[sentree.kids[sl + j, 0]] * freq1[J_minpos]
                 + sentree.numkids[sentree.kids[sl + j, 1]] * freq2[J_minpos])
                / (sentree.numkids[sentree.kids[sl + j, 0]]
                   + sentree.numkids[sentree.kids[sl + j, 1]]))
            collapsed_sentence = np.delete(collapsed_sentence, J_minpos + 1)
            collapsed_sentence[J_minpos] = sl + j
    return sentree
def backProp(self, sentree, updateWcat, words_embedded, gradW1, gradW2,
             gradW3, gradW4, gradWlab, gradb1, gradb2, gradb3, gradblab,
             gradL, theta):
    """Backpropagate through ``sentree``, accumulating parameter gradients.

    Traverses the tree top-down from the root (node 2*sl-2) using a worklist
    ``toPopulate`` whose columns are [node index, child slot, parent index].
    The child slot (0 root, 1 left, 2 right) selects which composition matrix
    (none/W1/W2) and which reconstruction delta (none/node_y1c1/node_y2c2)
    apply to the node.  Non-leaf nodes push their two children and add to the
    W/b gradients; leaves add to the embedding gradient ``gradL``.  When
    ``updateWcat`` is set, classifier gradients (gradWlab, gradblab) from
    ``sentree.catDelta`` are accumulated as well.

    Returns the updated tuple
    (gradW1, gradW2, gradW3, gradW4, gradWlab, gradb1, gradb2, gradb3,
     gradblab, gradL).
    """
    (W1, W2, W3, W4, Wlab, b1, b2, b3, blab, WL) = self.getParams(theta)
    sl = np.size(words_embedded, 1)  # sentence length (number of leaves)
    # Worklist seeded with the root; column layout: [node, slot, parent].
    toPopulate = np.array([[2 * sl - 2], [0], [0]])
    nodeFeatures = sentree.nodeFeatures
    nodeFeatures_unnormalized = sentree.nodeFeatures_unnormalized
    # W[:,:,slot]: zero matrix for the root slot, W1 for left, W2 for right.
    W0 = np.zeros([self.d, self.d])
    W = np.zeros([self.d, self.d, 3])
    W[:, :, 0] = W0
    W[:, :, 1] = W1
    W[:, :, 2] = W2
    # DEL[slot]: reconstruction deltas matching the slot (none for the root).
    DEL = [np.zeros([self.d, 1]), sentree.node_y1c1, sentree.node_y2c2]
    while np.size(toPopulate, 1) != 0:
        parentNode = toPopulate[:, 0].copy()
        mat = W[:, :, parentNode[1]]
        delt = DEL[parentNode[1]][:, parentNode[2]]
        if parentNode[0] > sl - 1:  # Non-leaf?
            kids = sentree.kids[parentNode[0], :]
            kid1 = [kids[0], 1, parentNode[0]]
            kid2 = [kids[1], 2, parentNode[0]]
            # Replace the current column with the right child and insert the
            # left child in front (pop parent, push both kids).
            #toPopulate = np.array([kid1, kid2, toPopulate[:, 1:]])
            toPopulate[:, 0] = kid2
            toPopulate = np.insert(toPopulate, 0, kid1, 1)
            a1_unnormalized = nodeFeatures_unnormalized[:, parentNode[0]]  # unnormalized feature of pp
            a1 = nodeFeatures[:, parentNode[0]]  # normalized feature of pp
            nd1 = sentree.nodeDelta_out1[:, parentNode[0]]  # grad c1
            nd2 = sentree.nodeDelta_out2[:, parentNode[0]]  # grad c2
            pd = sentree.parentDelta[:, parentNode[0]]
            if updateWcat:
                # Include the classifier delta in the node's backprop signal.
                smd = sentree.catDelta[:, parentNode[0]]
                gradblab = gradblab + smd
                parent_d = np.dot(
                    norm1tanh_prime(a1_unnormalized),
                    (np.dot(W3, nd1) + np.dot(W4, nd2) + np.dot(mat, pd)
                     + np.dot(np.transpose(Wlab), smd) - delt))
                gradWlab = gradWlab + np.outer(smd, a1)
            else:
                parent_d = np.dot(norm1tanh_prime(a1_unnormalized),
                                  (np.dot(W3, nd1) + np.dot(W4, nd2)
                                   + np.dot(mat, pd) - delt))
            gradb1 = gradb1 + parent_d
            gradb2 = gradb2 + nd1
            gradb3 = gradb3 + nd2
            # After the insert, toPopulate[0][0] / toPopulate[0][1] are the
            # left / right child indices: propagate parent_d to both.
            sentree.parentDelta[:, toPopulate[0][0]] = parent_d
            sentree.parentDelta[:, toPopulate[0][1]] = parent_d
            gradW1 = gradW1 + np.outer(parent_d, nodeFeatures[:, toPopulate[0][0]])
            gradW2 = gradW2 + np.outer(parent_d, nodeFeatures[:, toPopulate[0][1]])
            gradW3 = gradW3 + np.outer(nd1, a1)
            gradW4 = gradW4 + np.outer(nd2, a1)
        else:  # leaf
            if updateWcat:
                gradWlab = gradWlab + np.outer(
                    sentree.catDelta[:, parentNode[0]],
                    nodeFeatures[:, parentNode[0]])
                gradblab = gradblab + sentree.catDelta[:, parentNode[0]]
                gradL[:, toPopulate[0][0]] = gradL[:, toPopulate[0][0]] + (
                    np.dot(mat, sentree.parentDelta[:, toPopulate[0][0]])
                    + np.dot(np.transpose(Wlab),
                             sentree.catDelta[:, toPopulate[0][0]])
                    - delt)
            else:
                gradL[:, toPopulate[0][0]] = gradL[:, toPopulate[0][0]] + (
                    np.dot(mat, sentree.parentDelta[:, toPopulate[0][0]])
                    - delt)
            # Pop the processed leaf from the worklist.
            toPopulate = np.delete(toPopulate, 0, 1)
    return (gradW1, gradW2, gradW3, gradW4, gradWlab, gradb1, gradb2, gradb3,
            gradblab, gradL)
def forwardProp(self, allKids, words_embedded, updateWlab, label, theta, freq):
    """Forward pass of the recursive autoencoder.

    With ``updateWlab`` set, the fixed tree in ``allKids`` is scored with the
    softmax classifier; otherwise the tree is built greedily by merging the
    adjacent pair of word vectors with the lowest reconstruction error at
    each step.  Returns the populated ``rnntree``.
    """
    (W1, W2, W3, W4, Wlab, b1, b2, b3, blab, WL) = self.getParams(theta)
    sl = np.size(words_embedded, 1)  # sentence length
    sentree = rnntree.rnntree(self.d, sl, words_embedded)
    # NOTE(review): a range object; the greedy branch later mutates it, which
    # only works because np.delete converts it to an ndarray first.
    collapsed_sentence = range(sl)
    if updateWlab:
        # One-hot target vector for the (1-based) label.
        temp_label = np.zeros(self.cat)
        temp_label[label - 1] = 1.0
        nodeUnder = np.ones([2 * sl - 1, 1])
        # calculate n1, n2 and n1+n2 for each node in the tree and store in nodeUnder
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            n1 = nodeUnder[kids[0]]
            n2 = nodeUnder[kids[1]]
            nodeUnder[i] = n1 + n2
        cat_size = self.cat
        sentree.catDelta = np.zeros([cat_size, 2 * sl - 1])
        sentree.catDelta_out = np.zeros([self.d, 2 * sl - 1])
        # classifier on single words (leaf nodes)
        for i in range(sl):
            sm = softmax(np.dot(Wlab, words_embedded[:, i]) + blab)
            lbl_sm = (1 - self.alpha) * (temp_label - sm)
            sentree.nodeScores[i] = 1.0 / 2.0 * (np.dot(lbl_sm, (temp_label - sm)))
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
        # sm = sigmoid(self.Wlab*words_embedded + self.blab)
        #lbl_sm = (1-self.alpha)*(label[:,np.ones(sl,1)] - sm)
        #sentree.nodeScores[:sl] = 1/2*(lbl_sm.*(label(:,ones(sl,1)) - sm))
        #sentree.catDelta[:, :sl] = -(lbl_sm).*sigmoid_prime(sm)
        # Internal nodes: compose the children, then score the parent vector.
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            c1 = sentree.nodeFeatures[:, kids[0]]  # left child feature
            c2 = sentree.nodeFeatures[:, kids[1]]  # right child feature
            # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1])
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2) + b1)
            # See last paragraph in Section 2.3
            p_norm1 = p / norm(p)
            # Eq. (7) in the paper (for special case of 1d label)
            #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
            sm = softmax(np.dot(Wlab, p_norm1) + blab)
            beta = 0.5  # extra down-weighting of internal-node label error
            #lbl_sm = beta * (1.0-self.alpha)*(label - sm)
            lbl_sm = beta * (1.0 - self.alpha) * (temp_label - sm)
            #sentree.catDelta[:, i] = -softmax_prime(sm)[:,label-1]
            #J=-(1.0-self.alpha)*np.log(sm[label-1])
            #sentree.catDelta[:, i] = -np.dot(lbl_sm,sigmoid_prime(sm))
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
            #J = 1.0/2.0*(np.dot(lbl_sm,(label - sm)))
            J = 1.0 / 2.0 * (np.dot(lbl_sm, (temp_label - sm)))
            sentree.nodeFeatures[:, i] = p_norm1
            sentree.nodeFeatures_unnormalized[:, i] = p
            sentree.nodeScores[i] = J
        sentree.numkids = nodeUnder
        sentree.kids = allKids
    else:
        # Reconstruction Error: greedily merge the best adjacent pair, sl-1 times.
        for j in range(sl - 1):
            size2 = np.size(words_embedded, 1)  # current number of columns
            # All adjacent pairs at once: c1 = left columns, c2 = right columns.
            c1 = words_embedded[:, 0:-1]
            c2 = words_embedded[:, 1:]
            freq1 = freq[0:-1]
            freq2 = freq[1:]
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2)
                     + np.reshape(b1, [self.d, 1]) * ([1] * (size2 - 1)))
            p_norm1 = p / np.sqrt(sum(p**2))
            # Decode each candidate parent back into two reconstructions.
            y1_unnormalized = tanh(np.dot(W3, p_norm1)
                                   + np.reshape(b2, [self.d, 1]) * ([1] * (size2 - 1)))
            y2_unnormalized = tanh(np.dot(W4, p_norm1)
                                   + np.reshape(b3, [self.d, 1]) * ([1] * (size2 - 1)))
            y1 = y1_unnormalized / np.sqrt(sum(y1_unnormalized**2))
            y2 = y2_unnormalized / np.sqrt(sum(y2_unnormalized**2))
            y1c1 = self.alpha * (y1 - c1)
            y2c2 = self.alpha * (y2 - c2)
            # Eq. (4) in the paper: reconstruction error (one value per pair)
            J = 1.0 / 2.0 * sum((y1c1) * (y1 - c1) + (y2c2) * (y2 - c2))
            # finding the pair with smallest reconstruction error for constructing sentree
            J_min = min(J)
            J_minpos = np.argmin(J)
            sentree.node_y1c1[:, sl + j] = y1c1[:, J_minpos]
            sentree.node_y2c2[:, sl + j] = y2c2[:, J_minpos]
            sentree.nodeDelta_out1[:, sl + j] = np.dot(
                norm1tanh_prime(y1_unnormalized[:, J_minpos]), y1c1[:, J_minpos])
            sentree.nodeDelta_out2[:, sl + j] = np.dot(
                norm1tanh_prime(y2_unnormalized[:, J_minpos]), y2c2[:, J_minpos])
            # Collapse the chosen pair: drop the right column, overwrite the
            # left one with the new parent vector.
            words_embedded = np.delete(words_embedded, J_minpos + 1, 1)
            words_embedded[:, J_minpos] = p_norm1[:, J_minpos]
            sentree.nodeFeatures[:, sl + j] = p_norm1[:, J_minpos]
            sentree.nodeFeatures_unnormalized[:, sl + j] = p[:, J_minpos]
            sentree.nodeScores[sl + j] = J_min
            # Parent pointers and children of the new internal node sl+j.
            sentree.pp[collapsed_sentence[J_minpos]] = sl + j
            sentree.pp[collapsed_sentence[J_minpos + 1]] = sl + j
            sentree.kids[sl + j, :] = [collapsed_sentence[J_minpos],
                                       collapsed_sentence[J_minpos + 1]]
            sentree.numkids[sl + j] = (sentree.numkids[sentree.kids[sl + j, 0]]
                                       + sentree.numkids[sentree.kids[sl + j, 1]])
            # Merged frequency: subtree-size-weighted average of the pair.
            freq = np.delete(freq, J_minpos + 1)
            freq[J_minpos] = (
                (sentree.numkids[sentree.kids[sl + j, 0]] * freq1[J_minpos]
                 + sentree.numkids[sentree.kids[sl + j, 1]] * freq2[J_minpos])
                / (sentree.numkids[sentree.kids[sl + j, 0]]
                   + sentree.numkids[sentree.kids[sl + j, 1]]))
            collapsed_sentence = np.delete(collapsed_sentence, J_minpos + 1)
            collapsed_sentence[J_minpos] = sl + j
    return sentree
def backProp(self, sentree, updateWcat, words_embedded, gradW1, gradW2,
             gradW3, gradW4, gradWlab, gradb1, gradb2, gradb3, gradblab,
             gradL, theta):
    """Accumulate gradients by backpropagating through ``sentree``.

    A worklist (``toPopulate``) of columns [node, slot, parent] is walked
    top-down starting at the root node 2*sl-2.  The slot value (0 root,
    1 left child, 2 right child) selects the composition matrix (zero/W1/W2)
    and the reconstruction delta (zero/node_y1c1/node_y2c2) for the node.
    Internal nodes push their children and update the composition gradients;
    leaves update ``gradL``.  With ``updateWcat`` set, classifier gradients
    are also accumulated from ``sentree.catDelta``.

    Returns (gradW1, gradW2, gradW3, gradW4, gradWlab, gradb1, gradb2,
    gradb3, gradblab, gradL).
    """
    (W1, W2, W3, W4, Wlab, b1, b2, b3, blab, WL) = self.getParams(theta)
    sl = np.size(words_embedded, 1)  # number of leaves
    # Worklist seeded with the root node; rows are [node, slot, parent].
    toPopulate = np.array([[2 * sl - 2], [0], [0]])
    nodeFeatures = sentree.nodeFeatures
    nodeFeatures_unnormalized = sentree.nodeFeatures_unnormalized
    # Slot-indexed matrices: zeros for the root, W1 for left, W2 for right.
    W0 = np.zeros([self.d, self.d])
    W = np.zeros([self.d, self.d, 3])
    W[:, :, 0] = W0
    W[:, :, 1] = W1
    W[:, :, 2] = W2
    # Slot-indexed reconstruction deltas (zero column for the root slot).
    DEL = [np.zeros([self.d, 1]), sentree.node_y1c1, sentree.node_y2c2]
    while np.size(toPopulate, 1) != 0:
        parentNode = toPopulate[:, 0].copy()
        mat = W[:, :, parentNode[1]]
        delt = DEL[parentNode[1]][:, parentNode[2]]
        if parentNode[0] > sl - 1:  # Non-leaf?
            kids = sentree.kids[parentNode[0], :]
            kid1 = [kids[0], 1, parentNode[0]]
            kid2 = [kids[1], 2, parentNode[0]]
            # Pop the parent and push both kids: overwrite column 0 with the
            # right child, then insert the left child in front of it.
            #toPopulate = np.array([kid1, kid2, toPopulate[:, 1:]])
            toPopulate[:, 0] = kid2
            toPopulate = np.insert(toPopulate, 0, kid1, 1)
            a1_unnormalized = nodeFeatures_unnormalized[:, parentNode[0]]  # unnormalized feature of pp
            a1 = nodeFeatures[:, parentNode[0]]  # normalized feature of pp
            nd1 = sentree.nodeDelta_out1[:, parentNode[0]]  # grad c1
            nd2 = sentree.nodeDelta_out2[:, parentNode[0]]  # grad c2
            pd = sentree.parentDelta[:, parentNode[0]]
            if updateWcat:
                # Fold the classifier delta into this node's error signal.
                smd = sentree.catDelta[:, parentNode[0]]
                gradblab = gradblab + smd
                parent_d = np.dot(
                    norm1tanh_prime(a1_unnormalized),
                    (np.dot(W3, nd1) + np.dot(W4, nd2) + np.dot(mat, pd)
                     + np.dot(np.transpose(Wlab), smd) - delt))
                gradWlab = gradWlab + np.outer(smd, a1)
            else:
                parent_d = np.dot(norm1tanh_prime(a1_unnormalized),
                                  (np.dot(W3, nd1) + np.dot(W4, nd2)
                                   + np.dot(mat, pd) - delt))
            gradb1 = gradb1 + parent_d
            gradb2 = gradb2 + nd1
            gradb3 = gradb3 + nd2
            # toPopulate[0][0] / toPopulate[0][1] are now the left / right
            # children: both receive parent_d as their parent delta.
            sentree.parentDelta[:, toPopulate[0][0]] = parent_d
            sentree.parentDelta[:, toPopulate[0][1]] = parent_d
            gradW1 = gradW1 + np.outer(parent_d, nodeFeatures[:, toPopulate[0][0]])
            gradW2 = gradW2 + np.outer(parent_d, nodeFeatures[:, toPopulate[0][1]])
            gradW3 = gradW3 + np.outer(nd1, a1)
            gradW4 = gradW4 + np.outer(nd2, a1)
        else:  # leaf
            if updateWcat:
                gradWlab = gradWlab + np.outer(
                    sentree.catDelta[:, parentNode[0]],
                    nodeFeatures[:, parentNode[0]])
                gradblab = gradblab + sentree.catDelta[:, parentNode[0]]
                gradL[:, toPopulate[0][0]] = gradL[:, toPopulate[0][0]] + (
                    np.dot(mat, sentree.parentDelta[:, toPopulate[0][0]])
                    + np.dot(np.transpose(Wlab),
                             sentree.catDelta[:, toPopulate[0][0]])
                    - delt)
            else:
                gradL[:, toPopulate[0][0]] = gradL[:, toPopulate[0][0]] + (
                    np.dot(mat, sentree.parentDelta[:, toPopulate[0][0]])
                    - delt)
            # Remove the processed leaf from the worklist.
            toPopulate = np.delete(toPopulate, 0, 1)
    return (gradW1, gradW2, gradW3, gradW4, gradWlab, gradb1, gradb2, gradb3,
            gradblab, gradL)
def forwardProp(self, allKids, words_embedded, updateWlab, label, theta, freq):
    """Forward pass of the recursive autoencoder (annotated copy).

    ``updateWlab`` true: score a fixed tree (``allKids``) with the softmax
    classifier.  ``updateWlab`` false: build the tree greedily from the
    reconstruction error.  Returns the populated ``rnntree``.
    """
    (W1, W2, W3, W4, Wlab, b1, b2, b3, blab, WL) = self.getParams(theta)
    # sl is the number of word vectors, i.e. the number of words in the sentence.
    # allKids is empty at first: the parse tree does not exist before training;
    # its contents change as the algorithm runs.
    sl = np.size(words_embedded, 1)
    sentree = rnntree.rnntree(self.d, sl, words_embedded)
    collapsed_sentence = range(sl)
    # updateWlab: compute the sentiment error to adjust the sentiment weights.
    # That error also needs the parent vector p as input, so p is computed here too.
    if updateWlab:
        temp_label = np.zeros(self.cat)
        # e.g. with cat = 4, temp_label starts as (0,0,0,0); the (1-based)
        # label's position is then set to 1.
        temp_label[label - 1] = 1.0
        nodeUnder = np.ones([2 * sl - 1, 1])
        # This loop counts how many leaves sit under each internal node.
        # allKids[i] is node i; allKids[i][0] / allKids[i][1] are its
        # left / right children.
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            n1 = nodeUnder[kids[0]]
            n2 = nodeUnder[kids[1]]
            nodeUnder[i] = n1 + n2
        cat_size = self.cat
        sentree.catDelta = np.zeros([cat_size, 2 * sl - 1])
        sentree.catDelta_out = np.zeros([self.d, 2 * sl - 1])
        # Classifier on single words: each leaf also gets a sentiment score.
        for i in range(sl):
            sm = softmax(np.dot(Wlab, words_embedded[:, i]) + blab)
            # nodeScores holds the sentiment error; catDelta its delta term.
            lbl_sm = (1 - self.alpha) * (temp_label - sm)
            sentree.nodeScores[i] = 1.0 / 2.0 * (np.dot(lbl_sm, (temp_label - sm)))
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
        # sm = sigmoid(self.Wlab*words_embedded + self.blab)
        #lbl_sm = (1-self.alpha)*(label[:,np.ones(sl,1)] - sm)
        #sentree.nodeScores[:sl] = 1/2*(lbl_sm.*(label(:,ones(sl,1)) - sm))
        #sentree.catDelta[:, :sl] = -(lbl_sm).*sigmoid_prime(sm)
        # Indices >= sl are the internal (parent) nodes.
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            # c1, c2: feature vectors of the left and right children.
            c1 = sentree.nodeFeatures[:, kids[0]]
            c2 = sentree.nodeFeatures[:, kids[1]]
            # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1]).
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2) + b1)
            # See last paragraph in Section 2.3
            p_norm1 = p / norm(p)
            # Eq. (7) in the paper (for special case of 1d label)
            #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
            # sm: predicted sentiment distribution for this node.
            sm = softmax(np.dot(Wlab, p_norm1) + blab)
            beta = 0.5
            #lbl_sm = beta * (1.0-self.alpha)*(label - sm)
            lbl_sm = beta * (1.0 - self.alpha) * (temp_label - sm)
            #sentree.catDelta[:, i] = -softmax_prime(sm)[:,label-1]
            #J=-(1.0-self.alpha)*np.log(sm[label-1])
            #sentree.catDelta[:, i] = -np.dot(lbl_sm,sigmoid_prime(sm))
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
            #J = 1.0/2.0*(np.dot(lbl_sm,(label - sm)))
            J = 1.0 / 2.0 * (np.dot(lbl_sm, (temp_label - sm)))
            sentree.nodeFeatures[:, i] = p_norm1
            sentree.nodeFeatures_unnormalized[:, i] = p
            sentree.nodeScores[i] = J
        sentree.numkids = nodeUnder
        sentree.kids = allKids
    else:
        # Reconstruction-error branch: build the tree greedily.
        for j in range(sl - 1):
            size2 = np.size(words_embedded, 1)
            # All adjacent pairs are evaluated at once: c1/c2 hold several
            # word vectors, so p (and hence J) has one entry per pair rather
            # than being computed pair by pair.
            c1 = words_embedded[:, 0:-1]  # drop the last word
            c2 = words_embedded[:, 1:]  # drop the first word
            freq1 = freq[0:-1]
            freq2 = freq[1:]
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2)
                     + np.reshape(b1, [self.d, 1]) * ([1] * (size2 - 1)))
            p_norm1 = p / np.sqrt(sum(p**2))
            y1_unnormalized = tanh(np.dot(W3, p_norm1)
                                   + np.reshape(b2, [self.d, 1]) * ([1] * (size2 - 1)))
            y2_unnormalized = tanh(np.dot(W4, p_norm1)
                                   + np.reshape(b3, [self.d, 1]) * ([1] * (size2 - 1)))
            y1 = y1_unnormalized / np.sqrt(sum(y1_unnormalized**2))
            y2 = y2_unnormalized / np.sqrt(sum(y2_unnormalized**2))
            y1c1 = self.alpha * (y1 - c1)
            y2c2 = self.alpha * (y2 - c2)
            # Eq. (4) in the paper: reconstruction error, one value per pair.
            J = 1.0 / 2.0 * sum((y1c1) * (y1 - c1) + (y2c2) * (y2 - c2))
            # finding the pair with smallest reconstruction error for constructing sentree
            J_min = min(J)
            J_minpos = np.argmin(J)
            # Only internal nodes have reconstruction targets, so node_y1c1
            # is filled starting at index sl + j.
            sentree.node_y1c1[:, sl + j] = y1c1[:, J_minpos]
            sentree.node_y2c2[:, sl + j] = y2c2[:, J_minpos]
            sentree.nodeDelta_out1[:, sl + j] = np.dot(
                norm1tanh_prime(y1_unnormalized[:, J_minpos]), y1c1[:, J_minpos])
            sentree.nodeDelta_out2[:, sl + j] = np.dot(
                norm1tanh_prime(y2_unnormalized[:, J_minpos]), y2c2[:, J_minpos])
            # After a pair is chosen, remove its right vector from
            # words_embedded and overwrite the left slot with the merged parent.
            words_embedded = np.delete(words_embedded, J_minpos + 1, 1)
            words_embedded[:, J_minpos] = p_norm1[:, J_minpos]
            sentree.nodeFeatures[:, sl + j] = p_norm1[:, J_minpos]
            sentree.nodeFeatures_unnormalized[:, sl + j] = p[:, J_minpos]
            sentree.nodeScores[sl + j] = J_min
            # pp appears to store the parent index: both children share the
            # same parent sl + j.
            sentree.pp[collapsed_sentence[J_minpos]] = sl + j
            sentree.pp[collapsed_sentence[J_minpos + 1]] = sl + j
            sentree.kids[sl + j, :] = [collapsed_sentence[J_minpos],
                                       collapsed_sentence[J_minpos + 1]]
            sentree.numkids[sl + j] = (sentree.numkids[sentree.kids[sl + j, 0]]
                                       + sentree.numkids[sentree.kids[sl + j, 1]])
            freq = np.delete(freq, J_minpos + 1)
            freq[J_minpos] = (
                (sentree.numkids[sentree.kids[sl + j, 0]] * freq1[J_minpos]
                 + sentree.numkids[sentree.kids[sl + j, 1]] * freq2[J_minpos])
                / (sentree.numkids[sentree.kids[sl + j, 0]]
                   + sentree.numkids[sentree.kids[sl + j, 1]]))
            collapsed_sentence = np.delete(collapsed_sentence, J_minpos + 1)
            collapsed_sentence[J_minpos] = sl + j
        # NOTE(review): leftover debug output — consider removing.
        print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
        print(sentree.pp)
        print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
        print(sentree.kids)
    return sentree
def forwardProp(self, allKids, words_embedded, updateWlab, label, theta, freq):
    """Forward pass of the recursive autoencoder (annotated copy).

    ``updateWlab`` true: score the fixed tree in ``allKids`` with the softmax
    classifier.  ``updateWlab`` false: build the tree greedily by merging the
    adjacent pair with the lowest reconstruction error.  Returns the
    populated ``rnntree``.
    """
    # allKids: row i holds the child indices of node i.
    (W1, W2, W3, W4, Wlab, b1, b2, b3, blab, WL) = self.getParams(theta)
    # sl: number of columns of words_embedded (the sentence length).
    sl = np.size(words_embedded, 1)
    sentree = rnntree.rnntree(self.d, sl, words_embedded)
    collapsed_sentence = range(sl)
    # Sentiment-error branch.
    if updateWlab:
        temp_label = np.zeros(self.cat)
        # label is 1-based; subtract 1 for 0-based indexing of the one-hot.
        temp_label[label - 1] = 1.0
        nodeUnder = np.ones([2 * sl - 1, 1])
        # n1, n2: leaf counts of the two children of node i.
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            n1 = nodeUnder[kids[0]]  # left child
            n2 = nodeUnder[kids[1]]  # right child
            nodeUnder[i] = n1 + n2  # leaves under node i
        cat_size = self.cat
        sentree.catDelta = np.zeros([cat_size, 2 * sl - 1])
        sentree.catDelta_out = np.zeros([self.d, 2 * sl - 1])
        # classifier on single words (leaves)
        for i in range(sl):
            sm = softmax(np.dot(Wlab, words_embedded[:, i]) + blab)
            # Unlike the paper's formulation, the error here is built
            # directly from (target - sm).
            lbl_sm = (1 - self.alpha) * (temp_label - sm)
            # nodeScores has two parts: leaves in [0, sl), internal nodes
            # in [sl, 2*sl-1) filled by the loop below.
            sentree.nodeScores[i] = 1.0 / 2.0 * (np.dot(lbl_sm, (temp_label - sm)))
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
        # sm = sigmoid(self.Wlab*words_embedded + self.blab)
        #lbl_sm = (1-self.alpha)*(label[:,np.ones(sl,1)] - sm)
        #sentree.nodeScores[:sl] = 1/2*(lbl_sm.*(label(:,ones(sl,1)) - sm))
        #sentree.catDelta[:, :sl] = -(lbl_sm).*sigmoid_prime(sm)
        for i in range(sl, 2 * sl - 1):
            kids = allKids[i]
            c1 = sentree.nodeFeatures[:, kids[0]]  # left child feature vector
            c2 = sentree.nodeFeatures[:, kids[1]]  # right child feature vector
            # Eq. [2] in the paper: p = f(W[1][c1 c2] + b[1])
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2) + b1)
            # See last paragraph in Section 2.3
            p_norm1 = p / norm(p)
            # Eq. (7) in the paper (for special case of 1d label)
            #sm = sigmoid(np.dot(Wlab,p_norm1) + blab)
            sm = softmax(np.dot(Wlab, p_norm1) + blab)
            beta = 0.5  # beta does not appear in the paper; extra down-weighting
            #lbl_sm = beta * (1.0-self.alpha)*(label - sm)
            lbl_sm = beta * (1.0 - self.alpha) * (temp_label - sm)
            #sentree.catDelta[:, i] = -softmax_prime(sm)[:,label-1]
            #J=-(1.0-self.alpha)*np.log(sm[label-1])
            #sentree.catDelta[:, i] = -np.dot(lbl_sm,sigmoid_prime(sm))
            sentree.catDelta[:, i] = -np.dot(lbl_sm, softmax_prime(sm))
            #J = 1.0/2.0*(np.dot(lbl_sm,(label - sm)))
            J = 1.0 / 2.0 * (np.dot(lbl_sm, (temp_label - sm)))
            sentree.nodeFeatures[:, i] = p_norm1
            sentree.nodeFeatures_unnormalized[:, i] = p
            sentree.nodeScores[i] = J
        sentree.numkids = nodeUnder
        sentree.kids = allKids
    # Reconstruction-error branch.
    else:
        # Reconstruction Error
        for j in range(sl - 1):
            size2 = np.size(words_embedded, 1)
            c1 = words_embedded[:, 0:-1]
            c2 = words_embedded[:, 1:]
            freq1 = freq[0:-1]
            freq2 = freq[1:]
            p = tanh(np.dot(W1, c1) + np.dot(W2, c2)
                     + np.reshape(b1, [self.d, 1]) * ([1] * (size2 - 1)))
            p_norm1 = p / np.sqrt(sum(p**2))
            # y1, y2 are the paper's reconstructions c1', c2' decoded from p.
            y1_unnormalized = tanh(np.dot(W3, p_norm1)
                                   + np.reshape(b2, [self.d, 1]) * ([1] * (size2 - 1)))
            y2_unnormalized = tanh(np.dot(W4, p_norm1)
                                   + np.reshape(b3, [self.d, 1]) * ([1] * (size2 - 1)))
            y1 = y1_unnormalized / np.sqrt(sum(y1_unnormalized**2))
            y2 = y2_unnormalized / np.sqrt(sum(y2_unnormalized**2))
            y1c1 = self.alpha * (y1 - c1)
            y2c2 = self.alpha * (y2 - c2)
            # Eq. (4) in the paper: reconstruction error, summed over rows
            # to give one value per adjacent pair.
            J = 1.0 / 2.0 * sum((y1c1) * (y1 - c1) + (y2c2) * (y2 - c2))
            # finding the pair with smallest reconstruction error for constructing sentree
            # J is a vector of per-pair errors; pick the smallest.
            J_min = min(J)
            J_minpos = np.argmin(J)
            # Store the best pair's reconstruction vectors (c1', c2') and
            # the corresponding backprop deltas.
            sentree.node_y1c1[:, sl + j] = y1c1[:, J_minpos]
            sentree.node_y2c2[:, sl + j] = y2c2[:, J_minpos]
            sentree.nodeDelta_out1[:, sl + j] = np.dot(
                norm1tanh_prime(y1_unnormalized[:, J_minpos]), y1c1[:, J_minpos])
            sentree.nodeDelta_out2[:, sl + j] = np.dot(
                norm1tanh_prime(y2_unnormalized[:, J_minpos]), y2c2[:, J_minpos])
            # Collapse the pair: drop the right column, overwrite the left
            # one with the merged parent vector.
            words_embedded = np.delete(words_embedded, J_minpos + 1, 1)
            words_embedded[:, J_minpos] = p_norm1[:, J_minpos]
            sentree.nodeFeatures[:, sl + j] = p_norm1[:, J_minpos]
            sentree.nodeFeatures_unnormalized[:, sl + j] = p[:, J_minpos]
            sentree.nodeScores[sl + j] = J_min
            sentree.pp[collapsed_sentence[J_minpos]] = sl + j
            sentree.pp[collapsed_sentence[J_minpos + 1]] = sl + j
            sentree.kids[sl + j, :] = [collapsed_sentence[J_minpos],
                                       collapsed_sentence[J_minpos + 1]]
            sentree.numkids[sl + j] = (sentree.numkids[sentree.kids[sl + j, 0]]
                                       + sentree.numkids[sentree.kids[sl + j, 1]])
            # Merged frequency: subtree-size-weighted average of the pair.
            freq = np.delete(freq, J_minpos + 1)
            freq[J_minpos] = (
                (sentree.numkids[sentree.kids[sl + j, 0]] * freq1[J_minpos]
                 + sentree.numkids[sentree.kids[sl + j, 1]] * freq2[J_minpos])
                / (sentree.numkids[sentree.kids[sl + j, 0]]
                   + sentree.numkids[sentree.kids[sl + j, 1]]))
            collapsed_sentence = np.delete(collapsed_sentence, J_minpos + 1)
            collapsed_sentence[J_minpos] = sl + j
    return sentree