Example #1
    def __build_internal_node(self, c1_node, c2_node):
        """Build a new internal node which represents the representation of 
        c1_node.p and c2_node.p computed using autoencoder  
    
        Args:
        c1_node: left node
        c2_node: right node
      
        Returns:
        value1: a new internal node 
        value2: reconstruction error, a scalar
    
        """
        c1 = c1_node.p
        c2 = c2_node.p
        p_unnormalized = self.f(dot(self.Wi1, c1) + dot(self.Wi2, c2) + self.bi)
        p = p_unnormalized / LA.norm(p_unnormalized, axis=0)

        y1_unnormalized = self.f(dot(self.Wo1, p) + self.bo1)
        y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)

        y2_unnormalized = self.f(dot(self.Wo2, p) + self.bo2)
        y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)

        y1c1 = y1 - c1
        y2c2 = y2 - c2

        node = InternalNode(-1, c1_node, c2_node, p, p_unnormalized, y1c1, y2c2, y1_unnormalized, y2_unnormalized)

        reconstruction_error = sum_along_column(y1c1 ** 2) + sum_along_column(y2c2 ** 2)
        reconstruction_error = 0.5 * reconstruction_error[0]

        return node, reconstruction_error
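Every snippet in this collection leans on the same scaffolding (dot, tanh, LA.norm, LeafNode, InternalNode, and sum_along_column, which the unit test in Example #9 pins down). As a point of reference, here is a minimal sketch of the imports and node containers, inferred purely from how the names are constructed and read above; it is illustrative, not the projects' actual code, and any field beyond those used in the snippets is an assumption.

# Minimal sketch of the shared scaffolding, inferred from usage above.
import numpy as np
from numpy import dot, tanh, arange, zeros, zeros_like, concatenate
from numpy import linalg as LA


class LeafNode(object):
    def __init__(self, index, embedding):
        self.index = index      # position or vocabulary index of the word
        self.p = embedding      # column-vector representation


class InternalNode(object):
    def __init__(self, index, left_child, right_child, p, p_unnormalized,
                 y1c1, y2c2, y1_unnormalized, y2_unnormalized):
        self.index = index
        self.left_child = left_child
        self.right_child = right_child
        self.p = p                          # normalized parent representation
        self.p_unnormalized = p_unnormalized
        self.y1c1 = y1c1                    # reconstruction residual y1 - c1
        self.y2c2 = y2c2                    # reconstruction residual y2 - c2
        self.y1_unnormalized = y1_unnormalized
        self.y2_unnormalized = y2_unnormalized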
Example #2
    def __build_internal_node(self, c1_node, c2_node):
        '''Build a new internal node whose representation is computed from
        c1_node.p and c2_node.p by the autoencoder.

        Args:
            c1_node: left node
            c2_node: right node

        Returns:
            value1: a new internal node
            value2: reconstruction error, a scalar
        '''
        c1 = c1_node.p
        c2 = c2_node.p
        p_unnormalized = self.f(
            dot(self.Wi1, c1) + dot(self.Wi2, c2) + self.bi)
        p = p_unnormalized / LA.norm(p_unnormalized, axis=0)

        y1_unnormalized = self.f(dot(self.Wo1, p) + self.bo1)
        y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)

        y2_unnormalized = self.f(dot(self.Wo2, p) + self.bo2)
        y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)

        y1c1 = y1 - c1
        y2c2 = y2 - c2

        node = InternalNode(-1, c1_node, c2_node, p, p_unnormalized, y1c1,
                            y2c2, y1_unnormalized, y2_unnormalized)

        reconstruction_error = sum_along_column(y1c1**2) + sum_along_column(
            y2c2**2)
        reconstruction_error = 0.5 * reconstruction_error[0]

        return node, reconstruction_error
Example #3
def process_la( src_rae_la, trg_rae_la, alpha, 
             src_word_vectors, src_instances, src_total_internal_node,
             trg_word_vectors, trg_instances, trg_total_internal_node,
             bad_src_instances, bad_trg_instances ):

    total_rec_error = 0
    total_sem_error = 0
    # initialize gradient accumulators
    src_gradients_la = src_rae_la.get_zero_gradients_la()
    trg_gradients_la = trg_rae_la.get_zero_gradients_la()    
    src_total_rec_error = 0
    trg_total_rec_error = 0
    src_total_sem_error = 0
    trg_total_sem_error = 0
    for i in xrange( len( src_instances ) ): 
        src_instance = src_instances[i]
        trg_instance = trg_instances[i]
        bad_src_instance = bad_src_instances[i]
        bad_trg_instance = bad_trg_instances[i]
        # look up the embedding of every word in the phrase: the indices in
        # instance.words select columns of the n x word_num embedding matrix
        src_words_embedded = src_word_vectors[src_instance.words]
        trg_words_embedded = trg_word_vectors[trg_instance.words]
        bad_src_embedded = src_word_vectors[bad_src_instance] 
        bad_trg_embedded = trg_word_vectors[bad_trg_instance]
        # forward pass: compute reconstruction errors
        src_root_node, src_rec_error = src_rae_la.forward_la( src_words_embedded, src_instance )
        trg_root_node, trg_rec_error = trg_rae_la.forward_la( trg_words_embedded, trg_instance )
        src_total_rec_error += src_rec_error * src_instance.freq
        trg_total_rec_error += trg_rec_error * trg_instance.freq
        # Corrupted-phrase forward passes: bad_src_root / bad_trg_root are used
        # below, so these must run (the original instance supplies the leaf
        # indices, assuming the corrupted phrase has the same length)
        bad_src_root, _ = src_rae_la.forward_la( bad_src_embedded, src_instance )
        bad_trg_root, _ = trg_rae_la.forward_la( bad_trg_embedded, trg_instance )

        rec_s = alpha * src_instance.freq / src_total_internal_node
        rec_t = alpha * trg_instance.freq / trg_total_internal_node
        sem_s = ( 1 - alpha ) * src_instance.freq / src_total_internal_node
        sem_t = ( 1 - alpha ) * trg_instance.freq / trg_total_internal_node

        # Semantic Error
        # Source side
        src_yla_unnormalized = tanh( dot( src_rae_la.Wla, src_root_node.p ) + src_rae_la.bla )
        src_yla = src_yla_unnormalized / LA.norm( src_yla_unnormalized, axis=0 )
        src_ylapla = src_yla - trg_root_node.p
        src_sem_error = 0.5 * sum_along_column( src_ylapla**2 )[0]

        bad_src_ylapla = src_yla - bad_trg_root.p
        bad_src_sem_error = 0.5 * sum_along_column( bad_src_ylapla**2 )[0] 
        src_sem_margin = src_sem_error  # (src_sem_error - bad_src_sem_error + 1) * src_instance.freq
    
        src_sem_margin = max( 0.0, src_sem_margin )
        if src_sem_margin == 0.0:
            soptimal = True
        else:
            soptimal = False
    
        src_total_sem_error += src_sem_margin

        # Target side
        trg_yla_unnormalized = tanh( dot( trg_rae_la.Wla, trg_root_node.p ) + trg_rae_la.bla )
        trg_yla = trg_yla_unnormalized / LA.norm( trg_yla_unnormalized, axis=0 )
        trg_ylapla = trg_yla - src_root_node.p
        trg_sem_error = 0.5 * sum_along_column( trg_ylapla**2 )[0]

        bad_trg_ylapla = trg_yla - bad_src_root.p
        bad_trg_sem_error = 0.5 * sum_along_column( bad_trg_ylapla**2 )[0]
        trg_sem_margin = trg_sem_error  # (trg_sem_error - bad_trg_sem_error + 1) * trg_instance.freq
    
        trg_sem_margin = max( 0.0, trg_sem_margin )
        if trg_sem_margin == 0.0:
            toptimal = True
        else:
            toptimal = False
    
        trg_total_sem_error += trg_sem_margin 

        # backward pass: accumulate gradients
        src_rae_la.backward_la( src_root_node, bad_src_root, src_gradients_la, rec_s, sem_s, 
                src_yla_unnormalized, src_ylapla, 0, soptimal )
        trg_rae_la.backward_la( trg_root_node, bad_trg_root, trg_gradients_la, rec_t, sem_t, 
                trg_yla_unnormalized, trg_ylapla, bad_trg_ylapla, toptimal )

    # normalize the totals once, after the loop; doing this inside the loop
    # would rescale the running sums on every iteration
    src_total_rec_error = src_total_rec_error / src_total_internal_node
    trg_total_rec_error = trg_total_rec_error / trg_total_internal_node

    src_total_sem_error = src_total_sem_error / src_total_internal_node
    trg_total_sem_error = trg_total_sem_error / trg_total_internal_node

    return src_total_rec_error, src_total_sem_error, src_gradients_la.to_row_vector_la(),\
        trg_total_rec_error, trg_total_sem_error, trg_gradients_la.to_row_vector_la()
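The semantic term in these process_la variants is a max-margin (hinge) objective: the transformed source root should reconstruct the genuine target root at least 1 closer than a corrupted one. A self-contained sketch of just that margin, with semantic_hinge_margin as a hypothetical helper name:

import numpy as np

def semantic_hinge_margin(yla, good_root_p, bad_root_p, freq=1.0):
    # E = 0.5 * ||yla - root.p||^2 summed over the column, as above
    e_good = 0.5 * np.sum((yla - good_root_p) ** 2)
    e_bad = 0.5 * np.sum((yla - bad_root_p) ** 2)
    # hinge: penalize only when the good pair is not better by a margin of 1
    return max(0.0, (e_good - e_bad + 1.0) * freq)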
Example #4
    def encode(self, words_embedded):
        '''
        Forward pass of training recursive autoencoders using backpropagation
        through structures.

        Args:
          words_embedded: word embedding vectors (column vectors)

        Returns:
          value1: the list of tree nodes (the root is the last element)
          value2: reconstruction_error
        '''
        words_num = words_embedded.shape[1]

        tree_nodes = [None]*(2*words_num - 1)
        tree_nodes[0:words_num] = [LeafNode(i, words_embedded[:, (i,)]) for i in range(words_num)]

        reconstruction_error = 0

        # build a tree greedily
        # initialize reconstruction errors
        c1 = words_embedded[:, arange(words_num-1)]
        c2 = words_embedded[:, arange(1, words_num)]

        p_unnormalized = self.f(dot(self.Wi1, c1) + dot(self.Wi2, c2) + self.bi[:, zeros(words_num-1, dtype=int)])
        p = p_unnormalized / LA.norm(p_unnormalized, axis=0)

        y1_unnormalized = self.f(dot(self.Wo1, p) + self.bo1[:, zeros(words_num-1, dtype=int)])
        y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)

        y2_unnormalized = self.f(dot(self.Wo2, p) + self.bo2[:, zeros(words_num-1, dtype=int)])
        y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)

        y1c1 = y1 - c1
        y2c2 = y2 - c2

        J = 0.5 * (sum_along_column(y1c1**2) + sum_along_column(y2c2**2))  # 0.5, not 1/2: Python 2 integer division would make this 0

        # initialize candidate internal nodes
        candidate_nodes = []
        for i in range(words_num-1):
            left_child = tree_nodes[i]
            right_child = tree_nodes[i+1]
            node = InternalNode(-i-1, left_child, right_child,
                                  p[:, (i,)], p_unnormalized[:, (i,)],
                                  y1c1[:, (i,)], y2c2[:, (i,)],
                                  y1_unnormalized[:, (i,)],
                                  y2_unnormalized[:, (i,)])
            candidate_nodes.append(node)
        debugging_cand_node_index = words_num


        for j in range(words_num-1):
            # find the smallest reconstruction error
            J_minpos = J.argmin()
            J_min = J[J_minpos]
            reconstruction_error += J_min

            node = candidate_nodes[J_minpos]
            node.index = words_num + j  # for debugging
            tree_nodes[words_num+j] = node

            # update reconstruction errors
            if J_minpos+1 < len(candidate_nodes):
                c1 = node
                c2 = candidate_nodes[J_minpos+1].right_child
                right_cand_node, right_J = self.__build_internal_node(c1, c2)

                right_cand_node.index = -debugging_cand_node_index
                debugging_cand_node_index += 1
                candidate_nodes[J_minpos+1] = right_cand_node

                J[J_minpos+1] = right_J

            if J_minpos-1 >= 0:
                c1 = candidate_nodes[J_minpos-1].left_child
                c2 = node
                left_cand_node, left_J = self.__build_internal_node(c1, c2)

                left_cand_node.index = -debugging_cand_node_index
                debugging_cand_node_index += 1
                candidate_nodes[J_minpos-1] = left_cand_node
                J[J_minpos-1] = left_J

            valid_indices = [i for i in range(words_num-1-j) if i != J_minpos]
            J = J[valid_indices]
            candidate_nodes = [candidate_nodes[k] for k in valid_indices]

        return tree_nodes, reconstruction_error
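encode() builds the tree greedily: at each step it merges the adjacent pair with the smallest reconstruction error, and only the two pair costs touching the new node change. The toy below mirrors that control flow with |a - b| as a stand-in cost and the mean as a stand-in parent; it illustrates the bookkeeping only, not the autoencoder itself.

# Toy analog of the greedy loop in encode(). Cost and merge rule are made up.
def greedy_merge_order(values):
    vals = list(values)
    order = []
    while len(vals) > 1:
        J = [abs(vals[i] - vals[i + 1]) for i in range(len(vals) - 1)]
        m = min(range(len(J)), key=J.__getitem__)        # J.argmin()
        order.append((vals[m], vals[m + 1]))
        vals[m:m + 2] = [(vals[m] + vals[m + 1]) / 2.0]  # parent replaces pair
    return order

print(greedy_merge_order([1.0, 1.1, 5.0, 5.2]))  # the two close pairs merge first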
Example #5
def process_la(
    source_rae_la,
    target_rae_la,
    alpha,
    source_word_vectors,
    source_instances,
    source_total_internal_node,
    target_word_vectors,
    target_instances,
    target_total_internal_node,
    bad_src_instances,
    bad_trg_instances,
):

    total_rec_error = 0
    total_sem_error = 0
    # initialize gradient accumulators
    source_gradients_la = source_rae_la.get_zero_gradients_la()
    target_gradients_la = target_rae_la.get_zero_gradients_la()
    source_total_rec_error = 0
    target_total_rec_error = 0
    source_total_sem_error = 0
    target_total_sem_error = 0
    for i in xrange(len(source_instances)):
        source_instance = source_instances[i]
        target_instance = target_instances[i]
        bad_src_instance = bad_src_instances[i]
        bad_trg_instance = bad_trg_instances[i]
        # look up the embedding of every word in the phrase: the indices in
        # instance.words select columns of the n x word_num embedding matrix
        source_words_embedded = source_word_vectors[source_instance.words]
        target_words_embedded = target_word_vectors[target_instance.words]
        bad_source_embedded = source_word_vectors[bad_src_instance]
        bad_target_embedded = target_word_vectors[bad_trg_instance]
        # forward pass: compute reconstruction errors
        source_root_node, source_rec_error = source_rae_la.forward_la(source_words_embedded)
        target_root_node, target_rec_error = target_rae_la.forward_la(target_words_embedded)
        source_total_rec_error += source_rec_error * source_instance.freq
        target_total_rec_error += target_rec_error * target_instance.freq
        bad_source_root, _ = source_rae_la.forward_la(bad_source_embedded)
        bad_target_root, _ = target_rae_la.forward_la(bad_target_embedded)

        rec_s = alpha * source_instance.freq / source_total_internal_node
        rec_t = alpha * target_instance.freq / target_total_internal_node
        sem_s = (1 - alpha) * source_instance.freq / source_total_internal_node
        sem_t = (1 - alpha) * target_instance.freq / target_total_internal_node

        # Semantic Error
        # Source side
        source_yla_unnormalized = tanh(dot(source_rae_la.Wla, source_root_node.p) + source_rae_la.bla)
        source_yla = source_yla_unnormalized / LA.norm(source_yla_unnormalized, axis=0)
        source_ylapla = source_yla - target_root_node.p
        source_sem_error = 0.5 * sum_along_column(source_ylapla ** 2)[0]

        bad_source_ylapla = source_yla - bad_target_root.p
        bad_source_sem_error = 0.5 * sum_along_column(bad_source_ylapla ** 2)[0]
        source_sem_margin = (source_sem_error - bad_source_sem_error + 1) * source_instance.freq

        source_sem_margin = max(0.0, source_sem_margin)
        if source_sem_margin == 0.0:
            soptimal = True
        else:
            soptimal = False
        source_total_sem_error += source_sem_margin

        # Target side
        target_yla_unnormalized = tanh(dot(target_rae_la.Wla, target_root_node.p) + target_rae_la.bla)
        target_yla = target_yla_unnormalized / LA.norm(target_yla_unnormalized, axis=0)
        target_ylapla = target_yla - source_root_node.p
        target_sem_error = 0.5 * sum_along_column(target_ylapla ** 2)[0]

        bad_target_ylapla = target_yla - bad_source_root.p
        bad_target_sem_error = 0.5 * sum_along_column(bad_target_ylapla ** 2)[0]
        target_sem_margin = (target_sem_error - bad_target_sem_error + 1) * target_instance.freq

        target_sem_margin = max(0.0, target_sem_margin)
        if target_sem_margin == 0.0:
            toptimal = True
        else:
            toptimal = False
        target_total_sem_error += target_sem_margin

        # backward pass: accumulate gradients
        source_rae_la.backward_la(
            source_root_node,
            bad_source_root,
            source_gradients_la,
            rec_s,
            sem_s,
            sem_t,
            source_yla_unnormalized,
            source_ylapla,
            target_ylapla,
            bad_source_ylapla,
            bad_target_ylapla,
            soptimal,
            toptimal,
        )
        target_rae_la.backward_la(
            target_root_node,
            bad_target_root,
            target_gradients_la,
            rec_t,
            sem_t,
            sem_s,
            target_yla_unnormalized,
            target_ylapla,
            source_ylapla,
            bad_target_ylapla,
            bad_source_ylapla,
            toptimal,
            soptimal,
        )

    total_rec_error = (source_total_rec_error * (1.0 / source_total_internal_node)
                       + target_total_rec_error * (1.0 / target_total_internal_node))
    total_sem_error = (source_total_sem_error * (1.0 / source_total_internal_node)
                       + target_total_sem_error * (1.0 / target_total_internal_node))

    grad_row_vec = [source_gradients_la.to_row_vector_la(), target_gradients_la.to_row_vector_la()]

    return total_rec_error, total_sem_error, concatenate(grad_row_vec)
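This variant returns scalar costs plus one concatenated gradient row, which is the (cost, jacobian) shape a batch optimizer consumes. As a hedged sketch (the closure, the summed cost, and the parameter packing are assumptions, not part of this project), such a function could be driven by scipy's L-BFGS:

import numpy as np
from scipy.optimize import minimize

def make_objective(cost_and_grad):
    # cost_and_grad(theta) -> (scalar cost, flat gradient row vector)
    def objective(theta):
        cost, grad = cost_and_grad(theta)
        return cost, np.asarray(grad, dtype=float)
    return objective

# Hypothetical usage, assuming theta0 packs both RAEs' parameters:
# result = minimize(make_objective(f), theta0, jac=True, method='L-BFGS-B')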
Example #6
    def forward(self, words_embedded):
        ''' Forward pass of training recursive autoencoders using backpropagation
        through structures.

        Args:
            words_embedded: word embedding vectors (column vectors)

        Returns:
            value1: the list of tree nodes (the root is the last element)
            value2: reconstruction_error
        '''
        sent_length = words_embedded.shape[1]
        tree_node_indices = arange(sent_length)
        tree_nodes = [None]*(2*sent_length - 1)
        tree_nodes[0:sent_length] = [LeafNode(i, words_embedded[:, (i,)])
                                     for i in range(sent_length)]
        reconstruction_error = 0
        # build a tree greedily
        for j in range(sent_length-1):
            words_num = words_embedded.shape[1]
            c1 = words_embedded[:, arange(words_num-1)]
            c2 = words_embedded[:, arange(1, words_num)]

            p_unnormalized = self.f(dot(self.Wi1, c1) + dot(self.Wi2, c2)
                                    + self.bi[:, zeros(words_num-1, dtype=int)])
            p = p_unnormalized / LA.norm(p_unnormalized, axis=0)

            y1_unnormalized = self.f(dot(self.Wo1, p)
                                     + self.bo1[:, zeros(words_num-1, dtype=int)])
            y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)

            y2_unnormalized = self.f(dot(self.Wo2, p)
                                     + self.bo2[:, zeros(words_num-1, dtype=int)])
            y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)

            y1c1 = y1 - c1
            y2c2 = y2 - c2

            # 0.5, not 1/2: Python 2 integer division would make this 0
            J = 0.5 * (sum_along_column(y1c1**2) + sum_along_column(y2c2**2))

            # find the pair with the smallest reconstruction error
            J_minpos = J.argmin()
            J_min = J[J_minpos]
            reconstruction_error += J_min

            left_child = tree_nodes[tree_node_indices[J_minpos]]
            right_child = tree_nodes[tree_node_indices[J_minpos+1]]
            y1_minus_c1 = y1c1[:, (J_minpos,)]
            y2_minus_c2 = y2c2[:, (J_minpos,)]
            y1_unnormalized_minpos = y1_unnormalized[:, (J_minpos,)]
            y2_unnormalized_minpos = y2_unnormalized[:, (J_minpos,)]
            node = InternalNode(sent_length+j, left_child, right_child,
                                p[:, (J_minpos,)], p_unnormalized[:, (J_minpos,)],
                                y1_minus_c1, y2_minus_c2,
                                y1_unnormalized_minpos, y2_unnormalized_minpos)
            tree_nodes[sent_length+j] = node

            # collapse the merged pair: drop the right child's column and
            # overwrite the left child's column with the new parent vector
            valid_indices = [i for i in range(sent_length-j) if i != J_minpos+1]
            words_embedded = words_embedded[:, valid_indices]
            words_embedded[:, (J_minpos,)] = p[:, (J_minpos,)]

            tree_node_indices = tree_node_indices[valid_indices]
            tree_node_indices[J_minpos] = sent_length + j

        # unlike Example #10, this variant returns every node, not just the root
        return tree_nodes, reconstruction_error
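Unlike Example #4, this forward pass shrinks words_embedded in place after every merge: the right child's column is dropped and the left child's column is overwritten by the new parent vector. A tiny numpy illustration of that column surgery (all values arbitrary):

import numpy as np

emb = np.arange(12.0).reshape(3, 4)    # 4 "words", embedding size 3
J_minpos = 1                           # pretend columns 1 and 2 merge
parent = np.full((3, 1), -1.0)         # stand-in for the new parent vector p

valid = [i for i in range(emb.shape[1]) if i != J_minpos + 1]
emb = emb[:, valid]                    # drop the right child's column
emb[:, (J_minpos,)] = parent           # left child's slot now holds the parent
print(emb.shape)                       # (3, 3): word 0, the merged pair, word 3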
Example #7
    def forward_la(self, words_embedded, instance):
        ''' Forward pass of training recursive autoencoders using backpropagation
        through structures.

        Args:
            words_embedded: word embedding vectors (column vectors)
            instance: phrase instance whose word indices label the leaf nodes

        Returns:
            value1: root of the tree, an instance of InternalNode
            value2: reconstruction_error
        '''
        # number of words in the phrase
        words_num = words_embedded.shape[1]

        # n leaves plus n-1 internal nodes: 2n-1 nodes in total
        tree_nodes = [None]*(2*words_num - 1)
        # the first n slots hold leaves, labeled with the words' vocabulary indices
        tree_nodes[0:words_num] = [LeafNode(instance.words[i], words_embedded[:, (i,)]) 
                                        for i in range(words_num)]
    
        reconstruction_error = 0
    
        # build a tree greedily
        # initialize reconstruction errors
        # column vectors of words 1..n-1
        c1 = words_embedded[:, arange(words_num-1)]
        # column vectors of words 2..n
        c2 = words_embedded[:, arange(1, words_num)]
        # c1 and c2 pair every adjacent neighbor: compute each pair's error
        # and start the tree from the pair with the smallest one
        p_unnormalized = self.f(dot(self.Wi1, c1) + dot(self.Wi2, c2)\
                               + self.bi[:, zeros(words_num-1, dtype=int)])
        p = p_unnormalized / LA.norm(p_unnormalized, axis=0)
    
        y1_unnormalized = self.f(dot(self.Wo1, p)\
                               + self.bo1[:, zeros(words_num-1, dtype=int)])
        y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)
  
        y2_unnormalized = self.f(dot(self.Wo2, p)\
                               + self.bo2[:, zeros(words_num-1, dtype=int)])
        y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)
    
        y1c1 = y1 - c1
        y2c2 = y2 - c2
    
        J = 0.5 * (sum_along_column(y1c1**2) + sum_along_column(y2c2**2))  # 0.5, not 1/2: Python 2 integer division would make this 0
    
        # initialize candidate internal nodes
        candidate_nodes = []
        # build the n-1 candidate internal nodes over adjacent leaf pairs
        for i in range(words_num-1):
            left_child = tree_nodes[i]
            right_child = tree_nodes[i+1]
            # indices -1 down to -(n-1), one per candidate internal node
            node = InternalNode(-i-1, left_child, right_child,
                          p[:, (i,)], p_unnormalized[:, (i,)],
                          y1c1[:, (i,)], y2c2[:, (i,)],
                          y1_unnormalized[:, (i,)], 
                          y2_unnormalized[:, (i,)])
            candidate_nodes.append(node)
        debugging_cand_node_index = words_num
      
        # greedily pick the best internal node at each step
        for j in range(words_num-1):
            # find the smallest reconstruction error
            J_minpos = J.argmin()
            J_min = J[J_minpos]
            # add it to the running total
            reconstruction_error += J_min

            # take the candidate with the smallest error
            node = candidate_nodes[J_minpos]
            node.index = words_num + j # for debugging
            # append it to the growing tree
            tree_nodes[words_num+j] = node
  
            # update reconstruction errors
            # if the new node has a right neighbor, re-pair with it
            if J_minpos+1 < len(candidate_nodes):
                c1 = node
                c2 = candidate_nodes[J_minpos+1].right_child
                right_cand_node, right_J = self.__build_internal_node(c1, c2)
        
                # candidate indices count down from -n
                right_cand_node.index = -debugging_cand_node_index
                debugging_cand_node_index += 1
                candidate_nodes[J_minpos+1] = right_cand_node
        
                J[J_minpos+1] = right_J

            # the new node is not the leftmost: re-pair it with its left neighbor
            if J_minpos-1 >= 0:
                c1 = candidate_nodes[J_minpos-1].left_child
                c2 = node
                left_cand_node, left_J = self.__build_internal_node(c1, c2)
        
                left_cand_node.index = -debugging_cand_node_index
                debugging_cand_node_index += 1
                candidate_nodes[J_minpos-1] = left_cand_node
                J[J_minpos-1] = left_J
      
            valid_indices = [i for i in range(words_num-1-j) if i != J_minpos]
            J = J[valid_indices]
            candidate_nodes = [candidate_nodes[k] for k in valid_indices]

        return tree_nodes[-1], reconstruction_error 
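forward_la differs from the plain forward pass only in labeling each leaf with instance.words[i] instead of its position. Across these examples an instance needs at least word indices, a corpus frequency, and (in the hiero variant of Example #11) a list of idx strings; the container below is inferred from that usage and is not the projects' actual class.

class Instance(object):
    # Hypothetical phrase instance, reconstructed from how the examples use it.
    def __init__(self, words, freq=1, idx=None):
        self.words = words     # vocabulary indices of the phrase's words
        self.freq = freq       # occurrence count, used to weight errors
        self.idx = idx or []   # identifier strings, e.g. "3,7" in Example #11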
Example #8
    def forward(self, words_embedded):
        ''' Forward pass of training recursive autoencoders using backpropagation
        through structures.  
    
        Args:
            words_embedded: word embedding vectors (column vectors)
      
        Returns:
            value1: root of the tree, an instance of InternalNode 
            value2: reconstruction_error
        '''

        words_num = words_embedded.shape[1]

        tree_nodes = [None] * (2 * words_num - 1)
        tree_nodes[0:words_num] = [
            LeafNode(i, words_embedded[:, (i, )]) for i in range(words_num)
        ]

        reconstruction_error = 0

        # build a tree greedily
        # initialize reconstruction errors
        c1 = words_embedded[:, arange(words_num - 1)]
        c2 = words_embedded[:, arange(1, words_num)]
        p_unnormalized = self.f(dot(self.Wi1, c1) + dot(self.Wi2, c2)\
                               + self.bi[:, zeros(words_num-1, dtype=int)])
        p = p_unnormalized / LA.norm(p_unnormalized, axis=0)

        y1_unnormalized = self.f(dot(self.Wo1, p)\
                               + self.bo1[:, zeros(words_num-1, dtype=int)])
        y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)

        y2_unnormalized = self.f(dot(self.Wo2, p)\
                               + self.bo2[:, zeros(words_num-1, dtype=int)])
        y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)

        y1c1 = y1 - c1
        y2c2 = y2 - c2

        J = 0.5 * (sum_along_column(y1c1**2) + sum_along_column(y2c2**2))

        # initialize candidate internal nodes
        candidate_nodes = []
        for i in range(words_num - 1):
            left_child = tree_nodes[i]
            right_child = tree_nodes[i + 1]
            node = InternalNode(-i - 1, left_child, right_child, p[:, (i, )],
                                p_unnormalized[:, (i, )], y1c1[:, (i, )],
                                y2c2[:, (i, )], y1_unnormalized[:, (i, )],
                                y2_unnormalized[:, (i, )])
            candidate_nodes.append(node)
        debugging_cand_node_index = words_num

        # find the adjacent pairing with the smallest error (e.g. merging
        # words 1-2 vs. 3-4) and use it; each iteration takes the minimum,
        # merges, updates the affected errors, and repeats
        for j in range(words_num - 1):
            # find the smallest reconstruction error
            J_minpos = J.argmin()
            J_min = J[J_minpos]
            reconstruction_error += J_min

            node = candidate_nodes[J_minpos]
            node.index = words_num + j  # for debugging
            tree_nodes[words_num + j] = node

            # update reconstruction errors
            if J_minpos + 1 < len(candidate_nodes):
                c1 = node
                c2 = candidate_nodes[J_minpos + 1].right_child
                right_cand_node, right_J = self.__build_internal_node(c1, c2)

                right_cand_node.index = -debugging_cand_node_index
                debugging_cand_node_index += 1
                candidate_nodes[J_minpos + 1] = right_cand_node

                J[J_minpos + 1] = right_J

            if J_minpos - 1 >= 0:
                c1 = candidate_nodes[J_minpos - 1].left_child
                c2 = node
                left_cand_node, left_J = self.__build_internal_node(c1, c2)

                left_cand_node.index = -debugging_cand_node_index
                debugging_cand_node_index += 1
                candidate_nodes[J_minpos - 1] = left_cand_node
                J[J_minpos - 1] = left_J

            valid_indices = [
                i for i in range(words_num - 1 - j) if i != J_minpos
            ]
            J = J[valid_indices]
            candidate_nodes = [candidate_nodes[k] for k in valid_indices]

        return tree_nodes[-1], reconstruction_error
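A pattern shared by every forward pass above: apply the activation, then L2-normalize each column so every node representation lies on the unit sphere. Assuming self.f is tanh (the semantic layers in Examples #3, #5 and #11 use tanh explicitly), the whole layer reduces to a few lines; note that numpy broadcasting also makes the explicit bias tiling via zeros(words_num-1, dtype=int) unnecessary.

import numpy as np

def normalized_tanh_layer(W, x, b):
    # W: (m, n) weights, x: (n, k) column vectors, b: (m, 1) bias.
    # b broadcasts across the k columns, so no index-based tiling is needed.
    unnormalized = np.tanh(np.dot(W, x) + b)
    return unnormalized / np.linalg.norm(unnormalized, axis=0)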
Example #9
  def test_sum_along_column(self):
    x = np.array([[1, 2, 3], [4, 5, 6]])
    x_sum = sum_along_column(x) 
    x_sum_expected = np.array([5, 7, 9])

    self.assertFalse((x_sum != x_sum_expected).any())
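This test pins down the one helper the other examples never define: sum_along_column sums over rows, yielding one value per column. A minimal implementation consistent with the expected output:

import numpy as np

def sum_along_column(x):
    # [[1, 2, 3], [4, 5, 6]] -> [5, 7, 9]: per-column sums (axis=0)
    return np.sum(x, axis=0)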
Example #10
File: rae.py  Project: xuyisen/SCDector
    def forward(self, words_embedded):
        ''' Forward pass of training recursive autoencoders using backpropagation
        through structures.

        Args:
            words_embedded: word embedding vectors (column vectors)

        Returns:
            value1: root of the tree, an instance of InternalNode
            value2: reconstruction_error
        '''

        sent_length = words_embedded.shape[1]

        tree_node_indices = arange(sent_length)
        tree_nodes = [None] * (2 * sent_length - 1)
        tree_nodes[0:sent_length] = [
            LeafNode(i, words_embedded[:, (i, )]) for i in range(sent_length)
        ]

        reconstruction_error = 0
        # build a tree greedily
        for j in range(sent_length - 1):
            words_num = words_embedded.shape[1]
            c1 = words_embedded[:, arange(words_num - 1)]
            c2 = words_embedded[:, arange(1, words_num)]

            p_unnormalized = self.f(dot(self.Wi1, c1) + dot(self.Wi2, c2) \
                                    + self.bi[:, zeros(words_num - 1, dtype=int)])
            p = p_unnormalized / LA.norm(p_unnormalized, axis=0)

            y1_unnormalized = self.f(dot(self.Wo1, p) \
                                     + self.bo1[:, zeros(words_num - 1, dtype=int)])
            y1 = y1_unnormalized / LA.norm(y1_unnormalized, axis=0)

            y2_unnormalized = self.f(dot(self.Wo2, p) \
                                     + self.bo2[:, zeros(words_num - 1, dtype=int)])
            y2 = y2_unnormalized / LA.norm(y2_unnormalized, axis=0)

            y1c1 = y1 - c1
            y2c2 = y2 - c2

            J = 0.5 * (sum_along_column(y1c1**2) + sum_along_column(y2c2**2))

            # finding the pair with smallest reconstruction error for constructing tree
            J_minpos = J.argmin()
            J_min = J[J_minpos]
            reconstruction_error += J_min

            left_child = tree_nodes[tree_node_indices[J_minpos]]
            right_child = tree_nodes[tree_node_indices[J_minpos + 1]]
            y1_minus_c1 = y1c1[:, (J_minpos, )]
            y2_minus_c2 = y2c2[:, (J_minpos, )]
            y1_unnormalized_minpos = y1_unnormalized[:, (J_minpos, )]
            y2_unnormalized_minpos = y2_unnormalized[:, (J_minpos, )]
            node = InternalNode(sent_length + j, left_child, right_child,
                                p[:, (J_minpos, )],
                                p_unnormalized[:, (J_minpos, )],
                                y1_minus_c1, y2_minus_c2,
                                y1_unnormalized_minpos, y2_unnormalized_minpos)
            tree_nodes[sent_length + j] = node

            valid_indices = [
                i for i in range(sent_length - j) if i != J_minpos + 1
            ]
            words_embedded = words_embedded[:, valid_indices]
            words_embedded[:, (J_minpos, )] = p[:, (J_minpos, )]

            tree_node_indices = tree_node_indices[valid_indices]
            tree_node_indices[J_minpos] = sent_length + j

        return tree_nodes[-1], reconstruction_error
Example #11
def process_la( src_rae_la, trg_rae_la, alpha, 
             src_word_vectors, src_instances, src_total_internal_node,
             trg_word_vectors, trg_instances, trg_total_internal_node,
             bad_src_instances, bad_trg_instances,
             src_Xidx, trg_Xidx, src_hiero_map, trg_hiero_map ):

    total_rec_error = 0
    total_sem_error = 0
    # initialize gradient accumulators
    src_gradients_la = src_rae_la.get_zero_gradients_la()
    trg_gradients_la = trg_rae_la.get_zero_gradients_la()    
    src_total_rec_error = 0
    trg_total_rec_error = 0
    src_total_sem_error = 0
    trg_total_sem_error = 0
    for i in xrange( len( src_instances ) ):
        src_instance = src_instances[i]
        trg_instance = trg_instances[i]
        bad_src_instance = bad_src_instances[i]
        bad_trg_instance = bad_trg_instances[i]
        if src_Xidx[0] in src_instance.words:
            src_words_embedded = src_word_vectors[src_instance.words]
            trg_words_embedded = trg_word_vectors[trg_instance.words]
            if src_Xidx[1] in src_instance.words:
                src_x1 = src_instance.words.index(src_Xidx[0])
                src_x2 = src_instance.words.index(src_Xidx[1])
                trg_x1 = trg_instance.words.index(trg_Xidx[0])
                trg_x2 = trg_instance.words.index(trg_Xidx[1])
                src_words_embedded[:,src_x1] = zeros_like( src_words_embedded[:,src_x1] )
                src_words_embedded[:,src_x2] = zeros_like( src_words_embedded[:,src_x2] )
                trg_words_embedded[:,trg_x1] = zeros_like( trg_words_embedded[:,trg_x1] )
                trg_words_embedded[:,trg_x2] = zeros_like( trg_words_embedded[:,trg_x2] )
                for k in xrange( len( src_instance.idx ) ):
                    src_idx = src_instance.idx[k]
                    src_idx = src_idx.strip().split( ',' )
                    if src_idx[0] in src_hiero_map:
                        src_words_embedded[:,src_x1] += src_hiero_map[src_idx[0]]
                    else:
                        src_words_embedded[:,src_x1] = src_words_embedded[:,src_x1] / ( k + 1 )
                    if src_idx[1] in src_hiero_map:
                        src_words_embedded[:,src_x2] += src_hiero_map[src_idx[1]]
                    else:
                        src_words_embedded[:,src_x2] = src_words_embedded[:,src_x2] / ( k + 1 )
                src_words_embedded[:,src_x1] /= src_instance.freq
                src_words_embedded[:,src_x2] /= src_instance.freq
                src_root_node, src_rec_error = src_rae_la.forward_la( src_words_embedded )
                for k in xrange( len( trg_instance.idx ) ):
                    trg_idx = trg_instance.idx[k]
                    trg_idx = trg_idx.strip().split( ',' )
                    if trg_idx[0] in trg_hiero_map:
                        trg_words_embedded[:,trg_x1] += trg_hiero_map[trg_idx[0]]
                    else:
                        trg_words_embedded[:,trg_x1] = trg_words_embedded[:,trg_x1] / ( k + 1 )
                    if trg_idx[1] in trg_hiero_map:
                        trg_words_embedded[:,trg_x2] += trg_hiero_map[trg_idx[1]]
                    else:
                        trg_words_embedded[:,trg_x2] = trg_words_embedded[:,trg_x2] / ( k + 1 )
                trg_words_embedded[:,trg_x1] /= trg_instance.freq
                trg_words_embedded[:,trg_x2] /= trg_instance.freq
                trg_root_node, trg_rec_error = trg_rae_la.forward_la( trg_words_embedded )
            else:
                # the phrase contains only the X1 slot
                src_x1 = src_instance.words.index(src_Xidx[0])
                trg_x1 = trg_instance.words.index(trg_Xidx[0])
                src_words_embedded[:,src_x1] = zeros_like( src_words_embedded[:,src_x1] )
                trg_words_embedded[:,trg_x1] = zeros_like( trg_words_embedded[:,trg_x1] )
                for k in xrange( len( src_instance.idx ) ):
                    src_idx = src_instance.idx[k]
                    if src_idx in src_hiero_map:
                        src_words_embedded[:,src_x1] += src_hiero_map[src_idx]
                    else:
                        src_words_embedded[:,src_x1] += src_words_embedded[:,src_x1] / ( k + 1 )
                src_words_embedded[:,src_x1] /= src_instance.freq
                src_root_node, src_rec_error = src_rae_la.forward_la( src_words_embedded )
                for k in xrange( len( trg_instance.idx ) ):
                    trg_idx = trg_instance.idx[k]
                    if trg_idx in trg_hiero_map:
                        trg_words_embedded[:,trg_x1] += trg_hiero_map[trg_idx]
                    else:
                        trg_words_embedded[:,trg_x1] += trg_words_embedded[:,trg_x1] / ( k + 1 )
                trg_words_embedded[:,trg_x1] /= trg_instance.freq
                trg_root_node, trg_rec_error = trg_rae_la.forward_la( trg_words_embedded )
        else:
            # look up the embedding of every word in the phrase: the indices in
            # instance.words select columns of the n x word_num embedding matrix
            src_words_embedded = src_word_vectors[src_instance.words]
            trg_words_embedded = trg_word_vectors[trg_instance.words]
            src_root_node, src_rec_error = src_rae_la.forward_la( src_words_embedded )
            trg_root_node, trg_rec_error = trg_rae_la.forward_la( trg_words_embedded )
            src_hiero_map[src_instance.idx[0]] = src_root_node.p.reshape(src_word_vectors.embsize(),)
            trg_hiero_map[trg_instance.idx[0]] = trg_root_node.p.reshape(trg_word_vectors.embsize(),)

        # embed the corrupted phrases in the same way
        bad_src_embedded = src_word_vectors[bad_src_instance] 
        bad_trg_embedded = trg_word_vectors[bad_trg_instance]

        # accumulate reconstruction errors and run the corrupted phrases forward
        src_total_rec_error += src_rec_error * src_instance.freq
        trg_total_rec_error += trg_rec_error * trg_instance.freq
        bad_src_root, _  = src_rae_la.forward_la( bad_src_embedded )
        bad_trg_root, _ = trg_rae_la.forward_la( bad_trg_embedded )
 
        rec_s = alpha * src_instance.freq / src_total_internal_node
        rec_t = alpha * trg_instance.freq / trg_total_internal_node
        sem_s = ( 1 - alpha ) * src_instance.freq / src_total_internal_node
        sem_t = ( 1 - alpha ) * trg_instance.freq / trg_total_internal_node

        # Semantic Error
        # Source side
        src_yla_unnormalized = tanh( dot( src_rae_la.Wla, src_root_node.p ) + src_rae_la.bla )
        src_yla = src_yla_unnormalized / LA.norm( src_yla_unnormalized, axis=0 )
        src_ylapla = src_yla - trg_root_node.p
        src_sem_error = 0.5 * sum_along_column( src_ylapla**2 )[0]

        bad_src_ylapla = src_yla - bad_trg_root.p
        bad_src_sem_error = 0.5 * sum_along_column( bad_src_ylapla**2 )[0] 
        src_sem_margin = (src_sem_error-bad_src_sem_error+1)*src_instance.freq
        
        src_sem_margin = max( 0.0, src_sem_margin )
        if src_sem_margin == 0.0:
            soptimal = True
        else:
            soptimal = False
        
        src_total_sem_error += src_sem_margin

        # Target side
        trg_yla_unnormalized = tanh( dot( trg_rae_la.Wla, trg_root_node.p ) + trg_rae_la.bla )
        trg_yla = trg_yla_unnormalized / LA.norm( trg_yla_unnormalized, axis=0 )
        trg_ylapla = trg_yla - src_root_node.p
        trg_sem_error = 0.5 * sum_along_column( trg_ylapla**2 )[0]

        bad_trg_ylapla = trg_yla - bad_src_root.p
        bad_trg_sem_error = 0.5 * sum_along_column( bad_trg_ylapla**2 )[0]
        trg_sem_margin = (trg_sem_error-bad_trg_sem_error+1)*trg_instance.freq
        
        trg_sem_margin = max( 0.0, trg_sem_margin )
        if trg_sem_margin == 0.0:
            toptimal = True
        else:
            toptimal = False
        
        trg_total_sem_error += trg_sem_margin 

        # backward pass: accumulate gradients
        src_rae_la.backward_la( src_root_node, bad_src_root, src_gradients_la, rec_s, sem_s, 
                src_yla_unnormalized, src_ylapla, bad_src_ylapla, soptimal )
        trg_rae_la.backward_la( trg_root_node, bad_trg_root, trg_gradients_la, rec_t, sem_t, 
                trg_yla_unnormalized, trg_ylapla, bad_trg_ylapla, toptimal )
    
    src_total_rec_error = src_total_rec_error * ( 1.0 / src_total_internal_node )
    trg_total_rec_error = trg_total_rec_error * ( 1.0 / trg_total_internal_node )
     
    src_total_sem_error = src_total_sem_error * ( 1.0 / src_total_internal_node )  
    trg_total_sem_error = trg_total_sem_error * ( 1.0 / trg_total_internal_node )  

    return src_total_rec_error, src_total_sem_error, src_gradients_la.to_row_vector_la(),\
        trg_total_rec_error, trg_total_sem_error, trg_gradients_la.to_row_vector_la()
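The distinctive part of this variant is the hiero_map cache: plain phrases store their root representation under their idx key, and hierarchical rules later sum those cached vectors into their zeroed X-slot columns before dividing by the rule's frequency. A small sketch of just that slot-filling arithmetic, with made-up vectors and keys:

import numpy as np

# Cached sub-phrase root representations, keyed by instance idx strings.
hiero_map = {'3': np.ones(4), '7': 2.0 * np.ones(4)}

slot = np.zeros(4)                 # the X-slot column, zeroed as above
for key in ['3', '7']:             # idx entries of one hierarchical rule
    if key in hiero_map:
        slot += hiero_map[key]     # accumulate cached representations
slot /= 2.0                        # divide by instance.freq, as in the code
print(slot)                        # [1.5 1.5 1.5 1.5]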