示例#1
0
    def build_graph(self, input_network_outputs=None, reuse=True):
        """Build the graph for this network and return its outputs.

        The input embeddings are concatenated and rescaled, run through
        ``self.n_layers`` forward recurrent layers (plus a parallel stack of
        backward layers when ``self.bidirectional`` is set), shifted so that
        a learned initial state is prepended, and finally handed to one
        classifier per output vocabulary whose field is ``form``, ``upos``
        or ``xpos``.

        Args:
            input_network_outputs: optional iterable of
                ``(input_network, output)`` pairs; each pair contributes one
                more input tensor.  ``None`` (the default) means there are
                no upstream networks.
            reuse: forwarded as the ``reuse`` flag of every vocabulary call.
                When true, the conv/recurrent keep and include probabilities
                are all forced to 1.

        Returns:
            A tuple ``(outputs, tokens)``.  ``outputs`` maps each classified
            field name to whatever that vocabulary's classifier returned;
            ``tokens`` is a dict with the keys ``n_tokens``,
            ``tokens_per_sequence``, ``token_weights`` and ``n_sequences``.
        """

        # ``None`` stands in for "no upstream networks"; a literal ``{}``
        # default would be a single object shared by every call.
        if input_network_outputs is None:
            input_network_outputs = {}

        with tf.variable_scope('Embeddings'):
            input_tensors = [
                input_vocab.get_input_tensor(reuse=reuse)
                for input_vocab in self.input_vocabs
            ]
            # NOTE(review): each *element* is unpacked into a pair here, so a
            # non-empty dict would be iterated by key -- confirm that callers
            # pass (network, output) pairs rather than a mapping.
            for input_network, output in input_network_outputs:
                with tf.variable_scope(input_network.classname):
                    input_tensors.append(
                        input_network.get_input_tensor(output, reuse=reuse))
            layer = tf.concat(input_tensors, 2)
        # Rescale every position by input_size / (number of non-zero
        # features); the epsilon guards against all-zero positions.
        n_nonzero = tf.to_float(
            tf.count_nonzero(layer, axis=-1, keep_dims=True))
        batch_size, bucket_size, input_size = nn.get_sizes(layer)
        layer *= input_size / (n_nonzero + tf.constant(1e-12))

        # Presumably 1 wherever the id placeholder is positive -- the exact
        # semantics live in the project helper ``nn.greater``.
        token_weights = nn.greater(self.id_vocab.placeholder,
                                   0,
                                   dtype=tf.int32)
        tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
        n_tokens = tf.reduce_sum(tokens_per_sequence)
        n_sequences = tf.count_nonzero(tokens_per_sequence)
        # One more than the token count; presumably this accounts for an
        # extra boundary/root position -- TODO confirm.
        seq_lengths = tokens_per_sequence + 1
        tokens = {
            'n_tokens': n_tokens,
            'tokens_per_sequence': tokens_per_sequence,
            'token_weights': token_weights,
            'n_sequences': n_sequences
        }

        conv_keep_prob = 1. if reuse else self.conv_keep_prob
        recur_keep_prob = 1. if reuse else self.recur_keep_prob
        recur_include_prob = 1. if reuse else self.recur_include_prob

        # NOTE(review): ``layer`` is concatenated on axis 2 and sliced as a
        # sequence on axis 1 below, so ``seq_axis=2`` looks like it reverses
        # the feature axis rather than the sequence axis -- confirm.
        rev_layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=2)
        for i in six.moves.range(self.n_layers):
            # The first layer may use a different convolution width.
            conv_width = self.first_layer_conv_width if not i else self.conv_width
            with tf.variable_scope('RNN_FW-{}'.format(i)):
                layer, _ = recurrent.directed_RNN(
                    layer,
                    self.recur_size,
                    seq_lengths,
                    bidirectional=False,
                    recur_cell=self.recur_cell,
                    conv_width=conv_width,
                    recur_func=self.recur_func,
                    conv_keep_prob=conv_keep_prob,
                    recur_include_prob=recur_include_prob,
                    recur_keep_prob=recur_keep_prob,
                    cifg=self.cifg,
                    highway=self.highway,
                    highway_func=self.highway_func)
            if self.bidirectional:
                with tf.variable_scope('RNN_BW-{}'.format(i)):
                    rev_layer, _ = recurrent.directed_RNN(
                        rev_layer,
                        self.recur_size,
                        seq_lengths,
                        bidirectional=False,
                        recur_cell=self.recur_cell,
                        conv_width=conv_width,
                        recur_func=self.recur_func,
                        conv_keep_prob=conv_keep_prob,
                        recur_keep_prob=recur_keep_prob,
                        recur_include_prob=recur_include_prob,
                        cifg=self.cifg,
                        highway=self.highway,
                        highway_func=self.highway_func)

        # From here on ``i`` is the index of the LAST recurrent layer (the
        # loop variable is deliberately reused), so ``self.n_layers`` must be
        # at least 1 for the scopes below to exist.
        ones = tf.ones([batch_size, 1, 1])
        with tf.variable_scope('RNN_FW-{}/RNN/Loop'.format(i), reuse=True):
            # Re-open the last forward layer's scope to fetch its
            # 'Initial_state' variable and keep the first
            # ``recur_size``-wide chunk of it.
            fw_initial_state = tf.get_variable('Initial_state')
            n_splits = fw_initial_state.get_shape().as_list()[-1] / self.recur_size
            fw_initial_state = tf.split(fw_initial_state, int(n_splits), -1)[0]
            # Broadcast the state over the batch to act as a start token.
            start_token = ones * fw_initial_state
            # Reverse, drop the first position on axis 1, reverse back, then
            # prepend the start token.
            layer = tf.reverse_sequence(layer, seq_lengths, seq_axis=2)
            layer = layer[:, 1:]
            layer = tf.reverse_sequence(layer, seq_lengths - 1, seq_axis=2)
            layer = tf.concat([start_token, layer], axis=1)
        if self.bidirectional:
            with tf.variable_scope('RNN_BW-{}/RNN/Loop'.format(i), reuse=True):
                bw_initial_state = tf.get_variable('Initial_state')
                n_splits = bw_initial_state.get_shape().as_list()[-1] / self.recur_size
                bw_initial_state = tf.split(bw_initial_state, int(n_splits),
                                            -1)[0]
                stop_token = ones * bw_initial_state
                # NOTE(review): this concatenates the *forward* ``layer``,
                # discarding the ``rev_layer`` produced by the backward
                # RNNs above -- confirm whether ``rev_layer`` was intended.
                rev_layer = tf.concat([stop_token, layer], axis=1)
                rev_layer = tf.reverse_sequence(rev_layer,
                                                seq_lengths + 1,
                                                seq_axis=2)[:, 1:]
            if self.bilin:
                # Also include the elementwise product of both directions.
                layer = tf.concat([layer * rev_layer, layer, rev_layer],
                                  axis=2)
            else:
                layer = tf.concat([layer, rev_layer], axis=2)

        output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
        outputs = {}
        with tf.variable_scope('Classifiers'):
            if 'form' in output_vocabs:
                vocab = output_vocabs['form']
                outputs[vocab.field] = vocab.get_sampled_linear_classifier(
                    layer,
                    self.n_samples,
                    token_weights=token_weights,
                    reuse=reuse)
                self._evals.add('form')
            if 'upos' in output_vocabs:
                vocab = output_vocabs['upos']
                outputs[vocab.field] = vocab.get_linear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('upos')
            if 'xpos' in output_vocabs:
                vocab = output_vocabs['xpos']
                outputs[vocab.field] = vocab.get_linear_classifier(
                    layer, token_weights=token_weights, reuse=reuse)
                self._evals.add('xpos')
        return outputs, tokens
示例#2
0
    def get_bilinear_discriminator(self,
                                   layer,
                                   token_weights,
                                   variable_scope=None,
                                   reuse=False):
        """Build the bilinear edge discriminator with its loss and counts.

        ``layer`` passes through ``self.n_layers - 1`` hidden layers and one
        final split layer.  The first pair of splits is scored by a
        (diagonal) bilinear discriminator; when ``self.linearize`` and/or
        ``self.distance`` are set, further pairs feed auxiliary scorers
        whose (gradient-stopped) terms are added to the logits and
        subtracted from the loss.

        Args:
            layer: input tensor for the hidden layers.
            token_weights: mask multiplied into probabilities, loss weights
                and predictions.
            variable_scope: scope name; falls back to ``self.field``.
            reuse: when true the hidden keep probability is forced to 1.

        Returns:
            A dict of tensors.  Every ``*unlabeled*`` entry and its
            unprefixed counterpart refer to the same tensor.
        """

        keep_prob = 1 if reuse else self.hidden_keep_prob
        use_linear = self.add_linear
        # Two hidden splits for the main scorer plus two per auxiliary one.
        n_splits = 2 * (1 + self.linearize + self.distance)
        with tf.variable_scope(variable_scope or self.field):
            for depth in six.moves.range(0, self.n_layers - 1):
                with tf.variable_scope('FC-%d' % depth):
                    layer = classifiers.hidden(
                        layer,
                        n_splits * self.hidden_size,
                        hidden_func=self.hidden_func,
                        hidden_keep_prob=keep_prob)
            with tf.variable_scope('FC-top'):
                splits = classifiers.hiddens(
                    layer,
                    n_splits * [self.hidden_size],
                    hidden_func=self.hidden_func,
                    hidden_keep_prob=keep_prob)
            # Consume the splits pairwise, in the order they were produced.
            layer1, layer2 = splits.pop(0), splits.pop(0)
            if self.linearize:
                lin_layer1, lin_layer2 = splits.pop(0), splits.pop(0)
            if self.distance:
                dist_layer1, dist_layer2 = splits.pop(0), splits.pop(0)

            with tf.variable_scope('Discriminator'):
                # Both flavours take the same arguments, so choose once and
                # share a single code path for all three scorers.
                if self.diagonal:
                    discriminate = classifiers.diagonal_bilinear_discriminator
                else:
                    discriminate = classifiers.bilinear_discriminator

                logits = discriminate(layer1,
                                      layer2,
                                      hidden_keep_prob=keep_prob,
                                      add_linear=use_linear)
                if self.linearize:
                    with tf.variable_scope('Linearization'):
                        lin_logits = discriminate(lin_layer1,
                                                  lin_layer2,
                                                  hidden_keep_prob=keep_prob,
                                                  add_linear=use_linear)
                if self.distance:
                    with tf.variable_scope('Distance'):
                        dist_lamda = 1 + tf.nn.softplus(
                            discriminate(dist_layer1,
                                         dist_layer2,
                                         hidden_keep_prob=keep_prob,
                                         add_linear=use_linear))

                # ---- targets -------------------------------------------
                # (n x m x m)
                unlabeled_targets = self.placeholder
                layer1_shape = tf.shape(layer1)
                batch_size = layer1_shape[0]
                bucket_size = layer1_shape[1]
                # (1 x m)
                positions = tf.expand_dims(tf.range(bucket_size), 0)
                # (1 x 1 x m)
                head_ids = tf.expand_dims(positions, -2)
                # (1 x m x 1)
                dep_ids = tf.expand_dims(positions, -1)
                if self.linearize:
                    # Built but not consumed below; kept so the graph is
                    # unchanged.
                    lin_targets = tf.to_float(
                        tf.less(unlabeled_targets, dep_ids))
                    # True where the head position precedes the dependent.
                    # (n x m x m)
                    head_is_left = tf.tile(tf.less(head_ids, dep_ids),
                                           [batch_size, 1, 1])
                    # Cross-entropy of the linearization of each pair.
                    signed_lin_logits = tf.where(head_is_left, -lin_logits,
                                                 lin_logits)
                    lin_xent = -tf.nn.softplus(signed_lin_logits)
                    # Added without letting gradients flow through it.
                    logits += tf.stop_gradient(lin_xent)
                if self.distance:
                    # Built but not consumed below; kept so the graph is
                    # unchanged.
                    dist_targets = tf.abs(unlabeled_targets - dep_ids)
                    # |head - dep| for every pair, nudged away from zero.
                    # (n x m x m)
                    pair_gaps = tf.tile(tf.abs(head_ids - dep_ids),
                                        [batch_size, 1, 1])
                    dist_ids = tf.to_float(pair_gaps) + 1e-12
                    # Penalty that grows with the squared gap between the
                    # actual and the predicted distance.
                    dist_kld = -tf.log((dist_ids - dist_lamda)**2 / 2 + 1)
                    logits += tf.stop_gradient(dist_kld)

                # ---- probabilities / loss ------------------------------
                # (n x m x m)
                probabilities = tf.nn.sigmoid(logits) * tf.to_float(
                    token_weights)
                # () scalar
                loss = tf.losses.sigmoid_cross_entropy(
                    unlabeled_targets, logits, weights=token_weights)
                n_tokens = tf.to_float(tf.reduce_sum(token_weights))
                if self.linearize:
                    lin_target_xent = lin_xent * unlabeled_targets
                    weighted_lin = lin_target_xent * tf.to_float(
                        token_weights)
                    loss -= tf.reduce_sum(weighted_lin) / (n_tokens + 1e-12)
                if self.distance:
                    dist_target_kld = dist_kld * unlabeled_targets
                    weighted_dist = dist_target_kld * tf.to_float(
                        token_weights)
                    loss -= tf.reduce_sum(weighted_dist) / (n_tokens + 1e-12)

                # ---- predictions / counts ------------------------------
                # (n x m x m)
                predictions = nn.greater(
                    logits, 0, dtype=tf.int32) * token_weights
                hits = predictions * unlabeled_targets
                # () scalars
                n_predicted = tf.reduce_sum(predictions)
                n_gold = tf.reduce_sum(unlabeled_targets)
                n_hits = tf.reduce_sum(hits)
                n_false_pos = n_predicted - n_hits
                n_false_neg = n_gold - n_hits
                # (n)
                gold_per_seq = tf.reduce_sum(unlabeled_targets, axis=[1, 2])
                hits_per_seq = tf.reduce_sum(hits, axis=[1, 2])
                # A sequence counts as correct when every gold edge was hit.
                n_correct_seqs = tf.reduce_sum(
                    nn.equal(hits_per_seq, gold_per_seq))

        # There are no labels here, so the labeled and unlabeled entries
        # share the same tensors.
        return {
            'unlabeled_targets': unlabeled_targets,
            'probabilities': probabilities,
            'unlabeled_loss': loss,
            'loss': loss,
            'unlabeled_predictions': predictions,
            'n_unlabeled_true_positives': n_hits,
            'n_unlabeled_false_positives': n_false_pos,
            'n_unlabeled_false_negatives': n_false_neg,
            'n_correct_unlabeled_sequences': n_correct_seqs,
            'predictions': predictions,
            'n_true_positives': n_hits,
            'n_false_positives': n_false_pos,
            'n_false_negatives': n_false_neg,
            'n_correct_sequences': n_correct_seqs,
        }
示例#3
0
 def get_unfactored_bilinear_classifier(self, layer, token_weights, variable_scope=None, reuse=False):
   """Build a bilinear classifier that predicts edges and labels jointly.

   ``layer`` passes through ``self.n_layers - 1`` hidden layers and a final
   split layer; the two splits are combined by a (diagonal) bilinear
   classifier over ``len(self)`` classes.  A predicted or target class
   greater than 0 counts as an edge.

   Args:
     layer: input tensor for the hidden layers; also returned unchanged
       under the ``recur_layer`` key.
     token_weights: mask multiplied into probabilities, loss weights and
       predictions.
     variable_scope: scope name; falls back to ``self.field``.
     reuse: when true the hidden keep probability is forced to 1.

   Returns:
     A dict of tensors with targets, probabilities, loss, predictions and
     labeled/unlabeled true-positive, false-positive, false-negative and
     correct-sequence counts.  ``unlabeled_loss`` is a constant 0.
   """
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.field):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     # The scope name used to be built as ``'FC-top' % i``, which raises
     # TypeError (no format specifier) -- and NameError when the loop above
     # never ran.  The scope name is a plain constant.
     with tf.variable_scope('FC-top'):
       # ``hiddens`` (plural) takes a list of sizes and returns one layer
       # per size; the result is popped as a list right below.
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       
       #-----------------------------------------------------------
       # Process the targets
       targets = self.placeholder
       # (n x m x m) -> (n x m x m)
       # An edge exists wherever the target class is greater than 0.
       unlabeled_targets = nn.greater(targets, 0)
       
       #-----------------------------------------------------------
       # Process the logits
       # (n x m x c x m) -> (n x m x m x c)
       transposed_logits = tf.transpose(logits, [0,1,3,2])
       
       #-----------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m x c) -> (n x m x m x c)
       probabilities = tf.nn.softmax(transposed_logits) * tf.to_float(tf.expand_dims(token_weights, axis=-1))
       # (n x m x m), (n x m x m x c), (n x m x m) -> ()
       loss = tf.losses.sparse_softmax_cross_entropy(targets, transposed_logits, weights=token_weights)
       
       #-----------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x m x c) -> (n x m x m)
       predictions = tf.argmax(transposed_logits, axis=-1, output_type=tf.int32) * token_weights
       # (n x m x m) -> (n x m x m)
       unlabeled_predictions = nn.greater(predictions, 0)
       # (n x m x m) (*) (n x m x m) -> (n x m x m)
       unlabeled_true_positives = unlabeled_predictions * unlabeled_targets
       # A labeled true positive must also carry the right class.
       true_positives = nn.equal(targets, predictions) * unlabeled_true_positives
       # (n x m x m) -> ()
       n_predictions = tf.reduce_sum(unlabeled_predictions)
       n_targets = tf.reduce_sum(unlabeled_targets)
       n_unlabeled_true_positives = tf.reduce_sum(unlabeled_true_positives)
       n_true_positives = tf.reduce_sum(true_positives)
       # () - () -> ()
       n_unlabeled_false_positives = n_predictions - n_unlabeled_true_positives
       n_unlabeled_false_negatives = n_targets - n_unlabeled_true_positives
       n_false_positives = n_predictions - n_true_positives
       n_false_negatives = n_targets - n_true_positives
       # (n x m x m) -> (n)
       n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
       n_unlabeled_true_positives_per_sequence = tf.reduce_sum(unlabeled_true_positives, axis=[1,2])
       n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
       # (n) x 2 -> ()
       # A sequence is correct when every target edge is a true positive.
       n_correct_unlabeled_sequences = tf.reduce_sum(nn.equal(n_unlabeled_true_positives_per_sequence, n_targets_per_sequence))
       n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
       
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['label_targets'] = self.placeholder
   outputs['probabilities'] = probabilities
   # No separate unlabeled loss is computed in this method.
   outputs['unlabeled_loss'] = tf.constant(0.)
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = unlabeled_predictions
   outputs['label_predictions'] = predictions
   outputs['n_unlabeled_true_positives'] = n_unlabeled_true_positives
   outputs['n_unlabeled_false_positives'] = n_unlabeled_false_positives
   outputs['n_unlabeled_false_negatives'] = n_unlabeled_false_negatives
   outputs['n_correct_unlabeled_sequences'] = n_correct_unlabeled_sequences
   outputs['n_true_positives'] = n_true_positives
   outputs['n_false_positives'] = n_false_positives
   outputs['n_false_negatives'] = n_false_negatives
   outputs['n_correct_sequences'] = n_correct_sequences
   return outputs
示例#4
0
 def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
   """Build a bilinear discriminator that scores every position pair.

   ``layer`` passes through ``self.n_layers - 1`` hidden layers and a final
   split layer; the two splits are scored by a (diagonal) bilinear
   discriminator.  A pair is a target wherever the placeholder is greater
   than 0, and is predicted wherever its logit is greater than 0.

   Args:
     layer: input tensor for the hidden layers; also returned unchanged
       under the ``recur_layer`` key.
     token_weights: mask used as loss weights and multiplied into the
       predictions.
     variable_scope: scope name; falls back to ``self.classname``.
     reuse: when true the hidden keep probability is forced to 1.

   Returns:
     A dict of tensors.  Every ``*unlabeled*`` entry and its unprefixed
     counterpart refer to the same tensor.
   """
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     # The scope name used to be built as ``'FC-top' % i``, which raises
     # TypeError (no format specifier) -- and NameError when the loop above
     # never ran.  The scope name is a plain constant.
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Discriminator'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_discriminator(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_discriminator(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       
       #-----------------------------------------------------------
       # Process the targets
       # (n x m x m) -> (n x m x m)
       unlabeled_targets = nn.greater(self.placeholder, 0)
       
       #-----------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m) -> (n x m x m)
       # NOTE: unlike the predictions below, the probabilities are not
       # multiplied by token_weights here.
       probabilities = tf.nn.sigmoid(logits)
       # (n x m x m), (n x m x m), (n x m x m) -> ()
       loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)
       
       #-----------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x m) -> (n x m x m)
       predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
       # (n x m x m) (*) (n x m x m) -> (n x m x m)
       true_positives = predictions * unlabeled_targets
       # (n x m x m) -> ()
       n_predictions = tf.reduce_sum(predictions)
       n_targets = tf.reduce_sum(unlabeled_targets)
       n_true_positives = tf.reduce_sum(true_positives)
       # () - () -> ()
       n_false_positives = n_predictions - n_true_positives
       n_false_negatives = n_targets - n_true_positives
       # (n x m x m) -> (n)
       n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
       n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
       # (n) x 2 -> ()
       # A sequence is correct when every target pair is a true positive.
       n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   # There are no labels here, so the labeled and unlabeled entries share
   # the same tensors.
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = loss
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = predictions
   outputs['n_unlabeled_true_positives'] = n_true_positives
   outputs['n_unlabeled_false_positives'] = n_false_positives
   outputs['n_unlabeled_false_negatives'] = n_false_negatives
   outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
   outputs['predictions'] = predictions
   outputs['n_true_positives'] = n_true_positives
   outputs['n_false_positives'] = n_false_positives
   outputs['n_false_negatives'] = n_false_negatives
   outputs['n_correct_sequences'] = n_correct_sequences
   return outputs
示例#5
0
  def build_graph(self, input_network_outputs=None, reuse=True):
    """Build the graph for the semantic graph parser and return its outputs.

    The input embeddings are concatenated and rescaled, run through
    ``self.n_layers`` recurrent layers, and then handed to the ``semrel`` /
    ``semhead`` classifiers.

    Args:
      input_network_outputs: optional iterable of
        ``(input_network, output)`` pairs; each pair contributes one more
        input tensor.  ``None`` (the default) means there are no upstream
        networks.
      reuse: forwarded as the ``reuse`` flag of every vocabulary call.
        When true, the conv/recurrent keep and include probabilities are
        forced to 1 and the summed POS embedding is not passed through the
        vocabulary's drop function.

    Returns:
      A tuple ``(outputs, tokens)``.  ``outputs`` holds the classifier
      result under ``'semgraph'`` when a ``semrel`` vocabulary exists,
      otherwise under the ``semhead`` vocabulary's classname.  ``tokens``
      holds the token counts plus the 2-D and 3-D weight masks.
    """
    
    # ``None`` stands in for "no upstream networks"; a literal ``{}``
    # default would be a single object shared by every call.
    if input_network_outputs is None:
      input_network_outputs = {}
    
    with tf.variable_scope('Embeddings'):
      if self.sum_pos: # TODO this should be done with a `POSMultivocab`
        # POS embeddings are fetched with keep probability 1, summed into a
        # single tensor, and only then (when not reusing) passed through the
        # first POS vocabulary's drop function.
        pos_vocabs = [input_vocab for input_vocab in self.input_vocabs if 'POS' in input_vocab.classname]
        pos_tensors = [input_vocab.get_input_tensor(embed_keep_prob=1, reuse=reuse) for input_vocab in pos_vocabs]
        non_pos_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs if 'POS' not in input_vocab.classname]
        if pos_tensors:
          pos_tensors = tf.add_n(pos_tensors)
          if not reuse:
            pos_tensors = [pos_vocabs[0].drop_func(pos_tensors, pos_vocabs[0].embed_keep_prob)]
          else:
            pos_tensors = [pos_tensors]
        input_tensors = non_pos_tensors + pos_tensors
      else:
        input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
      # NOTE(review): each *element* is unpacked into a pair here, so a
      # non-empty dict would be iterated by key -- confirm that callers pass
      # (network, output) pairs rather than a mapping.
      for input_network, output in input_network_outputs:
        with tf.variable_scope(input_network.classname):
          input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
      layer = tf.concat(input_tensors, 2)

    # Rescale every position by input_size / (number of non-zero features);
    # the epsilon guards against all-zero positions.
    n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keepdims=True))
    batch_size, bucket_size, input_size = nn.get_sizes(layer)
    layer *= input_size / (n_nonzero + tf.constant(1e-12))
    
    # Presumably 1 wherever the id placeholder is positive -- the exact
    # semantics live in the project helper ``nn.greater``.
    token_weights = nn.greater(self.id_vocab.placeholder, 0)
    tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
    n_tokens = tf.reduce_sum(tokens_per_sequence)
    n_sequences = tf.count_nonzero(tokens_per_sequence)
    # One more than the token count; presumably this accounts for an extra
    # root position -- TODO confirm.
    seq_lengths = tokens_per_sequence+1

    # Same as token_weights but with position 0 switched on as well.
    root_weights = token_weights + (1-nn.greater(tf.range(bucket_size), 0))
    # Pairwise mask: the token mask on one axis times the root-inclusive
    # mask on the other.
    token_weights3D = tf.expand_dims(token_weights, axis=-1) * tf.expand_dims(root_weights, axis=-2)
    # 'token_weights3D' used to store the 2-D ``token_weights`` by mistake;
    # it now holds the pairwise mask computed above.
    tokens = {'n_tokens': n_tokens,
              'tokens_per_sequence': tokens_per_sequence,
              'token_weights': token_weights,
              'token_weights3D': token_weights3D,
              'n_sequences': n_sequences}
    
    conv_keep_prob = 1. if reuse else self.conv_keep_prob
    recur_keep_prob = 1. if reuse else self.recur_keep_prob
    recur_include_prob = 1. if reuse else self.recur_include_prob
    
    for i in six.moves.range(self.n_layers):
      # The first layer may use a different convolution width.
      conv_width = self.first_layer_conv_width if not i else self.conv_width
      with tf.variable_scope('RNN-{}'.format(i)):
        layer, _ = recurrent.directed_RNN(layer, self.recur_size, seq_lengths,
                                          bidirectional=self.bidirectional,
                                          recur_cell=self.recur_cell,
                                          conv_width=conv_width,
                                          recur_func=self.recur_func,
                                          conv_keep_prob=conv_keep_prob,
                                          recur_include_prob=recur_include_prob,
                                          recur_keep_prob=recur_keep_prob,
                                          cifg=self.cifg,
                                          highway=self.highway,
                                          highway_func=self.highway_func,
                                          bilin=self.bilin)
  
    output_fields = {vocab.field: vocab for vocab in self.output_vocabs}
    outputs = {}
    with tf.variable_scope('Classifiers'):
      if 'semrel' in output_fields:
        vocab = output_fields['semrel']
        # A 'semrel' vocabulary requires a 'semhead' one (KeyError if not).
        head_vocab = output_fields['semhead']
        if vocab.factorized:
          # Factorized: first decide which edges exist, then label them.
          with tf.variable_scope('Unlabeled'):
            unlabeled_outputs = head_vocab.get_bilinear_discriminator(
              layer,
              token_weights=token_weights3D,
              reuse=reuse)
          with tf.variable_scope('Labeled'):
            labeled_outputs = vocab.get_bilinear_classifier(
              layer, unlabeled_outputs,
              token_weights=token_weights3D,
              reuse=reuse)
        else:
          # NOTE(review): the head placeholder is passed as the second
          # positional argument, so the callee must accept it there ahead
          # of ``token_weights`` -- confirm against the vocab class.
          labeled_outputs = vocab.get_unfactored_bilinear_classifier(layer, head_vocab.placeholder,
            token_weights=token_weights3D,
            reuse=reuse)
        outputs['semgraph'] = labeled_outputs
        self._evals.add('semgraph')
      elif 'semhead' in output_fields:
        vocab = output_fields['semhead']
        outputs[vocab.classname] = vocab.get_bilinear_classifier(
          layer,
          token_weights=token_weights3D,
          reuse=reuse)
        self._evals.add('semhead')
    
    return outputs, tokens
示例#6
0
 def build_graph(self, input_network_outputs=None, reuse=True):
   """Build the graph for the tagger network and return its outputs.

   The input embeddings are concatenated and rescaled, run through
   ``self.n_layers`` recurrent layers, and then handed to one classifier
   per output vocabulary among ``lemma``, ``upos``, ``xpos``, ``ufeats``
   and ``deprel``.

   When ``self.share_layer`` is false and a ``upos`` vocabulary exists, the
   ``xpos`` and ``ufeats`` classifiers are additionally conditioned on an
   embedding of the UPOS tags.  Otherwise they are plain linear
   classifiers.  When ``self.share_layer`` is true, every classifier after
   the first receives the first classifier's output as an extra argument.

   Args:
     input_network_outputs: optional iterable of
       ``(input_network, output)`` pairs; each pair contributes one more
       input tensor.  ``None`` (the default) means there are no upstream
       networks.
     reuse: forwarded as the ``reuse`` flag of every vocabulary call.
       When true, the conv/recurrent keep and include probabilities are
       forced to 1 and the predicted (rather than target) UPOS tags are
       embedded.

   Returns:
     A tuple ``(outputs, tokens)``.  ``outputs`` maps each classified field
     name to whatever that vocabulary's classifier returned; ``tokens`` is
     a dict with the keys ``n_tokens``, ``tokens_per_sequence``,
     ``token_weights`` and ``n_sequences``.
   """
   
   # ``None`` stands in for "no upstream networks"; a literal ``{}`` default
   # would be a single object shared by every call.
   if input_network_outputs is None:
     input_network_outputs = {}
   
   with tf.variable_scope('Embeddings'):
     input_tensors = [input_vocab.get_input_tensor(reuse=reuse) for input_vocab in self.input_vocabs]
     # NOTE(review): each *element* is unpacked into a pair here, so a
     # non-empty dict would be iterated by key -- confirm that callers pass
     # (network, output) pairs rather than a mapping.
     for input_network, output in input_network_outputs:
       with tf.variable_scope(input_network.classname):
         input_tensors.append(input_network.get_input_tensor(output, reuse=reuse))
     layer = tf.concat(input_tensors, 2)
   # Rescale every position by input_size / (number of non-zero features);
   # the epsilon guards against all-zero positions.
   batch_size, bucket_size, input_size = nn.get_sizes(layer)
   n_nonzero = tf.to_float(tf.count_nonzero(layer, axis=-1, keep_dims=True))
   layer *= input_size / (n_nonzero + tf.constant(1e-12))
   
   # Presumably 1 wherever the id placeholder is positive -- the exact
   # semantics live in the project helper ``nn.greater``.
   token_weights = nn.greater(self.id_vocab.placeholder, 0)
   tokens_per_sequence = tf.reduce_sum(token_weights, axis=1)
   n_tokens = tf.reduce_sum(tokens_per_sequence)
   n_sequences = tf.count_nonzero(tokens_per_sequence)
   # One more than the token count; presumably this accounts for an extra
   # root position -- TODO confirm.
   seq_lengths = tokens_per_sequence + 1
   tokens = {'n_tokens': n_tokens,
             'tokens_per_sequence': tokens_per_sequence,
             'token_weights': token_weights,
             'n_sequences': n_sequences}
   
   conv_keep_prob = 1. if reuse else self.conv_keep_prob
   recur_keep_prob = 1. if reuse else self.recur_keep_prob
   recur_include_prob = 1. if reuse else self.recur_include_prob
   
   for i in six.moves.range(self.n_layers):
     # The first layer may use a different convolution width.
     conv_width = self.first_layer_conv_width if not i else self.conv_width
     with tf.variable_scope('RNN-{}'.format(i)):
       layer, _ = recurrent.directed_RNN(layer, self.recur_size, seq_lengths,
                                         bidirectional=self.bidirectional,
                                         recur_cell=self.recur_cell,
                                         conv_width=conv_width,
                                         recur_func=self.recur_func,
                                         conv_keep_prob=conv_keep_prob,
                                         recur_keep_prob=recur_keep_prob,
                                         recur_include_prob=recur_include_prob,
                                         cifg=self.cifg,
                                         highway=self.highway,
                                         highway_func=self.highway_func,
                                         bilin=self.bilin)
   
   output_vocabs = {vocab.field: vocab for vocab in self.output_vocabs}
   outputs = {}
   with tf.variable_scope('Classifiers'):
     # ``last_output`` is the output of the FIRST classifier built; once it
     # is set it is never replaced.  It is passed on only when
     # ``self.share_layer`` is true.
     last_output = None
     if 'lemma' in output_vocabs:
       vocab = output_vocabs['lemma']
       outputs[vocab.field] = vocab.get_linear_classifier(
         layer, token_weights,
         last_output if self.share_layer else None,
         reuse=reuse)
       self._evals.add('lemma')
       if last_output is None:
         last_output = outputs[vocab.field]
     if 'upos' in output_vocabs:
       vocab = output_vocabs['upos']
       outputs[vocab.field] = vocab.get_linear_classifier(
         layer, token_weights,
         last_output if self.share_layer else None,
         reuse=reuse)
       self._evals.add('upos')
       if last_output is None:
         last_output = outputs[vocab.field]
       # Embed the predicted tags when reusing, the target tags otherwise.
       if reuse:
         upos_idxs = outputs[vocab.field]['predictions']
       else:
         upos_idxs = outputs[vocab.field]['targets']
       upos_embed = vocab.get_input_tensor(inputs=upos_idxs, embed_keep_prob=1, reuse=reuse)
       # Without a shared layer, XPOS and UFeats are conditioned on the
       # UPOS embedding.
       if 'xpos' in output_vocabs and not self.share_layer:
         vocab = output_vocabs['xpos']
         outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
           layer, upos_embed, token_weights,
           reuse=reuse)
         self._evals.add('xpos')
       if 'ufeats' in output_vocabs and not self.share_layer:
         vocab = output_vocabs['ufeats']
         outputs[vocab.field] = vocab.get_bilinear_classifier_with_embeddings(
           layer, upos_embed, token_weights,
           reuse=reuse)
         self._evals.add('ufeats')
     # Plain linear XPOS/UFeats classifiers cover the remaining cases: no
     # UPOS vocabulary, or a shared layer.
     if 'xpos' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
       vocab = output_vocabs['xpos']
       outputs[vocab.field] = vocab.get_linear_classifier(
         layer, token_weights,
         last_output if self.share_layer else None,
         reuse=reuse)
       self._evals.add('xpos')
       if last_output is None:
         last_output = outputs[vocab.field]
     if 'ufeats' in output_vocabs and ('upos' not in output_vocabs or self.share_layer):
       vocab = output_vocabs['ufeats']
       outputs[vocab.field] = vocab.get_linear_classifier(
         layer, token_weights,
         last_output if self.share_layer else None,
         reuse=reuse)
       self._evals.add('ufeats')
       if last_output is None:
         last_output = outputs[vocab.field]
     if 'deprel' in output_vocabs:
       vocab = output_vocabs['deprel']
       outputs[vocab.field] = vocab.get_linear_classifier(
         layer, token_weights,
         last_output if self.share_layer else None,
         reuse=reuse)
       self._evals.add('deprel')
       if last_output is None:
         last_output = outputs[vocab.field]
   return outputs, tokens