Example #1
    def get_input_tensor(self, outputs, reuse=True):
        """"""

        output_keep_prob = 1. if reuse else self.output_keep_prob
        for output in outputs:
            pass  # we just need to grab one
        layer = output['recur_layer']
        with tf.variable_scope(self.classname):
            layer = classifiers.hiddens(layer,
                                        self.output_size,
                                        hidden_func=self.output_func,
                                        hidden_keep_prob=output_keep_prob,
                                        reuse=reuse)
        return [layer]
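In this snippet `reuse` doubles as an evaluation switch: when variables are reused, the keep probability is forced to 1 so no dropout is applied. A minimal sketch of that convention with plain TensorFlow 1.x ops (the helper `maybe_dropout` is illustrative and not part of the codebase):

import tensorflow as tf  # TensorFlow 1.x, as in the snippets

def maybe_dropout(layer, keep_prob, reuse):
    # Reuse implies evaluation/decoding, so dropout is disabled.
    keep_prob = 1. if reuse else keep_prob
    if isinstance(keep_prob, float) and keep_prob >= 1.:
        return layer
    return tf.nn.dropout(layer, keep_prob=keep_prob)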
Example #2
    def get_bilinear_classifier(self,
                                layer,
                                token_weights,
                                variable_scope=None,
                                reuse=False):
        """"""

        recur_layer = layer
        hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
        hidden_func = self.hidden_func
        hidden_size = self.hidden_size
        add_linear = self.add_linear
        linearize = self.linearize
        distance = self.distance
        n_splits = 2 * (1 + linearize + distance)
        with tf.variable_scope(variable_scope or self.field):
            for i in six.moves.range(0, self.n_layers - 1):
                with tf.variable_scope('FC-%d' % i):
                    layer = classifiers.hidden(
                        layer,
                        n_splits * hidden_size,
                        hidden_func=hidden_func,
                        hidden_keep_prob=hidden_keep_prob)
            with tf.variable_scope('FC-top'):
                layers = classifiers.hiddens(layer,
                                             n_splits * [hidden_size],
                                             hidden_func=hidden_func,
                                             hidden_keep_prob=hidden_keep_prob)
            layer1, layer2 = layers.pop(0), layers.pop(0)
            if linearize:
                lin_layer1, lin_layer2 = layers.pop(0), layers.pop(0)
            if distance:
                dist_layer1, dist_layer2 = layers.pop(0), layers.pop(0)

            with tf.variable_scope('Attention'):
                if self.diagonal:
                    logits, _ = classifiers.diagonal_bilinear_attention(
                        layer1,
                        layer2,
                        hidden_keep_prob=hidden_keep_prob,
                        add_linear=add_linear)
                    if linearize:
                        with tf.variable_scope('Linearization'):
                            lin_logits = classifiers.diagonal_bilinear_discriminator(
                                lin_layer1,
                                lin_layer2,
                                hidden_keep_prob=hidden_keep_prob,
                                add_linear=add_linear)
                    if distance:
                        with tf.variable_scope('Distance'):
                            dist_lamda = 1 + tf.nn.softplus(
                                classifiers.diagonal_bilinear_discriminator(
                                    dist_layer1,
                                    dist_layer2,
                                    hidden_keep_prob=hidden_keep_prob,
                                    add_linear=add_linear))
                else:
                    logits, _ = classifiers.bilinear_attention(
                        layer1,
                        layer2,
                        hidden_keep_prob=hidden_keep_prob,
                        add_linear=add_linear)
                    if linearize:
                        with tf.variable_scope('Linearization'):
                            lin_logits = classifiers.bilinear_discriminator(
                                lin_layer1,
                                lin_layer2,
                                hidden_keep_prob=hidden_keep_prob,
                                add_linear=add_linear)
                    if distance:
                        with tf.variable_scope('Distance'):
                            dist_lamda = 1 + tf.nn.softplus(
                                classifiers.bilinear_discriminator(
                                    dist_layer1,
                                    dist_layer2,
                                    hidden_keep_prob=hidden_keep_prob,
                                    add_linear=add_linear))

                #-----------------------------------------------------------
                # Process the targets
                targets = self.placeholder
                shape = tf.shape(layer1)
                batch_size, bucket_size = shape[0], shape[1]
                # (1 x m)
                ids = tf.expand_dims(tf.range(bucket_size), 0)
                # (1 x m) -> (1 x 1 x m)
                head_ids = tf.expand_dims(ids, -2)
                # (1 x m) -> (1 x m x 1)
                dep_ids = tf.expand_dims(ids, -1)
                if linearize:
                    # Wherever the head is to the left
                    # (n x m), (1 x m) -> (n x m)
                    lin_targets = tf.to_float(tf.less(targets, ids))
                    # cross-entropy of the linearization of each i,j pair
                    # (1 x 1 x m), (1 x m x 1) -> (n x m x m)
                    lin_ids = tf.tile(tf.less(head_ids, dep_ids),
                                      [batch_size, 1, 1])
                    # (n x 1 x m), (n x m x 1) -> (n x m x m)
                    lin_xent = -tf.nn.softplus(
                        tf.where(lin_ids, -lin_logits, lin_logits))
                    # add the cross-entropy to the logits
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    logits += tf.stop_gradient(lin_xent)
                if distance:
                    # (n x m) - (1 x m) -> (n x m)
                    dist_targets = tf.abs(targets - ids)
                    # KL-divergence of the distance of each i,j pair
                    # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
                    dist_ids = tf.to_float(
                        tf.tile(tf.abs(head_ids - dep_ids),
                                [batch_size, 1, 1])) + 1e-12
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    #dist_kld = (dist_ids * tf.log(dist_lamda / dist_ids) + dist_ids - dist_lamda)
                    dist_kld = -tf.log((dist_ids - dist_lamda)**2 / 2 + 1)
                    # add the KL-divergence to the logits
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    logits += tf.stop_gradient(dist_kld)

                #-----------------------------------------------------------
                # Compute probabilities/cross entropy
                # (n x m) + (m) -> (n x m)
                non_pads = tf.to_float(token_weights) + tf.to_float(
                    tf.logical_not(
                        tf.cast(tf.range(bucket_size), dtype=tf.bool)))
                # (n x m x m) o (n x 1 x m) -> (n x m x m)
                probabilities = tf.nn.softmax(logits) * tf.expand_dims(
                    non_pads, -2)
                # (n x m), (n x m x m), (n x m) -> ()
                loss = tf.losses.sparse_softmax_cross_entropy(
                    targets, logits, weights=token_weights)
                # (n x m) -> (n x m x m x 1)
                one_hot_targets = tf.expand_dims(
                    tf.one_hot(targets, bucket_size), -1)
                # (n x m) -> ()
                n_tokens = tf.to_float(tf.reduce_sum(token_weights))
                if linearize:
                    # (n x m x m) -> (n x m x 1 x m)
                    lin_xent_reshaped = tf.expand_dims(lin_xent, -2)
                    # (n x m x 1 x m) * (n x m x m x 1) -> (n x m x 1 x 1)
                    lin_target_xent = tf.matmul(lin_xent_reshaped,
                                                one_hot_targets)
                    # (n x m x 1 x 1) -> (n x m)
                    lin_target_xent = tf.squeeze(lin_target_xent, [-1, -2])
                    # (n x m), (n x m), (n x m) -> ()
                    loss -= tf.reduce_sum(
                        lin_target_xent *
                        tf.to_float(token_weights)) / (n_tokens + 1e-12)
                if distance:
                    # (n x m x m) -> (n x m x 1 x m)
                    dist_kld_reshaped = tf.expand_dims(dist_kld, -2)
                    # (n x m x 1 x m) * (n x m x m x 1) -> (n x m x 1 x 1)
                    dist_target_kld = tf.matmul(dist_kld_reshaped,
                                                one_hot_targets)
                    # (n x m x 1 x 1) -> (n x m)
                    dist_target_kld = tf.squeeze(dist_target_kld, [-1, -2])
                    # (n x m), (n x m), (n x m) -> ()
                    loss -= tf.reduce_sum(
                        dist_target_kld *
                        tf.to_float(token_weights)) / (n_tokens + 1e-12)

                #-----------------------------------------------------------
                # Compute predictions/accuracy
                # (n x m x m) -> (n x m)
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # (n x m) (*) (n x m) -> (n x m)
                correct_tokens = nn.equal(targets, predictions) * token_weights
                # (n x m) -> (n)
                tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
                # (n x m) -> (n)
                correct_tokens_per_sequence = tf.reduce_sum(correct_tokens,
                                                            axis=-1)
                # (n), (n) -> (n)
                correct_sequences = nn.equal(tokens_per_sequence,
                                             correct_tokens_per_sequence)

        #-----------------------------------------------------------
        # Populate the output dictionary
        outputs = {}
        outputs['recur_layer'] = recur_layer
        outputs['unlabeled_targets'] = self.placeholder
        outputs['probabilities'] = probabilities
        outputs['unlabeled_loss'] = loss
        outputs['loss'] = loss

        outputs['unlabeled_predictions'] = predictions
        outputs['predictions'] = predictions
        outputs['correct_unlabeled_tokens'] = correct_tokens
        outputs['n_correct_unlabeled_tokens'] = tf.reduce_sum(correct_tokens)
        outputs['n_correct_unlabeled_sequences'] = tf.reduce_sum(
            correct_sequences)
        outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
        outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
        return outputs
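The distance term above turns `dist_lamda = 1 + softplus(...)` into a bonus `-log((dist_ids - dist_lamda)**2 / 2 + 1)` that is added to the arc logits under `tf.stop_gradient`. A small NumPy sketch (illustrative only, not from the codebase) of how that bonus behaves:

import numpy as np

def dist_bonus(dist_ids, dist_lamda):
    # 0 when the predicted lamda matches the true head-dependent distance,
    # increasingly negative as the two diverge.
    return -np.log((dist_ids - dist_lamda) ** 2 / 2 + 1)

print(dist_bonus(3.0, 3.0))  # 0.0
print(dist_bonus(3.0, 1.0))  # ≈ -1.10
print(dist_bonus(3.0, 8.0))  # ≈ -2.60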
Example #3
    def get_bilinear_discriminator(self,
                                   layer,
                                   token_weights,
                                   variable_scope=None,
                                   reuse=False):
        """"""

        recur_layer = layer
        hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
        add_linear = self.add_linear
        n_splits = 2 * (1 + self.linearize + self.distance)
        with tf.variable_scope(variable_scope or self.field):
            for i in six.moves.range(0, self.n_layers - 1):
                with tf.variable_scope('FC-%d' % i):
                    layer = classifiers.hidden(
                        layer,
                        n_splits * self.hidden_size,
                        hidden_func=self.hidden_func,
                        hidden_keep_prob=hidden_keep_prob)
            with tf.variable_scope('FC-top'):
                layers = classifiers.hiddens(layer,
                                             n_splits * [self.hidden_size],
                                             hidden_func=self.hidden_func,
                                             hidden_keep_prob=hidden_keep_prob)
            layer1, layer2 = layers.pop(0), layers.pop(0)
            if self.linearize:
                lin_layer1, lin_layer2 = layers.pop(0), layers.pop(0)
            if self.distance:
                dist_layer1, dist_layer2 = layers.pop(0), layers.pop(0)

            with tf.variable_scope('Discriminator'):
                if self.diagonal:
                    logits = classifiers.diagonal_bilinear_discriminator(
                        layer1,
                        layer2,
                        hidden_keep_prob=hidden_keep_prob,
                        add_linear=add_linear)
                    if self.linearize:
                        with tf.variable_scope('Linearization'):
                            lin_logits = classifiers.diagonal_bilinear_discriminator(
                                lin_layer1,
                                lin_layer2,
                                hidden_keep_prob=hidden_keep_prob,
                                add_linear=add_linear)
                    if self.distance:
                        with tf.variable_scope('Distance'):
                            dist_lamda = 1 + tf.nn.softplus(
                                classifiers.diagonal_bilinear_discriminator(
                                    dist_layer1,
                                    dist_layer2,
                                    hidden_keep_prob=hidden_keep_prob,
                                    add_linear=add_linear))
                else:
                    logits = classifiers.bilinear_discriminator(
                        layer1,
                        layer2,
                        hidden_keep_prob=hidden_keep_prob,
                        add_linear=add_linear)
                    if self.linearize:
                        with tf.variable_scope('Linearization'):
                            lin_logits = classifiers.bilinear_discriminator(
                                lin_layer1,
                                lin_layer2,
                                hidden_keep_prob=hidden_keep_prob,
                                add_linear=add_linear)
                    if self.distance:
                        with tf.variable_scope('Distance'):
                            dist_lamda = 1 + tf.nn.softplus(
                                classifiers.bilinear_discriminator(
                                    dist_layer1,
                                    dist_layer2,
                                    hidden_keep_prob=hidden_keep_prob,
                                    add_linear=add_linear))

                #-----------------------------------------------------------
                # Process the targets
                # (n x m x m) -> (n x m x m)
                unlabeled_targets = self.placeholder
                shape = tf.shape(layer1)
                batch_size, bucket_size = shape[0], shape[1]
                # (1 x m)
                ids = tf.expand_dims(tf.range(bucket_size), 0)
                # (1 x m) -> (1 x 1 x m)
                head_ids = tf.expand_dims(ids, -2)
                # (1 x m) -> (1 x m x 1)
                dep_ids = tf.expand_dims(ids, -1)
                if self.linearize:
                    # Wherever the head is to the left
                    # (n x m x m), (1 x m x 1) -> (n x m x m)
                    lin_targets = tf.to_float(
                        tf.less(unlabeled_targets, dep_ids))
                    # cross-entropy of the linearization of each i,j pair
                    # (1 x 1 x m), (1 x m x 1) -> (n x m x m)
                    lin_ids = tf.tile(tf.less(head_ids, dep_ids),
                                      [batch_size, 1, 1])
                    # (n x 1 x m), (n x m x 1) -> (n x m x m)
                    lin_xent = -tf.nn.softplus(
                        tf.where(lin_ids, -lin_logits, lin_logits))
                    # add the cross-entropy to the logits
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    logits += tf.stop_gradient(lin_xent)
                if self.distance:
                    # (n x m x m) - (1 x m x 1) -> (n x m x m)
                    dist_targets = tf.abs(unlabeled_targets - dep_ids)
                    # KL-divergence of the distance of each i,j pair
                    # (1 x 1 x m) - (1 x m x 1) -> (n x m x m)
                    dist_ids = tf.to_float(
                        tf.tile(tf.abs(head_ids - dep_ids),
                                [batch_size, 1, 1])) + 1e-12
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    #dist_kld = (dist_ids * tf.log(dist_lamda / dist_ids) + dist_ids - dist_lamda)
                    dist_kld = -tf.log((dist_ids - dist_lamda)**2 / 2 + 1)
                    # add the KL-divergence to the logits
                    # (n x m x m), (n x m x m) -> (n x m x m)
                    logits += tf.stop_gradient(dist_kld)

                #-----------------------------------------------------------
                # Compute probabilities/cross entropy
                # (n x m x m) -> (n x m x m)
                probabilities = tf.nn.sigmoid(logits) * tf.to_float(
                    token_weights)
                # (n x m x m), (n x m x m), (n x m x m) -> ()
                loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets,
                                                       logits,
                                                       weights=token_weights)
                n_tokens = tf.to_float(tf.reduce_sum(token_weights))
                if self.linearize:
                    lin_target_xent = lin_xent * unlabeled_targets
                    loss -= tf.reduce_sum(
                        lin_target_xent *
                        tf.to_float(token_weights)) / (n_tokens + 1e-12)
                if self.distance:
                    dist_target_kld = dist_kld * unlabeled_targets
                    loss -= tf.reduce_sum(
                        dist_target_kld *
                        tf.to_float(token_weights)) / (n_tokens + 1e-12)

                #-----------------------------------------------------------
                # Compute predictions/accuracy
                # (n x m x m) -> (n x m x m)
                predictions = nn.greater(logits, 0,
                                         dtype=tf.int32) * token_weights
                # (n x m x m) (*) (n x m x m) -> (n x m x m)
                true_positives = predictions * unlabeled_targets
                # (n x m x m) -> ()
                n_predictions = tf.reduce_sum(predictions)
                n_targets = tf.reduce_sum(unlabeled_targets)
                n_true_positives = tf.reduce_sum(true_positives)
                # () - () -> ()
                n_false_positives = n_predictions - n_true_positives
                n_false_negatives = n_targets - n_true_positives
                # (n x m x m) -> (n)
                n_targets_per_sequence = tf.reduce_sum(unlabeled_targets,
                                                       axis=[1, 2])
                n_true_positives_per_sequence = tf.reduce_sum(true_positives,
                                                              axis=[1, 2])
                # (n) x 2 -> ()
                n_correct_sequences = tf.reduce_sum(
                    nn.equal(n_true_positives_per_sequence,
                             n_targets_per_sequence))

        #-----------------------------------------------------------
        # Populate the output dictionary
        outputs = {}
        outputs['unlabeled_targets'] = unlabeled_targets
        outputs['probabilities'] = probabilities
        outputs['unlabeled_loss'] = loss
        outputs['loss'] = loss

        outputs['unlabeled_predictions'] = predictions
        outputs['n_unlabeled_true_positives'] = n_true_positives
        outputs['n_unlabeled_false_positives'] = n_false_positives
        outputs['n_unlabeled_false_negatives'] = n_false_negatives
        outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
        outputs['predictions'] = predictions
        outputs['n_true_positives'] = n_true_positives
        outputs['n_false_positives'] = n_false_positives
        outputs['n_false_negatives'] = n_false_negatives
        outputs['n_correct_sequences'] = n_correct_sequences
        return outputs
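The output dictionary exposes raw counts rather than ratios. Assuming the counts have been fetched from a session run, precision, recall, and F1 could be aggregated outside the graph like this (the helper below is an assumption, not part of the codebase):

def prf_from_counts(n_true_positives, n_false_positives, n_false_negatives):
    # Standard precision/recall/F1 from the counts in `outputs`.
    precision = n_true_positives / (n_true_positives + n_false_positives + 1e-12)
    recall = n_true_positives / (n_true_positives + n_false_negatives + 1e-12)
    f1 = 2 * precision * recall / (precision + recall + 1e-12)
    return precision, recall, f1

print(prf_from_counts(80, 20, 40))  # (≈0.80, ≈0.67, ≈0.73)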
Example #4
 def get_bilinear_classifier(self, layer, outputs, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.field):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                     hidden_func=self.hidden_func,
                                     hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                   hidden_func=self.hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
   
   #-----------------------------------------------------------
   # Process the targets
   # (n x m x m)
   label_targets = self.placeholder
   unlabeled_predictions = outputs['unlabeled_predictions']
   unlabeled_targets = outputs['unlabeled_targets']
   
   #-----------------------------------------------------------
   # Process the logits
   # (n x m x c x m) -> (n x m x m x c)
   transposed_logits = tf.transpose(logits, [0,1,3,2])
   
   #-----------------------------------------------------------
   # Compute the probabilities/cross entropy
   # (n x m x m) -> (n x m x m x 1)
   head_probabilities = tf.expand_dims(tf.stop_gradient(outputs['probabilities']), axis=-1)
   # (n x m x m x c) -> (n x m x m x c)
   label_probabilities = tf.nn.softmax(transposed_logits) * tf.to_float(tf.expand_dims(token_weights, axis=-1))
   # (n x m x m), (n x m x m x c), (n x m x m) -> ()
   label_loss = tf.losses.sparse_softmax_cross_entropy(label_targets, transposed_logits, weights=token_weights*unlabeled_targets)
   
   #-----------------------------------------------------------
   # Compute the predictions/accuracy
   # (n x m x m x c) -> (n x m x m)
   predictions = tf.argmax(transposed_logits, axis=-1, output_type=tf.int32)
   # (n x m x m) (*) (n x m x m) -> (n x m x m)
   true_positives = nn.equal(label_targets, predictions) * unlabeled_predictions
   correct_label_tokens = nn.equal(label_targets, predictions) * unlabeled_targets
   # (n x m x m) -> ()
   n_unlabeled_predictions = tf.reduce_sum(unlabeled_predictions)
   n_unlabeled_targets = tf.reduce_sum(unlabeled_targets)
   n_true_positives = tf.reduce_sum(true_positives)
   n_correct_label_tokens = tf.reduce_sum(correct_label_tokens)
   # () - () -> ()
   n_false_positives = n_unlabeled_predictions - n_true_positives
   n_false_negatives = n_unlabeled_targets - n_true_positives
   # (n x m x m) -> (n)
   n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
   n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
   n_correct_label_tokens_per_sequence = tf.reduce_sum(correct_label_tokens, axis=[1,2])
   # (n) x 2 -> ()
   n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
   n_correct_label_sequences = tf.reduce_sum(nn.equal(n_correct_label_tokens_per_sequence, n_targets_per_sequence))
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   rho = self.loss_interpolation
   outputs['label_targets'] = label_targets
   outputs['probabilities'] = label_probabilities * head_probabilities
   outputs['label_loss'] = label_loss
   outputs['loss'] = 2*((1-rho) * outputs['loss'] + rho * label_loss)
   
   outputs['n_true_positives'] = n_true_positives
   outputs['n_false_positives'] = n_false_positives
   outputs['n_false_negatives'] = n_false_negatives
   outputs['n_correct_sequences'] = n_correct_sequences
   outputs['n_correct_label_tokens'] = n_correct_label_tokens
   outputs['n_correct_label_sequences'] = n_correct_label_sequences
   return outputs
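The combined loss is `2*((1-rho) * outputs['loss'] + rho * label_loss)` with `rho = self.loss_interpolation`; when rho is 0.5 this is exactly the plain sum of the two losses, and the factor of 2 keeps the scale comparable for other values of rho. A one-line check with illustrative values:

# Illustrative values only.
rho = 0.5
unlabeled_loss, label_loss = 1.2, 0.8
loss = 2 * ((1 - rho) * unlabeled_loss + rho * label_loss)
assert abs(loss - (unlabeled_loss + label_loss)) < 1e-9  # rho = 0.5 reduces to a plain sum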
Example #5
 def get_bilinear_discriminator(self, layer, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                  hidden_func=self.hidden_func,
                                  hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Discriminator'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_discriminator(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_discriminator(
           layer1, layer2,
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       
       #-----------------------------------------------------------
       # Process the targets
       # (n x m x m) -> (n x m x m)
       unlabeled_targets = nn.greater(self.placeholder, 0)
       
       #-----------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m) -> (n x m x m)
       probabilities = tf.nn.sigmoid(logits)
       # (n x m x m), (n x m x m x c), (n x m x m) -> ()
       loss = tf.losses.sigmoid_cross_entropy(unlabeled_targets, logits, weights=token_weights)
       
       #-----------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x m x c) -> (n x m x m)
       predictions = nn.greater(logits, 0, dtype=tf.int32) * token_weights
       # (n x m x m) (*) (n x m x m) -> (n x m x m)
       true_positives = predictions * unlabeled_targets
       # (n x m x m) -> ()
       n_predictions = tf.reduce_sum(predictions)
       n_targets = tf.reduce_sum(unlabeled_targets)
       n_true_positives = tf.reduce_sum(true_positives)
       # () - () -> ()
       n_false_positives = n_predictions - n_true_positives
       n_false_negatives = n_targets - n_true_positives
       # (n x m x m) -> (n)
       n_targets_per_sequence = tf.reduce_sum(unlabeled_targets, axis=[1,2])
       n_true_positives_per_sequence = tf.reduce_sum(true_positives, axis=[1,2])
       # (n) x 2 -> ()
       n_correct_sequences = tf.reduce_sum(nn.equal(n_true_positives_per_sequence, n_targets_per_sequence))
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = loss
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = predictions
   outputs['n_unlabeled_true_positives'] = n_true_positives
   outputs['n_unlabeled_false_positives'] = n_false_positives
   outputs['n_unlabeled_false_negatives'] = n_false_negatives
   outputs['n_correct_unlabeled_sequences'] = n_correct_sequences
   outputs['predictions'] = predictions
   outputs['n_true_positives'] = n_true_positives
   outputs['n_false_positives'] = n_false_positives
   outputs['n_false_negatives'] = n_false_negatives
   outputs['n_correct_sequences'] = n_correct_sequences
   return outputs
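Here predictions threshold the logits at zero, which is equivalent to thresholding the sigmoid probabilities at 0.5. A quick NumPy check (illustrative only):

import numpy as np

logits = np.array([-2.0, -0.1, 0.3, 4.0])
probabilities = 1.0 / (1.0 + np.exp(-logits))
# nn.greater(logits, 0) above picks exactly the edges whose probability exceeds 0.5.
assert ((logits > 0) == (probabilities > 0.5)).all()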
Example #6
 def get_unfactored_bilinear_classifier(self, layer, unlabeled_targets, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   hidden_func = self.hidden_func
   hidden_size = self.hidden_size
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*hidden_size,
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[hidden_size],
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       bucket_size = tf.shape(layer)[-2]
       
       #-------------------------------------------------------
       # Process the targets
       # c (*) (n x m) + (n x m)
       #targets = len(self) * unlabeled_targets + self.placeholder
       targets = bucket_size * self.placeholder + unlabeled_targets
       
       #-------------------------------------------------------
       # Process the logits
       # (n x m x c x m) -> (n x m x cm)
       reshaped_logits = tf.reshape(logits, tf.stack([-1, bucket_size, bucket_size * len(self)]))
       
       #-------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x cm) -> (n x m x cm)
       probabilities = tf.nn.softmax(reshaped_logits)
       # (n x m x cm) -> (n x m x c x m)
       probabilities = tf.reshape(probabilities, tf.stack([-1, bucket_size, len(self), bucket_size]))
       # (n x m x c x m) -> (n x m x m x c)
       probabilities = tf.transpose(probabilities, [0,1,3,2])
       # (n x m), (n x m x cm), (n x m) -> ()
       loss = tf.losses.sparse_softmax_cross_entropy(targets, reshaped_logits, weights=token_weights)
       
       #-------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x cm) -> (n x m)
       predictions = tf.argmax(reshaped_logits, axis=-1, output_type=tf.int32)
       # (n x m), () -> (n x m)
       unlabeled_predictions = tf.mod(predictions, bucket_size)
       # (n x m) (*) (n x m) -> (n x m)
       correct_tokens = nn.equal(predictions, targets) * token_weights
       correct_unlabeled_tokens = nn.equal(unlabeled_predictions, unlabeled_targets) * token_weights
       
       # (n x m) -> (n)
       tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
       # (n x m) -> (n)
       correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
       correct_unlabeled_tokens_per_sequence = tf.reduce_sum(correct_unlabeled_tokens, axis=-1)
       # (n), (n) -> (n)
       correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
       correct_unlabeled_sequences = nn.equal(tokens_per_sequence, correct_unlabeled_tokens_per_sequence)
       
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['unlabeled_targets'] = unlabeled_targets
   outputs['probabilities'] = probabilities
   outputs['unlabeled_loss'] = tf.constant(0.)
   outputs['loss'] = loss
   
   outputs['unlabeled_predictions'] = unlabeled_predictions
   outputs['label_predictions'] = predictions
   outputs['n_correct_unlabeled_tokens'] = tf.reduce_sum(correct_unlabeled_tokens)
   outputs['n_correct_unlabeled_sequences'] = tf.reduce_sum(correct_unlabeled_sequences)
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   
   return outputs
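The unfactored targets pack the label and the head index into one integer, `bucket_size * label + head`, and the head is recovered with `tf.mod`; the label index would come from integer division. A small round-trip sketch (plain Python, illustrative only):

# Joint (label, head) encoding used by the unfactored classifier.
bucket_size = 10
label, head = 3, 7
target = bucket_size * label + head    # 37
assert target % bucket_size == head    # what tf.mod(predictions, bucket_size) recovers
assert target // bucket_size == label  # the label half (not decoded explicitly in the snippet)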
Example #7
 def get_bilinear_classifier(self, layer, outputs, token_weights, variable_scope=None, reuse=False):
   """"""
   
   layer1 = layer2 = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   hidden_func = self.hidden_func
   hidden_size = self.hidden_size
   add_linear = self.add_linear
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*hidden_size,
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[hidden_size],
                                   hidden_func=hidden_func,
                                   hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     
     with tf.variable_scope('Classifier'):
       if self.diagonal:
         logits = classifiers.diagonal_bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       else:
         logits = classifiers.bilinear_classifier(
           layer1, layer2, len(self),
           hidden_keep_prob=hidden_keep_prob,
           add_linear=add_linear)
       bucket_size = tf.shape(layer)[-2]
       
       #-------------------------------------------------------
       # Process the targets
       # (n x m)
       label_targets = self.placeholder
       unlabeled_predictions = outputs['unlabeled_predictions']
       unlabeled_targets = outputs['unlabeled_targets']
       # (n x m) -> (n x m x m)
       unlabeled_predictions = tf.one_hot(unlabeled_predictions, bucket_size)
       unlabeled_targets = tf.one_hot(unlabeled_targets, bucket_size)
       # (n x m x m) -> (n x m x m x 1)
       unlabeled_predictions = tf.expand_dims(unlabeled_predictions, axis=-1)
       unlabeled_targets = tf.expand_dims(unlabeled_targets, axis=-1)
       
       #-------------------------------------------------------
       # Process the logits
       # We use the gold heads for computing the label score and the predicted
       # heads for computing the unlabeled attachment score
       # (n x m x c x m) -> (n x m x m x c)
       transposed_logits = tf.transpose(logits, [0,1,3,2])
       # (n x m x c x m) * (n x m x m x 1) -> (n x m x c x 1)
       predicted_logits = tf.matmul(logits, unlabeled_predictions)
       oracle_logits = tf.matmul(logits, unlabeled_targets)
       # (n x m x c x 1) -> (n x m x c)
       predicted_logits = tf.squeeze(predicted_logits, axis=-1)
       oracle_logits = tf.squeeze(oracle_logits, axis=-1)
       
       #-------------------------------------------------------
       # Compute probabilities/cross entropy
       # (n x m x m) -> (n x m x m x 1)
       head_probabilities = tf.expand_dims(tf.stop_gradient(outputs['probabilities']), axis=-1)
       # (n x m x m x c) -> (n x m x m x c)
       label_probabilities = tf.nn.softmax(transposed_logits)
       # (n x m), (n x m x c), (n x m) -> ()
       label_loss = tf.losses.sparse_softmax_cross_entropy(label_targets, oracle_logits, weights=token_weights)
       
       #-------------------------------------------------------
       # Compute predictions/accuracy
       # (n x m x c) -> (n x m)
       label_predictions = tf.argmax(predicted_logits, axis=-1, output_type=tf.int32)
       label_oracle_predictions = tf.argmax(oracle_logits, axis=-1, output_type=tf.int32)
       # (n x m) (*) (n x m) -> (n x m)
       correct_label_tokens = nn.equal(label_targets, label_oracle_predictions) * token_weights
       correct_tokens = nn.equal(label_targets, label_predictions) * outputs['correct_unlabeled_tokens']
       
       # (n x m) -> (n)
       tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
       # (n x m) -> (n)
       correct_label_tokens_per_sequence = tf.reduce_sum(correct_label_tokens, axis=-1)
       correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
       # (n), (n) -> (n)
       correct_label_sequences = nn.equal(tokens_per_sequence, correct_label_tokens_per_sequence)
       correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   rho = self.loss_interpolation
   outputs['label_targets'] = label_targets
   # This way we can reconstruct the head_probabilities by exponentiating and summing along the last axis
   outputs['probabilities'] = label_probabilities * head_probabilities
   outputs['label_loss'] = label_loss
   outputs['loss'] = 2*((1-rho) * outputs['loss'] + rho * label_loss)
   
   outputs['label_predictions'] = label_predictions
   outputs['n_correct_label_tokens'] = tf.reduce_sum(correct_label_tokens)
   outputs['n_correct_label_sequences'] = tf.reduce_sum(correct_label_sequences)
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   
   return outputs
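The stored `probabilities` are the product of the head probabilities and the per-head label distribution, so summing the stored joint over the label axis recovers the head probabilities. A small NumPy sketch (illustrative only):

import numpy as np

# p(head, label) = p(head) * p(label | head); summing over labels gives back p(head).
head_probabilities = np.array([0.7, 0.2, 0.1])               # one dependent, m = 3 heads
label_logits = np.random.randn(3, 5)                          # (m x c)
label_probabilities = np.exp(label_logits)
label_probabilities /= label_probabilities.sum(-1, keepdims=True)
joint = label_probabilities * head_probabilities[:, None]     # (m x c)
np.testing.assert_allclose(joint.sum(-1), head_probabilities)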
Example #8
 def get_bilinear_classifier(self, layer, outputs, token_weights, variable_scope=None, reuse=False):
   """"""
   
   recur_layer = layer
   hidden_keep_prob = 1 if reuse else self.hidden_keep_prob
   with tf.variable_scope(variable_scope or self.classname):
     for i in six.moves.range(0, self.n_layers-1):
       with tf.variable_scope('FC-%d' % i):
         layer = classifiers.hidden(layer, 2*self.hidden_size,
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     with tf.variable_scope('FC-top'):
       layers = classifiers.hiddens(layer, 2*[self.hidden_size],
                                    hidden_func=self.hidden_func,
                                    hidden_keep_prob=hidden_keep_prob)
     layer1, layer2 = layers.pop(0), layers.pop(0)
     with tf.variable_scope('Classifier'):
       probabilities = []
       loss = []
       predictions = []
       correct_tokens = []
       for i, feat in enumerate(self._feats):
         vs_feat = str(feat).replace('[', '-RSB-').replace(']', '-LSB-')
         with tf.variable_scope(vs_feat):
           if self.diagonal:
             logits = classifiers.diagonal_bilinear_classifier(layer1, layer2, self.getlen(feat),
                                                               hidden_keep_prob=hidden_keep_prob,
                                                               add_linear=self.add_linear)
           else:
             logits = classifiers.bilinear_classifier(layer1, layer2, self.getlen(feat),
                                                      hidden_keep_prob=hidden_keep_prob,
                                                      add_linear=self.add_linear)
           targets = self.placeholder[:,:,i]
           
           #---------------------------------------------------
           # Compute probabilities/cross entropy
           # (n x m x c) -> (n x m x c)
           probabilities.append(tf.nn.softmax(logits))
           # (n x m), (n x m x c), (n x m) -> ()
           loss.append(tf.losses.sparse_softmax_cross_entropy(targets, logits, weights=token_weights))
           
           #---------------------------------------------------
           # Compute predictions/accuracy
           # (n x m x c) -> (n x m)
           predictions.append(tf.argmax(logits, axis=-1, output_type=tf.int32))
           # (n x m) (*) (n x m) -> (n x m)
           correct_tokens.append(nn.equal(targets, predictions[-1]))
       # (n x m) x f -> (n x m x f)
       predictions = tf.stack(predictions, axis=-1)
       # (n x m) x f -> (n x m x f)
       correct_tokens = tf.stack(correct_tokens, axis=-1)
       # (n x m x f) -> (n x m)
       correct_tokens = tf.reduce_prod(correct_tokens, axis=-1) * token_weights
       # (n x m) -> (n)
       tokens_per_sequence = tf.reduce_sum(token_weights, axis=-1)
       # (n x m) -> (n)
       correct_tokens_per_sequence = tf.reduce_sum(correct_tokens, axis=-1)
       # (n), (n) -> (n)
       correct_sequences = nn.equal(tokens_per_sequence, correct_tokens_per_sequence)
   
   #-----------------------------------------------------------
   # Populate the output dictionary
   outputs = {}
   outputs['recur_layer'] = recur_layer
   outputs['targets'] = self.placeholder
   outputs['probabilities'] = probabilities
   outputs['loss'] = tf.add_n(loss)
   
   outputs['predictions'] = predictions
   outputs['n_correct_tokens'] = tf.reduce_sum(correct_tokens)
   outputs['n_correct_sequences'] = tf.reduce_sum(correct_sequences)
   return outputs
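Per-feature correctness is stacked and combined with `tf.reduce_prod`, so a token only counts as correct when every one of its features is predicted correctly. A tiny NumPy sketch (illustrative only):

import numpy as np

correct_per_feat = np.array([[1, 1, 1],   # token 0: every feature correct
                             [1, 0, 1]])  # token 1: one feature wrong
correct_tokens = correct_per_feat.prod(axis=-1)
print(correct_tokens)  # [1 0]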