示例#1
0
文件: heads.py 项目: jgung/tf-nlp
    def _evaluation(self):
        """Populate ``self.metric_ops`` and ``self.evaluation_hooks`` for this head.

        The overall score stored in ``self.metric`` is exposed as a metric op, and a
        single evaluation hook is registered: an ``SrlEvalHook`` when
        ``constants.SRL_KEY`` is in ``self.config.task``, otherwise a
        ``SequenceEvalHook``. Both hooks receive an op that assigns a placeholder
        value to ``self.metric``.
        """
        self.evaluation_hooks = []
        self.metric_ops = {}
        predictions_key = append_label(constants.PREDICT_KEY, self.name)
        labels_key = append_label(constants.LABEL_KEY, self.name)

        # Tensors handed to the evaluation hook (e.g. the sequence lengths).
        hook_tensors = {}
        hook_tensors[constants.LENGTH_KEY] = self._sequence_lengths
        hook_tensors[constants.SENTENCE_INDEX] = self.features[constants.SENTENCE_INDEX]
        hook_tensors[labels_key] = self._mask_subtokens(self.targets)
        hook_tensors[predictions_key] = self.predictions

        score_tensor = tf.identity(self.metric)
        overall_key = append_label(constants.OVERALL_KEY, self.name)
        self.metric_ops[overall_key] = (score_tensor, score_tensor)
        # https://github.com/tensorflow/tensorflow/issues/20418 -- metrics don't accept variables, so we create a tensor
        eval_placeholder = placeholder(dtype=tf.float32, name='update_%s' % overall_key)

        if constants.SRL_KEY in self.config.task:
            # Prefer the marker feature when present; otherwise use the predicate index.
            if constants.MARKER_KEY in self.features:
                extra_key = constants.MARKER_KEY
            else:
                extra_key = constants.PREDICATE_INDEX_KEY
            hook_tensors[extra_key] = self.features[extra_key]

            srl_evaluator = SrlEvaluator(
                target=self.extractor,
                output_path=os.path.join(self.params.job_dir, self.name + '.dev'))
            update_op = assign(self.metric, eval_placeholder)
            hook = SrlEvalHook(tensors=hook_tensors,
                               evaluator=srl_evaluator,
                               label_key=labels_key,
                               predict_key=predictions_key,
                               eval_update=update_op,
                               eval_placeholder=eval_placeholder,
                               output_confusions=self.params.verbose_eval,
                               output_dir=self.params.job_dir)
        else:
            tagger_evaluator = TaggerEvaluator(
                target=self.extractor,
                output_path=os.path.join(self.params.job_dir, self.name + '.dev'))
            update_op = assign(self.metric, eval_placeholder)
            hook = SequenceEvalHook(tensors=hook_tensors,
                                    evaluator=tagger_evaluator,
                                    label_key=labels_key,
                                    predict_key=predictions_key,
                                    eval_update=update_op,
                                    eval_placeholder=eval_placeholder,
                                    output_dir=self.params.job_dir)

        self.evaluation_hooks.append(hook)
示例#2
0
文件: heads.py 项目: jgung/tf-nlp
    def _train_eval(self):
        """Set ``self.mask``, ``self.loss`` and ``self.metric`` for training/evaluation.

        The loss is ``sequence_loss`` applied to the logits returned by
        ``select_logits`` for ``self.predicate_indices``. ``self.metric`` is a
        non-trainable float32 variable initialised to 0.
        """
        lengths = self.features[constants.LENGTH_KEY]
        self.mask = tf.sequence_mask(lengths, name="padding_mask")

        num_labels = self.extractor.vocab_size()
        selected_logits = select_logits(self.logits, self.predicate_indices, self.n_steps)

        # No explicit mask is passed when the BERT length feature is present.
        if constants.BERT_LENGTH_KEY in self.features:
            loss_mask = None
        else:
            loss_mask = self.features.get(constants.SEQUENCE_MASK)

        self.loss = sequence_loss(logits=selected_logits,
                                  targets=self.targets,
                                  sequence_lengths=self._sequence_lengths,
                                  num_labels=num_labels,
                                  crf=self.config.crf,
                                  tag_transitions=self._tag_transitions,
                                  label_smoothing=self.config.label_smoothing,
                                  confidence_penalty=self.config.confidence_penalty,
                                  name="bilinear_loss",
                                  mask=loss_mask)

        metric_name = append_label(constants.OVERALL_KEY, self.name)
        self.metric = Variable(0, name=metric_name, dtype=tf.float32, trainable=False)
示例#3
0
文件: heads.py 项目: jgung/tf-nlp
    def _evaluation(self):
        """Populate ``self.metric_ops`` and ``self.evaluation_hooks`` for this head.

        Registers an accuracy metric over ``self.targets`` / ``self.predictions``,
        exposes the overall score stored in ``self.metric`` as a metric op, and
        creates one ``ClassifierEvalHook`` that is given an op assigning a
        placeholder value to ``self.metric``.
        """
        predictions_key = append_label(constants.PREDICT_KEY, self.name)
        labels_key = append_label(constants.LABEL_KEY, self.name)
        acc_key = append_label(constants.ACCURACY_METRIC_KEY, self.name)

        accuracy_metric = tf.metrics.accuracy(labels=self.targets,
                                              predictions=self.predictions,
                                              name=acc_key)
        self.metric_ops = {acc_key: accuracy_metric}

        # Tensors handed to the evaluation hook.
        hook_tensors = {}
        hook_tensors[labels_key] = self.targets
        hook_tensors[predictions_key] = self.predictions
        hook_tensors[constants.LABEL_SCORES] = self.scores
        hook_tensors[constants.LENGTH_KEY] = self._sequence_lengths
        hook_tensors[constants.SENTENCE_INDEX] = self.features[constants.SENTENCE_INDEX]

        # Forward the constraint feature only when the extractor names one.
        constraint_key = self.extractor.constraint_key
        if constraint_key:
            hook_tensors[constraint_key] = self.features[constraint_key]

        score_tensor = tf.identity(self.metric)
        overall_key = append_label(constants.OVERALL_KEY, self.name)
        self.metric_ops[overall_key] = (score_tensor, score_tensor)
        # https://github.com/tensorflow/tensorflow/issues/20418 -- metrics don't accept variables, so we create a tensor
        eval_placeholder = tf.placeholder(dtype=tf.float32, name='update_%s' % overall_key)

        dev_output = os.path.join(self.params.job_dir, self.name + '.dev')
        evaluator = TokenClassifierEvaluator(target=self.extractor, output_path=dev_output)
        update_op = tf.assign(self.metric, eval_placeholder)

        hook = ClassifierEvalHook(label_key=labels_key,
                                  predict_key=predictions_key,
                                  tensors=hook_tensors,
                                  evaluator=evaluator,
                                  output_dir=self.params.job_dir,
                                  eval_update=update_op,
                                  eval_placeholder=eval_placeholder)
        self.evaluation_hooks = [hook]
示例#4
0
文件: parser.py 项目: jgung/tf-nlp
    def _evaluation(self):
        """Populate ``self.metric_ops`` and ``self.evaluation_hooks`` for the parser.

        Three streaming means are registered (unlabeled attachment, label, and
        labeled attachment scores), each the per-batch ratio of correct tokens to
        ``sum(self.lens - 1)``. The overall score stored in ``self.metric`` is also
        exposed, and one ``ParserEvalHook`` is created with an op that assigns a
        placeholder value to ``self.metric``.
        """

        def masked_matches(predicted, gold):
            # Column 0 is dropped on both sides (the original notes a sentinel token).
            hits = tf.to_int32(tf.equal(predicted[:, 1:], gold[:, 1:]))
            return tf.boolean_mask(hits, self.mask[:, 1:])

        def total(values):
            return tf.cast(tf.reduce_sum(values), tf.int32)

        # compute metrics, such as UAS, LAS, and LA
        arc_correct = masked_matches(self.arc_predictions, self.arc_targets)
        rel_correct = masked_matches(self.predictions, self.targets)
        n_arc_correct = total(arc_correct)
        n_rel_correct = total(rel_correct)
        # A token counts as fully correct only when both arc and relation match.
        n_correct = total(arc_correct * rel_correct)

        n_tokens = total(self.lens - 1)  # minus 1 for sentinel

        def per_token_mean(n_hits):
            return tf.metrics.mean(n_hits / n_tokens)

        self.metric_ops = {
            constants.UNLABELED_ATTACHMENT_SCORE: per_token_mean(n_arc_correct),
            constants.LABEL_SCORE: per_token_mean(n_rel_correct),
            constants.LABELED_ATTACHMENT_SCORE: per_token_mean(n_correct),
        }

        overall_key = append_label(constants.OVERALL_KEY, self.name)
        score_tensor = tf.identity(self.metric)
        self.metric_ops[overall_key] = (score_tensor, score_tensor)
        # https://github.com/tensorflow/tensorflow/issues/20418 -- metrics don't accept variables, so we create a tensor
        eval_placeholder = tf.placeholder(dtype=tf.float32, name='update_%s' % overall_key)

        self.evaluation_hooks = []

        hook_tensors = {
            constants.ARC_PROBS: self.arc_probs,
            constants.REL_PROBS: self.rel_probs,
            constants.LENGTH_KEY: self.lens,  # original note: "plus one for the sentinel"
            constants.HEAD_KEY: self.features[constants.HEAD_KEY],
            constants.DEPREL_KEY: self.features[constants.DEPREL_KEY],
            constants.SENTENCE_INDEX: self.features[constants.SENTENCE_INDEX],
        }
        evaluator = DepParserEvaluator(
            target=self.extractor,
            output_path=os.path.join(self.params.job_dir, self.name + '.dev'),
            script_path=self.params.script_path,
        )
        update_op = tf.assign(self.metric, eval_placeholder)
        self.evaluation_hooks.append(
            ParserEvalHook(
                hook_tensors,
                evaluator=evaluator,
                eval_update=update_op,
                eval_placeholder=eval_placeholder,
                output_dir=self.params.job_dir,
            )
        )
示例#5
0
 def __init__(self, target=None, output_path=None, script_path=None):
     """Initialise empty evaluation state and derive the lookup keys from the target.

     ``target_key`` and ``labels_key`` fall back to ``constants.LABEL_KEY`` when
     the target's ``name`` / ``key`` is falsy. ``scores_name`` is the label-scores
     key suffixed with ``target_key`` via ``append_label``.
     """
     super().__init__(target, output_path, script_path)
     # State that starts unset.
     self.labels = self.gold = self.indices = self.token_indices = None
     self.target_key = self.target.name or constants.LABEL_KEY
     self.labels_key = self.target.key or constants.LABEL_KEY
     self.scores_name = append_label(constants.LABEL_SCORES, self.target_key)
示例#6
0
文件: heads.py 项目: jgung/tf-nlp
 def _train_eval(self):
     """Set ``self.loss`` and ``self.metric`` for a classification head.

     With a positive ``label_smoothing`` the loss is a smoothed softmax cross
     entropy over one-hot targets; otherwise it is the mean sparse softmax cross
     entropy. ``self.metric`` is a non-trainable float32 variable initialised to 0.
     """
     smoothing = self.config.label_smoothing
     if smoothing > 0:
         one_hot_targets = tf.one_hot(self.targets, depth=self.extractor.vocab_size())
         loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_targets,
                                                logits=self.logits,
                                                label_smoothing=self.config.label_smoothing)
     else:
         per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                                      labels=self.targets)
         loss = tf.reduce_mean(per_example)
     self.loss = loss

     metric_name = append_label(constants.OVERALL_KEY, self.name)
     self.metric = Variable(0, name=metric_name, dtype=tf.float32, trainable=False)
示例#7
0
文件: heads.py 项目: jgung/tf-nlp
    def _train_eval(self):
        """Set ``self.loss`` (via ``sequence_loss``) and ``self.metric`` for this head.

        ``self.metric`` is a non-trainable float32 variable initialised to 0.
        """
        num_labels = self.extractor.vocab_size()

        # No explicit mask is passed when the BERT length feature is present.
        if constants.BERT_LENGTH_KEY in self.features:
            loss_mask = None
        else:
            loss_mask = self.features.get(constants.SEQUENCE_MASK)

        self.loss = sequence_loss(logits=self.logits,
                                  targets=self.targets,
                                  sequence_lengths=self._sequence_lengths,
                                  num_labels=num_labels,
                                  crf=self.config.crf,
                                  tag_transitions=self._tag_transitions,
                                  label_smoothing=self.config.label_smoothing,
                                  confidence_penalty=self.config.confidence_penalty,
                                  mask=loss_mask)

        metric_name = append_label(constants.OVERALL_KEY, self.name)
        self.metric = Variable(0, name=metric_name, dtype=tf.float32, trainable=False)
示例#8
0
文件: parser.py 项目: jgung/tf-nlp
    def _train_eval(self):
        """Set ``self.mask``, ``self.arc_targets``, ``self.loss`` and ``self.metric``.

        The loss is the weighted sum of the arc loss and the relation loss, each a
        masked mean sparse softmax cross entropy. Weights come from the config keys
        ``arc_loss_weight`` and ``rel_loss_weight`` and default to 1.
        ``self.metric`` is a non-trainable float32 variable initialised to 0.
        """
        self.mask = tf.sequence_mask(self.lens, name="padding_mask")

        def masked_mean_xent(logits, targets, scope):
            # Cross entropy at every position, averaged over unmasked positions only.
            with tf.variable_scope(scope):
                per_position = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)
                kept = tf.boolean_mask(per_position, self.mask)
                return tf.reduce_mean(kept)

        self.arc_targets = tf.identity(self.features[constants.HEAD_KEY], name=constants.HEAD_KEY)

        # compute combined arc and rel losses (both via softmax cross entropy)
        arc_loss = masked_mean_xent(self.arc_logits, self.arc_targets, "arc_bilinear_loss")
        # Relation logits are selected using the arc targets.
        selected_rel_logits = select_logits(self.rel_logits, self.arc_targets, self.n_steps)
        rel_loss = masked_mean_xent(selected_rel_logits, self.targets, "rel_bilinear_loss")

        arc_weight = self.config.get('arc_loss_weight', 1)
        weighted_arc = arc_weight * arc_loss
        rel_weight = self.config.get('rel_loss_weight', 1)
        weighted_rel = rel_weight * rel_loss
        self.loss = weighted_arc + weighted_rel

        metric_name = append_label(constants.OVERALL_KEY, self.name)
        self.metric = Variable(0, name=metric_name, dtype=tf.float32, trainable=False)
示例#9
0
 def __init__(self, *args, **kwargs):
     """Forward all arguments to the parent and set ``self._score_name``.

     ``_score_name`` is ``LABEL_SCORES`` combined with the target's name via
     ``append_label``.
     """
     super().__init__(*args, **kwargs)
     target_name = self._target.name
     self._score_name = append_label(LABEL_SCORES, target_name)
示例#10
0
文件: heads.py 项目: jgung/tf-nlp
 def _prediction(self):
     """Run the parent's prediction setup, then export ``self.scores``.

     The scores are stored in ``self.export_outputs`` under the label-scores key
     suffixed with this head's name via ``append_label``.
     """
     super()._prediction()
     scores_key = append_label(constants.LABEL_SCORES, self.name)
     self.export_outputs[scores_key] = self.scores