Example #1
 def __init__(self, *args, **kwargs) -> None:
     super(DocumentClassifier, self).__init__(*args, **kwargs)
     self.metrics = {
         "accuracy": CategoricalAccuracy(),
         "f1": F1Measure(positive_label=1),
         "confusion_matrix": ConfusionMatrix(positive_label=1),
     }
Example #2
 def test_f1_measure_other_positive_label(self, device: str):
     f1_measure = F1Measure(positive_label=1)
     predictions = torch.tensor(
         [
             [0.35, 0.25, 0.1, 0.1, 0.2],
             [0.1, 0.6, 0.1, 0.2, 0.0],
             [0.1, 0.6, 0.1, 0.2, 0.0],
             [0.1, 0.5, 0.1, 0.2, 0.0],
             [0.1, 0.2, 0.1, 0.7, 0.0],
             [0.1, 0.6, 0.1, 0.2, 0.0],
         ],
         device=device,
     )
     # [True Negative, False Positive, True Positive,
     #  False Positive, True Negative, False Positive]
     targets = torch.tensor([0, 4, 1, 0, 3, 0], device=device)
     f1_measure(predictions, targets)
     metrics = f1_measure.get_metric()
     precision = metrics["precision"]
     recall = metrics["recall"]
     f1 = metrics["f1"]
     assert f1_measure._true_positives == 1.0
     assert f1_measure._true_negatives == 2.0
     assert f1_measure._false_positives == 3.0
     assert f1_measure._false_negatives == 0.0
     f1_measure.reset()
     # check value
     assert_allclose(precision, 0.25)
     assert_allclose(recall, 1.0)
     assert_allclose(f1, 0.4)
     # check type
     assert isinstance(precision, float)
     assert isinstance(recall, float)
     assert isinstance(f1, float)
Example #3
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator) -> None:
        super(BertModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.seq2vec_encoder = seq2vec_encoder

        self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
        self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(),
                                            self.num_types)

        self._type_accuracy = CategoricalAccuracy()

        self.type_f1_metrics = {}
        self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary(
            "state_change_type_labels")
        for type_label in self.type_labels_vocab.values():
            self.type_f1_metrics["type_" + type_label] = F1Measure(
                self.vocab.get_token_index(type_label,
                                           "state_change_type_labels"))

        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #4
    def test_f1_measure_works_for_sequences(self):
        f1_measure = F1Measure(positive_label=0)
        predictions = torch.Tensor([[[0.35, 0.25, 0.1, 0.1, 0.2],
                                     [0.1, 0.6, 0.1, 0.2, 0.0],
                                     [0.1, 0.6, 0.1, 0.2, 0.0]],
                                    [[0.35, 0.25, 0.1, 0.1, 0.2],
                                     [0.1, 0.6, 0.1, 0.2, 0.0],
                                     [0.1, 0.6, 0.1, 0.2, 0.0]]])
        # [[True Positive, True Negative, True Negative],
        #  [True Positive, True Negative, False Negative]]
        targets = torch.Tensor([[0, 3, 4], [0, 1, 0]])
        f1_measure(predictions, targets)
        precision, recall, f1 = f1_measure.get_metric()
        assert f1_measure._true_positives == 2.0
        assert f1_measure._true_negatives == 3.0
        assert f1_measure._false_positives == 0.0
        assert f1_measure._false_negatives == 1.0
        f1_measure.reset()
        numpy.testing.assert_almost_equal(precision, 1.0)
        numpy.testing.assert_almost_equal(recall, 0.666666666)
        numpy.testing.assert_almost_equal(f1, 0.8)

        # Test the same thing with a mask:
        mask = torch.Tensor([[0, 1, 0], [1, 1, 1]])
        f1_measure(predictions, targets, mask)
        precision, recall, f1 = f1_measure.get_metric()
        assert f1_measure._true_positives == 1.0
        assert f1_measure._true_negatives == 2.0
        assert f1_measure._false_positives == 0.0
        assert f1_measure._false_negatives == 1.0
        numpy.testing.assert_almost_equal(precision, 1.0)
        numpy.testing.assert_almost_equal(recall, 0.5)
        numpy.testing.assert_almost_equal(f1, 0.66666666666)
Example #5
 def __init__(
     self,
     num_entities: int,
     num_relations: int,
     embedding_dim: int,
     box_type: str = 'SigmoidBoxTensor',
     single_box: bool = False,
     softbox_temp: float = 10.,
     margin: float = 0.0,
     number_of_negative_samples: int = 0,
     debug: bool = False,
     regularization_weight: float = 0,
     init_interval_center: float = 0.25,
     init_interval_delta: float = 0.1,
     # adversarial_negative: bool = False,
     # adv_neg_softmax_temp: float = 0.8
 ) -> None:
     super().__init__(num_entities, num_relations, embedding_dim, box_type,
                      single_box, softbox_temp, margin,
                      number_of_negative_samples, debug,
                      regularization_weight, init_interval_center,
                      init_interval_delta)
     self.train_f1 = FBetaMeasure(average='micro')
     #self.valid_f1 = FBetaMeasure(average='micro')
     self.threshold_with_f1 = F1WithThreshold(flip_sign=True)
     self.istest = False
     self.test_threshold = None
     self.test_f1 = F1Measure(positive_label=1)
Example #6
    def __init__(self,
                 vocab: Vocabulary,
                 calculate_per_label_f1: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(WordConditionalMajoritySelectiveTagger,
              self).__init__(vocab, regularizer)

        self._num_classes = self.vocab.get_vocab_size("labels")
        self._total_label_counts: typing.Counter[str] = Counter()
        self._token_label_counts: Dict[str, typing.Counter[str]] = {}

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }

        self.calculate_per_label_f1 = calculate_per_label_f1

        label_metric_name = "label_{}" if self.calculate_per_label_f1 else "_label_{}"
        for label_name, label_index in self.vocab._token_to_index[
                "labels"].items():
            self.metrics[label_metric_name.format(label_name)] = F1Measure(
                positive_label=label_index)

        # Whether to run in error analysis mode or not, see commands.error_analysis
        self.error_analysis = False
        initializer(self)
Example #7
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        verbose_metrics: bool,
        dropout: float = 0.2,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super(TextClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.dropout = torch.nn.Dropout(dropout)
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.classifier_feedforward = torch.nn.Linear(
            self.text_field_embedder.get_output_dim(), self.num_classes)

        self.label_accuracy = CategoricalAccuracy()
        self.label_f1_metrics = {}

        self.verbose_metrics = verbose_metrics

        for i in range(self.num_classes):
            self.label_f1_metrics[vocab.get_token_from_index(
                index=i, namespace="labels")] = F1Measure(positive_label=i)
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
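Example #7 above (like Examples #10 and #30 below) builds one F1Measure per label in label_f1_metrics together with a verbose_metrics flag, but only shows the constructor. A minimal sketch of the get_metrics method that commonly accompanies this pattern follows; the dict-style return of F1Measure.get_metric and the average_F1 key are assumptions for illustration, not part of the original example.

    def get_metrics(self, reset: bool = False):
        # Sketch only: aggregate the per-label F1Measure objects created in __init__.
        metric_dict = {"accuracy": self.label_accuracy.get_metric(reset)}
        f1_scores = []
        for label_name, f1_metric in self.label_f1_metrics.items():
            scores = f1_metric.get_metric(reset)  # assumes the dict-style return value
            f1_scores.append(scores["f1"])
            if self.verbose_metrics:
                metric_dict[label_name + "_F1"] = scores["f1"]
        if f1_scores:
            metric_dict["average_F1"] = sum(f1_scores) / len(f1_scores)
        return metric_dict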
Example #8
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 context_encoder: Seq2SeqEncoder,
                 qa_attention_module: MatrixAttention,
                 text_encoder_qa_matching: Seq2VecEncoder,
                 qa_matching_layer: FeedForward,
                 qr_attention_module: Attention,
                 text_encoder_ra_entailment: Seq2VecEncoder,
                 ra_matching_layer: FeedForward,
                 predict_layer: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None):
        super(AnswerHelpfulPredictionModel, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.context_encoder = context_encoder
        self.qa_attention_module = qa_attention_module
        self.text_encoder_qa_matching = text_encoder_qa_matching
        self.qa_matching_layer = qa_matching_layer
        self.qr_attention_module = qr_attention_module
        self.text_encoder_ra_entailment = text_encoder_ra_entailment
        self.ra_matching_layer = ra_matching_layer
        self.predict_layer = predict_layer

        # performance scores are running values, reset the values every epoch
        self.f1_measure = F1Measure(positive_label=1)
        self.auc_score = Auc(positive_label=1)
        self.accuracy = CategoricalAccuracy()

        self.criterion = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #9
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 embedding_dropout: float,
                 encoder: Seq2VecEncoder,
                 encoder_dropout: float,
                 out_dim: int,
                 vocab: Vocabulary,
                 positive_label: int = 4,
                 verbose=True) -> None:
        super().__init__(vocab)
        # Convert word IDs into vector representations
        self._word_embeddings = word_embeddings
        self._embedding_dropout = torch.nn.Dropout(embedding_dropout)
        self._encoder = encoder
        self._encoder_dropout = torch.nn.Dropout(encoder_dropout)

        # The fully connected (fc) layer maps the encoder output dimension to the number of output classes
        self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=out_dim)

        # Evaluation metrics: classification accuracy and F1 score
        self.accuracy = CategoricalAccuracy()
        self.f1_measure = F1Measure(positive_label)

        # For a classification task, cross entropy is used as the loss function.
        # PyTorch's CrossEntropyLoss already combines softmax and negative log-likelihood loss, so no explicit softmax layer is needed.
        self.loss_function = torch.nn.CrossEntropyLoss()
        # self.loss_function = torch.nn.BCEWithLogitsLoss()

        self._verbose = verbose
Example #10
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 node_embedder: TokenEmbedder,
                 verbose_metrics: bool,
                 classifier_feedforward: FeedForward,
                 use_node_vector: bool = True,
                 use_abstract: bool = True,
                 dropout: float = 0.2,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(AclClassifier, self).__init__(vocab, regularizer)

        self.node_embedder = node_embedder
        self.text_field_embedder = text_field_embedder
        self.use_node_vector = use_node_vector
        self.use_abstract = use_abstract
        self.dropout = torch.nn.Dropout(dropout)
        self.num_classes = self.vocab.get_vocab_size("labels")

        self.classifier_feedforward = classifier_feedforward

        self.label_accuracy = CategoricalAccuracy()
        self.label_f1_metrics = {}

        self.verbose_metrics = verbose_metrics

        for i in range(self.num_classes):
            label_name = vocab.get_token_from_index(index=i, namespace="labels")
            self.label_f1_metrics[label_name] = F1Measure(positive_label=i)

        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #11
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2seq_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator) -> None:
        super(ProLocalModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.seq2seq_encoder = seq2seq_encoder

        self.attention_layer = Attention(
            similarity_function=BilinearSimilarity(
                2 * seq2seq_encoder.get_output_dim(),
                seq2seq_encoder.get_output_dim()),
            normalize=True)

        self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
        self.aggregate_feedforward = Linear(seq2seq_encoder.get_output_dim(),
                                            self.num_types)

        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace="state_change_tags")  # by default "O" is ignored in metric computation
        self.num_tags = self.vocab.get_vocab_size("state_change_tags")

        self.tag_projection_layer = TimeDistributed(
            Linear(self.seq2seq_encoder.get_output_dim() + 2, self.num_tags))
        self._type_accuracy = CategoricalAccuracy()

        self.type_f1_metrics = {}
        self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary("state_change_type_labels")
        for type_label in self.type_labels_vocab.values():
            self.type_f1_metrics["type_" + type_label] = F1Measure(self.vocab.get_token_index(type_label, "state_change_type_labels"))

        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #12
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        seq2seq_encoder: Seq2SeqEncoder,
        feedforward_encoder: Seq2SeqEncoder,
        dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):

        super(SimpleGeneratorModel, self).__init__(vocab, regularizer)
        self._vocabulary = vocab
        self._text_field_embedder = text_field_embedder
        self._seq2seq_encoder = seq2seq_encoder
        self._dropout = torch.nn.Dropout(p=dropout)

        self._feedforward_encoder = feedforward_encoder
        self._classifier_input_dim = feedforward_encoder.get_output_dim()

        self._classification_layer = torch.nn.Linear(
            self._classifier_input_dim, 1)

        self._rationale_f1_metric = F1Measure(positive_label=1)
        self._rationale_length = Average()
        self._rationale_supervision_loss = Average()

        initializer(self)
Example #13
    def __init__(self, args, word_embeddings: TextFieldEmbedder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        # parameters
        self.args = args
        self.word_embeddings = word_embeddings

        # gate
        self.W_z = nn.Linear(self.args.embedding_size, 1, bias=False)
        self.U_z = nn.Linear(self.args.embedding_size, 1, bias=False)
        self.W_r = nn.Linear(self.args.embedding_size, 1, bias=False)
        self.U_r = nn.Linear(self.args.embedding_size, 1, bias=False)
        self.W = nn.Linear(self.args.embedding_size, 1, bias=False)
        self.U = nn.Linear(self.args.embedding_size, 1, bias=False)

        # layers
        self.event_embedding = EventEmbedding(args, self.word_embeddings)
        self.attention = Attention(self.args.embedding_size,
                                   score_function='mlp')
        self.sigmoid = Sigmoid()
        self.tanh = Tanh()
        self.score = Score(self.args.embedding_size,
                           self.args.embedding_size,
                           threshold=self.args.threshold)

        # metrics
        self.accuracy = BooleanAccuracy()
        self.f1_score = F1Measure(positive_label=1)
        self.loss_function = BCELoss()
Example #14
 def __init__(
         self,
         num_entities: int,
         num_relations: int,
         embedding_dim: int,
         box_type: str = 'SigmoidBoxTensor',
         softbox_temp: float = 10.,
         single_box: bool = False,
         margin: float = 0.0,
         vocab: Optional[None] = None,
         debug: bool = False
         # we don't need vocab but some api relies on its presence as an argument
 ) -> None:
     super().__init__()
     self.debug = debug
     self.num_entities = num_entities
     self.num_relations = num_relations
     self.embedding_dim = embedding_dim
     self.box_type = box_type
     self.create_embeddings_layer(num_entities, num_relations,
                                  embedding_dim, single_box)
     self.loss_f = torch.nn.MarginRankingLoss(  # type: ignore
         margin=margin, reduction='mean')
     self.softbox_temp = softbox_temp
     self.margin = margin
     self.f1 = F1Measure(1)
Example #15
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 class_loss_weights: List[float],
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.classifier_feedforward = classifier_feedforward

        class_loss_weights = torch.Tensor(class_loss_weights)
        class_loss_weights = class_loss_weights / class_loss_weights.sum()
        self.loss = torch.nn.CrossEntropyLoss(weight=class_loss_weights)

        self.metric_overall_accuracy = CategoricalAccuracy()
        self.metric_class_accuracies = {
            c: F1Measure(positive_label=i)
            for i, c in enumerate(['unfunny', 'somewhat_funny', 'funny'])
        }

        initializer(self)
Example #16
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")

        self._f1 = F1Measure(
            positive_label=vocab._token_to_index["labels"]["1"])
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #17
    def __init__(self,
                 num_entities: int,
                 num_relations: int,
                 embedding_dim: int,
                 box_type: str = 'SigmoidBoxTensor',
                 single_box: bool = False,
                 softbox_temp: float = 10.,
                 number_of_negative_samples: int = 0,
                 debug: bool = False,
                 regularization_weight: float = 0,
                 init_interval_center: float = 0.25,
                 init_interval_delta: float = 0.1,
                 neg_samples_in_dataset_reader: int = 0) -> None:
        super().__init__(
            num_entities,
            num_relations,
            embedding_dim,
            box_type=box_type,
            single_box=single_box,
            softbox_temp=softbox_temp,
            number_of_negative_samples=number_of_negative_samples,
            debug=debug,
            regularization_weight=regularization_weight,
            init_interval_center=init_interval_center,
            init_interval_delta=init_interval_delta,
            neg_samples_in_dataset_reader=neg_samples_in_dataset_reader)
        self.train_f1 = FBetaMeasure(average='micro')
        # self.valid_f1 = FBetaMeasure(average='micro')
        self.threshold_with_f1 = F1WithThreshold(flip_sign=True)

        self.istest = False
        self.test_threshold = None
        # self.test_f1 = FBetaMeasure(average='macro')
        self.test_f1 = F1Measure(positive_label=1)
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 classifier_feedforward: FeedForward,
                 elmo: Elmo = None,
                 use_input_elmo: bool = False):
        super().__init__(vocab)
        self.elmo = elmo
        self.use_elmo = use_input_elmo
        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.classifier_feed_forward = classifier_feedforward
        self.label_accuracy = CategoricalAccuracy()

        self.label_f1_metrics = {}

        # create F1 Measures for each class
        for i in range(self.num_classes):
            self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="labels")] = \
                F1Measure(positive_label=i)

        self.loss = torch.nn.CrossEntropyLoss()

        self.attention = Attention(encoder.get_output_dim())
Example #19
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        quote_response_encoder: Seq2VecEncoder,
        classifier_feedforward: FeedForward,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        predict_mode: bool = False,
    ) -> None:

        super(SarcasmClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.quote_response_encoder = quote_response_encoder
        self.classifier_feedforward = classifier_feedforward

        self.label_acc_metrics = {"accuracy": CategoricalAccuracy()}
        self.label_f1_metrics = {}
        # for i in range(self.num_classes):
        #     self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="label")] =\
        #         F1Measure(positive_label=i)

        for i in range(self.num_classes):
            self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="labels")] =\
                F1Measure(positive_label=i)

        self.loss = torch.nn.CrossEntropyLoss()

        # self.attention_seq2seq = Attention(quote_response_encoder.get_output_dim())

        self.predict_mode = predict_mode

        initializer(self)
Example #20
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        embedding_dropout: float,
        seq2seq_encoder: Seq2SeqEncoder,
        classifier_feedforward: FeedForward,
        initializer: InitializerApplicator = InitializerApplicator(),
        loss_weights: Optional[List] = [],
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super(ICC, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self._embedding_dropout = nn.Dropout(embedding_dropout)
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.seq2seq_encoder = seq2seq_encoder
        self.self_attentive_pooling_projection = nn.Linear(
            seq2seq_encoder.get_output_dim(), 1)
        self.classifier_feedforward = classifier_feedforward

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
        }
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #21
 def test_f1_measure_accumulates_and_resets_correctly(self):
     f1_measure = F1Measure(positive_label=0)
     predictions = torch.Tensor([[0.35, 0.25, 0.1, 0.1, 0.2],
                                 [0.1, 0.6, 0.1, 0.2, 0.0],
                                 [0.1, 0.6, 0.1, 0.2, 0.0],
                                 [0.1, 0.5, 0.1, 0.2, 0.0],
                                 [0.1, 0.2, 0.1, 0.7, 0.0],
                                 [0.1, 0.6, 0.1, 0.2, 0.0]])
     # [True Positive, True Negative, True Negative,
     #  False Negative, True Negative, False Negative]
     targets = torch.Tensor([0, 4, 1, 0, 3, 0])
     f1_measure(predictions, targets)
     f1_measure(predictions, targets)
     precision, recall, f1 = f1_measure.get_metric()
     assert f1_measure._true_positives == 2.0
     assert f1_measure._true_negatives == 6.0
     assert f1_measure._false_positives == 0.0
     assert f1_measure._false_negatives == 4.0
     f1_measure.reset()
     numpy.testing.assert_almost_equal(precision, 1.0)
     numpy.testing.assert_almost_equal(recall, 0.333333333)
     numpy.testing.assert_almost_equal(f1, 0.499999999)
     assert f1_measure._true_positives == 0.0
     assert f1_measure._true_negatives == 0.0
     assert f1_measure._false_positives == 0.0
     assert f1_measure._false_negatives == 0.0
Example #22
 def test_f1_measure_other_positive_label(self):
     f1_measure = F1Measure(positive_label=1)
     predictions = torch.Tensor([
         [0.35, 0.25, 0.1, 0.1, 0.2],
         [0.1, 0.6, 0.1, 0.2, 0.0],
         [0.1, 0.6, 0.1, 0.2, 0.0],
         [0.1, 0.5, 0.1, 0.2, 0.0],
         [0.1, 0.2, 0.1, 0.7, 0.0],
         [0.1, 0.6, 0.1, 0.2, 0.0],
     ])
     # [True Negative, False Positive, True Positive,
     #  False Positive, True Negative, False Positive]
     targets = torch.Tensor([0, 4, 1, 0, 3, 0])
     f1_measure(predictions, targets)
     precision, recall, f1 = f1_measure.get_metric()
     assert f1_measure._true_positives == 1.0
     assert f1_measure._true_negatives == 2.0
     assert f1_measure._false_positives == 3.0
     assert f1_measure._false_negatives == 0.0
     f1_measure.reset()
     # check value
     numpy.testing.assert_almost_equal(precision, 0.25)
     numpy.testing.assert_almost_equal(recall, 1.0)
     numpy.testing.assert_almost_equal(f1, 0.4)
     # check type
     assert isinstance(precision, float)
     assert isinstance(recall, float)
     assert isinstance(f1, float)
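Note that Examples #2 and #25 read F1Measure.get_metric() as a dict keyed by "precision", "recall", and "f1", while Examples #4, #21, and #22 unpack a (precision, recall, f1) tuple; which shape you get depends on the AllenNLP version in use. A small, hypothetical helper that tolerates both shapes might look like this:

def read_precision_recall_f1(f1_measure):
    # Hypothetical helper: normalize F1Measure.get_metric() across AllenNLP versions.
    metric = f1_measure.get_metric(reset=True)
    if isinstance(metric, dict):
        return metric["precision"], metric["recall"], metric["f1"]
    return metric  # older versions already return a (precision, recall, f1) tuple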
Example #23
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        use_sep: bool = True,
        with_crf: bool = False,
        self_attn: Seq2SeqEncoder = None,
        bert_dropout: float = 0.1,
        sci_sum: bool = False,
        additional_feature_size: int = 0,
    ) -> None:
        super(SeqClassificationModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.vocab = vocab
        self.use_sep = use_sep
        self.with_crf = with_crf
        self.sci_sum = sci_sum
        self.self_attn = self_attn
        self.additional_feature_size = additional_feature_size

        self.dropout = torch.nn.Dropout(p=bert_dropout)

        # define loss
        if self.sci_sum:
            self.loss = torch.nn.MSELoss(
                reduction='none')  # labels are rouge scores
            self.labels_are_scores = True
            self.num_labels = 1
        else:
            self.loss = torch.nn.CrossEntropyLoss(ignore_index=-1,
                                                  reduction='none')
            self.labels_are_scores = False
            self.num_labels = self.vocab.get_vocab_size(namespace='labels')
            # define accuracy metrics
            self.label_accuracy = CategoricalAccuracy()
            self.all_f1_metrics = FBetaMeasure(beta=1.0, average='micro')
            self.label_f1_metrics = {}

            # define F1 metrics per label
            for label_index in range(self.num_labels):
                label_name = self.vocab.get_token_from_index(
                    namespace='labels', index=label_index)
                self.label_f1_metrics[label_name] = F1Measure(label_index)

        encoded_sentence_dim = text_field_embedder._token_embedders[
            'bert'].output_dim

        ff_in_dim = encoded_sentence_dim if self.use_sep else self_attn.get_output_dim()
        ff_in_dim += self.additional_feature_size

        self.time_distributed_aggregate_feedforward = TimeDistributed(
            Linear(ff_in_dim, self.num_labels))

        if self.with_crf:
            self.crf = ConditionalRandomField(
                self.num_labels,
                constraints=None,
                include_start_end_transitions=True)
Example #24
    def __init__(self,
                 args,
                 word_embeddings: TextFieldEmbedder,
                 vocab: Vocabulary,
                 domain_info: bool = True) -> None:
        super().__init__(vocab)

        # parameters
        self.args = args
        self.word_embeddings = word_embeddings
        self.domain = domain_info

        # layers
        self.event_embedding = EventEmbedding(args, self.word_embeddings)
        self.event_type_embedding = EventTypeEmbedding(args,
                                                       self.word_embeddings)
        self.lstm = LSTM(input_size=self.args.embedding_size,
                         hidden_size=self.args.hidden_size)
        self.linear = Linear(self.args.hidden_size, self.args.embedding_size)
        self.W_c = Linear(self.args.embedding_size,
                          self.args.hidden_size,
                          bias=False)
        self.W_e = Linear(self.args.hidden_size,
                          self.args.hidden_size,
                          bias=False)
        self.relu = ReLU()
        self.score = Score(self.args.embedding_size,
                           self.args.embedding_size,
                           threshold=self.args.threshold)

        # metrics
        self.accuracy = BooleanAccuracy()
        self.f1_score = F1Measure(positive_label=1)
        self.loss_function = BCELoss()
Example #25
 def test_f1_measure_accumulates_and_resets_correctly(self, device: str):
     f1_measure = F1Measure(positive_label=0)
     predictions = torch.tensor(
         [
             [0.35, 0.25, 0.1, 0.1, 0.2],
             [0.1, 0.6, 0.1, 0.2, 0.0],
             [0.1, 0.6, 0.1, 0.2, 0.0],
             [0.1, 0.5, 0.1, 0.2, 0.0],
             [0.1, 0.2, 0.1, 0.7, 0.0],
             [0.1, 0.6, 0.1, 0.2, 0.0],
         ],
         device=device,
     )
     # [True Positive, True Negative, True Negative,
     #  False Negative, True Negative, False Negative]
     targets = torch.tensor([0, 4, 1, 0, 3, 0], device=device)
     f1_measure(predictions, targets)
     f1_measure(predictions, targets)
     metrics = f1_measure.get_metric()
     precision = metrics["precision"]
     recall = metrics["recall"]
     f1 = metrics["f1"]
     assert f1_measure._true_positives == 2.0
     assert f1_measure._true_negatives == 6.0
     assert f1_measure._false_positives == 0.0
     assert f1_measure._false_negatives == 4.0
     f1_measure.reset()
     assert_allclose(precision, 1.0)
     assert_allclose(recall, 0.333333333)
     assert_allclose(f1, 0.499999999)
     assert f1_measure._true_positives == 0.0
     assert f1_measure._true_negatives == 0.0
     assert f1_measure._false_positives == 0.0
     assert f1_measure._false_negatives == 0.0
Example #26
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 feedforward: Optional[FeedForward] = None,
                 include_start_end_transitions: bool = True,
                 dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = 'labels'
        self.num_tags = self.vocab.get_vocab_size(self.label_namespace)

        # encode text
        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.feedforward = feedforward

        # crf
        output_dim = self.encoder.get_output_dim() if feedforward is None else feedforward.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
        self.crf = ConditionalRandomField(self.num_tags, constraints=None, include_start_end_transitions=include_start_end_transitions)

        initializer(self)

        self.metrics = {}

        # Add F1 score for individual labels to metrics 
        for index, label in self.vocab.get_index_to_token_vocabulary(self.label_namespace).items():
            self.metrics[label] = F1Measure(positive_label=index)
Example #27
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 class_weights: List[float] = (1.0, 1.0),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder
        self.classifier_feedforward = classifier_feedforward
        self.num_classes = self.vocab.get_vocab_size('labels')

        assert self.num_classes == classifier_feedforward.get_output_dim()

        # if classifier_feedforward.get_input_dim() != 768:
        #     raise ConfigurationError(F"The input dimension of the classifier_feedforward, "
        #                              F"found {classifier_feedforward.get_input_dim()}, must match the "
        #                              F" output dimension of the bert embeder, {768}")
        index = 0
        if self.num_classes == 2:
            index = self.vocab.get_token_index("正类", "labels")
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(index)
        }
        # weights = torch.Tensor(class_weights)
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #28
    def __init__(self, args, word_embeddings: TextFieldEmbedder,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        # parameters
        self.args = args
        self.word_embeddings = word_embeddings

        # layers
        self.event_embedding = EventEmbedding(self.args, self.word_embeddings)
        self.lstm = DynamicLSTM(self.args.embedding_size * 2,
                                self.args.hidden_size,
                                num_layers=1,
                                batch_first=True)
        self.attention = NoQueryAttention(self.args.hidden_size +
                                          self.args.embedding_size * 2,
                                          score_function='bi_linear')
        self.score = Score(self.args.hidden_size,
                           self.args.embedding_size,
                           threshold=self.args.threshold)

        # metrics
        self.accuracy = BooleanAccuracy()
        self.f1_score = F1Measure(positive_label=1)
        self.loss_function = BCELoss()
Example #29
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 vocab: Vocabulary,
                 positive_label: int = 4) -> None:
        super().__init__(vocab)
        # We need the embeddings to convert word IDs to their vector representations
        self.word_embeddings = word_embeddings

        self.encoder = encoder

        # After converting a sequence of vectors to a single vector, we feed it into
        # a fully-connected linear layer to reduce the dimension to the total number of labels.
        self.linear = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        # Monitor the metrics - we use accuracy, as well as prec, rec, f1 for 4 (very positive)
        self.accuracy = CategoricalAccuracy()
        self.f1_measure = F1Measure(positive_label)

        # We use the cross entropy loss because this is a classification task.
        # Note that PyTorch's CrossEntropyLoss combines softmax and log likelihood loss,
        # which makes it unnecessary to add a separate softmax layer.
        self.loss_function = torch.nn.CrossEntropyLoss()
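Example #29 above (and the closely related Example #9) only shows the constructor. For context, here is a minimal sketch of the forward and get_metrics methods that typically accompany it; the input names tokens and label, the use of allennlp.nn.util.get_text_field_mask, and the dict-style F1Measure.get_metric return are assumptions for illustration, not taken from the original example.

    def forward(self, tokens, label=None):
        # Sketch only: embed, encode, classify, and update the metrics built in __init__.
        mask = get_text_field_mask(tokens)
        embeddings = self.word_embeddings(tokens)
        encoding = self.encoder(embeddings, mask)
        logits = self.linear(encoding)
        output = {"logits": logits}
        if label is not None:
            self.accuracy(logits, label)
            self.f1_measure(logits, label)
            output["loss"] = self.loss_function(logits, label)
        return output

    def get_metrics(self, reset: bool = False):
        f1 = self.f1_measure.get_metric(reset)  # assumes the dict-style return value
        return {"accuracy": self.accuracy.get_metric(reset), **f1}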
Example #30
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        text_encoder: Seq2SeqEncoder,
        classifier_feedforward: FeedForward,
        verbose_metrics: bool,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super(TextClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward
        self.prediction_layer = torch.nn.Linear(
            self.classifier_feedforward.get_output_dim(), self.num_classes)

        self.label_accuracy = CategoricalAccuracy()
        self.label_f1_metrics = {}

        self.verbose_metrics = verbose_metrics

        for i in range(self.num_classes):
            self.label_f1_metrics[vocab.get_token_from_index(
                index=i, namespace="labels")] = F1Measure(positive_label=i)
        self.loss = torch.nn.CrossEntropyLoss()

        self.pool = lambda text, mask: util.get_final_encoder_states(
            text, mask, bidirectional=True)

        initializer(self)