def setUp(self):
        super().setUp()
        self.logits = torch.Tensor([
            [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
            [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = torch.LongTensor([
            [2, 3, 4],
            [3, 2, 2]
        ])

        self.transitions = torch.Tensor([
            [0.1, 0.2, 0.3, 0.4, 0.5],
            [0.8, 0.3, 0.1, 0.7, 0.9],
            [-0.3, 2.1, -5.6, 3.4, 4.0],
            [0.2, 0.4, 0.6, -0.3, -0.4],
            [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)
Example #2
    def __init__(self, bert_dir: str, vocabulary_builder: VocabularyBuilder,
                 dropout: float, is_used_crf: bool):
        """
        初始化
        :param bert_dir: 预训练好的 bert 模型所在 dir
        :param vocabulary_builder: vocabulary builder
        :param dropout: bert 最后一层输出的 dropout
        :param is_used_crf: 是否使用 crf, True: 使用 crf; False: 不使用 crf
        """

        super().__init__()

        self.label_vocabulary = vocabulary_builder.label_vocabulary
        self.dropout = Dropout(dropout)
        self.is_used_crf = is_used_crf
        self.bert = BertModel.from_pretrained(bert_dir)

        bert_config: BertConfig = self.bert.config

        self.classifier = Linear(bert_config.hidden_size,
                                 self.label_vocabulary.label_size)

        if self.is_used_crf:
            constraints = BIO.allowed_transitions(
                label_vocabulary=self.label_vocabulary)
            self.crf = ConditionalRandomField(
                num_tags=self.label_vocabulary.label_size,
                constraints=constraints)
        else:
            self.crf = None

        self.reset_parameters()
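
A minimal sketch of how a forward pass for the BERT + CRF tagger above is typically wired; the method name forward, its arguments, the use of last_hidden_state, and the returned dict are illustrative assumptions rather than the library's actual interface.

    # Hypothetical forward pass for the BERT + CRF tagger above (a sketch, not the library's API).
    def forward(self, input_ids, attention_mask, tags=None):
        # Encode the tokens with BERT and project the (dropped-out) hidden states to the label space.
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.classifier(self.dropout(bert_output.last_hidden_state))

        loss = None
        if self.is_used_crf:
            if tags is not None:
                # The CRF returns the log-likelihood of the gold tags; negate it for a loss.
                loss = -self.crf(logits, tags, attention_mask)
            # viterbi_tags returns (tags, score) pairs, as in the tests elsewhere on this page.
            predictions = [path for path, score in self.crf.viterbi_tags(logits, attention_mask)]
        else:
            if tags is not None:
                # Token-level cross-entropy (padding positions are ignored here for brevity).
                loss = torch.nn.functional.cross_entropy(
                    logits.view(-1, logits.size(-1)), tags.view(-1))
            predictions = logits.argmax(dim=-1)

        return {"loss": loss, "predictions": predictions}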
    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        crf = ConditionalRandomField(num_tags=5, constraints=constraints)
        crf.transitions = torch.nn.Parameter(self.transitions)
        crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        mask = torch.LongTensor([
            [1, 1, 1],
            [1, 1, 0]
        ])

        viterbi_path = crf.viterbi_tags(self.logits, mask)

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = [x for x, y in viterbi_path]

        # Now the tags should respect the constraints
        assert viterbi_tags == [
            [2, 3, 3],
            [2, 3]
        ]
    def __init__(self):
        bio_labels = [["O", "I-X", "B-X", "I-Y", "B-Y"]]

        self.label_vocabulary = LabelVocabulary(
            labels=bio_labels, padding=LabelVocabulary.PADDING)

        self.logits = torch.tensor([
            [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
            [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ],
                                   dtype=torch.float)

        self.tags = torch.tensor([[2, 3, 4], [3, 2, 2]], dtype=torch.long)

        self.transitions = torch.tensor(
            [[0.1, 0.2, 0.3, 0.4, 0.5], [0.8, 0.3, 0.1, 0.7, 0.9],
             [-0.3, 2.1, -5.6, 3.4, 4.0], [0.2, 0.4, 0.6, -0.3, -0.4],
             [1.0, 1.0, 1.0, 1.0, 1.0]],
            dtype=torch.float)

        self.transitions_from_start = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.6],
                                                   dtype=torch.float)
        self.transitions_to_end = torch.tensor([-0.1, -0.2, 0.3, -0.4, -0.4],
                                               dtype=torch.float)

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(
            self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        # constraint crf
        constraints = {(0, 0), (0, 1), (1, 1), (1, 2), (2, 2), (2, 3), (3, 3),
                       (3, 4), (4, 4), (4, 0)}

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        constraint_crf = ConditionalRandomField(num_tags=5,
                                                constraints=constraints)
        constraint_crf.transitions = torch.nn.Parameter(self.transitions)
        constraint_crf.start_transitions = torch.nn.Parameter(
            self.transitions_from_start)
        constraint_crf.end_transitions = torch.nn.Parameter(
            self.transitions_to_end)
        self.constraint_crf = constraint_crf
class TestConditionalRandomField(TestCase):
    def setUp(self):
        super().setUp()
        self.logits = torch.Tensor([
            [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
            [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = torch.LongTensor([
            [2, 3, 4],
            [3, 2, 2]
        ])

        self.transitions = torch.Tensor([
            [0.1, 0.2, 0.3, 0.4, 0.5],
            [0.8, 0.3, 0.1, 0.7, 0.9],
            [-0.3, 2.1, -5.6, 3.4, 4.0],
            [0.2, 0.4, 0.6, -0.3, -0.4],
            [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

    def score(self, logits, tags):
        """
        Computes the likelihood score for the given sequence of tags,
        given the provided logits (and the transition weights in the CRF model)
        """
        # Start with transitions from START and to END
        total = self.transitions_from_start[tags[0]] + self.transitions_to_end[tags[-1]]
        # Add in all the intermediate transitions
        for tag, next_tag in zip(tags, tags[1:]):
            total += self.transitions[tag, next_tag]
        # Add in the logits for the observed tags
        for logit, tag in zip(logits, tags):
            total += logit[tag]
        return total
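
    # In symbols: with per-position logits x_t and transition matrix T, the score above is
    #     score(x, y) = start[y_1] + sum_t x_t[y_t] + sum_t T[y_t, y_{t+1}] + end[y_n],
    # and the log-likelihood verified in the tests below is
    #     log P(y | x) = score(x, y) - log(sum over all y' of exp(score(x, y'))),
    # where the second term is the log-partition over all 5^n possible tag sequences.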

    def test_forward_works_without_mask(self):
        log_likelihood = self.crf(self.logits, self.tags).item()

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        # (which is just the score for the logits and actual tags)
        # and the denominator
        # (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i in zip(self.logits, self.tags):
            numerator = self.score(logits_i.detach(), tags_i.detach())
            all_scores = [self.score(logits_i.detach(), tags_j)
                          for tags_j in itertools.product(range(5), repeat=3)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood.item() == approx(log_likelihood)

    def test_forward_works_with_mask(self):
        # Use a non-trivial mask
        mask = torch.LongTensor([
            [1, 1, 1],
            [1, 1, 0]
        ])

        log_likelihood = self.crf(self.logits, self.tags, mask).item()

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        #   (which is just the score for the logits and actual tags)
        # and the denominator
        #   (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i, mask_i in zip(self.logits, self.tags, mask):
            # Find the sequence length for this input and only look at that much of each sequence.
            sequence_length = torch.sum(mask_i.detach())
            logits_i = logits_i.data[:sequence_length]
            tags_i = tags_i.data[:sequence_length]

            numerator = self.score(logits_i, tags_i)
            all_scores = [self.score(logits_i, tags_j)
                          for tags_j in itertools.product(range(5), repeat=sequence_length)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood.item() == approx(log_likelihood)

    def test_viterbi_tags(self):
        mask = torch.LongTensor([
            [1, 1, 1],
            [1, 1, 0]
        ])

        viterbi_path = self.crf.viterbi_tags(self.logits, mask)

        # Separate the tags and scores.
        viterbi_tags = [x for x, y in viterbi_path]
        viterbi_scores = [y for x, y in viterbi_path]

        # Check that the viterbi tags are what I think they should be.
        assert viterbi_tags == [
            [2, 4, 3],
            [4, 2]
        ]

        # We can also iterate over all possible tag sequences and use self.score
        # to check the likelihood of each. The most likely sequence should be the
        # same as what we get from viterbi_tags.
        most_likely_tags = []
        best_scores = []

        for logit, mas in zip(self.logits, mask):
            sequence_length = torch.sum(mas.detach())
            most_likely, most_likelihood = None, -float('inf')
            for tags in itertools.product(range(5), repeat=sequence_length):
                score = self.score(logit.data, tags)
                if score > most_likelihood:
                    most_likely, most_likelihood = tags, score
            # Convert tuple to list; otherwise == complains.
            most_likely_tags.append(list(most_likely))
            best_scores.append(most_likelihood)

        assert viterbi_tags == most_likely_tags
        assert viterbi_scores == best_scores

    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        crf = ConditionalRandomField(num_tags=5, constraints=constraints)
        crf.transitions = torch.nn.Parameter(self.transitions)
        crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        mask = torch.LongTensor([
            [1, 1, 1],
            [1, 1, 0]
        ])

        viterbi_path = crf.viterbi_tags(self.logits, mask)

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = [x for x, y in viterbi_path]

        # Now the tags should respect the constraints
        assert viterbi_tags == [
            [2, 3, 3],
            [2, 3]
        ]

    def test_allowed_transitions(self):
        bio_labels = ['O', 'B-X', 'I-X', 'B-Y', 'I-Y']  # indices 0-4; 5 and 6 are the implicit start and end tags

        label_vocabulary = LabelVocabulary(labels=[bio_labels],
                                           padding=LabelVocabulary.PADDING)
        allowed = BIO.allowed_transitions(label_vocabulary=label_vocabulary)

        # Pairs missing from this set are disallowed transitions; a plain-Python sketch of the BIO rule follows this class.
        assert set(allowed) == {  # Extra column for end tag.
            (0, 0), (0, 1), (0, 3), (0, 6),
            (1, 0), (1, 1), (1, 2), (1, 3), (1, 6),
            (2, 0), (2, 1), (2, 2), (2, 3), (2, 6),
            (3, 0), (3, 1), (3, 3), (3, 4), (3, 6),
            (4, 0), (4, 1), (4, 3), (4, 4), (4, 6),
            (5, 0), (5, 1), (5, 3)  # Extra row for start tag
        }
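
To make the assertion above concrete, this is a minimal sketch of the BIO rule it encodes for label-to-label transitions (the start- and end-tag rows and columns, indices 5 and 6, are handled separately). The helper name is_bio_transition_allowed is illustrative, not the library's API.

# A plain-Python sketch of the BIO transition rule, under the assumptions stated above.
def is_bio_transition_allowed(from_label: str, to_label: str) -> bool:
    # From any label you may move to "O" or begin a new entity with any "B-*" label.
    if to_label == "O" or to_label.startswith("B-"):
        return True
    # "I-X" is only reachable from "B-X" or "I-X" with the same entity type X.
    if to_label.startswith("I-"):
        return from_label in (f"B-{to_label[2:]}", f"I-{to_label[2:]}")
    return False

# These mirror the presence of (1, 2) and the absence of (0, 2) in the assertion above.
assert is_bio_transition_allowed("B-X", "I-X")      # B-X -> I-X is allowed
assert not is_bio_transition_allowed("O", "I-X")    # O -> I-X is not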
Example #6
    def __init__(self, token_vocabulary: Vocabulary, token_embedding_dim: int,
                 token_embedding_dropout: float,
                 gaz_vocabulary: PretrainedVocabulary,
                 gaz_word_embedding_dropout: float,
                 gaz_word_embedding_dim: int, num_lstm_layer: int,
                 lstm_hidden_size: int, gat_hidden_size: int,
                 gat_num_heads: int, gat_dropout: float, lstm_dropout: float,
                 alpha: float, fusion_strategy: str,
                 label_vocabulary: LabelVocabulary):

        super().__init__()

        assert gaz_word_embedding_dim == lstm_hidden_size * 2, \
            f"gaz_word_embedding_dim: {gaz_word_embedding_dim} must equal " \
            f"lstm_hidden_size * 2: {lstm_hidden_size * 2}, because both serve as graph node features, so their sizes must match"

        self.token_vocabulary = token_vocabulary
        self.label_vocabulary = label_vocabulary

        self.token_embedding_dropout = Dropout(token_embedding_dropout)

        if isinstance(self.token_vocabulary, Vocabulary):
            self.token_embedding: Embedding = Embedding(
                num_embeddings=self.token_vocabulary.size,
                embedding_dim=token_embedding_dim,
                padding_idx=self.token_vocabulary.padding_index)

        elif isinstance(self.token_vocabulary, PretrainedVocabulary):
            self.token_embedding: Embedding = Embedding.from_pretrained(
                self.token_vocabulary.embedding_matrix,
                freeze=True,
                padding_idx=self.token_vocabulary.padding_index)

        self.gaz_word_embedding = Embedding.from_pretrained(
            gaz_vocabulary.embedding_matrix,
            freeze=True,
            padding_idx=gaz_vocabulary.padding_index)
        self.gaz_word_embedding_dropout = Dropout(gaz_word_embedding_dropout)

        # bilstm
        bilstm = DynamicRnn(rnn=LSTM(input_size=token_embedding_dim,
                                     hidden_size=lstm_hidden_size,
                                     num_layers=num_lstm_layer,
                                     batch_first=True,
                                     bidirectional=True))
        self.bilstm_seq2seq = RnnSeq2Seq(bilstm)
        self.lstm_dropout = Dropout(lstm_dropout)
        self.lstm_encoding_feed_forward = Linear(
            in_features=lstm_hidden_size * 2,
            out_features=self.label_vocabulary.label_size)
        # C-Graph
        self.c_gat = GAT(in_features=2 * lstm_hidden_size,
                         out_features=label_vocabulary.label_size,
                         dropout=gat_dropout,
                         alpha=alpha,
                         num_heads=gat_num_heads,
                         hidden_size=gat_hidden_size)

        # T-Graph
        self.t_gat = GAT(in_features=2 * lstm_hidden_size,
                         out_features=label_vocabulary.label_size,
                         dropout=gat_dropout,
                         alpha=alpha,
                         num_heads=gat_num_heads,
                         hidden_size=gat_hidden_size)

        # L-Graph
        self.l_gat = GAT(in_features=2 * lstm_hidden_size,
                         out_features=label_vocabulary.label_size,
                         dropout=gat_dropout,
                         alpha=alpha,
                         num_heads=gat_num_heads,
                         hidden_size=gat_hidden_size)

        if fusion_strategy == "m":
            self.fusion_layer = MFunsionLayer(
                label_size=label_vocabulary.label_size)
        elif fusion_strategy == "v":
            self.fusion_layer = VFusionLayer(
                label_size=label_vocabulary.label_size)
        elif fusion_strategy == "n":
            self.fusion_layer = NFusionLayer()
        else:
            raise RuntimeError(
                f"fusion_strategy must be one of: m, v, n, but got {fusion_strategy}")
        # crf
        constraints = BIO.allowed_transitions(
            label_vocabulary=self.label_vocabulary)
        self.crf = ConditionalRandomField(
            num_tags=self.label_vocabulary.label_size, constraints=constraints)
Example #7
    def __init__(self,
                 token_vocabulary: Vocabulary,
                 token_embedding_dim: int,
                 token_embedding_dropout: float,
                 gaz_vocabulary: PretrainedVocabulary,
                 gaz_word_embedding_dim: int,
                 gaz_word_embedding_dropout: float,
                 label_vocabulary: LabelVocabulary,
                 hidden_size: int,
                 lstm_dropout: float):
        """

        :param token_vocabulary: token vocabulary
        :param token_embedding_dim: token embedding 维度
        :param token_embedding_dropout: token embedding dropout
        :param gaz_vocabulary: gaz vocabualry
        :param gaz_word_embedding_dim: gaz word embedding 维度
        :param gaz_word_embedding_dropout: gaz word embedding droupout
        :param label_vocabulary: labe vocabulary
        :param hidden_size: lattice lstm 隐层输出, 2*hidden_size, 因为使用了双向的
        :param lstm_dropout: lstm dropout
        """

        super().__init__()

        self.token_vocabulary = token_vocabulary
        self.label_vocabulary = label_vocabulary

        self.token_embedding_dropout = Dropout(token_embedding_dropout)
        self.lstm_dropout = Dropout(lstm_dropout)

        if isinstance(self.token_vocabulary, Vocabulary):
            self.token_embedding: Embedding = Embedding(num_embeddings=self.token_vocabulary.size,
                                                        embedding_dim=token_embedding_dim,
                                                        padding_idx=self.token_vocabulary.padding_index)

        elif isinstance(self.token_vocabulary, PretrainedVocabulary):
            self.token_embedding: Embedding = Embedding.from_pretrained(self.token_vocabulary.embedding_matrix,
                                                                        freeze=True,
                                                                        padding_idx=self.token_vocabulary.padding_index)

        self.gaz_word_embedding = Embedding.from_pretrained(gaz_vocabulary.embedding_matrix,
                                                            freeze=True,
                                                            padding_idx=gaz_vocabulary.padding_index)
        # A bidirectional Lattice LSTM is used by default.
        # Forward lattice lstm
        self.forward_lattice_lstm = LatticeLSTM(input_dim=token_embedding_dim,
                                                hidden_dim=hidden_size,
                                                gaz_word_embedding_dim=gaz_word_embedding_dim,
                                                gaz_word_embedding=self.gaz_word_embedding,
                                                gaz_word_embedding_dropout=gaz_word_embedding_dropout,
                                                left2right=True)

        # Backward lattice lstm
        self.backward_lattice_lstm = LatticeLSTM(input_dim=token_embedding_dim,
                                                 hidden_dim=hidden_size,
                                                 gaz_word_embedding_dim=gaz_word_embedding_dim,
                                                 gaz_word_embedding=self.gaz_word_embedding,
                                                 gaz_word_embedding_dropout=gaz_word_embedding_dropout,
                                                 left2right=False)
        # Project the bidirectional lattice lstm output into the label space
        self.linear = Linear(in_features=(hidden_size * 2),
                             out_features=label_vocabulary.label_size)

        # crf
        constraints = BIO.allowed_transitions(label_vocabulary=self.label_vocabulary)
        self.crf = ConditionalRandomField(num_tags=self.label_vocabulary.label_size,
                                          constraints=constraints)

        self.reset_parameters()
Example #8
    def __init__(self, vocabulary_builder: VocabularyBuilder,
                 word_embedding_dim: int, rnn_type: str, hidden_size: int,
                 num_layer: int, dropout: float, is_used_crf: bool):

        super().__init__()

        self.word_embedding_dim = word_embedding_dim
        self.token_vocabulary = vocabulary_builder.token_vocabulary
        self.label_vocabulary = vocabulary_builder.label_vocabulary
        self.is_used_crf = is_used_crf

        if isinstance(self.token_vocabulary, Vocabulary):
            self.embedding: Embedding = Embedding(
                num_embeddings=self.token_vocabulary.size,
                embedding_dim=word_embedding_dim,
                padding_idx=self.token_vocabulary.padding_index)

        elif isinstance(self.token_vocabulary, PretrainedVocabulary):
            self.embedding: Embedding = Embedding.from_pretrained(
                self.token_vocabulary.embedding_matrix,
                freeze=True,
                padding_idx=self.token_vocabulary.padding_index)

        self.hidden_size = hidden_size

        if rnn_type == DynamicRnn.LSTM:

            lstm = LSTM(input_size=word_embedding_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layer,
                        bidirectional=True,
                        dropout=dropout,
                        batch_first=True)
            dynamic_rnn = DynamicRnn(rnn=lstm)
        elif rnn_type == DynamicRnn.GRU:
            gru = GRU(input_size=word_embedding_dim,
                      hidden_size=hidden_size,
                      num_layers=num_layer,
                      bidirectional=True,
                      dropout=dropout,
                      batch_first=True)
            dynamic_rnn = DynamicRnn(rnn=gru)
        else:
            raise RuntimeError(
                f"rnn_type: {rnn_type} must be {DynamicRnn.LSTM} or {DynamicRnn.GRU}"
            )

        self.rnn_seq2seq = RnnSeq2Seq(dynamic_rnn=dynamic_rnn)

        self.liner = Linear(in_features=hidden_size * 2,
                            out_features=self.label_vocabulary.label_size)

        if self.is_used_crf:
            constraints = BIO.allowed_transitions(
                label_vocabulary=self.label_vocabulary)
            self.crf = ConditionalRandomField(
                num_tags=self.label_vocabulary.label_size,
                constraints=constraints)
        else:
            self.crf = None

        self.reset_parameters()
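
A minimal sketch of how a tagger like Example #8 might compute its loss and predictions depending on whether the CRF is enabled; the method name loss_and_predictions and its arguments are illustrative assumptions, not part of the example above.

    # Hypothetical loss/decode step for the RNN tagger above (a sketch, not the library's API).
    def loss_and_predictions(self, logits, tags, mask):
        if self.crf is not None:
            # With a CRF: negative log-likelihood loss and constrained Viterbi decoding.
            loss = -self.crf(logits, tags, mask)
            predictions = [path for path, score in self.crf.viterbi_tags(logits, mask)]
        else:
            # Without a CRF: token-level cross-entropy over the unmasked positions only.
            active = mask.view(-1).bool()
            loss = torch.nn.functional.cross_entropy(
                logits.view(-1, logits.size(-1))[active], tags.view(-1)[active])
            predictions = logits.argmax(dim=-1)
        return loss, predictions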