Example #1
import torch
import torch.nn as nn
# ConditionalRandomField here matches the AllenNLP CRF API; BiGruLayer and
# sequence_mask are project-specific helpers assumed to be defined elsewhere.
from allennlp.modules.conditional_random_field import ConditionalRandomField


class LacNet(nn.Module):
    def __init__(self, args):
        super(LacNet, self).__init__()

        vocab_size = args.vocab_size
        word_dim = args.word_dim
        num_gru_layers = args.num_gru_layers
        num_labels = args.num_labels
        hidden_dim = args.hidden_dim

        self.word_emb = nn.Embedding(vocab_size, word_dim)
        self.gru_layers = nn.ModuleList(
            [BiGruLayer(args) for _ in range(num_gru_layers)])
        self.emission = nn.Linear(hidden_dim * 2, num_labels)

        self.crf = ConditionalRandomField(num_labels)
        # self.crf_decode = crf_decoding()
        # self.crf_cost = linear_chain_crf()

    def forward(self, x, lens=None):
        # (batch, seq_len) token ids -> (batch, seq_len, word_dim) embeddings.
        feats = self.word_emb(x)
        for gru in self.gru_layers:
            feats = gru(feats)
        # Per-token emission scores over the label set.
        feats = self.emission(feats)

        if lens is None:
            # Assume every sequence in the batch is full length.
            lens = torch.full((x.size(0),), x.size(1),
                              dtype=torch.long, device=x.device)
        mask = sequence_mask(lens)

        # Run the emission scores through the Viterbi decoding algorithm.
        preds = self.crf.viterbi_tags(feats, mask)

        # loglik = self.crf(feats, labels, mask=mask)
        # loss = -1. * loglik
        return preds

    def get_trainable_params(self):
        # Parameters of every sub-module defined in __init__.
        module_params = [
            self.word_emb.parameters(),
            self.gru_layers.parameters(),
            self.emission.parameters(),
            self.crf.parameters()
        ]
        return module_params
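
A minimal usage sketch for the class above, assuming BiGruLayer and sequence_mask are available from the same project; the hyperparameter values and batch shapes below are hypothetical.

import argparse
import torch

# Hypothetical hyperparameters; the real project builds its own args object.
args = argparse.Namespace(vocab_size=10000, word_dim=128, num_gru_layers=2,
                          num_labels=7, hidden_dim=256)
model = LacNet(args)

token_ids = torch.randint(0, args.vocab_size, (4, 20))  # (batch, seq_len)
lens = torch.tensor([20, 18, 15, 9])                     # true sequence lengths
preds = model(token_ids, lens)                           # Viterbi-decoded tag paths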
Example #2
from typing import List, cast

import torch
import torch.nn as nn
# Assuming the AllenNLP CRF, whose viterbi_tags/forward API this snippet follows.
from allennlp.modules.conditional_random_field import ConditionalRandomField


class BertLstmCrf(nn.Module):
    """
    bert_lstm_crf model
    """
    def __init__(
        self,
        bert_model,
        num_labels=9,
        embedding_dim=512,
        hidden_dim=512,
        rnn_layers=1,
        rnn_dropout=0.1,
        output_dropout=0.1,
        use_cuda=False,
    ):
        super(BertLstmCrf, self).__init__()
        self.bert_encoder = bert_model

        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.rnn_layers = rnn_layers

        self.lstm = None
        if rnn_layers > 0:
            self.lstm = nn.LSTM(
                embedding_dim,
                hidden_dim,
                num_layers=rnn_layers,
                bidirectional=True,
                dropout=rnn_dropout,
                batch_first=True,
            )

        # TODO: add constraints
        constraints = None
        include_start_end_transitions = False
        self.crf = ConditionalRandomField(
            num_labels,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )

        self.linear = nn.Linear(hidden_dim * 2, num_labels)
        self.num_labels = num_labels

        self.output_dropout = nn.Dropout(p=output_dropout)

    def rand_init_hidden(self, batch_size):
        """
        random initialize hidden variable
        """
        return (
            torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim),
            torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim),
        )

    def forward(self, **kwargs):
        """
        args:
            kwargs: encoder inputs such as input_ids and attention_mask of
                shape (batch_size, seq_len), plus optional labels of the same
                shape for computing the CRF loss

        return:
            (loss, logits, predicted_tags); loss is None when no labels are given
        """

        # Forward everything except the labels to the BERT encoder.
        kwargs_copy = {k: v for k, v in kwargs.items() if k != "labels"}

        batch_size = kwargs["input_ids"].size(0)
        seq_length = kwargs["input_ids"].size(1)

        bert_outputs = self.bert_encoder(**kwargs_copy)
        sequence_output = bert_outputs[0]

        if self.lstm is not None:
            hidden = self.rand_init_hidden(batch_size)
            if kwargs["input_ids"].is_cuda:
                # nn.LSTM expects the initial state as a tuple of tensors.
                hidden = tuple(h.cuda() for h in hidden)
            sequence_output, hidden = self.lstm(sequence_output, hidden)
            # Flatten to (batch * seq_len, 2 * hidden_dim) for the projection.
            sequence_output = sequence_output.contiguous().view(
                -1, self.hidden_dim * 2)
            sequence_output = self.output_dropout(sequence_output)

        out = self.linear(sequence_output)
        logits = out.contiguous().view(batch_size, seq_length, -1)

        best_paths = self.crf.viterbi_tags(logits,
                                           kwargs["attention_mask"].long(),
                                           top_k=1)
        # Just get the top tags and ignore the scores.
        predicted_tags = cast(List[List[int]], [x[0][0] for x in best_paths])

        labels = kwargs.get("labels")
        if labels is not None:
            # The loss is the negative CRF log-likelihood of the gold tags.
            log_likelihood = self.crf(logits, labels, kwargs["attention_mask"])
            loss = -log_likelihood
            return loss, logits, predicted_tags

        return None, logits, predicted_tags
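
A usage sketch with a Hugging Face encoder, assuming the transformers library is installed; the model name, label count, and dummy labels are hypothetical.

import torch
from transformers import AutoModel, AutoTokenizer

# bert-base-cased produces 768-dimensional hidden states, so embedding_dim
# is set to 768 here instead of the 512 default.
bert = AutoModel.from_pretrained("bert-base-cased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
model = BertLstmCrf(bert, num_labels=9, embedding_dim=768, hidden_dim=512)

enc = tokenizer(["John lives in Berlin"], return_tensors="pt", padding=True)
labels = torch.zeros_like(enc["input_ids"])  # dummy gold tag ids for the sketch
loss, logits, tags = model(**enc, labels=labels)
loss.backward()  # negative CRF log-likelihood drives training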
Example #3
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# ConditionalRandomField is assumed to come from the project's own CRF module;
# its constructor below takes two extra positional arguments.


class LSTM_CRF(nn.Module):
    def __init__(self,
                 tagset_size,
                 vocab_size,
                 hidden_dim,
                 embedding_dim,
                 pretrained_embeddings,
                 dropout,
                 num_layers,
                 pad_index,
                 device,
                 fine_tune=True,
                 bidirectional=True):

        super(LSTM_CRF, self).__init__()

        self.tagset_size = tagset_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(p=dropout)
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.pad_index = pad_index
        self.device = device

        self.embedding_layer = nn.Embedding(self.vocab_size,
                                            self.embedding_dim)

        if type(pretrained_embeddings) == torch.Tensor:
            self.embedding_layer.weight.data.copy_(pretrained_embeddings)

        if not fine_tune:
            self.embedding_layer.weight.requires_grad = False

        self.lstm = nn.LSTM(self.embedding_dim,
                            self.hidden_dim,
                            num_layers=self.num_layers,
                            bidirectional=self.bidirectional)

        self.hidden2tag = nn.Linear(2 * self.hidden_dim, self.tagset_size)

        self.crf = ConditionalRandomField(self.tagset_size, 1, 2)

    def get_lstm_feats(self, batch):

        lens = batch['lens']
        word_sequences = batch['word_sequences']

        embeddings = self.embedding_layer(word_sequences)
        embeddings = self.dropout(embeddings)

        # pack_padded_sequence expects sequences ordered by decreasing length.
        packed_input = pack_padded_sequence(embeddings, lens, batch_first=True)
        packed_hidden_states, _ = self.lstm(packed_input)
        hidden_states, _ = pad_packed_sequence(packed_hidden_states,
                                               batch_first=True)
        hidden_states = self.dropout(hidden_states)

        # Per-token emission scores: (batch, max_len, tagset_size).
        logits = self.hidden2tag(hidden_states)

        return logits

    def loss(self, batch):
        logits = self.get_lstm_feats(batch)
        mask = batch['mask'].squeeze(1)
        return self.crf(logits, batch['tag_sequences'], mask)

    def forward(self, batch):
        logits = self.get_lstm_feats(batch)
        mask = batch['mask'].squeeze(1)
        # Viterbi decoding runs on CPU copies of the scores and mask.
        all_tags = self.crf.viterbi_tags(logits.to('cpu'), mask.to('cpu'))
        max_len = max(batch['lens'])
        # Pad every decoded tag sequence with zeros to the batch max length
        # so the predictions can be stacked into a rectangular tensor.
        for i in range(len(all_tags)):
            all_tags[i] += [0 for _ in range(max_len - len(all_tags[i]))]
        return None, torch.tensor(all_tags)
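
A sketch of the batch dictionary the last model consumes, with hypothetical sizes; it assumes the project's CRF module is available, and the sequences are padded and ordered by decreasing length, as pack_padded_sequence expects.

import torch

model = LSTM_CRF(tagset_size=5, vocab_size=1000, hidden_dim=128,
                 embedding_dim=100, pretrained_embeddings=None, dropout=0.5,
                 num_layers=1, pad_index=0, device='cpu')

batch = {
    'word_sequences': torch.randint(1, 1000, (2, 6)),  # padded token ids, longest first
    'tag_sequences': torch.randint(0, 5, (2, 6)),       # gold tag ids, same shape
    'lens': [6, 4],                                      # true lengths, decreasing
    'mask': torch.tensor([[[1, 1, 1, 1, 1, 1]],
                          [[1, 1, 1, 1, 0, 0]]]),        # squeezed to (batch, seq_len)
}
loss = model.loss(batch)   # CRF objective on the gold tag sequences
_, tags = model(batch)     # zero-padded Viterbi predictions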