Example #1
    def __init__(self, emb_matrix, config, num_tags):
        super(NER_SOFTMAX_CHAR, self).__init__()

        # Pretrained word and character embedding matrices, kept trainable (freeze=False)
        embd_vector = torch.from_numpy(emb_matrix['word']).float()
        self.word_embeds = nn.Embedding.from_pretrained(embd_vector, freeze=False)

        embd_vector = torch.from_numpy(emb_matrix['char']).float()
        self.char_embeds = nn.Embedding.from_pretrained(embd_vector, freeze=False)

        # Character-level BiLSTM
        self.lstm_char = nn.LSTM(self.char_embeds.embedding_dim,
                                 config.char_lstm_dim,
                                 num_layers=1, bidirectional=True, batch_first=True)

        # Word-level BiLSTM input: word embedding concatenated with the
        # bidirectional character features (hence char_lstm_dim * 2)
        input_size = self.word_embeds.embedding_dim + config.char_lstm_dim * 2

        self.lstm = nn.LSTM(input_size,
                            config.word_lstm_dim,
                            num_layers=1, bidirectional=True, batch_first=True)

        self.dropout = nn.Dropout(config.dropout_rate)
        # Project the 2 * word_lstm_dim BiLSTM output down, squash with tanh,
        # then map each token to tag scores for the softmax
        self.hidden_layer = nn.Linear(config.word_lstm_dim * 2, config.word_lstm_dim)
        self.tanh_layer = nn.Tanh()

        self.hidden2tag = nn.Linear(config.word_lstm_dim, num_tags)

        self.config = config

        # Project-specific weight initialization helpers
        model_utils.init_lstm_wt(self.lstm_char)
        model_utils.init_lstm_wt(self.lstm)
        model_utils.init_linear_wt(self.hidden_layer)
        model_utils.init_linear_wt(self.hidden2tag)
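
For orientation, here is a minimal instantiation sketch of the constructor above. The embedding sizes and the stub config are made up; it assumes NER_SOFTMAX_CHAR and its model_utils helpers are importable from the surrounding project and that config exposes char_lstm_dim, word_lstm_dim and dropout_rate.

import numpy as np

# Hypothetical sizes; the real matrices come from pretrained embeddings.
emb_matrix = {
    'word': np.random.randn(10000, 100).astype(np.float32),  # vocab_size x word_dim
    'char': np.random.randn(85, 25).astype(np.float32),      # n_chars x char_dim
}

class StubConfig:  # stand-in for the project's config object
    char_lstm_dim = 25
    word_lstm_dim = 100
    dropout_rate = 0.5

model = NER_SOFTMAX_CHAR(emb_matrix, StubConfig(), num_tags=9)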
Example #2
    def __init__(self, vocab, config):
        super(NER_SOFTMAX_CHAR, self).__init__()
        # Word embeddings come from a pretrained matrix built over the vocab and
        # stay trainable; character embeddings are learned from scratch
        word_emb_matrix = get_word_embd(vocab, config)
        embd_vector = torch.from_numpy(word_emb_matrix).float()

        self.word_embeds = nn.Embedding.from_pretrained(embd_vector,
                                                        freeze=False)
        self.char_embeds = nn.Embedding(len(vocab.char_to_id),
                                        config.char_embd_dim,
                                        padding_idx=Constants.PAD_ID)
        # Optional capitalization-feature embeddings
        if config.is_caps:
            self.caps_embeds = nn.Embedding(vocab.get_caps_cardinality(),
                                            config.caps_embd_dim,
                                            padding_idx=Constants.PAD_ID)

        # Character-level BiLSTM
        self.lstm_char = nn.LSTM(self.char_embeds.embedding_dim,
                                 config.char_lstm_dim,
                                 num_layers=1,
                                 bidirectional=True,
                                 batch_first=True)
        # Word-level BiLSTM over [word embedding ; bidirectional char features
        # (; caps embedding when enabled)]
        if config.is_caps:
            self.lstm = nn.LSTM(self.word_embeds.embedding_dim +
                                config.char_embd_dim * 2 +
                                config.caps_embd_dim,
                                config.word_lstm_dim,
                                num_layers=1,
                                bidirectional=True,
                                batch_first=True)
        else:
            self.lstm = nn.LSTM(self.word_embeds.embedding_dim +
                                config.char_embd_dim * 2,
                                config.word_lstm_dim,
                                num_layers=1,
                                bidirectional=True,
                                batch_first=True)

        self.dropout = nn.Dropout(config.dropout_rate)
        # Project the BiLSTM output down, squash with tanh, then map to tag scores
        self.hidden_layer = nn.Linear(config.word_lstm_dim * 2,
                                      config.word_lstm_dim)
        self.tanh_layer = nn.Tanh()

        self.hidden2tag = nn.Linear(config.word_lstm_dim, len(vocab.id_to_tag))

        self.config = config

        # Project-specific weight initialization; the scratch-trained embeddings
        # start from a uniform distribution
        init_lstm_wt(self.lstm_char)
        init_lstm_wt(self.lstm)
        init_linear_wt(self.hidden_layer)
        init_linear_wt(self.hidden2tag)
        self.char_embeds.weight.data.uniform_(-1., 1.)
        if config.is_caps:
            self.caps_embeds.weight.data.uniform_(-1., 1.)
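
Both constructors wire up the same pipeline: character ids run through their own BiLSTM, the two final hidden states are concatenated and appended to the word embedding (plus, in Example #2, the optional caps embedding), and that vector feeds the word-level BiLSTM before the tanh projection and tag layer. Note that Example #2 sizes the word LSTM input with config.char_embd_dim * 2, which lines up with the char BiLSTM output only when char_embd_dim equals char_lstm_dim. Below is a standalone shape sketch with hypothetical dimensions, not the project's actual forward() method.

import torch
import torch.nn as nn

# Hypothetical dimensions chosen so the shapes line up
char_embd_dim, char_lstm_dim = 25, 25
word_emb_dim, word_lstm_dim = 100, 100
batch, seq_len, word_len = 2, 5, 8

char_embeds = nn.Embedding(80, char_embd_dim, padding_idx=0)
lstm_char = nn.LSTM(char_embd_dim, char_lstm_dim, num_layers=1,
                    bidirectional=True, batch_first=True)
lstm = nn.LSTM(word_emb_dim + 2 * char_lstm_dim, word_lstm_dim,
               num_layers=1, bidirectional=True, batch_first=True)

# One row of character ids per token in the batch
char_ids = torch.randint(1, 80, (batch * seq_len, word_len))
_, (h_n, _) = lstm_char(char_embeds(char_ids))   # h_n: (2, B*T, char_lstm_dim)
char_feat = torch.cat([h_n[0], h_n[1]], dim=-1)  # (B*T, 2 * char_lstm_dim)
char_feat = char_feat.view(batch, seq_len, -1)   # (B, T, 2 * char_lstm_dim)

word_emb = torch.randn(batch, seq_len, word_emb_dim)     # stand-in word embeddings
out, _ = lstm(torch.cat([word_emb, char_feat], dim=-1))  # (B, T, 2 * word_lstm_dim)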