# Example #1
0
class RawEmbedding(AbstractEmbedding):
    """Embedding backed by a freshly constructed ``nn.Embedding`` table.

    The vocabulary is built by an ``Indexer`` fed with the first element of
    every item in the ``'train'`` split returned by
    ``Dataset().get_instance()``. The table has one row per indexer entry
    (``len(self.indexer)``) and a fixed width of 100.
    """

    def __init__(self, device):
        """Build the vocabulary indexer and the embedding table.

        Args:
            device: Forwarded to the base-class constructor and used as the
                target of ``self.embedding.to(...)``.
        """
        super(RawEmbedding, self).__init__(device=device)

        # NOTE(review): ``self.with_del_stopwords`` is not assigned in this
        # class; presumably the base-class constructor sets it -- confirm.
        self.indexer = Indexer(
            special_tokens={
                '<s>': 0,
                '<unk>': 1,
                '<pad>': 2,
                # The backslash is escaped explicitly: a bare ``\s`` is an
                # invalid escape sequence (SyntaxWarning on Python 3.12+).
                # The runtime key is unchanged: ``<``, backslash, ``s``, ``>``.
                # NOTE(review): this looks like it was meant to be '</s>';
                # kept as-is so existing vocabularies stay compatible.
                '<\\s>': 3,
                '<mask>': 4,
            },
            with_del_stopwords=self.with_del_stopwords,
        )

        # Only the first element of each training item is indexed.
        datasets = Dataset().get_instance()
        sentences = [pairs[0] for pairs in datasets['train']]

        # Call order is preserved from the original: counts first, then add.
        self.indexer.count_word_in_text(sentences)
        self.indexer.add_sentences(sentences)

        self.embedding_dim = 100
        self.embedding = nn.Embedding(
            num_embeddings=len(self.indexer),
            embedding_dim=self.embedding_dim,
            padding_idx=self.indexer.padding_index,
        )
        self.embedding.to(device)