Example #1
    def __init__(self, config, pretrained_word_embedding, writer):
        super(NewsEncoder, self).__init__()
        self.config = config
        if pretrained_word_embedding is None:
            self.word_embedding = nn.Embedding(config.num_words,
                                               config.word_embedding_dim,
                                               padding_idx=0)
        else:
            self.word_embedding = nn.Embedding.from_pretrained(
                pretrained_word_embedding, freeze=False, padding_idx=0) 
        self.word_embedding.to(device)

        # category
        self.category_embedding = nn.Embedding(config.num_categories,
                                               config.category_embedding_dim,
                                               padding_idx=0)
        self.category_embedding.to(device)
        self.category_linear = nn.Linear(config.category_embedding_dim,
                                         config.num_filters)
        # subcategory
        self.subcategory_linear = nn.Linear(config.category_embedding_dim,
                                            config.num_filters)
        assert config.window_size >= 1 and config.window_size % 2 == 1
        # title 
        self.title_CNN = nn.Conv2d(
            1, 
            config.num_filters,
            (config.window_size, config.word_embedding_dim),
            padding=(int((config.window_size - 1) / 2), 0))
        self.title_attention = AdditiveAttention(config.query_vector_dim,
                                                 config.num_filters)
        self.title_CNN.to(device)
        # abstract
        self.abstract_CNN = nn.Conv2d(
            1,
            config.num_filters,
            (config.window_size, config.word_embedding_dim),
            padding=(int((config.window_size - 1) / 2), 0)) 
        self.abstract_CNN.to(device) 
        self.abstract_attention = AdditiveAttention(config.query_vector_dim,
                                                    config.num_filters) 

        # final_att
        self.final_attention = AdditiveAttention(config.query_vector_dim,
                                                 config.num_filters, writer,
                                                 'Train/NewsAttentionWeight',
                                                 ['category', 'subcategory',
                                                  'title', 'abstract'])
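Every example in this listing instantiates an AdditiveAttention(query_vector_dim, candidate_vector_dim) whose definition is not shown. Below is a minimal sketch of the usual query-vector attention; the interface is an assumption, and it omits the extra writer/tag/names arguments Example #1 passes for TensorBoard logging.

import torch
import torch.nn as nn
import torch.nn.functional as F

class AdditiveAttention(nn.Module):
    # Query-vector (additive) attention that pools a sequence of candidate vectors.
    def __init__(self, query_vector_dim, candidate_vector_dim):
        super(AdditiveAttention, self).__init__()
        self.linear = nn.Linear(candidate_vector_dim, query_vector_dim)
        self.query_vector = nn.Parameter(
            torch.empty(query_vector_dim).uniform_(-0.1, 0.1))

    def forward(self, candidate_vector):
        # candidate_vector: (batch_size, candidate_size, candidate_vector_dim)
        temp = torch.tanh(self.linear(candidate_vector))
        # attention weights over candidates: (batch_size, candidate_size)
        weights = F.softmax(torch.matmul(temp, self.query_vector), dim=1)
        # weighted sum: (batch_size, candidate_vector_dim)
        return torch.bmm(weights.unsqueeze(1), candidate_vector).squeeze(1)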
Example #2
 def __init__(self, config, pretrained_word_embedding):
     super(NewsEncoder, self).__init__()
     self.config = config
     if pretrained_word_embedding is None:
         word_embedding = nn.Embedding(config.num_words,
                                       config.word_embedding_dim,
                                       padding_idx=0)
     else:
         word_embedding = nn.Embedding.from_pretrained(
             pretrained_word_embedding, freeze=False, padding_idx=0)
     assert len(config.dataset_attributes['news']) > 0
     text_encoders_candidates = ['title', 'abstract']
     self.text_encoders = nn.ModuleDict({
         name:
         TextEncoder(word_embedding, config.word_embedding_dim,
                     config.num_attention_heads, config.query_vector_dim,
                     config.dropout_probability)
         for name in (set(config.dataset_attributes['news'])
                      & set(text_encoders_candidates))
     })
     category_embedding = nn.Embedding(config.num_categories,
                                       config.category_embedding_dim,
                                       padding_idx=0)
     element_encoders_candidates = ['category', 'subcategory']
     self.element_encoders = nn.ModuleDict({
         name:
         ElementEncoder(category_embedding, config.category_embedding_dim,
                        config.word_embedding_dim)
         for name in (set(config.dataset_attributes['news'])
                      & set(element_encoders_candidates))
     })
     if len(config.dataset_attributes['news']) > 1:
         self.final_attention = AdditiveAttention(config.query_vector_dim,
                                                  config.word_embedding_dim)
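Example #2 routes category and subcategory through an ElementEncoder that is not reproduced here. A plausible minimal version, assuming it only embeds the id and projects it to the shared news-vector dimension (a sketch, not the repository's exact code):

import torch.nn as nn
import torch.nn.functional as F

class ElementEncoder(nn.Module):
    def __init__(self, embedding, linear_input_dim, linear_output_dim):
        super(ElementEncoder, self).__init__()
        self.embedding = embedding  # shared nn.Embedding passed in by the caller
        self.linear = nn.Linear(linear_input_dim, linear_output_dim)

    def forward(self, element):
        # element: (batch_size,) integer ids -> (batch_size, linear_output_dim)
        return F.relu(self.linear(self.embedding(element)))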
Example #3
    def __init__(self, config, pretrained_word_embedding):
        super(NewsEncoder, self).__init__()
        self.config = config
        self.device = torch.device(config.device_str)

        if pretrained_word_embedding is None:
            self.word_embedding = nn.Embedding(config.num_words,
                                               config.word_embedding_dim,
                                               padding_idx=0)
        else:
            self.word_embedding = nn.Embedding.from_pretrained(
                pretrained_word_embedding, freeze=False, padding_idx=0)
        assert config.window_size >= 1 and config.window_size % 2 == 1
        self.title_CNN = nn.Conv2d(
            1,
            config.num_filters,
            (config.window_size, config.word_embedding_dim),
            padding=(int((config.window_size - 1) / 2), 0))
        self.abstract_CNN = nn.Conv2d(
            1,
            config.num_filters,
            (config.window_size, config.word_embedding_dim),
            padding=(int((config.window_size - 1) / 2), 0))
        self.title_attention = AdditiveAttention(config.query_vector_dim,
                                                 config.num_filters)
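Example #3 ends before the forward pass. A sketch of how the title branch of such an encoder is typically applied, assuming the input is a (batch_size, num_words_title) tensor of word ids and that config.dropout_probability exists (neither appears in the snippet):

import torch.nn.functional as F

def encode_title(self, title):
    # title: (batch_size, num_words_title) word ids
    # -> (batch_size, num_words_title, word_embedding_dim)
    embedded = F.dropout(self.word_embedding(title),
                         p=self.config.dropout_probability,
                         training=self.training)
    # add a channel dim, convolve, drop the width-1 dim:
    # -> (batch_size, num_filters, num_words_title)
    convoluted = self.title_CNN(embedded.unsqueeze(1)).squeeze(3)
    activated = F.dropout(F.relu(convoluted),
                          p=self.config.dropout_probability,
                          training=self.training)
    # pool the words into one title vector: (batch_size, num_filters)
    return self.title_attention(activated.transpose(1, 2))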
Example #4
 def __init__(self, config):
     super(UserEncoder, self).__init__()
     self.config = config
     self.multihead_self_attention = MultiHeadSelfAttention(
         config.word_embedding_dim, config.num_attention_heads)
     self.additive_attention = AdditiveAttention(config.query_vector_dim,
                                                 config.word_embedding_dim)
Example #5
    def __init__(self, config):
        super(NewsEncoder, self).__init__()
        self.config = config
        assert len(config.dataset_attributes['news']) > 0
        self.text_encoders = nn.ModuleDict()
        if config.fine_tune:
            roberta = RobertaModel.from_pretrained('roberta-base')
            if self.training:
                roberta.train()
        else:
            roberta = None

        for x in ['title', 'abstract']:
            if x in ' '.join(config.dataset_attributes['news']):
                self.text_encoders[x] = TextEncoder(roberta,
                                                    config.word_embedding_dim,
                                                    config.num_attention_heads,
                                                    config.query_vector_dim,
                                                    config.dropout_probability,
                                                    config.roberta_level)

        category_embedding = nn.Embedding(config.num_categories,
                                          config.category_embedding_dim,
                                          padding_idx=0)
        element_encoders_candidates = ['category', 'subcategory']
        self.element_encoders = nn.ModuleDict({
            name:
            ElementEncoder(category_embedding, config.category_embedding_dim,
                           config.word_embedding_dim)
            for name in (set(config.dataset_attributes['news'])
                         & set(element_encoders_candidates))
        })
        if len(config.dataset_attributes['news']) > 1:
            self.final_attention = AdditiveAttention(config.query_vector_dim,
                                                     config.word_embedding_dim)
Example #6
    def __init__(self, config, pretrained_word_embedding,
                 pretrained_entity_embedding, pretrained_context_embedding):
        super(KCNN, self).__init__()
        self.config = config
        if pretrained_word_embedding is None:
            self.word_embedding = nn.Embedding(config.num_words,
                                               config.word_embedding_dim,
                                               padding_idx=0)
        else:
            self.word_embedding = nn.Embedding.from_pretrained(
                pretrained_word_embedding, freeze=False, padding_idx=0)
        self.entity_embedding = pretrained_entity_embedding
        self.context_embedding = pretrained_context_embedding
        self.transform_matrix = nn.Parameter(
            torch.empty(self.config.word_embedding_dim,
                        self.config.entity_embedding_dim).uniform_(-0.1, 0.1))
        self.transform_bias = nn.Parameter(
            torch.empty(self.config.word_embedding_dim).uniform_(-0.1, 0.1))

        self.conv_filters = nn.ModuleDict({
            str(x): nn.Conv2d(3 if self.context_embedding is not None else 2,
                              self.config.num_filters,
                              (x, self.config.word_embedding_dim))
            for x in self.config.window_sizes
        })
        self.additive_attention = AdditiveAttention(
            self.config.query_vector_dim, self.config.num_filters)
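Example #6 is a KCNN (DKN family): it learns transform_matrix and transform_bias, but the snippet stops before they are used. Dimensionally the matrix maps entity_embedding_dim to word_embedding_dim, so the transformation presumably looks roughly like the sketch below; entity_vector is assumed to be a (batch_size, num_words, entity_embedding_dim) tensor looked up from self.entity_embedding.

import torch

# project entities into the word-embedding space before stacking the channels
# that feed self.conv_filters
transformed = torch.tanh(
    torch.matmul(entity_vector, self.transform_matrix.t()) + self.transform_bias)
# transformed: (batch_size, num_words, word_embedding_dim)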
Example #7
 def __init__(self, word_embedding, word_embedding_dim, num_attention_heads,
              query_vector_dim, dropout_probability):
     super(TextEncoder, self).__init__()
     self.word_embedding = word_embedding
     self.dropout_probability = dropout_probability
     self.multihead_self_attention = MultiHeadSelfAttention(
         word_embedding_dim, num_attention_heads)
     self.additive_attention = AdditiveAttention(query_vector_dim,
                                                 word_embedding_dim)
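Example #7 is the NRMS-style text encoder; its forward pass is omitted. Assuming MultiHeadSelfAttention maps a (batch_size, num_words, word_embedding_dim) tensor to one of the same shape, the usual flow is (a sketch of the missing method, not verified against the source):

import torch.nn.functional as F

def forward(self, text):
    # text: (batch_size, num_words) word ids
    embedded = F.dropout(self.word_embedding(text),
                         p=self.dropout_probability,
                         training=self.training)
    # contextualize each word against the rest of the text
    attended = F.dropout(self.multihead_self_attention(embedded),
                         p=self.dropout_probability,
                         training=self.training)
    # pool into a single text vector: (batch_size, word_embedding_dim)
    return self.additive_attention(attended)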
Example #8
    def __init__(self, config, pretrained_word_embedding=None):
        super(NewsEncoder, self).__init__(config, pretrained_word_embedding)

        self.category_embedding = nn.Embedding(config.num_categories,
                                               config.category_embedding_dim)
        self.entity_embedding = nn.Embedding(config.num_entities,
                                             config.entity_embedding_dim,
                                             padding_idx=0)
        self.additive_attention = AdditiveAttention(
            config.query_vector_dim, config.category_embedding_dim)
Example #9
 def __init__(self, word_embedding, word_embedding_dim, num_filters,
              window_size, query_vector_dim, dropout_probability):
     super(TextEncoder, self).__init__()
     self.word_embedding = word_embedding
     self.dropout_probability = dropout_probability
     self.CNN = nn.Conv2d(1,
                          num_filters, (window_size, word_embedding_dim),
                          padding=(int((window_size - 1) / 2), 0))
     self.additive_attention = AdditiveAttention(query_vector_dim,
                                                 num_filters)
Example #10
 def __init__(self, roberta, word_embedding_dim, num_attention_heads,
              query_vector_dim, dropout_probability, roberta_level):
     super(TextEncoder, self).__init__()
     self.roberta = roberta
     self.reduce_dim = nn.Linear(768, word_embedding_dim)
     self.dropout_probability = dropout_probability
     self.roberta_level = roberta_level
     self.multihead_self_attention = MultiHeadSelfAttention(
         word_embedding_dim, num_attention_heads)
     self.additive_attention = AdditiveAttention(query_vector_dim,
                                                 word_embedding_dim)
Example #11
    def __init__(self, config, pretrained_word_embedding):
        super(NewsEncoder, self).__init__()
        self.config = config
        if pretrained_word_embedding is None:
            self.word_embedding = nn.Embedding(config.num_words,
                                               config.word_embedding_dim,
                                               padding_idx=0)
        else:
            self.word_embedding = nn.Embedding.from_pretrained(
                pretrained_word_embedding, freeze=False, padding_idx=0)

        self.multihead_self_attention = MultiHeadSelfAttention(
            config.word_embedding_dim, config.num_attention_heads)
        self.additive_attention = AdditiveAttention(config.query_vector_dim,
                                                    config.word_embedding_dim)
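All of these constructors read a plain attribute-access config object. A minimal way to exercise Example #11, with attribute names and values chosen here as assumptions to match what the snippet reads, and assuming its forward (not shown) accepts a (batch_size, num_words) tensor of word ids:

from types import SimpleNamespace
import torch

config = SimpleNamespace(num_words=70000,
                         word_embedding_dim=300,
                         num_attention_heads=15,
                         query_vector_dim=200)
encoder = NewsEncoder(config, pretrained_word_embedding=None)
# a fake batch of 4 titles, 20 word ids each
news_vector = encoder(torch.randint(0, config.num_words, (4, 20)))
# expected: news_vector has shape (4, config.word_embedding_dim)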
Example #12
    def __init__(self, config, pretrained_word_embedding=None):
        super(NewsEncoder, self).__init__(config, pretrained_word_embedding)

        # todo: get pre-trained word embedding

        self.category_embedding = nn.Embedding(config.num_categories, config.category_embedding_dim)
        self.entity_embedding = nn.Embedding(config.num_entities, config.entity_embedding_dim, padding_idx=0)

        if config.use_pretrain_word_embedding:
            self.word_embedding = nn.Embedding.from_pretrained(
                pretrained_word_embedding, freeze=False, padding_idx=0)
        else:
            self.word_embedding = nn.Embedding(config.num_words, config.word_embedding_dim,
                                               padding_idx=0)
        self.linear = nn.Linear(config.word_embedding_dim, config.category_embedding_dim)
        self.multihead_self_attention = MultiHeadSelfAttention(
            config.category_embedding_dim, config.num_attention_heads)

        self.additive_attention = AdditiveAttention(config.query_vector_dim,
                                                    config.category_embedding_dim)
Example #13
 def __init__(self, config):
     super(UserEncoder, self).__init__()
     self.config = config
     self.additive_attention = AdditiveAttention(config.query_vector_dim,
                                                 config.word_embedding_dim)
Example #14
 def __init__(self, config):
     super(UserEncoder, self).__init__()
     self.additive_attention = AdditiveAttention(config.query_vector_dim,
                                                 config.num_filters)
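Examples #13 and #14 are user encoders that hold nothing but an AdditiveAttention; the forward pass they omit typically just pools the clicked-news vectors. A sketch, assuming clicked_news_vector has shape (batch_size, num_clicked_news_a_user, num_filters):

def forward(self, clicked_news_vector):
    # clicked_news_vector: (batch_size, num_clicked_news_a_user, num_filters)
    # -> user vector: (batch_size, num_filters)
    return self.additive_attention(clicked_news_vector)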