Пример #1
0
    def __call__(self, raw_article_sents, raw_clusters):
        """Run the extractor on one article together with its cluster inputs.

        Args:
            raw_article_sents: sized collection of sentences, handed as-is
                to ``conver2id`` (presumably lists of tokens -- confirm
                against ``conver2id``).
            raw_clusters: indexable with at least three entries. Entry 0 is
                mapped through ``conver2id`` with the same vocabulary as the
                article; entries 1 and 2 are padded/tensorized unchanged.

        Returns:
            Whatever ``self._net.extract`` returns for this input.
        """
        self._net.eval()
        n_art = len(raw_article_sents)

        articles = conver2id(UNK, self._word2id, raw_article_sents)
        cluster_parts = (conver2id(UNK, self._word2id, raw_clusters[0]),
                         raw_clusters[1], raw_clusters[2])

        article = pad_batch_tensorize(articles, PAD, cuda=False,
                                      max_num=5).to(self._device)
        # All three cluster components go through the same pad/move step.
        clusters = tuple(
            pad_batch_tensorize(part, PAD, cuda=False,
                                max_num=4).to(self._device)
            for part in cluster_parts)

        # k is capped at the number of entries in raw_article_sents.
        return self._net.extract([article],
                                 clusters,
                                 k=min(n_art, self._max_ext))
 def __call__(self, raw_article_sents, raw_query):
     """Run the extractor on one article conditioned on a query.

     Both inputs are converted with ``conver2id`` using ``self._word2id``,
     padded with ``pad_batch_tensorize`` and moved to ``self._device``.

     Returns:
         The result of ``self._net.extract``; ``k`` is capped at
         ``len(raw_article_sents)``.
     """
     def _as_tensor(id_seqs):
         # Pad on CPU first, then move to the configured device.
         return pad_batch_tensorize(id_seqs, PAD, cuda=False).to(self._device)

     self._net.eval()
     num_sents = len(raw_article_sents)
     sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
     query_ids = conver2id(UNK, self._word2id, raw_query)
     article = _as_tensor(sent_ids)
     query = _as_tensor(query_ids)
     return self._net.extract([article],
                              k=min(num_sents, self._max_ext),
                              queries=[query])
Пример #3
0
 def __call__(self, raw_article_sents):
     """Extract from a single article.

     The article is converted with ``conver2id``, padded into one tensor on
     ``self._device`` and passed to ``self._net.extract`` as a one-element
     list. ``k`` is the smaller of ``len(raw_article_sents)`` and
     ``self._max_ext``.
     """
     self._net.eval()
     sent_count = len(raw_article_sents)
     sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
     padded = pad_batch_tensorize(sent_ids, PAD, cuda=False)
     article = padded.to(self._device)
     top_k = min(sent_count, self._max_ext)
     return self._net.extract([article], k=top_k)
Пример #4
0
 def _prepro(self, raw_article_sents):
     """Build decoder arguments and an extended id->word map for one input.

     Every word of the input that is missing from ``self._word2id`` is
     appended to per-call copies of the two vocabulary maps; the instance
     maps themselves are not modified.

     Returns:
         ``(dec_args, ext_id2word)`` where ``dec_args`` is the tuple
         ``(article, art_lens, extend_art, extend_vsize, START, END, UNK,
         self._max_len)``.
     """
     ext_word2id = dict(self._word2id)
     ext_id2word = dict(self._id2word)
     for sentence in raw_article_sents:
         for token in sentence:
             if token in ext_word2id:
                 continue
             # New ids are taken from the current size of each map.
             ext_word2id[token] = len(ext_word2id)
             ext_id2word[len(ext_id2word)] = token

     base_ids = conver2id(UNK, self._word2id, raw_article_sents)
     art_lens = list(map(len, base_ids))
     article = pad_batch_tensorize(base_ids, PAD, cuda=False)
     article = article.to(self._device)

     ext_ids = conver2id(UNK, ext_word2id, raw_article_sents)
     extend_art = pad_batch_tensorize(ext_ids, PAD, cuda=False)
     extend_art = extend_art.to(self._device)

     dec_args = (article, art_lens, extend_art, len(ext_word2id),
                 START, END, UNK, self._max_len)
     return dec_args, ext_id2word
Пример #5
0
 def __call__(self, raw_article_sents, sent_labels):
     """Run the extractor on one article with the given sentence labels.

     Args:
         raw_article_sents: sentences handed as-is to ``conver2id``.
         sent_labels: forwarded to ``self._net.extract`` wrapped in a
             one-element list; its expected format is defined by the
             network, not visible here.

     Returns:
         Whatever ``self._net.extract`` returns.
     """
     self._net.eval()
     articles = conver2id(UNK, self._word2id, raw_article_sents)
     article = pad_batch_tensorize(articles, PAD,
                                   cuda=False).to(self._device)
     return self._net.extract([article], [sent_labels])
Пример #6
0
 def _prepro(self, raw_article_sents):
     """Prepare decoder inputs with a per-call extended vocabulary.

     Copies ``self._word2id`` / ``self._id2word``, adds every unseen input
     word to the copies, then encodes the input twice: once with the base
     vocabulary and once with the extended one.

     Returns:
         A pair ``(dec_args, ext_id2word)``; ``dec_args`` holds
         ``(article, art_lens, extend_art, extend_vsize, START, END, UNK,
         self._max_len)``.
     """
     def _pad_to_device(id_seqs):
         return pad_batch_tensorize(id_seqs, PAD, cuda=False
                                    ).to(self._device)

     ext_word2id = dict(self._word2id)
     ext_id2word = dict(self._id2word)
     for words in raw_article_sents:
         for word in words:
             if word not in ext_word2id:
                 # Next free id in each map is its current length.
                 ext_word2id[word] = len(ext_word2id)
                 ext_id2word[len(ext_id2word)] = word

     plain_ids = conver2id(UNK, self._word2id, raw_article_sents)
     art_lens = [len(ids) for ids in plain_ids]
     article = _pad_to_device(plain_ids)
     extended_ids = conver2id(UNK, ext_word2id, raw_article_sents)
     extend_art = _pad_to_device(extended_ids)
     extend_vsize = len(ext_word2id)
     return ((article, art_lens, extend_art, extend_vsize,
              START, END, UNK, self._max_len),
             ext_id2word)
Пример #7
0
 def __call__(self, raw_article_sents, raw_abs_sents=None):
     """Build the model input for one article according to ``self.net_type``.

     Args:
         raw_article_sents: article sentences. For ``'ml_rnn_extractor'``
             they are passed to ``conver2id``; for
             ``'ml_trans_rnn_extractor'`` each one is joined with spaces.
         raw_abs_sents: abstract sentences; only read (iterated and joined
             with spaces) for ``'ml_trans_rnn_extractor'``, where it must
             therefore be provided.

     Returns:
         A padded tensor on ``self._device`` for ``'ml_rnn_extractor'``, or
         the result of ``myextract.get_batch_trans`` for
         ``'ml_trans_rnn_extractor'``.

     Raises:
         ValueError: if ``self.net_type`` is neither of the two values above.
     """
     if self.net_type == 'ml_rnn_extractor':
         articles = conver2id(UNK, self._word2id, raw_article_sents)
         return pad_batch_tensorize(articles, PAD, cuda=False
                                    ).to(self._device)
     if self.net_type == 'ml_trans_rnn_extractor':
         article_text = [" ".join(r) for r in raw_article_sents]
         abstract_text = [" ".join(r) for r in raw_abs_sents]
         return myextract.get_batch_trans([(article_text, abstract_text)])
     # Fail with a clear message instead of an unbound-local error.
     raise ValueError(
         'unsupported net_type: {!r}'.format(self.net_type))
Пример #8
0
 def __call__(self, raw_article_sents):
     """Extract from one article, optionally forwarding ``force_ext``.

     ``force_ext`` is passed to ``self._net.extract`` only when
     ``self.force_ext`` is falsy; otherwise the keyword is omitted and the
     network's own default applies.
     """
     self._net.eval()
     sent_count = len(raw_article_sents)
     sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
     article = pad_batch_tensorize(
         sent_ids, PAD, cuda=False, max_num=5).to(self._device)

     extract_kwargs = {'k': min(sent_count, self._max_ext)}
     if not self.force_ext:
         extract_kwargs['force_ext'] = self.force_ext
     return self._net.extract([article], **extract_kwargs)
Пример #9
0
    def _prepro(self, raw_article_sents):
        """Build decoder arguments for one input using the base vocabulary.

        Args:
            raw_article_sents: sentences handed as-is to ``conver2id``.

        Returns:
            ``(dec_args, ext_id2word)`` where ``dec_args`` is
            ``(article, art_lens, START, END, UNK, self._max_len)`` and
            ``ext_id2word`` is a shallow copy of ``self._id2word`` (no words
            are added to it here).
        """
        # Hand back a copy so callers cannot mutate the instance's map.
        ext_id2word = dict(self._id2word)

        articles = conver2id(UNK, self._word2id, raw_article_sents)
        art_lens = [len(art) for art in articles]
        article = pad_batch_tensorize(articles, PAD,
                                      cuda=False).to(self._device)

        dec_args = (article, art_lens, START, END, UNK, self._max_len)
        return dec_args, ext_id2word
 def __call__(self, raw_article_sents):
     """Extract from one article, encoding tokens per ``self._emb_type``.

     With ``self._emb_type == 'W2V'`` the sentences are converted by
     ``conver2id`` with ``self._word2id``; for any other value each sentence
     is converted with ``self._tokenizer.convert_tokens_to_ids``.
     """
     self._net.eval()
     sent_count = len(raw_article_sents)
     if self._emb_type != 'W2V':
         to_ids = self._tokenizer.convert_tokens_to_ids
         sent_ids = [to_ids(sent) for sent in raw_article_sents]
     else:
         sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
     padded = pad_batch_tensorize(sent_ids, PAD, cuda=False)
     article = padded.to(self._device)
     return self._net.extract([article],
                              k=min(sent_count, self._max_ext))
Пример #11
0
 def __call__(self, raw_article_sents):
     """Extract from one article; the return shape depends on the net type.

     Returns:
         ``(indices, batch)`` when ``self._net_type`` is
         ``'ml_trans_rnn_extractor'`` (``batch`` comes from
         ``myextract.get_batch_trans``), otherwise just ``indices``.
     """
     self._net.eval()
     n_art = len(raw_article_sents)

     if self._net_type != 'ml_trans_rnn_extractor':
         sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
         article = pad_batch_tensorize(sent_ids, PAD, cuda=False)
         article = article.to(self._device)
         return self._net.extract([article],
                                  k=min(n_art, self._max_ext))

     # On this path the count is taken from the first element of the input.
     n_first = len(raw_article_sents[0])
     batch = myextract.get_batch_trans([raw_article_sents])
     indices = self._net.extract(batch, k=min(n_first, self._max_ext))
     return indices, batch
Пример #12
0
 def __call__(self, raw_article_sents):
     """Convert the sentences with ``conver2id`` and return them as one
     padded tensor on ``self._device``."""
     sent_ids = conver2id(UNK, self._word2id, raw_article_sents)
     padded = pad_batch_tensorize(sent_ids, PAD, cuda=False)
     return padded.to(self._device)
Пример #13
0
 def __call__(self, raw_article_sents):
     """Return the input sentences as a padded id tensor on
     ``self._device``.

     Ids come from ``conver2id`` with ``self._word2id``; padding is done by
     ``pad_batch_tensorize`` with ``PAD`` before the device move.
     """
     return pad_batch_tensorize(
         conver2id(UNK, self._word2id, raw_article_sents),
         PAD,
         cuda=False,
     ).to(self._device)