示例#1
0
文件: qa.py 项目: zhemingyang/PQA
 def focus_words(self):
   """Return the distinct focus words of the question and its answers.

   A word is a focus word when its position is flagged in
   ``self.q_focus_mask`` (question) or in ``self.a_focus_mask[aid]``
   (answer number ``aid``). Each word is passed through ``token`` before
   being collected.

   Returns:
     A list without duplicates, in first-seen order: question words
     first, then answer words in document order. The order is therefore
     stable from run to run, unlike a round trip through ``set``.
   """
   # Focus words may come from the question ...
   candidates = [token(w) for i, w in enumerate(self.q_doc) if self.q_focus_mask[i]]
   # ... and also from the answers.
   for aid, a_doc in enumerate(self.a_docs):
     for i, w in enumerate(a_doc):
       if self.a_focus_mask[aid][i]:
         candidates.append(token(w))
   # De-duplicate while keeping first-seen order; going through a plain
   # set would make the result order depend on string hashing.
   seen = set()
   unique_words = []
   for word in candidates:
     if word not in seen:
       seen.add(word)
       unique_words.append(word)
   return unique_words
示例#2
0
文件: qa.py 项目: zhemingyang/PQA
 def _keep_answer(self, a_doc):
   """Decide whether an answer carries enough content to be kept.

   Words flagged ``is_stop``, ``is_punct`` or ``is_space``, and words
   whose ``token`` is "yes" or "no", do not count. The answer is kept
   when the number of remaining words is at least ``KEEP_ANS_MIN_WORD``;
   otherwise it is logged at debug level and rejected.

   Returns:
     True to keep the answer, False to filter it out.
   """
   n_content_words = 0
   for w in a_doc:
     if w.is_stop or w.is_punct or w.is_space:
       continue
     if token(w) == "yes" or token(w) == "no":
       continue
     n_content_words += 1

   if n_content_words < KEEP_ANS_MIN_WORD:
     logger.debug(u"Filter out answer: {}".format(a_doc))
     return False
   return True
示例#3
0
 def _spacy_doc_to_token(self, doc):
     """Split a document into kept tokens, their POS tags and dropped positions.

     Every word is mapped through ``correct_token(token(w))``. A truthy
     result is kept together with the word's ``pos_``; a falsy result
     means the word is dropped and its index in ``doc`` is recorded.

     Returns:
       A tuple ``(count, tokens, pos_tags, dropped_indices)`` where
       ``count`` is the number of kept tokens.
     """
     kept_tokens = []
     kept_pos = []
     dropped_indices = []
     for idx, w in enumerate(doc):
         corrected = correct_token(token(w))
         if not corrected:
             dropped_indices.append(idx)
             continue
         kept_tokens.append(corrected)
         kept_pos.append(w.pos_)
     n_kept = len(kept_tokens)
     # Tokens and POS tags are appended in lockstep, so the lengths must agree.
     assert n_kept == len(kept_pos)
     return n_kept, kept_tokens, kept_pos, dropped_indices
示例#4
0
文件: qa.py 项目: zhemingyang/PQA
 def cntxt_words(self):
   """Collect the context words of all answers.

   For each answer document in ``self.a_docs``, every word whose position
   is flagged in ``self.a_cntxt_mask[aid]`` is passed through ``token``
   and appended to the result.

   Returns:
     A list of tokens in document order; duplicates are kept.
   """
   collected = []
   for aid, a_doc in enumerate(self.a_docs):
     collected.extend(
       token(w) for i, w in enumerate(a_doc) if self.a_cntxt_mask[aid][i]
     )
   return collected
示例#5
0
 def count_vocab(self, doc):
     """Update the vocabulary and POS counters with the words of ``doc``.

     Each word is mapped through ``correct_token(token(w))``; words that
     yield a falsy result are skipped. For the others, the corrected token
     is counted in ``self.vocab_cnt`` and the word's ``pos_`` in
     ``self.pos_cnt``.
     """
     for w in doc:
         corrected = correct_token(token(w))
         if not corrected:
             continue
         self.vocab_cnt[corrected] += 1
         self.pos_cnt[w.pos_] += 1