Exemplo n.º 1
0
 def get_important_keyphrases_from_single_doc_yahoo(self, corpus_text, min_len=3, max_len=20):
     """Return the highest-scoring Yahoo keyphrases found in ``corpus_text``.

     The text is sent to ``YahooAPI.keyphrases``; each returned
     ``phrase -> score`` pair is kept only when all of the following hold:

     * the phrase matches at least one of the ``KANJI``, ``HIRA`` or
       ``KATA`` patterns (presumably Japanese script classes -- confirm
       against their definitions);
     * ``self.phrase_frequency(phrase, corpus_text)`` is positive;
     * ``min_len <= len(phrase) <= max_len``;
     * the phrase is not in ``self.stoplist``;
     * the phrase differs from ``self.query``, compared case-insensitively.

     Args:
         corpus_text: Text of the single document to analyse.
         min_len: Minimum accepted phrase length (inclusive).
         max_len: Maximum accepted phrase length (inclusive).

     Returns:
         A list of ``Keyword`` objects sorted by ``score`` in descending
         order, truncated to at most ``self.numberofkeywords`` entries.
     """
     api = YahooAPI(YAHOO_APP_KEY)
     keywords = []
     # ``items()`` rather than the Python 2-only ``iteritems()``: it is
     # available on dicts in both Python 2 and Python 3.
     for phrase, s in api.keyphrases(corpus_text).items():
         # Skip phrases that match none of the script patterns.
         if not any(re.search(pattern, phrase, re.U) for pattern in (KANJI, HIRA, KATA)):
             continue
         freq = self.phrase_frequency(phrase, corpus_text)
         if (
             freq > 0
             and min_len <= len(phrase) <= max_len
             and phrase not in self.stoplist
             and phrase.lower() != self.query.lower()
         ):
             # The raw API score is scaled by 1/101 before being stored.
             kw = Keyword(phrase, float(s) / 101, freq)
             # De-duplication relies on ``Keyword`` equality semantics.
             if kw not in keywords:
                 keywords.append(kw)
     keywords.sort(key=lambda x: x.score, reverse=True)
     # Slicing already clamps to the list length, so no explicit min() is needed.
     return keywords[:self.numberofkeywords]