Exemplo n.º 1
0
 def getKeyWordsFromDocuments(documents):
     """Return up to six of the most frequent phrases found in ``documents``.

     Each document is tokenized with ``getWordsFromRawEnglishMessage`` and the
     tokens are handed to ``getPhrases(words, 2, 2)``; the meaning of the two
     numeric arguments is defined by ``getPhrases`` (presumably phrase-length
     bounds -- TODO confirm against that helper).

     The result lists the top non-hashtag phrases first, followed by at most
     three phrases that start with ``'#'``.  Non-hashtag phrases get three
     slots plus any of the three hashtag slots left unused.  Phrases are
     ranked by descending frequency; equally frequent phrases keep their
     sorted order.
     """
     allPhrases = []
     for document in documents:
         words = getWordsFromRawEnglishMessage(document)
         allPhrases.extend(getPhrases(words, 2, 2))

     # groupby only merges adjacent equal items, hence the sort beforehand.
     phraseCounts = [
         (phrase, sum(1 for _ in occurrences))
         for phrase, occurrences in groupby(sorted(allPhrases))
     ]
     # Stable sort: ties on count stay in the sorted phrase order.
     phraseCounts.sort(key=itemgetter(1), reverse=True)

     rankedPhrases = [phrase for phrase, _ in phraseCounts]
     topHashtags = [p for p in rankedPhrases if p.startswith('#')][:3]
     nonhashtags = [p for p in rankedPhrases if not p.startswith('#')]

     # Six slots overall: whatever the (at most three) hashtags do not use
     # goes to non-hashtags, i.e. exactly three when all hashtag slots are taken.
     nonHashtagQuota = 6 - len(topHashtags)
     return nonhashtags[:nonHashtagQuota] + topHashtags
Exemplo n.º 2
0
 def test_getWordsFromRawEnglishMessage(self):
     # The expected token list omits 'the' from the input sentence
     # (presumably filtered as a stop word -- confirm against the tokenizer).
     expectedWords = ['existing', 'distutils', 'code']
     rawMessage = 'the existing distutils code'
     self.assertEqual(expectedWords, getWordsFromRawEnglishMessage(rawMessage))