def getAllWords(stopword, allDocs, porterstemmer):
    """Count stemmed, non-stopword tokens across a collection of documents.

    Every document is split on whitespace. A token is skipped when it is
    already in ``stopword``; otherwise it is stemmed, and the stem is
    skipped when it is at most one character long or is itself in
    ``stopword``. Each remaining stem has its count incremented.

    Args:
        stopword: Container of stop words; only membership (``in``) is
            used, so a dict keyed by word or a set both work.
        allDocs: Iterable of document strings.
        porterstemmer: Object with a ``stem(word, start, end)`` method; it
            is called with ``0`` and ``len(word) - 1``.

    Returns:
        A dict mapping each kept stem to its number of occurrences.
    """
    wordlist = {}
    for news in allDocs:
        for token in news.split():
            # Cheap rejection first: do not stem tokens that are stop words.
            if token in stopword:
                continue
            stemmed = porterstemmer.stem(token, 0, len(token) - 1)
            # Stemming can shrink a token to a single character or turn it
            # into a stop word, so both conditions are checked on the stem.
            if len(stemmed) <= 1 or stemmed in stopword:
                continue
            if "'" in stemmed:
                # Diagnostic output for stems that still contain an
                # apostrophe. Single-argument form prints identically under
                # Python 2 and Python 3.
                print("!" + stemmed + "!")
            wordlist[stemmed] = wordlist.get(stemmed, 0) + 1
    return wordlist
def process(self, words):
    """Return a list with the stem of each item in ``words``, in order.

    Each item is passed to ``porterstemmer.stem``; ``porterstemmer`` is a
    name resolved from the enclosing module scope, not a parameter.
    """
    stems = []
    for token in words:
        stems.append(porterstemmer.stem(token))
    return stems