def getCategoryProbabilityFromDocument(self, document):
    """Return the categories the loaded classifier considers plausible.

    The document is split into words, English stopwords are removed, and
    a featureset is built from bigram and single-word features before
    being handed to the classifier's ``prob_classify``.

    Args:
        document: The document to classify, in whatever form
            ``document_as_words`` accepts.

    Returns:
        A list of ``(probability, category)`` tuples sorted in descending
        order, containing only categories whose probability is at least
        0.001.

    Raises:
        ClassifierException: If no classifier has been loaded yet.
    """
    if self._nltkClassifier is None:
        raise ClassifierException("The classifier must first be loaded by the loadClassifier() method")

    # Build the stopword set once: calling stopwords.words() inside the
    # comprehension re-read the list for every word and scanned it linearly.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    docwords = [word for word in document_as_words(document)
                if word not in stopwords]

    bigrams = bigram_feats(docwords, 200)
    words = word_feats(docwords, 2000)

    # Merge via copy + update instead of ``items() + items()``, which only
    # works on Python 2. Word features still override bigram features on a
    # key collision, exactly as before.
    featureset = dict(bigrams)
    featureset.update(words)

    probdist = self._nltkClassifier.prob_classify(featureset)

    results = []
    for category in probdist.samples():
        prob = probdist.prob(category)
        # Drop categories with a negligible probability.
        if prob >= 0.001:
            results.append((prob, category))

    results.sort(reverse=True)
    return results
def getCategoryFromDocument(self, document):
    """Return the single most likely category for a document.

    Args:
        document: The document to classify, in whatever form
            ``document_as_words`` accepts.

    Returns:
        The label returned by the loaded classifier's ``classify`` method.

    Raises:
        ClassifierException: If no classifier has been loaded yet.
    """
    if self._nltkClassifier is None:
        raise ClassifierException("The classifier must first be loaded by the loadClassifier() method")

    # NLTK classifiers take a featureset (feature name -> value mapping),
    # not a raw word list. Build it the same way as
    # getCategoryProbabilityFromDocument so both methods query the
    # classifier with identical features.
    # NOTE(review): assumes the classifier was trained on these same
    # stopword-filtered bigram/word features -- confirm against training code.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    docwords = [word for word in document_as_words(document)
                if word not in stopwords]

    featureset = dict(bigram_feats(docwords, 200))
    featureset.update(word_feats(docwords, 2000))

    return self._nltkClassifier.classify(featureset)