Пример #1
0
 def getCategoryProbabilityFromDocument(self, document):
     """Return the classifier's probability for each sample of a document.

     The document is split into words with ``document_as_words``, English
     stopwords are dropped, and bigram and single-word features are merged
     into one featureset that is passed to the classifier's
     ``prob_classify``.

     Returns a list of ``(probability, sample)`` tuples sorted from highest
     to lowest, keeping only samples whose probability is at least 0.001.
     The samples are presumably the category labels the classifier was
     trained on -- confirm against the training code.

     Raises ClassifierException if no classifier has been loaded yet.
     """
     if self._nltkClassifier is None:
         raise ClassifierException("The classifier must first be loaded by the loadClassifier() method")

     # Build the stopword set once: evaluating stopwords.words() inside the
     # comprehension would re-read the list for every word and test
     # membership against a list instead of a set.
     stopwords = set(nltk.corpus.stopwords.words('english'))
     docwords = [word for word in document_as_words(document) if word not in stopwords]

     bigrams = bigram_feats(docwords, 200)
     words = word_feats(docwords, 2000)

     # Merge both feature mappings; on a key collision the word feature
     # wins. dict()/update() is used rather than ``items() + items()``
     # because dict views cannot be concatenated on Python 3.
     featureset = dict(bigrams.items())
     featureset.update(words.items())

     probdist = self._nltkClassifier.prob_classify(featureset)

     results = []
     for sample in probdist.samples():
         prob = probdist.prob(sample)
         # Skip samples with a negligible probability.
         if prob >= 0.001:
             results.append((prob, sample))

     return sorted(results, reverse=True)
Пример #2
0
 def getCategoryFromDocument(self, document):
     """Return the classifier's single predicted label for a document.

     The document is split into words with ``document_as_words`` and the
     result is passed to the loaded classifier's ``classify`` method, whose
     return value is handed back unchanged.

     Raises ClassifierException if no classifier has been loaded yet.
     """
     if self._nltkClassifier is None:
         raise ClassifierException("The classifier must first be loaded by the loadClassifier() method")

     # NOTE(review): the raw word list is handed to classify() here, whereas
     # getCategoryProbabilityFromDocument() builds a feature dict first.
     # Confirm the classifier really accepts a word list -- TODO verify.
     docwords = document_as_words(document)
     return self._nltkClassifier.classify(docwords)