def main():
    """Classify the text given on the command line with a pickled classifier.

    According to the original docstring, this is the entry point of the
    'chitragoopt' executable script (defined in setup.py).

    Steps, in order:
      1. Validate that a text argument was supplied in ``sys.argv[1]``.
      2. Build labelled features from the ``movie_reviews`` corpus and keep
         the test portion returned by ``split_label_feats(..., split=0.75)``.
      3. Print the whitespace-split tokens of ``sys.argv[1]`` and turn them
         into a feature set with ``bag_of_words``.
      4. Load a previously saved classifier from ``my_classifier.pickle``
         (the classifier is loaded from disk, not retrained here).
      5. Print the classifier's accuracy on the test features, then print
         the predicted label for the input text 51 times in total.

    Raises:
        SystemExit: if no text argument was passed on the command line.
    """
    # Fail fast with a readable message instead of an IndexError raised
    # only after the (potentially slow) corpus feature extraction.
    if len(sys.argv) < 2:
        raise SystemExit(
            "usage: pass the text to classify as the first command-line argument"
        )

    lfeats = label_feats_from_corpus(movie_reviews)
    # Only the test portion is needed; the training portion is discarded
    # because the classifier is unpickled below rather than trained.
    _, test_feats = split_label_feats(lfeats, split=0.75)

    tokens = sys.argv[1].split()
    print(tokens)
    input_feats = bag_of_words(tokens)

    # Pickle data is binary: the file must be opened with 'rb' on Python 3.
    # NOTE(review): pickle.load executes arbitrary code embedded in the
    # file -- only load 'my_classifier.pickle' from a trusted source.
    with open('my_classifier.pickle', 'rb') as f:
        nb_classifier = pickle.load(f)

    print(accuracy(nb_classifier, test_feats))
    print(nb_classifier.classify(input_feats))
    # NOTE(review): the same input is classified and printed 50 more times;
    # presumably a manual consistency check -- confirm whether it is still
    # needed before removing it.
    for _ in range(50):
        print(nb_classifier.classify(input_feats))
# Demo script: train a Naive Bayes classifier on the NLTK movie_reviews corpus
# using features built by the featx helpers, classify two hand-written
# examples, and print the accuracy on the held-out features.
# The trailing "# ..." notes on print lines are the original author's
# recorded outputs.
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
# movie_reviews is used below; import it here so this script does not depend
# on some other part of the file having imported it first.
from nltk.corpus import movie_reviews
from nltk.probability import DictionaryProbDist
from nltk.probability import LaplaceProbDist
from featx import label_feats_from_corpus, split_label_feats, bag_of_words  # featx.py must be in the same directory.
import time

print(movie_reviews.categories())  # ['neg', 'pos']

# Labelled feature sets built from the corpus (a dict, per the keys() call).
lfeats = label_feats_from_corpus(movie_reviews)
print(lfeats.keys())  # dict_keys(['neg', 'pos'])

# split=0.75 is passed through to featx; presumably the training fraction --
# confirm against featx.split_label_feats.
train_feats, test_feats = split_label_feats(lfeats, split=0.75)
print(len(train_feats))
print(len(test_feats))

nb_classifier = NaiveBayesClassifier.train(train_feats)
print(nb_classifier.labels())

# Classify one hand-written example per expected sentiment.
negfeat = bag_of_words(['the', 'plot', 'was', 'ludicrous'])
print(nb_classifier.classify(negfeat))
posfeat = bag_of_words(['kate', 'winslet', 'is', 'accessible'])
print(nb_classifier.classify(posfeat))

print(accuracy(nb_classifier, test_feats))
# Evaluation script: restrict features to the corpus' high-information words,
# train a Naive Bayes classifier on them, and print accuracy plus per-label
# precision and recall. The trailing "# ..." notes on print lines are the
# original author's recorded outputs.
from featx import (
    label_feats_from_corpus,
    split_label_feats,
    high_information_words,
    bag_of_words_in_set,
)
from classification import precision_recall, MaxVoteClassifier  # classification.py must be in the same directory.
from nltk.corpus import movie_reviews
from nltk.classify.util import accuracy
from nltk.classify import NaiveBayesClassifier
from nltk.classify import MaxentClassifier
from nltk.classify import DecisionTreeClassifier
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.svm import LinearSVC

labels = movie_reviews.categories()
# One (label, words) pair per corpus category.
labeled_words = [
    (category, movie_reviews.words(categories=[category]))
    for category in labels
]
high_info_words = set(high_information_words(labeled_words))


def feat_det(words):
    """Build features from *words*, filtered through ``high_info_words``."""
    return bag_of_words_in_set(words, high_info_words)


lfeats = label_feats_from_corpus(movie_reviews, feature_detector=feat_det)
train_feats, test_feats = split_label_feats(lfeats)

print("######################################################################")
nb_classifier = NaiveBayesClassifier.train(train_feats)
print("Accuracy Naive Bayes: ", accuracy(nb_classifier, test_feats), sep="")  # Accuracy: 0.91

nb_precisions, nb_recalls = precision_recall(nb_classifier, test_feats)
print("Precisions Naive Bayes Pos: ", nb_precisions['pos'], sep="")  # Precisions Pos: 0.8988326848249028
print("Precisions Naive Bayes Neg: ", nb_precisions['neg'], sep="")  # Precisions Neg: 0.9218106995884774
print("Recalls Naive Bayes Pos: ", nb_recalls['pos'], sep="")  # Recalls Pos: 0.924
print("Recalls Naive Bayes Neg: ", nb_recalls['neg'], sep="")  # Recalls Neg: 0.896
print("######################################################################")