def accuracy(featureset, iterations=10):
    """Score nine classifiers over repeated random train/test splits.

    Each round shuffles the samples, uses the first ``trainsize`` items
    (a module-level value) for training and the remainder for testing,
    trains every classifier on the training slice and measures each one
    with ``nltk.classify.accuracy`` on the test slice.

    The round index is printed at the start of every round.  After the last
    round, a list holding the mean of each round's nine scores is printed.

    Args:
        featureset: Labeled samples in the form accepted by the NLTK
            classifiers' ``train`` methods.  The caller's sequence is not
            modified; a private copy is shuffled instead.
        iterations: Number of shuffle/train/score rounds (default 10).

    Returns:
        A list with one 9-tuple per round.  Scores are ordered as
        (NaiveBayes, DecisionTree, BNB, MNB, L1, L2, SVC, LSVC, NuSVC).
    """
    # Shuffle a copy so the caller's list keeps its original order.  Because
    # the copy is reshuffled cumulatively, the sequence of permutations is
    # the same as shuffling the argument in place would have produced.
    samples = list(featureset)

    # Module-level classifier objects that are re-trained every round
    # (presumably SklearnClassifier wrappers -- confirm at their definition).
    refit_models = (BNB, MNB, L1, L2, SVC, LSVC, NuSVC)

    results = []
    for i in range(iterations):
        print(i)
        random.shuffle(samples)
        train = samples[:trainsize]
        test = samples[trainsize:]

        # Train everything first, then score, in a fixed order.
        nltk_nb = nltk.NaiveBayesClassifier.train(train)
        nltk_dt = nltk.DecisionTreeClassifier.train(train)
        for model in refit_models:
            model.train(train)

        # One tuple per round, one score per classifier.
        results.append(tuple(
            nltk.classify.accuracy(model, test)
            for model in (nltk_nb, nltk_dt) + refit_models
        ))

    # NOTE(review): this averages across the nine classifiers within each
    # round.  Per-classifier averages across rounds would need
    # zip(*results) -- confirm which summary was intended.
    print([statistics.mean(round_scores) for round_scores in results])
    return results
def _fit_and_print(label, model):
    """Train ``model`` on ``train_set`` and print its accuracy on ``test_set`` as a percentage."""
    model.train(train_set)
    print(label, nltk.classify.accuracy(model, test_set) * 100)


# SGDClassifier was already bound before this point; it only needs training.
_fit_and_print("SGDClassifier Accuracy Percentage: ", SGDClassifier)

# Each scikit-learn estimator name is rebound to its NLTK-compatible wrapper,
# so after these lines the names refer to wrapped instances, not classes.
SVC = SklearnClassifier(SVC())
_fit_and_print("SVC Accuracy Percentage: ", SVC)

LinearSVC = SklearnClassifier(LinearSVC())
_fit_and_print("LinearSVC Accuracy Percentage: ", LinearSVC)

NuSVC = SklearnClassifier(NuSVC())
_fit_and_print("NuSVC Accuracy Percentage: ", NuSVC)

# Combine seven of the classifiers (SVC is not included) into a single
# voting classifier and score it on the same held-out data.
voted_classifier = VoteClassifier(
    classifier,
    MNB_classifier,
    BernoulliNB,
    LogisticRegression,
    SGDClassifier,
    LinearSVC,
    NuSVC,
)
print(
    "voted_classifier accuracy percentage:",
    nltk.classify.accuracy(voted_classifier, test_set) * 100,
)

# Disabled spot check of the first six test samples:
# for sample in test_set[:6]:
#     print("Classification:", voted_classifier.classify(sample[0]),
#           "Confidence:", voted_classifier.confidence(sample[0]) * 100)
# Disabled runs, kept for reference:
# GaussianNB.train(training_set)
# print("GaussianNB accuracy percent:", (nltk.classify.accuracy(GaussianNB, testing_set))*100)
# BernoulliNB = SklearnClassifier(BernoulliNB())
# BernoulliNB.train(training_set)
# print("BernoulliNB accuracy percent:", (nltk.classify.accuracy(BernoulliNB, testing_set))*100)

# Classifiers evaluated below: LogisticRegression, SGDClassifier,
# SVC, LinearSVC, NuSVC.


def _train_and_score(label, model):
    """Train ``model`` on ``training_set`` and print its accuracy on ``testing_set`` as a percentage."""
    model.train(training_set)
    print(label, nltk.classify.accuracy(model, testing_set) * 100)


# Each estimator name is rebound to its SklearnClassifier wrapper before
# training, so afterwards the name refers to the wrapped instance.
LogisticRegression = SklearnClassifier(LogisticRegression())
_train_and_score("LogisticRegression accuracy percent:", LogisticRegression)

SGDClassifier = SklearnClassifier(SGDClassifier())
_train_and_score("SGDClassifier accuracy percent:", SGDClassifier)

SVC = SklearnClassifier(SVC())
_train_and_score("SVC accuracy percent:", SVC)

LinearSVC = SklearnClassifier(LinearSVC())
_train_and_score("LinearSVC accuracy percent:", LinearSVC)

NuSVC = SklearnClassifier(NuSVC())
_train_and_score("NuSVC accuracy percent:", NuSVC)