from textblob.classifiers import MaxEntClassifier

# Build a maximum-entropy classifier from the CSV training file.
with open('data/train-toy.csv', 'r') as fp:
    cl = MaxEntClassifier(fp, format="csv")

# Score the classifier against the CSV test file and print the result.
# print() is called as a function so the script also runs on Python 3;
# with a single argument it behaves the same on Python 2.
with open('data/test-toy.csv', 'r') as gp:
    print(cl.accuracy(gp, format="csv"))
print(len(words), len(tags))

# Use at most the first SAMPLE_LIMIT aligned (word, tag) pairs: the first
# TRAIN_SIZE of them go to the training set, the rest to the test set.
# Bounding by the actual data length avoids an IndexError when fewer than
# SAMPLE_LIMIT samples are available.
SAMPLE_LIMIT = 1000
TRAIN_SIZE = 800
usable = min(SAMPLE_LIMIT, len(words), len(tags))
for i in range(usable):
    pair = (words[i], tags[i])
    if i < TRAIN_SIZE:
        train.append(pair)
    else:
        test.append(pair)
print(train)
print(test)

# Train the three classifiers on the same data and report their accuracy
# on the held-out set.
naive = NaiveBayesClassifier(train)
dtc = DecisionTreeClassifier(train)
mec = MaxEntClassifier(train)
print("NaiveBayesClassifier Accuracy: {0}".format(naive.accuracy(test)))
print("DecisionTreeClassifier Accuracy: {0}".format(dtc.accuracy(test)))
print("MaxEntClassifier Accuracy: {0}".format(mec.accuracy(test)))

# Reuse the Naive Bayes model built above instead of training an identical
# second one on the same data.
cl = naive
print("NaiveBayesClassifier Accuracy: {0}".format(cl.accuracy(test)))

# Classify every held-out sample and count agreements with test_tags.
for i, sample in enumerate(test):
    # Only the text (element 0) is classified. Passing the whole
    # (text, label) tuple would hand the gold label to the feature
    # extractor as if it were part of the document.
    tag = cl.classify(sample[0])
    pred_tags.append(tag)
    if tag == test_tags[i]:
        count += 1
print(len(pred_tags), len(test_tags))
print(count)
test = []


def _load_utterances(path):
    """Return the ``(text, label)`` pairs parsed from the file at *path*.

    Each non-blank line is split on a double tab. The second field is the
    text; the label is the part of the third field before the first ":".

    Raises:
        IndexError: if a non-blank line has fewer than three fields.
    """
    pairs = []
    # The context manager closes the file even if a line fails to parse.
    with open(path, "r") as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            fields = stripped.split("\t\t")
            label = fields[2].split(":")[0]
            pairs.append((fields[1], label))
    return pairs


train.extend(_load_utterances("train_utterance"))
test.extend(_load_utterances("test_utterance"))

cl = NaiveBayesClassifier(train)
mec = MaxEntClassifier(train)

# Compute accuracy (Naive Bayes first, then maximum entropy)
print("Accuracy: {0}".format(cl.accuracy(test)))
print("Accuracy: {0}".format(mec.accuracy(test)))

# Show 5 most informative features
cl.show_informative_features(5)
# trains.append(train[i])
# The full training list is used as-is.
trains = train


def _show_accuracy(caption, classifier):
    """Print *caption*, the classifier's accuracy on ``test``, and a newline."""
    score = classifier.accuracy(test)
    print(caption, score, "\n")


# Build and evaluate only the classifier selected by ``choice``; any other
# value is reported as bad input.
if choice == "1":
    print("\n" + "#NaiveBayesClassifier")
    cl1 = NaiveBayesClassifier(trains)
    _show_accuracy("Classifier: Naive Bayes -- Accuracy: ", cl1)
elif choice == "2":
    print("\n" + "#DecisionTreeClassifier")
    cl2 = DecisionTreeClassifier(trains)
    _show_accuracy("Classifier: Decision Tree -- Accuracy: ", cl2)
elif choice == "3":
    print("\n" + "#MaxEntClassifier")
    cl3 = MaxEntClassifier(trains)
    _show_accuracy("Classifier: Maximum Entropy -- Accuracy: ", cl3)
elif choice == "4":
    print("\n" + "#NLTKClassifier")
    cl4 = NLTKClassifier(trains)
    _show_accuracy("Classifier: NLTK -- Accuracy: ", cl4)
else:
    print("Bad input!")

# Most repeated words (most important properties): order each dictionary's
# (key, value) items ascending by value, so the largest values come last.
_by_value = operator.itemgetter(1)
totalDictPosSorted = sorted(totalDictPos.items(), key=_by_value)
totalDictNegSorted = sorted(totalDictNeg.items(), key=_by_value)