import logging
import math

import nltk
from nltk.metrics import ConfusionMatrix

# Module logger used by exercise4 below.
LOGGER = logging.getLogger(__name__)


def confusion_matrix_scores(gold_labs, pred_labs, scores=True):
    '''Draw a confusion matrix and calculate accuracy, precision and
    recall scores.'''
    # Draw a confusion matrix
    if not gold_labs or not pred_labs:
        raise RuntimeError("One of the prediction lists is empty")
    if len(gold_labs) != len(pred_labs):
        raise RuntimeError(
            "The number of predictions != the number of gold labels")
    cm = ConfusionMatrix(gold_labs, pred_labs)
    print(cm.pretty_format(show_percents=False))
    # Calculate accuracy, precision and recall for a SICK part (not for
    # individual problems). E/C are treated as the positive classes and
    # N as the negative one. ZeroDivisionError covers empty denominators;
    # KeyError covers labels absent from the matrix.
    try:
        pre = (cm[('E', 'E')] + cm[('C', 'C')]) / float(
            sum(cm[(i, j)] for i in 'NEC' for j in 'EC'))
    except (ZeroDivisionError, KeyError):
        pre = 0
    try:
        rec = (cm[('E', 'E')] + cm[('C', 'C')]) / float(
            sum(cm[(i, j)] for i in 'EC' for j in 'NEC'))
    except (ZeroDivisionError, KeyError):
        rec = 0
    try:
        # cm._total is NLTK's count of label pairs (== len(gold_labs))
        acc = (cm[('E', 'E')] + cm[('C', 'C')] + cm[('N', 'N')]) / float(
            cm._total)
    except (ZeroDivisionError, KeyError):
        acc = 0
    # Print accuracy, precision and recall for a SICK part
    # (not for individual problems)
    if scores:
        print("Accuracy: {:.2f}%\nPrecision: {:.2f}%\nRecall: {:.2f}%".format(
            acc * 100, pre * 100, rec * 100))
    return (acc, pre, rec), cm
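
# ----------------------------------------------------------------------
# A minimal usage sketch for confusion_matrix_scores (not part of the
# original module). It assumes the single-letter SICK-style labels
# 'E'ntailment / 'N'eutral / 'C'ontradiction that the matrix indexing
# above relies on; the gold/pred values are illustrative only.
def _demo_confusion_matrix_scores():
    gold = ['E', 'N', 'C', 'E', 'N', 'N', 'C', 'E']
    pred = ['E', 'N', 'N', 'E', 'C', 'N', 'C', 'E']
    (acc, pre, rec), cm = confusion_matrix_scores(gold, pred)
    # Each score is a fraction in [0, 1]; for this toy input,
    # acc = 6/8 and pre = rec = 4/5.
    assert (acc, pre, rec) == (0.75, 0.8, 0.8)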
def exercise4(dataset, runs=5, test_portion=0.50):
    LOGGER.info('Building datasets...')
    # Build test and training review sets
    test_reviews = None
    train_reviews = None
    predetermined = None
    overall_classifications = []
    accuracies = []
    rmses = []
    for n in range(runs):
        if dataset.test and dataset.train:
            test_reviews = dataset.test
            train_reviews = dataset.train
            predetermined = True
        else:
            test_reviews, train_reviews = dataset.make_author_test_train(
                test_portion)
            predetermined = False
        if not predetermined:
            LOGGER.info('Run %d of %d', n + 1, runs)
        LOGGER.info('Building features...')
        # Build features: one (feature dict, author label) pair per review
        test_features = [(extract_features4(r), r.author)
                         for r in test_reviews]
        train_features = [(extract_features4(r), r.author)
                          for r in train_reviews]
        LOGGER.info('Building classifier...')
        # Build classifier
        LOGGER.info('Training Examples: %d', len(train_reviews))
        LOGGER.info('Training Features: %d', len(train_features))
        classifier = nltk.NaiveBayesClassifier.train(train_features)
        # classifier = nltk.DecisionTreeClassifier.train(train_features)
        LOGGER.info('Checking accuracy...')
        # Perform classification: collect (actual, predicted) pairs
        classifications = []
        for features, author in test_features:
            classifications.append((author, classifier.classify(features)))
        LOGGER.info('Printing results...')
        classifications.sort()
        accuracy = nltk.classify.accuracy(classifier, test_features)
        # Square root of the misclassification rate (reported as RMSE below)
        rmse = math.sqrt(
            sum(1 for a, c in classifications if a != c) /
            len(classifications))
        confusion = ConfusionMatrix([ref for ref, test in classifications],
                                    [test for ref, test in classifications])
        overall_classifications.extend(classifications)
        if not predetermined:
            HEADER = ('ACTUAL', 'CLASSIFIED')
            col_width = max(len(a) for a, c in (classifications + [HEADER]))
            for a, c in ([HEADER] + classifications):
                print("Exercise 4: %s %s" % (a.ljust(col_width), c))
        print("Exercise 4: Accuracy: %.3f" % (accuracy,))
        print("Exercise 4: Average RMSE Error: %.3f" % (rmse,))
        if predetermined:
            # A predetermined split makes every run identical, so stop early
            return accuracy
        print('Exercise 4: Confusion Matrix:\n%s' % (confusion.pretty_format(
            show_percents=False, values_in_chart=True),))
        accuracies.append(accuracy)
        rmses.append(rmse)
    overall_confusion = ConfusionMatrix(
        [ref for ref, test in overall_classifications],
        [test for ref, test in overall_classifications])
    print('Exercise 4: Overall Confusion Matrix:\n%s' %
          (overall_confusion.pretty_format(show_percents=False,
                                           values_in_chart=True),))
    print("Exercise 4: Runs: %d Average Accuracy: %.3f Max: %.3f Min: %.3f" %
          (runs, sum(accuracies) / len(accuracies), max(accuracies),
           min(accuracies)))
    print("Exercise 4: Runs: %d Average RMSE: %.3f Max: %.3f Min: %.3f" %
          (runs, sum(rmses) / len(rmses), max(rmses), min(rmses)))
    return accuracies
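
# ----------------------------------------------------------------------
# A hedged usage sketch for exercise4 (not part of the original module).
# Review and _DemoDataset are hypothetical stand-ins inferred from the
# attribute accesses above (r.author, dataset.test, dataset.train,
# dataset.make_author_test_train); the project's real dataset class and
# extract_features4 live elsewhere.
import random
from collections import namedtuple

Review = namedtuple('Review', ['author', 'text'])

try:
    extract_features4
except NameError:
    # Hypothetical fallback so the sketch runs standalone; the real
    # extract_features4 is defined elsewhere in this project.
    def extract_features4(review):
        return {word: True for word in review.text.split()}


class _DemoDataset:
    """Hypothetical dataset exposing the interface exercise4 expects."""

    def __init__(self, reviews):
        self._reviews = reviews
        self.test = None    # Falsy, so exercise4 makes random
        self.train = None   # author test/train splits per run

    def make_author_test_train(self, test_portion):
        shuffled = random.sample(self._reviews, len(self._reviews))
        cut = int(len(shuffled) * test_portion)
        return shuffled[:cut], shuffled[cut:]


if __name__ == '__main__':
    reviews = ([Review('alice', 'crisp bright hoppy ale')] * 20 +
               [Review('bob', 'dark heavy rich malty stout')] * 20)
    print('Demo accuracies:', exercise4(_DemoDataset(reviews), runs=2))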