def evaluate(
    self,
    test_set,
    classifier=None,
    accuracy=True,
    f_measure=True,
    precision=True,
    recall=True,
    verbose=False,
):
    """
    Evaluate and print classifier performance on the test set.

    :param test_set: A list of (tokens, label) tuples to use as gold set.
    :param classifier: a classifier instance (previously trained). If `None`,
        fall back to `self.classifier`.
    :param accuracy: if `True`, evaluate classifier accuracy.
    :param f_measure: if `True`, evaluate classifier f_measure.
    :param precision: if `True`, evaluate classifier precision.
    :param recall: if `True`, evaluate classifier recall.
    :param verbose: if `True`, print each computed metric (in alphabetical
        order) in addition to returning them.
    :return: evaluation results.
    :rtype: dict(str): float
    """
    if classifier is None:
        classifier = self.classifier
    print("Evaluating {0} results...".format(type(classifier).__name__))
    metrics_results = {}
    # PEP 8: flags are tested for truthiness, not compared with `== True`.
    if accuracy:
        accuracy_score = eval_accuracy(classifier, test_set)
        metrics_results["Accuracy"] = accuracy_score

    # Build per-label sets of gold vs. observed instance indices; precision,
    # recall and F-measure are computed from these index sets.
    gold_results = defaultdict(set)
    test_results = defaultdict(set)
    labels = set()
    for i, (feats, label) in enumerate(test_set):
        labels.add(label)
        gold_results[label].add(i)
        observed = classifier.classify(feats)
        test_results[observed].add(i)

    for label in labels:
        if precision:
            precision_score = eval_precision(
                gold_results[label], test_results[label]
            )
            metrics_results["Precision [{0}]".format(label)] = precision_score
        if recall:
            recall_score = eval_recall(gold_results[label], test_results[label])
            metrics_results["Recall [{0}]".format(label)] = recall_score
        if f_measure:
            f_measure_score = eval_f_measure(
                gold_results[label], test_results[label]
            )
            metrics_results["F-measure [{0}]".format(label)] = f_measure_score

    # Print evaluation results (in alphabetical order)
    if verbose:
        for result in sorted(metrics_results):
            print("{0}: {1}".format(result, metrics_results[result]))

    return metrics_results
def evaluate( self, test_set, classifier=None, accuracy=True, f_measure=True, precision=True, recall=True, verbose=False, ): """ Evaluate and print classifier performance on the test set. :param test_set: A list of (tokens, label) tuples to use as gold set. :param classifier: a classifier instance (previously trained). :param accuracy: if `True`, evaluate classifier accuracy. :param f_measure: if `True`, evaluate classifier f_measure. :param precision: if `True`, evaluate classifier precision. :param recall: if `True`, evaluate classifier recall. :return: evaluation results. :rtype: dict(str): float """ if classifier is None: classifier = self.classifier print("Evaluating {0} results...".format(type(classifier).__name__)) metrics_results = {} if accuracy == True: accuracy_score = eval_accuracy(classifier, test_set) metrics_results['Accuracy'] = accuracy_score gold_results = defaultdict(set) test_results = defaultdict(set) labels = set() for i, (feats, label) in enumerate(test_set): labels.add(label) gold_results[label].add(i) observed = classifier.classify(feats) test_results[observed].add(i) for label in labels: if precision == True: precision_score = eval_precision( gold_results[label], test_results[label] ) metrics_results['Precision [{0}]'.format(label)] = precision_score if recall == True: recall_score = eval_recall(gold_results[label], test_results[label]) metrics_results['Recall [{0}]'.format(label)] = recall_score if f_measure == True: f_measure_score = eval_f_measure( gold_results[label], test_results[label] ) metrics_results['F-measure [{0}]'.format(label)] = f_measure_score # Print evaluation results (in alphabetical order) if verbose == True: for result in sorted(metrics_results): print('{0}: {1}'.format(result, metrics_results[result])) return metrics_results