Example #1
    def evaluate(
        self,
        test_set,
        classifier=None,
        accuracy=True,
        f_measure=True,
        precision=True,
        recall=True,
        verbose=False,
    ):
        """
        Evaluate and print classifier performance on the test set.

        :param test_set: A list of (tokens, label) tuples to use as gold set.
        :param classifier: a classifier instance (previously trained).
        :param accuracy: if `True`, evaluate classifier accuracy.
        :param f_measure: if `True`, evaluate classifier f_measure.
        :param precision: if `True`, evaluate classifier precision.
        :param recall: if `True`, evaluate classifier recall.
        :return: evaluation results.
        :rtype: dict(str): float
        """
        if classifier is None:
            classifier = self.classifier
        print("Evaluating {0} results...".format(type(classifier).__name__))
        metrics_results = {}
        if accuracy:
            accuracy_score = eval_accuracy(classifier, test_set)
            metrics_results["Accuracy"] = accuracy_score

        gold_results = defaultdict(set)
        test_results = defaultdict(set)
        labels = set()
        for i, (feats, label) in enumerate(test_set):
            labels.add(label)
            gold_results[label].add(i)
            observed = classifier.classify(feats)
            test_results[observed].add(i)

        for label in labels:
            if precision:
                precision_score = eval_precision(
                    gold_results[label], test_results[label]
                )
                metrics_results["Precision [{0}]".format(label)] = precision_score
            if recall:
                recall_score = eval_recall(gold_results[label], test_results[label])
                metrics_results["Recall [{0}]".format(label)] = recall_score
            if f_measure:
                f_measure_score = eval_f_measure(
                    gold_results[label], test_results[label]
                )
                metrics_results["F-measure [{0}]".format(label)] = f_measure_score

        # Print evaluation results (in alphabetical order)
        if verbose:
            for result in sorted(metrics_results):
                print("{0}: {1}".format(result, metrics_results[result]))

        return metrics_results
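
For context, a minimal usage sketch of this evaluate() method, following the standard NLTK SentimentAnalyzer pipeline on the subjectivity corpus (the corpus slice sizes and variable names are illustrative, not part of the listing above):

# Minimal usage sketch (assumed setup): train a SentimentAnalyzer on a small
# slice of the subjectivity corpus, then call evaluate() on the held-out split.
# Requires nltk.download('subjectivity').
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats, mark_negation

subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:100]]
obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:100]]
train_docs = subj_docs[:80] + obj_docs[:80]
test_docs = subj_docs[80:] + obj_docs[80:]

analyzer = SentimentAnalyzer()
all_words_neg = analyzer.all_words([mark_negation(doc) for doc in train_docs])
unigram_feats = analyzer.unigram_word_feats(all_words_neg, min_freq=4)
analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

training_set = analyzer.apply_features(train_docs)
test_set = analyzer.apply_features(test_docs)
analyzer.train(NaiveBayesClassifier.train, training_set)

# Returns a dict such as {'Accuracy': ..., 'Precision [subj]': ..., 'Recall [obj]': ...}
results = analyzer.evaluate(test_set, verbose=True)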
Example #2
def demo_vader_tweets(n_instances=None, output=None):
    """
    Classify 10000 positive and negative tweets using the VADER approach.

    :param n_instances: the total number of tweets to classify.
    :param output: the output file where results are reported.
    """
    from collections import defaultdict
    from nltk.corpus import twitter_samples
    from nltk.sentiment import SentimentIntensityAnalyzer
    from nltk.metrics import (
        accuracy as eval_accuracy,
        precision as eval_precision,
        recall as eval_recall,
        f_measure as eval_f_measure,
    )

    if n_instances is not None:
        n_instances = int(n_instances / 2)

    fields = ['id', 'text']
    positive_json = twitter_samples.abspath("positive_tweets.json")
    positive_csv = 'positive_tweets.csv'
    json2csv_preprocess(positive_json,
                        positive_csv,
                        fields,
                        strip_off_emoticons=False,
                        limit=n_instances)

    negative_json = twitter_samples.abspath("negative_tweets.json")
    negative_csv = 'negative_tweets.csv'
    json2csv_preprocess(negative_json,
                        negative_csv,
                        fields,
                        strip_off_emoticons=False,
                        limit=n_instances)

    pos_docs = parse_tweets_set(positive_csv, label='pos')
    neg_docs = parse_tweets_set(negative_csv, label='neg')

    # We separately split positive and negative instances to keep a balanced
    # uniform class distribution in both train and test sets.
    train_pos_docs, test_pos_docs = split_train_test(pos_docs)
    train_neg_docs, test_neg_docs = split_train_test(neg_docs)

    # VADER is lexicon-based and needs no training, so training_tweets is not
    # used below; only the test split is classified and scored.
    training_tweets = train_pos_docs + train_neg_docs
    testing_tweets = test_pos_docs + test_neg_docs

    vader_analyzer = SentimentIntensityAnalyzer()

    gold_results = defaultdict(set)
    test_results = defaultdict(set)
    acc_gold_results = []
    acc_test_results = []
    labels = set()
    num = 0
    for i, (text, label) in enumerate(testing_tweets):
        labels.add(label)
        gold_results[label].add(i)
        acc_gold_results.append(label)
        score = vader_analyzer.polarity_scores(text)['compound']
        if score > 0:
            observed = 'pos'
        else:
            observed = 'neg'
        num += 1
        acc_test_results.append(observed)
        test_results[observed].add(i)
    metrics_results = {}
    # Accuracy does not depend on the label, so compute it once, outside the loop.
    accuracy_score = eval_accuracy(acc_gold_results, acc_test_results)
    metrics_results['Accuracy'] = accuracy_score
    for label in labels:
        precision_score = eval_precision(gold_results[label],
                                         test_results[label])
        metrics_results['Precision [{0}]'.format(label)] = precision_score
        recall_score = eval_recall(gold_results[label], test_results[label])
        metrics_results['Recall [{0}]'.format(label)] = recall_score
        f_measure_score = eval_f_measure(gold_results[label],
                                         test_results[label])
        metrics_results['F-measure [{0}]'.format(label)] = f_measure_score

    for result in sorted(metrics_results):
        print('{0}: {1}'.format(result, metrics_results[result]))

    if output:
        output_markdown(output,
                        Approach='Vader',
                        Dataset='labeled_tweets',
                        Instances=n_instances,
                        Results=metrics_results)
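
The loop above relies on thresholding VADER's compound score: anything above 0 is mapped to 'pos', everything else to 'neg'. A standalone sketch of that mapping (the sentences are illustrative; requires the vader_lexicon resource):

# Minimal sketch of the compound-score thresholding used in the loop above.
# Requires nltk.download('vader_lexicon').
from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
for text in ["I love this movie :)", "This was a terrible waste of time."]:
    compound = sia.polarity_scores(text)['compound']
    observed = 'pos' if compound > 0 else 'neg'
    print('{0:+.3f}  {1}  {2}'.format(compound, observed, text))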
Example #3
print('Ethotic Statement Classifier Accuracy')
print(accuracy_score(testData['IsEthotic'], predictions) * 100, '%\n')
print('Ethotic sentences with VADER sentiment classifier')

reference_set = []
test_set = []
for index, item in enumerate(testData['Text']):
    # Only score sentences for which the ethotic classifier predicted class 1.
    if predictions[np.where(item == testData['Text'])].any() == 1:
        print(item)
        reference_set.append(testData['SentimentPolarity'][index])
        polarity_score = sid.polarity_scores(item)
        for j in sorted(polarity_score):
            print('{0}: {1}, \n'.format(j, polarity_score[j]), end='')
        # Threshold VADER's compound score into the SentimentPolarity encoding
        # (assumed here: 1 = positive, 2 = negative).
        if polarity_score['compound'] > 0:
            test_set.append(1)
        else:
            test_set.append(2)

# Use a distinct name so the accuracy_score function called above is not shadowed.
vader_accuracy = eval_accuracy(reference_set, test_set)
# Note: the set-based metrics below are applied to the sets of distinct label
# values (1 and 2), not to per-item index sets as in the examples above.
reference_set = set(reference_set)
test_set = set(test_set)
precision_score = eval_precision(reference_set, test_set)
recall_score = eval_recall(reference_set, test_set)
f_measure_score = eval_f_measure(reference_set, test_set)
print('VADER Classification Statistics')
print('Accuracy: ', vader_accuracy * 100, '%')
print('Precision: ', precision_score)
print('Recall: ', recall_score)
print('F-measure: ', f_measure_score)
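
Note that nltk.metrics.precision, recall, and f_measure are set-based: in Examples #1 and #2 they receive per-label sets of item indices, whereas here they are applied to the sets of distinct label values. A small self-contained sketch of the index-set convention (the index values are illustrative):

# Illustrative sketch of NLTK's set-based metrics on per-label index sets.
from nltk.metrics import f_measure as eval_f_measure
from nltk.metrics import precision as eval_precision
from nltk.metrics import recall as eval_recall

gold_pos = {0, 1, 2, 5}      # indices of items labeled 'pos' in the gold data
test_pos = {0, 2, 3, 5, 7}   # indices the classifier labeled 'pos'

print(eval_precision(gold_pos, test_pos))   # 3 / 5 = 0.6
print(eval_recall(gold_pos, test_pos))      # 3 / 4 = 0.75
print(eval_f_measure(gold_pos, test_pos))   # 2*0.6*0.75 / (0.6+0.75) ≈ 0.667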