示例#1
0
def test_aggregated_sentiments():
    sub_clf = classifier.get_optimal_subjectivity_classifier()
    pol_clf = classifier.get_optimal_polarity_classifier()
    tweets = utils.get_pickles(2)
    sentimentvalues = utils.get_sentimentvalues(2)
    sub_train_tweets, sub_train_targets, _, _, sub_train_sentiments, _ = utils.make_subjectivity_train_and_test_and_targets(
        tweets, sentimentvalues, splitvalue=1.0
    )
    pol_train_tweets, pol_train_targets, _, _, pol_train_sentiments, _ = utils.make_polarity_train_and_test_and_targets(
        tweets, sentimentvalues, splitvalue=1.0
    )

    sub_predictions = sub_clf.classify(sub_train_tweets, sub_train_sentiments)
    pol_predictions = pol_clf.classify(pol_train_tweets, pol_train_sentiments)
    print pol_train_targets, pol_predictions
    days, targets, predicts, total_frequencies = utils.temporally_aggregate_subjectivity(
        sub_train_tweets, sub_predictions, targets=sub_train_targets
    )
    data = {"Targets": [days, targets], "Predictions": [days, predicts], "Frequencies": [days, total_frequencies]}
    plotting.plot_subjectivity_aggregates(data, "aggregated_subjectivity")
    days, targets, predicts, frequencies = utils.temporally_aggregate_polarity(
        pol_train_tweets, pol_predictions, targets=pol_train_targets
    )
    for i in range(len(days)):
        targets[i] = targets[i] * 1.0 / frequencies[i]
        predicts[i] = predicts[i] * 1.0 / frequencies[i]
        frequencies[i] = frequencies[i] * 1.0 / total_frequencies[i]
    data = {"Targets": [days, targets], "Predictions": [days, predicts], "Frequencies": [days, frequencies]}
    plotting.plot_polarity_aggregates(data, "aggregated_polarity")
def get_optimal_subjectivity_classifier():
    """
    Builds, trains and returns the optimal subjectivity classifier.

    An SVM is constructed from the tweets selected by ``utils.get_pickles(3)``
    and their subjectivity targets, using unigram vectorizer options and
    idf-weighted tf-idf options. The 'SA' feature set is then attached with the
    sentiment values from ``utils.get_sentimentvalues(3)`` and the classifier
    is trained on it.
    """
    vectorizer_settings = {
        'ngram_range': (1, 1),
        'max_df': 0.5,
    }
    weighting_settings = {
        'sublinear_tf': False,
        'use_idf': True,
        'smooth_idf': True,
    }

    raw_tweets = utils.get_pickles(3)
    train_tweets, train_targets = utils.make_subjectivity_targets(raw_tweets)

    model = SVM(train_tweets, train_targets, vectorizer_settings, weighting_settings)
    sentiment_values = utils.get_sentimentvalues(3)
    model.set_feature_set('SA', sentiment_values)
    model.train_on_feature_set()
    return model