Example #1
import pickle

import classifier
import entity_extraction
import utils


def test_temporal_topics():
    # Load the two pickled tweet batches and their precomputed sentiment values.
    tweets1 = pickle.load(open("temporal_tweets1", "rb"))
    tweets2 = pickle.load(open("temporal_tweets2", "rb"))
    tweets = tweets1 + tweets2
    print(len(tweets))
    sentiments = pickle.load(open("temporal_sentiments", "rb"))
    print(len(sentiments))
    subclf = classifier.get_optimal_subjectivity_classifier()
    polclf = classifier.get_optimal_polarity_classifier()
    # TODO: write the temporal aggregation here, etc.
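    # Classify subjectivity, keep only the subjective tweets and their sentiment
    # values, then classify the polarity of those tweets.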
    sub_predictions = subclf.classify(tweets, sentiments)
    subjective_tweets = [t for p, t in zip(sub_predictions, tweets) if p == "subjective"]
    subjective_sentiments = [s for p, s in zip(sub_predictions, sentiments) if p == "subjective"]
    pol_predictions = polclf.classify(subjective_tweets, subjective_sentiments)
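    # Extract topic entities from the subjective tweets and aggregate the
    # predicted polarity per topic over time.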
    topics = entity_extraction.perform_entity_extraction(
        subjective_tweets, subjective_sentiments, use_pmi=True, breakword_min_freq=0.1, breakword_range=14
    )
    days, unique_topics, aggregated_values = utils.topically_aggregate_polarity(
        subjective_tweets, pol_predictions, topics=topics
    )
    # Store one [days, values] time series per topic and pickle the result.
    data = {}
    for i in range(len(unique_topics)):
        data[unique_topics[i]] = [days, aggregated_values[i]]
    print(data)
    pickle.dump(data, open("topically_aggregated_polarity", "wb"))
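The pickled dict above maps each topic to a [days, values] pair. As a rough, illustrative sketch (the matplotlib plotting below is an assumption for inspection purposes, not part of the project), the file could be reloaded and drawn like this:

import pickle

import matplotlib.pyplot as plt

# Illustrative only: reload the {topic: [days, aggregated_polarity]} dict written
# by test_temporal_topics() above and draw one line per topic.
with open("topically_aggregated_polarity", "rb") as f:
    topic_series = pickle.load(f)

for topic, (days, values) in topic_series.items():
    plt.plot(days, values, label=topic)

plt.xlabel("day")
plt.ylabel("aggregated polarity")
plt.legend()
plt.savefig("topically_aggregated_polarity.png")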
Example #2
import classifier
import plotting
import utils


def test_aggregated_sentiments():
    sub_clf = classifier.get_optimal_subjectivity_classifier()
    pol_clf = classifier.get_optimal_polarity_classifier()
    tweets = utils.get_pickles(2)
    sentimentvalues = utils.get_sentimentvalues(2)
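    # splitvalue=1.0 presumably puts every tweet in the "train" slices, so no
    # held-out test data is produced here.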
    sub_train_tweets, sub_train_targets, _, _, sub_train_sentiments, _ = utils.make_subjectivity_train_and_test_and_targets(
        tweets, sentimentvalues, splitvalue=1.0
    )
    pol_train_tweets, pol_train_targets, _, _, pol_train_sentiments, _ = utils.make_polarity_train_and_test_and_targets(
        tweets, sentimentvalues, splitvalue=1.0
    )

    # Classify subjectivity and polarity with the pre-trained classifiers.
    sub_predictions = sub_clf.classify(sub_train_tweets, sub_train_sentiments)
    pol_predictions = pol_clf.classify(pol_train_tweets, pol_train_sentiments)
    print(pol_train_targets, pol_predictions)
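    # Aggregate subjectivity per day and plot the predictions against the
    # annotated targets, together with per-day tweet frequencies.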
    days, targets, predicts, total_frequencies = utils.temporally_aggregate_subjectivity(
        sub_train_tweets, sub_predictions, targets=sub_train_targets
    )
    data = {"Targets": [days, targets], "Predictions": [days, predicts], "Frequencies": [days, total_frequencies]}
    plotting.plot_subjectivity_aggregates(data, "aggregated_subjectivity")
    days, targets, predicts, frequencies = utils.temporally_aggregate_polarity(
        pol_train_tweets, pol_predictions, targets=pol_train_targets
    )
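    # Normalize the per-day target and prediction values by that day's frequency,
    # and the frequencies by the total tweet count for that day.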
    for i in range(len(days)):
        targets[i] = targets[i] * 1.0 / frequencies[i]
        predicts[i] = predicts[i] * 1.0 / frequencies[i]
        frequencies[i] = frequencies[i] * 1.0 / total_frequencies[i]
    data = {"Targets": [days, targets], "Predictions": [days, predicts], "Frequencies": [days, frequencies]}
    plotting.plot_polarity_aggregates(data, "aggregated_polarity")