def _load_pickle(path):
    """Unpickle and return the object stored in the file at *path*.

    The file is opened in binary mode and is closed before returning, even
    when unpickling raises.
    """
    # NOTE(review): unpickling can execute arbitrary code; these files are
    # assumed to be locally produced and trusted -- confirm before pointing
    # this at data from anywhere else.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def test_remporal_topics():
    """Build per-topic polarity aggregates for the pickled temporal tweets.

    Steps, in order:

    1. Load the pickles "temporal_tweets1" and "temporal_tweets2" and
       concatenate them with ``+``; load "temporal_sentiments".
    2. Classify subjectivity and keep only the tweets (and their sentiment
       entries) predicted as "subjective".
    3. Classify polarity on the subjective subset.
    4. Extract topics for the subjective subset and aggregate polarity per
       topic with ``utils.topically_aggregate_polarity``.
    5. Print and pickle a dict mapping each unique topic to
       ``[days, aggregated_values[i]]`` into "topically_aggregated_polarity".

    All file names are relative to the current working directory.
    """
    tweets = _load_pickle("temporal_tweets1") + _load_pickle("temporal_tweets2")
    # Single-argument print(...) gives the same output under the Python 2
    # print statement and also parses under Python 3.
    print(len(tweets))
    sentiments = _load_pickle("temporal_sentiments")
    print(len(sentiments))

    subclf = classifier.get_optimal_subjectivity_classifier()
    polclf = classifier.get_optimal_polarity_classifier()

    # TODO: write the temporal aggregation etc. here.
    sub_predictions = subclf.classify(tweets, sentiments)

    # Filter tweets and sentiments with the same predicate so the two lists
    # stay index-aligned for the polarity classifier.
    subjective_tweets = [
        tweet
        for prediction, tweet in zip(sub_predictions, tweets)
        if prediction == "subjective"
    ]
    subjective_sentiments = [
        sentiment
        for prediction, sentiment in zip(sub_predictions, sentiments)
        if prediction == "subjective"
    ]

    pol_predictions = polclf.classify(subjective_tweets, subjective_sentiments)
    topics = entity_extraction.perform_entity_extraction(
        subjective_tweets,
        subjective_sentiments,
        use_pmi=True,
        breakword_min_freq=0.1,
        breakword_range=14,
    )
    days, unique_topics, aggregated_values = utils.topically_aggregate_polarity(
        subjective_tweets, pol_predictions, topics=topics
    )

    # Every entry shares the same `days` object. Indexing aggregated_values
    # (rather than zipping) keeps a length mismatch loud instead of silently
    # dropping topics.
    data = {}
    for index, topic in enumerate(unique_topics):
        data[topic] = [days, aggregated_values[index]]
    print(data)

    # The context manager guarantees the output is flushed and closed.
    with open("topically_aggregated_polarity", "wb") as handle:
        pickle.dump(data, handle)
def test_aggregated_sentiments():
    """Plot day-aggregated subjectivity and polarity, targets vs. predictions.

    Obtains the subjectivity and polarity classifiers, builds both datasets
    from ``utils.get_pickles(2)`` / ``utils.get_sentimentvalues(2)``,
    classifies them, aggregates per day, and hands the results to
    ``plotting.plot_subjectivity_aggregates`` (output name
    "aggregated_subjectivity") and ``plotting.plot_polarity_aggregates``
    (output name "aggregated_polarity").

    Before plotting polarity, the per-day target and prediction values are
    divided by that day's polarity frequency, and the polarity frequency is
    divided by the total frequency returned by the subjectivity aggregation.
    """

    def _as_fraction(numerator, denominator):
        # `* 1.0` forces float division under Python 2 integer semantics.
        return numerator * 1.0 / denominator

    subjectivity_model = classifier.get_optimal_subjectivity_classifier()
    polarity_model = classifier.get_optimal_polarity_classifier()

    corpus = utils.get_pickles(2)
    corpus_sentiments = utils.get_sentimentvalues(2)

    # Only positions 0, 1 and 4 of each six-element result are used here.
    # NOTE(review): splitvalue=1.0 presumably puts all data in the training
    # part -- confirm against utils.
    subjectivity_split = utils.make_subjectivity_train_and_test_and_targets(
        corpus, corpus_sentiments, splitvalue=1.0
    )
    sub_tweets, sub_targets, _, _, sub_sentiments, _ = subjectivity_split

    polarity_split = utils.make_polarity_train_and_test_and_targets(
        corpus, corpus_sentiments, splitvalue=1.0
    )
    pol_tweets, pol_targets, _, _, pol_sentiments, _ = polarity_split

    sub_predictions = subjectivity_model.classify(sub_tweets, sub_sentiments)
    pol_predictions = polarity_model.classify(pol_tweets, pol_sentiments)
    # Same text as the two-item Python 2 print statement (str() of each item,
    # joined by one space), in a form that also parses under Python 3.
    print("%s %s" % (pol_targets, pol_predictions))

    # --- Subjectivity -----------------------------------------------------
    subjectivity_aggregate = utils.temporally_aggregate_subjectivity(
        sub_tweets, sub_predictions, targets=sub_targets
    )
    sub_days, sub_target_values, sub_predicted_values, total_frequencies = (
        subjectivity_aggregate
    )
    subjectivity_data = {
        "Targets": [sub_days, sub_target_values],
        "Predictions": [sub_days, sub_predicted_values],
        "Frequencies": [sub_days, total_frequencies],
    }
    plotting.plot_subjectivity_aggregates(
        subjectivity_data, "aggregated_subjectivity"
    )

    # --- Polarity ---------------------------------------------------------
    polarity_aggregate = utils.temporally_aggregate_polarity(
        pol_tweets, pol_predictions, targets=pol_targets
    )
    pol_days, pol_target_values, pol_predicted_values, pol_frequencies = (
        polarity_aggregate
    )

    # NOTE(review): total_frequencies comes from the subjectivity aggregation
    # and is indexed by polarity day position; this assumes both aggregations
    # return the same days in the same order, with non-zero counts -- confirm.
    for position in range(len(pol_days)):
        # Normalise targets and predictions by the raw polarity count first;
        # the count itself is overwritten last.
        pol_target_values[position] = _as_fraction(
            pol_target_values[position], pol_frequencies[position]
        )
        pol_predicted_values[position] = _as_fraction(
            pol_predicted_values[position], pol_frequencies[position]
        )
        pol_frequencies[position] = _as_fraction(
            pol_frequencies[position], total_frequencies[position]
        )

    polarity_data = {
        "Targets": [pol_days, pol_target_values],
        "Predictions": [pol_days, pol_predicted_values],
        "Frequencies": [pol_days, pol_frequencies],
    }
    plotting.plot_polarity_aggregates(polarity_data, "aggregated_polarity")