def main_function():
    """Train a Naive Bayes classifier on bigram features and print its accuracy.

    Opens a MySQL connection from the ``date_cutoff`` entry of ``DATABASES``,
    builds the training feature set by calling ``process_bigrams`` once per
    label ('+', '-', 'I', 'O'), trains a ``NaiveBayesClassifier``, shows its
    10 most informative features, and prints the accuracy measured on the
    feature set built from ``classify.get_test_tweets``.

    The connection is closed before returning, including when a step raises.

    NOTE(review): ``best_words`` is not defined inside this function; it is
    presumably a module-level name -- confirm it exists at call time.
    """
    db_settings = DATABASES['date_cutoff']
    conn = MySQLdb.connect(host=db_settings['HOST'],
                           user=db_settings['USER'],
                           passwd=db_settings['PASSWORD'],
                           db=db_settings['NAME'])
    try:
        total_word_count = total_words(conn)

        # One process_bigrams pass per label, concatenated in the original
        # order: '+', '-', 'I', 'O'.
        training_feature_set = []
        for label in ('+', '-', 'I', 'O'):
            training_feature_set += process_bigrams(
                conn, label, total_word_count, best_words)

        # Kept from the original even though the megam-backed trainer is not
        # used here; swap in
        #   MaxentClassifier.train(training_feature_set,
        #                          algorithm="megam", trace=0)
        # to try maximum entropy instead of Naive Bayes.
        config_megam('/opt/packages')
        classifier = NaiveBayesClassifier.train(training_feature_set)
        classifier.show_most_informative_features(10)

        test_tweets = classify.get_test_tweets(conn)
        test_feature_set = process_tweets(test_tweets)
        classifier_accuracy = accuracy(classifier, test_feature_set)

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        # The original never released the connection.
        conn.close()
def main_function():
    """Train a megam-backed Maxent classifier and print its test accuracy.

    Opens a MySQL connection from the ``date_cutoff`` entry of ``DATABASES``,
    builds feature sets from ``classify.get_training_tweets`` and
    ``classify.get_test_tweets`` via ``process_tweets``, trains a
    ``MaxentClassifier`` with ``algorithm="megam"``, shows its 10 most
    informative features, and prints the accuracy on the test feature set.

    The connection is closed before returning, including when a step raises.

    NOTE(review): the original passed ``conn_analysis`` to the two fetch
    helpers, a name this function never defines, while the connection it
    opened (``conn``) went unused. This version uses ``conn``; if a
    module-level ``conn_analysis`` pointing at a different database was
    intended, adjust accordingly.
    """
    db_settings = DATABASES['date_cutoff']
    conn = MySQLdb.connect(host=db_settings['HOST'],
                           user=db_settings['USER'],
                           passwd=db_settings['PASSWORD'],
                           db=db_settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = process_tweets(training_tweets)

        # megam must be configured before the Maxent trainer is invoked.
        config_megam('/opt/packages')
        classifier = MaxentClassifier.train(
            training_feature_set, algorithm="megam", trace=0)

        test_tweets = classify.get_test_tweets(conn)
        test_feature_set = process_tweets(test_tweets)

        classifier.show_most_informative_features(10)
        classifier_accuracy = accuracy(classifier, test_feature_set)

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        # The original never released the connection.
        conn.close()
def main_function():
    """Train a Naive Bayes classifier on the training tweets and print accuracy.

    Opens a MySQL connection from the ``default`` entry of ``DATABASES``,
    turns the rows from ``classify.get_training_tweets`` and
    ``classify.get_test_tweets`` into feature sets with
    ``classify.process_tweets``, trains a ``NaiveBayesClassifier`` and prints
    the accuracy on the test feature set.

    The per-label tally dictionaries the original built were never read and
    have been dropped. The connection is closed before returning, including
    when a step raises.
    """
    db_settings = DATABASES['default']
    conn = MySQLdb.connect(host=db_settings['HOST'],
                           user=db_settings['USER'],
                           passwd=db_settings['PASSWORD'],
                           db=db_settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        # Kept from the original even though the megam-backed trainer is not
        # used here; swap in
        #   MaxentClassifier.train(training_feature_set,
        #                          algorithm="megam", trace=0)
        # to try maximum entropy instead of Naive Bayes.
        config_megam('/opt/packages')
        classifier = NaiveBayesClassifier.train(training_feature_set)

        test_tweets = classify.get_test_tweets(conn)
        test_feature_set = classify.process_tweets(test_tweets)
        classifier_accuracy = accuracy(classifier, test_feature_set)

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        # The original never released the connection.
        conn.close()
def main_function():
    """Label test tweets with two classifiers and print a guess/actual matrix.

    Everything runs against one MySQL connection opened from the ``default``
    entry of ``DATABASES``:

    1. Train a ``NaiveBayesClassifier`` on the training feature set, classify
       every test tweet, store each guess with
       ``classify.update_tweet_polarity`` and print the per-label counts.
    2. Train a megam-backed ``MaxentClassifier`` on the same feature set,
       classify every test tweet again, store each guess with
       ``update_tweet_polarity_ensemble`` and print the per-label counts.
    3. For every test tweet run ``classify.Statements.CHECK_CONSENSUS`` and
       ``classify.Statements.CHECK_MAJORITY``; when both return a non-None
       value, increment ``full_matrix[actual][guess]``. Print the matrix.

    The connection is closed before returning, including when a step raises.

    NOTE(review): ``test_tweets`` is iterated three times, so it is assumed to
    be a re-iterable sequence (not a one-shot iterator) -- confirm.
    """
    labels = ('+', '-', 'I', 'O')
    db_settings = DATABASES['default']
    conn = MySQLdb.connect(host=db_settings['HOST'],
                           user=db_settings['USER'],
                           passwd=db_settings['PASSWORD'],
                           db=db_settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        # --- Stage 1: Naive Bayes -------------------------------------
        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        count_table = dict.fromkeys(labels, 0)
        test_tweets = classify.get_test_tweets(conn)
        for tweet in test_tweets:
            # tweet[0] is the value every helper below uses to address the
            # tweet.
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = bayes_classifier.classify(classify.process_tweet(text))
            classify.update_tweet_polarity(tweet_id, guess, conn)
            count_table[guess] += 1
        # Parenthesised single-argument prints behave the same under the
        # Python 2 print statement and the Python 3 print function.
        print("Naive Bayes")
        print(count_table)

        # --- Stage 2: Maximum Entropy (megam) --------------------------
        count_table = dict.fromkeys(labels, 0)
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(
            training_feature_set, algorithm="megam", trace=0)
        for tweet in test_tweets:
            tweet_id = tweet[0]
            text = classify.get_tweet_text(conn, tweet_id)[0][0]
            guess = max_ent_classifier.classify(classify.process_tweet(text))
            update_tweet_polarity_ensemble(tweet_id, guess, conn)
            count_table[guess] += 1
        print("Maximum Entropy")
        print(count_table)

        # --- Stage 3: accuracy matrix, indexed [actual][guess] ---------
        full_matrix = dict(
            (actual_label, dict.fromkeys(labels, 0))
            for actual_label in labels)
        for tweet in test_tweets:
            consensus_rows = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % tweet[0])
            guess = consensus_rows[0][0]
            majority_rows = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % tweet[0])
            actual = majority_rows[0][0]
            # Only tweets with both a guess and an actual value are counted.
            if guess is not None and actual is not None:
                full_matrix[actual][guess] += 1
        print(full_matrix)
    finally:
        # The original never released the connection.
        conn.close()
def main_function():
    """Report the accuracy of a Naive Bayes classifier on the test tweets.

    Connects to MySQL using the ``default`` entry of ``DATABASES``, builds the
    training and test feature sets by passing the rows from
    ``classify.get_training_tweets`` / ``classify.get_test_tweets`` through
    ``classify.process_tweets``, trains a ``NaiveBayesClassifier`` on the
    former and prints its accuracy on the latter.

    Tally dictionaries that the original initialised but never read are
    omitted. The connection is closed on every exit path.
    """
    settings = DATABASES['default']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_feature_set = classify.process_tweets(
            classify.get_training_tweets(conn))

        # config_megam is retained from the original although only the
        # commented-out alternative needed it:
        #   MaxentClassifier.train(training_feature_set,
        #                          algorithm="megam", trace=0)
        config_megam('/opt/packages')
        classifier = NaiveBayesClassifier.train(training_feature_set)

        test_feature_set = classify.process_tweets(
            classify.get_test_tweets(conn))
        classifier_accuracy = accuracy(classifier, test_feature_set)

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        # The original never released the connection.
        conn.close()
def main_function():
    """Run the two-classifier labelling pass on the ``ensemble`` database.

    Using one MySQL connection opened from ``DATABASES['ensemble']``:

    1. Train a ``NaiveBayesClassifier`` on the training feature set, classify
       each test tweet, store the guess with
       ``classify.update_tweet_polarity`` and print the per-label counts.
    2. Train a megam-backed ``MaxentClassifier`` on the same feature set,
       classify each test tweet, store the guess with
       ``update_tweet_polarity_ensemble`` and print the per-label counts.
    3. Query ``classify.Statements.CHECK_CONSENSUS`` and
       ``classify.Statements.CHECK_MAJORITY`` for each test tweet and, when
       neither result is None, increment ``full_matrix[actual][guess]``.
       Print the resulting matrix.

    The connection is closed before returning, including when a step raises.

    NOTE(review): ``test_tweets`` is looped over three times, so it is assumed
    to be a re-iterable sequence rather than a one-shot iterator -- confirm.
    """
    labels = ('+', '-', 'I', 'O')
    settings = DATABASES['ensemble']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        # Pass 1: Naive Bayes guesses.
        bayes_classifier = NaiveBayesClassifier.train(training_feature_set)
        count_table = dict.fromkeys(labels, 0)
        test_tweets = classify.get_test_tweets(conn)
        for row in test_tweets:
            # row[0] is the value every helper below uses to address the
            # tweet.
            key = row[0]
            text = classify.get_tweet_text(conn, key)[0][0]
            features = classify.process_tweet(text)
            guess = bayes_classifier.classify(features)
            classify.update_tweet_polarity(key, guess, conn)
            count_table[guess] += 1
        # Parenthesised single-argument prints behave the same under the
        # Python 2 print statement and the Python 3 print function.
        print("Naive Bayes")
        print(count_table)

        # Pass 2: Maximum Entropy guesses (megam must be configured first).
        count_table = dict.fromkeys(labels, 0)
        config_megam('/opt/packages')
        max_ent_classifier = MaxentClassifier.train(
            training_feature_set, algorithm="megam", trace=0)
        for row in test_tweets:
            key = row[0]
            text = classify.get_tweet_text(conn, key)[0][0]
            features = classify.process_tweet(text)
            guess = max_ent_classifier.classify(features)
            update_tweet_polarity_ensemble(key, guess, conn)
            count_table[guess] += 1
        print("Maximum Entropy")
        print(count_table)

        # Pass 3: accuracy matrix, outer key = actual, inner key = guess.
        full_matrix = dict(
            (actual_label, dict.fromkeys(labels, 0))
            for actual_label in labels)
        for row in test_tweets:
            guess = classify.run_sql(
                conn, classify.Statements.CHECK_CONSENSUS % row[0])[0][0]
            actual = classify.run_sql(
                conn, classify.Statements.CHECK_MAJORITY % row[0])[0][0]
            # Skip tweets lacking either a guess or an actual value.
            if guess is None or actual is None:
                continue
            full_matrix[actual][guess] += 1
        print(full_matrix)
    finally:
        # The original never released the connection.
        conn.close()