def correct_positive():
    """correct number of positive tweets."""
    # Load the student's classify() from tweet.py inside the check sandbox.
    classify = uva.check50.py.run("tweet.py").module.classify
    import helpers

    # read_tweets() returns (dates, tweets); only the tweets are needed here,
    # so the dates are deliberately discarded.
    _, tweets = helpers.read_tweets("trump.txt")
    positives = helpers.read_words("positive_words.txt")
    negatives = helpers.read_words("negative_words.txt")

    # Capture everything classify() prints so the counts can be inspected.
    with uva.check50.py.capture_stdout() as stdout:
        classify(tweets, positives, negatives)
    out = stdout.getvalue()

    # The reference solution reports 538 positive tweets.
    check_classify(out, "positive", 538)
    return out
def n_days():
    """correct number of bad days."""
    # Load the student's bad_days() from tweet.py inside the check sandbox.
    bad_days = uva.check50.py.run("tweet.py").module.bad_days
    import helpers

    dates, tweets = helpers.read_tweets("trump.txt")
    positives = helpers.read_words("positive_words.txt")
    negatives = helpers.read_words("negative_words.txt")

    # Capture everything bad_days() prints.
    with uva.check50.py.capture_stdout() as stdout:
        bad_days(dates, tweets, positives, negatives)
    out = stdout.getvalue()

    # A printed date is three runs of digits separated by non-digit,
    # non-newline text (e.g. "1-12-2017") -> one match per printed day.
    # Raw string fixes the invalid '\d' escape of the original pattern;
    # the literal '^' inside the class is kept (it never occurs in output).
    matches = re.findall(r"\d+[^\n^\d]*\d+[^\n^\d]*\d+", out)

    # len([]) == 0, so a single comparison covers both "no matches at all"
    # and "wrong number of matches".
    if len(matches) != 31:
        raise check50.Failure(
            f"expected 31 bad days, but found {len(matches)} dates!")
    return out
def best_words():
    """correct top five positive words."""
    # Load the student's positive_word() from tweet.py inside the sandbox.
    positive_word = uva.check50.py.run("tweet.py").module.positive_word
    import helpers

    dates, tweets = helpers.read_tweets("trump.txt")
    positives = helpers.read_words("positive_words.txt")

    # Run the function and grab everything it prints.
    with uva.check50.py.capture_stdout() as stdout:
        positive_word(tweets, positives)
    out = stdout.getvalue()

    expected = ["great", "trump", "thank", "good", "honor"]

    # Every expected top-5 word must appear somewhere in the output
    # (substring check); report the first one that is missing.
    missing = [word for word in expected if word not in out]
    if missing:
        raise check50.Mismatch(f"{missing[0]}", out)

    # No other positive word may appear, apart from the header words
    # "positive" / "top" that the student's output may legitimately contain.
    unexpected = set(positives) - set(expected) - {"positive", "top"}
    for word in unexpected:
        if word and word in out:
            raise check50.Failure(f"Did not expect {word} in top 5!")
    return out
# detemine sentiment for a single day def determineDaySentiment(acc, sentiment): return [ acc[0], acc[1] + (1 if sentiment[1] > 0 else 0 if sentiment[1] == 0 else -1) ] # detemine sentiments for all days daySentiments = [ reduce(determineDaySentiment, list(day[1]), [day[0], 0]) for day in groupedSentiments ] # print bad days print("Trump's bad days:") [print(" ", day[0]) for day in daySentiments if day[1] < 0] if __name__ == "__main__": # get the dates and tweets from tweet_filename dates, tweets = helpers.read_tweets("trump.txt") # get the lists of negative and positive words positives = helpers.read_words("positive_words.txt") negatives = helpers.read_words("negative_words.txt") classify(tweets, positives, negatives) positive_word(tweets, positives) bad_days(dates, tweets, positives, negatives)
import collections
from scipy.sparse import csr_matrix
import numpy as np
import time
import helpers
import config

# Seeding is disabled, so each run draws different initial cluster centers.
#np.random.seed(42)

# Read tags: list of tag strings, tag -> index map, and the tag count.
tags, tag2idx, tag_count = helpers.read_tags()

# Read words: list of word strings, word -> index map, and the vocab size.
words, word2idx, word_count = helpers.read_words()

# Clusters: one cluster per tag.
K = tag_count

# Initialize cluster centers uniformly at random: one row per cluster,
# one column per vocabulary word.
mu = np.random.rand(K, word_count)

# Get chunks of the indexed training posts.
chunk_reader = helpers.ChunkReader(
    post_filename=config.paths.TRAIN_DATA_IDX,
    chunk_size=config.data.CHUNK_SIZE)

# TODO: Change -- materializing every chunk keeps the whole training set in
# memory; streaming from chunk_reader each iteration may be preferable.
chunks = [chunk for chunk in chunk_reader]

#with open(config.paths.TRAIN_DATA_IDX, 'r') as f:

# NOTE(review): the loop body continues past the visible region; only its
# first statement (recording the iteration start time) is shown here.
for iteration in range(0, config.algorithm.MAX_ITER):
    start = time.time()