return temp trueIrrelevants = [] possibleRelevants = [] with open('cleaned_geo_tweets_Apr_12_to_22.csv') as csvfile: tweetData = csv.DictReader(csvfile) for tweet in tweetData: if tweet['time'] != "": # parse date/time into object date = time.strptime(tweet['time'], tweet_time_fmt) tweet['tweet_text'] = twc.cleanUpTweet(tweet['tweet_text']) if date.tm_mday < 15: trueIrrelevants.append(tweet['tweet_text']) elif twc.tweetContainsKeyword(tweet['tweet_text']): possibleRelevants.append(tweet['tweet_text']) trueIrrelevants = randomSubset(trueIrrelevants) possibleRelevants = randomSubset(possibleRelevants) trueRelevants = [] for each in possibleRelevants: print each result = raw_input("Enter a r for relevant, i for irrelevant, n for neither (not English): ") result = result.lower() if result != '': if result[0] == 'i': trueIrrelevants.append(each) elif result[0] == 'r':
# sentimentTweets[c] = [] # tweetList = [] # textList = [] with open('cleaned_geo_tweets_4_12_22.csv') as csvfile: # reads first line of csv to determine keys for the tweet hash, tweets # is an iterator through the list of tweet hashes the DictReader makes tweets = csv.DictReader(csvfile) # for all the tweets the reader finds for tweetData in tweets: # make sure its not a 'false tweet' from people using newlines in their tweet_text's if tweetData['time'] != "": # parse date/time into object date = time.strptime(tweetData['time'], tweet_time_fmt) if date.tm_mday == 15 and twc.tweetContainsKeyword(tweetData['tweet_text']): #if date.tm_mday == 15: if date.tm_hour == currentHour: kwTweets.append(tweetData) #tweetList.append(tweetData) #textList.append(tweetData['tweet_text']) if containsHandle(tweetData['tweet_text']): infoTweets.append(tweetData) elif date.tm_hour == currentHour + 1: currentHour += 1 timeStr = getTimeString(currentHour) # results = clssfr.classify(textList) # for i in range(0, len(results)): # sentimentTweets[cats[results[i]]].append(tweetList[i]) # for sentiment in sentimentTweets.keys():
# NOTE(review): recovered from a collapsed (single-line) source; the
# indentation below is reconstructed -- in particular the nesting of the
# tweetList/textList appends and of the classification loop should be
# confirmed against the original file.
#
# Walk the test-tweet CSV, collect tweets from the 12th for the current hour,
# and when the hour rolls over run the relevance classifier on the hour's
# tweet texts, keeping tweets labeled 0 ("relevant") and, of those, the ones
# that @-mention a handle.  Relies on count2, currentHour, kwTweets,
# infoTweets, tweetList, textList, relTweets, relInfo, relClssfr,
# tweet_time_fmt, containsHandle, getTimeString and twc defined elsewhere in
# this file.
with open('test_tweets_4_12_22.csv') as csvfile:
    # reads first line of csv to determine keys for the tweet hash, tweets
    # is an iterator through the list of tweet hashes the DictReader makes
    tweets = csv.DictReader(csvfile)
    # for all the tweets the reader finds
    for tweetData in tweets:
        # make sure its not a 'false tweet' from people using newlines in their tweet_text's
        if tweetData['time'] != "":
            # parse date/time into object
            date = time.strptime(tweetData['time'], tweet_time_fmt)
            #if date.tm_mday == 15 and twc.tweetContainsKeyword(tweetData['tweet_text']):
            if date.tm_mday == 12:
                count2 += 1
                if date.tm_hour == currentHour:
                    # keyword tweets are bucketed separately from the hour's full list
                    if twc.tweetContainsKeyword(tweetData['tweet_text'].lower()):
                        kwTweets.append(tweetData)
                        if containsHandle(tweetData['tweet_text']):
                            infoTweets.append(tweetData)
                    # every tweet of the hour is fed to the classifier below
                    # (NOTE(review): placement assumed -- these may instead be
                    # nested under the keyword check; confirm)
                    tweetList.append(tweetData)
                    textList.append(tweetData['tweet_text'])
                # hour rolled over: classify everything gathered for the
                # previous hour.  NOTE(review): the tweet that triggers the
                # rollover is itself dropped -- confirm this is intentional.
                elif date.tm_hour == currentHour + 1:
                    currentHour += 1
                    timeStr = getTimeString(currentHour)
                    results = relClssfr.classify(textList)
                    for i in range(0, len(results)):
                        # label 0 == relevant
                        if results[i] == 0:
                            relTweets.append(tweetList[i])
                            if containsHandle(textList[i]):
                                relInfo.append(tweetList[i])
import csv  # FIX: csv.DictReader is used below but csv was never imported here

import twittercriteria as twc
import matplotlib.pyplot as plt

# author: Hayden Fuss
#
# For tweets sent after 14:00 on the 15th whose (lower-cased) text contains a
# keyword, tally:
#   senders -- number of matching tweets per sender_name
#   handles -- per @-mentioned handle: 'count' = number of *distinct* senders
#              that mentioned it, 'senders' = the list of those senders

handles = {}
senders = {}

with open('cleaned_geo_tweets_4_12_22.csv') as csvfile:
    twitterData = csv.DictReader(csvfile)
    for tweet in twitterData:
        # skip 'false tweets' produced by newlines inside tweet_text
        if tweet['time'] != "":
            date = twc.getTweetDate(tweet['time'])
            # only tweets from the 15th at 14:00 onward
            if date.tm_mday > 15 or (date.tm_mday == 15 and date.tm_hour >= 14):
                if twc.tweetContainsKeyword(tweet['tweet_text'].lower()):
                    # per-sender tweet count (idiomatic dict.get update)
                    senders[tweet['sender_name']] = senders.get(tweet['sender_name'], 0) + 1
                    results = twc.getHandlesFromTweet(tweet['tweet_text'])
                    for r in results:
                        handle = r.strip("@").lower()
                        if handle not in handles:
                            # first mention: sender is counted immediately
                            handles[handle] = {'senders': [], 'count': 1}
                            handles[handle]['senders'].append(tweet['sender_name'])
                        # count each distinct sender only once per handle
                        elif tweet['sender_name'] not in handles[handle]['senders']:
                            handles[handle]['count'] += 1
                            handles[handle]['senders'].append(tweet['sender_name'])