def main():
    """Load the control and cohort tweet sets and build CDFs for each.

    Runs SELECT_CONTROL_TWEETS and then SELECT_COHORT_TWEETS on one cursor
    from the connection supplied by ``dbUtils.setup_mysql_cxn()``, reading
    each result set with ``dbUtils.get_named_rows``. The two row sets are
    then passed to ``make_cdfs`` under the labels "control" and "cohort",
    in that order.
    """
    # Pre-bind both names so they exist after the ``with`` block.
    control_rows = cohort_rows = None

    with dbUtils.setup_mysql_cxn() as connection:
        cursor = connection.cursor()

        def fetch(query):
            # Execute one query and return its rows via the shared cursor.
            cursor.execute(query)
            return dbUtils.get_named_rows(cursor)

        control_rows = fetch(SELECT_CONTROL_TWEETS)
        cohort_rows = fetch(SELECT_COHORT_TWEETS)

    # NOTE(review): the CDF calls are assumed to run after the connection
    # context has exited (the names are pre-initialised above) -- confirm.
    for label, rows in (("control", control_rows), ("cohort", cohort_rows)):
        make_cdfs(label, rows)
fields.sort() with open(fname, "w+") as f_out: writer = csv.writer(f_out, delimiter="\t") writer.writerow(fields) for row in results: row_elems = [row[k] if k in row else 0 for k in fields] writer.writerow(row_elems) if __name__ == "__main__": processor = TweetProcessor() results = [] with dbUtils.setup_mysql_cxn() as cxn: curs = cxn.cursor(); curs.execute(dbUtils.GET_USERS_W_COHORT) users = dbUtils.get_named_rows(curs) for user in users: curs.execute(dbUtils.GET_TWEETS_TEMPL % user['twitter_user_id']) tweets = dbUtils.get_named_rows(curs) result = defaultdict(lambda: 0) for tweet in tweets: result = processor.process_tweet(tweet['tweet_text'], result) ntweets = len(tweets) if ntweets > 0: for k, v in result.items(): result[k] = float(v) / float(ntweets) result = add_time_model(tweets, result) if user['utc_offset_sec'] is None: result['time.offsetted'] = False