__author__ = 'y.zhou'

import multiprocessing

from twitter_connection import TwitterAgent
from IRToolKit import TF_IDF


def thread_worker(seed_id, limit=None, frequency_filter=3):
    # Mine one account: request its timeline, dump the tweets and
    # retweets under inputs/, then print a term-frequency table built
    # from the dumped tweets.
    agent = TwitterAgent()
    agent.request(seed_id)
    if limit is None:
        tweets = agent.get_tweets(seed_id)
    else:
        tweets = agent.get_tweets(seed_id, limit=limit)
    # Plain tweets, one per line.
    with open("inputs/" + seed_id + ".tweets", "w") as f:
        for tweet in tweets["tweets"]:
            f.write(tweet + "\n")
    # Retweets as "original_author: text" pairs.
    with open("inputs/" + seed_id + ".retweets", "w") as f:
        for tweet in tweets["retweets"]:
            f.write(tweet[0] + ": " + tweet[1] + "\n")
    # Terms occurring fewer than frequency_filter times are dropped.
    with open("inputs/" + seed_id + ".tweets") as f:
        handler = TF_IDF.DocumentHandler(f, frequency_filter=frequency_filter)
        result = handler.get_term_frequency()
        print(result)
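# read_tweets_id_list is called by the driver below but is not defined in
# this file. A minimal sketch, assuming tmp/twitter_ids.txt holds one
# screen name per line (an illustrative stand-in, not the project's code):
def read_tweets_id_list(path):
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]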
# A run against a single account reduces to one worker call; the original
# single-seed version of this script mined "nytimes" with a 50-tweet limit
# and frequency_filter=1, i.e.:
#     thread_worker("nytimes", limit=50, frequency_filter=1)
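# IRToolKit.TF_IDF is external to this listing. To make the printed result
# concrete, here is a hypothetical stand-in for what get_term_frequency is
# assumed to compute: a {term: count} map over whitespace-delimited tokens,
# keeping only terms that occur at least frequency_filter times.
from collections import Counter


def term_frequency_sketch(lines, frequency_filter=1):
    counts = Counter(word.lower() for line in lines for word in line.split())
    return {term: n for term, n in counts.items() if n >= frequency_filter}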
# Execution toggles: PARALLEL fans the accounts out over a process pool,
# MANY mines them sequentially, and with both off a single hard-coded
# account is mined. MINING controls whether the Twitter API is queried.
MINING = True
PARALLEL = True
MANY = False

if __name__ == "__main__":
    if PARALLEL:
        # One pool worker per CPU core; each worker mines one account.
        pool = multiprocessing.Pool()
        # pool.map(thread_worker, ["CBCCanada", "CBCWorldNews", "nytchangster"])
        pool.map(thread_worker, read_tweets_id_list("tmp/twitter_ids.txt"))
        pool.close()
        pool.join()
    elif MANY:
        # Mine the same account list one at a time in this process.
        for account in read_tweets_id_list("tmp/twitter_ids.txt"):
            if MINING:
                thread_worker(account)
    else:
        # Fall back to mining a single hard-coded account.
        seed_id = "BBCFood"
        if MINING:
            thread_worker(seed_id)
    print("done")
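# twitter_connection is also not shown in this listing. Below is a minimal
# sketch of the TwitterAgent interface the script depends on, assuming
# tweepy and OAuth credentials; only the request/get_tweets names come from
# the calls above, the body is an illustration, not the project's code
# (the real class is constructed with no arguments, so it presumably loads
# its credentials itself).
import tweepy


class TwitterAgentSketch(object):
    def __init__(self, consumer_key, consumer_secret, access_token, access_secret):
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        self.api = tweepy.API(auth)
        self.timelines = {}

    def request(self, seed_id, count=200):
        # Fetch and cache the account's most recent statuses.
        self.timelines[seed_id] = self.api.user_timeline(screen_name=seed_id,
                                                         count=count)

    def get_tweets(self, seed_id, limit=None):
        # Split the cached timeline into plain tweets and
        # (original_author, text) retweet pairs.
        tweets, retweets = [], []
        for status in self.timelines[seed_id][:limit]:
            if hasattr(status, "retweeted_status"):
                source = status.retweeted_status
                retweets.append((source.user.screen_name, source.text))
            else:
                tweets.append(status.text)
        return {"tweets": tweets, "retweets": retweets}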