__author__ = 'y.zhou'

import multiprocessing

from twitter_connection import TwitterAgent
from IRToolKit import TF_IDF


def thread_worker(seed_id):
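    # Fetch the tweets and retweets of one account, dump them to
    # inputs/<seed_id>.tweets and inputs/<seed_id>.retweets, then print the
    # term frequencies computed over the downloaded tweets.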
    agent = TwitterAgent()
    agent.request(seed_id)
    tweets = agent.get_tweets(seed_id)
    with open("inputs/"+seed_id+".tweets", "w") as f:
        for tweet in tweets["tweets"]:
            f.write(tweet+"\n")
    with open("inputs/"+seed_id+".retweets", "w") as f:
        for tweet in tweets["retweets"]:
            f.write(tweet[0]+": "+tweet[1]+"\n")
    with open("inputs/"+seed_id+".tweets") as f:
        handler = TF_IDF.DocumentHandler(f, frequency_filter=3)
        result = handler.get_term_frequency()
    print result
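

# read_tweets_id_list() is used by the __main__ blocks below but is neither
# defined nor imported in this file. The definition here is a minimal sketch,
# assuming tmp/twitter_ids.txt holds one Twitter screen name per line; replace
# it if the real helper lives in another module.
def read_tweets_id_list(path):
    with open(path) as f:
        # Assumed format: one account id per line, blank lines ignored.
        return [line.strip() for line in f if line.strip()]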


MINING = True  # set to False to skip downloading and reuse inputs/<seed_id>.tweets
if __name__ == "__main__":
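    # One-off run: mine the most recent tweets from the "nytimes" account and
    # print the term frequencies of what was fetched.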
    seed_id = "nytimes"
    if MINING:
        agent = TwitterAgent()
        agent.request(seed_id)
        tweets = agent.get_tweets(seed_id, limit=50)
        with open("inputs/"+seed_id+".tweets", "w") as f:
            for tweet in tweets["tweets"]:
                f.write(tweet+"\n")
        with open("inputs/"+seed_id+".retweets", "w") as f:
            for tweet in tweets["retweets"]:
                f.write(tweet[0]+": "+tweet[1]+"\n")
    with open("inputs/"+seed_id+".tweets") as f:
        handler = TF_IDF.DocumentHandler(f, frequency_filter=1)
        result = handler.get_term_frequency()
    print result
    print "done"

MINING = True     # fetch fresh tweets before computing term frequencies
PARALLEL = True   # mine every account listed in tmp/twitter_ids.txt in a process pool
MANY = False      # mine the listed accounts one at a time instead of in parallel
if __name__ == "__main__":

    if PARALLEL:
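        # Each account is handled end to end by thread_worker in its own
        # worker process.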
        pool = multiprocessing.Pool()
        # pool.map(thread_worker, ["CBCCanada", "CBCWorldNews", "nytchangster"])
        pool.map(thread_worker, read_tweets_id_list("tmp/twitter_ids.txt"))
    elif MANY:
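        # Sequential variant of the PARALLEL branch: mine and analyse the
        # listed accounts one at a time.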
        for account in read_tweets_id_list("tmp/twitter_ids.txt"):
            seed_id = account
            if MINING:
                agent = TwitterAgent()
                agent.request(seed_id)
                tweets = agent.get_tweets(seed_id)
                with open("inputs/"+seed_id+".tweets", "w") as f:
                    for tweet in tweets["tweets"]:
                        f.write(tweet+"\n")
                with open("inputs/"+seed_id+".retweets", "w") as f:
                    for tweet in tweets["retweets"]:
                        f.write(tweet[0]+": "+tweet[1]+"\n")
            with open("inputs/"+seed_id+".tweets") as f:
                handler = TF_IDF.DocumentHandler(f, frequency_filter=3)
                result = handler.get_term_frequency()
            print result
    else:
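        # Default: single-account run against one hard-coded account.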
        seed_id = "BBCFood"
        if MINING: