# Exemplo n.º 1  (scraped example-site header, commented out so the file parses)
# 0
print 'n authed users: ', len(handles)

#user screen names we are interested in
user_screenname_id_pairs = [line.strip().split("\t") for line in open(sys.argv[3]).readlines()]

print user_screenname_id_pairs[0]

pickle_dir = OUTPUT_DIRECTORY +"/obj/"
network_dir = OUTPUT_DIRECTORY+"/net/"

general_utils.mkdir_no_err(OUTPUT_DIRECTORY)
general_utils.mkdir_no_err(pickle_dir)
general_utils.mkdir_no_err(network_dir)

multiprocess_setup.init_good_sync_manager()

##put data on the queue
request_queue = multiprocess_setup.load_request_queue(user_screenname_id_pairs, len(handles))


processes = []
for i in range(len(handles)):
    p = TwitterEgoNetworkWorker(request_queue, handles[i], network_dir, pickle_dir)
    p.start()
    processes.append(p)

try:
    for p in processes:
        p.join()
except KeyboardInterrupt:
from twitter_dm.utility.general_utils import mkdir_no_err,collect_system_arguments, chunk_data

handles, output_dir, tweet_ids, is_ids = collect_system_arguments(sys.argv)


# Create the output directory
mkdir_no_err(output_dir)

# chunk tweets into 100s (the API takes them by 100)
i = 0
tweets_chunked = chunk_data(tweet_ids)


print tweets_chunked[0]
# init a sync manager
multiprocess_setup.init_good_sync_manager()

# put data on the queue
request_queue = multiprocess_setup.load_request_queue(tweets_chunked, len(handles))
# run!
processes = []
for i in range(len(handles)):
    p = TweetDataWorker(request_queue,handles[i],i,output_dir)
    p.start()
    processes.append(p)

try:
    for p in processes:
        p.join()
except KeyboardInterrupt:
    print 'keyboard interrupt'