""" __author__ = 'kjoseph' import glob import os import sys from multiprocessing import Queue from twitter_dm.multiprocess import multiprocess_setup from twitter_dm.multiprocess.WorkerTweetData import TweetDataWorker from twitter_dm.utility import general_utils from twitter_dm.utility.general_utils import mkdir_no_err,collect_system_arguments, chunk_data handles, output_dir, tweet_ids, is_ids = collect_system_arguments(sys.argv) # Create the output directory mkdir_no_err(output_dir) # chunk tweets into 100s (the API takes them by 100) i = 0 tweets_chunked = chunk_data(tweet_ids) print tweets_chunked[0] # init a sync manager multiprocess_setup.init_good_sync_manager() # put data on the queue
""" __author__ = 'kjoseph' import glob import os import sys from twitter_dm.multiprocess import multiprocess_setup from twitter_dm.multiprocess.WorkerUserData import UserDataWorker from datetime import datetime from twitter_dm.utility.general_utils import mkdir_no_err, collect_system_arguments (handles, out_dir, user_ids, is_ids, collect_friends, collect_followers, gen_tweet_counts_file) = collect_system_arguments(sys.argv, [ 'collect_friends (y/n)', 'collect_followers (y/n)', "gen_tweet_counts_file (y/n)" ]) handles = handles[:2] print 'num users: ', len(user_ids) mkdir_no_err(out_dir) mkdir_no_err(os.path.join(out_dir, "obj")) mkdir_no_err(os.path.join(out_dir, "json")) multiprocess_setup.init_good_sync_manager() ##put data on the queue request_queue = multiprocess_setup.load_request_queue(user_ids, len(handles))
import glob import os import sys from time import sleep import io from twitter_dm.utility.general_utils import mkdir_no_err, collect_system_arguments handles, output_dir, data_to_collect, is_ids, friends_or_followers = collect_system_arguments( sys.argv, ['friends or followers']) # in case its weird format accounts = [x.strip().split()[-1] for x in data_to_collect] mkdir_no_err(output_dir) handle_iter = 0 param_arg = 'user_id' if is_ids else 'screen_name' for i, handle in enumerate(accounts): print 'user: '******'cursor': -1, 'count': 5000} n_collected = 0
"""
Another simple, alternative way to get basic information about a file of
user_ids/sns.

The command line is parsed by ``collect_system_arguments``; the opened output
file is handed to ``get_user_ids_and_sn_data_from_list`` together with the
parsed handles and the data to collect.
"""
import io
import sys

from twitter_dm.TwitterUser import get_user_ids_and_sn_data_from_list
from twitter_dm.utility.general_utils import collect_system_arguments

handles, output_file, data_to_collect, is_ids = collect_system_arguments(
    sys.argv)

# Use a context manager so the output file is closed even when the lookup
# raises part-way through (the original only closed it on the success path).
with io.open(output_file, "w") as out_fil:
    # The third argument is the negation of is_ids -- presumably an
    # "inputs are screen names" flag; confirm against twitter_dm.TwitterUser.
    user_data = get_user_ids_and_sn_data_from_list(
        data_to_collect, handles, not is_ids, out_fil)
"""
An example of how to use BotOMeter in parallel
"""
__author__ = 'kjoseph'

import glob
import sys

from twitter_dm.utility.general_utils import collect_system_arguments
from twitter_dm.multiprocess import multiprocess_setup
from twitter_dm.multiprocess.WorkerBotOMeter import BotOMeterWorker
from twitter_dm.utility import general_utils

# Parse the command line; 'mashape_key' is requested as one extra argument
# on top of the values collect_system_arguments returns by default.
(handles,
 out_dir,
 data_to_collect,
 is_ids,
 mashape_key) = collect_system_arguments(sys.argv, ['mashape_key'])

# Make sure the output directory is there before any worker starts.
general_utils.mkdir_no_err(out_dir)

# Set up the sync manager before building the shared queue.
multiprocess_setup.init_good_sync_manager()

# Load the whitespace-stripped input entries onto the request queue.
request_queue = multiprocess_setup.load_request_queue(
    [x.strip() for x in data_to_collect],
    len(handles),
    add_nones=True)

# Start one BotOMeterWorker per handle, passing the handle's position in
# the list as the third argument.
processes = []
for i, handle in enumerate(handles):
    p = BotOMeterWorker(request_queue, handle, i, out_dir, mashape_key)
    p.start()
    processes.append(p)
import botometer from twitter_dm.utility.general_utils import Unbuffered, collect_system_arguments import ujson as json import sys handles, out_file, user_ids, is_ids, mashape_key = collect_system_arguments(sys.argv, ['mashape_key']) twitter_app_auth = { 'consumer_key': handles[0].consumer_key, 'consumer_secret': handles[0].consumer_secret, 'access_token': handles[0].access_token, 'access_token_secret': handles[0].access_token_secret, "wait_on_ratelimit" : True } bom = botometer.Botometer(mashape_key=mashape_key,**twitter_app_auth) accounts = set([x.strip() for x in open(out_file)]) with open(out_file) as inf: for line in inf: uid = json.loads(line)['user']['id_str'] accounts.remove(uid) print 'n accounts: ', len(accounts) of = Unbuffered(open("bot_out.json", "a")) for i, a in enumerate(accounts): if i % 250 == 0: