def cmd(config, args):
    """Run one client command, locally or by putting it on the node's queue.

    Args:
        config: mapping that provides ``'redis_config'`` and, for the two
            ``GET_*`` commands, ``'apikeys'`` (a dict whose first value is used).
        args: parsed arguments; ``command`` and ``node_id`` are always read,
            ``json`` (input path) and ``output`` (output path) are read by the
            file-driven commands.

    Raises:
        Exception: if ``args.command`` is not in ``avaliable_cmds`` or the
            ``args.json`` input file does not exist.
        NotImplementedError: for ``NODE_QSIZES`` and ``SHUTDOWN_NODE``.
    """
    if args.command not in avaliable_cmds:
        raise Exception("not a valid command...")

    nid = args.node_id
    logger.info("node_id: %s", nid)
    node_queue = NodeQueue(nid, redis_config=config['redis_config'])
    node_coordinator = NodeCoordinator(config['redis_config'])

    def _load_json_input():
        # Shared by every file-driven command: validate the path, then parse it.
        if not os.path.exists(args.json):
            raise Exception("doesn't exist... ")
        with open(os.path.abspath(args.json), 'rb') as in_f:
            return json.load(in_f)

    def _dump_json_output(payload):
        # Opened only once the payload exists, so a failed lookup does not
        # truncate a previous output file. Text mode because json.dump
        # writes str.
        with open(os.path.abspath(args.output), 'w') as out_f:
            json.dump(payload, out_f)

    # this can be done locally without sending the command to the servers...
    if args.command == 'GET_UIDS_FROM_SCREEN_NAMES':
        # list(...) keeps the "first value" lookup working on dict views too.
        apikeys = list(config["apikeys"].values())[0]
        screen_names = _load_json_input()
        user_api = User(apikeys=apikeys)
        user_ids = user_api.get_user_ids_by_screen_names(screen_names)
        _dump_json_output(list(user_ids))
    elif args.command == 'GET_USERS_FROM_IDS':
        apikeys = list(config["apikeys"].values())[0]
        user_ids = _load_json_input()
        user_api = User(apikeys=apikeys)
        users = user_api.get_users(user_ids)
        _dump_json_output(list(users))
    elif args.command.startswith('BATCH_'):
        # BATCH_<X> fans out into one <X> command per id in the input file.
        command = args.command.replace('BATCH_', '')
        args_dict = copy.copy(args.__dict__)
        for user_id in _load_json_input():
            args_dict['user_id'] = user_id
            node_queue.put(new_cmd(command, args_dict))
    elif args.command == 'LIST_NODES':
        pp.pprint(node_coordinator.list_nodes())
    elif args.command in ('NODE_QSIZES', 'SHUTDOWN_NODE'):
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception type.
        raise NotImplementedError("NotImplemented yet...")
    elif args.command == 'CLEAR_NODE_QUEUES':
        node_queue.clear_all_queues()
    else:
        # Any other valid command is forwarded unchanged.
        message = new_cmd(args.command, copy.copy(args.__dict__))
        node_queue.put(message)
        logger.info('sent [%s]', message)
def test_get_user_id(self):
    """Resolve a single screen name through the User API and log the result."""
    from tweetf0rm.twitterapi.users import User
    from tweetf0rm.handler.inmemory_handler import InMemoryHandler

    screen_names = ["AmericanCance"]
    credentials = self.config["apikeys"]["i0mf0rmer03"]

    client = User(apikeys=credentials)
    logger.info(client.get_user_ids_by_screen_names(screen_names))
def init_user_api(self):
    """(Re)create ``self.user_api``, moving to the next proxy when proxies are set.

    When ``self.proxies`` is truthy, entries are pulled from it with ``next()``
    until one provides a ``'proxy_dict'`` value, which is stored in
    ``self.client_args['proxies']``. An entry that raises while being read is
    skipped. A new ``User`` is then built from ``self.apikeys`` and
    ``self.client_args``.

    Raises:
        StopIteration: when ``self.proxies`` runs out before a usable entry
            is found (i.e. all proxies have been tried).
    """
    if self.proxies:
        # Iterate rather than recurse: a long run of bad entries must not
        # exhaust the call stack, and User should be built only once.
        while True:
            try:
                self.client_args['proxies'] = next(self.proxies)['proxy_dict']
            except StopIteration:
                # All proxies have been tried; let the caller decide.
                raise
            except Exception:
                # Unusable entry; try the next one.
                continue
            else:
                break

    if self.user_api:
        del self.user_api

    self.user_api = User(apikeys=self.apikeys, client_args=self.client_args)
def call_user_api(apikeys, client_args):
    """Build a User client and call find_all_friend_ids for one fixed user id."""
    target_user_id = 53039176
    client = User(apikeys=apikeys, client_args=client_args)
    handlers = [Handler()]
    client.find_all_friend_ids(target_user_id, handlers)
def cmd(config, args):
    """Run one client command, locally or by putting it on the node's queue.

    Args:
        config: mapping that provides ``'redis_config'`` and, for the two
            ``GET_*`` commands, ``'apikeys'`` (a dict whose first value is used).
        args: parsed arguments; ``command`` and ``node_id`` are always read,
            ``json`` (input path) and ``output`` (output path) are read by the
            file-driven commands.

    Raises:
        Exception: if ``args.command`` is not in ``avaliable_cmds`` or the
            ``args.json`` input file does not exist.
        NotImplementedError: for ``NODE_QSIZES`` and ``SHUTDOWN_NODE``.
    """
    if args.command not in avaliable_cmds:
        raise Exception("not a valid command...")

    nid = args.node_id
    logger.info("node_id: %s", nid)
    node_queue = NodeQueue(nid, redis_config=config['redis_config'])
    node_coordinator = NodeCoordinator(config['redis_config'])

    def _load_json_input():
        # Shared by every file-driven command: validate the path, then parse it.
        if not os.path.exists(args.json):
            raise Exception("doesn't exist... ")
        with open(os.path.abspath(args.json), 'rb') as in_f:
            return json.load(in_f)

    def _dump_json_output(payload):
        # Opened only once the payload exists, so a failed lookup does not
        # truncate a previous output file. Text mode because json.dump
        # writes str.
        with open(os.path.abspath(args.output), 'w') as out_f:
            json.dump(payload, out_f)

    # this can be done locally without sending the command to the servers...
    if args.command == 'GET_UIDS_FROM_SCREEN_NAMES':
        # list(...) keeps the "first value" lookup working on dict views too.
        apikeys = list(config["apikeys"].values())[0]
        screen_names = _load_json_input()
        user_api = User(apikeys=apikeys)
        user_ids = user_api.get_user_ids_by_screen_names(screen_names)
        _dump_json_output(list(user_ids))
    elif args.command == 'GET_USERS_FROM_IDS':
        apikeys = list(config["apikeys"].values())[0]
        user_ids = _load_json_input()
        user_api = User(apikeys=apikeys)
        users = user_api.get_users(user_ids)
        _dump_json_output(list(users))
    elif args.command.startswith('BATCH_'):
        # BATCH_<X> fans out into one <X> command per id in the input file.
        new_command = args.command.replace('BATCH_', '')
        args_dict = copy.copy(args.__dict__)
        crawling_tweets = args.command == 'BATCH_CRAWL_TWEET'
        # Tweet batches carry the id under 'tweet_id'; every other batch
        # command carries it under 'user_id'.
        id_key = 'tweet_id' if crawling_tweets else 'user_id'
        for item_id in _load_json_input():
            if crawling_tweets:
                # Single-argument call form prints the same text whether
                # print is a statement or a function.
                print("Loading Tweet ID:  %s" % (item_id,))
            args_dict[id_key] = item_id
            node_queue.put(new_cmd(new_command, args_dict))
    elif args.command == 'LIST_NODES':
        pp.pprint(node_coordinator.list_nodes())
    elif args.command in ('NODE_QSIZES', 'SHUTDOWN_NODE'):
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception type.
        raise NotImplementedError("NotImplemented yet...")
    elif args.command == 'CLEAR_NODE_QUEUES':
        node_queue.clear_all_queues()
    else:
        # Any other valid command is forwarded unchanged.
        message = new_cmd(args.command, copy.copy(args.__dict__))
        node_queue.put(message)
        logger.info('sent [%s]', message)