def create_queues(self):
    '''
    Refresh the in-memory view of this spider's Redis queues.

    Every Redis key matching `<spider name>:*:queue` is stored in
    `self.queue_keys`. For each key, an entry is (re)built in
    `self.queue_dict` when the key has not been seen before, or for
    every key when `check_config()` reports a truthy value.
    '''
    # a new config could have been loaded since the previous scrape
    config_changed = self.check_config()

    self.queue_keys = self.redis_conn.keys(self.spider.name + ":*:queue")

    for key in self.queue_keys:
        # optional prefixes of the throttle key, controlled by the
        # type and ip flags
        prefix_parts = []
        if self.add_type:
            prefix_parts.append(self.spider.name + ":")
        if self.add_ip:
            prefix_parts.append(self.my_ip + ":")

        # the key looks like `type:tld:queue`; the domain is field 1
        the_domain = re.split(':', key)[1]
        throttle_key = "".join(prefix_parts) + the_domain

        # nothing to do for a known queue when the config is unchanged
        if key in self.queue_dict and not config_changed:
            continue

        self.logger.debug(
            "Added new Throttled Queue {q}".format(q=key))
        priority_queue = RedisPriorityQueue(self.redis_conn, key,
                                            encoding=ujson)

        if the_domain in self.domain_config:
            # domain specific window and hits
            domain_conf = self.domain_config[the_domain]
            window = domain_conf['window']
            hits = domain_conf['hits']
            # adjust the crawl rate by the scale, when one is configured
            if 'scale' in domain_conf:
                hits = int(hits * self.fit_scale(domain_conf['scale']))
        else:
            # fall back to the default window and hits
            window = self.window
            hits = self.hits

        throttled_queue = RedisThrottledQueue(
            self.redis_conn, priority_queue, window, hits,
            self.moderated, throttle_key, throttle_key, True)

        # each entry is a two item list: use [0] to get the queue
        # object and [1] to get the time it was created
        self.queue_dict[key] = [throttled_queue, time.time()]
def create_throttle_queues(self):
    """
    Create the throttled queues for this spider and job.

    Every Redis key matching `<spider name>:<job id>:*:queue` is stored
    in `self.queue_keys`. For each key, an entry is (re)built in
    `self.queue_dict` when the key has not been seen before, or for
    every key when `check_config()` reports a truthy value.

    :return: None
    """
    config_changed = self.check_config()

    key_pattern = '{spider_type}:{job_id}:*:queue'.format(
        spider_type=self.spider.name, job_id=self.job_id)
    self.queue_keys = self.redis_conn.keys(key_pattern)

    for key in self.queue_keys:
        # optional prefixes of the throttle key, controlled by the
        # type and ip flags
        prefix_parts = []
        if self.add_type:
            prefix_parts.append(self.spider.name + ":")
        if self.add_ip:
            prefix_parts.append(self.ip + ":")

        # the domain is field 2 of the colon separated key
        the_domain = re.split(':', key)[2]
        throttle_key = "".join(prefix_parts) + the_domain

        # nothing to do for a known queue when the config is unchanged
        if key in self.queue_dict and not config_changed:
            continue

        self.logger.debug(
            "Added new Throttled Queue {q}".format(q=key))
        priority_queue = RedisPriorityQueue(self.redis_conn, key)

        if the_domain in self.domain_config:
            # domain specific window and hits
            domain_conf = self.domain_config[the_domain]
            window = domain_conf['window']
            hits = domain_conf['hits']
            # adjust the crawl rate by the scale, when one is configured
            if 'scale' in domain_conf:
                hits = int(hits * self.fit_scale(domain_conf['scale']))
        else:
            # fall back to the default window and hits
            window = self.window
            hits = self.hits

        throttled_queue = RedisThrottledQueue(
            self.redis_conn, priority_queue, window, hits,
            self.moderated, throttle_key, throttle_key, True)

        # each entry is a two item list: [0] is the queue object and
        # [1] is the time it was created
        self.queue_dict[key] = [throttled_queue, time.time()]
'--stack', action='store_true', help="Use a RedisStack") group.add_argument('-p', '--priority', action='store_true', help="Use a RedisPriorityQueue") args = vars(parser.parse_args()) if args['queue']: queue = RedisQueue(redis_conn, "my_key") elif args['stack']: queue = RedisStack(redis_conn, "my_key") elif args['priority']: queue = RedisPriorityQueue(redis_conn, "my_key") print "Using " + queue.__class__.__name__ if isinstance(queue, RedisPriorityQueue): queue.push("item1", 50) queue.push("item2", 100) queue.push("item3", 20) else: queue.push("item1") queue.push("item2") queue.push("item3") print "Pop 1 " + queue.pop() print "Pop 2 " + queue.pop() print "Pop 3 " + queue.pop()
def main():
    """Run a small command line demo of the Redis backed queues.

    Exactly one of `--queue`, `--stack` or `--priority` must be given; it
    selects which queue class is created on the Redis key "my_key". Three
    items are pushed (with priorities 50, 100 and 20 for the priority
    queue) and then three pops are printed.
    """
    parser = argparse.ArgumentParser(description='Example Redis Queues.')
    parser.add_argument('-r', '--redis-host', action='store',
                        default='scdev', help="The Redis host ip")
    # type=int makes argparse reject a non-numeric port with a usage
    # error instead of failing later inside the Redis client
    parser.add_argument('-rp', '--redis-port', action='store', type=int,
                        default=6379, help="The Redis port")
    parser.add_argument('-rP', '--redis-password', action='store',
                        default=None, help="The Redis password")

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-q', '--queue', action='store_true',
                       help="Use a RedisQueue")
    group.add_argument('-s', '--stack', action='store_true',
                       help="Use a RedisStack")
    group.add_argument('-p', '--priority', action='store_true',
                       help="Use a RedisPriorityQueue")

    args = vars(parser.parse_args())

    redis_conn = redis.Redis(host=args['redis_host'],
                             port=args['redis_port'],
                             password=args['redis_password'],
                             decode_responses=True)

    # the group is required, so exactly one of these flags is set
    if args['queue']:
        queue = RedisQueue(redis_conn, "my_key")
    elif args['stack']:
        queue = RedisStack(redis_conn, "my_key")
    elif args['priority']:
        queue = RedisPriorityQueue(redis_conn, "my_key")

    print("Using " + queue.__class__.__name__)

    if isinstance(queue, RedisPriorityQueue):
        queue.push("item1", 50)
        queue.push("item2", 100)
        queue.push("item3", 20)
    else:
        queue.push("item1")
        queue.push("item2")
        queue.push("item3")

    # format rather than concatenate: a pop that yields None (or any
    # non-string value) is printed instead of raising TypeError
    for attempt in range(1, 4):
        print("Pop {0} {1}".format(attempt, queue.pop()))
def main():
    """Command line test script for the throttled queue.

    Pushes twice `--num-hits` items onto a RedisThrottledQueue wrapped
    around a RedisPriorityQueue, then pops in a loop, printing each item
    together with the seconds elapsed since reading began. Stop it with
    Ctrl-C; the queue is cleared afterwards.
    """
    import argparse
    import redis
    import time

    import sys
    from os import path
    # make the parent directory importable so the scutils package resolves
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

    from scutils.redis_queue import RedisPriorityQueue
    from scutils.redis_throttled_queue import RedisThrottledQueue

    parser = argparse.ArgumentParser(
        description="Throttled Queue Test Script."
        " Start either a single or multiple processes to see the "
        " throttled queue mechanism in action.")
    parser.add_argument('-r', '--redis-host', action='store', required=True,
                        help="The Redis host ip")
    # numeric options use type=int so a bad value is reported by argparse
    # as a usage error rather than surfacing as a ValueError traceback
    parser.add_argument('-p', '--redis-port', action='store', type=int,
                        default=6379, help="The Redis port")
    parser.add_argument('-m', '--moderate', action='store_const', const=True,
                        default=False, help="Moderate the outbound Queue")
    parser.add_argument('-w', '--window', action='store', type=int,
                        default=60, help="The window time to test")
    parser.add_argument('-n', '--num-hits', action='store', type=int,
                        default=10,
                        help="The number of pops allowed in the given window")
    parser.add_argument('-q', '--queue', action='store', default='testqueue',
                        help="The Redis queue name")

    args = vars(parser.parse_args())

    window = args['window']
    num = args['num_hits']
    host = args['redis_host']
    port = args['redis_port']
    mod = args['moderate']
    queue = args['queue']

    conn = redis.Redis(host=host, port=port)

    q = RedisPriorityQueue(conn, queue)
    t = RedisThrottledQueue(conn, q, window, num, mod)

    def push_items(amount):
        # each item uses its own index as the priority
        for i in range(0, amount):
            t.push('item-' + str(i), i)

    print("Adding", num * 2, "items for testing")
    push_items(num * 2)

    def read_items():
        print("Kill when satisfied ^C")
        ti = time.time()
        while True:
            item = t.pop()
            if item:
                print("My item", item, "My time:", time.time() - ti)

    try:
        read_items()
    except KeyboardInterrupt:
        pass

    # remove whatever is left so the next run starts from a clean queue
    t.clear()
    print("Finished")