def create_queues(self):
        '''
        Refresh the in memory list of this spider's redis queues.

        Every redis key matching `<spider name>:*:queue` is stored in
        `self.queue_keys`. A throttled queue entry is (re)built in
        `self.queue_dict` for each key that is not tracked yet, or for
        every key when `check_config()` returns a truthy value.
        '''
        # a new config could have been loaded between scrapes
        config_changed = self.check_config()

        self.queue_keys = self.redis_conn.keys(self.spider.name + ":*:queue")

        for key in self.queue_keys:
            # throttle key looks like `[type:][ip:]domain`; the optional
            # pieces are controlled by the add_type / add_ip bools
            type_part = self.spider.name + ":" if self.add_type else ""
            ip_part = self.my_ip + ":" if self.add_ip else ""

            # the domain is the middle piece of the key `type:tld:queue`
            the_domain = re.split(':', key)[1]
            throttle_key = type_part + ip_part + the_domain

            # already tracked and the config is unchanged: nothing to do
            if key in self.queue_dict and not config_changed:
                continue

            self.logger.debug(
                "Added new Throttled Queue {q}".format(q=key))
            base_queue = RedisPriorityQueue(self.redis_conn, key,
                                            encoding=ujson)

            if the_domain in self.domain_config:
                # domain specific window and hits
                domain_conf = self.domain_config[the_domain]
                window = domain_conf['window']
                hits = domain_conf['hits']

                # adjust the crawl rate based on the scale, if present
                if 'scale' in domain_conf:
                    hits = int(hits * self.fit_scale(domain_conf['scale']))
            else:
                # fall back to the default window and hits
                window = self.window
                hits = self.hits

            throttled = RedisThrottledQueue(self.redis_conn, base_queue,
                                            window, hits, self.moderated,
                                            throttle_key, throttle_key, True)

            # stored as a two item list: use [0] to get the queue object
            # and [1] to get the time it was created
            self.queue_dict[key] = [throttled, time.time()]
Пример #2
0
    def create_throttle_queues(self):
        """
        Create the throttled queues.

        Looks up every redis key matching `<spider name>:<job id>:*:queue`,
        stores the result in `self.queue_keys`, and (re)builds the entry in
        `self.queue_dict` for each key that is not tracked yet, or for every
        key when `check_config()` returns a truthy value.

        :return: None
        """
        config_changed = self.check_config()
        pattern = '{spider_type}:{job_id}:*:queue'.format(
            spider_type=self.spider.name, job_id=self.job_id)
        self.queue_keys = self.redis_conn.keys(pattern)

        for key in self.queue_keys:
            # throttle key looks like `[type:][ip:]domain`
            type_part = self.spider.name + ":" if self.add_type else ""
            ip_part = self.ip + ":" if self.add_ip else ""

            # the domain is the third piece of the colon separated key
            the_domain = re.split(':', key)[2]
            throttle_key = type_part + ip_part + the_domain

            # already tracked and the config is unchanged: nothing to do
            if key in self.queue_dict and not config_changed:
                continue

            self.logger.debug(
                "Added new Throttled Queue {q}".format(q=key))
            base_queue = RedisPriorityQueue(self.redis_conn, key)

            if the_domain in self.domain_config:
                # domain specific window and hits
                domain_conf = self.domain_config[the_domain]
                window = domain_conf['window']
                hits = domain_conf['hits']
                # adjust the hit count by the configured scale, if present
                if 'scale' in domain_conf:
                    hits = int(hits * self.fit_scale(domain_conf['scale']))
            else:
                # fall back to the default window and hits
                window = self.window
                hits = self.hits

            throttled = RedisThrottledQueue(self.redis_conn, base_queue,
                                            window, hits, self.moderated,
                                            throttle_key, throttle_key, True)

            # entry layout: [throttled queue, creation time]
            self.queue_dict[key] = [throttled, time.time()]
Пример #3
0
                   '--stack',
                   action='store_true',
                   help="Use a RedisStack")
group.add_argument('-p',
                   '--priority',
                   action='store_true',
                   help="Use a RedisPriorityQueue")

args = vars(parser.parse_args())

# Build the queue implementation that matches the selected flag.
# NOTE(review): `queue` stays undefined if none of the flags is set;
# presumably `group` is declared as required above -- confirm.
if args['queue']:
    queue = RedisQueue(redis_conn, "my_key")
elif args['stack']:
    queue = RedisStack(redis_conn, "my_key")
elif args['priority']:
    queue = RedisPriorityQueue(redis_conn, "my_key")

# print() with a single parenthesised argument behaves the same on
# Python 2 and Python 3, unlike the bare `print` statement.
print("Using " + queue.__class__.__name__)

# The priority queue takes an explicit priority with every item,
# the other queue types only take the item itself.
if isinstance(queue, RedisPriorityQueue):
    queue.push("item1", 50)
    queue.push("item2", 100)
    queue.push("item3", 20)
else:
    queue.push("item1")
    queue.push("item2")
    queue.push("item3")

# Pop the three items back to show the ordering of the chosen queue.
print("Pop 1 " + queue.pop())
print("Pop 2 " + queue.pop())
print("Pop 3 " + queue.pop())
Пример #4
0
def main():
    """Demonstrate the Redis queue types from the command line.

    Parses the Redis connection options plus exactly one of the
    mutually exclusive `--queue`, `--stack` or `--priority` flags,
    pushes three items onto the selected queue type under the key
    "my_key", then pops and prints them.
    """
    arg_parser = argparse.ArgumentParser(description='Example Redis Queues.')

    # connection options: (short flag, long flag, default, help text)
    connection_options = (
        ('-r', '--redis-host', 'scdev', "The Redis host ip"),
        ('-rp', '--redis-port', '6379', "The Redis port"),
        ('-rP', '--redis-password', None, "The Redis password"),
    )
    for short_flag, long_flag, default, help_text in connection_options:
        arg_parser.add_argument(short_flag,
                                long_flag,
                                action='store',
                                default=default,
                                help=help_text)

    # exactly one queue type must be chosen
    type_group = arg_parser.add_mutually_exclusive_group(required=True)
    type_flags = (
        ('-q', '--queue', "Use a RedisQueue"),
        ('-s', '--stack', "Use a RedisStack"),
        ('-p', '--priority', "Use a RedisPriorityQueue"),
    )
    for short_flag, long_flag, help_text in type_flags:
        type_group.add_argument(short_flag,
                                long_flag,
                                action='store_true',
                                help=help_text)

    options = vars(arg_parser.parse_args())

    redis_conn = redis.Redis(host=options['redis_host'],
                             port=options['redis_port'],
                             password=options['redis_password'],
                             decode_responses=True)

    # choose the class first, then build the single instance
    if options['queue']:
        queue_cls = RedisQueue
    elif options['stack']:
        queue_cls = RedisStack
    elif options['priority']:
        queue_cls = RedisPriorityQueue
    queue = queue_cls(redis_conn, "my_key")

    print("Using " + queue.__class__.__name__)

    # the priority queue takes a priority with every pushed item
    if isinstance(queue, RedisPriorityQueue):
        for item, priority in (("item1", 50), ("item2", 100), ("item3", 20)):
            queue.push(item, priority)
    else:
        for item in ("item1", "item2", "item3"):
            queue.push(item)

    for label in ("Pop 1 ", "Pop 2 ", "Pop 3 "):
        print(label + queue.pop())
Пример #5
0
def main():
    """Run the throttled queue test script.

    Parses the command line, connects to Redis, pushes twice the allowed
    number of hits onto a throttled priority queue and then pops items in
    an endless loop, printing each popped item with the elapsed time.
    Stop it with Ctrl-C; the queue is cleared before the script exits.
    """

    import argparse
    import redis
    import time

    import sys
    from os import path
    # make the parent directory importable so `scutils` resolves when this
    # file is run directly from its own folder
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

    from scutils.redis_queue import RedisPriorityQueue
    from scutils.redis_throttled_queue import RedisThrottledQueue

    parser = argparse.ArgumentParser(
        description="Throttled Queue Test Script."
        " Start either a single or multiple processes to see the "
        " throttled queue mechanism in action.")
    parser.add_argument('-r',
                        '--redis-host',
                        action='store',
                        required=True,
                        help="The Redis host ip")
    parser.add_argument('-p',
                        '--redis-port',
                        action='store',
                        default='6379',
                        help="The Redis port")
    parser.add_argument('-m',
                        '--moderate',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Moderate the outbound Queue")
    parser.add_argument('-w',
                        '--window',
                        action='store',
                        default=60,
                        help="The window time to test")
    parser.add_argument('-n',
                        '--num-hits',
                        action='store',
                        default=10,
                        help="The number of pops allowed in the given window")
    parser.add_argument('-q',
                        '--queue',
                        action='store',
                        default='testqueue',
                        help="The Redis queue name")

    args = vars(parser.parse_args())

    # command line values arrive as strings, so convert the numeric ones
    window = int(args['window'])
    num = int(args['num_hits'])
    host = args['redis_host']
    port = args['redis_port']
    mod = args['moderate']
    queue = args['queue']

    conn = redis.Redis(host=host, port=port)

    q = RedisPriorityQueue(conn, queue)
    t = RedisThrottledQueue(conn, q, window, num, mod)

    def push_items(amount):
        """Push `amount` items, using each item's index as its priority."""
        for i in range(amount):
            t.push('item-' + str(i), i)

    print("Adding", num * 2, "items for testing")
    push_items(num * 2)

    def read_items():
        """Pop forever, printing every item together with the elapsed time."""
        print("Kill when satisfied ^C")
        ti = time.time()
        while True:
            item = t.pop()
            if item:
                print("My item", item, "My time:", time.time() - ti)

    try:
        read_items()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the endless pop loop
        pass
    finally:
        # always remove the test data, even if popping failed unexpectedly
        t.clear()
    print("Finished")