def __init__(self, *args, **kwargs):
    print('>>>>>>>>>>>>>RabbitMQMixin#__init__')  # debug trace left in by the author
    self.crawler = args[0]
    self.queue_name = self.crawler.settings.get('RABBITMQ_QUEUE_NAME')
    # Open a connection and a channel, then make sure the queue exists.
    self.connection = con.from_settings(self.crawler.settings)
    self.channel = self.connection.channel()
    self.channel.queue_declare(self.queue_name)
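# The `con.from_settings` helper is not shown in these snippets. Below is a
# minimal sketch of what it plausibly does, assuming pika and the
# RABBITMQ_CONNECTION_PARAMETERS setting used by the tests further down;
# treat the default parameters as assumptions, not the library's behavior.
import pika

def from_settings(settings):
    params = settings.get('RABBITMQ_CONNECTION_PARAMETERS', {'host': 'localhost'})
    return pika.BlockingConnection(pika.ConnectionParameters(**params))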
@classmethod
def from_settings(cls, settings):
    server = connection.from_settings(settings)
    # Create a one-time key. This is needed to support using this class as a
    # standalone dupefilter with Scrapy's default scheduler; if Scrapy passed
    # the spider to the open() method, this wouldn't be needed.
    key = "dupefilter:%s" % int(time.time())
    return cls(server, key)
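# Hedged usage example: enabling the class above as a standalone dupefilter
# in a project's settings.py. The dotted path is an assumption about the
# package layout, not something the snippets confirm.
DUPEFILTER_CLASS = 'scrapy_rabbitmq.dupefilter.RFPDupeFilter'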
def test_get_conn_localhost(self):
    settings = {
        'host': 'localhost',
        'port': 5672,
    }
    conn = connection.from_settings(settings)
    debug(conn)
@classmethod
def from_settings(cls, settings):
    persist = settings.get('SCHEDULER_PERSIST', SCHEDULER_PERSIST)
    queue_key = settings.get('SCHEDULER_QUEUE_KEY', QUEUE_KEY)
    queue_cls = load_object(settings.get('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS))
    dupefilter_key = settings.get('DUPEFILTER_KEY', DUPEFILTER_KEY)
    idle_before_close = settings.get('SCHEDULER_IDLE_BEFORE_CLOSE', IDLE_BEFORE_CLOSE)
    # The scheduler holds a channel, not the connection itself.
    server = con.from_settings(settings).channel()
    return cls(server, persist, queue_key, queue_cls, dupefilter_key, idle_before_close)
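# The fallback constants referenced above live at module level and are not
# shown in these snippets. A plausible set of defaults, assuming a
# scrapy-redis-style layout; the concrete values and dotted path are
# assumptions.
SCHEDULER_PERSIST = False
QUEUE_KEY = '%(spider)s:requests'
QUEUE_CLASS = 'scrapy_rabbitmq.queue.SpiderQueue'
DUPEFILTER_KEY = '%(spider)s:dupefilter'
IDLE_BEFORE_CLOSE = 0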
def test_from_settings_auth(self):
    credentials = connection.credentials(self.user, self.password)
    settings = {
        'RABBITMQ_CONNECTION_PARAMETERS': {
            'host': 'localhost',
            'credentials': credentials,
        }
    }
    ch = connection.from_settings(settings)
    ch.close()
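# Hedged scaffold showing where the two test methods above could live. The
# class name, the default guest credentials, and the `debug` helper are all
# assumptions filled in to make the example self-contained.
import logging
import unittest

from scrapy_rabbitmq import connection  # import path of the module under test is an assumption

logging.basicConfig(level=logging.DEBUG)

def debug(obj):
    logging.getLogger(__name__).debug(repr(obj))

class ConnectionFromSettingsTest(unittest.TestCase):
    def setUp(self):
        # RabbitMQ's out-of-the-box credentials; override for a real broker.
        self.user = 'guest'
        self.password = 'guest'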
def next_request(self):
    block_pop_timeout = self.idle_before_close  # currently unused by pop()
    if not self.server.is_open and self.settings is not None:
        # Reopen the connection...
        self.server = from_settings(self.settings)  # the connection module's helper, assumed imported
        # ...and re-declare the queue.
        self.open(self.spider)
    request = self.queue.pop()
    if request and self.stats:
        self.stats.inc_value('scheduler/dequeued/rabbitmq', spider=self.spider)
    return request
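# `self.queue.pop()` above is implemented by the queue class loaded in
# from_settings(). A hedged sketch of what a RabbitMQ-backed pop() could look
# like, assuming a pika channel in self.server and pickled request bodies;
# the attribute and method names are illustrative.
import pickle

def pop(self):
    method_frame, header_frame, body = self.server.basic_get(queue=self.key)
    if body is not None:
        self.server.basic_ack(delivery_tag=method_frame.delivery_tag)
        return pickle.loads(body)  # rebuild the serialized request
    return None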
def setup_rabbitmq(self):
    """Set up the RabbitMQ connection.

    Call this method after the spider has set its crawler object.

    :return: None
    """
    if not self.rabbitmq_key:
        self.rabbitmq_key = '{}:start_urls'.format(self.name)
    self.server = connection.from_settings(self.crawler.settings)
    # Keep the spider alive while the queue is empty, and fetch more work
    # as items are scraped.
    self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
    self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
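# The handlers wired up above are not shown in these snippets. A hedged
# sketch of the usual pattern, assuming a schedule_next_request() helper that
# drains next_request(); both names are assumptions.
from scrapy.exceptions import DontCloseSpider

def spider_idle(self):
    # Keep polling RabbitMQ instead of letting Scrapy close the spider.
    self.schedule_next_request()
    raise DontCloseSpider

def item_scraped(self, *args, **kwargs):
    # Ask for more work as soon as an item has been processed.
    self.schedule_next_request()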
def next_request(self):
    """Provide a request to be scheduled.

    :return: generator yielding at most one Request
    """
    if not self.server.is_open:
        # Reopen the connection if the broker dropped it.
        self.server = connection.from_settings(self.crawler.settings)
    method_frame, header_frame, url = self.server.basic_get(
        queue=self.rabbitmq_key)
    if url:
        req = self.make_requests_from_url(bytes_to_str(url))
        yield req
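# One way to seed the queue that next_request() drains, using plain pika and
# assuming URL strings as message bodies, as consumed above; the queue name
# is illustrative.
import pika

conn = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = conn.channel()
channel.queue_declare(queue='myspider:start_urls')
channel.basic_publish(exchange='', routing_key='myspider:start_urls',
                      body='https://example.com')
conn.close()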
def setup_rabbitmq(self):
    """Set up the RabbitMQ connection.

    Call this method after the spider has set its crawler object.

    :return: None
    """
    if self.crawler.settings.get('RABBITMQ_QUEUE_NAME', None):
        self.rabbitmq_key = self.crawler.settings.get('RABBITMQ_QUEUE_NAME', None)
        # Interpolate the spider name into the queue-name template, then
        # write the resolved name back into the (briefly unfrozen) settings.
        self.rabbitmq_key = self.rabbitmq_key % {'name': self.name}
        self.crawler.settings.frozen = False
        self.crawler.settings.set('RABBITMQ_QUEUE_NAME', self.rabbitmq_key)
        self.crawler.settings.frozen = True
    if not self.rabbitmq_key:
        self.rabbitmq_key = '{}:start_urls'.format(self.name)
    self.server = connection.from_settings(self.crawler.settings)
    self.crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
    self.crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
@classmethod
def from_settings(cls, settings):
    server = connection.from_settings(settings)
    exchange_name = settings.get('RABBITMQ_EXCHANGE_NAME', EXCHANGE_NAME)
    return cls(server, exchange_name)
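# Hedged example of wiring these components into a project's settings.py; all
# dotted paths are assumptions about the package layout, and the exchange and
# queue names are illustrative.
SCHEDULER = 'scrapy_rabbitmq.scheduler.Scheduler'
SCHEDULER_PERSIST = True
DUPEFILTER_CLASS = 'scrapy_rabbitmq.dupefilter.RFPDupeFilter'
ITEM_PIPELINES = {'scrapy_rabbitmq.pipelines.RabbitMQPipeline': 300}
RABBITMQ_QUEUE_NAME = '%(name)s:start_urls'
RABBITMQ_EXCHANGE_NAME = 'scrapy'
RABBITMQ_CONNECTION_PARAMETERS = {'host': 'localhost', 'port': 5672}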