class Slot(object):
    '''Queue of requests for one particular domain.

    Downloads are started from the queue while honouring both the download
    delay (DOWNLOAD_DELAY) and the per-domain concurrency limit
    (CONCURRENT_REQUESTS_PER_DOMAIN).
    '''

    def __init__(self, download_handler, concurrency, delay, randomize_delay,
                 clock=None):
        '''Set up an empty slot.

        `download_handler` must expose `download_request(request)`;
        `concurrency` caps the number of simultaneous transfers; `delay` is
        the pause between two download starts (a falsy value disables it);
        `randomize_delay` makes `get_download_delay()` return a random value
        around `delay`; `clock` replaces the reactor as the time source.
        '''
        self.download_handler = download_handler
        self.concurrency = concurrency
        self.delay = delay
        self.randomize_delay = randomize_delay

        # requests passed to `enqueue()` whose deferred has not fired yet
        self.in_progress = set()
        # requests being downloaded right now (subset of `in_progress`)
        self.transferring = set()
        self.last_download_time = 0
        # pending (request, deferred) pairs
        self.queue = MemoryQueue()

        # unit tests inject their own clock; otherwise the reactor is used
        self.clock = clock or reactor
        self.delayed_processing = ScheduledCall(self._process,
                                                clock=self.clock)

    def enqueue(self, request, dfd):
        '''Main entry point: queue `request` and try to start downloading.

        `dfd` is the deferred that receives the download result; the request
        is dropped from `in_progress` as soon as `dfd` fires.
        '''
        def _forget_request(result):
            self.in_progress.remove(request)
            return result

        # the request must be registered before the cleanup hook is attached
        self.in_progress.add(request)
        dfd.addBoth(_forget_request)
        self.queue.push((request, dfd))
        self._process()

    @property
    def free_slots(self):
        '''Number of downloads that may still be started concurrently.'''
        busy = len(self.transferring)
        return self.concurrency - busy

    def is_idle(self):
        '''Return True when no request is in progress.'''
        return not self.in_progress

    def _process(self):
        '''Start queued downloads while the delay and concurrency allow it.

        Does nothing when a delayed run is already scheduled or when the
        delay since the last download has not elapsed yet (in which case a
        delayed run gets scheduled).
        '''
        if self.delayed_processing.is_scheduled():
            return
        if self._schedule_delay():
            return

        while self.queue and self.free_slots > 0:
            # stamp the start time first: the delay check below relies on it
            self.last_download_time = self.clock.seconds()
            request, caller_dfd = self.queue.pop()
            self._download(request).chainDeferred(caller_dfd)
            if self._schedule_delay():
                # the remaining requests wait for the delayed run
                break

    def _schedule_delay(self):
        '''Schedule a delayed `_process()` run if the delay is still pending.

        Returns True when processing has to wait, False otherwise.
        '''
        if not self.delay:
            return False

        ready_at = self.last_download_time + self.get_download_delay()
        remaining = ready_at - self.clock.seconds()
        if remaining > 0:
            # scheduling is expected to always succeed here, because this
            # method is only called from within `_process()`
            self.delayed_processing.schedule(remaining)
            return True
        return False

    def _download(self, request):
        '''Download `request` and return a deferred firing with the result.

        The request stays in `transferring` for the duration of the download
        so that it counts against the concurrency limit.
        '''
        def _fresh_failure(failure):
            # It is VERY important to wrap the failure into a new object!
            # For errors like ConnectionLost the same Failure object is
            # returned every time, so its 'failure.request' field cannot
            # be used.
            return Failure(failure.value)

        def _release_slot(result):
            # the download is over: free the slot, then let the requests
            # that were blocked by it proceed
            self.transferring.remove(request)
            self._process()
            return result

        download_dfd = defer.succeed(request)
        # fetch the response
        download_dfd.addCallback(self.download_handler.download_request)
        download_dfd.addErrback(_fresh_failure)

        # block other requests until the response has been downloaded
        self.transferring.add(request)
        download_dfd.addBoth(_release_slot)
        return download_dfd

    def get_download_delay(self):
        '''Return the delay to apply before the next download.

        With `randomize_delay` set, the value is drawn uniformly between
        0.5 and 1.5 times `delay`; otherwise `delay` is returned unchanged.
        '''
        if not self.randomize_delay:
            return self.delay
        base = self.delay
        return random.uniform(0.5 * base, 1.5 * base)