import json

from zope.interface import implements
from twisted.internet.defer import DeferredSemaphore, inlineCallbacks
from twisted.internet.task import LoopingCall
from twisted.web.iweb import IBodyProducer, UNKNOWN_LENGTH


class PlotlyStreamProducer(object):
    """Implements a producer that copies from a buffer to a plot.ly
    connection.
    """
    implements(IBodyProducer)
    length = UNKNOWN_LENGTH

    def __init__(self, buffer, start_callback=None):
        self.buffer = buffer
        self._done = False
        self._flush = DeferredSemaphore(1)
        self._waiter = DeferredSemaphore(1)
        self._flush.acquire()
        self._started = start_callback
        self._keepalive = LoopingCall(self._send_keepalive)

    @inlineCallbacks
    def startProducing(self, consumer):
        self._keepalive.start(60)
        if self._started is not None:  # start_callback is optional
            self._started.callback(None)
        while True:
            # if paused, this will block
            yield self._waiter.acquire()
            while len(self.buffer):
                v = self.buffer.pop(0)
                if v is not None:
                    consumer.write(json.dumps(v))
                    consumer.write("\n")
            yield self._waiter.release()
            if self._done:
                return
            yield self._flush.acquire()

    def pauseProducing(self):
        return self._waiter.acquire()

    def resumeProducing(self):
        return self._waiter.release()

    def stopProducing(self):
        self._done = True
        if self._keepalive.running:
            self._keepalive.stop()
        # wake startProducing (it may be blocked on _flush.acquire) so it
        # can observe _done and finish
        self.flush()

    def _send_keepalive(self):
        self.buffer.append(None)
        self.flush()

    def flush(self):
        if self._flush.tokens == 0:
            self._flush.release()
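# A minimal sketch of how a body producer like the one above is handed to
# twisted.web.client.Agent (the endpoint URL and header values here are
# hypothetical, not part of the original snippet):
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.web.client import Agent
from twisted.web.http_headers import Headers

buf = []
started = Deferred()
producer = PlotlyStreamProducer(buf, start_callback=started)
agent = Agent(reactor)
d = agent.request(b"POST", b"http://example.invalid/stream",
                  Headers({b"Content-Type": [b"application/json"]}),
                  bodyProducer=producer)
# Once `started` fires, append dicts to `buf` and call producer.flush()
# to push them through to the consumer.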
from urllib2 import urlopen

from twisted.internet.defer import (DeferredSemaphore, gatherResults,
                                    inlineCallbacks, returnValue)
from twisted.internet.threads import deferToThread
from twisted.python import log


class ThreadedUrllib2TestMixin(object):
    def setUp(self):
        self._semaphore = DeferredSemaphore(2)

    def tearDown(self):
        pass

    def getPages(self, count, url):
        return gatherResults([self.getPage(url) for i in xrange(0, count)])

    @inlineCallbacks
    def getPage(self, url):
        yield self._semaphore.acquire()
        try:
            page = yield deferToThread(self._openPage, url)
        finally:
            # release even when the fetch fails, or the semaphore leaks
            self._semaphore.release()
        returnValue(page)

    def _openPage(self, url):
        log.msg("Opening url: %r" % url)
        return urlopen(url).read()

    @inlineCallbacks
    def getPageLength(self, url):
        response = yield self.getPage(url)
        returnValue(len(response))
from functools import wraps

from twisted.internet.defer import (DeferredLock, DeferredSemaphore,
                                    maybeDeferred)


class DeferredConcurrencyLimiter:
    """Initialize me, and then use me as a decorator, to limit the number
    of Deferreds that can execute concurrently."""

    def __init__(self, tokens=5):
        if tokens < 1:
            raise ValueError("tokens must be > 0")
        if tokens == 1:
            self.lock = DeferredLock()
        else:
            self.lock = DeferredSemaphore(tokens)

    def _releaseLock(self, response, lock):
        lock.release()
        return response

    def _lockAcquired(self, lock, f, *a, **kw):
        d = maybeDeferred(f, *a, **kw)
        d.addBoth(self._releaseLock, lock)
        return d

    def __call__(self, f):
        @wraps(f)
        def wrapped(*a, **kw):
            d = self.lock.acquire()
            d.addCallback(self._lockAcquired, f, *a, **kw)
            return d
        return wrapped
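# Usage sketch for the limiter above (fetch_page and the URL are
# hypothetical): at most three calls run concurrently, and each call still
# returns a Deferred that fires with the wrapped function's result.
from twisted.web.client import getPage

limiter = DeferredConcurrencyLimiter(tokens=3)

@limiter
def fetch_page(url):
    return getPage(url)  # may return a plain value or a Deferred

# ds = [fetch_page("http://example.invalid/%d" % i) for i in range(10)]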
from twisted.internet.defer import (DeferredSemaphore, gatherResults,
                                    inlineCallbacks, returnValue)
# tx_getPage: assumed alias for twisted.web.client.getPage
from twisted.web.client import getPage as tx_getPage


class TwistedWebTestMixin(object):
    def setUp(self):
        self._semaphore = DeferredSemaphore(2)

    def tearDown(self):
        pass

    def getPages(self, count, url):
        # gatherResults already returns a Deferred, so no inlineCallbacks
        # decorator here (it would fail on a non-generator function)
        return gatherResults([self.getPage(url) for i in xrange(0, count)])

    @inlineCallbacks
    def getPage(self, url):
        yield self._semaphore.acquire()
        try:
            page = yield tx_getPage(url)
        finally:
            # release even when the fetch fails, or the semaphore leaks
            self._semaphore.release()
        returnValue(page)

    @inlineCallbacks
    def getPageLength(self, url):
        response = yield self.getPage(url)
        returnValue(len(response))
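# A hedged sketch of combining such a mixin with twisted.trial (the test
# class and local URL are hypothetical): the DeferredSemaphore(2) created
# in setUp caps the five fetches at two in flight.
from twisted.trial import unittest

class PageTests(TwistedWebTestMixin, unittest.TestCase):
    def test_fetch_five_pages(self):
        d = self.getPages(5, "http://localhost:8880/")
        d.addCallback(lambda pages: self.assertEqual(len(pages), 5))
        return d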
import logging
import time

import psycopg2
from txpostgres import txpostgres
from twisted.internet import threads
from twisted.internet.defer import Deferred, DeferredList, DeferredSemaphore
from twisted.python.failure import Failure

# Project-local names assumed from the snippet's home module:
# indx_pg2, IndxConnection, MIN_CONNS, MAX_CONNS, REMOVE_AT_ONCE.


class IndxConnectionPool:
    """ A wrapper for txpostgres connection pools, which auto-reconnects. """

    def __init__(self):
        logging.debug("IndxConnectionPool starting.")
        self.connections = {}  # by connection string
        self.conn_strs = {}  # by db_name
        self.semaphore = DeferredSemaphore(1)
        self.subscribers = {}  # by db name

    def removeAll(self, db_name):
        """ Remove all connections for a named database - used before
            deleting that database. """
        logging.debug("IndxConnectionPool removeAll {0}".format(db_name))
        d_list = []
        if db_name in self.conn_strs:
            for conn_str in self.conn_strs[db_name]:
                for conn in self.connections[conn_str].getInuse():
                    d_list.append(conn.close())
                for conn in self.connections[conn_str].getFree():
                    d_list.append(conn.close())
                del self.connections[conn_str]
            del self.conn_strs[db_name]
        dl = DeferredList(d_list)
        return dl

    def connect(self, db_name, db_user, db_pass, db_host, db_port):
        """ Returns an IndxConnection (the actual connection and pool are
            made when a query is made). """
        return_d = Deferred()
        # the logged copy of the connection string masks the password
        log_conn_str = ("dbname='{0}' user='{1}' password='{2}' host='{3}' "
                        "port='{4}' application_name='{5}'").format(
            db_name, db_user, "XXXX", db_host, db_port,
            indx_pg2.APPLICATION_NAME)
        conn_str = ("dbname='{0}' user='{1}' password='{2}' host='{3}' "
                    "port='{4}' application_name='{5}'").format(
            db_name, db_user, db_pass, db_host, db_port,
            indx_pg2.APPLICATION_NAME)
        logging.debug("IndxConnectionPool connect: {0}".format(log_conn_str))

        if db_name not in self.conn_strs:
            self.conn_strs[db_name] = []
        self.conn_strs[db_name].append(conn_str)

        def free_cb(conn):
            """ Called back when this IndxConnection has finished querying,
                so we put the real connection back into the pool. """
            logging.debug("IndxConnectionPool free_cb, conn: {0}".format(conn))
            self.connections[conn_str].freeConnection(conn)
            # no dealing with callbacks, just carry on

        def alloc_cb(conn_str):
            # a query was called - allocate a connection now and pass it back
            return self._connect(conn_str)

        indx_connection = IndxConnection(conn_str, alloc_cb, free_cb)
        return_d.callback(indx_connection)
        return return_d

    def _connect(self, conn_str):
        """ Connect and return a free Connection. Figures out whether to
            make new connections, use the pool, or wait in a queue. """
        logging.debug("IndxConnectionPool _connect ({0})".format(conn_str))
        return_d = Deferred()

        def err_cb(failure):
            logging.error(
                "IndxConnectionPool _connect err_cb: {0}".format(failure))
            self.semaphore.release()
            return_d.errback(failure)

        def succeed_cb(empty):
            logging.debug("IndxConnectionPool _connect succeed_cb")
            # TODO pass a Connection back
            if len(self.connections[conn_str].getFree()) > 0:
                # free connection, use it straight away
                conn = self.connections[conn_str].getFree().pop()
                self.connections[conn_str].getInuse().append(conn)
                self.semaphore.release()
                return_d.callback(conn)
                return

            if len(self.connections[conn_str].getInuse()) < MAX_CONNS:
                # not at max connections for this conn_str - create a new one
                d = self._newConnection(conn_str)

                def connected_cb(indx_conn):
                    logging.debug(
                        "IndxConnectionPool _connect connected_cb ({0})"
                        .format(indx_conn))
                    self.connections[conn_str].getFree().remove(indx_conn)
                    self.connections[conn_str].getInuse().append(indx_conn)
                    self.semaphore.release()
                    return_d.callback(indx_conn)
                    return

                d.addCallbacks(connected_cb, err_cb)
                return

            # wait for a connection
            def wait_cb(conn):
                logging.debug(
                    "IndxConnectionPool _connect wait_cb ({0})".format(conn))
                # already put in 'inuse'
                return_d.callback(conn)
                return

            self.semaphore.release()
            self.connections[conn_str].getWaiting().append(wait_cb)
            return

        def locked_cb(empty):
            logging.debug("IndxConnectionPool _connect locked_cb")
            if conn_str not in self.connections:
                self._newConnections(conn_str).addCallbacks(succeed_cb, err_cb)
            else:
                threads.deferToThread(succeed_cb, None)  # succeed_cb(None)

        self.semaphore.acquire().addCallbacks(locked_cb, err_cb)
        return return_d

    def _closeOldConnection(self):
        """ Close the oldest connection, so we can open a new one up. """
        # already inside a semaphore lock, from _newConnection
        logging.debug("IndxConnectionPool _closeOldConnection")

        ### we could force quit them through postgresql like this - but
        ### instead we kill them from inside
        #query = "SELECT * FROM pg_stat_activity WHERE state = 'idle' AND application_name = %s AND query != 'LISTEN wb_new_version' ORDER BY state_change LIMIT 1;"
        #params = [indx_pg2.APPLICATION_NAME]

        return_d = Deferred()

        def err_cb(failure):
            return_d.errback(failure)

        ages = {}
        for conn_str, dbpool in self.connections.items():
            lastused = dbpool.getTime()
            if lastused not in ages:
                ages[lastused] = []
            ages[lastused].append(dbpool)

        times = ages.keys()
        times.sort()

        pool_queue = []
        for timekey in times:
            pools = ages[timekey]
            pool_queue.extend(pools)

        def removed_cb(count):
            if count < REMOVE_AT_ONCE and len(pool_queue) > 0:
                pool = pool_queue.pop(0)
                pool.getFree()  # just refreshes the pool's last-used time
                pool.removeAll(count).addCallbacks(removed_cb, err_cb)
            else:
                return_d.callback(None)

        removed_cb(0)
        return return_d

    def _newConnection(self, conn_str):
        """ Makes a new connection to the DB and then puts it in the 'free'
            pool of this conn_str. """
        logging.debug("IndxConnectionPool _newConnection")
        # lock with the semaphore before calling this
        return_d = Deferred()

        def close_old_cb(failure):
            failure.trap(psycopg2.OperationalError, Exception)
            # couldn't connect, so close an old connection first
            logging.error(
                "IndxConnectionPool error close_old_cb: {0} - state of conns is: {1}"
                .format(failure.value, self.connections))
            logging.error("IndxConnectionPool connections: {0}".format(
                "\n".join(map(lambda name: self.connections[name].__str__(),
                              self.connections))))

            def closed_cb(empty):
                # closed, so try connecting again
                self._newConnection(conn_str).addCallbacks(
                    return_d.callback, return_d.errback)

            closed_d = self._closeOldConnection()
            closed_d.addCallbacks(closed_cb, return_d.errback)

        try:
            # try to connect
            def connected_cb(connection):
                logging.debug(
                    "IndxConnectionPool _newConnection connected_cb, connection: {0}"
                    .format(connection))
                self.connections[conn_str].getFree().append(connection)
                return_d.callback(connection)

            conn = txpostgres.Connection()
            connection_d = conn.connect(conn_str)
            connection_d.addCallbacks(connected_cb, close_old_cb)
        except Exception as e:
            # close an old connection first
            logging.debug(
                "IndxConnectionPool Exception, going to call close_old_cb: ({0})"
                .format(e))
            close_old_cb(Failure(e))

        return return_d

    def _newConnections(self, conn_str):
        """ Make a pool of new connections. """
        # lock with the semaphore before calling this
        logging.debug("IndxConnectionPool _newConnections")
        return_d = Deferred()
        self.connections[conn_str] = DBConnectionPool(conn_str)
        try:
            d_list = []
            for i in range(MIN_CONNS):
                connection_d = self._newConnection(conn_str)
                d_list.append(connection_d)
            dl = DeferredList(d_list)
            dl.addCallbacks(return_d.callback, return_d.errback)
        except Exception as e:
            logging.error(
                "IndxConnectionPool error in _newConnections: {0}".format(e))
            return_d.errback(Failure(e))
        return return_d
class DBConnectionPool():
    """ A pool of DB connections for a specific connection string / DB. """

    def __init__(self, conn_str):
        self.waiting = []
        self.inuse = []
        self.free = []
        self.semaphore = DeferredSemaphore(1)
        self.updateTime()

    def __unicode__(self):
        return self.__str__()

    def __str__(self):
        return "waiting: {0}, inuse: {1}, free: {2}, semaphore: {3}, lastused: {4}".format(
            self.waiting, self.inuse, self.free, self.semaphore,
            self.lastused)

    def updateTime(self):
        self.lastused = time.mktime(time.gmtime())  # epoch time

    def getTime(self):
        return self.lastused

    def getWaiting(self):
        self.updateTime()
        return self.waiting

    def getInuse(self):
        self.updateTime()
        return self.inuse

    def getFree(self):
        self.updateTime()
        return self.free

    def freeConnection(self, conn):
        """ Free a connection from this DBPool. """

        def locked_cb(empty):
            logging.debug("DBConnectionPool locked_cb")
            self.getInuse().remove(conn)
            if len(self.getWaiting()) > 0:
                # hand the connection straight to a waiting caller
                callback = self.getWaiting().pop()
                self.getInuse().append(conn)
                self.semaphore.release()
                callback(conn)
            else:
                self.getFree().append(conn)
                self.semaphore.release()

        def err_cb(failure):
            failure.trap(Exception)
            logging.error(
                "DBConnectionPool free, err_cb: {0}".format(failure.value))
            self.semaphore.release()

        self.semaphore.acquire().addCallbacks(locked_cb, err_cb)

    def removeAll(self, count):
        """ Remove all free connections (usually because they're old and
            we're in a freeing-up period). """
        logging.debug(
            "DBConnectionPool removeAll called, count: {0}".format(count))
        return_d = Deferred()
        self.updateTime()

        def err_cb(failure):
            self.semaphore.release()
            return_d.errback(failure)

        def locked_cb(count):
            # immediately close the free connections
            while len(self.free) > 0:
                conn = self.free.pop(0)
                conn.close()
                count += 1
            self.semaphore.release()
            return_d.callback(count)

        self.semaphore.acquire().addCallbacks(lambda s: locked_cb(count),
                                              err_cb)
        return return_d
import logging

from twisted.internet import reactor
from twisted.internet.defer import Deferred, DeferredSemaphore, maybeDeferred
from twisted.web.client import getPage

# HTTPError is assumed project-local: an exception type carrying the HTTP
# status code in a .status attribute (twisted.web.error.Error fits).


class RateLimitedClient(object):
    """A Web client with per-second request limit.
    """

    # Max number of requests per second (can be < 1.0)
    rate_limit = None
    # Grace delay (seconds) when the server throttles us
    grace_delay = 30
    # Max number of parallel requests
    max_concurrency = 5

    def __init__(self, time=None):
        self.sem = DeferredSemaphore(self.max_concurrency)
        self.grace_deferred = None
        self.logger = logging.getLogger("webclient")
        self.time = time or reactor
        self.last_request = 0.0

    def _enable_grace_delay(self, delay):
        # `delay` is accepted for symmetry; the class-level grace_delay is used
        if self.grace_deferred:
            # Already enabled by an earlier concurrent request
            return
        self.grace_deferred = Deferred()

        def expire():
            g = self.grace_deferred
            self.grace_deferred = None
            g.callback(None)

        reactor.callLater(self.grace_delay, expire)

    def _delay_if_necessary(self, func, *args, **kwargs):
        d = Deferred()
        d.addCallback(lambda _: func(*args, **kwargs))
        trigger = None
        if self.grace_deferred:
            trigger = self.grace_deferred
        elif self.rate_limit:
            delay = (self.last_request + 1.0 / self.rate_limit) - self.time.seconds()
            if delay > 0:
                self.logger.debug("inserting rate limit delay of %.1f", delay)
                trigger = Deferred()
                self.time.callLater(delay, trigger.callback, None)
        (trigger or maybeDeferred(lambda: None)).chainDeferred(d)
        return d

    def get_page(self, url, *args, **kwargs):
        if isinstance(url, unicode):
            url = url.encode("utf8")

        def schedule_request(_):
            return self._delay_if_necessary(issue_request, None)

        def issue_request(_):
            self.last_request = self.time.seconds()
            self.logger.debug("fetching %r", url)
            return getPage(url, *args, **kwargs)

        def handle_success(value):
            self.sem.release()
            self.logger.debug("got %d bytes for %r", len(value), url)
            return value

        def handle_error(failure):
            self.sem.release()
            failure.trap(HTTPError)
            self.logger.debug("got HTTP error %s", failure.value)
            self.trap_throttling(failure)
            delay = self.grace_delay
            self.logger.warning("we are throttled, delaying by %.1f seconds",
                                delay)
            self._enable_grace_delay(delay)
            # auto-retry
            return do_get_page()

        def do_get_page():
            # We acquire the semaphore *before* seeing if we should delay
            # the request, so that we avoid pounding on the server when
            # the grace period is entered.
            d = self.sem.acquire()
            d.addCallback(schedule_request)
            d.addCallbacks(handle_success, handle_error)
            return d

        return do_get_page()

    def trap_throttling(self, failure):
        """Return silently if the HTTP failure means the remote site is
        throttling us; otherwise re-raise.
        """
        e = failure.value
        if e.status in ("400", "420", "500", "503"):
            return
        failure.raiseException()
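# Usage sketch (the subclass and URL are hypothetical): the class
# attributes are read in __init__, so tuning is done by subclassing.
class ThrottledClient(RateLimitedClient):
    rate_limit = 2.0       # at most two requests per second
    max_concurrency = 3    # and at most three in flight

client = ThrottledClient()
d = client.get_page(u"http://example.invalid/feed")
d.addCallback(lambda body: len(body))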
from twisted.internet import reactor
from twisted.internet.defer import DeferredSemaphore
from twisted.python import log


class AggregationResponseCache(object):
    '''
    This holds all the responses being aggregated for a single destination.
    One of the main challenges here is to make sure that while we're sending
    the responses, we don't get a new response in and fail to send it.
    '''

    def __init__(self, numSecondsToWait, numMessagesToWaitFor, chordNode):
        '''Constructor'''
        self.numSecondsToWait = numSecondsToWait
        self.numMessagesToWaitFor = numMessagesToWaitFor
        self.chordNode = chordNode
        self.semaphore = DeferredSemaphore(1)
        self.messageList = []  # Holds tuples of (message, envelope)
        # Construct a timer to wait
        self.timerID = None

    def addResponse(self, message, envelope):
        '''We use a semaphore to ensure we don't modify the list while
        sending.'''
        d = self.semaphore.acquire()
        d.addCallback(self._addResponse, message, envelope)

    def _addResponse(self, dummy_defResult, message, envelope):
        '''This is called only once we have the semaphore.'''
        self.messageList.append((message, envelope))
        print("DEBUG: AggRespCache: %s adding message %s " %
              (self.chordNode.nodeLocation.port, message))
        if len(self.messageList) >= self.numMessagesToWaitFor:
            # Send it! _sendResponse releases the semaphore for us, so we
            # must not release it again here.
            self._sendResponse()
            return
        # Make sure a timer is running
        if self.timerID is None or not self.timerID.active():
            self.timerID = reactor.callLater(self.numSecondsToWait,
                                             self.sendResponse)
        # We're done.
        self.semaphore.release()

    def sendResponse(self):
        '''Acquire the semaphore, then send the queued responses.'''
        d = self.semaphore.acquire()
        d.addCallback(self._sendResponse)

    def _sendResponse(self, dummy_deferResult=None):
        '''Send the responses; only call this while holding the semaphore,
        which this method releases.'''
        # Copy the list
        messagesListCopy = self.messageList
        self.messageList = []

        # Release the semaphore
        self.semaphore.release()

        # Stop the timer if it's still going
        if self.timerID is not None and self.timerID.active():
            self.timerID.cancel()
            self.timerID = None

        print("DEBUG: AggResponseCache-Sending %d Messages %s" %
              (len(messagesListCopy), self.chordNode.nodeLocation.port))

        # Send a P2P message to the dest with all the responses
        d = self.chordNode.sendSyncMultipleMessage(messagesListCopy, 'p2p')  # Will this break message authentication?
        d.addCallback(self.sendAcks, messagesListCopy)
        d.addErrback(self.sendResponseFailed)

#    def emptyMessageList(self, _):
#        self.messageList = []

    def sendAcks(self, resultsDict, messageList):
        # Send ACK messages to the nodes for which we aggregated
        for (_message, envelope) in messageList:
            # Get the status to return
            msgID = envelope['msgID']
            if msgID not in resultsDict:
                status = False
            else:
                status = resultsDict[msgID]

            d = self.chordNode.sendSingleAck(msgID, envelope['source'], status)
            d.addErrback(self.sendAckFailed, envelope['source'])

    def sendAckFailed(self, fail, sourceNode):
        log.err("We failed to SendAck for source %s" % sourceNode, fail)

    def sendResponseFailed(self, theFailure):
        log.err(theFailure)
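# The class above relies on a strict acquire/release discipline around a
# DeferredSemaphore(1); a minimal generic sketch of that pattern (the
# run_exclusively helper is illustrative, not from the original snippet):
from twisted.internet.defer import DeferredSemaphore

_sem = DeferredSemaphore(1)

def run_exclusively(work):
    def _locked(_):
        try:
            return work()
        finally:
            # exactly one release per acquire, even if work() raises
            _sem.release()
    return _sem.acquire().addCallback(_locked)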
# NOTE: this snippet assumes its home project's imports: the Qt bindings
# (QApplication, QWebView, QWebPage, QWebSettings, the QNetwork* classes,
# QUrl, QByteArray), Scrapy and Twisted names (CookiesMiddleware,
# HtmlResponse, arg_to_iter, DeferredSemaphore, LoopingCall, inlineCallbacks,
# returnValue, maybeDeferred, Failure, the exception types used in
# _qt_error_exc_mapping), functools.partial, and project-local helpers
# (ScrapyNetworkAccessManager, WebPage, QtWebKitRequest, QtWebKitResponse,
# ScrapyAwareCookieJar, DummySemaphore, deferred_for_signal,
# HTTP_METHOD_TO_QT_OPERATION, _QApplicationStopper).


class BaseQtWebKitMiddleware(object):
    nam_cls = ScrapyNetworkAccessManager

    @classmethod
    def from_crawler(cls, crawler):
        settings = crawler.settings
        if crawler.settings.getbool('QTWEBKIT_COOKIES_ENABLED', False):
            cookies_middleware = CookiesMiddleware(
                crawler.settings.getbool('COOKIES_DEBUG')
            )
        else:
            cookies_middleware = None

        qt_platform = settings.get("QTWEBKIT_QT_PLATFORM", "minimal")
        if qt_platform == "default":
            qt_platform = None

        ext = cls(
            crawler,
            show_window=settings.getbool("QTWEBKIT_SHOW_WINDOW", False),
            qt_platform=qt_platform,
            enable_webkit_dev_tools=settings.get("QTWEBKIT_ENABLE_DEV_TOOLS",
                                                 False),
            page_limit=settings.getint("QTWEBKIT_PAGE_LIMIT", 4),
            cookies_middleware=cookies_middleware
        )
        return ext

    @staticmethod
    def engine_stopped():
        if QApplication.instance():
            QApplication.instance().quit()

    def __init__(self, crawler, show_window=False, qt_platform="minimal",
                 enable_webkit_dev_tools=False, page_limit=4,
                 cookies_middleware=None):
        super(BaseQtWebKitMiddleware, self).__init__()
        self._crawler = crawler
        self.show_window = show_window
        self.qt_platform = qt_platform
        self.enable_webkit_dev_tools = enable_webkit_dev_tools
        if page_limit != 1:
            if QWebSettings is not None:
                QWebSettings.setObjectCacheCapacities(0, 0, 0)
        if page_limit is None:
            self.semaphore = DummySemaphore()
        else:
            self.semaphore = DeferredSemaphore(page_limit)
        self.cookies_middleware = cookies_middleware
        self._references = set()

    @staticmethod
    def _schedule_qt_event_loop(app):
        """
        Schedule a QApplication's event loop within Twisted.

        Should be called at most once per QApplication.
        """
        # XXX: This is ugly but I don't know another way to do it.
        call = LoopingCall(app.processEvents)
        call.start(0.02, False)
        app.aboutToQuit.connect(call.stop)

    def _setup_page(self, page, extra_settings):
        settings = page.settings()
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        settings.setAttribute(QWebSettings.PrivateBrowsingEnabled, True)
        settings.setAttribute(QWebSettings.LocalStorageEnabled, True)
        settings.setAttribute(QWebSettings.LocalContentCanAccessRemoteUrls,
                              True)
        settings.setAttribute(QWebSettings.LocalContentCanAccessFileUrls,
                              True)
        settings.setAttribute(QWebSettings.NotificationsEnabled, False)
        settings.setAttribute(QWebSettings.DeveloperExtrasEnabled,
                              self.enable_webkit_dev_tools)
        for setting, value in extra_settings.items():
            settings.setAttribute(setting, value)

    @staticmethod
    def _make_qt_request(scrapy_request):
        """Build a QNetworkRequest from a Scrapy request."""
        qt_request = QNetworkRequest(QUrl(scrapy_request.url))
        for header, values in scrapy_request.headers.items():
            qt_request.setRawHeader(header, b', '.join(values))

        try:
            operation = HTTP_METHOD_TO_QT_OPERATION[scrapy_request.method]
        except KeyError:
            operation = QNetworkAccessManager.CustomOperation
            qt_request.setAttribute(QNetworkRequest.CustomVerbAttribute,
                                    scrapy_request.method)

        qt_request.setAttribute(QNetworkRequest.CacheSaveControlAttribute,
                                False)
        req_body = QByteArray(scrapy_request.body)
        return qt_request, operation, req_body

    @inlineCallbacks
    def process_request(self, request, spider):
        if self.cookies_middleware:
            yield self.cookies_middleware.process_request(request, spider)
        if isinstance(request, QtWebKitRequest):
            if request.webpage:
                # Request is to continue processing with an existing webpage
                # object.
                webpage = request.webpage
                request = request.replace(webpage=None)
                webpage.networkAccessManager().request = request
                returnValue(self._handle_page_request(spider, request,
                                                      webpage))
            else:
                yield self.semaphore.acquire()
                response = yield self.create_page(request, spider)
                returnValue(response)

    def process_response(self, request, response, spider):
        if self.cookies_middleware:
            return self.cookies_middleware.process_response(request, response,
                                                            spider)
        else:
            return response

    def ensure_qapplication(self):
        """Create and setup a QApplication if one does not already exist."""
        if not QApplication.instance():
            args = ["scrapy"]
            if self.qt_platform is not None:
                args.extend(["-platform", self.qt_platform])
            app = QApplication(args)
            self._schedule_qt_event_loop(app)
            _QApplicationStopper(self._crawler.signals, app)

    def create_page(self, request, spider):
        """
        Create a webpage object, load a request on it, return a deferred
        that fires with a response on page load.
        """
        self.ensure_qapplication()

        webpage = WebPage()
        self._setup_page(webpage,
                         request.meta.get('qwebsettings_settings', {}))
        self._references.add(webpage)

        if self.show_window:
            webview = QWebView()
            webview.setPage(webpage)
            webpage.webview = webview
            self._add_webview_to_window(webview, spider.name)

        if request.meta.get('qtwebkit_user_agent', False):
            request.headers['User-Agent'] = webpage.userAgentForUrl(
                QUrl(request.url)
            )

        nam = self.nam_cls(spider, request, request.headers.get('User-Agent'),
                           parent=webpage)
        if ((self.cookies_middleware and
             'dont_merge_cookies' not in request.meta)):
            cookiejarkey = request.meta.get("cookiejar")
            cookiejar = ScrapyAwareCookieJar(self.cookies_middleware,
                                             cookiejarkey, parent=nam)
            nam.setCookieJar(cookiejar)
        webpage.setNetworkAccessManager(nam)

        d = deferred_for_signal(webpage.load_finished_with_error)
        d.addCallback(partial(self._handle_page_request, spider, request,
                              webpage))
        webpage.mainFrame().load(*self._make_qt_request(request))
        return d

    def _add_webview_to_window(self, webview, title=""):
        pass

    def _remove_webview_from_window(self, webview):
        pass

    def _handle_page_request(self, spider, request, webpage,
                             load_result=(True, None)):
        """
        Handle a request for a web page, either a page load or a request to
        continue using an existing page object.
        """
        try:
            ok, error = load_result
            if ok:
                # The error object is not available if a page load was not
                # requested.
                if error and error.domain == QWebPage.Http:
                    status = error.error
                else:
                    status = 200
                if error:
                    url = error.url
                else:
                    url = webpage.mainFrame().url()

                qwebpage_response = request.meta.get('qwebpage_response',
                                                     False)
                if qwebpage_response:
                    respcls = QtWebKitResponse
                else:
                    respcls = HtmlResponse

                response = respcls(status=status,
                                   url=url.toString(),
                                   headers=error.headers,
                                   body=webpage.mainFrame().toHtml(),
                                   encoding='utf-8',
                                   request=request)

                if qwebpage_response:
                    response.webpage = webpage
                    request.callback = partial(self._request_callback, spider,
                                               request.callback or 'parse')
                else:
                    self._close_page(webpage)
            else:
                raise self._exception_from_errorpageextensionoption(error)
        except Exception as err:
            response = Failure(err)

        return response

    @inlineCallbacks
    def _request_callback(self, spider, original_callback, response):
        """
        Close the page (lose the reference to it so it is garbage collected)
        when the callback returns.

        The original callback may prevent page closing by setting the
        should_close_webpage attribute in responses. This is useful for
        example if the page is stored somewhere else (e.g. request meta) to
        be used later. The page then needs to be closed manually at some
        point by calling its close_page() function, which is created here.
        """
        if isinstance(original_callback, basestring):
            original_callback = getattr(spider, original_callback)
        webpage = response.webpage
        response.should_close_webpage = True
        try:
            returnValue(arg_to_iter((yield maybeDeferred(original_callback,
                                                         response))))
        finally:
            # FIXME: sometimes this section is reached before the wrapped
            # callback finishes, when it returns a Deferred.
            if response.should_close_webpage:
                self._close_page(webpage)
            else:
                webpage.close_page = partial(self._close_page, webpage)
                webpage.close_page.__doc__ = ("Lose the reference to the "
                                              "webpage object and allow it "
                                              "to be garbage collected.")

    def _close_page(self, webpage):
        self._references.remove(webpage)
        # Resetting the main frame URL prevents it from making any more
        # requests, which would cause Qt errors after the webpage is deleted.
        webpage.mainFrame().setUrl(QUrl())
        if webpage.webview is not None:
            self._remove_webview_from_window(webpage.webview)
        self.semaphore.release()

    _qt_error_exc_mapping = {
        QNetworkReply.ConnectionRefusedError: ConnectionRefusedError,
        QNetworkReply.RemoteHostClosedError: ConnectionLost,
        QNetworkReply.HostNotFoundError: DNSLookupError,
        QNetworkReply.TimeoutError: TimeoutError,
        QNetworkReply.OperationCanceledError: ConnectingCancelledError,
        QNetworkReply.SslHandshakeFailedError: SSLError,
        QNetworkReply.ProtocolUnknownError: NotSupported,
    }

    def _exception_from_errorpageextensionoption(self, option):
        if option.domain == QWebPage.QtNetwork:
            exc_cls = self._qt_error_exc_mapping.get(option.error,
                                                     ConnectError)
        # elif option.domain == QWebPage.WebKit:
        #     exc_cls = Exception
        else:
            exc_cls = Exception
        return exc_cls(option.errorString)
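# Hypothetical settings.py excerpt wiring up such a middleware, using the
# setting names read in from_crawler above (the import path is assumed):
DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.BaseQtWebKitMiddleware": 543,
}
QTWEBKIT_COOKIES_ENABLED = True
QTWEBKIT_PAGE_LIMIT = 2        # concurrent webpages -> DeferredSemaphore(2)
QTWEBKIT_SHOW_WINDOW = False
QTWEBKIT_QT_PLATFORM = "minimal"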