def _handle_message(self, websock, json_message):
    '''
    Decodes one json message received over the websocket and dispatches it.

    A message with a 'method' key is treated as an event and routed by its
    method name. Any other message that has a 'result' key is stored in
    `self._result_messages`, but only if its id is already a key there.
    '''
    msg = json.loads(json_message)

    if 'method' not in msg:
        # not an event: keep the result only when its id is already tracked
        if 'result' in msg and msg['id'] in self._result_messages:
            self._result_messages[msg['id']] = msg
        return

    method = msg['method']
    if method == 'Page.loadEventFired':
        self.got_page_load_event = datetime.datetime.utcnow()
    elif method == 'Network.responseReceived':
        self._network_response_received(msg)
    elif method == 'Network.requestWillBeSent':
        if self.on_request:
            self.on_request(msg)
    elif method == 'Debugger.paused':
        self._debugger_paused(msg)
    elif method == 'Inspector.targetCrashed':
        self.logger.error(
                '''chrome tab went "aw snap" or "he's dead jim"!''')
        brozzler.thread_raise(self.calling_thread, BrowsingException)
    elif method == 'Console.messageAdded':
        console_msg = msg['params']['message']
        self.logger.debug(
                'console.%s %s', console_msg['level'], console_msg['text'])
    elif method == 'Page.javascriptDialogOpening':
        self._javascript_dialog_opening(msg)
    elif method == 'Network.loadingFailed':
        # only a failed proxy connection is escalated to the calling thread
        if ('params' in msg
                and 'errorText' in msg['params']
                and msg['params']['errorText']
                    == 'net::ERR_PROXY_CONNECTION_FAILED'):
            brozzler.thread_raise(self.calling_thread, brozzler.ProxyError)
    # events with any other method name are ignored
def test_thread_raise_second_with_block():
    '''
    Raises Exception1 and then Exception2 in a thread that runs two
    consecutive `with brozzler.thread_accept_exceptions()` blocks, and
    asserts that the exception caught around the second block is an
    Exception2.
    '''
    def two_with_blocks():
        try:
            with brozzler.thread_accept_exceptions():
                time.sleep(2)
            # reaching this point means nothing was raised in the first block;
            # thread_caught_exception stays None and the final assert fails
            return # test fails
        except Exception1 as e:
            # expected path: fall through to the second `with` block
            pass
        except:
            # anything other than Exception1 also leaves
            # thread_caught_exception unset
            return # fail test

        try:
            with brozzler.thread_accept_exceptions():
                brozzler.sleep(2)
        except Exception as e:
            nonlocal thread_caught_exception
            thread_caught_exception = e

    # test that second `with` block gets second exception raised during first
    # `with` block
    thread_caught_exception = None
    th = threading.Thread(target=two_with_blocks)
    th.start()
    # both exceptions are requested back to back, before the thread is joined
    brozzler.thread_raise(th, Exception1)
    brozzler.thread_raise(th, Exception2)
    th.join()
    assert isinstance(thread_caught_exception, Exception2)
def stop(self):
    '''
    Requests shutdown of the worker thread and waits for it to exit.

    Holds `self._start_stop_lock` for the whole operation. Does nothing
    unless `self._thread` is set and still alive; otherwise raises
    brozzler.ShutdownRequested in that thread, joins it, and resets
    `self._thread` to None.
    '''
    with self._start_stop_lock:
        if not (self._thread and self._thread.is_alive()):
            return
        self.logger.info("brozzler worker shutting down")
        brozzler.thread_raise(self._thread, brozzler.ShutdownRequested)
        self._thread.join()
        self._thread = None
def _handle_message(self, websock, json_message):
    '''
    Decodes one json message received over the websocket and dispatches it.

    A message with a 'method' key is treated as an event and routed by its
    method name. Any other message that has a 'result' key is stored in
    `self._result_messages`, but only if its id is already a key there.
    '''
    msg = json.loads(json_message)

    if 'method' not in msg:
        # not an event: keep the result only when its id is already tracked
        if 'result' in msg and msg['id'] in self._result_messages:
            self._result_messages[msg['id']] = msg
        return

    method = msg['method']
    if method == 'Page.loadEventFired':
        self.got_page_load_event = datetime.datetime.utcnow()
    elif method == 'Network.responseReceived':
        self._network_response_received(msg)
    elif method == 'Network.requestWillBeSent':
        if self.on_request:
            self.on_request(msg)
    elif method == 'Page.interstitialShown':
        # AITFIVE-1529 (http auth handling): for now the event is only
        # logged. Killing the browser and treating the page as finished is
        # under consideration, pending a look at when else this event fires.
        self.logger.info('Page.interstitialShown received')
    elif method == 'Inspector.targetCrashed':
        self.logger.error(
                '''chrome tab went "aw snap" or "he's dead jim"!''')
        brozzler.thread_raise(self.calling_thread, BrowsingException)
    elif method == 'Console.messageAdded':
        console_msg = msg['params']['message']
        self.logger.debug(
                'console.%s %s', console_msg['level'], console_msg['text'])
    elif method == 'Page.javascriptDialogOpening':
        self._javascript_dialog_opening(msg)
    elif method == 'Network.loadingFailed':
        # only a failed proxy connection is escalated to the calling thread
        if ('params' in msg
                and 'errorText' in msg['params']
                and msg['params']['errorText']
                    == 'net::ERR_PROXY_CONNECTION_FAILED'):
            brozzler.thread_raise(self.calling_thread, brozzler.ProxyError)
    # events with any other method name are ignored
def test_thread_raise_safe_exit():
    '''
    Raises Exception1 and then Exception2 in a thread whose gate `__exit__`
    has been patched to take an extra two seconds, and asserts that the
    thread ends up catching an Exception1.
    '''
    def delay_context_exit():
        gate = brozzler.thread_accept_exceptions()
        orig_exit = type(gate).__exit__
        try:
            # Patch __exit__ on the gate's class: sleep 2 seconds, then call
            # the original __exit__. The tuple's last element (False) is what
            # the lambda returns, so the exception is never suppressed.
            type(gate).__exit__ = lambda self, et, ev, t: (
                brozzler.sleep(2), orig_exit(self, et, ev, t), False)[-1]
            with brozzler.thread_accept_exceptions() as gate:
                brozzler.sleep(2)
        except Exception as e:
            nonlocal thread_caught_exception
            thread_caught_exception = e
        finally:
            # always restore the original __exit__ on the class
            type(gate).__exit__ = orig_exit

    # test that a second thread_raise() doesn't result in an exception in
    # ThreadExceptionGate.__exit__
    thread_caught_exception = None
    th = threading.Thread(target=delay_context_exit)
    th.start()
    # short pauses space out thread start, first raise and second raise
    time.sleep(0.2)
    brozzler.thread_raise(th, Exception1)
    time.sleep(0.2)
    brozzler.thread_raise(th, Exception2)
    th.join()
    assert thread_caught_exception
    assert isinstance(thread_caught_exception, Exception1)
def run(self):
    '''
    Main loop: repeatedly acquires a browser, claims a site and starts a
    daemon thread running `self._brozzle_site(browser, site)`.

    The loop only ends when an exception escapes it. On the way out the
    `finally` block unregisters from the service registry (if any), raises
    brozzler.ShutdownRequested in every live brozzling thread, shuts down
    the browser pool and joins the brozzling threads.
    '''
    try:
        # remembers the last "idle" condition logged so the same message is
        # not repeated on every pass through the loop
        latest_state = None
        while True:
            self._service_heartbeat_if_due()
            try:
                browser = self._browser_pool.acquire()
                try:
                    site = self._frontier.claim_site("%s:%s" % (
                        socket.gethostname(), browser.chrome.port))
                    self.logger.info(
                            "brozzling site (proxy=%s) %s",
                            repr(self._proxy(site)), site)
                    th = threading.Thread(
                            target=self._brozzle_site,
                            args=(browser, site),
                            name="BrozzlingThread:%s" % site.seed,
                            daemon=True)
                    with self._browsing_threads_lock:
                        self._browsing_threads.add(th)
                    th.start()
                except:
                    # give the browser back before propagating anything
                    # raised between acquire() and th.start()
                    self._browser_pool.release(browser)
                    raise
            except brozzler.browser.NoBrowsersAvailable:
                if latest_state != "browsers-busy":
                    self.logger.info(
                            "all %s browsers are busy", self._max_browsers)
                    latest_state = "browsers-busy"
            except brozzler.NothingToClaim:
                if latest_state != "no-unclaimed-sites":
                    self.logger.info("no unclaimed sites to browse")
                    latest_state = "no-unclaimed-sites"
            time.sleep(0.5)
    except brozzler.ShutdownRequested:
        self.logger.info("shutdown requested")
    except:
        self.logger.critical(
                "thread exiting due to unexpected exception",
                exc_info=True)
    finally:
        if self._service_registry and hasattr(self, "status_info"):
            try:
                self._service_registry.unregister(self.status_info["id"])
            except:
                # best effort: a failed unregister must not stop the rest of
                # the shutdown sequence
                self.logger.error(
                        "failed to unregister from service registry",
                        exc_info=True)

        self.logger.info(
                'shutting down %s brozzling threads',
                len(self._browsing_threads))
        with self._browsing_threads_lock:
            for th in self._browsing_threads:
                if th.is_alive():
                    brozzler.thread_raise(th, brozzler.ShutdownRequested)
        self._browser_pool.shutdown_now()
        # copy to avoid "RuntimeError: Set changed size during iteration"
        thredz = set(self._browsing_threads)
        for th in thredz:
            th.join()
def shutdown_now(self):
    '''
    Stops the amqp consumer: sets `self._consumer_stop`, raises
    brozzler.ShutdownRequested in every browsing thread that is still
    alive, then waits for `self._consumer_thread` to finish.
    '''
    self.logger.info("shutting down amqp consumer %s", self.amqp_url)
    self._consumer_stop.set()
    with self._browsing_threads_lock:
        for browsing_thread in self._browsing_threads:
            if not browsing_thread.is_alive():
                continue
            brozzler.thread_raise(
                    browsing_thread, brozzler.ShutdownRequested)
    # the browser pool is deliberately not shut down here
    # (self._browser_pool.shutdown_now() is disabled)
    self._consumer_thread.join()
def run(self):
    '''
    Main loop: repeatedly acquires a browser, claims a site and starts a
    daemon thread running `self._brozzle_site_thread_target(browser, site)`.

    The loop only ends when an exception escapes it. On the way out the
    `finally` block unregisters from the service registry (if any), raises
    brozzler.ShutdownRequested in every live brozzling thread, shuts down
    the browser pool and joins the brozzling threads.
    '''
    self.logger.info("brozzler worker starting")
    try:
        # remembers that "browsers busy" was already logged so the message
        # is not repeated on every pass through the loop
        latest_state = None
        while True:
            self._service_heartbeat_if_due()
            try:
                browser = self._browser_pool.acquire()
                try:
                    site = self._frontier.claim_site(
                            "%s:%s" % (socket.gethostname(), browser.chrome.port))
                    th = threading.Thread(
                            target=self._brozzle_site_thread_target,
                            args=(browser, site),
                            name="BrozzlingThread:%s" % browser.chrome.port,
                            daemon=True)
                    with self._browsing_threads_lock:
                        self._browsing_threads.add(th)
                    th.start()
                except:
                    # give the browser back before propagating anything
                    # raised between acquire() and th.start()
                    self._browser_pool.release(browser)
                    raise
            except brozzler.browser.NoBrowsersAvailable:
                if latest_state != "browsers-busy":
                    self.logger.info("all %s browsers are busy", self._max_browsers)
                    latest_state = "browsers-busy"
            except brozzler.NothingToClaim:
                # nothing to do right now; try again after the sleep below
                pass
            time.sleep(0.5)
    except brozzler.ShutdownRequested:
        self.logger.info("shutdown requested")
    except:
        self.logger.critical("thread exiting due to unexpected exception", exc_info=True)
    finally:
        if self._service_registry and hasattr(self, "status_info"):
            try:
                self._service_registry.unregister(self.status_info["id"])
            except:
                # best effort: a failed unregister must not stop the rest of
                # the shutdown sequence
                self.logger.error(
                        "failed to unregister from service registry", exc_info=True)

        self.logger.info('shutting down %s brozzling threads',
                         len(self._browsing_threads))
        with self._browsing_threads_lock:
            for th in self._browsing_threads:
                if th.is_alive():
                    brozzler.thread_raise(th, brozzler.ShutdownRequested)
        self._browser_pool.shutdown_now()
        # copy to avoid "RuntimeError: Set changed size during iteration"
        thredz = set(self._browsing_threads)
        for th in thredz:
            th.join()
def on_error(websock, e):
    '''
    Websocket error callback: logs the error, then raises BrowsingException
    in `calling_thread` (the thread that called start()).
    '''
    if not isinstance(e, websocket.WebSocketConnectionClosedException):
        self.logger.error(
                'exception from websocket receiver thread', exc_info=1)
    else:
        self.logger.error('websocket closed, did chrome die?')
    # either way the calling thread is interrupted
    brozzler.thread_raise(calling_thread, BrowsingException)
def _on_error(self, websock, e):
    '''
    Websocket error callback: logs the error, then raises BrowsingException
    in the thread that created this instance (`self.calling_thread`).
    '''
    connection_gone = (
            websocket.WebSocketConnectionClosedException,
            ConnectionResetError)
    if not isinstance(e, connection_gone):
        self.logger.error(
                'exception from websocket receiver thread', exc_info=1)
    else:
        self.logger.error('websocket closed, did chrome die?')
    # either way the calling thread is interrupted
    brozzler.thread_raise(self.calling_thread, BrowsingException)
def run(self):
    '''
    Main loop: until `self._shutdown` is set, sends heartbeats and
    periodically calls `self._start_browsing_some_sites()`.

    On the way out the `finally` block unregisters from the service registry
    (if any), raises brozzler.ShutdownRequested in every live brozzling
    thread, shuts down the browser pool and joins the brozzling threads.
    '''
    self.logger.notice(
            'brozzler %s - brozzler-worker starting',
            brozzler.__version__)
    # time.time() of the most recent NothingToClaim; 0 means "never"
    last_nothing_to_claim = 0
    try:
        while not self._shutdown.is_set():
            self._service_heartbeat_if_due()
            # after a NothingToClaim, wait 20 seconds before trying to
            # start browsing sites again
            if time.time() - last_nothing_to_claim > 20:
                try:
                    self._start_browsing_some_sites()
                except brozzler.browser.NoBrowsersAvailable:
                    logging.trace(
                            "all %s browsers are in use",
                            self._max_browsers)
                except brozzler.NothingToClaim:
                    last_nothing_to_claim = time.time()
                    logging.trace(
                            "nothing to claim, all available active sites "
                            "are already claimed by a brozzler worker")
            time.sleep(0.5)

        self.logger.notice("shutdown requested")
    except r.ReqlError as e:
        # NOTE(review): this handler is outside the while loop, so despite
        # the message the loop is not resumed; control goes to `finally`.
        self.logger.error(
                "caught rethinkdb exception, will try to proceed",
                exc_info=True)
    except brozzler.ShutdownRequested:
        self.logger.info("shutdown requested")
    except:
        self.logger.critical(
                "thread exiting due to unexpected exception",
                exc_info=True)
    finally:
        if self._service_registry and hasattr(self, "status_info"):
            try:
                self._service_registry.unregister(self.status_info["id"])
            except:
                # best effort: a failed unregister must not stop the rest of
                # the shutdown sequence
                self.logger.error(
                        "failed to unregister from service registry",
                        exc_info=True)

        self.logger.info(
                'shutting down %s brozzling threads',
                len(self._browsing_threads))
        with self._browsing_threads_lock:
            for th in self._browsing_threads:
                if th.is_alive():
                    brozzler.thread_raise(th, brozzler.ShutdownRequested)
        self._browser_pool.shutdown_now()
        # copy to avoid "RuntimeError: Set changed size during iteration"
        thredz = set(self._browsing_threads)
        for th in thredz:
            th.join()
def _on_error(self, websock, e):
    '''
    Websocket error callback: logs the error, then raises BrowsingException
    in the thread that created this instance (`self.calling_thread`).
    '''
    chrome_probably_died = isinstance(
            e, (websocket.WebSocketConnectionClosedException,
                ConnectionResetError))
    if chrome_probably_died:
        self.logger.error('websocket closed, did chrome die?')
    else:
        self.logger.error(
                'exception from websocket receiver thread', exc_info=1)
    # either way the calling thread is interrupted
    brozzler.thread_raise(self.calling_thread, BrowsingException)
def test_thread_raise_not_accept():
    '''
    thread_raise() must not raise anything in a thread that never enters a
    `with brozzler.thread_accept_exceptions()` block.
    '''
    thread_caught_exception = None

    def never_accept():
        nonlocal thread_caught_exception
        try:
            brozzler.sleep(2)
        except Exception as exc:
            thread_caught_exception = exc

    worker = threading.Thread(target=never_accept)
    worker.start()
    brozzler.thread_raise(worker, Exception1)
    worker.join()

    # the thread slept through undisturbed
    assert thread_caught_exception is None
def _network_response_received(self, message):
    '''
    Handles a Network.responseReceived message.

    A response with status 420 and a Warcprox-Meta header (looked up
    case-insensitively) means a limit was reached. The first time this
    happens, `self.reached_limit` is set and brozzler.ReachedLimit is raised
    in the calling thread. Later occurrences are only logged.

    In every case `self.on_response`, if set, is then called with the message.
    '''
    response = message['params']['response']
    if response['status'] == 420:
        headers = CaseInsensitiveDict(response['headers'])
        if 'Warcprox-Meta' in headers:
            if self.reached_limit:
                self.logger.info(
                        'reached limit but self.reached_limit is already set, '
                        'assuming the calling thread is already handling this')
            else:
                warcprox_meta = json.loads(headers['Warcprox-Meta'])
                self.reached_limit = brozzler.ReachedLimit(
                        warcprox_meta=warcprox_meta)
                self.logger.info('reached limit %s', self.reached_limit)
                brozzler.thread_raise(
                        self.calling_thread, brozzler.ReachedLimit)

    if self.on_response:
        self.on_response(message)
def test_thread_raise_immediate():
    '''
    An exception raised with thread_raise() while the target thread is in a
    `with brozzler.thread_accept_exceptions()` block is delivered promptly,
    cutting the thread's two second sleep short.
    '''
    thread_caught_exception = None

    def accept_immediately():
        nonlocal thread_caught_exception
        try:
            with brozzler.thread_accept_exceptions():
                brozzler.sleep(2)
        except Exception as exc:
            thread_caught_exception = exc

    worker = threading.Thread(target=accept_immediately)
    worker.start()
    brozzler.thread_raise(worker, Exception1)
    wait_began = time.time()
    worker.join()

    assert thread_caught_exception
    assert isinstance(thread_caught_exception, Exception1)
    # the join must return well before the sleep would have ended
    assert time.time() - wait_began < 1.0
def test_thread_raise_pending_exception():
    '''
    An exception raised with thread_raise() before the target thread enters
    its `with brozzler.thread_accept_exceptions()` block stays pending and
    is only delivered once the thread gets there, after its sleep.
    '''
    thread_caught_exception = None

    def accept_eventually():
        nonlocal thread_caught_exception
        try:
            brozzler.sleep(2)
            with brozzler.thread_accept_exceptions():
                pass
        except Exception as exc:
            thread_caught_exception = exc

    worker = threading.Thread(target=accept_eventually)
    worker.start()
    brozzler.thread_raise(worker, Exception1)
    wait_began = time.time()
    worker.join()

    assert isinstance(thread_caught_exception, Exception1)
    # the sleep was not interrupted, so the join took a while
    assert time.time() - wait_began > 1.0
def _handle_message(self, websock, json_message):
    '''
    Decodes one json message received over the websocket and dispatches it.

    A message with a 'method' key is treated as an event and routed by its
    method name. Any other message that has a 'result' key is stored in
    `self._result_messages`, but only if its id is already a key there.

    Several events raise an exception in `self.calling_thread` via
    brozzler.thread_raise(): Page.interstitialShown, Inspector.targetCrashed
    and a Network.loadingFailed caused by a failed proxy connection.
    '''
    message = json.loads(json_message)
    if 'method' in message:
        method = message['method']
        if method == 'Page.loadEventFired':
            self.got_page_load_event = datetime.datetime.utcnow()
        elif method == 'Network.responseReceived':
            self._network_response_received(message)
        elif method == 'Network.requestWillBeSent':
            if self.on_request:
                self.on_request(message)
        elif method == 'Page.interstitialShown':
            # AITFIVE-1529: handle http auth
            # we should kill the browser when we receive
            # Page.interstitialShown and consider the page finished, until
            # this is fixed:
            # https://bugs.chromium.org/p/chromium/issues/detail?id=764505
            #
            # The logged event name is spelled exactly like the DevTools
            # event so that searching logs for it finds this line.
            self.logger.info(
                    'Page.interstitialShown (likely unsupported http auth '
                    'request)')
            brozzler.thread_raise(
                    self.calling_thread, brozzler.PageInterstitialShown)
        elif method == 'Inspector.targetCrashed':
            self.logger.error(
                    '''chrome tab went "aw snap" or "he's dead jim"!''')
            brozzler.thread_raise(self.calling_thread, BrowsingException)
        elif method == 'Console.messageAdded':
            self.logger.debug(
                    'console.%s %s', message['params']['message']['level'],
                    message['params']['message']['text'])
        elif method == 'Runtime.exceptionThrown':
            self.logger.debug('uncaught exception: %s', message)
        elif method == 'Page.javascriptDialogOpening':
            self._javascript_dialog_opening(message)
        elif (method == 'Network.loadingFailed'
                and 'params' in message
                and 'errorText' in message['params']
                and message['params']['errorText']
                    == 'net::ERR_PROXY_CONNECTION_FAILED'):
            brozzler.thread_raise(self.calling_thread, brozzler.ProxyError)
        elif method == 'ServiceWorker.workerVersionUpdated':
            if self.on_service_worker_version_updated:
                self.on_service_worker_version_updated(message)
        # events with any other method name are ignored
    elif 'result' in message:
        # only results whose id is already tracked are kept
        if message['id'] in self._result_messages:
            self._result_messages[message['id']] = message
def _handle_message(self, websock, json_message):
    '''
    Decodes one json message received over the websocket and dispatches it.

    A message with a 'method' key is treated as an event and routed by its
    method name. Any other message that has a 'result' key is stored in
    `self._result_messages`, but only if its id is already a key there.

    Several events raise an exception in `self.calling_thread` via
    brozzler.thread_raise(): Page.interstitialShown, Inspector.targetCrashed
    and a Network.loadingFailed caused by a failed proxy connection.
    '''
    message = json.loads(json_message)
    if 'method' in message:
        method = message['method']
        if method == 'Page.loadEventFired':
            self.got_page_load_event = datetime.datetime.utcnow()
        elif method == 'Network.responseReceived':
            self._network_response_received(message)
        elif method == 'Network.requestWillBeSent':
            if self.on_request:
                self.on_request(message)
        elif method == 'Page.interstitialShown':
            # AITFIVE-1529: handle http auth
            # we should kill the browser when we receive
            # Page.interstitialShown and consider the page finished, until
            # this is fixed:
            # https://bugs.chromium.org/p/chromium/issues/detail?id=764505
            #
            # The logged event name is spelled exactly like the DevTools
            # event so that searching logs for it finds this line.
            self.logger.info(
                    'Page.interstitialShown (likely unsupported http auth '
                    'request)')
            brozzler.thread_raise(
                    self.calling_thread, brozzler.PageInterstitialShown)
        elif method == 'Inspector.targetCrashed':
            self.logger.error(
                    '''chrome tab went "aw snap" or "he's dead jim"!''')
            brozzler.thread_raise(self.calling_thread, BrowsingException)
        elif method == 'Console.messageAdded':
            self.logger.debug(
                    'console.%s %s', message['params']['message']['level'],
                    message['params']['message']['text'])
        elif method == 'Runtime.exceptionThrown':
            self.logger.debug('uncaught exception: %s', message)
        elif method == 'Page.javascriptDialogOpening':
            self._javascript_dialog_opening(message)
        elif (method == 'Network.loadingFailed'
                and 'params' in message
                and 'errorText' in message['params']
                and message['params']['errorText']
                    == 'net::ERR_PROXY_CONNECTION_FAILED'):
            brozzler.thread_raise(self.calling_thread, brozzler.ProxyError)
        elif method == 'ServiceWorker.workerVersionUpdated':
            if self.on_service_worker_version_updated:
                self.on_service_worker_version_updated(message)
        # events with any other method name are ignored
    elif 'result' in message:
        # only results whose id is already tracked are kept
        if message['id'] in self._result_messages:
            self._result_messages[message['id']] = message