def BrowserManager(command_queue, status_queue, browser_params, crash_recovery):
    """
    Set up the (optional) proxy and the browser, announce readiness on
    status_queue, then execute commands read from command_queue.

    After every command that completes, "OK" is put on status_queue. The first
    command that raises is logged, "FAILED" is put on status_queue and the
    function returns.
    """
    logger = loggingclient(*browser_params['logger_address'])

    # Proxy setup; the site queue stays None when no proxy is requested
    proxy_site_queue = None
    if browser_params['proxy']:
        proxy_port, proxy_site_queue = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'],
            browser_params['logger_address'],
            browser_params['crawl_id'])
        # the 'proxy' entry is overwritten with the port returned above
        browser_params['proxy'] = proxy_port

    # Launch the browser; status_queue is handed to the deploy step as well
    driver, prof_folder, browser_settings = deploy_browser.deploy_browser(
        status_queue, browser_params, crash_recovery)

    # Connect to the extension (firefox with the extension enabled only).
    # TODO: This needs to be cleaner
    extension_socket = None
    if browser_params['browser'] == 'firefox' and browser_params['extension']['enabled']:
        logger.debug("BROWSER %i: Looking for extension port information in %s"
                     % (browser_params['crawl_id'], prof_folder))
        port_file = prof_folder + 'extension_port.txt'
        # poll until the port file shows up, then wait a little longer
        # before reading it
        while not os.path.isfile(port_file):
            time.sleep(0.1)
        time.sleep(0.5)
        with open(port_file, 'r') as handle:
            port = handle.read().strip()
        extension_socket = clientsocket()
        extension_socket.connect('127.0.0.1', int(port))

    # Signal that setup finished, then remember the profile location
    status_queue.put('READY')
    browser_params['profile_path'] = prof_folder

    # Command loop: runs until a command fails
    while True:
        # nothing queued -> short sleep instead of spinning
        if command_queue.empty():
            time.sleep(0.001)
            continue

        # command is a tuple of form (command, arg0, arg1, ..., argN)
        command = command_queue.get()
        logger.info("BROWSER %i: EXECUTING COMMAND: %s"
                    % (browser_params['crawl_id'], str(command)))

        # any exception here is reported as FAILED and ends this manager
        try:
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params,
                                             extension_socket)
            status_queue.put("OK")
        except Exception as err:
            logger.info("BROWSER %i: Crash in driver, restarting browser manager \n %s \n %s"
                        % (browser_params['crawl_id'], str(type(err)), str(err)))
            status_queue.put("FAILED")
            return
def BrowserManager(command_queue, status_queue, browser_params, crash_recovery):
    """
    Set up the (optional) proxy, the browser and the (optional) extension
    socket, report the browser details on status_queue, then execute commands
    read from command_queue.

    The startup message put on status_queue is the tuple
    (profile folder, browser process pid, display pid, browser settings).
    After every command that completes, "OK" is put on status_queue. The first
    command that raises is printed, "FAILED" is put on status_queue and the
    function returns.
    """
    # sets up the proxy (for now, mitmproxy) if necessary
    proxy_site_queue = None  # used to pass the current site down to the proxy
    if browser_params['proxy']:
        (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'], browser_params['crawl_id'])
        browser_params['proxy'] = local_port

    # Gets the WebDriver, profile folder (i.e. where history/cookies are
    # stored) and display pid (None if not headless)
    (driver, prof_folder, display_pid, browser_settings) = \
        deploy_browser.deploy_browser(browser_params, crash_recovery)

    # Read the extension port -- if extension is enabled
    # TODO: This needs to be cleaner
    if browser_params['browser'] == 'firefox' and browser_params['extension']['enabled']:
        # poll until the port file exists
        while not os.path.isfile(prof_folder + 'extension_port.txt'):
            time.sleep(0.01)
        with open(prof_folder + 'extension_port.txt', 'r') as f:
            port = f.read().strip()
        extension_socket = clientsocket()
        extension_socket.connect('127.0.0.1', int(port))
    else:
        extension_socket = None

    # passes the profile folder, WebDriver pid and display pid back to the
    # TaskManager; now, the TaskManager knows that the browser is
    # successfully set up
    status_queue.put((prof_folder, int(driver.binary.process.pid),
                      display_pid, browser_settings))
    browser_params['profile_path'] = prof_folder

    # starts accepting arguments until told to die
    while True:
        # no command for now -> sleep to avoid pegging CPU on blocking get
        if command_queue.empty():
            time.sleep(0.001)
            continue

        # reads in the command tuple of form
        # (command, arg0, arg1, arg2, ..., argN) where N is variable
        command = command_queue.get()
        # Single-argument print(...) is valid on Python 3 and prints the
        # same text on Python 2, unlike the print statement.
        print("EXECUTING COMMAND: " + str(command))

        # attempts to perform an action and return an OK signal
        # if command fails for whatever reason, tell the TaskMaster to kill
        # and restart its worker processes
        try:
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params,
                                             extension_socket)
            status_queue.put("OK")
        except Exception as ex:
            print("CRASH IN DRIVER ORACLE:" + str(ex) + " RESTARTING BROWSER MANAGER")
            status_queue.put("FAILED")
            break
def BrowserManager(command_queue, status_queue, browser_params, crash_recovery):
    """
    Set up the (optional) proxy and the browser, report the browser details
    on status_queue, then execute commands read from command_queue.

    The startup message put on status_queue is the tuple
    (profile folder, browser process pid, display pid, browser settings).
    After every command that completes, "OK" is put on status_queue. The first
    command that raises is printed, "FAILED" is put on status_queue and the
    function returns.
    """
    # sets up the proxy (for now, mitmproxy) if necessary
    proxy_site_queue = None  # used to pass the current site down to the proxy
    if browser_params['proxy']:
        (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'], browser_params['crawl_id'])
        browser_params['proxy'] = local_port

    # Gets the WebDriver, profile folder (i.e. where history/cookies are
    # stored) and display pid (None if not headless)
    (driver, prof_folder, display_pid, browser_settings) = \
        deploy_browser.deploy_browser(browser_params, crash_recovery)

    # passes the profile folder, WebDriver pid and display pid back to the
    # TaskManager; now, the TaskManager knows that the browser is
    # successfully set up
    status_queue.put((prof_folder, int(driver.binary.process.pid),
                      display_pid, browser_settings))
    browser_params['profile_path'] = prof_folder

    # starts accepting arguments until told to die
    while True:
        # no command for now -> sleep to avoid pegging CPU on blocking get
        if command_queue.empty():
            time.sleep(0.001)
            continue

        # reads in the command tuple of form
        # (command, arg0, arg1, arg2, ..., argN) where N is variable
        command = command_queue.get()
        # Single-argument print(...) is valid on Python 3 and prints the
        # same text on Python 2, unlike the print statement.
        print("EXECUTING COMMAND: " + str(command))

        # attempts to perform an action and return an OK signal
        # if command fails for whatever reason, tell the TaskMaster to kill
        # and restart its worker processes
        try:
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params)
            status_queue.put("OK")
        except Exception as ex:
            print("CRASH IN DRIVER ORACLE:" + str(ex) + " RESTARTING BROWSER MANAGER")
            status_queue.put("FAILED")
            break
def BrowserManager(command_queue, status_queue, browser_params, manager_params, crash_recovery):
    """
    The BrowserManager function runs in each new browser process.
    It is responsible for listening to command instructions from
    the Task Manager and passing them to the command module to execute
    and interface with Selenium. Command execution status is sent back
    to the TaskManager.

    Messages put on status_queue by this function:
      ('STATUS', 'Proxy Ready', 'READY')   -- after the proxy is started
      ('STATUS', 'Browser Ready', 'READY') -- after browser setup finished
      "OK"                                 -- after each completed command
      ('CRITICAL', <pickled exc_info>)     -- ProfileLoadError,
                                              BrowserConfigError or
                                              AssertionError was raised
      ('FAILED', None)                     -- any other Exception was raised
    The function returns after sending 'CRITICAL' or 'FAILED'.
    """
    # Bound before the try block so that the handlers below can still run
    # (and notify status_queue) when loggingclient() itself raises.
    logger = None
    try:
        logger = loggingclient(*manager_params['logger_address'])

        # Start the proxy
        proxy_site_queue = None  # used to pass the current site down to the proxy
        if browser_params['proxy']:
            (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(
                browser_params, manager_params, status_queue)
            browser_params['proxy'] = local_port
        status_queue.put(('STATUS', 'Proxy Ready', 'READY'))

        # Start the virtualdisplay (if necessary), webdriver, and browser
        (driver, prof_folder, browser_settings) = deploy_browser.deploy_browser(
            status_queue, browser_params, manager_params, crash_recovery)

        # Read the extension port -- if extension is enabled
        # TODO: This needs to be cleaner
        if browser_params['browser'] == 'firefox' and browser_params['extension_enabled']:
            logger.debug(
                "BROWSER %i: Looking for extension port information in %s"
                % (browser_params['crawl_id'], prof_folder))
            # poll until the port file exists, then wait a little longer
            # before reading it
            while not os.path.isfile(prof_folder + 'extension_port.txt'):
                time.sleep(0.1)
            time.sleep(0.5)
            with open(prof_folder + 'extension_port.txt', 'r') as f:
                port = f.read().strip()
            extension_socket = clientsocket(serialization='json')
            extension_socket.connect('127.0.0.1', int(port))
        else:
            extension_socket = None

        # now, the TaskManager knows that the browser is successfully set up
        status_queue.put(('STATUS', 'Browser Ready', 'READY'))
        browser_params['profile_path'] = prof_folder

        # starts accepting arguments until told to die
        while True:
            # no command for now -> sleep to avoid pegging CPU on blocking get
            if command_queue.empty():
                time.sleep(0.001)
                continue

            # reads in the command tuple of form
            # (command, arg0, arg1, arg2, ..., argN) where N is variable
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s"
                        % (browser_params['crawl_id'], str(command)))

            # attempts to perform an action and return an OK signal
            # if command fails for whatever reason, tell the TaskMaster to
            # kill and restart its worker processes
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params,
                                             manager_params, extension_socket)
            status_queue.put("OK")

    except (ProfileLoadError, BrowserConfigError, AssertionError) as e:
        if logger is not None:
            logger.info("BROWSER %i: %s thrown, informing parent and raising"
                        % (browser_params['crawl_id'], e.__class__.__name__))
        err_info = sys.exc_info()
        status_queue.put(('CRITICAL', cPickle.dumps(err_info)))
        return

    except Exception:
        excp = traceback.format_exception(*sys.exc_info())
        if logger is not None:
            logger.info(
                "BROWSER %i: Crash in driver, restarting browser manager \n %s"
                % (browser_params['crawl_id'], ''.join(excp)))
        status_queue.put(('FAILED', None))
        return
def BrowserManager(command_queue, status_queue, browser_params, manager_params, crash_recovery):
    """
    The BrowserManager function runs in each new browser process.
    It is responsible for listening to command instructions from
    the Task Manager and passing them to the command module to execute
    and interface with Selenium. Command execution status is sent back
    to the TaskManager.

    Messages put on status_queue by this function:
      ('STATUS','Proxy Ready','READY')   -- after the proxy is started
      ('STATUS','Browser Ready','READY') -- after browser setup finished
      "OK"                               -- after each completed command
      ('CRITICAL', <pickled exc_info>)   -- ProfileLoadError,
                                            BrowserConfigError or
                                            AssertionError was raised
      ('FAILED', None)                   -- any other Exception was raised
    The function returns after sending 'CRITICAL' or 'FAILED'.
    """
    # Bound before the try block: if loggingclient() raises, the handlers
    # must still be able to report the failure on status_queue instead of
    # dying on an unbound `logger`.
    logger = None
    try:
        logger = loggingclient(*manager_params['logger_address'])

        # Start the proxy
        proxy_site_queue = None  # used to pass the current site down to the proxy
        if browser_params['proxy']:
            (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(browser_params,
                                                                          manager_params,
                                                                          status_queue)
            browser_params['proxy'] = local_port
        status_queue.put(('STATUS', 'Proxy Ready', 'READY'))

        # Start the virtualdisplay (if necessary), webdriver, and browser
        (driver, prof_folder, browser_settings) = deploy_browser.deploy_browser(status_queue,
                                                                                browser_params,
                                                                                manager_params,
                                                                                crash_recovery)

        # Read the extension port -- if extension is enabled
        # TODO: This needs to be cleaner
        if browser_params['browser'] == 'firefox' and browser_params['extension_enabled']:
            logger.debug("BROWSER %i: Looking for extension port information in %s"
                         % (browser_params['crawl_id'], prof_folder))
            # wait for the port file to appear, then pause briefly before
            # reading it
            while not os.path.isfile(prof_folder + 'extension_port.txt'):
                time.sleep(0.1)
            time.sleep(0.5)
            with open(prof_folder + 'extension_port.txt', 'r') as f:
                port = f.read().strip()
            extension_socket = clientsocket(serialization='json')
            extension_socket.connect('127.0.0.1', int(port))
        else:
            extension_socket = None

        # now, the TaskManager knows that the browser is successfully set up
        status_queue.put(('STATUS', 'Browser Ready', 'READY'))
        browser_params['profile_path'] = prof_folder

        # starts accepting arguments until told to die
        while True:
            # no command for now -> sleep to avoid pegging CPU on blocking get
            if command_queue.empty():
                time.sleep(0.001)
                continue

            # reads in the command tuple of form
            # (command, arg0, arg1, arg2, ..., argN) where N is variable
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s"
                        % (browser_params['crawl_id'], str(command)))

            # attempts to perform an action and return an OK signal
            # if command fails for whatever reason, tell the TaskMaster to
            # kill and restart its worker processes
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params,
                                             manager_params, extension_socket)
            status_queue.put("OK")

    except (ProfileLoadError, BrowserConfigError, AssertionError) as e:
        if logger is not None:
            logger.info("BROWSER %i: %s thrown, informing parent and raising"
                        % (browser_params['crawl_id'], e.__class__.__name__))
        err_info = sys.exc_info()
        status_queue.put(('CRITICAL', cPickle.dumps(err_info)))
        return

    except Exception:
        excp = traceback.format_exception(*sys.exc_info())
        if logger is not None:
            logger.info("BROWSER %i: Crash in driver, restarting browser manager \n %s"
                        % (browser_params['crawl_id'], ''.join(excp)))
        status_queue.put(('FAILED', None))
        return
def BrowserManager(command_queue, status_queue, browser_params,
                   manager_params, crash_recovery):
    """
    The BrowserManager function runs in each new browser process.
    It is responsible for listening to command instructions from
    the Task Manager and passing them to the command module to execute
    and interface with Selenium. Command execution status is sent back
    to the TaskManager.

    Messages put on status_queue by this function:
      ('STATUS', 'Browser Ready', (prof_folder, 'READY')) -- setup finished
      "OK"                              -- after each completed command
      ('CRITICAL', <pickled exc_info>)  -- ProfileLoadError,
                                           BrowserConfigError or
                                           AssertionError was raised
      ('FAILED', None)                  -- any other Exception was raised
    The function returns after sending 'CRITICAL' or 'FAILED'.
    """
    # Bound before the try block so that the handlers below can still
    # notify status_queue when loggingclient() itself raises; otherwise
    # they would fail on an unbound `logger` and the parent would never
    # receive a status.
    logger = None
    try:
        logger = loggingclient(*manager_params['logger_address'])

        # Start the virtualdisplay (if necessary), webdriver, and browser
        driver, prof_folder, browser_settings = deploy_browser.deploy_browser(
            status_queue, browser_params, manager_params, crash_recovery)
        if prof_folder[-1] != '/':
            prof_folder += '/'

        # Read the extension port -- if extension is enabled
        # TODO: Initial communication from extension to TM should use sockets
        if (browser_params['browser'] == 'firefox' and
                browser_params['extension_enabled']):
            logger.debug("BROWSER %i: Looking for extension port information "
                         "in %s" % (browser_params['crawl_id'], prof_folder))
            elapsed = 0
            port = None
            ep_filename = os.path.join(prof_folder, 'extension_port.txt')
            # Retry for roughly 5 seconds while the file does not exist yet;
            # any other IOError is re-raised immediately.
            while elapsed < 5:
                try:
                    with open(ep_filename, 'rt') as f:
                        port = int(f.read().strip())
                        break
                except IOError as e:
                    if e.errno != errno.ENOENT:
                        raise
                time.sleep(0.1)
                elapsed += 0.1
            if port is None:
                # try one last time, allowing all exceptions to propagate
                with open(ep_filename, 'rt') as f:
                    port = int(f.read().strip())

            logger.debug("BROWSER %i: Connecting to extension on port %i" % (
                browser_params['crawl_id'], port))
            extension_socket = clientsocket(serialization='json')
            extension_socket.connect('127.0.0.1', int(port))
        else:
            extension_socket = None

        logger.debug(
            "BROWSER %i: BrowserManager ready." % browser_params['crawl_id'])

        # passes the profile folder back to the
        # TaskManager to signal a successful startup
        status_queue.put(('STATUS', 'Browser Ready', (prof_folder, 'READY')))
        browser_params['profile_path'] = prof_folder

        # starts accepting arguments until told to die
        while True:
            # no command for now -> sleep to avoid pegging CPU on blocking get
            if command_queue.empty():
                time.sleep(0.001)
                continue

            # reads in the command tuple of form:
            # (command, arg0, arg1, arg2, ..., argN) where N is variable
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s" % (
                browser_params['crawl_id'], str(command)))
            # attempts to perform an action and return an OK signal
            # if command fails for whatever reason, tell the TaskManager to
            # kill and restart its worker processes
            command_executor.execute_command(
                command, driver, browser_settings,
                browser_params, manager_params, extension_socket)
            status_queue.put("OK")

    except (ProfileLoadError, BrowserConfigError, AssertionError) as e:
        if logger is not None:
            logger.info("BROWSER %i: %s thrown, informing parent and raising" % (
                browser_params['crawl_id'], e.__class__.__name__))
        err_info = sys.exc_info()
        status_queue.put(('CRITICAL', pickle.dumps(err_info)))
        return

    except Exception:
        excp = traceback.format_exception(*sys.exc_info())
        if logger is not None:
            logger.info("BROWSER %i: Crash in driver, restarting browser manager "
                        "\n %s" % (browser_params['crawl_id'], ''.join(excp)))
        status_queue.put(('FAILED', None))
        return
def BrowserManager(command_queue, status_queue, browser_params, crash_recovery):
    """
    Bring up the optional proxy and the browser, put 'READY' on status_queue,
    then run every command that arrives on command_queue.

    "OK" is put on status_queue after each command that completes. When a
    command raises, the error is logged, "FAILED" is put on status_queue and
    the command loop ends.
    """
    logger = loggingclient(*browser_params['logger_address'])

    # -- proxy ---------------------------------------------------------------
    # proxy_site_queue is handed to the command executor; None without proxy
    proxy_site_queue = None
    if browser_params['proxy']:
        proxy_info = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'],
            browser_params['logger_address'],
            browser_params['crawl_id'])
        (listen_port, proxy_site_queue) = proxy_info
        browser_params['proxy'] = listen_port

    # -- browser -------------------------------------------------------------
    deployed = deploy_browser.deploy_browser(status_queue, browser_params,
                                             crash_recovery)
    (driver, prof_folder, browser_settings) = deployed

    # -- extension socket ----------------------------------------------------
    # TODO: This needs to be cleaner
    wants_extension = (browser_params['browser'] == 'firefox'
                       and browser_params['extension']['enabled'])
    if not wants_extension:
        extension_socket = None
    else:
        logger.debug(
            "BROWSER %i: Looking for extension port information in %s" %
            (browser_params['crawl_id'], prof_folder))
        port_path = prof_folder + 'extension_port.txt'
        # block until the port file exists, pause, then read the port
        while not os.path.isfile(port_path):
            time.sleep(0.1)
        time.sleep(0.5)
        with open(port_path, 'r') as port_fh:
            port_text = port_fh.read().strip()
        extension_socket = clientsocket()
        extension_socket.connect('127.0.0.1', int(port_text))

    # -- ready ---------------------------------------------------------------
    status_queue.put('READY')
    browser_params['profile_path'] = prof_folder

    # -- command loop --------------------------------------------------------
    while True:
        if not command_queue.empty():
            # command tuple: (command, arg0, arg1, arg2, ..., argN)
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s" %
                        (browser_params['crawl_id'], str(command)))
            # run it; on any exception report FAILED and leave the loop
            try:
                command_executor.execute_command(
                    command, driver, proxy_site_queue, browser_settings,
                    browser_params, extension_socket)
                status_queue.put("OK")
            except Exception as exc:
                logger.info(
                    "BROWSER %i: Crash in driver, restarting browser manager \n %s \n %s"
                    % (browser_params['crawl_id'], str(type(exc)), str(exc)))
                status_queue.put("FAILED")
                break
        else:
            # idle: brief sleep so the polling loop does not peg the CPU
            time.sleep(0.001)