Exemplo n.º 1
0
def BrowserManager(command_queue, status_queue, browser_params, crash_recovery):
    """
    Run the command loop of a single browser process.

    Starts the proxy when browser_params['proxy'] is truthy, deploys the
    browser, connects to the Firefox extension when it is enabled, puts
    'READY' on status_queue and then executes every command read from
    command_queue. Each command is answered with "OK"; the first command
    that raises is answered with "FAILED" and ends the loop.
    """
    logger = loggingclient(*browser_params['logger_address'])

    # Proxy setup -- without a proxy there is no site queue to hand to commands
    proxy_site_queue = None
    if browser_params['proxy']:
        proxy_port, proxy_site_queue = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'],
            browser_params['logger_address'],
            browser_params['crawl_id'])
        # the 'proxy' entry is replaced by the local port init_proxy returned
        browser_params['proxy'] = proxy_port

    # Virtual display (if necessary), webdriver and browser
    deployed = deploy_browser.deploy_browser(status_queue, browser_params, crash_recovery)
    driver, prof_folder, browser_settings = deployed

    # Extension socket -- only for Firefox with the extension switched on
    # TODO: This needs to be cleaner
    extension_socket = None
    if browser_params['browser'] == 'firefox' and browser_params['extension']['enabled']:
        logger.debug("BROWSER %i: Looking for extension port information in %s" % (browser_params['crawl_id'], prof_folder))
        port_file = prof_folder + 'extension_port.txt'
        # poll until the port file shows up in the profile folder
        while not os.path.isfile(port_file):
            time.sleep(0.1)
        # extra pause before reading -- presumably lets the writer finish
        time.sleep(0.5)
        with open(port_file, 'r') as port_handle:
            port_text = port_handle.read().strip()
        extension_socket = clientsocket()
        extension_socket.connect('127.0.0.1', int(port_text))

    # signal that the browser is set up, then remember the profile location
    status_queue.put('READY')
    browser_params['profile_path'] = prof_folder

    # serve commands until one of them fails
    while True:
        if not command_queue.empty():
            # command tuple: (command, arg0, arg1, arg2, ..., argN), N variable
            task = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s" % (browser_params['crawl_id'], str(task)))

            # report OK on success; on any failure report FAILED and stop so
            # the worker can be restarted
            try:
                command_executor.execute_command(task, driver, proxy_site_queue, browser_settings, browser_params, extension_socket)
                status_queue.put("OK")
            except Exception as err:
                logger.info("BROWSER %i: Crash in driver, restarting browser manager \n %s \n %s" % (browser_params['crawl_id'], str(type(err)), str(err)))
                status_queue.put("FAILED")
                break
        else:
            # nothing queued yet -> short sleep instead of spinning
            time.sleep(0.001)
Exemplo n.º 2
0
def BrowserManager(command_queue, status_queue, browser_params, crash_recovery):
    """
    Run inside a browser process: set up the browser and execute commands.

    Arguments:
    command_queue  -- queue polled for command tuples of the form
                      (command, arg0, ..., argN)
    status_queue   -- queue used to report back: first the tuple
                      (prof_folder, driver pid, display_pid, browser_settings),
                      then "OK" after every successful command, or "FAILED"
                      when a command raises (which also ends the loop)
    browser_params -- dict of browser settings; 'proxy' is replaced by the
                      local port returned by init_proxy and 'profile_path'
                      is set to the profile folder
    crash_recovery -- passed through to deploy_browser.deploy_browser
    """
    # sets up the proxy (for now, mitmproxy) if necessary
    proxy_site_queue = None  # used to pass the current site down to the proxy
    if browser_params['proxy']:
        (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(browser_params['aggregator_address'],
                                                                      browser_params['crawl_id'])
        browser_params['proxy'] = local_port

    # Gets the WebDriver, profile folder (i.e. where history/cookies are stored) and display pid (None if not headless)
    (driver, prof_folder, display_pid, browser_settings) = deploy_browser.deploy_browser(browser_params, crash_recovery)

    # Read the extension port -- if extension is enabled
    # TODO: This needs to be cleaner
    if browser_params['browser'] == 'firefox' and browser_params['extension']['enabled']:
        # poll until the port file appears in the profile folder
        # NOTE(review): there is no timeout, so this waits forever if the
        # file is never written
        while not os.path.isfile(prof_folder + 'extension_port.txt'):
            time.sleep(0.01)
        with open(prof_folder + 'extension_port.txt', 'r') as f:
            port = f.read().strip()
        extension_socket = clientsocket()
        extension_socket.connect('127.0.0.1', int(port))
    else:
        extension_socket = None

    # passes the profile folder, WebDriver pid and display pid back to the TaskManager
    # now, the TaskManager knows that the browser is successfully set up
    status_queue.put((prof_folder, int(driver.binary.process.pid), display_pid, browser_settings))
    browser_params['profile_path'] = prof_folder

    # starts accepting arguments until told to die
    while True:
        # no command for now -> sleep to avoid pegging CPU on blocking get
        if command_queue.empty():
            time.sleep(0.001)
            continue

        # reads in the command tuple of form (command, arg0, arg1, arg2, ..., argN) where N is variable
        command = command_queue.get()
        # single-argument call form: prints the same text on Python 2 and is
        # valid syntax on Python 3 (the print statement is not)
        print("EXECUTING COMMAND: " + str(command))

        # attempts to perform an action and return an OK signal
        # if command fails for whatever reason, tell the TaskManager to kill and restart its worker processes
        try:
            command_executor.execute_command(command, driver, proxy_site_queue, browser_settings, browser_params, extension_socket)
            status_queue.put("OK")
        except Exception as ex:
            print("CRASH IN DRIVER ORACLE:" + str(ex) + " RESTARTING BROWSER MANAGER")
            status_queue.put("FAILED")
            break
Exemplo n.º 3
0
def BrowserManager(command_queue, status_queue, browser_params,
                   crash_recovery):
    """
    Run inside a browser process: set up the browser and execute commands.

    Arguments:
    command_queue  -- queue polled for command tuples of the form
                      (command, arg0, ..., argN)
    status_queue   -- queue used to report back: first the tuple
                      (prof_folder, driver pid, display_pid, browser_settings),
                      then "OK" after every successful command, or "FAILED"
                      when a command raises (which also ends the loop)
    browser_params -- dict of browser settings; 'proxy' is replaced by the
                      local port returned by init_proxy and 'profile_path'
                      is set to the profile folder
    crash_recovery -- passed through to deploy_browser.deploy_browser
    """
    # sets up the proxy (for now, mitmproxy) if necessary
    proxy_site_queue = None  # used to pass the current site down to the proxy
    if browser_params['proxy']:
        (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'], browser_params['crawl_id'])
        browser_params['proxy'] = local_port

    # Gets the WebDriver, profile folder (i.e. where history/cookies are stored) and display pid (None if not headless)
    (driver, prof_folder, display_pid,
     browser_settings) = deploy_browser.deploy_browser(browser_params,
                                                       crash_recovery)

    # passes the profile folder, WebDriver pid and display pid back to the TaskManager
    # now, the TaskManager knows that the browser is successfully set up
    status_queue.put((prof_folder, int(driver.binary.process.pid), display_pid,
                      browser_settings))
    browser_params['profile_path'] = prof_folder

    # starts accepting arguments until told to die
    while True:
        # no command for now -> sleep to avoid pegging CPU on blocking get
        if command_queue.empty():
            time.sleep(0.001)
            continue

        # reads in the command tuple of form (command, arg0, arg1, arg2, ..., argN) where N is variable
        command = command_queue.get()
        # single-argument call form: prints the same text on Python 2 and is
        # valid syntax on Python 3 (the print statement is not)
        print("EXECUTING COMMAND: " + str(command))

        # attempts to perform an action and return an OK signal
        # if command fails for whatever reason, tell the TaskManager to kill and restart its worker processes
        try:
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params)
            status_queue.put("OK")
        except Exception as ex:
            print("CRASH IN DRIVER ORACLE:" + str(ex) +
                  " RESTARTING BROWSER MANAGER")
            status_queue.put("FAILED")
            break
Exemplo n.º 4
0
def BrowserManager(command_queue, status_queue, browser_params, manager_params,
                   crash_recovery):
    """
    The BrowserManager function runs in each new browser process.
    It is responsible for listening to command instructions from
    the Task Manager and passing them to the command module to execute
    and interface with Selenium. Command execution status is sent back
    to the TaskManager.

    Messages put on status_queue by this function:
      ('STATUS', 'Proxy Ready', 'READY')    after the (optional) proxy setup
      ('STATUS', 'Browser Ready', 'READY')  once browser and extension are up
      "OK"                                  after each successful command
      ('CRITICAL', <pickled exc_info>)      on ProfileLoadError,
                                            BrowserConfigError, AssertionError
      ('FAILED', None)                      on any other exception
    The function returns after reporting CRITICAL or FAILED.
    """
    # Bound before the try block so the handlers below never reference an
    # unbound name when creating the logger itself is what failed; in that
    # case the failure is still reported on status_queue, just not logged.
    logger = None
    try:
        logger = loggingclient(*manager_params['logger_address'])

        # Start the proxy
        proxy_site_queue = None  # used to pass the current site down to the proxy
        if browser_params['proxy']:
            (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(
                browser_params, manager_params, status_queue)
            browser_params['proxy'] = local_port
        # sent whether or not a proxy was started
        status_queue.put(('STATUS', 'Proxy Ready', 'READY'))

        # Start the virtualdisplay (if necessary), webdriver, and browser
        (driver, prof_folder,
         browser_settings) = deploy_browser.deploy_browser(
             status_queue, browser_params, manager_params, crash_recovery)

        # Read the extension port -- if extension is enabled
        # TODO: This needs to be cleaner
        if browser_params['browser'] == 'firefox' and browser_params[
                'extension_enabled']:
            logger.debug(
                "BROWSER %i: Looking for extension port information in %s" %
                (browser_params['crawl_id'], prof_folder))
            # NOTE(review): no timeout -- waits forever if the file never
            # appears
            while not os.path.isfile(prof_folder + 'extension_port.txt'):
                time.sleep(0.1)
            # extra pause before reading -- presumably lets the writer finish
            time.sleep(0.5)
            with open(prof_folder + 'extension_port.txt', 'r') as f:
                port = f.read().strip()
            extension_socket = clientsocket(serialization='json')
            extension_socket.connect('127.0.0.1', int(port))
        else:
            extension_socket = None

        # tells the TaskManager that the browser is successfully set up
        status_queue.put(('STATUS', 'Browser Ready', 'READY'))
        browser_params['profile_path'] = prof_folder

        # starts accepting arguments until told to die
        while True:
            # no command for now -> sleep to avoid pegging CPU on blocking get
            if command_queue.empty():
                time.sleep(0.001)
                continue

            # reads in the command tuple of form (command, arg0, arg1, arg2, ..., argN) where N is variable
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s" %
                        (browser_params['crawl_id'], str(command)))
            # attempts to perform an action and return an OK signal
            # if command fails for whatever reason, the handlers below report
            # the failure to the TaskManager
            command_executor.execute_command(command, driver, proxy_site_queue,
                                             browser_settings, browser_params,
                                             manager_params, extension_socket)
            status_queue.put("OK")

    except (ProfileLoadError, BrowserConfigError, AssertionError) as e:
        if logger is not None:
            logger.info("BROWSER %i: %s thrown, informing parent and raising" %
                        (browser_params['crawl_id'], e.__class__.__name__))
        # the full exc_info triple is pickled and handed to the parent
        err_info = sys.exc_info()
        status_queue.put(('CRITICAL', cPickle.dumps(err_info)))
        return
    except Exception:
        excp = traceback.format_exception(*sys.exc_info())
        if logger is not None:
            logger.info(
                "BROWSER %i: Crash in driver, restarting browser manager \n %s" %
                (browser_params['crawl_id'], ''.join(excp)))
        status_queue.put(('FAILED', None))
        return
Exemplo n.º 5
0
def BrowserManager(command_queue, status_queue, browser_params, manager_params, crash_recovery):
    """
    The BrowserManager function runs in each new browser process.
    It is responsible for listening to command instructions from
    the Task Manager and passing them to the command module to execute
    and interface with Selenium. Command execution status is sent back
    to the TaskManager.

    Messages put on status_queue by this function:
      ('STATUS','Proxy Ready','READY')    after the (optional) proxy setup
      ('STATUS','Browser Ready','READY')  once browser and extension are up
      "OK"                                after each successful command
      ('CRITICAL', <pickled exc_info>)    on ProfileLoadError,
                                          BrowserConfigError, AssertionError
      ('FAILED', None)                    on any other exception
    The function returns after reporting CRITICAL or FAILED.
    """
    # Bound before the try block so the handlers below never reference an
    # unbound name when creating the logger itself is what failed; in that
    # case the failure is still reported on status_queue, just not logged.
    logger = None
    try:
        logger = loggingclient(*manager_params['logger_address'])

        # Start the proxy
        proxy_site_queue = None  # used to pass the current site down to the proxy
        if browser_params['proxy']:
            (local_port, proxy_site_queue) = deploy_mitm_proxy.init_proxy(browser_params,
                                                                          manager_params,
                                                                          status_queue)
            browser_params['proxy'] = local_port
        # sent whether or not a proxy was started
        status_queue.put(('STATUS','Proxy Ready','READY'))

        # Start the virtualdisplay (if necessary), webdriver, and browser
        (driver, prof_folder, browser_settings) = deploy_browser.deploy_browser(status_queue, browser_params, manager_params, crash_recovery)

        # Read the extension port -- if extension is enabled
        # TODO: This needs to be cleaner
        if browser_params['browser'] == 'firefox' and browser_params['extension_enabled']:
            logger.debug("BROWSER %i: Looking for extension port information in %s" % (browser_params['crawl_id'], prof_folder))
            # NOTE(review): no timeout -- waits forever if the file never
            # appears
            while not os.path.isfile(prof_folder + 'extension_port.txt'):
                time.sleep(0.1)
            # extra pause before reading -- presumably lets the writer finish
            time.sleep(0.5)
            with open(prof_folder + 'extension_port.txt', 'r') as f:
                port = f.read().strip()
            extension_socket = clientsocket(serialization='json')
            extension_socket.connect('127.0.0.1',int(port))
        else:
            extension_socket = None

        # tells the TaskManager that the browser is successfully set up
        status_queue.put(('STATUS','Browser Ready','READY'))
        browser_params['profile_path'] = prof_folder

        # starts accepting arguments until told to die
        while True:
            # no command for now -> sleep to avoid pegging CPU on blocking get
            if command_queue.empty():
                time.sleep(0.001)
                continue

            # reads in the command tuple of form (command, arg0, arg1, arg2, ..., argN) where N is variable
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s" % (browser_params['crawl_id'], str(command)))
            # attempts to perform an action and return an OK signal
            # if command fails for whatever reason, the handlers below report
            # the failure to the TaskManager
            command_executor.execute_command(command,
                                             driver,
                                             proxy_site_queue,
                                             browser_settings,
                                             browser_params,
                                             manager_params,
                                             extension_socket)
            status_queue.put("OK")

    except (ProfileLoadError, BrowserConfigError, AssertionError) as e:
        if logger is not None:
            logger.info("BROWSER %i: %s thrown, informing parent and raising" %
                    (browser_params['crawl_id'], e.__class__.__name__))
        # the full exc_info triple is pickled and handed to the parent
        err_info = sys.exc_info()
        status_queue.put(('CRITICAL',cPickle.dumps(err_info)))
        return
    except Exception:
        excp = traceback.format_exception(*sys.exc_info())
        if logger is not None:
            logger.info("BROWSER %i: Crash in driver, restarting browser manager \n %s" % (browser_params['crawl_id'], ''.join(excp)))
        status_queue.put(('FAILED',None))
        return
Exemplo n.º 6
0
def BrowserManager(command_queue, status_queue, browser_params, manager_params,
                   crash_recovery):
    """
    The BrowserManager function runs in each new browser process.
    It is responsible for listening to command instructions from
    the Task Manager and passing them to the command module to execute
    and interface with Selenium. Command execution status is sent back
    to the TaskManager.

    Messages put on status_queue by this function:
      ('STATUS', 'Browser Ready', (prof_folder, 'READY'))  on startup
      "OK"                               after each successful command
      ('CRITICAL', <pickled exc_info>)   on ProfileLoadError,
                                         BrowserConfigError, AssertionError
      ('FAILED', None)                   on any other exception
    The function returns after reporting CRITICAL or FAILED.
    """
    # Bound before the try block so the handlers below never reference an
    # unbound name when creating the logger itself is what failed; in that
    # case the failure is still reported on status_queue, just not logged.
    logger = None
    try:
        logger = loggingclient(*manager_params['logger_address'])

        # Start the virtualdisplay (if necessary), webdriver, and browser
        driver, prof_folder, browser_settings = deploy_browser.deploy_browser(
            status_queue, browser_params, manager_params, crash_recovery)
        # normalise the profile folder so it always ends with a slash
        if prof_folder[-1] != '/':
            prof_folder += '/'

        # Read the extension port -- if extension is enabled
        # TODO: Initial communication from extension to TM should use sockets
        if (browser_params['browser'] == 'firefox'
                and browser_params['extension_enabled']):
            logger.debug("BROWSER %i: Looking for extension port information "
                         "in %s" % (browser_params['crawl_id'], prof_folder))
            elapsed = 0
            port = None
            ep_filename = os.path.join(prof_folder, 'extension_port.txt')
            # retry for roughly five seconds while the port file is missing
            while elapsed < 5:
                try:
                    with open(ep_filename, 'rt') as f:
                        # NOTE(review): an empty or partially written file
                        # raises ValueError here, which is not retried --
                        # confirm the writer creates the file atomically
                        port = int(f.read().strip())
                        break
                except IOError as e:
                    # only "file does not exist yet" is worth retrying
                    if e.errno != errno.ENOENT:
                        raise
                time.sleep(0.1)
                elapsed += 0.1
            if port is None:
                # try one last time, allowing all exceptions to propagate
                with open(ep_filename, 'rt') as f:
                    port = int(f.read().strip())

            logger.debug("BROWSER %i: Connecting to extension on port %i" %
                         (browser_params['crawl_id'], port))
            extension_socket = clientsocket(serialization='json')
            # port is already an int at this point
            extension_socket.connect('127.0.0.1', port)
        else:
            extension_socket = None

        logger.debug("BROWSER %i: BrowserManager ready." %
                     browser_params['crawl_id'])

        # passes the profile folder back to the TaskManager to signal a
        # successful startup
        status_queue.put(('STATUS', 'Browser Ready', (prof_folder, 'READY')))
        browser_params['profile_path'] = prof_folder

        # starts accepting arguments until told to die
        while True:
            # no command for now -> sleep to avoid pegging CPU on blocking get
            if command_queue.empty():
                time.sleep(0.001)
                continue

            # reads in the command tuple of form:
            # (command, arg0, arg1, arg2, ..., argN) where N is variable
            command = command_queue.get()
            logger.info("BROWSER %i: EXECUTING COMMAND: %s" %
                        (browser_params['crawl_id'], str(command)))
            # attempts to perform an action and return an OK signal
            # if command fails for whatever reason, the handlers below report
            # the failure to the TaskManager
            command_executor.execute_command(command, driver, browser_settings,
                                             browser_params, manager_params,
                                             extension_socket)
            status_queue.put("OK")

    except (ProfileLoadError, BrowserConfigError, AssertionError) as e:
        if logger is not None:
            logger.info("BROWSER %i: %s thrown, informing parent and raising" %
                        (browser_params['crawl_id'], e.__class__.__name__))
        # the full exc_info triple is pickled and handed to the parent
        err_info = sys.exc_info()
        status_queue.put(('CRITICAL', pickle.dumps(err_info)))
        return
    except Exception:
        excp = traceback.format_exception(*sys.exc_info())
        if logger is not None:
            logger.info("BROWSER %i: Crash in driver, restarting browser manager "
                        "\n %s" % (browser_params['crawl_id'], ''.join(excp)))
        status_queue.put(('FAILED', None))
        return
Exemplo n.º 7
0
def BrowserManager(command_queue, status_queue, browser_params,
                   crash_recovery):
    """
    Browser process entry point.

    Performs the start-up sequence (optional proxy, browser deployment,
    optional Firefox extension socket), announces 'READY' on status_queue
    and then serves commands from command_queue. Every executed command is
    acknowledged with "OK"; the first one that raises is acknowledged with
    "FAILED", after which the function returns.
    """
    logger = loggingclient(*browser_params['logger_address'])

    # --- proxy -----------------------------------------------------------
    # the site queue is only available when a proxy is running
    proxy_site_queue = None
    if browser_params['proxy']:
        proxy_setup = deploy_mitm_proxy.init_proxy(
            browser_params['aggregator_address'],
            browser_params['logger_address'],
            browser_params['crawl_id'])
        (listen_port, proxy_site_queue) = proxy_setup
        browser_params['proxy'] = listen_port

    # --- virtual display (if necessary), webdriver and browser -----------
    (driver, prof_folder, browser_settings) = deploy_browser.deploy_browser(
        status_queue, browser_params, crash_recovery)

    def _read_extension_port():
        # Block until the port file exists in the profile folder, then
        # return its stripped contents (conversion to int is left to the
        # caller).
        port_path = prof_folder + 'extension_port.txt'
        while not os.path.isfile(port_path):
            time.sleep(0.1)
        # extra pause before reading -- presumably lets the writer finish
        time.sleep(0.5)
        with open(port_path, 'r') as port_fh:
            return port_fh.read().strip()

    # --- extension socket -------------------------------------------------
    # TODO: This needs to be cleaner
    extension_socket = None
    use_extension = (browser_params['browser'] == 'firefox'
                     and browser_params['extension']['enabled'])
    if use_extension:
        logger.debug(
            "BROWSER %i: Looking for extension port information in %s" %
            (browser_params['crawl_id'], prof_folder))
        raw_port = _read_extension_port()
        extension_socket = clientsocket()
        extension_socket.connect('127.0.0.1', int(raw_port))

    # --- ready ------------------------------------------------------------
    status_queue.put('READY')
    browser_params['profile_path'] = prof_folder

    # --- command loop -----------------------------------------------------
    while True:
        # idle politely until something is queued
        while command_queue.empty():
            time.sleep(0.001)

        # next command tuple: (command, arg0, arg1, ..., argN), N variable
        request = command_queue.get()
        logger.info("BROWSER %i: EXECUTING COMMAND: %s" %
                    (browser_params['crawl_id'], str(request)))

        # acknowledge success with OK; any exception is logged, reported as
        # FAILED and ends this manager so the worker can be restarted
        try:
            command_executor.execute_command(request, driver, proxy_site_queue,
                                             browser_settings, browser_params,
                                             extension_socket)
            status_queue.put("OK")
        except Exception as failure:
            logger.info(
                "BROWSER %i: Crash in driver, restarting browser manager \n %s \n %s"
                % (browser_params['crawl_id'], str(type(failure)),
                   str(failure)))
            status_queue.put("FAILED")
            return