def save_web_page_stats_to_har(url, webdriver_name, save_to_file):
    """Capture the network traffic of loading 'url' and save it to disk.

    The page is loaded through a BrowserMob proxy with the Selenium webdriver
    'webdriver_name'. Two files are written:

    * save_to_file + '.har'  - the HAR archive as indented JSON (UTF-8)
    * save_to_file + '.prof' - the report written by report_har_dictionary()
      for the same HAR data

    The proxy, the BrowserMob server and the browser are always shut down,
    even when setting them up or loading the page fails.
    """
    browsermob_server = Server(Config.browsermob_executable)
    browsermob_server.start()
    proxy_conn = None
    driver = None
    try:
        random_port = get_a_random_free_tcp_port()
        proxy_conn = browsermob_server.create_proxy({"port": random_port})
        driver = create_selenium_webdriver(webdriver_name, proxy_conn)

        proxy_conn.new_har(url, options={'captureHeaders': True})
        driver.get(url)

        # Read the HAR once so that both output files describe exactly the
        # same capture instead of two separate reads of the proxy state.
        har = proxy_conn.har

        har_json = json.dumps(har, ensure_ascii=False,
                              indent=4, separators=(',', ': '))
        # Save '.HAR' file
        with io.open(save_to_file + '.har', mode='wt', buffering=1,
                     encoding='utf8', errors='backslashreplace',
                     newline=None) as output_har_f:
            output_har_f.write(unicode(har_json))

        # Save '.PROF' file with profiling report (timings, sizes, etc)
        with io.open(save_to_file + '.prof', mode='wb', buffering=1,
                     newline=None) as prof_output:
            report_har_dictionary(har, prof_output)

    finally:
        # Each step is protected so that one failing shutdown call cannot
        # keep the remaining resources alive.
        try:
            if proxy_conn is not None:
                proxy_conn.close()
        finally:
            try:
                browsermob_server.stop()
            finally:
                if driver is not None:
                    driver.quit()
Пример #2
0
    def setUp(self):
        """
        Start the browser with a browsermob-proxy instance for use by the test.
        You *must* call this in the `setUp` method of any subclasses before using the browser!

        The test is skipped when the browsermob-proxy server cannot be started
        or the proxy cannot be created. A server that was started is stopped
        again if the rest of the set-up fails.

        Returns:
            None
        """

        server = None
        try:
            # Start server proxy
            server = Server('browsermob-proxy')
            server.start()
            self.proxy = server.create_proxy()
            proxy_host = os.environ.get('BROWSERMOB_PROXY_HOST', '127.0.0.1')
            self.proxy.remap_hosts('localhost', proxy_host)
        except Exception:
            # Do not leave a half-started server process behind. Stopping is
            # best effort: the test is skipped either way.
            if server is not None:
                try:
                    server.stop()
                except Exception:
                    pass
            self.skipTest('Skipping: could not start server with browsermob-proxy.')

        # parent's setUp; the server cleanup is not registered yet at this
        # point, so stop the server here if the parent fails.
        try:
            super(WebAppPerfReport, self).setUp()
        except Exception:
            server.stop()
            raise

        # Initialize vars
        self._page_timings = []
        self._active_har = False
        self._with_cache = False

        # Add one more cleanup for the server. It is registered last so that
        # it keeps running before the cleanups added by the parent's setUp.
        self.addCleanup(server.stop)
Пример #3
0
def setupdevices():
    """
    Description:
        Sets up browser proxy, Selenium driver, and har object

    Usage:
        [driver,proxy,server]=setupdevices()

    Inputs:
        NA

    Output:
        Selenium driver
        Browsermob proxy
        Browsermob server

    The server is stopped again if the proxy or the driver cannot be created.
    """
    #set up proxy
    server = Server("############/browsermob-proxy-2.0-beta-9/bin/browsermob-proxy")
    server.start()
    try:
        proxy = server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        proxy.new_har("________")

        #set up driver
        driver = webdriver.Firefox(firefox_profile=profile)
    except Exception:
        # On failure the caller never receives the server handle, so the
        # server has to be stopped here.
        server.stop()
        raise

    return (driver,proxy,server)
Пример #4
0
def main(argv):
	"""Click through the visible same-domain links of a page behind a proxy.

	Command-line options (argparse reads them from sys.argv; the 'argv'
	parameter is accepted but not used):
		-u  page URL to start from (required)
		-c  JSON file with cookies to add before crawling (optional)
		-w  WebDriver type, "firefox" (default) or "phantomjs"

	Every displayed link whose HREF contains the network location of the
	start URL is clicked, the traffic captured by the proxy is passed to
	show_status_codes(), and the browser returns to the start page.
	The browser and the proxy server are shut down even when crawling fails.
	"""
	init()

	parser = argparse.ArgumentParser()
	parser.add_argument('-u', action='store', dest='start_url', help='Set page URL', required=True)
	parser.add_argument('-c', action='store', dest='cookies_file', help='JSON file with cookies', required=False)
	parser.add_argument('-w', action='store', dest='webdriver_type', help='Set WebDriver type (firefox or phantomjs, firefox by default)', default="firefox", required=False)
	results = parser.parse_args()

	start_url = results.start_url
	cookies_file = results.cookies_file
	webdriver_type = results.webdriver_type

	allowed_domain = urlparse(start_url).netloc

	browsermobproxy_path = get_browsermobproxy_path()

	options = {
		'port': 9090,
	}

	server = Server(browsermobproxy_path,options)
	server.start()
	driver = None
	try:
		proxy = server.create_proxy()

		if webdriver_type == "phantomjs":
			# Point PhantomJS at the proxy that was actually created instead
			# of assuming it listens on a fixed port.
			service_args = ['--proxy=%s' % proxy.proxy,'--proxy-type=http',]
			driver = webdriver.PhantomJS(service_args=service_args)
			driver.set_window_size(1440, 1024)
		else:
			profile = webdriver.FirefoxProfile()
			profile.set_proxy(proxy.selenium_proxy())
			driver = webdriver.Firefox(firefox_profile=profile)

		proxy.new_har('woodpycker', options={'captureHeaders': True, 'captureContent': True})
		driver.get(start_url)

		if cookies_file is not None:
			with open(cookies_file, 'rb') as fp:
				cookies = json.load(fp)
			for cookie in cookies:
				driver.add_cookie(cookie)
			driver.refresh()

		links = driver.find_elements_by_tag_name('a')
		for i in range(len(links)):
			# The link list is re-read after every click, so it may have
			# become shorter than it was when the loop started.
			if i >= len(links):
				break
			link = links[i]
			if not link.is_displayed():
				continue
			url = link.get_attribute('href')
			text = link.get_attribute('text') or ''
			# Anchors without an HREF yield None; skip them and foreign links.
			if not url or allowed_domain not in url:
				continue
			link.click()
			print("%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL))
			show_status_codes(proxy.har,allowed_domain)
			# Only go back after a click; going back without having
			# navigated would leave the start page.
			driver.back()
			driver.refresh()
			links = driver.find_elements_by_tag_name('a')
	finally:
		try:
			if driver is not None:
				driver.quit()
		finally:
			server.stop()
def CaptureNetworkTraffic(url,server_ip,headers,file_path):
	'''
	This function can be used to capture network traffic from the browser. Using this function we can capture header/cookies/http calls made from the browser
	url - Page url
	server_ip - remap host to for specific URL
	headers - this is a dictionary of the headers to be set
	file_path - File in which HAR gets stored

	The HAR is read from the proxy while the server is still running; the
	server and the browser are shut down even when the capture fails.
	'''
	port = {'port':9090}
	server = Server("G:\\browsermob\\bin\\browsermob-proxy",port) #Path to the BrowserMobProxy
	server.start()
	driver = None
	try:
		proxy = server.create_proxy()
		proxy.remap_hosts("www.example.com",server_ip)
		proxy.remap_hosts("www.example1.com",server_ip)
		proxy.remap_hosts("www.example2.com",server_ip)
		proxy.headers(headers)
		profile = webdriver.FirefoxProfile()
		profile.set_proxy(proxy.selenium_proxy())
		driver = webdriver.Firefox(firefox_profile=profile)
		new = {'captureHeaders':'True','captureContent':'True'}
		proxy.new_har("google",new)
		driver.get(url)
		# Keep the HAR now: it cannot be requested once the server is stopped.
		har = proxy.har
	finally:
		try:
			server.stop()
		finally:
			if driver is not None:
				driver.quit()
	with open(file_path,'w') as har_file:
		json.dump(har,har_file)
Пример #6
0
 def get_driver(self, browser, start_beacon_url):
     """Launch Firefox behind a fresh BrowserMob proxy and return the driver.

     A server is started from BROWSERMOB_LOCATION, a proxy created on it is
     stored in self.proxy and a new HAR capture is begun on that proxy.
     'start_beacon_url' is remembered in self.beacon_url. 'browser' is not
     used: Firefox is always launched.
     """
     bmp_server = Server(BROWSERMOB_LOCATION)
     bmp_server.start()
     self.proxy = bmp_server.create_proxy()
     selenium_proxy = self.proxy.selenium_proxy()
     firefox = webdriver.Firefox(proxy=selenium_proxy)
     self.proxy.new_har()
     self.beacon_url = start_beacon_url
     return firefox
Пример #7
0
 def init_proxy_server(self, port=None):
     """Start the BrowserMob server and create a proxy on it.

     'port', when given, is passed on as the 'port' parameter of the new
     proxy; the server itself is started with self.server_port. An upstream
     proxy (self.chained_proxy) and its credentials are forwarded when set.

     Returns the (server, proxy) pair.
     """
     proxy_params = {}
     if port is not None:
         proxy_params['port'] = port

     upstream = self.chained_proxy
     if upstream is not None:
         # The parameter name depends on the scheme of the upstream proxy.
         upstream_key = 'httpsProxy' if self.is_https else 'httpProxy'
         proxy_params[upstream_key] = upstream

     credentials = (
         ('proxyUsername', self.proxy_username),
         ('proxyPassword', self.proxy_password),
     )
     for param_name, param_value in credentials:
         if param_value is not None:
             proxy_params[param_name] = param_value

     bmp_server = Server('C://browsermob-proxy//bin//browsermob-proxy.bat', options={"port": self.server_port})
     bmp_server.start()
     return bmp_server, bmp_server.create_proxy(params=proxy_params)
Пример #8
0
def main():
	"""Click through the visible same-domain links of the page in sys.argv[1].

	The page is opened in Firefox behind a BrowserMob proxy. Every displayed
	link whose HREF contains the network location of the start URL is
	clicked with a fresh HAR capture, the captured traffic is passed to
	show_status_codes(), and the browser returns to the start page.

	Exits with status 1 when no URL is given. The browser and the proxy
	server are shut down even when crawling fails.
	"""
	init()
	if len(sys.argv) >= 2:
		start_url = sys.argv[1]
	else:
		print("You must specify page URL!")
		# A missing argument is a usage error, so report a non-zero status.
		sys.exit(1)

	allowed_domain = urlparse(start_url).netloc

	browsermobproxy_path = "/usr/local/opt/browsermobproxy/bin/browsermob-proxy"

	options = {
		'port': 9090,
	}

	server = Server(browsermobproxy_path,options)
	server.start()
	driver = None
	try:
		proxy = server.create_proxy()

		profile = webdriver.FirefoxProfile()
		profile.set_proxy(proxy.selenium_proxy())
		driver = webdriver.Firefox(firefox_profile=profile)

		driver.get(start_url)

		links = driver.find_elements_by_tag_name('a')
		for i in range(len(links)):
			# The link list is re-read after every click, so it may have
			# become shorter than it was when the loop started.
			if i >= len(links):
				break
			link = links[i]
			if not link.is_displayed():
				continue
			url = link.get_attribute('href')
			text = link.get_attribute('text') or ''
			# Anchors without an HREF yield None; skip them and foreign links.
			if not url or allowed_domain not in url:
				continue
			proxy.new_har('demo')
			link.click()
			print("%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL))
			show_status_codes(proxy.har,allowed_domain)
			# Only go back after a click; going back without having
			# navigated would leave the start page.
			driver.back()
			driver.refresh()
			links = driver.find_elements_by_tag_name('a')
	finally:
		try:
			if driver is not None:
				driver.quit()
		finally:
			server.stop()
class CreateHar(object):
    """Create HTTP archive (HAR) files by loading pages through BrowserMob.

    Typical use: start_all(), one or more create_har() calls, stop_all().
    """

    def __init__(self, mob_path):
        """Remember the path of the BrowserMob executable; nothing is started."""
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(title, result):
        """Write 'result' to the file '<title>.har' encoded as UTF-8.

        Byte strings are taken to be UTF-8 already; anything else is
        converted to text first. Writing through an explicit encoding keeps
        non-ASCII characters in the HAR from raising an encoding error.
        """
        import io

        if isinstance(result, bytes):
            text = result.decode('utf-8')
        else:
            text = u'%s' % (result,)
        with io.open(title + '.har', 'w', encoding='utf-8') as har_file:
            har_file.write(text)

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """Start Firefox with a profile that routes traffic through the proxy."""
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self):
        """Start the server first, then the driver that depends on its proxy."""
        self.__start_server()
        self.__start_driver()

    def create_har(self, title, url):
        """Load 'url' under a new HAR named 'title' and save it as '<title>.har'."""
        self.proxy.new_har(title)
        self.driver.get(url)
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(title, result)

    def stop_all(self):
        """Stop the server and quit the driver.

        Parts that were never started are skipped, and the driver is quit
        even when stopping the server fails.
        """
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            if self.driver is not None:
                self.driver.quit()
Пример #10
0
	def fetch(url, config, output_directory, fetchEngine="browsermobproxy+selenium", browser="firefox"):
		"""Capture the HAR of 'url' and store it in 'output_directory'.

		fetchEngine selects how the page is loaded:
			"phantomjs" / "ph": run config['fetchEngines']['phantomjs_command']
				with "$url" replaced by the URL and use its output.
			"browsermobproxy+selenium" / "bs": load the page in a browser
				("firefox"/"ff", anything else means Chrome) behind a
				BrowserMob proxy started from
				config['fetchEngines']['browsermobproxy_binary'].
		Any other value terminates the program via sys.exit().

		Returns the name of the written ".har" file, or None when the engine
		produced no data. The file name is built from the URL without its
		scheme (with "/" replaced by "_") and a UTC timestamp.
		"""

		if fetchEngine in ("phantomjs", "ph"):

			# NOTE(review): the URL is inserted into a shell command line
			# without quoting. Only pass trusted URLs here, or quote "$url"
			# in the configured command.
			data = subprocess.check_output( config['fetchEngines']['phantomjs_command'].replace("$url", url), shell=True )

		elif fetchEngine in ("browsermobproxy+selenium", "bs"):

			from browsermobproxy import Server
			from selenium import webdriver

			server = Server(config['fetchEngines']['browsermobproxy_binary'])
			server.start()
			driver = None
			try:
				proxy = server.create_proxy()

				if browser in ("firefox", "ff"):
					profile = webdriver.FirefoxProfile()
					profile.set_proxy(proxy.selenium_proxy())
					driver = webdriver.Firefox(firefox_profile=profile)
				else:
					chrome_options = webdriver.ChromeOptions()
					chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
					driver = webdriver.Chrome(chrome_options = chrome_options)

				proxy.new_har(url, options={'captureHeaders': True})
				driver.get(url)

				data = json.dumps(proxy.har, ensure_ascii=False)
			finally:
				# Shut both down even when the page load fails.
				try:
					server.stop()
				finally:
					if driver is not None:
						driver.quit()
		else:
			sys.exit("Unrecognized engine.")

		if not data:
			return None

		# Both engines may hand back either text or bytes; only text needs
		# encoding (encoding a byte string fails on non-ASCII content).
		if not isinstance(data, bytes):
			data = data.encode("utf8")

		# A URL path must not turn into sub-directories of the output folder.
		page_name = url.replace("http://", "").replace("https://", "").replace("/", "_")
		fileName = output_directory + "/" + page_name + "_" + strftime("%Y-%m-%d_%H:%M:%S", gmtime()) + ".har"
		with open(fileName, "wb") as f:
			f.write(data)

		return fileName
Пример #11
0
def get_driver_and_proxy():
    """Return the shared (driver, proxy) pair, creating it on first use.

    When no driver exists yet:
    * a BrowserMob proxy is started if config['browsermob']['collect-har']
      is a non-zero number,
    * a virtual display is started if config['xconfig']['headless'] is a
      non-zero number,
    * Firefox is launched (through the proxy when there is one) with an
      implicit wait of 60 seconds.

    'proxy' keeps whatever value the global already had when no proxy is
    started here.
    """
    global display
    global driver
    global proxy

    if driver:
        return driver, proxy

    def enabled(section, option):
        # Missing sections and options count as switched off.
        return int(config.get(section, {}).get(option, 0))

    if enabled('browsermob', 'collect-har'):
        from browsermobproxy import Server
        bmp_server = Server(config['browsermob']['path'])
        bmp_server.start()
        proxy = bmp_server.create_proxy()

    if enabled('xconfig', 'headless'):
        display = Display(visible=0, size=(800, 600))
        display.start()

    ff_profile = webdriver.FirefoxProfile()
    if proxy:
        ff_profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=ff_profile)
    driver.implicitly_wait(60)

    return driver, proxy
Пример #12
0
def bmp_proxy():
    """
    Creates a proxy and a server instance with browsermobproxy.
    Reference: http://browsermob-proxy-py.readthedocs.org/en/latest/index.html

    The server listens on the port given by the BROWSERMOB_PROXY_PORT
    environment variable (default 8080), and 'localhost' is remapped to
    BROWSERMOB_PROXY_HOST (default '127.0.0.1').

    Returns:
        (proxy, server)
    """
    def create_proxy():
        """
        Try to create a proxy.

        Returns (True, proxy) on success and (False, None) on an ordinary
        error so that the attempt can be repeated.
        """
        try:
            proxy = server.create_proxy()
        except Exception:
            # Only ordinary errors mean "not ready yet". KeyboardInterrupt
            # and SystemExit must end the wait instead of being retried.
            return False, None
        return True, proxy

    port = int(os.environ.get('BROWSERMOB_PROXY_PORT', 8080))
    server = Server('browsermob-proxy', options={'port': port})

    try:
        # If anything in this block raises an exception, make sure we kill
        # the server process before exiting.
        server.start()

        # Using the promise module to wait for the server to be responsive.
        # The server.create_proxy function sometimes raises connection
        # refused errors if the server isn't ready yet.
        proxy = Promise(
            create_proxy, 'browsermobproxy is responsive', timeout=10
        ).fulfill()

        proxy_host = os.environ.get('BROWSERMOB_PROXY_HOST', '127.0.0.1')
        proxy.remap_hosts('localhost', proxy_host)
    except:
        # Deliberately catches everything (including interrupts): the server
        # process is stopped and the exception is re-raised unchanged.
        stop_server(server)
        raise

    return proxy, server
Пример #13
0
class Proxy(object):
        """BrowserMob proxy that records one HAR per test.

        The proxy is started on construction and records under the name
        'test_id'; stop_proxy() writes the capture to '<test_id>.har'.
        """

        proxy = None
        proxy_server = None
        test_id = None

        def __init__(self, test_id):
            self.test_id = test_id
            self.start_proxy()

        def start_proxy(self):
            """Start the server, create the proxy and begin a new HAR.

            The blacklist from config.blacklist is applied when it is set.
            Returns self.
            """
            self.proxy_server = Server(config.proxy_bin)
            self.proxy_server.start()
            self.proxy = self.proxy_server.create_proxy()
            if config.blacklist:
                self.set_blacklist(config.blacklist)
            self.proxy.new_har(self.test_id)
            logger.debug('Browsermob proxy started.')
            return self

        def stop_proxy(self):
            """Write the HAR to '<test_id>.har' and stop the server.

            The server is stopped and the references are cleared even when
            reading or writing the HAR fails.
            """
            filename = '{}.har'.format(self.test_id)
            try:
                # Read the HAR before opening the file so that a failed read
                # does not leave an empty file behind.
                har = self.proxy.har
                with open(filename, 'w') as harfile:
                    json.dump(har, harfile)
            finally:
                self.proxy_server.stop()
                self.proxy = None
                self.proxy_server = None
            logger.debug('Browsermob proxy stopped. HAR created: {}'
                         .format(filename))

        def set_blacklist(self, domain_list):
            """Make the proxy answer 404 for URLs on the given domains."""
            for domain in domain_list:
                # NOTE(review): the domain is inserted as a regular
                # expression (its dots match any character) and the '*'
                # right after it applies to the domain's last character -
                # confirm this pattern is what is intended.
                self.proxy.blacklist("^https?://([a-z0-9-]+[.])*{}*.*"
                                     .format(domain), 404)
            logger.debug("Proxy blacklist set.")

        def get_blacklist(self):
            """Request this proxy's blacklist via config.proxy_api.

            Returns the result of requests.get().
            """
            return requests.get('{}{}/blacklist'
                                .format(config.proxy_api, self.proxy.port))
Пример #14
0
class ad_driver():
    """Browser driven through a BrowserMob proxy to record test traffic."""

    _driver = None
    _server = None
    _proxy = None

    def __init__(self, path_to_batch, browser="chrome"):
        """Start the proxy server and a browser that sends traffic through it.

        path_to_batch - path of the BrowserMob start script
        browser - 'chrome' (default) or 'ff'

        Raises ValueError for any other browser name. Whatever was already
        started is shut down again when the set-up fails.
        """
        # start browsermob proxy
        self._server = Server(path_to_batch)
        self._server.start()
        try:
            self._proxy = self._server.create_proxy()

            # Init browser profile. Names are compared by value: identity
            # ('is') only matches by accident of string interning.
            if browser == "chrome":
                PROXY = "localhost:%s" % self._proxy.port  # IP:PORT or HOST:PORT
                chrome_options = webdriver.ChromeOptions()
                chrome_options.add_argument('--proxy-server=%s' % PROXY)
                self._driver = webdriver.Chrome(chrome_options=chrome_options)
            elif browser == "ff":
                profile = webdriver.FirefoxProfile()
                profile.set_proxy(self._proxy.selenium_proxy())
                self._driver = webdriver.Firefox(firefox_profile=profile)
            else:
                raise ValueError("Please set 'browser' variable to any of the value \n 'chrome', 'ff' !")
            self._driver.maximize_window()
            self._driver.implicitly_wait(20)
        except Exception:
            self.quit()
            raise

    def execute(self, test):
        """Run one test and return the HAR entries it produced.

        'test' is a mapping with the keys 'name' (the function in test_steps
        to call with the driver, also used as HAR name) and 'file' (the page
        inside _test_data_dir to open first).
        """
        self._proxy.new_har(test["name"])
        self._driver.get(_test_data_dir + os.sep + test['file'])
        time.sleep(2)
        callToTestMethod = getattr(test_steps, test["name"])
        callToTestMethod(self._driver)
        har = self._proxy.har
        return har['log']['entries']

    def quit(self):
        """Stop the proxy server and quit the browser.

        Parts that were never started are skipped, and the browser is quit
        even when stopping the server fails.
        """
        try:
            if self._server is not None:
                self._server.stop()
        finally:
            if self._driver is not None:
                self._driver.quit()
def init(defaultPort,path):
    """Initialise the proxy manager and start the proxy server if needed.

    defaultPort - port for the server; converted with int()
    path - path of the BrowserMob executable

    The module-level lock is held while the state is updated and is
    released again even when starting the server fails.
    """
    global lock
    global serverPort
    global proxyIsInit
    global serverIsRunning
    global proxyServer
    global indexCount

    lock.acquire()
    try:
        serverPort = int(defaultPort)
        proxyIsInit = True
        print("Initializing Proxy Manager - server port : set to : " + str(serverPort))

        #checks to see if the server has already started
        if not serverIsRunning:
            proxyServer = Server(path,{'port':int(serverPort)})
            proxyServer.start()
            serverIsRunning = True
    finally:
        # Without this a failed start would leave the lock held forever and
        # block every later caller.
        lock.release()
Пример #16
0
    def _setup_proxy_server(self, downstream_kbps=None, upstream_kbps=None,
                            latency=None):
        """Start a BrowserMob server with one proxy and optional limits.

        Only the limits that are given (and non-zero) are applied.

        Returns the (server, proxy) pair.
        """
        bmp_server = Server(BROWSERMOB_PROXY_PATH)
        bmp_server.start()
        bmp_proxy = bmp_server.create_proxy()

        # The proxy server is pretty sluggish, setting the limits might not
        # achieve the desired behavior.
        requested = (
            ('downstream_kbps', downstream_kbps),
            ('upstream_kbps', upstream_kbps),
            ('latency', latency),
        )
        limits = dict((name, value) for name, value in requested if value)

        if limits:
            bmp_proxy.limits(limits)

        return bmp_server, bmp_proxy
Пример #17
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a BrowserMob server around each test."""

    def __init__(self, *args, **kwargs):
        # Both settings must be passed as keywords; a missing one raises
        # KeyError. They are removed from kwargs.
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the BrowserMob server, on the configured port when one is set.

        Raises ValueError when no BrowserMob script was configured.
        """
        server_options = (
            {'port': self.browsermob_port} if self.browsermob_port else {})
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        bmp_server = Server(self.browsermob_script, options=server_options)
        self.browsermob_server = bmp_server
        bmp_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy and point the browser's HTTP/SSL proxy prefs at it.

        Returns the proxy client.
        """
        client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            set_proxy_prefs = """
                Components.utils.import("resource://gre/modules/Preferences.jsm");
                Preferences.set("network.proxy.type", 1);
                Preferences.set("network.proxy.http", "localhost");
                Preferences.set("network.proxy.http_port", {port});
                Preferences.set("network.proxy.ssl", "localhost");
                Preferences.set("network.proxy.ssl_port", {port});
            """.format(port=client.port)
            self.marionette.execute_script(set_proxy_prefs)
        return client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        running = self.browsermob_server
        if not running:
            return
        running.stop()
        self.browsermob_server = None

    # Also stop the server when the object is garbage-collected.
    __del__ = tearDown
Пример #18
0
def create_hars(urls, browsermob_dir, run_cached):
    """Save a HAR file for every URL, each with its own proxy and browser.

    urls - iterable of page URLs
    browsermob_dir - BrowserMob installation directory (contains
        'bin/browsermob-proxy')
    run_cached - when true, load each page a second time in the same
        browser and save that capture as '<slug>-cached-<time>.har'

    Files are written with save_har(). The browser and the proxy server of
    a URL are shut down even when loading or saving fails.
    """
    for url in urls:
        print('starting browsermob proxy')
        server = Server('{}/bin/browsermob-proxy'.format(browsermob_dir))
        server.start()
        driver = None
        try:
            proxy = server.create_proxy()
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)

            # (HAR name, progress message) for every load of this page.
            base_slug = slugify(url)
            captures = [(base_slug, 'loading page: {}')]
            if run_cached:
                captures.append(('{}-cached'.format(base_slug),
                                 'loading cached page: {}'))

            for url_slug, loading_message in captures:
                proxy.new_har(url_slug)

                print(loading_message.format(url))
                driver.get(url)

                har_name = '{}-{}.har'.format(url_slug, time.time())
                print('saving HAR file: {}'.format(har_name))
                save_har(har_name, proxy.har)
        finally:
            try:
                if driver is not None:
                    driver.quit()
            finally:
                print('stopping browsermob proxy')
                server.stop()
Пример #19
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a BrowserMob server around each test."""

    def __init__(self, *args, **kwargs):
        # Both settings must be passed as keywords; a missing one raises
        # KeyError. They are removed from kwargs.
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the BrowserMob server, on the configured port when one is set.

        Raises ValueError when no BrowserMob script was configured.
        """
        server_options = (
            {'port': self.browsermob_port} if self.browsermob_port else {})
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        bmp_server = Server(self.browsermob_script, options=server_options)
        self.browsermob_server = bmp_server
        bmp_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy and point the browser's HTTP/SSL proxy prefs at it.

        Returns the proxy client.
        """
        client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            set_proxy_prefs = """
                Services.prefs.setIntPref('network.proxy.type', 1);
                Services.prefs.setCharPref('network.proxy.http', 'localhost');
                Services.prefs.setIntPref('network.proxy.http_port', %(port)s);
                Services.prefs.setCharPref('network.proxy.ssl', 'localhost');
                Services.prefs.setIntPref('network.proxy.ssl_port', %(port)s);
            """ % {"port": client.port}
            self.marionette.execute_script(set_proxy_prefs)
        return client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        running = self.browsermob_server
        if not running:
            return
        running.stop()
        self.browsermob_server = None

    # Also stop the server when the object is garbage-collected.
    __del__ = tearDown
Пример #20
0
def retrieve_har():
    """Load the module-level 'url' in Firefox, start the player, save the HAR.

    The page is loaded through a BrowserMob proxy, the element named
    'vplayer' inside the first frame is clicked (with one retry after five
    seconds) and the captured HAR is written as indented UTF-8 JSON to
    '<har_save_path>/<har_name>.har'.

    Uses the module-level names 'path', 'ff_profile', 'url', 'har_name' and
    'har_save_path'. When the player cannot be clicked on either attempt,
    nothing is saved and the program ends via sys.exit(). The server and
    the browser are always shut down.
    """
    import io

    print("Retrieving .har file using generated url...")

    har_name_ex = har_name + ".har"
    complete_har_path = os.path.join(har_save_path, har_name_ex)

    def click_player(browser):
        """Return True when 'vplayer' inside the first frame was clicked."""
        try:
            # Start from the top document: after a failed attempt the
            # browser may still be inside the frame, where frame(0) would
            # select a nested frame instead.
            browser.switch_to.default_content()
            browser.switch_to.frame(0)   # Clicking the video automagically
            browser.find_element_by_name('vplayer').click()
        except Exception:
            return False
        return True

    # Magic starts here:
    server = Server(path)
    server.start()
    driver = None
    clicked = False
    try:
        proxy = server.create_proxy()

        profile = webdriver.FirefoxProfile(ff_profile)
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)

        # Broken script to load the page in Google Chrome instead of Mozilla Firefox:
        #   chrome_options = webdriver.ChromeOptions()
        #   chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        #   driver = webdriver.Chrome(chrome_options = chrome_options)

        proxy.new_har(har_name, options={'captureHeaders': True})
        driver.get(url)

        #Trying to click 'vplayer'
        clicked = click_player(driver)
        if not clicked:
            #And if that somehow doesn't work
            print("Couldn't click player!")
            print("Trying again in 5 seconds...")

            time.sleep(5)

            #Try it again...
            clicked = click_player(driver)
            if not clicked:
                #And if that doesn't work either
                print("Not able to click the video player")

        if clicked:
            time.sleep(1)

            #Exporting the wanted .har file
            result = json.dumps(proxy.har, ensure_ascii=False, indent=4)    # indent=4 puts the .har file on separated lines
            if isinstance(result, bytes):
                result = result.decode('utf-8')

            #And write it to an automatically created file; the explicit
            #encoding keeps non-ASCII characters from raising an error
            with io.open(complete_har_path, 'w', encoding='utf-8') as har_file:
                har_file.write(result)
    finally:
        #Stop the server and the driver
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()

    if not clicked:
        # Give up only when both attempts failed.
        time.sleep(3)
        sys.exit()
Пример #21
0
    def run_webdriver(self, start_url, port, config, download_dir):
        """Analyse 'start_url' in Firefox behind a BrowserMob proxy.

        start_url - parsed URL; its 'hostname' names the HAR and the whole
            object is handed to self.analyse_page()
        port - port for the BrowserMob server
        config - settings object; 'referer', 'useragent', 'url' and
            'firefoxprofile' are read from it
        download_dir - directory Firefox saves downloads to

        The module-level 'useragent' and 'referer' are set from the config
        (or to built-in defaults). Errors while browsing are logged, not
        raised. The proxy, the server and the browser are always shut down.
        """
        global useragent
        global referer
        import json

        urllib3_logger = logging.getLogger('urllib3')
        urllib3_logger.setLevel(logging.DEBUG)
        logging.info("Starting WebRunner")
        firefox_profile = None
        server = None
        proxy = None
        har = None

        if config.referer:
            referer = config.referer
        else:
            referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
                config.url, config.url)

        if config.useragent:
            useragent = config.useragent
        else:
            useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

        logging.debug("Running with UserAgent: {}".format(useragent))
        logging.debug("Running with Referer: {}".format(referer))
        logging.debug("Checking URL: {}".format(config.url))

        server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
        server.start()
        driver = None
        try:
            proxy = server.create_proxy()
            proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})

            # json.dumps() renders the referer as a quoted and escaped string
            # literal, so quotes or backslashes in it cannot break the script.
            request_js = (
                'var referer = request.getProxyRequest().getField("Referer");'
                'addReferer(request);'
                'function addReferer(r){'
                'if (! referer ) {'
                'r.addRequestHeader("Referer",' + json.dumps(referer) + ');'
                '}'
                'return;'
                '}')
            proxy.request_interceptor(request_js)
            if config.firefoxprofile:
                firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
            else:
                firefox_profile = FirefoxProfile()

            logging.debug("Using profile {}".format(firefox_profile.path))

            firefox_profile.set_preference("security.OCSP.enabled", 0)
            firefox_profile.set_preference("browser.download.folderList", 2)
            firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
            firefox_profile.set_preference("browser.download.dir", download_dir)
            # Adjacent literals are joined without extra characters; a
            # backslash continuation inside one literal would embed the next
            # line's indentation in the MIME type list.
            firefox_profile.set_preference(
                "browser.helperApps.neverAsk.saveToDisk",
                "application/x-xpinstall;application/x-zip;"
                "application/x-zip-compressed;application/octet-stream;"
                "application/zip;application/pdf;application/msword;"
                "text/plain;application/octet")
            firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
            firefox_profile.set_preference("network.proxy.type", 1)
            firefox_profile.set_proxy(proxy.selenium_proxy())

            try:
                driver = WebDriver(firefox_profile)
                proxy.new_har(start_url.hostname,
                              options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
                self.analyse_page(driver, start_url)
                har = proxy.har
                logging.info("Stopping WebRunner")
            except Exception as e:
                logging.error(e)
        finally:
            # Shut down whatever was started; a browser that never came up
            # must not keep the proxy and the server from being released.
            try:
                if proxy is not None:
                    proxy.close()
            finally:
                try:
                    server.stop()
                finally:
                    if driver is not None:
                        driver.quit()

        if har is not None:
            try:
                har = Har(har)
            except Exception as e:
                logging.error(e)
class WebTrafficGenerator:
    """Replay a recorded browsing history through a pool of proxied browsers.

    The history file (``args['in_file']``) is read line by line; the HARs
    returned by the browser workers and several CDF plots are written to
    ``args['out_folder']``.
    """

    # Per-entry HAR timing phases gathered into ``self.stats`` (plot order).
    _TIMING_PHASES = ("blocked", "dns", "connect", "send", "wait", "receive", "ssl")

    def __init__(self, args):
        """Store the run configuration.

        Args:
            args: mapping providing the keys ``in_file``, ``out_folder``,
                ``timeout``, ``headers``, ``max_interval``, ``browsers``,
                ``limit_urls``, ``no_sleep`` and ``no_https``.
        """
        # An unset (or empty) BROWSERMOBPROXY_BIN falls back to the local path.
        self.browser_mob_proxy_location = (
            os.environ.get("BROWSERMOBPROXY_BIN")
            or "./browsermob-proxy/bin/browsermob-proxy"
        )

        # Parse arguments
        self.urls_file = args['in_file']
        self.out_stats_folder = args['out_folder']
        self.timeout = args['timeout']
        self.save_headers = args['headers']
        self.max_interval = args['max_interval']
        self.browsers_num = args['browsers']
        self.max_requests = args['limit_urls']
        self.no_sleep = args['no_sleep']
        self.no_https = args['no_https']

    def run(self):
        """Execute the whole experiment.

        Reads the history, derives the thinking-time distribution, starts the
        proxy server and the browser workers, feeds them URLs, then stores the
        collected HARs and the timing plots. Any exception is printed with its
        traceback instead of being propagated; the temporary download
        directory is always removed.
        """
        # create temporary directory for downloads
        self.temp_dir = tempfile.TemporaryDirectory()

        try:
            self.thinking_times = []

            visit_timestamps = self._load_history()

            # A falsy limit means "request every URL".
            if not self.max_requests:
                self.max_requests = len(self.urls)

            visit_timestamps.sort()

            # Gaps between consecutive visits; longer pauses than
            # max_interval are not counted as thinking time.
            for earlier, later in zip(visit_timestamps, visit_timestamps[1:]):
                think_time = later - earlier
                if think_time <= self.max_interval:
                    self.thinking_times.append(think_time)

            self.cdf, self.inverse_cdf, self.cdf_samples = compute_cdf(self.thinking_times)

            print ("Number of URLs: "+str(len(self.urls)))

            self._prepare_stats_folder()

            # Plot history statistics
            self.plot_thinking_time_cdf()

            # Start Proxy
            self.server = Server(self.browser_mob_proxy_location)
            self.server.start()

            # start queues
            self.urls_queue = Queue()
            self.hars_queue = Queue()

            # start Barrier (for coordinating proxy server restart)
            self.barrier = Barrier(self.browsers_num, action=self.restart_proxy_server)

            try:
                self.workers = [Browser(i, self.server,
                                        self.urls_queue, self.hars_queue,
                                        self.barrier,
                                        self.timeout, self.save_headers,
                                        self.temp_dir.name)
                                for i in range(self.browsers_num)]

                for w in self.workers:
                    w.start()

                number_of_requests = 0
                # Start requesting pages
                for url in self.urls:
                    if number_of_requests == self.max_requests:
                        break

                    self.urls_queue.put(url)
                    number_of_requests += 1

                    if not self.no_sleep:
                        time.sleep(self.get_thinking_time())

                # One None per worker is queued after the last URL.
                for _ in self.workers:
                    self.urls_queue.put(None)

                # Each worker hands back one list of HARs.
                self.hars = []
                for _ in self.workers:
                    self.hars.extend(self.hars_queue.get())

                # write HAR file
                with open(os.path.join(self.out_stats_folder, "HARs.json"), "w") as f:
                    json.dump(self.hars, f)

                self._collect_stats()

                # Save statistics
                self.plot_stats()

                for w in self.workers:
                    w.join()

            except KeyboardInterrupt:
                pass

            finally:
                self.urls_queue.close()
                self.hars_queue.close()
                self.server.stop()

        except Exception as e:
            print("Exception: " + str(e))

            import traceback
            traceback.print_exc()

        finally:
            self.temp_dir.cleanup()

    @staticmethod
    def _is_local_url(url):
        """Return True for ``file://`` URLs and http(s) hosts starting with ``10.`` or ``192.168.``."""
        lowered = url.lower()
        if lowered.startswith("file://"):
            return True
        for scheme in ("http://", "https://"):
            if lowered.startswith(scheme):
                # Look at the host part, right after the scheme.
                return lowered.startswith(("10.", "192.168."), len(scheme))
        return False

    def _load_history(self):
        """Read the history file, fill ``self.urls`` and return the visit times in seconds.

        Every line is split on whitespace into a timestamp (first field) and a
        URL (second field). Lines with fewer than two fields are skipped.
        """
        self.urls = []
        visit_timestamps = []

        with open(self.urls_file, "r") as f:
            history = f.read().splitlines()

        for line in history:
            entry = line.split()

            # Blank or truncated lines carry no "<timestamp> <url>" pair.
            if len(entry) < 2:
                continue

            url = entry[1]
            if self._is_local_url(url):
                continue

            # convert timestamp in seconds
            visit_timestamps.append(float(entry[0]) / 1000000)

            # The visit time is kept even when the https URL itself is dropped.
            if not self.no_https or not url.lower().startswith("https://"):
                self.urls.append(url)

        return visit_timestamps

    def _prepare_stats_folder(self):
        """Create the statistics folder, or remove the regular files it already holds."""
        if not os.path.exists(self.out_stats_folder):
            os.makedirs(self.out_stats_folder)
            return

        for name in os.listdir(self.out_stats_folder):
            file_path = os.path.join(self.out_stats_folder, name)
            if os.path.isfile(file_path):
                os.remove(file_path)

    def _collect_stats(self):
        """Fill ``self.stats`` from ``self.hars``, ignoring values reported as -1."""
        self.stats = {"totalTime": []}
        self.stats.update((phase, []) for phase in self._TIMING_PHASES)

        for har in self.hars:
            total_time = har["log"]["totalTime"]
            if total_time != -1:
                self.stats["totalTime"].append(total_time)

            for entry in har["log"]["entries"]:
                if self.no_https and entry["request"]["url"].lower().startswith("https://"):
                    continue

                timings = entry["timings"]
                for phase in self._TIMING_PHASES:
                    if timings[phase] != -1:
                        self.stats[phase].append(timings[phase])

    def restart_proxy_server(self):
        """Stop the current proxy server (best effort) and start a fresh one.

        Registered as the ``action`` of the barrier created in ``run``.
        """
        try:
            self.server.stop()
        except Exception as e:
            print("Failed to stop proxy server. Exception: " + str(e))

        # Start Proxy
        self.server = Server(self.browser_mob_proxy_location)
        self.server.start()

        print("Proxy server restarted")

    def plot_thinking_time_cdf(self):
        """Save the thinking-time CDF as ``thinking_time_cdf.png``."""
        x = np.linspace(min(self.thinking_times), max(self.thinking_times), num=10000, endpoint=True)

        # Plot the cdf
        fig = plt.figure()
        axes = fig.add_subplot(111)
        axes.plot(x, self.cdf(x))
        axes.set_ylim((0, 1))
        axes.set_xlabel("Seconds")
        axes.set_ylabel("CDF")
        axes.set_title("Thinking time")
        axes.grid(True)

        fig.savefig(os.path.join(self.out_stats_folder, "thinking_time_cdf.png"))
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)

    def plot_thinking_time_inverse_cdf(self):
        """Save the inverse thinking-time CDF as ``thinking_time_inverse_cdf.png``."""
        x = np.linspace(min(self.cdf_samples), max(self.cdf_samples), num=10000, endpoint=True)

        # Plot the inverse cdf
        fig = plt.figure()
        axes = fig.add_subplot(111)
        axes.plot(x, self.inverse_cdf(x))
        axes.set_xlim((0, 1))
        axes.set_ylabel("Seconds")
        axes.set_xlabel("CDF")
        axes.set_title("Thinking time")
        axes.grid(True)

        fig.savefig(os.path.join(self.out_stats_folder, "thinking_time_inverse_cdf.png"))
        plt.close(fig)

    def get_thinking_time(self):
        """Draw a thinking time by feeding a uniform random value to the inverse CDF."""
        rand = random.uniform(min(self.cdf_samples), max(self.cdf_samples))
        return float(self.inverse_cdf(rand))

    def plot_stats(self):
        """Save CDF plots of the page load time and of the per-resource timings.

        Writes ``page_load_cdf.png``, ``timings_cdf.png`` and
        ``timings_cdf_log.png`` into the statistics folder.
        """
        fig_total = plt.figure()
        axes_total = fig_total.add_subplot(111)

        fig_timings = plt.figure()
        axes_timings = fig_timings.add_subplot(1, 1, 1)

        fig_timings_log = plt.figure()
        axes_timings_log = fig_timings_log.add_subplot(1, 1, 1)

        for key in self.stats:
            samples = self.stats[key]

            # Series with fewer than two distinct values are not plotted.
            if len(set(samples)) <= 1:
                continue

            cdf = compute_cdf(samples)[0]
            lowest, highest = min(samples), max(samples)
            x = np.linspace(lowest, highest, num=10000, endpoint=True)

            # Plot the cdf
            if key == "totalTime":
                axes_total.plot(x/1000, cdf(x), label=key)
                continue

            axes_timings.plot(x, cdf(x), label=key)

            # zero is not valid with log axes
            if lowest == 0:
                non_zero_min = find_non_zero_min(samples)

                if non_zero_min == 0:
                    continue

                x = np.linspace(non_zero_min, highest, num=10000, endpoint=True)

            axes_timings_log.plot(x, cdf(x), label=key)

        axes_total.set_ylim((0, 1))
        axes_total.set_xlabel("Seconds")
        axes_total.set_ylabel("CDF")
        axes_total.set_title("Page load time")
        axes_total.grid(True)

        fig_total.savefig(os.path.join(self.out_stats_folder, "page_load_cdf.png"))

        axes_timings.set_ylim((0, 1))
        axes_timings.set_xlabel("Milliseconds")
        axes_timings.set_ylabel("CDF")
        axes_timings.set_title("Single resource timings")
        axes_timings.grid(True)
        axes_timings.legend(loc='best')

        axes_timings_log.set_ylim((0, 1))
        axes_timings_log.set_xlabel("Milliseconds")
        axes_timings_log.set_ylabel("CDF")
        axes_timings_log.set_xscale("log")
        axes_timings_log.set_title("Single resource timings")
        axes_timings_log.grid(True, which="both", axis="x")
        axes_timings_log.grid(True, which="major", axis="y")

        axes_timings_log.legend(loc='best')

        fig_timings.savefig(os.path.join(self.out_stats_folder, "timings_cdf.png"))
        fig_timings_log.savefig(os.path.join(self.out_stats_folder, "timings_cdf_log.png"))

        # Release the figures; pyplot would otherwise keep them referenced.
        for fig in (fig_total, fig_timings, fig_timings_log):
            plt.close(fig)
Пример #23
0
from selenium.webdriver.chrome.options import Options
from splinter.browser import Browser
from browsermobproxy import Server
from time import sleep
from twilio.rest import Client

# Capture all network requests by routing the browser through BrowserMob Proxy.
server = Server(
    "D:/browsermob-proxy-2.1.4-bin/browsermob-proxy-2.1.4/bin/browsermob-proxy"
)
server.start()
proxy = server.create_proxy()

# Chrome is pointed at the proxy port through its --proxy-server switch.
chrome_options = Options()
chrome_options.add_argument('--proxy-server=localhost:{}'.format(proxy.port))


# disable-infobars
class HuoChe(object):
    """Settings holder for a Chrome-driven session against the course-selection site."""
    driver_name = 'Chrome'
    # Raw string: backslashes in the Windows path must not be read as escapes.
    executable_path = r'D:\eng\chromedriver.exe'
    # User name and password.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or a local config file instead.
    username = u"20181754121"
    passwd = u"skd158CF"
    # URLs
    # Course-selection URL of the school's Qiangzhi system
    select_url = "http://jw.sdufe.edu.cn/jsxsd/xsxk/xsxk_index?jx0502zbid=70A1CABD2E054E06A233181DEE7CB6E9"
    # Qiangzhi login URL
    login_url = "http://jw.sdufe.edu.cn/"
Пример #24
0
    def run_webdriver(self, start_url, port, config, download_dir):
        """
        Run Selenium WebDriver

        Starts a browsermob-proxy server on ``port``, opens Firefox through a
        proxy created on it, analyses ``start_url`` with ``self.analyse_page``
        and captures the traffic as a HAR.

        Args:
            start_url: parsed URL object; its ``hostname`` names the HAR.
            port: port passed to the browsermob-proxy server.
            config: settings object providing ``referer``, ``useragent``,
                ``url`` and ``firefoxprofile``.
            download_dir: directory set as the Firefox download folder.

        Errors raised while the browser runs are logged, not propagated.
        """
        webdriver = None
        urllib3_logger = logging.getLogger('urllib3')
        urllib3_logger.setLevel(logging.DEBUG)

        self.logger.info("Starting WebRunner")

        # Fall back to a Google-search referer and a fixed Firefox user agent.
        if config.referer:
            referer = config.referer
        else:
            referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
                config.url, config.url)

        if config.useragent:
            useragent = config.useragent
        else:
            useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

        self.logger.debug("Running with UserAgent: {}".format(useragent))
        self.logger.debug("Running with Referer: {}".format(referer))
        self.logger.debug("Checking URL: {}".format(config.url))

        server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
        server.start()
        proxy = server.create_proxy()
        proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})
        # Interceptor script run by the proxy for every request.
        request_js = (
            'var referer = request.getProxyRequest().getField("Referer");'
            'addReferer(request);'
            'function addReferer(r){'
            'if (! referer ) {'
            'r.addRequestHeader("Referer","' + referer + '");'
            '}'
            'return;'
            '}')
        proxy.request_interceptor(request_js)

        if config.firefoxprofile is not None and os.path.isdir(config.firefoxprofile):
            self.logger.debug("Using existing firefox profile")
            firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
        else:
            firefox_profile = FirefoxProfile()

        self.logger.debug("Using profile {}".format(firefox_profile.path))

        # Download handling: save the listed MIME types to download_dir
        # without prompting.
        firefox_profile.set_preference("security.OCSP.enabled", 0)
        firefox_profile.set_preference("browser.download.folderList", 2)
        firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
        firefox_profile.set_preference("browser.download.dir", download_dir)
        firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                       "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
        firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        firefox_profile.set_preference("security.mixed_content.block_active_content", False)
        firefox_profile.set_preference("security.mixed_content.block_display_content", False)
        firefox_profile.set_preference("extensions.blocklist.enabled", False)
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_proxy(proxy.selenium_proxy())
        firefox_profile.set_preference("webdriver.log.file", "/tmp/ff.log")
        firefox_profile.set_preference("webdriver.log.driver", "DEBUG")
        firefox_profile.set_preference("browser.newtabpage.enhanced", False)
        firefox_profile.set_preference("browser.newtabpage.enabled", False)
        firefox_profile.set_preference("browser.newtabpage.directory.ping", "")
        firefox_profile.set_preference("browser.newtabpage.directory.source", "")
        firefox_profile.set_preference("browser.search.geoip.url", "")

        try:
            self.xvfb.start()
            # Work on a copy: DesiredCapabilities.FIREFOX is a shared
            # class-level dict and must not be modified in place.
            capabilities = DesiredCapabilities.FIREFOX.copy()
            capabilities['loggingPrefs'] = {'browser': 'ALL'}
            # Prefer a Firefox binary located inside the profile directory.
            profile_binary = "{}/firefox".format(firefox_profile.path)
            if os.path.exists(profile_binary):
                binary = FirefoxBinary(profile_binary)
            else:
                binary = FirefoxBinary("/usr/bin/firefox")
            webdriver = WebDriver(capabilities=capabilities, firefox_profile=firefox_profile, firefox_binary=binary)
            proxy.new_har(start_url.hostname,
                          options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
            self.analyse_page(webdriver, start_url)
            for entry in webdriver.get_log('browser'):
                self.logger.info("Firefox: {}".format(entry))
            har = proxy.har
            self.logger.info("Stopping WebRunner")
            proxy.close()
            server.stop()
            webdriver.quit()
            # NOTE(review): the Har object is built but not returned in the
            # visible code, and xvfb is only stopped on the error path below -
            # confirm both against the callers.
            har = Har(har)
        except Exception as e:
            self.logger.error(e)
            proxy.close()
            if webdriver:
                webdriver.quit()
            self.xvfb.stop()
            server.stop()
class BrowserMobLibrary():
    """Keyword-style wrapper around a browsermob-proxy server and its proxies.

    One server is managed at a time. Every proxy created through
    ``create_proxy`` is tracked in ``self.proxies``; most methods act on the
    currently active proxy.
    """

    ROBOT_LIBRARY_SCOPE = 'GLOBAL'
    ROBOT_LIBRARY_VERSION = VERSION

    def __init__(self):
        self.isServerStarted = False
        self.activeProxy = None
        self.server = None
        self.proxies = []

    def _proxy(self):
        """Return the active proxy, raising if none has been created or selected."""
        if self.activeProxy is None:
            raise Exception("No proxy has been created")
        return self.activeProxy

    def start_browsermob(self, browsermob_path):
        """Start the browsermob-proxy executable found at ``browsermob_path``."""
        self.server = Server(browsermob_path)
        self.server.start()
        self.isServerStarted = True

    def stop_browsermob(self):
        """Stop the server if one is running and reset the server state."""
        if self.server is not None:
            self.server.stop()
        self.server = None
        self.isServerStarted = False

    def create_proxy(self):
        """Create one proxy, make it the active proxy, track it and return it."""
        # Call create_proxy() exactly once so the tracked, active and
        # returned proxy are the same object.
        proxy = self.server.create_proxy()
        self.activeProxy = proxy
        self.proxies.append(proxy)
        return proxy

    def close_proxy(self, proxy):
        """Stop tracking ``proxy`` and close it.

        Raises:
            ValueError: if ``proxy`` is not a tracked proxy.
        """
        self.proxies.remove(proxy)
        # Never leave a closed proxy selected as the active one.
        if proxy is self.activeProxy:
            self.activeProxy = None
        proxy.close()

    def close_active_proxy(self):
        """Close the currently active proxy."""
        self.close_proxy(self._proxy())

    def set_active_proxy(self, proxy):
        self.activeProxy = proxy

    def get_active_proxy(self):
        return self.activeProxy

    def get_all_proxies(self):
        return self.proxies

    def close_all_proxies(self):
        """Close every tracked proxy, in creation order, and forget them."""
        while self.proxies:
            self.proxies.pop(0).close()
        self.activeProxy = None

    def capture_traffic(self, reference=None, **options):
        """Start a new HAR on the active proxy; keyword arguments are passed as options."""
        return self._proxy().new_har(reference, options)

    def get_captured_traffic(self):
        """Return the HAR recorded by the active proxy."""
        return self._proxy().har

    def set_capture_reference(self, reference=None):
        return self._proxy().new_page(reference)

    def ignore_all_traffic_matching(self, regexp, status_code):
        return self._proxy().blacklist(regexp, status_code)

    def only_capture_traffic_matching(self, regexp, status_code):
        return self._proxy().whitelist(regexp, status_code)

    def use_basic_authentication(self, domain, username, password):
        return self._proxy().basic_authentication(domain, username, password)

    def set_headers(self, headers):
        return self._proxy().headers(headers)

    def set_response_interceptor(self, js):
        return self._proxy().response_interceptor(js)

    def set_request_interceptor(self, js):
        return self._proxy().request_interceptor(js)

    def set_bandwith_limits(self, **options):
        return self._proxy().limits(options)

    def set_proxy_timeouts(self, **options):
        return self._proxy().timeouts(options)

    def remap_hosts(self, address, ip_address):
        return self._proxy().remap_hosts(address, ip_address)

    def wait_for_traffic_to_stop(self, quiet_period, timeout):
        return self._proxy().wait_for_traffic_to_stop(quiet_period, timeout)

    def clear_proxy_dns_cache(self):
        return self._proxy().clear_dns_cache()

    def rewrite_url(self, match, replace):
        return self._proxy().rewrite_url(match, replace)
Пример #26
0
from browsermobproxy import Server
from selenium import webdriver

# Minimal example: record the traffic of one page load as a HAR.
server = Server("/root/Desktop/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy")
server.start()
try:
    proxy = server.create_proxy()

    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)
    try:
        proxy.new_har("google")
        driver.get("http://www.google.co.uk")
        test = proxy.har  # returns a HAR JSON blob

        print(test)
    finally:
        # Always close the browser, even when the page load fails.
        driver.quit()
finally:
    # Always shut the proxy server down so no process is left behind.
    server.stop()
Пример #27
0
class HarProfiler:
    """Context manager that records HAR files for a URL through browsermob-proxy.

    Entering the context starts the optional virtual display and the proxy
    server; leaving it stops them again.
    """

    def __init__(self, config, url):
        """Read the settings from ``config`` and derive labels and file names from ``url``.

        Args:
            config: mapping with the keys ``har_dir``, ``browsermob_dir``,
                ``label_prefix``, ``virtual_display``,
                ``virtual_display_size_x`` and ``virtual_display_size_y``.
            url: URL whose slug becomes part of the HAR label.
        """
        self.har_dir = config['har_dir']
        self.browsermob_dir = config['browsermob_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both file names share one timestamp so the pair can be matched up.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        """Start the virtual display (when enabled) and the proxy server."""
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        self.server = Server('{}/bin/browsermob-proxy'.format(
            self.browsermob_dir)
        )
        self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        """Stop the proxy server and, when one was started, the virtual display."""
        log.info('stopping browsermob proxy')
        self.server.stop()
        # self.display only exists when the virtual display was enabled.
        if self.virtual_display:
            log.info('stopping virtual display')
            self.display.stop()

    def _make_proxied_webdriver(self):
        """Return a ``(driver, proxy)`` pair: a Firefox driver routed through a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write ``har`` as JSON into ``har_dir``, creating the directory if needed."""
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def load_page(self, url, run_cached=True):
        """Load ``url`` and save its HAR; optionally load it a second time.

        Args:
            url: page to load.
            run_cached: when true, the page is loaded again in the same
                browser and saved under the cached label.
        """
        driver, proxy = self._make_proxied_webdriver()
        try:
            proxy.new_har(self.label)
            log.info('loading page: {}'.format(url))
            driver.get(url)
            self._save_har(proxy.har)

            if run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(url))
                driver.get(url)
                self._save_har(proxy.har, cached=True)
        finally:
            # Close the browser even when a page load or a save fails.
            driver.quit()

    def slugify(self, text):
        """Lower-case ``text`` and join its alphanumeric runs with hyphens."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
Пример #28
0
class performance(object):
    """Collect a HAR capture and ``window.performance`` data for one URL.

    Typical use: ``start_all(args)``, ``create_har(args)``, ``stop_all()``.
    ``args`` is a mapping with ``browser`` (``'chrome'`` or ``'firefox'``),
    ``url`` and optionally ``path`` (output directory).
    """

    def __init__(self, mob_path):
        """Remember the browsermob-proxy executable path; nothing is started yet."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Write ``result`` to ``<title>.json``, inside ``args['path']`` when given."""
        import io

        file_name = title + '.json'
        if 'path' in args:
            file_name = args['path'] + '/' + file_name

        # Normalise to text so non-ASCII JSON is written as UTF-8.
        text_type = type(u'')
        if isinstance(result, bytes):
            result = result.decode('utf-8')
        elif not isinstance(result, text_type):
            result = text_type(result)

        with io.open(file_name, 'w', encoding='utf-8') as har_file:
            har_file.write(result)

    def __start_server(self):
        """Start the browsermob-proxy server and create one proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Start the browser named by ``args['browser']``, routed through the proxy."""
        if args['browser'] == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            url = urlparse.urlparse(self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
        elif args['browser'] == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self, args):
        """Start the proxy server first, then the browser."""
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Load ``args['url']`` and store ``har.json`` and ``perf.json``."""
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        performance = json.dumps(self.driver.execute_script("return window.performance"), ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Stop whatever was started; the browser is quit even if stopping the server fails."""
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))

        try:
            if self.server is not None:
                self.server.stop()
        finally:
            if self.driver is not None:
                self.driver.quit()
Пример #29
0
class performance(object):
    """Record a HAR and the browser's ``window.performance`` object for a URL.

    The caller passes one ``args`` mapping around: ``browser`` selects
    ``'chrome'`` or ``'firefox'``, ``url`` is the page to load and the
    optional ``path`` is the directory the JSON files are written to.
    """

    def __init__(self, mob_path):
        """Keep the path of the browsermob-proxy executable for later use."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Save ``result`` as UTF-8 text in ``<title>.json`` (under ``args['path']`` if set)."""
        import io

        target = title + '.json'
        if 'path' in args:
            target = args['path'] + '/' + target

        # json.dumps may hand back bytes or text depending on the Python
        # version; write text either way.
        text_type = type(u'')
        if isinstance(result, bytes):
            result = result.decode('utf-8')
        elif not isinstance(result, text_type):
            result = text_type(result)

        with io.open(target, 'w', encoding='utf-8') as har_file:
            har_file.write(result)

    def __start_server(self):
        """Launch browsermob-proxy and open a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Launch Chrome or Firefox, configured to send traffic through the proxy."""
        if args['browser'] == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            url = urlparse.urlparse(self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
        elif args['browser'] == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self, args):
        """Bring up the proxy server and then the browser."""
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Visit ``args['url']`` and write ``har.json`` plus ``perf.json``."""
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        performance = json.dumps(self.driver.execute_script("return window.performance"), ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Shut down the server and the browser, skipping parts that never started."""
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))

        try:
            if self.server is not None:
                self.server.stop()
        finally:
            # Runs even when stopping the server raises.
            if self.driver is not None:
                self.driver.quit()
Пример #30
0
class HarProfiler:
    """Record HAR files for one URL using Firefox behind a BrowserMob proxy.

    Use as a context manager: ``__enter__`` starts the optional virtual
    display and the BrowserMob server, ``__exit__`` stops them again.
    ``load_page`` performs the page load(s) and writes the HAR file(s).
    """

    def __init__(self, config, url, login_first=False):
        """Store the settings and derive the labels and HAR file names.

        Args:
            config: mapping with the keys ``browsermob_dir``, ``har_dir``,
                ``label_prefix``, ``run_cached``, ``virtual_display``,
                ``virtual_display_size_x`` and ``virtual_display_size_y``.
                ``login_user`` and ``login_password`` are optional and are
                read with ``config.get``.
            url: page to load.
            login_first: when true, ``load_page`` logs in before loading
                ``url``.
        """
        self.url = url
        self.login_first = login_first

        self.login_user = config.get('login_user')
        self.login_password = config.get('login_password')

        self.browsermob_dir = config['browsermob_dir']
        self.har_dir = config['har_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.run_cached = config['run_cached']
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both file names share one timestamp so the pair is easy to match up
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        """Start the virtual display (if enabled) and the BrowserMob server."""
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        try:
            self.server = Server('{}/bin/browsermob-proxy'.format(
                self.browsermob_dir)
            )
            self.server.start()
        except Exception:
            # __exit__ is not run when __enter__ raises, so the display
            # started above has to be released here.
            if self.virtual_display:
                self.display.stop()
            raise
        return self

    def __exit__(self, type, value, traceback):
        """Stop the BrowserMob server and, if one was started, the display."""
        log.info('stopping browsermob proxy')
        try:
            self.server.stop()
        finally:
            # Release the display even if stopping the server failed
            if self.virtual_display:
                log.info('stopping virtual display')
                self.display.stop()

    def _make_proxied_webdriver(self):
        """Return ``(driver, proxy)``: a Firefox driver wired to a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write ``har`` as JSON into ``self.har_dir``.

        Args:
            har: JSON-serializable HAR data.
            cached: selects ``self.cached_har_name`` instead of
                ``self.har_name`` as the file name.
        """
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def _login(self, driver):
        """Submit the configured credentials on the edX login page.

        Raises:
            RuntimeError: if ``login_user`` or ``login_password`` is missing.
        """
        log.info('logging in...')

        error_msg = 'must specify login credentials in yaml config file'
        if self.login_user is None or self.login_password is None:
            raise RuntimeError(error_msg)

        driver.get('https://courses.edx.org/login')

        # handle both old and new style logins
        try:
            email_field = driver.find_element_by_id('email')
            password_field = driver.find_element_by_id('password')
        except NoSuchElementException:
            email_field = driver.find_element_by_id('login-email')
            password_field = driver.find_element_by_id('login-password')
        email_field.send_keys(self.login_user)
        password_field.send_keys(self.login_password)
        password_field.submit()

    def _add_page_event_timings(self, driver, har):
        """Fill ``onContentLoad`` / ``onLoad`` of the first HAR page.

        The values are computed from the browser's ``performance.timing``
        object, relative to ``navigationStart``. ``har`` is modified in place
        and also returned.
        """
        jscript = textwrap.dedent("""
            var performance = window.performance || {};
            var timings = performance.timing || {};
            return timings;
            """)
        timings = driver.execute_script(jscript)
        page_timings = har['log']['pages'][0]['pageTimings']
        page_timings['onContentLoad'] = (
            timings['domContentLoadedEventEnd'] - timings['navigationStart']
        )
        page_timings['onLoad'] = (
            timings['loadEventEnd'] - timings['navigationStart']
        )
        return har

    def load_page(self):
        """Load ``self.url`` (optionally twice) and save the HAR file(s).

        Logs in first when ``login_first`` is set. When ``run_cached`` is
        set, the page is loaded a second time in the same browser and saved
        under the cached name. The browser is always quit afterwards.
        """
        # Bound up front: if creating the webdriver fails, the cleanup below
        # must not trip over an unbound name and hide the real error.
        driver = None
        try:
            driver, proxy = self._make_proxied_webdriver()

            if self.login_first:
                self._login(driver)

            proxy.new_har(self.label)
            log.info('loading page: {}'.format(self.url))
            driver.get(self.url)
            har = self._add_page_event_timings(driver, proxy.har)
            self._save_har(har)

            if self.run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(self.url))
                driver.get(self.url)
                har = self._add_page_event_timings(driver, proxy.har)
                self._save_har(har, cached=True)
        finally:
            if driver is not None:
                driver.quit()

    def slugify(self, text):
        """Lower-case ``text`` and join its alphanumeric runs with ``-``."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
Пример #31
0
class Browser:
    """Chrome driven through a BrowserMob proxy, saving one HAR per page load."""

    def __init__(self, chromedriverPath, browsermobPath, harfilePath, cookies=None):
        """Start the proxy server and a Chrome instance that uses it.

        Args:
            chromedriverPath: path of the chromedriver executable.
            browsermobPath: path passed to the BrowserMob ``Server``.
            harfilePath: directory the ``.har`` files are written to.
            cookies: optional path of a JSON file holding a list of cookies
                to add to the driver.

        Raises:
            Whatever the server, driver or cookie loading raises; in that
            case everything started so far is shut down again first.
        """
        self.harfilePath = harfilePath
        self.driver = None
        self.server = Server(browsermobPath)
        self.server.start()
        try:
            self.proxy = self.server.create_proxy()

            os.environ["webdriver.chrome.driver"] = chromedriverPath
            # proxy.proxy is already "host:port". Passing it through
            # urlparse(...).path returns only the port for "localhost:port"
            # on Python >= 3.9, which breaks the Chrome proxy setting.
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(self.proxy.proxy))

            self.driver = webdriver.Chrome(chromedriverPath,chrome_options =chrome_options)
            if cookies:
                print("Loading cookies from "+str(cookies))
                with open(cookies, 'r') as cookieFile:
                    cookieJson = json.load(cookieFile)
                # NOTE(review): cookies are added before any page is loaded;
                # confirm the driver accepts them at this point.
                for cookie in cookieJson:
                    self.driver.add_cookie(cookie)
        except Exception:
            # Do not leave the proxy server or a half-configured browser
            # running when construction fails.
            self.close()
            raise

    def get(self, url, timeout=20):
        """Load ``url``, scroll through it, save the HAR, return the source.

        Args:
            url: page to load.
            timeout: page-load timeout in seconds.

        Returns:
            The page source reported by the driver.
        """
        print(url)
        self.proxy.new_har(url, {"captureContent":True})
        # (scroll target, seconds to wait afterwards): step down the page so
        # content that loads on scroll is requested and captured.
        scroll_steps = (
            ("document.body.scrollHeight/5", .5),
            ("document.body.scrollHeight/4", .5),
            ("document.body.scrollHeight/3", .5),
            ("document.body.scrollHeight/2", .5),
            ("document.body.scrollHeight", 4),
        )
        try:
            self.driver.set_page_load_timeout(timeout)
            self.driver.get(url)
            for target, pause in scroll_steps:
                self.driver.execute_script("window.scrollTo(0, {0});".format(target))
                time.sleep(pause) #wait for the page to load
        except TimeoutException:
            print("Timeout")
            self.driver.find_element_by_tag_name("body").send_keys(Keys.CONTROL+Keys.ESCAPE)

        try:
            source = self.driver.page_source
            result = json.dumps(self.proxy.har, ensure_ascii=False)
            # File name is the current time in milliseconds
            har_name = str(int(time.time()*1000.0))+".har"
            with open(os.path.join(self.harfilePath, har_name), "w") as harfile:
                harfile.write(result)
            return source
        except TimeoutException:
            # NOTE(review): the retry has no upper bound; each attempt only
            # lengthens the timeout by 5 seconds.
            print("Retrying, with a timeout of "+str(timeout+5))
            return self.get(url, timeout=timeout+5)

    def close(self):
        """Stop the proxy server and quit the browser, best effort.

        Failures are reported with a printed warning and never raised.
        """
        try:
            self.server.stop()
        except Exception:
            print("Warning: Error stopping server")
        try:
            # driver is None when __init__ failed before Chrome was started
            if self.driver is not None:
                self.driver.quit()
        except Exception:
            print("Warning: Error stopping driver")
Пример #32
0
    def instance_browser(self, proxy_enabled, params):
        """Start the web browser, optionally behind a BrowserMob proxy.

        Args:
            proxy_enabled: flag to set proxy; when true a proxy server is
                started and the browser is configured to use it
            params: list of parameters
                -0: browser engine ('chrome' or 'firefox')
                -1: user-agent: a key of USER_AGENTS, a literal user-agent
                    string, or 'default' to keep the browser's own
                -2: headless flag
        Returns:
            Instance of the web browser. The server and proxy objects are
            NOT returned.
        Raises:
            Exception: if the proxy server cannot be launched or configured,
                if fewer than 3 parameters are given, if the engine is not
                supported, or if the browser fails to launch. A proxy server
                started by this call is stopped again in all of these cases.
        """
        server = None
        proxy = None
        browser = None
        try:
            if proxy_enabled:
                try:
                    server = Server(proxy_path)
                    server.start()
                except Exception as e:
                    raise Exception(
                        'Error launching server: {exception}'.format(exception=e))
                try:
                    proxy = server.create_proxy()
                except RuntimeError:
                    # One retry after a pause; presumably the server may not
                    # be ready to hand out proxies yet -- TODO confirm.
                    time.sleep(5)
                    try:
                        proxy = server.create_proxy()
                    except Exception as e:
                        raise Exception(
                            'Error configuring  proxy: {exception}'.format(
                                exception=e))
                proxy.new_har()
                # proxy.proxy is already "host:port". urlparse(...).path
                # returns only the port for "localhost:port" on
                # Python >= 3.9, so the value is used as-is.
                proxy_url = proxy.proxy

            try:
                engine = params[0]
                try:
                    user_agent = USER_AGENTS[params[1]]
                except LookupError:
                    # Not a known alias: treat the value as the literal string
                    user_agent = params[1]
                headless = params[2]
            except LookupError:
                raise Exception('Function browser(): 3 arguments needed')

            try:
                logger.log(
                    'NOTE',
                    'Engine: {engine} | User-agent: {user_agent} | Headless: {headless}'
                    .format(engine=engine,
                            user_agent=user_agent,
                            headless=headless))
                if engine == 'chrome':
                    driver_path = self.get_driver_path(engine)
                    ch_opt = webdriver.ChromeOptions()
                    if proxy_enabled:
                        ch_opt.add_argument("--proxy-server=" + proxy_url)
                    if user_agent != 'default':
                        ch_opt.add_argument("--user-agent=" + user_agent)
                    if headless:
                        ch_opt.headless = True

                    def launch():
                        return webdriver.Chrome(executable_path=driver_path,
                                                chrome_options=ch_opt)
                elif engine == 'firefox':
                    driver_path = self.get_driver_path(engine)
                    ff_prf = webdriver.FirefoxProfile()
                    ff_opt = webdriver.FirefoxOptions()
                    if user_agent != 'default':
                        ff_prf.set_preference("general.useragent.override",
                                              user_agent)
                    if headless:
                        ff_opt.headless = True

                    def launch():
                        kwargs = {'executable_path': driver_path,
                                  'firefox_profile': ff_prf,
                                  'options': ff_opt}
                        if proxy_enabled:
                            kwargs['proxy'] = proxy.selenium_proxy()
                        return webdriver.Firefox(**kwargs)
                else:
                    raise Exception(
                        'Not supported engine: {engine}'.format(engine=engine))

                # NOTE(review): only LookupError triggers the single retry;
                # confirm which failure this is meant to cover.
                try:
                    browser = launch()
                except LookupError:
                    time.sleep(5)
                    browser = launch()
            except Exception as e:
                raise Exception(
                    'Error launching {engine} ({user_agent}): {exception}'.format(
                        engine=engine, user_agent=user_agent, exception=e))
        finally:
            # No browser was produced: nobody else holds the server, so stop
            # it here instead of leaking the process.
            if browser is None and server is not None:
                try:
                    server.stop()
                except Exception:
                    # Best-effort cleanup; the original error is the one
                    # worth reporting.
                    pass
        # NOTE(review): on success the server/proxy objects are dropped here;
        # callers cannot stop the server through this method's return value.
        return browser
Пример #33
0
class Fetcher:
    """Load a site through a BrowserMob proxy and return the captured HAR.

    Typical use: pick a proxy server with ``set_remote_server()`` or
    ``start_local_server()``, call ``set_firefox()``, then ``run()`` and
    finally ``close()``.
    """

    def __init__(self):
        self.server = None
        self.proxy = None
        self.browser = None
        self.driver = None

    def set_remote_server(self, host, port):
        """Defines an already running proxy server for gathering
        includes and content
        """
        self.server = RemoteServer(host, port)
        self.proxy = self.server.create_proxy()

    def start_local_server(self, binpath=None):
        """Starts a local instance of BrowserMob.

        Keyword Arguments:
        binpath -- The full path, including the binary name to the
        browsermob-proxy binary. Defaults to the
        browsermob-proxy-2.1.0-beta-4 binary under the current working
        directory.
        """
        if binpath is None:
            binpath="{0}/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy".format(getcwd())

        self.server = Server(binpath)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def set_firefox(self):
        """Sets the Webdriver for Firefox, routed through ``self.proxy``"""
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=self.profile)

    def run(self, site, name='fetch'):
        """Runs an instance of the Fetcher. Requires that either
        set_remote_server() or start_local_server() has been previously
        called.

        Keyword Arguments:
        site -- The URL of the site to load.
        name -- Name of the resulting HAR.

        Returns the HAR with the page source added under the 'dom' key, or
        None (after printing a failure message) when an AttributeError
        occurs, e.g. because no proxy or webdriver has been set.
        """
        try:
            self.proxy.headers({'Via': None}) # TODO: Need to override BrowserMob to remove the Via Header - https://github.com/lightbody/browsermob-proxy/issues/213
            self.proxy.new_har(name, options={ 'captureHeaders': True,
                'captureContent': True,
                'captureBinaryContent': True })
            self.driver.get(site)

            har = self.proxy.har
            har['dom'] = self.driver.page_source
            return har
        except AttributeError:
            print("[!] FAILED: Ensure you have set a Webdriver")
            return None

    def close(self):
        """Stop a locally started server and quit the webdriver."""
        try:
            # stop() lives on the local Server object, not on the proxy
            # client, so the old ``self.proxy.stop()`` always failed with a
            # swallowed AttributeError and left the server running. A remote
            # server (or no server at all) has nothing to stop and lands in
            # the except branch.
            self.server.stop()
        except AttributeError:
            pass

        try:
            # quit() ends the whole driver session; close() only closed the
            # current window.
            self.driver.quit()
        except AttributeError:
            print('[!] Driver not found')