def save_web_page_stats_to_har(url, webdriver_name, save_to_file):
    """Load ``url`` through a browsermob proxy and save the captured traffic.

    Two files are written next to each other:

    * ``save_to_file + '.har'``  -- the HAR archive as indented JSON (UTF-8).
    * ``save_to_file + '.prof'`` -- the profiling report produced by
      ``report_har_dictionary`` (timings, sizes, etc).

    Args:
        url: Page to load.
        webdriver_name: Name of the Selenium webdriver to create.
        save_to_file: Output path without extension.

    The proxy, the browsermob server and the browser are always shut down,
    even when starting the browser or loading the page fails.
    """
    browsermob_server = Server(Config.browsermob_executable)
    browsermob_server.start()
    proxy_conn = None
    driver = None
    try:
        random_port = get_a_random_free_tcp_port()
        proxy_conn = browsermob_server.create_proxy({"port": random_port})
        driver = create_selenium_webdriver(webdriver_name, proxy_conn)

        proxy_conn.new_har(url, options={'captureHeaders': True})
        driver.get(url)

        # Fetch the HAR once so the '.har' and '.prof' files describe
        # exactly the same capture.
        har = proxy_conn.har
        har_json = json.dumps(har, ensure_ascii=False, indent=4,
                              separators=(',', ': '))

        # Save '.HAR' file
        with io.open(save_to_file + '.har', mode='wt', buffering=1,
                     encoding='utf8', errors='backslashreplace',
                     newline=None) as output_har_f:
            output_har_f.write(unicode(har_json))

        # Save '.PROF' file with profiling report (timings, sizes, etc)
        with io.open(save_to_file + '.prof', mode='wb', buffering=1,
                     newline=None) as prof_output:
            report_har_dictionary(har, prof_output)
    finally:
        # Nested try/finally: a failure in one shutdown step must not
        # prevent the remaining ones from running.
        try:
            if proxy_conn is not None:
                proxy_conn.close()
        finally:
            try:
                browsermob_server.stop()
            finally:
                if driver is not None:
                    driver.quit()
def setUp(self):
    """
    Start the browser with a browsermob-proxy instance for use by the test.
    You *must* call this in the `setUp` method of any subclasses before
    using the browser!

    The test is skipped when the proxy server cannot be started. A server
    process that was started before the failure is stopped again so it
    does not outlive the test run.

    Returns:
        None
    """
    server = None
    try:
        # Start server proxy
        server = Server('browsermob-proxy')
        server.start()
        self.proxy = server.create_proxy()
        proxy_host = os.environ.get('BROWSERMOB_PROXY_HOST', '127.0.0.1')
        self.proxy.remap_hosts('localhost', proxy_host)
    except Exception:
        if server is not None:
            try:
                server.stop()
            except Exception:
                # Best effort: the server may never have come up.
                pass
        self.skipTest('Skipping: could not start server with browsermob-proxy.')

    # parent's setUp
    try:
        super(WebAppPerfReport, self).setUp()
    except BaseException:
        # The cleanup below is not registered yet, so stop the server here.
        server.stop()
        raise

    # Initialize vars
    self._page_timings = []
    self._active_har = False
    self._with_cache = False

    # Add one more cleanup for the server
    self.addCleanup(server.stop)
def setupdevices():
    """
    Description:
        Sets up browser proxy, Selenium driver, and har object
    Usage:
        driver, proxy, server = setupdevices()
    Inputs:
        NA
    Output:
        Selenium driver
        Browsermob proxy
        Browsermob server
    """
    # set up proxy
    server = Server("############/browsermob-proxy-2.0-beta-9/bin/browsermob-proxy")
    server.start()
    try:
        proxy = server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        proxy.new_har("________")
        # set up driver
        driver = webdriver.Firefox(firefox_profile=profile)
    except Exception:
        # The caller never receives the server handle on failure, so it
        # must be stopped here or the process is orphaned.
        server.stop()
        raise
    return (driver, proxy, server)
def main(argv):
    """Open the start URL and click every visible same-domain link.

    After each click the HTTP status codes recorded by the proxy are
    reported through ``show_status_codes`` and the browser navigates back.

    Command line options (read by ``argparse`` from ``sys.argv``; the
    ``argv`` parameter is accepted but not used):
        -u  page URL (required)
        -c  JSON file with cookies to add before crawling
        -w  WebDriver type, ``firefox`` (default) or ``phantomjs``
    """
    init()
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', action='store', dest='start_url',
                        help='Set page URL', required=True)
    parser.add_argument('-c', action='store', dest='cookies_file',
                        help='JSON file with cookies', required=False)
    parser.add_argument('-w', action='store', dest='webdriver_type',
                        help='Set WebDriver type (firefox or phantomjs, firebox by default)',
                        default="firefox", required=False)
    results = parser.parse_args()
    start_url = results.start_url
    cookies_file = results.cookies_file
    webdriver_type = results.webdriver_type
    allowed_domain = urlparse(start_url).netloc

    browsermobproxy_path = get_browsermobproxy_path()
    options = {
        'port': 9090,
    }
    server = Server(browsermobproxy_path, options)
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        if webdriver_type == "phantomjs":
            # Use the address the proxy actually listens on instead of
            # assuming it was given port 9091.
            service_args = ['--proxy=%s' % proxy.proxy, '--proxy-type=http']
            driver = webdriver.PhantomJS(service_args=service_args)
            driver.set_window_size(1440, 1024)
        else:
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)

        proxy.new_har('woodpycker',
                      options={'captureHeaders': True, 'captureContent': True})
        driver.get(start_url)

        if cookies_file is not None:
            with open(cookies_file, 'rb') as fp:
                cookies = json.load(fp)
            for cookie in cookies:
                driver.add_cookie(cookie)
            driver.refresh()

        links = driver.find_elements_by_tag_name('a')
        lenl = len(links)
        for i in range(lenl):
            # The list is re-read after every navigation and may shrink.
            if i >= len(links):
                break
            link = links[i]
            if not link.is_displayed():
                continue
            url = link.get_attribute('href')
            text = link.get_attribute('text') or ''
            # Anchors without an href return None; skip them.
            if not url or url.find(allowed_domain) == -1:
                continue
            link.click()
            print("%s Clicked on the link '%s' with HREF '%s'" % (
                Fore.BLUE + "*" + Fore.RESET,
                Style.BRIGHT + text + Style.RESET_ALL,
                Style.BRIGHT + url + Style.RESET_ALL))
            show_status_codes(proxy.har, allowed_domain)
            driver.back()
            driver.refresh()
            # Element handles go stale after navigation; fetch them again.
            links = driver.find_elements_by_tag_name('a')
    finally:
        try:
            if driver is not None:
                driver.quit()
        finally:
            server.stop()
def CaptureNetworkTraffic(url, server_ip, headers, file_path):
    '''
    This function can be used to capture network traffic from the browser.
    Using this function we can capture header/cookies/http calls made from
    the browser.

    url       - Page url
    server_ip - remap host to for specific URL
    headers   - this is a dictionary of the headers to be set
    file_path - File in which HAR gets stored

    The HAR is read from the proxy *before* the proxy server is stopped;
    the server and the browser are shut down even if the page load fails.
    '''
    port = {'port': 9090}
    server = Server("G:\\browsermob\\bin\\browsermob-proxy", port)  # Path to the BrowserMobProxy
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()
        proxy.remap_hosts("www.example.com", server_ip)
        proxy.remap_hosts("www.example1.com", server_ip)
        proxy.remap_hosts("www.example2.com", server_ip)
        proxy.headers(headers)

        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)

        new = {'captureHeaders': 'True', 'captureContent': 'True'}
        proxy.new_har("google", new)
        driver.get(url)
        # Must happen while the server is still running.
        har = proxy.har
    finally:
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()

    with open(file_path, 'w') as file1:
        json.dump(har, file1)
def get_driver(self, browser, start_beacon_url):
    """Start a browsermob proxy and return a Firefox driver routed through it.

    Sets ``self.proxy`` (with a fresh HAR started) and ``self.beacon_url``.

    Args:
        browser: Accepted but not used; Firefox is always started.
        start_beacon_url: Stored unchanged on ``self.beacon_url``.

    Returns:
        The Selenium Firefox driver.
    """
    server = Server(BROWSERMOB_LOCATION)
    server.start()
    try:
        self.proxy = server.create_proxy()
        driver = webdriver.Firefox(proxy=self.proxy.selenium_proxy())
        self.proxy.new_har()
    except Exception:
        # Without this the proxy process is orphaned when the browser
        # cannot be started.
        server.stop()
        raise
    # NOTE(review): the server handle is not kept anywhere, so nothing in
    # this block can stop the server later -- confirm who owns shutdown.
    self.beacon_url = start_beacon_url
    return driver
def init_proxy_server(self, port=None):
    """Start the browsermob server and create one proxy on it.

    Only settings that are not ``None`` are forwarded to the proxy:
    ``port``, the chained proxy (as ``httpsProxy`` or ``httpProxy``
    depending on ``self.is_https``), ``proxyUsername`` and
    ``proxyPassword``.

    Returns:
        Tuple ``(server, proxy)``.
    """
    settings = [('port', port)]
    if self.chained_proxy is not None:
        scheme_key = 'httpsProxy' if self.is_https else 'httpProxy'
        settings.append((scheme_key, self.chained_proxy))
    settings.append(('proxyUsername', self.proxy_username))
    settings.append(('proxyPassword', self.proxy_password))
    proxy_params = dict(
        (key, value) for key, value in settings if value is not None)

    bmp_server = Server('C://browsermob-proxy//bin//browsermob-proxy.bat',
                        options={"port": self.server_port})
    bmp_server.start()
    return bmp_server, bmp_server.create_proxy(params=proxy_params)
def main():
    """Open the URL given as first argument and click every visible
    same-domain link.

    A new HAR is started before each click; afterwards the recorded status
    codes are reported through ``show_status_codes`` and the browser
    navigates back. Exits with status 1 when no URL is given.
    """
    init()
    if len(sys.argv) >= 2:
        start_url = sys.argv[1]
    else:
        print("You must specify page URL!")
        # Usage error: report failure to the calling shell.
        sys.exit(1)
    allowed_domain = urlparse(start_url).netloc

    browsermobproxy_path = "/usr/local/opt/browsermobproxy/bin/browsermob-proxy"
    options = {
        'port': 9090,
    }
    server = Server(browsermobproxy_path, options)
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        driver.get(start_url)

        links = driver.find_elements_by_tag_name('a')
        lenl = len(links)
        for i in range(lenl):
            # The list is re-read after every navigation and may shrink.
            if i >= len(links):
                break
            link = links[i]
            if not link.is_displayed():
                continue
            url = link.get_attribute('href')
            text = link.get_attribute('text') or ''
            # Anchors without an href return None; skip them.
            if not url or url.find(allowed_domain) == -1:
                continue
            proxy.new_har('demo')
            link.click()
            print("%s Clicked on the link '%s' with HREF '%s'" % (
                Fore.BLUE + "*" + Fore.RESET,
                Style.BRIGHT + text + Style.RESET_ALL,
                Style.BRIGHT + url + Style.RESET_ALL))
            show_status_codes(proxy.har, allowed_domain)
            driver.back()
            driver.refresh()
            # Element handles go stale after navigation; fetch them again.
            links = driver.find_elements_by_tag_name('a')
    finally:
        try:
            if driver is not None:
                driver.quit()
        finally:
            server.stop()
class CreateHar(object):
    """create HTTP archive file"""

    def __init__(self, mob_path):
        """initial setup

        mob_path is the path of the browsermob-proxy executable; nothing
        is started until start_all() is called.
        """
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(title, result):
        """store result as UTF-8 text in '<title>.har'"""
        import io

        # json.dumps(..., ensure_ascii=False) may yield text containing
        # non-ASCII characters; writing through an explicit UTF-8 encoder
        # avoids the implicit ASCII conversion that str() performs on
        # Python 2.
        if not isinstance(result, (bytes, type(u''))):
            result = str(result)
        if isinstance(result, bytes):
            result = result.decode('utf-8')
        with io.open(title + '.har', 'w', encoding='utf-8') as har_file:
            har_file.write(result)

    def __start_server(self):
        """prepare and start server"""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """prepare and start driver"""
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self):
        """start server and driver"""
        self.__start_server()
        self.__start_driver()

    def create_har(self, title, url):
        """start request and parse response

        Loads url in the browser and stores the captured traffic in
        '<title>.har'.
        """
        self.proxy.new_har(title)
        self.driver.get(url)
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(title, result)

    def stop_all(self):
        """stop server and driver

        Safe to call when start_all() was never called or only partly
        succeeded; the driver is quit even if stopping the server fails.
        """
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            if self.driver is not None:
                self.driver.quit()
def fetch(url, config, output_directory, fetchEngine="browsermobproxy+selenium", browser="firefox"):
    """Fetch ``url`` with the chosen engine and store the result as a HAR file.

    Args:
        url: Page to load.
        config: Mapping with a ``'fetchEngines'`` section holding
            ``'phantomjs_command'`` and ``'browsermobproxy_binary'``.
        output_directory: Directory the ``.har`` file is written to.
        fetchEngine: ``"phantomjs"``/``"ph"`` or
            ``"browsermobproxy+selenium"``/``"bs"``.
        browser: ``"firefox"``/``"ff"``; any other value starts Chrome.

    Returns:
        The path of the written file, or ``None`` when the engine
        produced no data.

    Exits the process with "Unrecognized engine." for unknown engines.
    """
    if fetchEngine in ("phantomjs", "ph"):
        # NOTE(security): the URL is substituted into a shell command line
        # and run with shell=True; only pass trusted URLs here.
        data = subprocess.check_output(
            config['fetchEngines']['phantomjs_command'].replace("$url", url),
            shell=True
        )
    elif fetchEngine in ("browsermobproxy+selenium", "bs"):
        from browsermobproxy import Server
        from selenium import webdriver

        server = Server(config['fetchEngines']['browsermobproxy_binary'])
        server.start()
        driver = None
        try:
            proxy = server.create_proxy()
            if browser in ("firefox", "ff"):
                profile = webdriver.FirefoxProfile()
                profile.set_proxy(proxy.selenium_proxy())
                driver = webdriver.Firefox(firefox_profile=profile)
            else:
                chrome_options = webdriver.ChromeOptions()
                chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
                driver = webdriver.Chrome(chrome_options = chrome_options)

            proxy.new_har(url, options={'captureHeaders': True})
            driver.get(url)
            data = json.dumps(proxy.har, ensure_ascii=False)
        finally:
            # Always shut down, also when the browser or page load fails.
            try:
                server.stop()
            finally:
                if driver is not None:
                    driver.quit()
    else:
        sys.exit("Unrecognized engine.")

    if not data:
        return None

    fileName = output_directory + "/" + url.replace("http://", "").replace("https://", "") + "_" + strftime("%Y-%m-%d_%H:%M:%S", gmtime()) + ".har"
    with open(fileName, "w") as f:
        f.write(data.encode("utf8"))
    return fileName
def get_driver_and_proxy():
    """Return the shared ``(driver, proxy)`` pair, creating it on first use.

    On the first call this optionally starts a browsermob proxy (config
    ``browsermob.collect-har``), optionally starts a virtual display
    (config ``xconfig.headless``) and then launches Firefox with a
    60 second implicit wait. Later calls return the existing globals.
    """
    global display
    global driver
    global proxy

    # Already initialised: hand back what we have.
    if driver:
        return driver, proxy

    collect_har = int(config.get('browsermob', {}).get('collect-har', 0))
    if collect_har:
        from browsermobproxy import Server
        server = Server(config['browsermob']['path'])
        server.start()
        proxy = server.create_proxy()

    headless = int(config.get('xconfig', {}).get('headless', 0))
    if headless:
        display = Display(visible=0, size=(800, 600))
        display.start()

    firefox_profile = webdriver.FirefoxProfile()
    if proxy:
        firefox_profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=firefox_profile)
    driver.implicitly_wait(60)
    return driver, proxy
def bmp_proxy():
    """
    Creates a proxy and a server instance with browsermobproxy.
    Reference: http://browsermob-proxy-py.readthedocs.org/en/latest/index.html

    The server port is read from BROWSERMOB_PROXY_PORT (default 8080) and
    'localhost' is remapped to BROWSERMOB_PROXY_HOST (default 127.0.0.1).

    Returns:
        (proxy, server)
    """
    def create_proxy():
        """
        Try to create a proxy.

        Returns (True, proxy) on success and (False, None) otherwise.
        """
        try:
            proxy = server.create_proxy()
        except Exception:
            # Only ordinary errors mean "not ready yet"; KeyboardInterrupt
            # and SystemExit must still abort the wait.
            return False, None
        return True, proxy

    port = int(os.environ.get('BROWSERMOB_PROXY_PORT', 8080))
    server = Server('browsermob-proxy', options={'port': port})

    try:
        # If anything in this block raises an exception, make sure we kill
        # the server process before exiting.
        server.start()

        # Using the promise module to wait for the server to be responsive.
        # The server.create_proxy function sometimes raises connection
        # refused errors if the server isn't ready yet.
        proxy = Promise(
            create_proxy, 'browsermobproxy is responsive', timeout=10
        ).fulfill()

        proxy_host = os.environ.get('BROWSERMOB_PROXY_HOST', '127.0.0.1')
        proxy.remap_hosts('localhost', proxy_host)
    except BaseException:
        # Make sure that the server process is stopped.
        stop_server(server)
        raise

    return proxy, server
class Proxy(object):
    """Browsermob proxy bound to one test; records a HAR named after it.

    The proxy server is started on construction. ``stop_proxy`` writes
    ``<test_id>.har`` to the current directory and stops the server.
    """

    proxy = None
    proxy_server = None
    test_id = None

    def __init__(self, test_id):
        self.test_id = test_id
        self.start_proxy()

    def start_proxy(self):
        """Start the server, create the proxy and begin a new HAR.

        Applies ``config.blacklist`` when it is set. Returns ``self``.
        """
        self.proxy_server = Server(config.proxy_bin)
        self.proxy_server.start()
        self.proxy = self.proxy_server.create_proxy()
        if config.blacklist:
            self.set_blacklist(config.blacklist)
        self.proxy.new_har(self.test_id)
        logger.debug('Browsermob proxy started.')
        return self

    def stop_proxy(self):
        """Write the captured HAR to ``<test_id>.har`` and stop the server.

        The server is stopped even if fetching or writing the HAR fails.
        """
        filename = '{}.har'.format(self.test_id)
        try:
            with open(filename, 'w') as harfile:
                json.dump(self.proxy.har, harfile)
        finally:
            self.proxy_server.stop()
            self.proxy = None
            self.proxy_server = None
        logger.debug('Browsermob proxy stopped. HAR created: {}'
                     .format(filename))

    def set_blacklist(self, domain_list):
        """Make the proxy answer 404 for URLs on each domain in the list."""
        for domain in domain_list:
            # NOTE(review): the domain is inserted unescaped and followed by
            # '*', so dots match any character and the last character of
            # the domain is optional -- confirm whether that is intended.
            self.proxy.blacklist("^https?://([a-z0-9-]+[.])*{}*.*"
                                 .format(domain), 404)
        logger.debug("Proxy blacklist set.")

    def get_blacklist(self):
        """Return the HTTP response of the proxy's ``/blacklist`` endpoint."""
        return requests.get('{}{}/blacklist'
                            .format(config.proxy_api, self.proxy.port))
class ad_driver():
    """Browser driver routed through a browsermob proxy.

    ``execute`` runs one test step and returns the HAR entries that were
    recorded while it ran.
    """

    _driver = None
    _server = None
    _proxy = None

    def __init__(self, path_to_batch, browser="chrome"):
        """Start the proxy server and the requested browser.

        Args:
            path_to_batch: Path of the browsermob-proxy launcher.
            browser: ``'chrome'`` or ``'ff'``.

        Raises:
            ValueError: for any other ``browser`` value. The proxy server
                is stopped again before the error propagates.
        """
        # start browsermob proxy
        self._server = Server(path_to_batch)
        self._server.start()
        try:
            self._proxy = self._server.create_proxy()

            # Init browser profile
            # Strings are compared by value; identity ('is') only works by
            # accident of interning.
            if browser == "chrome":
                PROXY = "localhost:%s" % self._proxy.port  # IP:PORT or HOST:PORT
                chrome_options = webdriver.ChromeOptions()
                chrome_options.add_argument('--proxy-server=%s' % PROXY)
                self._driver = webdriver.Chrome(chrome_options=chrome_options)
            elif browser == "ff":
                profile = webdriver.FirefoxProfile()
                profile.set_proxy(self._proxy.selenium_proxy())
                self._driver = webdriver.Firefox(firefox_profile=profile)
            else:
                raise ValueError(
                    "Please set 'browser' variable to any of the value \n 'chrome', 'ff' !")

            self._driver.maximize_window()
            self._driver.implicitly_wait(20)
        except Exception:
            self._server.stop()
            raise

    def execute(self, test):
        """Run ``test`` and return the recorded HAR entries.

        ``test`` is a mapping with ``'name'`` (the function in
        ``test_steps`` to call with the driver) and ``'file'`` (the page
        below the test data directory to load first).
        """
        self._proxy.new_har(test["name"])
        self._driver.get(_test_data_dir + os.sep + test['file'])
        time.sleep(2)
        callToTestMethod = getattr(test_steps, test["name"])
        callToTestMethod(self._driver)
        har = self._proxy.har
        requests = har['log']['entries']
        return requests

    def quit(self):
        """Stop the proxy server and close the browser."""
        try:
            self._server.stop()
        finally:
            self._driver.quit()
def init(defaultPort,path):
    """Initialise the proxy manager and start the proxy server once.

    Args:
        defaultPort: Port for the browsermob server (converted to int).
        path: Path of the browsermob-proxy executable.

    The module lock is held for the whole initialisation and is released
    even when the port is invalid or the server fails to start.
    """
    global lock
    global serverPort
    global proxyIsInit
    global serverIsRunning
    global proxyServer
    global indexCount

    lock.acquire()
    try:
        serverPort = int(defaultPort)
        proxyIsInit = True
        print("Initializing Proxy Manager - server port : set to : " + str(serverPort))
        # checks to see if the server has already started
        if not serverIsRunning:
            proxyServer = Server(path, {'port': int(serverPort)})
            proxyServer.start()
            serverIsRunning = True
    finally:
        # Never leave the lock held: later callers would block forever.
        lock.release()
def _setup_proxy_server(self, downstream_kbps=None, upstream_kbps=None, latency=None):
    """Start a browsermob server with one proxy and optional traffic limits.

    Only limits with a truthy value are applied; when none is given the
    proxy is left unthrottled.

    Returns:
        Tuple ``(server, proxy)``.
    """
    bmp_server = Server(BROWSERMOB_PROXY_PATH)
    bmp_server.start()
    bmp_proxy = bmp_server.create_proxy()

    # The proxy server is pretty sluggish, setting the limits might not
    # achieve the desired behavior.
    requested = (
        ('downstream_kbps', downstream_kbps),
        ('upstream_kbps', upstream_kbps),
        ('latency', latency),
    )
    limits = dict((name, value) for name, value in requested if value)
    if limits:
        bmp_proxy.limits(limits)

    return bmp_server, bmp_proxy
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a browsermob proxy server per test.

    Expects the keyword arguments ``browsermob_port`` and
    ``browsermob_script`` at construction time.
    """

    def __init__(self, *args, **kwargs):
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the proxy server; a script path is mandatory."""
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')

        server_options = (
            {'port': self.browsermob_port} if self.browsermob_port else {}
        )
        self.browsermob_server = Server(
            self.browsermob_script, options=server_options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy and point the browser's HTTP/SSL proxy prefs at it.

        Returns the proxy client.
        """
        proxy_client = self.browsermob_server.create_proxy()
        prefs_script = """
            Components.utils.import("resource://gre/modules/Preferences.jsm");
            Preferences.set("network.proxy.type", 1);
            Preferences.set("network.proxy.http", "localhost");
            Preferences.set("network.proxy.http_port", {port});
            Preferences.set("network.proxy.ssl", "localhost");
            Preferences.set("network.proxy.ssl_port", {port});
        """.format(port=proxy_client.port)
        with self.marionette.using_context('chrome'):
            self.marionette.execute_script(prefs_script)
        return proxy_client

    def tearDown(self):
        """Stop the proxy server if one is running."""
        running = self.browsermob_server
        if not running:
            return
        running.stop()
        self.browsermob_server = None

    # Also stop the server when the object is garbage collected.
    __del__ = tearDown
def _capture_har(driver, proxy, url, url_slug, page_label):
    """Load ``url`` once and save the recorded traffic as a HAR file."""
    proxy.new_har(url_slug)
    print('loading {}: {}'.format(page_label, url))
    driver.get(url)
    har_name = '{}-{}.har'.format(url_slug, time.time())
    print('saving HAR file: {}'.format(har_name))
    save_har(har_name, proxy.har)


def create_hars(urls, browsermob_dir, run_cached):
    """Record a HAR file for every URL, each with a fresh proxy and browser.

    Args:
        urls: Iterable of page URLs.
        browsermob_dir: Install directory of browsermob-proxy (the
            launcher is expected at ``bin/browsermob-proxy`` below it).
        run_cached: When true, every page is loaded a second time in the
            same browser and saved with a ``-cached`` suffix.

    The browser and the proxy server are shut down after every URL, also
    when loading or saving fails.
    """
    for url in urls:
        print('starting browsermob proxy')
        server = Server('{}/bin/browsermob-proxy'.format(browsermob_dir))
        server.start()
        driver = None
        try:
            proxy = server.create_proxy()
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)

            _capture_har(driver, proxy, url, slugify(url), 'page')
            if run_cached:
                _capture_har(driver, proxy, url,
                             '{}-cached'.format(slugify(url)), 'cached page')
        finally:
            try:
                if driver is not None:
                    driver.quit()
            finally:
                print('stopping browsermob proxy')
                server.stop()
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a browsermob proxy server per test.

    Expects the keyword arguments ``browsermob_port`` and
    ``browsermob_script`` at construction time.
    """

    def __init__(self, *args, **kwargs):
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the proxy server; a script path is mandatory."""
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')

        server_options = (
            {'port': self.browsermob_port} if self.browsermob_port else {}
        )
        self.browsermob_server = Server(
            self.browsermob_script, options=server_options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy and point the browser's HTTP/SSL proxy prefs at it.

        Returns the proxy client.
        """
        proxy_client = self.browsermob_server.create_proxy()
        prefs_script = """
            Services.prefs.setIntPref('network.proxy.type', 1);
            Services.prefs.setCharPref('network.proxy.http', 'localhost');
            Services.prefs.setIntPref('network.proxy.http_port', %(port)s);
            Services.prefs.setCharPref('network.proxy.ssl', 'localhost');
            Services.prefs.setIntPref('network.proxy.ssl_port', %(port)s);
        """ % {"port": proxy_client.port}
        with self.marionette.using_context('chrome'):
            self.marionette.execute_script(prefs_script)
        return proxy_client

    def tearDown(self):
        """Stop the proxy server if one is running."""
        running = self.browsermob_server
        if not running:
            return
        running.stop()
        self.browsermob_server = None

    # Also stop the server when the object is garbage collected.
    __del__ = tearDown
def _click_video_player(driver):
    """Switch into the first frame and click the element named 'vplayer'."""
    driver.switch_to.frame(0)
    jwplayer = driver.find_element_by_name('vplayer')
    jwplayer.click()


def retrieve_har():
    """Load the page, start the video player and save the traffic as HAR.

    Reads the module-level names ``har_name``, ``har_save_path``, ``path``,
    ``ff_profile`` and ``url``. The player is clicked up to two times
    (5 seconds apart); if both attempts fail, the process exits without
    writing a file. The proxy server and the browser are always shut down.
    """
    import io

    print("Retrieving .har file using generated url...")

    har_name_ex = har_name + ".har"
    complete_har_path = os.path.join(har_save_path, har_name_ex)

    # Magic starts here:
    server = Server(path)
    server.start()
    driver = None
    clicked = False
    try:
        proxy = server.create_proxy()

        profile = webdriver.FirefoxProfile(ff_profile)
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)

        # Broken script to load the page in Google Chrome instead of
        # Mozilla Firefox:
        #   chrome_options = webdriver.ChromeOptions()
        #   chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        #   driver = webdriver.Chrome(chrome_options = chrome_options)

        proxy.new_har(har_name, options={'captureHeaders': True})
        driver.get(url)

        # Trying to click 'vplayer'
        try:
            _click_video_player(driver)
            clicked = True
        except Exception:
            print("Couldn't click player!")
            print("Trying again in 5 seconds...")
            time.sleep(5)
            # Try it again...
            try:
                _click_video_player(driver)
                clicked = True
            except Exception:
                print("Not able to click the video player")

        if clicked:
            time.sleep(1)
            # Exporting the wanted .har file
            # indent=4 puts the .har file on separated lines
            result = json.dumps(proxy.har, ensure_ascii=False, indent=4)
            # ensure_ascii=False can yield non-ASCII text; write it through
            # an explicit UTF-8 encoder instead of str().
            if isinstance(result, bytes):
                result = result.decode('utf-8')
            with io.open(complete_har_path, 'w', encoding='utf-8') as har_file:
                har_file.write(result)
    finally:
        # Stop the server and the driver
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()

    if not clicked:
        time.sleep(3)
        sys.exit()
def run_webdriver(self, start_url, port, config, download_dir):
    """Analyse ``start_url`` in Firefox behind a browsermob proxy.

    Args:
        start_url: Parsed URL; its ``hostname`` names the HAR and the
            object is passed on to ``self.analyse_page``.
        port: Port for the browsermob server.
        config: Object providing ``referer``, ``useragent``, ``url`` and
            ``firefoxprofile``.
        download_dir: Directory Firefox saves downloads to.

    Sets the module globals ``useragent`` and ``referer``. Errors during
    the browser session are logged, not raised; proxy, server and browser
    are shut down exactly once in every case.
    """
    global useragent
    global referer

    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    logging.info("Starting WebRunner")

    if config.referer:
        referer = config.referer
    else:
        referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
            config.url, config.url)
    if config.useragent:
        useragent = config.useragent
    else:
        useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

    logging.debug("Running with UserAgent: {}".format(useragent))
    logging.debug("Running with Referer: {}".format(referer))
    logging.debug("Checking URL: {}".format(config.url))

    server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
    server.start()
    proxy = server.create_proxy()
    proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})
    # Interceptor that adds the Referer header when the request has none.
    request_js = (
        'var referer = request.getProxyRequest().getField("Referer");'
        'addReferer(request);'
        'function addReferer(r){'
        'if (! referer ) {'
        'r.addRequestHeader("Referer","' + referer + '");'
        '}'
        'return;'
        '}')
    proxy.request_interceptor(request_js)

    if config.firefoxprofile:
        firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
    else:
        firefox_profile = FirefoxProfile()
    logging.debug("Using profile {}".format(firefox_profile.path))

    firefox_profile.set_preference("security.OCSP.enabled", 0)
    firefox_profile.set_preference("browser.download.folderList", 2)
    firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
    firefox_profile.set_preference("browser.download.dir", download_dir)
    firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                   "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
    firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    firefox_profile.set_preference("network.proxy.type", 1)
    firefox_profile.set_proxy(proxy.selenium_proxy())

    driver = None
    har = None
    try:
        driver = WebDriver(firefox_profile)
        proxy.new_har(start_url.hostname,
                      options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
        self.analyse_page(driver, start_url)
        har = proxy.har
        logging.info("Stopping WebRunner")
    except Exception as e:
        logging.error(e)
    finally:
        # Run every shutdown step once; a failing step is logged and does
        # not keep the remaining ones from running. The browser is only
        # quit when it was actually started.
        cleanup_steps = [proxy.close, server.stop]
        if driver is not None:
            cleanup_steps.append(driver.quit)
        for step in cleanup_steps:
            try:
                step()
            except Exception as e:
                logging.error(e)

    if har is not None:
        try:
            har = Har(har)
        except Exception as e:
            logging.error(e)
class WebTrafficGenerator:
    """Replays a list of URLs through browser workers behind a browsermob
    proxy, collects the resulting HARs and plots timing statistics.
    """

    def __init__(self,args):
        """Read the proxy location from the environment and copy the run
        options out of the ``args`` mapping.
        """
        # BROWSERMOBPROXY_BIN overrides the default relative location.
        self.browser_mob_proxy_location = os.environ.get("BROWSERMOBPROXY_BIN")
        if not self.browser_mob_proxy_location:
            self.browser_mob_proxy_location = "./browsermob-proxy/bin/browsermob-proxy"

        # Parse arguments
        self.urls_file = args['in_file']
        self.out_stats_folder = args['out_folder']
        self.timeout = args['timeout']
        self.save_headers = args['headers']
        self.max_interval = args['max_interval']
        self.browsers_num = args['browsers']
        self.max_requests = args['limit_urls']
        self.no_sleep = args['no_sleep']
        self.no_https = args['no_https']

    def run(self):
        """Run the whole experiment: read the history file, start the
        proxy and the workers, feed them URLs, then save HARs and plots.

        Any exception is printed with its traceback instead of being
        raised; the temporary download directory is always removed.
        """
        # create temporary directory for downloads
        self.temp_dir = tempfile.TemporaryDirectory()

        try:
            # Read URLs and time
            self.urls=[]
            self.thinking_times=[]
            visit_timestamps=[]

            with open(self.urls_file ,"r") as f:
                history = f.read().splitlines()

            # Each line is split on whitespace: entry[0] is the visit
            # timestamp, entry[1] the URL.
            for line in history:
                entry = line.split()

                # Skip file:// URLs and URLs whose host starts with
                # "10." or "192.168." (offset 7/8 skips the scheme).
                if not (entry[1].lower().startswith("file://") or
                        (entry[1].lower().startswith("http://") and
                         (entry[1].lower().startswith("10.",7) or
                          entry[1].lower().startswith("192.168.",7))) or
                        (entry[1].lower().startswith("https://") and
                         (entry[1].lower().startswith("10.",8) or
                          entry[1].lower().startswith("192.168.",8)))):

                    # convert timestamp in seconds
                    # (presumably the input is in microseconds -- TODO confirm)
                    visit_timestamps.append(float(entry[0])/1000000)

                    # With no_https set, https URLs still contribute a
                    # timestamp but are not requested.
                    if (not self.no_https or not entry[1].lower().startswith("https://")):
                        self.urls.append(entry[1])

            # A falsy limit means "request every URL".
            if not self.max_requests:
                self.max_requests = len(self.urls)

            visit_timestamps.sort()

            # Gaps between consecutive visits, capped at max_interval,
            # form the thinking-time samples.
            for i in range(1, len(visit_timestamps)):
                think_time=(visit_timestamps[i]-visit_timestamps[i-1])
                if think_time<=self.max_interval:
                    self.thinking_times.append(think_time)

            self.cdf, self.inverse_cdf, self.cdf_samples = compute_cdf(self.thinking_times)

            print ("Number of URLs: "+str(len(self.urls)))

            # Create or clean statistics folder
            if not os.path.exists(self.out_stats_folder):
                os.makedirs(self.out_stats_folder)
            else:
                # Only plain files are removed; sub-directories are kept.
                for file in os.listdir(self.out_stats_folder):
                    file_path = os.path.join(self.out_stats_folder, file)
                    if os.path.isfile(file_path):
                        os.remove(file_path)

            # Plot history statistics
            self.plot_thinking_time_cdf()
            #self.plot_thinking_time_inverse_cdf()

            # Start Proxy
            self.server = Server(self.browser_mob_proxy_location)
            self.server.start()

            # start queues
            self.urls_queue = Queue()
            self.hars_queue = Queue()

            # start Barrier (for coordinating proxy server restart)
            self.barrier = Barrier(self.browsers_num, action = self.restart_proxy_server)

            try:
                self.workers = [Browser(i, self.server,
                                        self.urls_queue, self.hars_queue,
                                        self.barrier,
                                        self.timeout, self.save_headers,
                                        self.temp_dir.name)
                                for i in range(self.browsers_num)]

                for w in self.workers:
                    w.start()

                number_of_requests = 0

                # Start requesting pages
                for url in self.urls:
                    if number_of_requests==self.max_requests:
                        break

                    self.urls_queue.put(url)
                    number_of_requests += 1

                    # Pause for a sampled thinking time between requests.
                    if not self.no_sleep:
                        time.sleep(self.get_thinking_time())

                # One None per worker is queued after the last URL
                # (presumably the workers' stop signal -- verify in Browser).
                for w in self.workers:
                    self.urls_queue.put(None)

                self.hars = []

                # One result list is taken from the queue per worker.
                for w in self.workers:
                    browser_hars = self.hars_queue.get()
                    self.hars.extend(browser_hars)

                # write HAR file
                with open(os.path.join(self.out_stats_folder,"HARs.json"),"w") as f:
                    json.dump(self.hars,f)

                # Gather statistics
                self.stats = {
                    "totalTime":[],
                    "blocked":[],
                    "dns":[],
                    "connect":[],
                    "send":[],
                    "wait":[],
                    "receive":[],
                    "ssl":[]
                }

                # Values equal to -1 are skipped for every metric.
                for har in self.hars:
                    if har["log"]["totalTime"]!=-1:
                        self.stats["totalTime"].append(har["log"]["totalTime"])

                    for entry in har["log"]["entries"]:
                        if (not self.no_https or not entry["request"]["url"].lower().startswith("https://")):
                            # Queuing
                            if entry["timings"]["blocked"]!=-1:
                                self.stats["blocked"].append(entry["timings"]["blocked"])

                            # DNS resolution
                            if entry["timings"]["dns"]!=-1:
                                self.stats["dns"].append(entry["timings"]["dns"])

                            # TCP Connection
                            if entry["timings"]["connect"]!=-1:
                                self.stats["connect"].append(entry["timings"]["connect"])

                            # HTTP Request send
                            if entry["timings"]["send"]!=-1:
                                self.stats["send"].append(entry["timings"]["send"])

                            # Wait the server
                            if entry["timings"]["wait"]!=-1:
                                self.stats["wait"].append(entry["timings"]["wait"])

                            # HTTP Response receive
                            if entry["timings"]["receive"]!=-1:
                                self.stats["receive"].append(entry["timings"]["receive"])

                            if entry["timings"]["ssl"]!=-1:
                                self.stats["ssl"].append(entry["timings"]["ssl"])

                # Save statistics
                self.plot_stats()

                for w in self.workers:
                    w.join()

            except KeyboardInterrupt:
                # Ctrl-C ends the run quietly; cleanup happens below.
                pass

            finally:
                self.urls_queue.close()
                self.hars_queue.close()
                self.server.stop()

        except Exception as e:
            print("Exception: " + str(e))
            import traceback
            traceback.print_exc()

        finally:
            self.temp_dir.cleanup()

    def restart_proxy_server(self):
        """Stop the current proxy server (errors are only printed) and
        start a new one. Passed as the ``action`` of the barrier in run().
        """
        try:
            self.server.stop()
        except Exception as e:
            print("Failed to stop proxy server. Exception: " + str(e))

        # Start Proxy
        self.server = Server(self.browser_mob_proxy_location)
        self.server.start()

        print("Proxy server restarted")

    def plot_thinking_time_cdf(self):
        """Save the thinking-time CDF as thinking_time_cdf.png in the
        statistics folder.
        """
        x = np.linspace(min(self.thinking_times), max(self.thinking_times), num=10000, endpoint=True)

        # Plot the cdf
        fig = plt.figure()
        axes = fig.add_subplot(111)
        axes.plot(x, self.cdf(x))
        axes.set_ylim((0,1))
        axes.set_xlabel("Seconds")
        axes.set_ylabel("CDF")
        axes.set_title("Thinking time")
        axes.grid(True)

        fig.savefig(os.path.join(self.out_stats_folder,"thinking_time_cdf.png"))

    def plot_thinking_time_inverse_cdf(self):
        """Save the inverse thinking-time CDF as
        thinking_time_inverse_cdf.png in the statistics folder.
        """
        x = np.linspace(min(self.cdf_samples), max(self.cdf_samples), num=10000, endpoint=True)

        # Plot the cdf
        fig = plt.figure()
        axes = fig.add_subplot(111)
        axes.plot(x, self.inverse_cdf(x))
        axes.set_xlim((0,1))
        axes.set_ylabel("Seconds")
        axes.set_xlabel("CDF")
        axes.set_title("Thinking time")
        axes.grid(True)

        fig.savefig(os.path.join(self.out_stats_folder,"thinking_time_inverse_cdf.png"))

    def get_thinking_time(self):
        """Return a thinking time: a uniform draw between the smallest and
        largest CDF sample, mapped through the inverse CDF.
        """
        rand=random.uniform(min(self.cdf_samples),max(self.cdf_samples))
        # Local name only; it hides the time module inside this method.
        time = float(self.inverse_cdf(rand))
        return time

    def plot_stats(self):
        """Plot the CDFs of the gathered statistics and save them as
        page_load_cdf.png, timings_cdf.png and timings_cdf_log.png.
        """
        fig_total = plt.figure()
        axes_total = fig_total.add_subplot(111)

        fig_timings = plt.figure()
        axes_timings = fig_timings.add_subplot(1,1,1)

        fig_timings_log = plt.figure()
        axes_timings_log = fig_timings_log.add_subplot(1,1,1)

        for key in self.stats:
            # Metrics with fewer than two distinct values are not plotted.
            if len(set(self.stats[key]))>1:
                # compute_cdf returns a tuple; element 0 is the CDF callable.
                cdf = compute_cdf(self.stats[key])

                x = np.linspace(min(self.stats[key]), max(self.stats[key]), num=10000, endpoint=True)

                # Plot the cdf
                if key=="totalTime":
                    # x is divided by 1000 for the axis labelled "Seconds".
                    axes_total.plot(x/1000, cdf[0](x), label=key)
                else:
                    axes_timings.plot(x, cdf[0](x), label=key)

                    # zero is not valid with log axes
                    if min(self.stats[key])==0:
                        non_zero_min = find_non_zero_min(self.stats[key])
                        if non_zero_min == 0:
                            continue

                        x = np.linspace(non_zero_min, max(self.stats[key]), num=10000, endpoint=True)

                    axes_timings_log.plot(x, cdf[0](x), label=key)

        axes_total.set_ylim((0,1))
        axes_total.set_xlabel("Seconds")
        axes_total.set_ylabel("CDF")
        axes_total.set_title("Page load time")
        axes_total.grid(True)

        fig_total.savefig(os.path.join(self.out_stats_folder,"page_load_cdf.png"))

        axes_timings.set_ylim((0,1))
        axes_timings.set_xlabel("Milliseconds")
        axes_timings.set_ylabel("CDF")
        axes_timings.set_title("Single resource timings")
        axes_timings.grid(True)
        axes_timings.legend(loc='best')

        axes_timings_log.set_ylim((0,1))
        axes_timings_log.set_xlabel("Milliseconds")
        axes_timings_log.set_ylabel("CDF")
        axes_timings_log.set_xscale("log")
        axes_timings_log.set_title("Single resource timings")
        axes_timings_log.grid(True, which="both", axis="x")
        axes_timings_log.grid(True, which="major", axis="y")
        axes_timings_log.legend(loc='best')

        fig_timings.savefig(os.path.join(self.out_stats_folder,"timings_cdf.png"))
        fig_timings_log.savefig(os.path.join(self.out_stats_folder,"timings_cdf_log.png"))
from selenium.webdriver.chrome.options import Options
from splinter.browser import Browser
from browsermobproxy import Server
from time import sleep
from twilio.rest import Client

# Capture all network requests.
# NOTE: the proxy server is started as a side effect of importing this module.
server = Server(
    "D:/browsermob-proxy-2.1.4-bin/browsermob-proxy-2.1.4/bin/browsermob-proxy"
)
server.start()
proxy = server.create_proxy()
# Route Chrome through the proxy created above.
chrome_options = Options()
chrome_options.add_argument('--proxy-server={host}:{port}'.format(
    host='localhost', port=proxy.port))
# disable-infobars


class HuoChe(object):
    """Holds the browser driver settings, the login credentials and the
    target URLs as class attributes.
    """
    driver_name = 'Chrome'
    executable_path = 'D:\eng\chromedriver.exe'
    # Username and password
    # NOTE(review): credentials are hard-coded in source -- consider loading
    # them from the environment or a config file kept out of version control.
    username = u"20181754121"
    passwd = u"skd158CF"
    # URLs (the bare string below is a no-op expression, not a docstring)
    """网址"""
    # Course-selection URL of our school's Qiangzhi system
    select_url = "http://jw.sdufe.edu.cn/jsxsd/xsxk/xsxk_index?jx0502zbid=70A1CABD2E054E06A233181DEE7CB6E9"
    # Qiangzhi login URL
    login_url = "http://jw.sdufe.edu.cn/"
def run_webdriver(self, start_url, port, config, download_dir):
    """
    Run Selenium WebDriver

    Loads ``start_url`` in Firefox (inside the xvfb display) behind a
    browsermob proxy, runs ``self.analyse_page`` and logs the browser
    console output.

    Args:
        start_url: Parsed URL; its ``hostname`` names the HAR and the
            object is passed on to ``self.analyse_page``.
        port: Port for the browsermob server.
        config: Object providing ``referer``, ``useragent``, ``url`` and
            ``firefoxprofile``.
        download_dir: Directory Firefox saves downloads to.

    Errors during the browser session are logged, not raised; proxy,
    server, browser and xvfb are shut down exactly once in every case.
    """
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    self.logger.info("Starting WebRunner")

    if config.referer:
        referer = config.referer
    else:
        referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
            config.url, config.url)
    if config.useragent:
        useragent = config.useragent
    else:
        useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

    self.logger.debug("Running with UserAgent: {}".format(useragent))
    self.logger.debug("Running with Referer: {}".format(referer))
    self.logger.debug("Checking URL: {}".format(config.url))

    server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
    server.start()
    proxy = server.create_proxy()
    proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})
    # Interceptor that adds the Referer header when the request has none.
    request_js = (
        'var referer = request.getProxyRequest().getField("Referer");'
        'addReferer(request);'
        'function addReferer(r){'
        'if (! referer ) {'
        'r.addRequestHeader("Referer","' + referer + '");'
        '}'
        'return;'
        '}')
    proxy.request_interceptor(request_js)

    if config.firefoxprofile is not None and os.path.isdir(config.firefoxprofile):
        self.logger.debug("Using existing firefox profile")
        firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
    else:
        firefox_profile = FirefoxProfile()
    self.logger.debug("Using profile {}".format(firefox_profile.path))

    firefox_profile.set_preference("security.OCSP.enabled", 0)
    firefox_profile.set_preference("browser.download.folderList", 2)
    firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
    firefox_profile.set_preference("browser.download.dir", download_dir)
    firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                   "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
    firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    firefox_profile.set_preference("security.mixed_content.block_active_content", False)
    firefox_profile.set_preference("security.mixed_content.block_display_content", False)
    firefox_profile.set_preference("extensions.blocklist.enabled", False)
    firefox_profile.set_preference("network.proxy.type", 1)
    firefox_profile.set_proxy(proxy.selenium_proxy())
    firefox_profile.set_preference("webdriver.log.file", "/tmp/ff.log")
    firefox_profile.set_preference("webdriver.log.driver", "DEBUG")
    firefox_profile.set_preference("browser.newtabpage.enhanced", False)
    firefox_profile.set_preference("browser.newtabpage.enabled", False)
    firefox_profile.set_preference("browser.newtabpage.directory.ping", "")
    firefox_profile.set_preference("browser.newtabpage.directory.source", "")
    firefox_profile.set_preference("browser.search.geoip.url", "")

    driver = None
    har = None
    try:
        self.xvfb.start()
        # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict
        # and must not be modified in place.
        capabilities = DesiredCapabilities.FIREFOX.copy()
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        # Prefer a Firefox binary shipped inside the profile directory.
        if os.path.exists("{}/firefox".format(firefox_profile.path)):
            binary = FirefoxBinary("{}/firefox".format(firefox_profile.path))
        else:
            binary = FirefoxBinary("/usr/bin/firefox")
        driver = WebDriver(capabilities=capabilities,
                           firefox_profile=firefox_profile,
                           firefox_binary=binary)
        proxy.new_har(start_url.hostname,
                      options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
        self.analyse_page(driver, start_url)
        for entry in driver.get_log('browser'):
            self.logger.info("Firefox: {}".format(entry))
        har = proxy.har
        self.logger.info("Stopping WebRunner")
    except Exception as e:
        self.logger.error(e)
    finally:
        # Run every shutdown step once; a failing step is logged and does
        # not keep the remaining ones from running. The browser is only
        # quit when it was actually started.
        cleanup_steps = [proxy.close, server.stop]
        if driver is not None:
            cleanup_steps.append(driver.quit)
        cleanup_steps.append(self.xvfb.stop)
        for step in cleanup_steps:
            try:
                step()
            except Exception as e:
                self.logger.error(e)

    if har is not None:
        try:
            har = Har(har)
        except Exception as e:
            self.logger.error(e)
class BrowserMobLibrary():
    """Keyword library that drives a BrowserMob Proxy server and its proxies.

    The library keeps track of one server, every proxy created through
    `create_proxy`, and a single "active" proxy that all traffic-related
    keywords operate on.  The ``ROBOT_LIBRARY_*`` attributes declare the
    library scope and version.
    """

    ROBOT_LIBRARY_SCOPE = 'GLOBAL'
    ROBOT_LIBRARY_VERSION = VERSION

    def __init__(self):
        self.isServerStarted = False
        self.activeProxy = None
        self.server = None
        self.proxies = []

    def _proxy(self):
        """Return the active proxy, raising if none has been created/selected."""
        if self.activeProxy is None:
            raise Exception("No proxy has been created")
        return self.activeProxy

    def start_browsermob(self, browsermob_path):
        """Start the BrowserMob server found at `browsermob_path`."""
        self.server = Server(browsermob_path)
        self.server.start()
        self.isServerStarted = True

    def stop_browsermob(self):
        """Stop the BrowserMob server and forget it."""
        self.server.stop()
        self.server = None
        self.isServerStarted = False

    def create_proxy(self):
        """Create a proxy, make it the active one and return it.

        The very same proxy object is tracked, activated and returned, so
        the caller and the library always talk to one proxy.
        """
        proxy = self.server.create_proxy()
        self.activeProxy = proxy
        self.proxies.append(proxy)
        return proxy

    def close_proxy(self, proxy):
        """Close `proxy` and stop tracking it.

        Raises ValueError (from ``list.remove``) when `proxy` is not one of
        the tracked proxies.
        """
        self.proxies.remove(proxy)
        # Never keep a closed proxy as the active one.
        if proxy is self.activeProxy:
            self.activeProxy = None
        proxy.close()

    def close_active_proxy(self):
        """Close the active proxy; raises if there is no active proxy."""
        self.close_proxy(self._proxy())

    def set_active_proxy(self, proxy):
        """Select the proxy used by the traffic-related keywords."""
        self.activeProxy = proxy

    def get_active_proxy(self):
        """Return the active proxy, or None when there is none."""
        return self.activeProxy

    def get_all_proxies(self):
        """Return the list of proxies created and not yet closed."""
        return self.proxies

    def close_all_proxies(self):
        """Close every tracked proxy and reset the bookkeeping."""
        open_proxies = self.proxies
        # Rebind instead of clearing in place so a list previously handed
        # out by get_all_proxies() is not emptied under the caller.
        self.proxies = []
        self.activeProxy = None
        for proxy in open_proxies:
            proxy.close()

    def capture_traffic(self, reference=None, **options):
        """Start a new HAR on the active proxy."""
        return self._proxy().new_har(reference, options)

    def get_captured_traffic(self):
        """Return the HAR collected by the active proxy."""
        return self._proxy().har

    def set_capture_reference(self, reference=None):
        """Start a new page in the current HAR."""
        return self._proxy().new_page(reference)

    def ignore_all_traffic_matching(self, regexp, status_code):
        """Blacklist requests matching `regexp`, answering with `status_code`."""
        return self._proxy().blacklist(regexp, status_code)

    def only_capture_traffic_matching(self, regexp, status_code):
        """Whitelist requests matching `regexp`; others get `status_code`."""
        return self._proxy().whitelist(regexp, status_code)

    def use_basic_authentication(self, domain, username, password):
        """Send basic-auth credentials for `domain` through the active proxy."""
        return self._proxy().basic_authentication(domain, username, password)

    def set_headers(self, headers):
        """Set the headers added by the active proxy to requests."""
        return self._proxy().headers(headers)

    def set_response_interceptor(self, js):
        """Install the JavaScript response interceptor `js`."""
        return self._proxy().response_interceptor(js)

    def set_request_interceptor(self, js):
        """Install the JavaScript request interceptor `js`."""
        return self._proxy().request_interceptor(js)

    def set_bandwith_limits(self, **options):
        """Apply bandwidth/latency limits given as keyword options."""
        return self._proxy().limits(options)

    def set_proxy_timeouts(self, **options):
        """Apply proxy timeouts given as keyword options."""
        return self._proxy().timeouts(options)

    def remap_hosts(self, address, ip_address):
        """Make the active proxy resolve `address` to `ip_address`."""
        return self._proxy().remap_hosts(address, ip_address)

    def wait_for_traffic_to_stop(self, quiet_period, timeout):
        """Block until the proxy saw no traffic for `quiet_period`."""
        return self._proxy().wait_for_traffic_to_stop(quiet_period, timeout)

    def clear_proxy_dns_cache(self):
        """Clear the DNS cache of the active proxy."""
        return self._proxy().clear_dns_cache()

    def rewrite_url(self, match, replace):
        """Rewrite URLs matching `match` to `replace`."""
        return self._proxy().rewrite_url(match, replace)
from browsermobproxy import Server
from selenium import webdriver

# Capture the network traffic of one page load as a HAR and print it.
server = Server("/root/Desktop/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy")
server.start()
driver = None
try:
    proxy = server.create_proxy()

    # Route Firefox through the BrowserMob proxy.
    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)

    proxy.new_har("google")
    driver.get("http://www.google.co.uk")
    test = proxy.har  # returns a HAR JSON blob
    print(test)
finally:
    # Always shut down the browser and the proxy server, even when the page
    # load fails, so neither process is left running.
    try:
        if driver is not None:
            driver.quit()
    finally:
        server.stop()
class HarProfiler:
    """Load a page through BrowserMob Proxy and save the traffic as HAR files.

    Use as a context manager: entering starts the (optional) virtual display
    and the BrowserMob server, leaving stops them again.

    `config` is a mapping that must provide 'har_dir', 'browsermob_dir',
    'label_prefix', 'virtual_display', 'virtual_display_size_x' and
    'virtual_display_size_y'.
    """

    def __init__(self, config, url):
        self.har_dir = config['har_dir']
        self.browsermob_dir = config['browsermob_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']
        # Only set once a virtual display has actually been started.
        self.display = None

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both HAR files of one run share the same timestamp.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        try:
            self.server = Server('{}/bin/browsermob-proxy'.format(
                self.browsermob_dir)
            )
            self.server.start()
        except Exception:
            # __exit__ is not called when __enter__ raises, so the display
            # has to be released here.
            self._stop_display()
            raise
        return self

    def __exit__(self, type, value, traceback):
        log.info('stopping browsermob proxy')
        try:
            self.server.stop()
        finally:
            self._stop_display()

    def _stop_display(self):
        """Stop the virtual display if one was started."""
        if self.display is not None:
            log.info('stopping virtual display')
            self.display.stop()
            self.display = None

    def _make_proxied_webdriver(self):
        """Return a (driver, proxy) pair: Firefox routed through a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write `har` as JSON into `har_dir`, creating the directory if needed."""
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def load_page(self, url, run_cached=True):
        """Load `url` and save its HAR; optionally load it a second time.

        The second load (when `run_cached` is true) reuses the same browser
        session and is saved under the "-cached" name.
        """
        driver, proxy = self._make_proxied_webdriver()
        try:
            proxy.new_har(self.label)
            log.info('loading page: {}'.format(url))
            driver.get(url)
            self._save_har(proxy.har)

            if run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(url))
                driver.get(url)
                self._save_har(proxy.har, cached=True)
        finally:
            # Close the browser even when a page load or the save fails.
            driver.quit()

    def slugify(self, text):
        """Return `text` lower-cased with non-alphanumeric runs turned into '-'."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
class performance(object):
    """Collect HAR and ``window.performance`` data for one page load.

    Typical use: ``start_all(args)``, ``create_har(args)``, ``stop_all()``.
    `args` is a mapping with the keys 'browser' ('chrome' or 'firefox'),
    'url' and, optionally, 'path' (directory for the output files).
    """

    def __init__(self, mob_path):
        """Remember the path of the browsermob-proxy executable."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Write `result` to ``<title>.json`` (inside args['path'] if given).

        Text is encoded as UTF-8 before writing, so non-ASCII characters in
        the collected data no longer raise UnicodeEncodeError.
        """
        file_name = title + '.json'
        if 'path' in args:
            file_name = args['path'] + '/' + file_name
        if not isinstance(result, bytes):
            result = result.encode('utf-8')
        # Binary mode: the payload is already encoded.
        with open(file_name, 'wb') as har_file:
            har_file.write(result)

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Start the browser named by args['browser'] behind the proxy.

        Any other browser name leaves `self.driver` unset.
        """
        # chromedriver
        if args['browser'] == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            url = urlparse.urlparse(self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver,
                                           chrome_options=chrome_options)
        # firefox
        if args['browser'] == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self, args):
        """Start the proxy server, then the browser.

        If the browser fails to start the server is still running; call
        `stop_all` to release it.
        """
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Load args['url'] and store 'har.json' and 'perf.json'."""
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        performance = json.dumps(
            self.driver.execute_script("return window.performance"),
            ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Stop whatever was started; safe to call after a failed start."""
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            # Quit the browser even when stopping the server fails.
            if self.driver is not None:
                self.driver.quit()
class performance(object):
    """Record a page load as HAR plus the browser's ``window.performance``.

    Call order: ``start_all(args)`` -> ``create_har(args)`` -> ``stop_all()``.
    `args` is a mapping holding 'browser' ('chrome' or 'firefox'), 'url'
    and optionally 'path', the directory the JSON files are written to.
    """

    def __init__(self, mob_path):
        """Store the location of the browsermob-proxy executable."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Store `result` UTF-8 encoded in ``<title>.json``.

        The file goes into args['path'] when that key is present, otherwise
        into the current directory.
        """
        target = title + '.json'
        if 'path' in args:
            target = args['path'] + '/' + target
        # Encode only text: already-encoded byte strings are written as-is.
        payload = result if isinstance(result, bytes) else result.encode('utf-8')
        # 'wb' + ``with``: bytes are valid on Python 2 and 3, and the handle
        # is closed even if the write fails.
        with open(target, 'wb') as har_file:
            har_file.write(payload)

    def __start_server(self):
        """Launch the BrowserMob server and open one proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Launch Chrome or Firefox, configured to use the proxy.

        `self.driver` stays unset for any other args['browser'] value.
        """
        browser = args['browser']
        # chromedriver
        if browser == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            url = urlparse.urlparse(self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver,
                                           chrome_options=chrome_options)
        # firefox
        if browser == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self, args):
        """Start server and browser; on failure use `stop_all` to clean up."""
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Open args['url'], then write 'har.json' and 'perf.json'."""
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        performance = json.dumps(
            self.driver.execute_script("return window.performance"),
            ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Stop the server and quit the browser, skipping what never started."""
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))
        try:
            if self.server is not None:
                self.server.stop()
        finally:
            # The browser must be released even if the server refuses to stop.
            if self.driver is not None:
                self.driver.quit()
class HarProfiler:
    """Profile one URL through BrowserMob Proxy and store the result as HAR.

    Use as a context manager: entering starts the optional virtual display
    and the BrowserMob server, leaving stops them.

    `config` is a mapping providing 'browsermob_dir', 'har_dir',
    'label_prefix', 'run_cached', 'virtual_display',
    'virtual_display_size_x' and 'virtual_display_size_y'; 'login_user' and
    'login_password' are optional and only needed when `login_first` is set.
    """

    def __init__(self, config, url, login_first=False):
        self.url = url
        self.login_first = login_first

        self.login_user = config.get('login_user')
        self.login_password = config.get('login_password')

        self.browsermob_dir = config['browsermob_dir']
        self.har_dir = config['har_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.run_cached = config['run_cached']
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both HAR files of one run share the same timestamp.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        self.server = Server('{}/bin/browsermob-proxy'.format(
            self.browsermob_dir)
        )
        self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        log.info('stopping browsermob proxy')
        try:
            self.server.stop()
        finally:
            # Release the display even if the server fails to stop.
            if self.virtual_display:
                log.info('stopping virtual display')
                self.display.stop()

    def _make_proxied_webdriver(self):
        """Return a (driver, proxy) pair: Firefox routed through a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write `har` as JSON into `har_dir`, creating the directory if needed."""
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def _login(self, driver):
        """Log in with the configured credentials.

        Raises RuntimeError when user or password is missing from the config.
        """
        log.info('logging in...')

        error_msg = 'must specify login credentials in yaml config file'
        if self.login_user is None or self.login_password is None:
            raise RuntimeError(error_msg)

        driver.get('https://courses.edx.org/login')

        # handle both old and new style logins
        try:
            email_field = driver.find_element_by_id('email')
            password_field = driver.find_element_by_id('password')
        except NoSuchElementException:
            email_field = driver.find_element_by_id('login-email')
            password_field = driver.find_element_by_id('login-password')
        email_field.send_keys(self.login_user)
        password_field.send_keys(self.login_password)
        password_field.submit()

    def _add_page_event_timings(self, driver, har):
        """Fill onContentLoad/onLoad of the HAR's first page and return `har`.

        The values are computed from ``window.performance.timing`` of the
        page currently loaded in `driver`, relative to navigationStart.
        """
        jscript = textwrap.dedent("""
            var performance = window.performance || {};
            var timings = performance.timing || {};
            return timings;
            """)
        timings = driver.execute_script(jscript)
        page_timings = har['log']['pages'][0]['pageTimings']
        page_timings['onContentLoad'] = (
            timings['domContentLoadedEventEnd'] - timings['navigationStart']
        )
        page_timings['onLoad'] = (
            timings['loadEventEnd'] - timings['navigationStart']
        )
        return har

    def load_page(self):
        """Load the URL (optionally after logging in) and save its HAR.

        When `run_cached` is configured the page is loaded a second time in
        the same browser session and saved under the "-cached" name.
        """
        # Created outside the try block: if this fails there is no driver to
        # quit, and the real error must reach the caller unmasked.
        driver, proxy = self._make_proxied_webdriver()
        try:
            if self.login_first:
                self._login(driver)

            proxy.new_har(self.label)
            log.info('loading page: {}'.format(self.url))
            driver.get(self.url)
            har = self._add_page_event_timings(driver, proxy.har)
            self._save_har(har)

            if self.run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(self.url))
                driver.get(self.url)
                har = self._add_page_event_timings(driver, proxy.har)
                self._save_har(har, cached=True)
        finally:
            driver.quit()

    def slugify(self, text):
        """Return `text` lower-cased with non-alphanumeric runs turned into '-'."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
class Browser:
    """Chrome session routed through a BrowserMob proxy that records HARs.

    Every `get` call writes one ``<epoch-millis>.har`` file into
    `harfilePath` and returns the page source.
    """

    def __init__(self, chromedriverPath, browsermobPath, harfilePath, cookies=None):
        """Start the proxy server and Chrome.

        Args:
            chromedriverPath: path of the chromedriver executable.
            browsermobPath: path of the browsermob-proxy executable.
            harfilePath: directory the HAR files are written to.
            cookies: optional path of a JSON file holding a list of cookies
                that are added to the driver.
        """
        self.harfilePath = harfilePath
        self.proxy = None
        self.driver = None

        self.server = Server(browsermobPath)
        self.server.start()
        try:
            self.proxy = self.server.create_proxy()

            os.environ["webdriver.chrome.driver"] = chromedriverPath
            url = urlparse(self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))

            self.driver = webdriver.Chrome(chromedriverPath,
                                           chrome_options=chrome_options)
            if cookies:
                print("Loading cookies from "+str(cookies))
                with open(cookies, 'r') as cookieFile:
                    cookieJson = json.load(cookieFile)
                for cookie in cookieJson:
                    self.driver.add_cookie(cookie)
        except Exception:
            # The caller never receives the object when construction fails,
            # so release the server (and Chrome, if already started) here.
            self.close()
            raise

    def get(self, url, timeout=20):
        """Load `url`, scroll through it, save the HAR and return the source.

        If the page load times out, loading is interrupted and the partial
        page is used.  If reading the result times out as well, the whole
        call is retried with `timeout` increased by 5 seconds.
        NOTE(review): there is no upper bound on the number of retries.
        """
        print(url)
        self.proxy.new_har(url, {"captureContent":True})
        try:
            self.driver.set_page_load_timeout(timeout)
            self.driver.get(url)
            # Scroll down step by step so lazily loaded content is requested.
            for divisor in (5, 4, 3, 2):
                self.driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight/%d);" % divisor)
                time.sleep(.5) #wait for the page to load
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(4) #wait for the page to load
        except TimeoutException:
            print("Timeout")
            self.driver.find_element_by_tag_name("body").send_keys(Keys.CONTROL+Keys.ESCAPE)

        try:
            source = self.driver.page_source
            result = json.dumps(self.proxy.har, ensure_ascii=False)
            har_name = str(int(time.time()*1000.0))+".har"
            with open(self.harfilePath+"/"+har_name, "w") as harfile:
                harfile.write(result)
            return source
        except TimeoutException:
            print("Retrying, with a timeout of "+str(timeout+5))
            return self.get(url, timeout=timeout+5)

    def close(self):
        """Stop the proxy server and quit Chrome; errors are only reported."""
        try:
            self.server.stop()
        except Exception:
            print("Warning: Error stopping server")
        # driver is None when the constructor failed before starting Chrome.
        if self.driver is not None:
            try:
                self.driver.quit()
            except Exception:
                print("Warning: Error stopping driver")
def instance_browser(self, proxy_enabled, params): """Start web browser and proxy server Args: proxy_enabled: flag to set proxy params: list of parameters -0: browser engine -1: user-agent Returns: Instance of the server, the proxy and the web browser """ if proxy_enabled: try: server = Server(proxy_path) server.start() except Exception as e: raise Exception( 'Error launching server: {exception}'.format(exception=e)) try: proxy = server.create_proxy() except RuntimeError: time.sleep(5) try: proxy = server.create_proxy() except Exception as e: raise Exception( 'Error configuring proxy: {exception}'.format( exception=e)) proxy.new_har() try: proxy_url = urlparse.urlparse(proxy.proxy).path except AttributeError: proxy_url = urlparse(proxy.proxy).path else: server = None proxy = None try: engine = params[0] try: user_agent = USER_AGENTS[params[1]] except LookupError: user_agent = params[1] headless = params[2] except LookupError: raise Exception('Function browser(): 3 arguments needed') try: logger.log( 'NOTE', 'Engine: {engine} | User-agent: {user_agent} | Headless: {headless}' .format(engine=engine, user_agent=user_agent, headless=headless)) if engine == 'chrome': driver_path = self.get_driver_path(engine) ch_opt = webdriver.ChromeOptions() if proxy_enabled: ch_opt.add_argument("--proxy-server=" + proxy_url) if user_agent != 'default': ch_opt.add_argument("--user-agent=" + user_agent) if headless: ch_opt.headless = True try: browser = webdriver.Chrome(executable_path=driver_path, chrome_options=ch_opt) except LookupError: time.sleep(5) browser = webdriver.Chrome(executable_path=driver_path, chrome_options=ch_opt) elif engine == 'firefox': driver_path = self.get_driver_path(engine) ff_prf = webdriver.FirefoxProfile() ff_opt = webdriver.FirefoxOptions() if user_agent != 'default': ff_prf.set_preference("general.useragent.override", user_agent) if headless: ff_opt.headless = True try: browser = webdriver.Firefox(executable_path=driver_path, firefox_profile=ff_prf, 
proxy=proxy.selenium_proxy(), options=ff_opt) if proxy_enabled \ else webdriver.Firefox(executable_path=driver_path, firefox_profile=ff_prf, options=ff_opt) except LookupError: time.sleep(5) browser = webdriver.Firefox(executable_path=driver_path, firefox_profile=ff_prf, proxy=proxy.selenium_proxy(), options=ff_opt) if proxy_enabled \ else webdriver.Firefox(executable_path=driver_path, firefox_profile=ff_prf, options=ff_opt) else: raise Exception( 'Not supported engine: {engine}'.format(engine=engine)) except Exception as e: raise Exception( 'Error launching {engine} ({user_agent}): {exception}'.format( engine=engine, user_agent=user_agent, exception=e)) return browser
class Fetcher:
    """Fetch a site through BrowserMob Proxy and return its HAR plus DOM.

    Set up a server with `set_remote_server` or `start_local_server`, a
    browser with `set_firefox`, then call `run`; call `close` when done.
    """

    def __init__(self):
        self.server = None
        self.proxy = None
        self.browser = None
        self.driver = None

    def set_remote_server(self, host, port):
        """Defines an already running proxy server for gathering
        includes and content
        """
        self.server = RemoteServer(host, port)
        self.proxy = self.server.create_proxy()

    def start_local_server(self, binpath=None):
        """Starts a local instance of BrowserMob.

        Keyword Arguments:
        binpath -- The full path, including the binary name to the
                   browsermob-proxy binary.  Defaults to the
                   browsermob-proxy-2.1.0-beta-4 directory below the
                   current working directory.
        """
        if binpath is None:
            binpath = "{0}/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy".format(getcwd())

        self.server = Server(binpath)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def set_firefox(self):
        """Sets the Webdriver for Firefox"""
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=self.profile)

    def run(self, site, name='fetch'):
        """Runs an instance of the Fetcher. Requires that either
        set_remote_server() or start_local_server() has been previously
        called, and that a Webdriver has been set.

        Keyword Arguments:
        site -- The URL of the site to load.
        name -- Name of the resulting HAR.

        Returns the HAR dictionary with the page source added under the
        'dom' key, or None (after printing a message) when the proxy or
        the Webdriver has not been set up.
        """
        # Check the preconditions explicitly instead of catching every
        # AttributeError raised while loading the page.
        if self.proxy is None or self.driver is None:
            print("[!] FAILED: Ensure you have set a Webdriver")
            return None

        # TODO: Need to override BrowserMob to remove the Via Header -
        # https://github.com/lightbody/browsermob-proxy/issues/213
        self.proxy.headers({'Via': None})
        self.proxy.new_har(name, options={
            'captureHeaders': True,
            'captureContent': True,
            'captureBinaryContent': True
        })
        self.driver.get(site)

        har = self.proxy.har
        har['dom'] = self.driver.page_source
        return har

    def close(self):
        """Release the proxy, the local server (if any) and the browser."""
        try:
            if self.proxy is not None:
                self.proxy.close()
        finally:
            try:
                # Only a locally started server has stop(); a remote server
                # is not ours to shut down.
                stop_server = getattr(self.server, 'stop', None)
                if stop_server is not None:
                    stop_server()
            finally:
                if self.driver is None:
                    print('[!] Driver not found')
                else:
                    # quit() ends the whole browser session, not just the
                    # current window.
                    self.driver.quit()