Пример #1
0
def setupdevices():
    """
    Description:
        Sets up the BrowserMob proxy server, a proxy client with a new HAR,
        and a Firefox Selenium driver whose profile is routed through that
        proxy. If any step after the server start fails, the server is
        stopped again before the error is re-raised.

    Usage:
        [driver,proxy,server]=setupdevices()

    Inputs:
        NA

    Output:
        Selenium driver
        Browsermob proxy
        Browsermob server
    """
    #set up proxy
    server = Server("############/browsermob-proxy-2.0-beta-9/bin/browsermob-proxy")
    server.start()
    try:
        proxy = server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        proxy.new_har("________")

        #set up driver
        driver = webdriver.Firefox(firefox_profile=profile)
    except Exception:
        # Do not leave an orphaned proxy process behind when setup fails.
        server.stop()
        raise

    return (driver,proxy,server)
def save_web_page_stats_to_har(url, webdriver_name, save_to_file):
    """Generate the HAR archive from an URL with the Selenium webdriver
    'webdriver_name', saving the HAR file to 'save_to_file'.

    Two files are written: ``save_to_file + '.har'`` with the indented HAR
    JSON and ``save_to_file + '.prof'`` with whatever
    ``report_har_dictionary`` writes for the same HAR.  The browser, the
    proxy connection and the BrowserMob server are shut down in every case,
    including failures while they are being started.
    """
    browsermob_server = Server(Config.browsermob_executable)
    browsermob_server.start()
    proxy_conn = None
    driver = None
    try:
        random_port = get_a_random_free_tcp_port()
        proxy_conn = browsermob_server.create_proxy({"port": random_port})
        driver = create_selenium_webdriver(webdriver_name, proxy_conn)

        proxy_conn.new_har(url, options={'captureHeaders': True})
        driver.get(url)

        # Read the HAR once so both output files describe the same capture.
        har = proxy_conn.har
        har_json = json.dumps(har, ensure_ascii=False,
                              indent=4, separators=(',', ': '))
        if isinstance(har_json, bytes):
            # json.dumps can return a byte string on Python 2; the text-mode
            # file below only accepts unicode text.
            har_json = har_json.decode('utf8')

        # Save '.HAR' file
        with io.open(save_to_file + '.har', mode='wt', buffering=1,
                     encoding='utf8', errors='backslashreplace',
                     newline=None) as output_har_f:
            output_har_f.write(har_json)

        # Save '.PROF' file with profiling report (timings, sizes, etc)
        with io.open(save_to_file + '.prof', mode='wb', buffering=1,
                     newline=None) as prof_output:
            report_har_dictionary(har, prof_output)

    finally:
        # Each step is guarded so that one failing shutdown call cannot
        # prevent the remaining resources from being released.
        try:
            if driver is not None:
                driver.quit()
        finally:
            try:
                if proxy_conn is not None:
                    proxy_conn.close()
            finally:
                browsermob_server.stop()
Пример #3
0
def main(argv):
	"""Click every visible same-domain link of a page and report status codes.

	Command-line options are read by argparse from the process arguments
	(``argv`` is accepted but not consulted):

	    -u  start page URL (required)
	    -c  JSON file with cookies to add before crawling (optional)
	    -w  WebDriver type, "firefox" (default) or "phantomjs"

	The browser is routed through a BrowserMob proxy; after each click the
	HAR recorded so far is passed to ``show_status_codes``.  The browser and
	the proxy server are shut down even when the crawl fails.
	"""
	init()

	parser = argparse.ArgumentParser()
	parser.add_argument('-u', action='store', dest='start_url', help='Set page URL', required=True)
	parser.add_argument('-c', action='store', dest='cookies_file', help='JSON file with cookies', required=False)
	parser.add_argument('-w', action='store', dest='webdriver_type', help='Set WebDriver type (firefox or phantomjs, firefox by default)', default="firefox", required=False)
	results = parser.parse_args()

	start_url = results.start_url
	cookies_file = results.cookies_file
	webdriver_type = results.webdriver_type

	allowed_domain = urlparse(start_url).netloc

	browsermobproxy_path = get_browsermobproxy_path()

	options = {
		'port': 9090,
	}

	server = Server(browsermobproxy_path,options)
	server.start()
	driver = None
	try:
		proxy = server.create_proxy()

		if webdriver_type == "phantomjs":
			# Use the address of the proxy that was actually created rather
			# than assuming it listens on a fixed port.
			service_args = ['--proxy=%s' % proxy.proxy,'--proxy-type=http',]
			driver = webdriver.PhantomJS(service_args=service_args)
			driver.set_window_size(1440, 1024)
		else:
			profile  = webdriver.FirefoxProfile()
			profile.set_proxy(proxy.selenium_proxy())
			driver = webdriver.Firefox(firefox_profile=profile)

		proxy.new_har('woodpycker', options={'captureHeaders': True, 'captureContent': True})
		driver.get(start_url)

		if cookies_file is not None:
			with open(cookies_file, 'rb') as fp:
				cookies = json.load(fp)
			for cookie in cookies:
				driver.add_cookie(cookie)
			driver.refresh()

		links = driver.find_elements_by_tag_name('a')
		for i in range(len(links)):
			if i >= len(links):
				# The list is re-read after every click and may have shrunk.
				break
			link = links[i]
			if not link.is_displayed():
				continue
			url = link.get_attribute('href')
			text = link.get_attribute('text') or ''
			# Anchors without an href yield None and are skipped.
			if url and allowed_domain in url:
				link.click()
				print("%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL))
				show_status_codes(proxy.har,allowed_domain)
				# Only navigate back when a click actually left the page;
				# the old element handles are stale after that, so re-read.
				driver.back()
				driver.refresh()
				links = driver.find_elements_by_tag_name('a')
	finally:
		try:
			if driver is not None:
				driver.quit()
		finally:
			server.stop()
Пример #4
0
    def setUp(self):
        """
        Start the browser with a browsermob-proxy instance for use by the test.
        You *must* call this in the `setUp` method of any subclasses before using the browser!

        The test is skipped when the proxy server cannot be started. Once the
        server process has been started, its shutdown is registered as a
        cleanup straight away, so it is stopped even if the test is skipped
        or the rest of this method fails.

        Returns:
            None
        """

        try:
            # Start server proxy
            server = Server('browsermob-proxy')
            server.start()
            # Register the cleanup before anything else can fail.
            self.addCleanup(server.stop)
            self.proxy = server.create_proxy()
            proxy_host = os.environ.get('BROWSERMOB_PROXY_HOST', '127.0.0.1')
            self.proxy.remap_hosts('localhost', proxy_host)
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit are
            # not turned into a skipped test.
            self.skipTest('Skipping: could not start server with browsermob-proxy.')

        # parent's setUp
        super(WebAppPerfReport, self).setUp()

        # Initialize vars
        self._page_timings = []
        self._active_har = False
        self._with_cache = False
def CaptureNetworkTraffic(url,server_ip,headers,file_path):
	'''
	This function can be used to capture network traffic from the browser. Using this function we can capture header/cookies/http calls made from the browser
	url - Page url
	server_ip - remap host to for specific URL
	headers - this is a dictionary of the headers to be set
	file_path - File in which HAR gets stored

	The HAR is read from the proxy while the BrowserMob server is still
	running; the browser and the server are shut down even if the capture
	fails, and the HAR file is only written after a successful capture.
	'''
	port = {'port':9090}
	server = Server("G:\\browsermob\\bin\\browsermob-proxy",port) #Path to the BrowserMobProxy
	server.start()
	driver = None
	try:
		proxy = server.create_proxy()
		proxy.remap_hosts("www.example.com",server_ip)
		proxy.remap_hosts("www.example1.com",server_ip)
		proxy.remap_hosts("www.example2.com",server_ip)
		proxy.headers(headers)
		profile  = webdriver.FirefoxProfile()
		profile.set_proxy(proxy.selenium_proxy())
		driver = webdriver.Firefox(firefox_profile=profile)
		new = {'captureHeaders':'True','captureContent':'True'}
		proxy.new_har("google",new)
		driver.get(url)
		# Must be fetched before server.stop(): the HAR is served by the
		# running proxy server.
		har = proxy.har
	finally:
		try:
			if driver is not None:
				driver.quit()
		finally:
			server.stop()
	with open(file_path,'w') as har_file:
		json.dump(har,har_file)
Пример #6
0
 def get_driver(self, browser, start_beacon_url):
     """Start a BrowserMob server and return a Firefox driver routed through it.

     The created proxy client is stored on ``self.proxy`` with a new HAR
     started, and ``start_beacon_url`` is stored on ``self.beacon_url``.
     ``browser`` is accepted but not used: a Firefox driver is always
     created.  If setup fails part-way, the driver (if any) and the server
     are shut down before the error is re-raised.
     """
     server = Server(BROWSERMOB_LOCATION)
     server.start()
     driver = None
     try:
         self.proxy = server.create_proxy()
         driver = webdriver.Firefox(proxy=self.proxy.selenium_proxy())
         self.proxy.new_har()
     except Exception:
         # Nothing else holds a reference to the server, so release it here.
         try:
             if driver is not None:
                 driver.quit()
         finally:
             server.stop()
         raise
     self.beacon_url = start_beacon_url
     return driver
def pytest_runtest_setup(item):
    """Create a per-test BrowserMob proxy and start recording a HAR.

    Does nothing unless the ``bmp_test_proxy`` option is set and the item is
    not marked with ``skip_browsermob_proxy``.  The server stored on the
    session config is reused when present; otherwise a ``Server`` object is
    built from the ``bmp_path`` / ``bmp_port`` options.  The new proxy is
    stored on ``item.config.browsermob_test_proxy``.
    """
    logger = logging.getLogger(__name__)

    options = item.config.option
    if not (options.bmp_test_proxy and 'skip_browsermob_proxy' not in item.keywords):
        return

    session_config = item.session.config
    if not hasattr(session_config, 'browsermob_server'):
        server = Server(options.bmp_path, {'port': int(options.bmp_port)})
    else:
        server = session_config.browsermob_server

    test_proxy = server.create_proxy()
    item.config.browsermob_test_proxy = test_proxy
    logger.info('BrowserMob test proxy started (%s:%s)' % (options.bmp_host, test_proxy.port))
    configure_browsermob_proxy(test_proxy, item.config)
    #TODO make recording of har configurable
    test_proxy.new_har()
Пример #8
0
 def init_proxy_server(self, port=None):
     """Start the BrowserMob server and create one proxy on it.

     The proxy parameters are assembled from ``port`` and from the instance's
     chained-proxy settings; values that are ``None`` are left out.  The
     chained proxy is passed as ``httpsProxy`` or ``httpProxy`` depending on
     ``self.is_https``.  The server itself listens on ``self.server_port``.

     Returns the ``(server, proxy)`` pair.
     """
     candidates = [('port', port)]
     if self.chained_proxy is not None:
         upstream_key = 'httpsProxy' if self.is_https else 'httpProxy'
         candidates.append((upstream_key, self.chained_proxy))
     candidates.append(('proxyUsername', self.proxy_username))
     candidates.append(('proxyPassword', self.proxy_password))
     proxy_params = dict((key, value) for key, value in candidates if value is not None)

     bmp_server = Server('C://browsermob-proxy//bin//browsermob-proxy.bat', options={"port": self.server_port})
     bmp_server.start()
     return bmp_server, bmp_server.create_proxy(params=proxy_params)
Пример #9
0
def main():
	"""Click every visible same-domain link of a page and report status codes.

	The page URL is the first command-line argument; without it a usage
	message is printed and the process exits with status 1.  A new HAR is
	started before each click and handed to ``show_status_codes`` afterwards.
	The browser and the proxy server are shut down even when the crawl fails.
	"""
	init()
	if len(sys.argv) >= 2:
		start_url = sys.argv[1]
	else:
		print("You must specify page URL!")
		# A missing argument is a usage error: report failure to the caller.
		sys.exit(1)

	allowed_domain = urlparse(start_url).netloc

	browsermobproxy_path = "/usr/local/opt/browsermobproxy/bin/browsermob-proxy"

	options = {
		'port': 9090,

	}

	server = Server(browsermobproxy_path,options)
	server.start()
	driver = None
	try:
		proxy = server.create_proxy()

		profile  = webdriver.FirefoxProfile()
		profile.set_proxy(proxy.selenium_proxy())
		driver = webdriver.Firefox(firefox_profile=profile)

		driver.get(start_url)

		links = driver.find_elements_by_tag_name('a')
		for i in range(len(links)):
			if i >= len(links):
				# The list is re-read after every click and may have shrunk.
				break
			link = links[i]
			if not link.is_displayed():
				continue
			url = link.get_attribute('href')
			text = link.get_attribute('text') or ''
			# Anchors without an href yield None and are skipped.
			if url and allowed_domain in url:
				proxy.new_har('demo')
				link.click()
				print("%s Clicked on the link '%s' with HREF '%s'" % (Fore.BLUE+"*"+Fore.RESET,Style.BRIGHT+text+Style.RESET_ALL,Style.BRIGHT+url+Style.RESET_ALL))
				show_status_codes(proxy.har,allowed_domain)
				# Only navigate back when a click actually left the page;
				# the old element handles are stale after that, so re-read.
				driver.back()
				driver.refresh()
				links = driver.find_elements_by_tag_name('a')
	finally:
		try:
			if driver is not None:
				driver.quit()
		finally:
			server.stop()
Пример #10
0
	def fetch(url, config, output_directory, fetchEngine="browsermobproxy+selenium", browser="firefox"):
		"""Capture a HAR for ``url`` and write it into ``output_directory``.

		fetchEngine "phantomjs"/"ph" runs the configured
		``config['fetchEngines']['phantomjs_command']`` with ``$url`` replaced
		and uses its output as the HAR; "browsermobproxy+selenium"/"bs" loads
		the page in Firefox ("firefox"/"ff") or Chrome (any other ``browser``)
		behind a BrowserMob proxy.  Any other engine exits the process.

		Returns the path of the written ``.har`` file, or None when no data
		was produced.  Slashes in the URL are replaced by underscores in the
		file name so the file always lands directly in ``output_directory``.
		"""

		if fetchEngine in ("phantomjs", "ph"):

			# NOTE(review): the URL is substituted into a shell command line
			# unquoted; only call this with trusted URLs, or quote "$url" in
			# the configured command template.
			data = subprocess.check_output( config['fetchEngines']['phantomjs_command'].replace("$url", url), shell=True )

		elif fetchEngine in ("browsermobproxy+selenium", "bs"):

			from browsermobproxy import Server
			from selenium import webdriver

			server = Server(config['fetchEngines']['browsermobproxy_binary'])
			server.start()
			driver = None
			try:
				proxy = server.create_proxy()

				if browser in ("firefox", "ff"):
					profile = webdriver.FirefoxProfile()
					profile.set_proxy(proxy.selenium_proxy())
					driver = webdriver.Firefox(firefox_profile=profile)
				else:
					chrome_options = webdriver.ChromeOptions()
					chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
					driver = webdriver.Chrome(chrome_options = chrome_options)

				proxy.new_har(url, options={'captureHeaders': True})
				driver.get(url)

				data = json.dumps(proxy.har, ensure_ascii=False)
			finally:
				# Release the browser and the proxy even when the load fails.
				try:
					if driver is not None:
						driver.quit()
				finally:
					server.stop()
		else:
			sys.exit("Unrecognized engine.")

		if not data:
			return None

		# A "/" left in the name would be read as a directory separator.
		safe_name = url.replace("http://", "").replace("https://", "").replace("/", "_")
		fileName = output_directory + "/" + safe_name + "_" + strftime("%Y-%m-%d_%H:%M:%S", gmtime()) + ".har"

		# The PhantomJS path yields bytes while json.dumps may yield text:
		# encode only text, and write in binary mode.
		if not isinstance(data, bytes):
			data = data.encode("utf8")
		with open(fileName, "wb") as f:
			f.write(data)

		return fileName
class CreateHar(object):
    """Create HTTP archive (HAR) files with BrowserMob Proxy and Firefox.

    Typical use: ``start_all()``, one or more ``create_har(title, url)``
    calls, then ``stop_all()``.
    """

    def __init__(self, mob_path):
        """Remember the BrowserMob Proxy executable path; nothing is started."""
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(title, result):
        """Write ``result`` to ``<title>.har``, closing the file on error too."""
        with open(title + '.har', 'w') as har_file:
            har_file.write(str(result))

    def __start_server(self):
        """Start the proxy server and create a proxy client on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """Start Firefox with a profile routed through the proxy."""
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self):
        """Start the server first, then the driver that depends on its proxy."""
        self.__start_server()
        self.__start_driver()

    def create_har(self, title, url):
        """Load ``url`` while recording a HAR named ``title``; save ``<title>.har``."""
        self.proxy.new_har(title)
        self.driver.get(url)
        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(title, result)

    def stop_all(self):
        """Stop whatever was started; safe to call when nothing is running.

        The server is stopped even if quitting the driver fails, and all
        handles are reset so a repeated call is a no-op.
        """
        try:
            if self.driver is not None:
                self.driver.quit()
        finally:
            try:
                if self.server is not None:
                    self.server.stop()
            finally:
                self.server = self.driver = self.proxy = None
Пример #12
0
def get_driver_and_proxy():
    """Return the shared ``(driver, proxy)`` pair, creating it on first use.

    When the module-level ``driver`` is already set it is returned as is.
    Otherwise, depending on ``config``: a BrowserMob proxy is started when
    ``browsermob.collect-har`` is non-zero, a virtual display is started when
    ``xconfig.headless`` is non-zero, and a Firefox driver with a 60 second
    implicit wait is created (routed through the proxy when there is one).
    """
    global display
    global driver
    global proxy
    if driver:
        return driver, proxy

    browsermob_settings = config.get('browsermob', {})
    if int(browsermob_settings.get('collect-har', 0)):
        from browsermobproxy import Server
        server = Server(config['browsermob']['path'])
        server.start()
        proxy = server.create_proxy()

    x_settings = config.get('xconfig', {})
    if int(x_settings.get('headless', 0)):
        display = Display(visible=0, size=(800, 600))
        display.start()

    firefox_profile = webdriver.FirefoxProfile()
    if proxy:
        firefox_profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=firefox_profile)
    driver.implicitly_wait(60)
    return driver, proxy
Пример #13
0
class Proxy(object):
        """BrowserMob proxy session for one test, recording a HAR named after it.

        The proxy is started by the constructor; ``stop_proxy`` writes
        ``<test_id>.har`` and shuts the server down.
        """

        proxy = None
        proxy_server = None
        test_id = None

        def __init__(self, test_id):
            """Store ``test_id`` and start the proxy immediately."""
            self.test_id = test_id
            self.start_proxy()

        def start_proxy(self):
            """Start the server, apply ``config.blacklist`` if set, begin a HAR.

            Returns ``self``.
            """
            self.proxy_server = Server(config.proxy_bin)
            self.proxy_server.start()
            self.proxy = self.proxy_server.create_proxy()
            if config.blacklist:
                self.set_blacklist(config.blacklist)
            self.proxy.new_har(self.test_id)
            logger.debug('Browsermob proxy started.')
            return self

        def stop_proxy(self):
            """Write the HAR to ``<test_id>.har`` and stop the server.

            The server is stopped and the handles are cleared even when
            fetching or writing the HAR fails.
            """
            filename = '{}.har'.format(self.test_id)
            try:
                with open(filename, 'w') as harfile:
                    json.dump(self.proxy.har, harfile)
            finally:
                self.proxy_server.stop()
                self.proxy = None
                self.proxy_server = None
            logger.debug('Browsermob proxy stopped. HAR created: {}'
                         .format(filename))

        def set_blacklist(self, domain_list):
            """Make the proxy answer 404 for URLs matching each listed domain."""
            for domain in domain_list:
                self.proxy.blacklist("^https?://([a-z0-9-]+[.])*{}*.*"
                                     .format(domain), 404)
            logger.debug("Proxy blacklist set.")

        def get_blacklist(self):
            """Return the HTTP response of the blacklist endpoint for this proxy."""
            return requests.get('{}{}/blacklist'
                                .format(config.proxy_api, self.proxy.port))
Пример #14
0
class ad_driver():
    """Browser driver routed through a BrowserMob proxy for running test steps."""
    _driver = None
    _server = None
    _proxy = None

    def __init__(self, path_to_batch, browser="chrome"):
        """Start the proxy server and the requested browser.

        path_to_batch -- path of the BrowserMob Proxy launcher
        browser -- "chrome" (default) or "ff"

        Raises ValueError for any other ``browser`` value, before anything is
        started.  If the browser cannot be started, the proxy server is
        stopped again before the error is re-raised.
        """
        # Compare by value: identity checks against string literals are not
        # reliable.  Validate first so no process is launched needlessly.
        if browser not in ("chrome", "ff"):
            raise ValueError("Please set 'browser' variable to any of the value \n 'chrome', 'ff' !")

        # start browsermob proxy
        self._server = Server(path_to_batch)
        self._server.start()
        try:
            self._proxy = self._server.create_proxy()

            # Init browser profile
            if browser == "chrome":
                PROXY = "localhost:%s" % self._proxy.port  # IP:PORT or HOST:PORT
                chrome_options = webdriver.ChromeOptions()
                chrome_options.add_argument('--proxy-server=%s' % PROXY)
                self._driver = webdriver.Chrome(chrome_options=chrome_options)
            else:
                profile = webdriver.FirefoxProfile()
                self._driver = webdriver.Firefox(firefox_profile=profile,
                                                 proxy=self._proxy.selenium_proxy())
            self._driver.maximize_window()
            self._driver.implicitly_wait(20)
        except Exception:
            self.quit()
            raise

    def execute(self, test):
        """Run one test and return the HAR entries it produced.

        ``test`` is a mapping with "name" (HAR name and the name of the
        function in ``test_steps`` to call with the driver) and "file"
        (page under ``_test_data_dir`` to open first).
        """

        self._proxy.new_har(test["name"])
        self._driver.get(_test_data_dir + os.sep + test['file'])
        time.sleep(2)
        callToTestMethod = getattr(test_steps, test["name"])
        callToTestMethod(self._driver)
        har = self._proxy.har
        requests = har['log']['entries']
        return requests

    def quit(self):
        """Quit the browser, then stop the proxy server; tolerates partial setup."""
        try:
            if self._driver is not None:
                self._driver.quit()
        finally:
            if self._server is not None:
                self._server.stop()
Пример #15
0
    def _setup_proxy_server(self, downstream_kbps=None, upstream_kbps=None,
                            latency=None):
        """Start a BrowserMob server, create a proxy and apply bandwidth limits.

        Only the limits given with a truthy value are sent to the proxy; when
        none is given, no limits call is made.  Returns ``(server, proxy)``.
        """
        bmp_server = Server(BROWSERMOB_PROXY_PATH)
        bmp_server.start()
        bmp_proxy = bmp_server.create_proxy()

        # The proxy server is pretty sluggish, setting the limits might not
        # achieve the desired behavior.
        requested = (
            ('downstream_kbps', downstream_kbps),
            ('upstream_kbps', upstream_kbps),
            ('latency', latency),
        )
        limits = dict((name, value) for name, value in requested if value)
        if limits:
            bmp_proxy.limits(limits)

        return bmp_server, bmp_proxy
Пример #16
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a BrowserMob server for the test's duration."""

    def __init__(self, *args, **kwargs):
        """Take ``browsermob_port`` and ``browsermob_script`` out of ``kwargs``."""
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the server; raise ValueError when no script was configured."""
        server_options = {'port': self.browsermob_port} if self.browsermob_port else {}
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        server = Server(self.browsermob_script, options=server_options)
        self.browsermob_server = server
        server.start()

    def create_browsermob_proxy(self):
        """Create a proxy client and set the browser's proxy prefs to its port."""
        proxy_client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            pref_script = """
                Components.utils.import("resource://gre/modules/Preferences.jsm");
                Preferences.set("network.proxy.type", 1);
                Preferences.set("network.proxy.http", "localhost");
                Preferences.set("network.proxy.http_port", {port});
                Preferences.set("network.proxy.ssl", "localhost");
                Preferences.set("network.proxy.ssl_port", {port});
            """.format(port=proxy_client.port)
            self.marionette.execute_script(pref_script)
        return proxy_client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        if not self.browsermob_server:
            return
        self.browsermob_server.stop()
        self.browsermob_server = None

    # Also stop the server when the instance is garbage-collected.
    __del__ = tearDown
Пример #17
0
class BrowserMobProxyTestCaseMixin(object):
    """Test-case mixin that runs a BrowserMob server for the test's duration."""

    def __init__(self, *args, **kwargs):
        """Take ``browsermob_port`` and ``browsermob_script`` out of ``kwargs``."""
        self.browsermob_server = None
        self.browsermob_port = kwargs.pop('browsermob_port')
        self.browsermob_script = kwargs.pop('browsermob_script')

    def setUp(self):
        """Start the server; raise ValueError when no script was configured."""
        server_options = {'port': self.browsermob_port} if self.browsermob_port else {}
        if not self.browsermob_script:
            raise ValueError('Must specify --browsermob-script in order to '
                             'run browsermobproxy tests')
        server = Server(self.browsermob_script, options=server_options)
        self.browsermob_server = server
        server.start()

    def create_browsermob_proxy(self):
        """Create a proxy client and set the browser's proxy prefs to its port."""
        proxy_client = self.browsermob_server.create_proxy()
        with self.marionette.using_context('chrome'):
            pref_script = """
                Services.prefs.setIntPref('network.proxy.type', 1);
                Services.prefs.setCharPref('network.proxy.http', 'localhost');
                Services.prefs.setIntPref('network.proxy.http_port', %(port)s);
                Services.prefs.setCharPref('network.proxy.ssl', 'localhost');
                Services.prefs.setIntPref('network.proxy.ssl_port', %(port)s);
            """ % {"port": proxy_client.port}
            self.marionette.execute_script(pref_script)
        return proxy_client

    def tearDown(self):
        """Stop the server if one is running and forget it."""
        if not self.browsermob_server:
            return
        self.browsermob_server.stop()
        self.browsermob_server = None

    # Also stop the server when the instance is garbage-collected.
    __del__ = tearDown
Пример #18
0
def create_hars(urls, browsermob_dir, run_cached):
    """Record a HAR file for every URL, optionally a second one with a warm cache.

    urls -- iterable of page URLs
    browsermob_dir -- BrowserMob Proxy install directory (``bin/browsermob-proxy``
        below it is launched)
    run_cached -- when true, each page is loaded a second time in the same
        browser and saved under a ``-cached`` slug

    A new proxy server and Firefox instance are used per URL; both are shut
    down even when loading or saving fails.
    """
    for url in urls:
        print('starting browsermob proxy')
        server = Server('{}/bin/browsermob-proxy'.format(browsermob_dir))
        server.start()
        driver = None
        try:
            proxy = server.create_proxy()
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)

            # One (slug, progress message) pair per page load to record.
            passes = [(slugify(url), 'loading page: {}')]
            if run_cached:
                passes.append(('{}-cached'.format(slugify(url)),
                               'loading cached page: {}'))

            for url_slug, loading_message in passes:
                proxy.new_har(url_slug)

                print(loading_message.format(url))
                driver.get(url)

                har_name = '{}-{}.har'.format(url_slug, time.time())
                print('saving HAR file: {}'.format(har_name))
                save_har(har_name, proxy.har)
        finally:
            try:
                if driver is not None:
                    driver.quit()
            finally:
                print('stopping browsermob proxy')
                server.stop()
Пример #19
0
def main():
    """Run the YAML-configured browser test suite and write an HTML report.

    The optional ``config.yaml`` next to this script is merged with
    ``config/<name>.yaml``, where ``<name>`` is the first command-line
    argument or ``default``.  When ``BROWSER.proxy`` is set, a BrowserMob
    proxy is started and handed to the tests.  The browser and the proxy
    are shut down on every exit path, including errors.

    Returns:
        1 when the test configuration file does not exist, False when the
        configured browser type is not supported, otherwise None.
    """
    global proxy_client, proxy_server
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
    log_filename = 'logs/test_' + time.strftime("%Y%m%d",
                                                time.localtime()) + '.log'
    logging.basicConfig(filename=log_filename,
                        level=logging.INFO,
                        format=LOG_FORMAT)

    # Directory that contains this script
    curpath = os.path.dirname(os.path.realpath(__file__))

    # Global config file (optional)
    global_config = {}
    global_config_file_path = curpath + "/config.yaml"
    if os.path.isfile(global_config_file_path):
        with open(global_config_file_path, 'r', encoding='utf-8') as gf:
            global_config = yaml.safe_load(gf.read())

    # Config name from the command line, falling back to 'default'
    test_filename = sys.argv[1] if len(sys.argv) > 1 else 'default'
    config_file = "/config/" + test_filename + ".yaml"

    # The per-test YAML config must exist
    config_file_path = curpath + config_file
    if not os.path.isfile(config_file_path):
        print("配置文件不存在 " + config_file_path)
        return 1

    with open(config_file_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f.read())

    # Merge the global and the per-test configuration
    config = Util.recursionMergeTwoDict(global_config, config)

    is_open_proxy = config.get('BROWSER').get('proxy')

    # Track what this call started so the finally block only tears down
    # resources that really exist.
    browser = None
    started_server = None
    created_client = None
    try:
        # Start the proxy when enabled
        if is_open_proxy:
            from browsermobproxy import Server
            bmp_path = config.get('BROWSER').get('bmp_path')
            logging.info('开启代理 ' + bmp_path)
            proxy_server = Server(bmp_path)
            proxy_server.start()
            started_server = proxy_server
            proxy_client = proxy_server.create_proxy()
            created_client = proxy_client

        browser_type = config.get('BROWSER').get('type')
        if browser_type == 'Firefox':
            options = FirefoxOptions()
            options.page_load_strategy = 'normal'
            if is_open_proxy:
                # NOTE(review): --proxy-server looks like a Chrome switch;
                # confirm that Firefox honours it.
                options.add_argument('--proxy-server={0}'.format(
                    proxy_client.proxy))
            browser = webdriver.Firefox(options=options)
        elif browser_type == 'Chrome':
            options = ChromeOptions()
            options.page_load_strategy = 'normal'
            if is_open_proxy:
                options.add_argument('--proxy-server={0}'.format(
                    proxy_client.proxy))
            browser = webdriver.Chrome(options=options)
        else:
            print('浏览器' + browser_type + ':类型不支持')
            return False

        logging.info('开始使用 ' + browser_type + ' 浏览器进行自动化测试')

        if is_open_proxy:
            proxy_client.new_har("req",
                                 options={
                                     'captureHeaders': True,
                                     'captureContent': True
                                 })

        browser.maximize_window()
        # Implicit wait for the browser (disabled)
        # browser.implicitly_wait(10)

        url = config.get('WEBSITE').get('url')
        browser.get(url)
        if is_open_proxy:
            Http.logHar(proxy_client.har)

        # Build one test per entry of the configured TEST section
        test = config.get('TEST')
        suite = unittest.TestSuite()
        for key in test:
            menus = Menu.getMenuConfig(config, key)
            try:
                if is_open_proxy:
                    test_data = [browser, menus, proxy_client]
                else:
                    test_data = [browser, menus]
                suite.addTest(
                    ParametrizedTestCase.parametrize(Action,
                                                     'test_menu',
                                                     param=test_data))
            except AssertExcetion:
                print(key + " 断言失败")

        report_file_name = 'reports/' + test_filename + "_" + time.strftime(
            "%Y%m%d", time.localtime()) + '.html'
        with open(report_file_name, 'w', encoding='utf-8') as fp:
            runner = HTMLTestRunner.HTMLTestRunner(stream=fp,
                                                   title='你的测试报告',
                                                   description='使用配置文件:' +
                                                   config_file_path + '生成的测试报告')
            runner.run(suite)

        sleep(5)
    finally:
        # Each step is guarded so one failing shutdown call cannot prevent
        # the remaining resources from being released.
        try:
            if browser is not None:
                browser.quit()
        finally:
            try:
                if created_client is not None:
                    created_client.close()
            finally:
                if started_server is not None:
                    started_server.stop()
Пример #20
0
from selenium import webdriver
from browsermobproxy import Server
import os
import json
from urllib.parse import urlparse
import time
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import pprint

# Start a BrowserMob server and create a proxy with trustAllServers enabled.
# NOTE(review): the server path is an empty string here — it presumably has
# to be filled in with the browsermob-proxy executable before this can run.
server = Server("")
server.start()
proxy = server.create_proxy(params={"trustAllServers": "true"})

# NOTE(review): despite their names, `chromedriver` and `chrome_options` are
# used with Firefox below; the Chrome variants are commented out.
chromedriver = ""
#os.environ["webdriver.chrome.driver"] = chromedriver
# Take the path component of the proxy address as parsed by urlparse.
url = urlparse(proxy.proxy).path
#chrome_options = webdriver.ChromeOptions()
chrome_options = webdriver.FirefoxOptions()
chrome_options.add_argument('ignore-certificate-errors')
chrome_options.add_argument("--proxy-server={0}".format(url))
#driver = webdriver.Chrome(chromedriver, chrome_options=chrome_options)
# NOTE(review): both arguments are passed positionally — confirm they map to
# the intended parameters of webdriver.Firefox in the Selenium version used.
driver = webdriver.Firefox(chromedriver, chrome_options)

# Begin recording a HAR (with headers) before loading the page.
proxy.new_har("universalorlando.com", options={'captureHeaders': True})

driver.get("https://www.universalorlando.com/")

#fastrack2 = WebDriverWait(driver, 10).until(ec.visibility_of_element_located((By.XPATH, '//*[@id="btn-1"]')))
Пример #21
0
class Monitor(object):
    """
    Step 3: configure the chromedriver and browsermob-proxy paths.
    Full paths are required, otherwise browsermob-proxy cannot start its
    service.  Chrome is configured to block images; change CHROME_OPTIONS
    if images need to be captured.
    """
    PROXY_PATH = path.abspath(
        r"D:\Anaconda3\browsermob-proxy-2.1.4\bin/browsermob-proxy.bat")
    CHROME_PATH = path.abspath(r"D:\Anaconda3\chromedriver.exe")
    CHROME_OPTIONS = {"profile.managed_default_content_settings.images": 2}

    def __init__(self):
        """
        Nothing is started here; call start() to bring up proxy and browser.
        """
        pass

    def initProxy(self):
        """
        Step 4: initialise browsermob-proxy.
        Starts the server, creates a proxy and blacklists CSS and image
        requests (the Chrome setting sometimes does not take effect), which
        speeds up page loading.
        """
        self.server = Server(self.PROXY_PATH)
        self.server.start()
        self.proxy = self.server.create_proxy()
        self.proxy.blacklist([
            "http://.*/.*.css.*", "http://.*/.*.jpg.*", "http://.*/.*.png.*",
            "http://.*/.*.gif.*"
        ], 200)

    def initChrome(self):
        """
        Step 5: initialise Selenium and the Chrome settings.
        Chrome's proxy is set to the address of the proxy created by
        initProxy().
        """
        chromeSettings = webdriver.ChromeOptions()
        chromeSettings.add_argument('--proxy-server={host}:{port}'.format(
            host="localhost", port=self.proxy.port))
        chromeSettings.add_experimental_option("prefs", self.CHROME_OPTIONS)
        self.driver = webdriver.Chrome(executable_path=self.CHROME_PATH,
                                       chrome_options=chromeSettings)

    def genNewRecord(self, name="monitor", options=None):
        """
        Step 6: start a new HAR record.

        options defaults to {'captureContent': True}; a new dict is built on
        each call so the default cannot be shared or mutated between calls.
        """
        if options is None:
            options = {'captureContent': True}
        self.proxy.new_har(name, options=options)

    def getContentText(self, targetUrl):
        """
        Step 7: return the response text of the first recorded request whose
        URL fully matches targetUrl (a regular expression), or None.

        Records that cannot be inspected (for example a response without
        captured text) are reported with print() and skipped.
        """
        # Fetch the HAR once: every access to proxy.har asks the proxy again.
        entries = self.proxy.har['log']['entries']
        for loop_record in entries or []:
            try:
                if re.fullmatch(targetUrl, loop_record["request"]['url']):
                    return loop_record["response"]['content']["text"]
            except Exception as err:
                print(err)
                continue
        return None

    def start(self):
        """Step 8: start the proxy first, then Chrome; errors are printed."""
        try:
            self.initProxy()
            self.initChrome()
            print('初始化完成')
        except Exception as err:
            print(err)

    def quit(self):
        """
        Step 9: shut down the browser, then the proxy and its server process.
        The proxy and server shutdown is attempted even when closing the
        browser fails.  Stopping the server may still fail (the original
        author closed it by hand in that case); OSError from that step is
        ignored.
        """
        try:
            self.driver.close()
            self.driver.quit()
        finally:
            try:
                self.proxy.close()
                self.server.process.terminate()
                self.server.process.wait()
                self.server.process.kill()
            except OSError:
                pass
Пример #22
0
def _capture_browser_traffic(url):
    """Load *url* in headless Chrome behind BrowserMob Proxy.

    Returns ``(har_entries, third_party_libs)``: the HAR log entries recorded
    for the page load and the value returned by running ``libraries.js`` in
    the page.  The browser, the proxy and the server are always shut down.
    """
    server = Server(BROWSERMOB_PROXY_PATH)
    server.start()
    proxy = None
    driver = None
    try:
        proxy = server.create_proxy()
        options = webdriver.ChromeOptions()
        options.add_argument(f'--proxy-server={proxy.proxy}')
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)
        proxy.new_har()
        driver.get(url)
        har_entries = proxy.har['log']['entries']
        with open('libraries.js') as f:
            javascript = f.read()
        third_party_libs = driver.execute_script(javascript)
        return har_entries, third_party_libs
    finally:
        # Each step is guarded so one failing shutdown call cannot leave the
        # other processes running.
        try:
            if driver is not None:
                driver.quit()
        finally:
            try:
                if proxy is not None:
                    proxy.close()
            finally:
                server.stop()


def analyze(site):
    """Run all security analyses for *site* and return the combined result.

    The site is first requested over HTTP to follow redirects, then over
    HTTPS (with a Chrome and an IE user agent) and finally loaded in a
    headless browser to record its network traffic.

    Returns a ``SuccessResult`` carrying every individual analysis result,
    or an ``ErrorResult`` with the error message when any step raises.
    """
    try:
        site = re.sub(r'^https?://', '', site)
        response = requests.get(f'http://{site}',
                                timeout=10,
                                headers={'User-Agent': USER_AGENT_CHROME})
        redirected_hostname = urlparse(response.url).hostname
        redirected_site = re.sub(r'^https?://', '', response.url)
        # phase 0
        http_redirection_result = analyze_http_redirection(response)
        # phase 1
        tls_result = analyze_tls(redirected_hostname)
        # phase 2
        response = requests.get(f'https://{redirected_site}',
                                timeout=10,
                                headers={'User-Agent': USER_AGENT_CHROME})
        response_ie = requests.get(f'https://{redirected_site}',
                                   timeout=10,
                                   headers={'User-Agent': USER_AGENT_IE})
        response_headers = response.headers
        response_cookies = response.cookies
        response_url = response.url
        soup = BeautifulSoup(response.text, features='html.parser')

        har_entries, third_party_libs = _capture_browser_traffic(
            f'https://{redirected_site}')

        hsts_result = analyze_hsts(response_headers)
        hpkp_result = analyze_hpkp(response_headers)
        x_content_type_options_result = analyze_x_content_type_options(
            response_headers)
        x_xss_protection_result = analyze_x_xss_protection(response_headers)
        x_frame_options_result = analyze_x_frame_options(response_headers)
        x_download_options_result = analyze_x_download_options(
            response_ie.headers)
        expect_ct_result = analyze_expect_ct(response_headers)
        # phase 3
        cookie_security_result = analyze_cookie_security(
            response_cookies, soup)
        cors_policy_result = analyze_cors_policy(response_headers,
                                                 response_url)
        csp_result = analyze_csp(redirected_site)
        csrf_result = analyze_csrf(response_cookies, soup)

        cors_result = analyze_cors(soup, har_entries)
        referrer_policy_result = analyze_referrer_policy(
            response_headers, response_url, soup, har_entries)
        cache_control_result = analyze_cache_control(response_headers, soup)
        leaking_server_software_info_result = analyze_leaking_server_software_info(
            response_headers)
        # phase 4
        mixed_content_result = analyze_mixed_content(har_entries)
        sri_result = analyze_sri_protection(soup)
        cross_domain_existence_result = analyze_cross_domain_existence(
            response_url, har_entries)
        third_party_libs_result = analyze_third_party_libs(third_party_libs)

        result = SuccessResult(
            site=site,
            timestamp=datetime.datetime.now(),
            http_redirection_result=http_redirection_result,
            tls_result=tls_result,
            hsts_result=hsts_result,
            hpkp_result=hpkp_result,
            x_content_type_options_result=x_content_type_options_result,
            x_xss_protection_result=x_xss_protection_result,
            x_frame_options_result=x_frame_options_result,
            x_download_options_result=x_download_options_result,
            expect_ct_result=expect_ct_result,
            cookie_security_result=cookie_security_result,
            cors_policy_result=cors_policy_result,
            cors_result=cors_result,
            csp_result=csp_result,
            csrf_result=csrf_result,
            referrer_policy_result=referrer_policy_result,
            cache_control_result=cache_control_result,
            leaking_server_software_info_result=
            leaking_server_software_info_result,
            mixed_content_result=mixed_content_result,
            sri_result=sri_result,
            cross_domain_existence_result=cross_domain_existence_result,
            third_party_libs_result=third_party_libs_result)
    except Exception as e:
        # Top-level boundary: any failure is reported as an ErrorResult.
        result = ErrorResult(site=site,
                             timestamp=datetime.datetime.now(),
                             error_msg=str(e))
    return result
Пример #23
0
class Fetcher:
    """Loads a page through a BrowserMob proxy and returns the captured HAR.

    Call set_remote_server() or start_local_server() first, then a
    set_<browser>() method, then run(). Call close() when finished.
    """

    def __init__(self):
        self.server = None
        self.proxy = None
        self.browser = None
        self.driver = None
        self.profile = None

    def set_remote_server(self, host, port):
        """Defines an already running proxy server for gathering
        includes and content
        """
        self.server = RemoteServer(host, port)
        self.proxy = self.server.create_proxy()

    def start_local_server(self, binpath=None):
        """Starts a local instance of BrowserMob.

        Keyword Arguments:
        binpath -- The full path, including the binary name to the
        browsermob-proxy binary. Defaults to the bundled 2.1.0-beta-4
        layout below the current working directory.
        """
        if binpath is None:
            binpath = "{0}/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy".format(getcwd())

        self.server = Server(binpath)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def set_firefox(self):
        """Sets the Webdriver for Firefox, routed through the current proxy."""
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=self.profile)

    def run(self, site, name='fetch'):
        """Runs an instance of the Fetcher. Requires that either
        set_remote_server() or start_local_server() has been previously
        called, and that a Webdriver has been set.

        Keyword Arguments:
        site -- The URL of the site to load.
        name -- Name of the resulting HAR.

        Returns the HAR dict with the page source added under 'dom', or
        None (after printing a hint) when the proxy or driver is missing.
        """
        # Check the preconditions explicitly instead of catching every
        # AttributeError, which would also hide unrelated programming errors.
        if self.proxy is None or self.driver is None:
            print("[!] FAILED: Ensure you have set a Webdriver")
            return None

        self.proxy.headers({'Via': None})  # TODO: Need to override BrowserMob to remove the Via Header - https://github.com/lightbody/browsermob-proxy/issues/213
        self.proxy.new_har(name, options={'captureHeaders': True,
                                          'captureContent': True,
                                          'captureBinaryContent': True})
        self.driver.get(site)

        har = self.proxy.har
        har['dom'] = self.driver.page_source
        return har

    def close(self):
        """Releases the proxy, the local server (if any) and the browser.

        Every step is attempted even if an earlier one raises.
        """
        try:
            # The proxy client exposes close(), not stop().
            if self.proxy is not None:
                self.proxy.close()
        finally:
            try:
                # Only a locally started Server can be stopped; a remote
                # server object has no stop() and is left running.
                stop_server = getattr(self.server, 'stop', None)
                if stop_server is not None:
                    stop_server()
            finally:
                if self.driver is None:
                    print('[!] Driver not found')
                else:
                    # quit() ends the whole browser session, not just the
                    # current window.
                    self.driver.quit()
Пример #24
0
    def _real_extract(self, url):
        """Extract the videos of a single post as a playlist.

        A BrowserMob server is started on a port derived from the class-wide
        ``_NUM`` counter, the post page is loaded through its proxy, and the
        captured HAR is scanned for the ``/api2/v2/posts/<post>`` request.

        Raises:
            ExtractorError: when the post is missing, no entries are found,
                or any other error occurs (chained to the original error).
        """
        # Bound up front so ``finally`` releases only what was really
        # created; otherwise an early failure surfaces as a NameError that
        # hides the actual cause.
        _server = None
        _harproxy = None
        driver = None

        try:

            with OnlyFansPostIE._LOCK:

                while True:

                    _server_port = 18080 + 100 * OnlyFansPostIE._NUM

                    _candidate = Server(
                        path=
                        "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                        options={'port': _server_port})
                    if _candidate._is_listening():
                        # Something already answers on this port: move on to
                        # the next slot, giving up once the counter hits 25.
                        OnlyFansPostIE._NUM += 1
                        if OnlyFansPostIE._NUM == 25:
                            raise Exception("mobproxy max tries")
                    else:
                        _candidate.start({
                            'log_path': '/dev',
                            'log_file': 'null'
                        })
                        # Only a server started here is ours to stop.
                        _server = _candidate
                        OnlyFansPostIE._NUM += 1
                        break

            _host = 'localhost'
            _port = _server_port + 1
            _harproxy = _server.create_proxy({'port': _port})
            driver = self.get_driver(host=_host, port=_port)

            self.send_driver_request(driver, self._SITE_URL)
            for cookie in OnlyFansPostIE._COOKIES:
                driver.add_cookie(cookie)

            self.report_extraction(url)

            post, account = re.search(self._VALID_URL,
                                      url).group("post", "account")

            self.to_screen("post:" + post + ":" + "account:" + account)

            entries = {}

            _harproxy.new_har(options={
                'captureHeaders': False,
                'captureContent': True
            },
                              ref=f"har_{post}",
                              title=f"har_{post}")
            self.send_driver_request(driver, url)
            res = self.wait_until(driver, 30, error404_or_found())
            if not res or res[0] == "error404":
                raise ExtractorError("Error 404: Post doesnt exists")
            har = _harproxy.har
            data_json = self.scan_for_request(har, f"har_{post}",
                                              f"/api2/v2/posts/{post}")
            if data_json:
                self.write_debug(data_json)
                _entry = self._extract_from_json(data_json,
                                                 user_profile=account)
                if _entry:
                    for _video in _entry:
                        _known = entries.get(_video['id'])
                        # Keep one entry per id, preferring the longer one.
                        if (_known is None
                                or _video['duration'] > _known['duration']):
                            entries[_video['id']] = _video

            if entries:
                return self.playlist_result(list(entries.values()),
                                            "Onlyfans:" + account,
                                            "Onlyfans:" + account)
            else:
                raise ExtractorError("No entries")

        except ExtractorError:
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
            raise ExtractorError(repr(e)) from e
        finally:
            # Nested so that one failing cleanup step cannot skip the rest.
            try:
                if _harproxy is not None:
                    _harproxy.close()
            finally:
                try:
                    if _server is not None:
                        _server.stop()
                finally:
                    if driver is not None:
                        self.rm_driver(driver)
Пример #25
0
    def _real_extract(self, url):
        """Extract all purchased ("paid") videos as a playlist.

        A BrowserMob server is started on a port derived from the class-wide
        ``_NUM`` counter. The site is opened through its proxy, the
        purchased tab is clicked and scrolled, and the captured HAR is
        scanned for the users-list and paid-posts requests.

        Raises:
            ExtractorError: when no entries are found or any other error
                occurs (chained to the original error).
        """
        # Bound up front so ``finally`` releases only what was really
        # created; otherwise an early failure surfaces as a NameError that
        # hides the actual cause.
        _server = None
        _harproxy = None
        driver = None

        try:

            self.report_extraction(url)

            with OnlyFansPaidlistIE._LOCK:
                _server_port = 18080 + 100 * OnlyFansPaidlistIE._NUM
                OnlyFansPaidlistIE._NUM += 1
                _candidate = Server(
                    path=
                    "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                    options={'port': _server_port})
                _candidate.start({'log_path': '/dev', 'log_file': 'null'})
                # Only a server that actually started is ours to stop.
                _server = _candidate
                _host = 'localhost'
                _port = _server_port + 1
                _harproxy = _server.create_proxy({'port': _port})

            driver = self.get_driver(host=_host, port=_port)
            _harproxy.new_har(options={
                'captureHeaders': False,
                'captureContent': True
            },
                              ref="har_paid",
                              title="har_paid")
            self.send_driver_request(driver, self._SITE_URL)
            for cookie in OnlyFansPaidlistIE._COOKIES:
                driver.add_cookie(cookie)

            # Second request: reload the site now that the cookies are set.
            self.send_driver_request(driver, self._SITE_URL)
            list_el = self.wait_until(
                driver, 60,
                ec.presence_of_all_elements_located(
                    (By.CLASS_NAME, "b-tabs__nav__item")))
            for el in list_el:
                if re.search(r'(?:purchased|comprado)',
                             el.get_attribute("textContent").lower()):
                    el.click()
                    break
            self.wait_until(
                driver, 60,
                ec.presence_of_element_located((By.CLASS_NAME, "user_posts")))

            self.wait_until(driver, 600, scroll(10))

            har = _harproxy.har
            users_json = self.scan_for_all_requests(har, "har_paid",
                                                    r'/api2/v2/users/list')
            users_dict = {}
            if users_json:
                self.to_screen("users list attempt success")
                for _users in users_json:
                    for _user in _users.values():
                        users_dict[_user['id']] = _user['username']
            else:
                # Hard-coded fallback used when no users-list request was
                # captured.
                self.to_screen("User-dict loaded manually")
                users_dict.update({
                    127138: 'lucasxfrost',
                    1810078: 'sirpeeter',
                    5442793: 'stallionfabio',
                    7820586: 'mreyesmuriel'
                })

            self.to_screen(users_dict)

            entries = {}
            _reg_str = r'/api2/v2/posts/paid\?'
            data_json = self.scan_for_all_requests(har, "har_paid", _reg_str)
            if data_json:
                self.write_debug(data_json)
                list_json = []
                for el in data_json:
                    list_json.extend(el['list'])

                for info_json in list_json:
                    # The post extractor treats an empty result as "nothing
                    # to add"; do the same here instead of failing on None.
                    _videos = self._extract_from_json(
                        info_json, users_dict=users_dict) or []
                    for _video in _videos:
                        _known = entries.get(_video['id'])
                        # Keep one entry per id, preferring the longer one.
                        if (_known is None or _video.get('duration', 1) >
                                _known.get('duration', 0)):
                            entries[_video['id']] = _video

            if entries:
                return self.playlist_result(list(entries.values()),
                                            "Onlyfans:paid", "Onlyfans:paid")
            else:
                raise ExtractorError("no entries")

        except ExtractorError:
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
            raise ExtractorError(repr(e)) from e
        finally:
            # Nested so that one failing cleanup step cannot skip the rest.
            try:
                if _harproxy is not None:
                    _harproxy.close()
            finally:
                try:
                    if _server is not None:
                        _server.stop()
                finally:
                    if driver is not None:
                        self.rm_driver(driver)
def stream_collector():
    """Find the TVM live-stream playlist URL and hand it to save_stream().

    Starts a BrowserMob proxy and a headless Chrome routed through it,
    opens the embedded player page, clicks the play button and scans the
    captured HAR for the first request URL containing both "m3u8" and
    "chunklist". The proxy server and the browser are always shut down
    exactly once, whether or not a stream was found.
    """
    server = Server("/root/xhprof/browsermob-proxy-2.1.4/bin/browsermob-proxy")
    server.start()
    driver = None

    try:
        proxy = server.create_proxy()
        chrome_options = Options()

        # Avoid the bug "DevToolActivePort file doesn't exist"
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')

        # Use headless browser to implement web automation
        chrome_options.add_argument('--headless')

        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))

        # Turn on the chrome driver
        chrome_driver = "/usr/bin/chromedriver"

        driver = webdriver.Chrome(executable_path=chrome_driver,
                                  chrome_options=chrome_options)

        # Provide the url of the target website
        # The link is found in the source code of the website and it is embedded in a static page
        base_url = "https://media.tvm.com.mt/16958960.ihtml/player.html?source=embed&live_id=16966825&tvm_location=tvm1_live"
        proxy.new_har("tvm.com.mt/mt/live",
                      options={
                          'captureHeaders': True,
                          'captureContent': True
                      })
        driver.get(base_url)
        print("Connecting to the website(TVM)...")

        try:
            # No visible browser is available, so the play button has to be
            # clicked programmatically to make the player request the stream.
            driver.find_element_by_xpath(
                "//*[@class='big-play-button']").click()
            time.sleep(5)
            print("Click the button!")
            time.sleep(5)
            result = proxy.har

            for entry in result['log']['entries']:
                _url = entry['request']['url']
                # Keep only playlist requests: the URL must contain both
                # "m3u8" and "chunklist".
                if "m3u8" in _url and "chunklist" in _url:
                    print("Congrats! The live stream is shown in the following!")
                    print(_url)
                    # Save the stream; the first match is enough.
                    save_stream(_url)
                    break

        except Exception:
            # Best effort: report the failure and fall through to cleanup.
            print("Sorry, it's not working!")

    finally:
        # Single cleanup point, run on success, failure and interruption.
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()
Пример #27
0
    def get_viedo_downURL(self, cursor):
        """Load ``self.url`` through a proxy and store the first m3u8 URL found.

        The page is opened in headless Chrome behind a BrowserMob proxy.
        The first captured request whose URL contains "m3u8" is inserted
        into the ``torrent`` table (created if missing) via ``cursor``.

        Args:
            cursor: database cursor used for the CREATE TABLE / INSERT
                statements; committing is left to the caller.

        Returns:
            The name built from the ``[A-Za-z]+-\\d+`` matches in the page
            title, with '-' replaced by '_' (empty string if none match).
        """
        print("开始请求资源网站")
        # Start the browsermob-proxy service; the path to the
        # browsermob-proxy launcher must be given (similar to chromedriver).
        server = Server(
            "D:/下载/browsermob-proxy-2.1.4/bin/browsermob-proxy.bat")
        server.start()
        driver = None

        try:
            # Create the proxy.
            proxy = server.create_proxy()
            chrome_options = Options()
            # Make Chrome use the proxy from startup.
            chrome_options.add_argument(
                '--proxy-server={0}'.format(proxy.proxy))
            # Silent mode: do not show the browser window.
            chrome_options.add_argument('headless')

            driver = webdriver.Chrome(
                executable_path=
                'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe',
                chrome_options=chrome_options)

            # Start recording a new HAR (HTTP Archive), which logs the
            # requests the browser makes while loading the page.
            proxy.new_har(ref="HAR啦",
                          options={
                              'captureHeaders': True,
                              'captureContent': True
                          },
                          title="标题")
            driver.get(self.url)
            title = driver.title
            torrentName = ''.join(re.findall(r'[A-Za-z]+-\d+',
                                             title)).replace('-', '_')
            # Fetch the HAR.
            result = proxy.har
            print(result)

            # Store the crawled link and title in the database.
            cursor.execute(
                "CREATE TABLE IF NOT EXISTS torrent(id INTEGER PRIMARY KEY AUTOINCREMENT, title VARCHAR(255) NOT NULL, fileUrl VARCHAR(255) NOT NULL, isGet INT NOT NULL);"
            )
            for entry in result['log']['entries']:
                _url = entry['request']['url']
                # Identify the data endpoint by its URL.
                if "m3u8" in _url:
                    print("找到M3U8文件了")
                    print(_url)
                    cursor.execute(
                        "INSERT INTO torrent(id, title, fileUrl, isGet) VALUES (?, ?, ?, ?);",
                        (None, title, _url, 0))
                    return torrentName

                # Report responses that carry an error.
                if "_error" in entry['response']:
                    print("Url : {} 响应报错信息为 error : {}".format(
                        _url, entry["response"]["_error"]))

            return torrentName
        finally:
            # The proxy server and browser must be shut down on every path,
            # including the early return above.
            print("已经要关闭啦!!!!")
            try:
                server.stop()
            finally:
                if driver is not None:
                    driver.quit()
class performance(object):
    """Collects HAR and ``window.performance`` data for a single URL.

    Usage: ``start_all(args)``, ``create_har(args)``, ``stop_all()``.
    ``args`` is a dict with the keys 'browser' ('chrome' or 'firefox'),
    'url', and optionally 'path' (output directory).
    """

    def __init__(self, mob_path):
        """Remember the browsermob-proxy binary path; nothing is started yet."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Write ``result`` to ``<title>.json`` in args['path'] (or the cwd)."""
        import io

        file_name = title + '.json'
        if 'path' in args:
            file_name = os.path.join(args['path'], file_name)

        # Normalise to text so non-ASCII JSON (ensure_ascii=False) is
        # written as UTF-8 instead of failing on an implicit ASCII encode.
        if isinstance(result, bytes):
            text = result.decode('utf-8')
        elif isinstance(result, type(u'')):
            text = result
        else:
            text = u'%s' % (result,)

        with io.open(file_name, 'w', encoding='utf-8') as har_file:
            har_file.write(text)

    def __start_server(self):
        """Start the BrowserMob server and create one proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Start the browser named by args['browser'], routed via the proxy.

        Raises:
            ValueError: if args['browser'] is neither 'chrome' nor 'firefox'.
        """
        browser = args['browser']

        if browser == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            chrome_options = webdriver.ChromeOptions()
            # proxy.proxy is already "host:port"; running it through a URL
            # parser gives version-dependent results for such strings.
            chrome_options.add_argument(
                "--proxy-server={0}".format(self.proxy.proxy))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver,
                                           chrome_options=chrome_options)
        elif browser == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)
        else:
            # Fail here rather than later with an AttributeError on None.
            raise ValueError("Unsupported browser: {0!r}".format(browser))

    def start_all(self, args):
        """Start the proxy server and then the browser."""
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Load args['url'] and store 'har.json' and 'perf.json'."""
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        performance = json.dumps(
            self.driver.execute_script("return window.performance"),
            ensure_ascii=False)
        self.__store_into_file(args, 'perf', performance)

    def stop_all(self):
        """Stop the server and quit the browser; safe after a partial start."""
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))

        try:
            if self.server is not None:
                self.server.stop()
        finally:
            # Quit the browser even if stopping the server failed.
            if self.driver is not None:
                self.driver.quit()
class BrowserMobLibrary():
    """Keyword library wrapping a BrowserMob server and its proxies.

    One server is managed at a time. Proxies created through this library
    are tracked in ``self.proxies``, and the "active" proxy is the target
    of all traffic-related keywords.
    """

    ROBOT_LIBRARY_SCOPE = 'GLOBAL'
    ROBOT_LIBRARY_VERSION = VERSION

    def __init__(self):
        self.isServerStarted = False
        self.activeProxy = None
        self.server = None
        self.proxies = []

    def _proxy(self):
        """Return the active proxy or raise if none has been set."""
        if self.activeProxy is None:
            raise Exception("No proxy has been created")
        return self.activeProxy

    def start_browsermob(self, browsermob_path):
        """Start the BrowserMob server found at ``browsermob_path``."""
        self.server = Server(browsermob_path)
        self.server.start()
        self.isServerStarted = True

    def stop_browsermob(self):
        """Stop the BrowserMob server and forget it."""
        self.server.stop()
        self.server = None
        self.isServerStarted = False

    def create_proxy(self):
        """Create a proxy, make it the active one and return it."""
        # Call create_proxy() exactly once so that the tracked, active and
        # returned proxy are all the same object.
        proxy = self.server.create_proxy()
        self.activeProxy = proxy
        self.proxies.append(proxy)
        return proxy

    def close_proxy(self, proxy):
        """Close ``proxy`` and stop tracking it."""
        self.proxies.remove(proxy)
        proxy.close()
        if proxy is self.activeProxy:
            # Do not keep pointing keywords at a closed proxy.
            self.activeProxy = None

    def close_active_proxy(self):
        """Close the active proxy; raises if there is none."""
        self.close_proxy(self._proxy())

    def set_active_proxy(self, proxy):
        """Make ``proxy`` the target of subsequent keywords."""
        self.activeProxy = proxy

    def get_active_proxy(self):
        """Return the active proxy (None if there is none)."""
        return self.activeProxy

    def get_all_proxies(self):
        """Return the list of tracked proxies."""
        return self.proxies

    def close_all_proxies(self):
        """Close every tracked proxy and clear the bookkeeping."""
        # Iterate over a copy; the tracked list is emptied afterwards.
        for proxy in list(self.proxies):
            proxy.close()
        del self.proxies[:]
        self.activeProxy = None

    def capture_traffic(self, reference=None, **options):
        """Start a new HAR on the active proxy."""
        return self._proxy().new_har(reference, options)

    def get_captured_traffic(self):
        """Return the HAR recorded by the active proxy."""
        return self._proxy().har

    def set_capture_reference(self, reference=None):
        """Start a new page in the current HAR of the active proxy."""
        return self._proxy().new_page(reference)

    def ignore_all_traffic_matching(self, regexp, status_code):
        """Blacklist URLs matching ``regexp`` with ``status_code``."""
        return self._proxy().blacklist(regexp, status_code)

    def only_capture_traffic_matching(self, regexp, status_code):
        """Whitelist URLs matching ``regexp`` with ``status_code``."""
        return self._proxy().whitelist(regexp, status_code)

    def use_basic_authentication(self, domain, username, password):
        """Forward to the active proxy's basic_authentication()."""
        return self._proxy().basic_authentication(domain, username, password)

    def set_headers(self, headers):
        """Forward ``headers`` to the active proxy's headers()."""
        return self._proxy().headers(headers)

    def set_response_interceptor(self, js):
        """Forward ``js`` to the active proxy's response_interceptor()."""
        return self._proxy().response_interceptor(js)

    def set_request_interceptor(self, js):
        """Forward ``js`` to the active proxy's request_interceptor()."""
        return self._proxy().request_interceptor(js)

    def set_bandwith_limits(self, **options):
        """Forward the keyword options to the active proxy's limits()."""
        return self._proxy().limits(options)

    def set_proxy_timeouts(self, **options):
        """Forward the keyword options to the active proxy's timeouts()."""
        return self._proxy().timeouts(options)

    def remap_hosts(self, address, ip_address):
        """Forward to the active proxy's remap_hosts()."""
        return self._proxy().remap_hosts(address, ip_address)

    def wait_for_traffic_to_stop(self, quiet_period, timeout):
        """Forward to the active proxy's wait_for_traffic_to_stop()."""
        return self._proxy().wait_for_traffic_to_stop(quiet_period, timeout)

    def clear_proxy_dns_cache(self):
        """Forward to the active proxy's clear_dns_cache()."""
        return self._proxy().clear_dns_cache()

    def rewrite_url(self, match, replace):
        """Forward to the active proxy's rewrite_url()."""
        return self._proxy().rewrite_url(match, replace)
Пример #30
0
class StoredXSSDetector:
    """Looks for stored XSS by planting markers in forms and re-testing echoes.

    Workflow (see ``exec``): submit every known form with random marker
    strings, find which crawled URLs echo a marker, resubmit the matching
    field with a script payload, load the echoing page in Chrome, and
    record the requests received by a local HTTP monitor.
    """

    # Response codes treated as a successful form submission.
    _ACCEPTED_STATUS = (200, 301, 302, 306, 307, 308)
    # Field types that receive a generated marker string.
    _TEXT_FIELD_TYPES = ('text', 'password', 'textarea')

    def __init__(self, results, reports, **kwargs):
        """Parse the cookie string, start the proxy server and Chrome.

        Args:
            results: dict read by this class under the 'requests' and
                'urls' keys.
            reports: list that ``make_report`` appends a report dict to.
            **kwargs: must contain 'cookie' ("k=v; k2=v2" string) and
                'browsermobproxy' (path to the binary); the presence of
                'headless' enables headless Chrome.
        """
        self.results = results
        self.reports = reports
        self.args = kwargs
        self.factor_length = 6
        self.factor = randstr(self.factor_length)
        self.rand_length = 10
        self.listen_port = 9759
        self.filled_forms = {}
        self.str_mapping = {}
        self.bindings = {}
        self.lock = threading.Lock()
        self.vulnerable = []
        self.server = None
        self.cookies = {}
        for entry in self.args['cookie'].split(';'):
            if '=' not in entry:
                continue
            key, value = entry.strip().split('=', 1)
            self.cookies[key] = value
        # Create proxy server
        logging.info('Starting browsermobproxy server...')
        self.proxy_server = Server(self.args['browsermobproxy'])
        self.proxy_server.start()
        self.proxy = self.proxy_server.create_proxy()
        logging.info('Browsermobproxy server started')
        # Create Chrome engine
        logging.info('Creating Selenium Chrome webdriver...')
        self.chrome_options = webdriver.ChromeOptions()
        self.chrome_options.add_argument('--proxy-server={}'.format(
            self.proxy.proxy))
        if 'headless' in self.args:
            self.chrome_options.add_argument('--headless')
        self.chrome_options.add_argument('--disable-gpu')
        self.chrome_options.add_argument("--disable-extensions")
        self.driver = webdriver.Chrome(chrome_options=self.chrome_options)
        logging.info('Selenium Chrome webdriver created')

    @staticmethod
    def meta():
        """Return the detector's name and version."""
        return {'name': 'Stored XSS Detector for all', 'version': '1.0'}

    @staticmethod
    def _default_value(field):
        """Return the value to submit for a field that gets no marker."""
        if field['type'] == 'radio':
            return field['values'][0]
        if field['type'] == 'checkbox':
            return field['value']
        return field['default']

    def _submit(self, request, params):
        """Send ``params`` as described by ``request``.

        Returns the response, or None when the method / content-type
        combination is not one this detector knows how to send.
        """
        method = request['method']
        if method == 'GET':
            return requests.get(request['url'],
                                params=params,
                                cookies=self.cookies)
        if method == 'POST':
            content_type = request['content-type']
            if content_type == 'application/x-www-form-urlencoded':
                return requests.post(request['url'],
                                     data=params,
                                     cookies=self.cookies)
            if content_type == 'multipart/form-data':
                return requests.post(request['url'],
                                     files=params,
                                     cookies=self.cookies)
        return None

    def random_form(self):
        """Submit every form with random marker strings in its text fields."""
        logging.info('Start sending random payload')
        for uuid, request in self.results['requests'].items():
            logging.info('Processing form {}'.format(uuid))
            if request['content-type'] == 'text/plain':
                logging.warning('Form {} is text/plain. Skipped'.format(uuid))
                continue
            form_id = request['uuid']
            params = {}
            filled = self.filled_forms[form_id] = {}
            for name, field in request['fields'].items():
                if field['type'] in self._TEXT_FIELD_TYPES:
                    marker = self.factor + randstr(self.rand_length)
                    params[name] = marker
                    filled[name] = marker
                    self.str_mapping[marker] = (form_id, name)
                else:
                    params[name] = self._default_value(field)
            r = self._submit(request, params)
            if r is None or r.status_code not in self._ACCEPTED_STATUS:
                del self.filled_forms[form_id]
                logging.warning('Failed to send form {}'.format(uuid))
                # Do not also report this form as sent.
                continue
            logging.info('Form {} sent successfully'.format(uuid))

    def bind_form(self):
        """Map each (form uuid, field name) to a URL that echoes its marker."""
        logging.info('Start binding urls with forms')
        marker_length = self.factor_length + self.rand_length
        for url in self.results['urls']:
            logging.info('Crawling {}'.format(url))
            text = requests.get(url, cookies=self.cookies).text
            pos = text.find(self.factor)
            while pos != -1:
                rand = text[pos:pos + marker_length]
                if rand in self.str_mapping:
                    logging.info('Found a binding in {}'.format(url))
                    self.bindings[self.str_mapping[rand]] = url
                pos = text.find(self.factor, pos + marker_length)

    def start_server(self):
        """Start the local HTTP monitor that records payload callbacks."""
        detector = self

        class Handler(BaseHTTPRequestHandler):
            def do_GET(handler):
                query = parse_qs(urlparse(handler.path).query)
                # All three parameters are read below, so require all three;
                # a request without 'url' must not crash the handler.
                if all(key in query for key in ('uuid', 'name', 'url')):
                    finding = (query['uuid'][0], query['name'][0],
                               query['url'][0])
                    with detector.lock:
                        if finding not in detector.vulnerable:
                            detector.vulnerable.append(finding)
                handler.send_response(200)
                handler.send_header('Content-Type', 'text/html')
                handler.end_headers()
                handler.wfile.write(b'')

        def serve():
            logging.info('Starting monitor server at port {}...'.format(
                self.listen_port))
            self.server.serve_forever()

        self.server = HTTPServer(('127.0.0.1', self.listen_port), Handler)
        t = threading.Thread(target=serve, daemon=True)
        t.start()
        time.sleep(3)

    def send_payload(self):
        """Resubmit bound fields with the script payload and load the page."""
        logging.info('Start to send payload')
        for info, url in self.bindings.items():
            form_uuid, target_name = info[0], info[1]
            request = self.results['requests'][form_uuid]
            logging.info('Testing payload for form {}'.format(form_uuid))
            params = {}
            for name, field in request['fields'].items():
                if name == target_name:
                    script = script_template.format(self.listen_port,
                                                    form_uuid, target_name)
                    params[name] = \
                        base64.b64decode(xss_payload[0]).decode('utf-8') + script + \
                        base64.b64decode(xss_payload[1]).decode('utf-8') + script + \
                        base64.b64decode(xss_payload[2]).decode('utf-8')
                else:
                    params[name] = self._default_value(field)
            r = self._submit(request, params)
            if r is None or r.status_code not in self._ACCEPTED_STATUS:
                continue
            # First load puts the browser on the target domain so cookies
            # can be set; the second load then runs with those cookies.
            self.driver.get(url)
            for key, value in self.cookies.items():
                exist = self.driver.get_cookie(key)
                # Set cookies that are missing as well as ones that differ;
                # a fresh browser has none of them yet.
                if exist is None or exist['value'] != value:
                    self.driver.add_cookie({'name': key, 'value': value})
            self.driver.get(url)

    def stop_server(self):
        """Stop the HTTP monitor and release its listening socket."""
        if self.server is None:
            return
        self.server.shutdown()
        self.server.server_close()
        logging.info('The monitoring server has been closed')

    def make_report(self):
        """Append a report of all recorded injection points to ``reports``."""
        def make_entry(v):
            request = self.results['requests'][v[0]]
            return [
                request['location'], request['url'], request['method'], v[1],
                html.escape(v[2])
            ]

        self.reports.append({
            'title':
            'Stored XSS Injection Points',
            'overview':
            'Found {} Stored XSS injection point(s)'.format(
                len(self.vulnerable)),
            'header':
            ['Form Location', 'Target', 'Method', 'Name', 'XSS Location'],
            'entries': [make_entry(v) for v in self.vulnerable]
        })

    def _release_browser(self):
        """Close the proxy, stop the proxy server and quit Chrome."""
        logging.info('Stopping proxy server and Chrome webdriver...')
        try:
            self.proxy.close()
        finally:
            try:
                self.proxy_server.stop()
            finally:
                # quit() ends the whole browser session.
                self.driver.quit()
        logging.info('Proxy server and Chrome webdriver have been closed')

    def exec(self):
        """Run the full detection workflow and always release resources.

        Raises:
            NoRequestsException: if ``results`` has no 'requests' entry.
        """
        logging.info('Start to test stored XSS points')
        try:
            if 'requests' not in self.results:
                logging.fatal('There\'s no requests in results')
                raise NoRequestsException
            self.random_form()
            self.bind_form()
            self.start_server()
            try:
                self.send_payload()
            finally:
                self.stop_server()
            self.make_report()
        finally:
            # Runs on every path so the proxy server and Chrome never leak.
            self._release_browser()
Пример #31
0
def main(argv):
    """Click every visible same-domain link on a page and show status codes.

    The page is opened through a BrowserMob proxy. After each click the
    captured HAR is passed to show_status_codes(), then the browser
    navigates back to the start page.

    Args:
        argv: unused; argparse reads the command line from sys.argv.
    """
    init()

    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        action='store',
                        dest='start_url',
                        help='Set page URL',
                        required=True)
    parser.add_argument('-c',
                        action='store',
                        dest='cookies_file',
                        help='JSON file with cookies',
                        required=False)
    parser.add_argument(
        '-w',
        action='store',
        dest='webdriver_type',
        help='Set WebDriver type (firefox or phantomjs, firefox by default)',
        default="firefox",
        required=False)
    results = parser.parse_args()

    start_url = results.start_url
    cookies_file = results.cookies_file
    webdriver_type = results.webdriver_type

    allowed_domain = urlparse(start_url).netloc

    browsermobproxy_path = get_browsermobproxy_path()

    options = {
        'port': 9090,
    }

    server = Server(browsermobproxy_path, options)
    server.start()
    driver = None

    try:
        proxy = server.create_proxy()

        if webdriver_type == "phantomjs":
            service_args = [
                # Use the address the proxy actually got instead of assuming
                # it is always localhost:9091.
                '--proxy={0}'.format(proxy.proxy),
                '--proxy-type=http',
            ]
            driver = webdriver.PhantomJS(service_args=service_args)
            driver.set_window_size(1440, 1024)
        else:
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            driver = webdriver.Firefox(firefox_profile=profile)

        proxy.new_har('woodpycker',
                      options={
                          'captureHeaders': True,
                          'captureContent': True
                      })
        driver.get(start_url)

        if cookies_file is not None:
            with open(cookies_file, 'rb') as fp:
                cookies = json.load(fp)
            for cookie in cookies:
                driver.add_cookie(cookie)
            driver.refresh()

        links = driver.find_elements_by_tag_name('a')
        for i in range(len(links)):
            # The list is re-queried after each click and may have shrunk.
            if i >= len(links):
                break
            link = links[i]
            if not link.is_displayed():
                continue
            url = link.get_attribute('href')
            text = link.get_attribute('text') or ''
            # Anchors without an href, or pointing off-domain, are skipped.
            if not url or allowed_domain not in url:
                continue
            link.click()
            print("%s Clicked on the link '%s' with HREF '%s'" % (
                Fore.BLUE + "*" + Fore.RESET, Style.BRIGHT + text +
                Style.RESET_ALL, Style.BRIGHT + url + Style.RESET_ALL))
            show_status_codes(proxy.har, allowed_domain)
            # Go back only after an actual navigation; doing it for skipped
            # links would leave the start page. The old element references
            # are stale after the reload, so fetch them again.
            driver.back()
            driver.refresh()
            links = driver.find_elements_by_tag_name('a')
    finally:
        # Shut both processes down even if the crawl failed part-way.
        try:
            if driver is not None:
                driver.quit()
        finally:
            server.stop()
Пример #32
0
class Browser:
    """Chrome session routed through a BrowserMob proxy that dumps HAR files.

    Creating an instance starts the proxy server, creates a proxy on it and
    launches Chrome configured to use that proxy.  Every call to ``get``
    records a new HAR and writes it into ``harfilePath``.
    """

    # Page-height divisors visited, in order, before jumping to the bottom.
    _SCROLL_DIVISORS = (5, 4, 3, 2)

    def __init__(self, chromedriverPath, browsermobPath, harfilePath, cookies=None):
        """Start the proxy server and the Chrome driver.

        Args:
            chromedriverPath: path of the chromedriver executable.
            browsermobPath: path of the browsermob-proxy executable.
            harfilePath: directory the HAR files are written to.
            cookies: optional path of a JSON file holding a list of cookies
                that are added to the driver.
        """
        self.harfilePath = harfilePath
        self.driver = None
        self.server = Server(browsermobPath)
        self.server.start()
        try:
            self.proxy = self.server.create_proxy()

            os.environ["webdriver.chrome.driver"] = chromedriverPath
            chrome_options = webdriver.ChromeOptions()
            # Hand "host:port" to Chrome as-is.  Feeding it through urlparse()
            # keeps only the port on Python 3.9+, where "host:" is read as a
            # URL scheme.
            chrome_options.add_argument(
                "--proxy-server={0}".format(self.proxy.proxy))

            self.driver = webdriver.Chrome(chromedriverPath,
                                           chrome_options=chrome_options)
            if cookies:
                print("Loading cookies from "+str(cookies))
                with open(cookies, 'r') as cookieFile:
                    cookieJson = json.load(cookieFile)
                for cookie in cookieJson:
                    self.driver.add_cookie(cookie)
        except Exception:
            # Do not leave the proxy server (or a half-configured browser)
            # running when the set-up fails part-way.
            self.close()
            raise

    def get(self, url, timeout=20):
        """Load ``url``, scroll through it, save the HAR and return the page source.

        Args:
            url: address to load.
            timeout: page-load timeout in seconds.

        Returns:
            The page source as reported by the driver.

        Note:
            When reading the page source or the HAR times out, the whole load
            is retried with a timeout 5 seconds longer; the number of retries
            is not bounded.
        """
        print(url)
        self.proxy.new_har(url, {"captureContent":True})
        try:
            self.driver.set_page_load_timeout(timeout)
            self.driver.get(url)
            # Scroll down step by step, pausing after each step.
            for divisor in self._SCROLL_DIVISORS:
                self.driver.execute_script(
                    "window.scrollTo(0, document.body.scrollHeight/{0});".format(divisor))
                time.sleep(.5)
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(4)
        except TimeoutException:
            print("Timeout")
            self.driver.find_element_by_tag_name("body").send_keys(Keys.CONTROL+Keys.ESCAPE)

        try:
            source = self.driver.page_source
            result = json.dumps(self.proxy.har, ensure_ascii=False)
            # One file per call, named after the current time in milliseconds.
            harfile_name = self.harfilePath+"/"+str(int(time.time()*1000.0))+".har"
            with open(harfile_name, "w") as harfile:
                harfile.write(result)
            return source
        except TimeoutException:
            print("Retrying, with a timeout of "+str(timeout+5))
            return self.get(url, timeout=timeout+5)

    def close(self):
        """Stop the proxy server and quit the driver, reporting failures as warnings."""
        try:
            self.server.stop()
        except Exception:
            print("Warning: Error stopping server")
        # The driver is still None when start-up failed before Chrome launched.
        if self.driver is not None:
            try:
                self.driver.quit()
            except Exception:
                print("Warning: Error stopping driver")
Пример #33
0
class performance(object):
    """Collect a HAR archive and ``window.performance`` data for one URL.

    Typical use: ``start_all(args)``, ``create_har(args)``, ``stop_all()``.
    ``args`` is a dict read for the keys ``browser`` ('chrome' or 'firefox'),
    ``url`` and, optionally, ``path`` (output directory).
    """

    def __init__(self, mob_path):
        """Remember the browsermob-proxy executable path; nothing is started yet."""
        from datetime import datetime
        print("%s: Go " % (datetime.now()))
        self.browser_mob = mob_path
        self.server = self.driver = self.proxy = None

    @staticmethod
    def __store_into_file(args, title, result):
        """Write ``result`` to ``<title>.json``, inside ``args['path']`` when given."""
        if 'path' in args:
            file_name = args['path'] + '/' + title + '.json'
        else:
            file_name = title + '.json'
        # ``with`` closes the file even when the write fails.
        with open(file_name, 'w') as har_file:
            har_file.write(str(result))

    def __start_server(self):
        """Start the proxy server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self, args):
        """Start the browser named by ``args['browser']`` behind the proxy.

        Any other browser name leaves ``self.driver`` untouched.
        """
        if args['browser'] == 'chrome':
            print("Browser: Chrome")
            print("URL: {0}".format(args['url']))
            chromedriver = os.getenv("CHROMEDRIVER_PATH", "/chromedriver")
            os.environ["webdriver.chrome.driver"] = chromedriver
            chrome_options = webdriver.ChromeOptions()
            # The proxy address is already "host:port"; pass it unchanged.
            chrome_options.add_argument(
                "--proxy-server={0}".format(self.proxy.proxy))
            chrome_options.add_argument("--no-sandbox")
            self.driver = webdriver.Chrome(chromedriver,
                                           chrome_options=chrome_options)
        elif args['browser'] == 'firefox':
            print("Browser: Firefox")
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(self.proxy.selenium_proxy())
            self.driver = webdriver.Firefox(firefox_profile=profile)

    def start_all(self, args):
        """Start the proxy server, then the browser."""
        self.__start_server()
        self.__start_driver(args)

    def create_har(self, args):
        """Load ``args['url']`` and store the HAR and the performance object.

        Writes ``har.json`` (proxy HAR) and ``perf.json`` (the result of
        ``return window.performance`` executed in the page).
        """
        self.proxy.new_har(args['url'], options={'captureHeaders': True})
        self.driver.get(args['url'])

        result = json.dumps(self.proxy.har, ensure_ascii=False)
        self.__store_into_file(args, 'har', result)

        perf_data = json.dumps(
            self.driver.execute_script("return window.performance"),
            ensure_ascii=False)
        self.__store_into_file(args, 'perf', perf_data)

    def stop_all(self):
        """Stop the proxy server and quit the browser.

        Parts that were never started are skipped, and the browser is quit
        even when stopping the server raises.
        """
        from datetime import datetime
        print("%s: Finish" % (datetime.now()))

        try:
            if self.server is not None:
                self.server.stop()
        finally:
            if self.driver is not None:
                self.driver.quit()
Пример #34
0
    def _real_extract(self, url):
        """Extract the HLS formats of a video by sniffing the player's requests.

        A browsermob-proxy server is started on the first candidate port
        (18080, 18180, ... derived from ``VideovardIE._NUM``) on which nothing
        is listening yet.  The page is then driven with a webdriver routed
        through a proxy on the next port, and the recorded HAR is scanned for
        the ``master.m3u8`` request.  All of this runs while
        ``VideovardIE._LOCK`` is held.

        Returns:
            A dict with ``id``, ``title``, ``formats`` and ``ext``, or
            ``None`` when no ``master.m3u8`` request is found in the HAR.

        Raises:
            ExtractorError: when the proxy server cannot be started, when the
                playlist is missing or has no formats, or wrapping any other
                exception raised on the way.
        """
        try:

            with VideovardIE._LOCK:

                self.report_extraction(url)
                videoid = self._match_id(url)

                while True:
                    _server_port = 18080 + VideovardIE._NUM * 100
                    _server = Server(
                        path=
                        "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                        options={'port': _server_port})
                    try:
                        if _server._is_listening():
                            # Port already taken: move on to the next slot.
                            VideovardIE._NUM += 1
                            if VideovardIE._NUM == 25:
                                raise Exception("mobproxy max tries")
                        else:
                            _server.start({
                                "log_path": "/dev",
                                "log_file": "null"
                            })
                            self.to_screen(
                                f"[{url}] browsermob-proxy start OK on port {_server_port}"
                            )
                            VideovardIE._NUM += 1
                            break
                    except Exception as e:
                        lines = traceback.format_exception(*sys.exc_info())
                        self.to_screen(
                            f'[{url}] {repr(e)} \n{"!!".join(lines)}')
                        if _server.process: _server.stop()
                        raise ExtractorError(
                            f"[{url}] browsermob-proxy start error - {repr(e)}"
                        )

                _host = 'localhost'
                _port = _server_port + 1
                _harproxy = None
                driver = None

                try:
                    # Created inside the try so that the ``finally`` below
                    # also stops the server when either call fails.
                    _harproxy = _server.create_proxy({'port': _port})
                    driver = self.get_driver(host=_host, port=_port)

                    _harproxy.new_har(options={
                        'captureHeaders': True,
                        'captureContent': True
                    },
                                      ref=f"har_{videoid}",
                                      title=f"har_{videoid}")
                    self.send_multi_request(driver, url.replace('/e/', '/v/'))
                    title = try_get(
                        self.wait_until(
                            driver, 60,
                            ec.presence_of_element_located(
                                (By.TAG_NAME, "h1"))), lambda x: x.text)

                    vpl = self.wait_until(
                        driver, 60,
                        ec.presence_of_element_located((By.ID, "vplayer")))
                    # Two attempts at clicking the player; when a click fails,
                    # the "kalamana" and "rulezco" elements are clicked first.
                    for _ in range(2):
                        try:
                            vpl.click()
                            self.wait_until(driver, 1)
                            vpl.click()
                            break
                        except Exception:
                            el_kal = self.wait_until(
                                driver, 60,
                                ec.presence_of_element_located(
                                    (By.CSS_SELECTOR, "div.kalamana")))
                            if el_kal: el_kal.click()
                            self.wait_until(driver, 1)
                            el_rul = self.wait_until(
                                driver, 60,
                                ec.presence_of_element_located(
                                    (By.CSS_SELECTOR, "div.rulezco")))
                            if el_rul: el_rul.click()
                            self.wait_until(driver, 1)

                    har = _harproxy.har
                    m3u8_url = self.scan_for_request(har, f"har_{videoid}",
                                                     "master.m3u8")
                    if m3u8_url:
                        self.write_debug(f"[{url}] m3u8 url - {m3u8_url}")
                        res = self.send_multi_request(None, m3u8_url)
                        if not res:
                            raise ExtractorError(f"[{url}] no m3u8 doc")
                        m3u8_doc = (res.content).decode('utf-8', 'replace')
                        self.write_debug(f"[{url}] \n{m3u8_doc}")
                        formats_m3u8, _ = self._parse_m3u8_formats_and_subtitles(
                            m3u8_doc,
                            m3u8_url,
                            ext="mp4",
                            entry_protocol='m3u8_native',
                            m3u8_id="hls")

                        if not formats_m3u8:
                            raise ExtractorError(
                                f"[{url}] Can't find any M3U8 format")

                        self._sort_formats(formats_m3u8)

                        return ({
                            "id":
                            videoid,
                            "title":
                            sanitize_filename(title, restricted=True),
                            "formats":
                            formats_m3u8,
                            "ext":
                            "mp4"
                        })
                    # NOTE(review): without an m3u8 request the method falls
                    # through and returns None - confirm this is intended.

                except ExtractorError:
                    raise
                except Exception as e:
                    lines = traceback.format_exception(*sys.exc_info())
                    self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
                    raise ExtractorError(repr(e))
                finally:
                    # Release proxy, server and browser in that order; each
                    # step is attempted even when an earlier one raises.
                    try:
                        if _harproxy is not None:
                            _harproxy.close()
                    finally:
                        try:
                            _server.stop()
                        finally:
                            if driver is not None:
                                self.rm_driver(driver)

        except ExtractorError:
            # Already logged and worded where it was raised; do not re-wrap.
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
            raise ExtractorError(repr(e))
Пример #35
0
    def worker(self):
        """Collect the image URLs requested while loading every second page.

        Each selected URL from ``self.urls_list`` is opened in headless Chrome
        behind a browsermob proxy; the request URLs recorded in the HAR that
        contain ``japscan.co`` and ``.jpg`` or ``.png`` (and not ``bg.``) are
        gathered, de-duplicated and stored in ``self.urls_down``.

        The browsermob and chromedriver binaries are looked up under the
        current working directory.  On a page-load timeout the browser is
        restarted and the whole pass is repeated.

        Returns:
            None; the result is left in ``self.urls_down``.
        """
        self.urls_down = []
        URLS = self.urls_list
        page_nbr = len(self.urls_list) - 1
        path = os.getcwd()
        # Browsermob binaries location
        browsermobproxy_location = "{}/browsermob/browsermob-proxy".format(
            path)
        chrome_path = "{}/chromedriver".format(path)

        # Start browsermob server
        print("Proxy init...")
        server = Server(browsermobproxy_location)
        server.start()
        driver = None
        try:
            time.sleep(1)
            proxy = server.create_proxy()
            time.sleep(1)

            def _new_driver():
                # Headless Chrome behind the proxy, with the automation
                # switches removed and insecure certificates accepted.
                chrome_options = webdriver.ChromeOptions()
                chrome_options.add_experimental_option(
                    "useAutomationExtension", False)
                chrome_options.add_experimental_option(
                    "excludeSwitches", ["enable-automation"])
                chrome_options.set_capability("acceptInsecureCerts", True)
                chrome_options.add_argument("--log-level=3")
                chrome_options.add_argument(
                    '--proxy-server=%s' % proxy.proxy)
                chrome_options.add_argument("--disable-blink-features")
                chrome_options.add_argument(
                    "--disable-blink-features=AutomationControlled")
                chrome_options.add_argument("--headless")
                caps = DesiredCapabilities.CHROME
                return webdriver.Chrome(chrome_path,
                                        desired_capabilities=caps,
                                        options=chrome_options)

            driver = _new_driver()
            print("Driver init...")

            # Only every second URL is visited: start at index 0 when
            # page_nbr is even, at index 1 when it is odd.
            first = 0 if page_nbr % 2 == 0 else 1

            # Loop so that a timeout restarts the pass instead of aborting.
            while True:

                print("Fetch :")
                try:
                    # Warm the driver up on a lightweight website
                    driver.set_page_load_timeout(30)
                    driver.get('http://perdu.com/')

                    for URL in tqdm(URLS[first::2]):
                        proxy.new_har("urls")
                        driver.get(URL)

                        # Request URLs recorded for this page
                        network_events = [
                            entry['request']['url']
                            for entry in proxy.har['log']["entries"]
                            if 'request' in entry
                        ]

                        # Keep only the images, minus the backgrounds
                        self.urls_down.extend(
                            s for s in network_events
                            if "japscan.co" in s
                            and (".jpg" in s or ".png" in s)
                            and "bg." not in s)

                    break

                except TimeoutException as ex:
                    print("Timeout, retry" + str(ex))
                    # A driver that has been quit cannot be reused, so the
                    # retry needs a fresh one.
                    driver.quit()
                    driver = None
                    driver = _new_driver()

            # Remove duplicates, keeping the first occurrence of each URL
            self.urls_down = list(dict.fromkeys(self.urls_down))

        finally:
            # Stop the server and the driver, also when something failed
            server.stop()
            if driver is not None:
                driver.quit()

        return
Пример #36
0
def get_signature_url(user_url):
    """Return the first request URL containing ``_signature`` seen for a page.

    ``user_url`` is opened in Chrome behind a browsermob proxy and the
    recorded HAR is searched for a request whose URL contains
    ``_signature``.  That URL is then opened in the browser, the page source
    and cookies are printed, and the URL is returned.

    Args:
        user_url: address of the page to load.

    Returns:
        The matching request URL, or ``None`` when there is no match or when
        an exception occurred (exceptions are logged, not raised).
    """
    server = None
    driver = None
    try:
        # Proxy service; ``server`` is only set once it has really started,
        # so the clean-up below never stops a server that is not running.
        starting = Server(proxy_file)
        starting.start()
        server = starting
        proxy = server.create_proxy()

        options = webdriver.ChromeOptions()
        options.add_argument("--proxy-server={0}".format(proxy.proxy))
        options.add_argument('--disable-gpu')

        options.add_argument('lang=zh_CN.UTF-8')
        options.add_argument("user-agent=" + ua)
        options.add_argument('accept=' + accept[0])
        options.add_argument("accept-language=" + accept_language[0])
        options.add_argument('accept-encoding="gzip, deflate, br"')
        options.add_argument("upgrade-insecure-requests=1")
        options.add_argument('cache-control="max-age=0"')
        # Drop the "enable-automation" switch (crawler give-away)
        options.add_experimental_option('excludeSwitches', ['enable-automation'])

        driver = webdriver.Chrome(options=options)
        proxy.new_har("douyin", options={'captureHeaders': True, 'captureContent': True})
        # Log the address actually being loaded (the parameter, not a global).
        logger.info("原始URL {}".format(user_url))
        driver.set_network_conditions(
            offline=False,
            latency=5,  # additional latency (ms)
            download_throughput=500 * 1024,  # maximal throughput
            upload_throughput=500 * 1024)  # maximal throughput

        driver.get(user_url)
        network = driver.get_network_conditions()
        print(network)

        time.sleep(3)
        result = proxy.har  # fetch the HAR
        for entry in result['log']['entries']:
            _url = entry['request']['url']
            # Find the data request by its URL: the wanted one contains
            # "_signature".
            if "_signature" in _url:
                logger.info("获取到用户第一个数据请求接口------>>>\n{}".format(_url))
                driver.get(_url)
                cookies = driver.get_cookies()
                time.sleep(3)
                print(driver.page_source)
                print(cookies)
                return _url
    except Exception as e:
        logger.exception(e)
    finally:
        # Release only what was actually created; the browser is quit even
        # when stopping the server raises.
        try:
            if server is not None:
                server.stop()
        finally:
            if driver is not None:
                driver.quit()
Пример #37
0
    def _real_extract(self, url):
        """Build a playlist of an account's videos from sniffed API responses.

        A browsermob-proxy server is started (under ``OnlyFansPostIE._LOCK``)
        on the first candidate port on which nothing is listening, a
        webdriver is routed through a proxy on the next port, and the JSON
        responses recorded in the HAR are turned into entries.

        The ``mode`` group of the URL selects the source:

        * ``latest`` (the default): the first ``posts/videos?`` response;
        * ``all``, ``favorites``, ``tips``: every ``posts/videos?`` response
          recorded while scrolling the videos page;
        * ``chat``: every ``messages`` response of the chat with the account.

        When a video id is seen more than once, the copy with the longer
        ``duration`` is kept.

        Returns:
            The playlist result built from the collected entries.

        Raises:
            ExtractorError: when the profile is not found, when no entries
                are collected, or wrapping any other exception.
        """
        # Pre-set so that the clean-up in ``finally`` is safe whichever step
        # fails.
        _server = None
        _harproxy = None
        driver = None

        try:
            self.report_extraction(url)

            with OnlyFansPostIE._LOCK:

                while True:

                    _server_port = 18080 + 100 * OnlyFansPostIE._NUM

                    _candidate = Server(
                        path=
                        "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                        options={'port': _server_port})
                    if _candidate._is_listening():
                        # Port already taken: move on to the next slot.
                        OnlyFansPostIE._NUM += 1
                        if OnlyFansPostIE._NUM == 25:
                            raise Exception("mobproxy max tries")
                    else:

                        _candidate.start({'log_path': '/dev', 'log_file': 'null'})
                        # Only a server that really started gets stopped later.
                        _server = _candidate
                        OnlyFansPostIE._NUM += 1
                        break

            _host = 'localhost'
            _port = _server_port + 1
            _harproxy = _server.create_proxy({'port': _port})
            # A single driver: a second get_driver() call would leave the
            # first browser without anyone to remove it.
            driver = self.get_driver(host=_host, port=_port)

            self.send_driver_request(driver, self._SITE_URL)
            for cookie in OnlyFansPlaylistIE._COOKIES:
                driver.add_cookie(cookie)

            account, mode = re.search(self._VALID_URL,
                                      url).group("account", "mode")
            if not mode:
                mode = "latest"

            entries = {}
            _har_ref = f"har_{account}_{mode}"
            _har_options = {'captureHeaders': False, 'captureContent': True}

            def _add_entries(list_json):
                # Turn every JSON item into videos, keeping per id the copy
                # with the longer duration.
                for info_json in list_json:
                    _entry = self._extract_from_json(info_json,
                                                     user_profile=account)
                    if not _entry:
                        continue
                    for _video in _entry:
                        _known = entries.get(_video['id'])
                        if _known is None or _video.get(
                                'duration', 1) > _known.get('duration', 0):
                            entries[_video['id']] = _video

            def _join_lists(responses):
                # Concatenate the 'list' member of every response; a response
                # without one contributes nothing.
                joined = []
                for el in responses:
                    joined += el.get('list') or []
                return joined

            if mode in ("all", "latest", "favorites", "tips"):

                self.send_driver_request(driver, f"{self._SITE_URL}/{account}")
                res = self.wait_until(driver, 60, error404_or_found())
                if not res or res[0] == "error404":
                    raise ExtractorError(
                        "Error 404: User profile doesnt exists")

                _url = f"{self._SITE_URL}/{account}/videos{self._MODE_DICT[mode]}"

                _harproxy.new_har(options=_har_options,
                                  ref=_har_ref,
                                  title=_har_ref)

                self.send_driver_request(driver, _url)
                self.wait_until(
                    driver, 60,
                    ec.presence_of_all_elements_located(
                        (By.CLASS_NAME, "b-photos__item.m-video-item")))
                # Equality, not ``in ("latest")``: the latter is a substring
                # test against the plain string "latest".
                if mode == "latest":
                    har = _harproxy.har
                    data_json = self.scan_for_request(har, _har_ref,
                                                      "posts/videos?")
                    if data_json:
                        self.write_debug(data_json)
                        list_json = data_json.get('list')
                        if list_json:
                            _add_entries(list_json)

                else:

                    # Scroll down in the videos page till the end
                    self.wait_until(driver, 600, scroll(10))

                    har = _harproxy.har
                    _reg_str = r'/api2/v2/users/\d+/posts/videos\?'
                    data_json = self.scan_for_all_requests(
                        har, _har_ref, _reg_str)
                    if data_json:
                        self.write_debug(data_json)
                        list_json = _join_lists(data_json)

                        self.write_debug(list_json)

                        _add_entries(list_json)

            elif mode == "chat":

                _harproxy.new_har(options=_har_options,
                                  ref=_har_ref,
                                  title=_har_ref)
                _url = f"{self._SITE_URL}/{account}"
                self.send_driver_request(driver, _url)
                res = self.wait_until(driver, 60, error404_or_found())
                if not res or res[0] == "error404":
                    raise ExtractorError("User profile doesnt exists")
                har = _harproxy.har
                data_json = self.scan_for_request(har, _har_ref,
                                                  f"users/{account}")
                userid = try_get(data_json, lambda x: x['id'])
                if not userid:
                    raise ExtractorError("couldnt get id user for chat room")
                url_chat = f"https://onlyfans.com/my/chats/chat/{userid}/"

                self.to_screen(url_chat)
                self.send_driver_request(driver, url_chat)
                # The chat opens at its end, with all the previous messages
                # above: scroll up to the start of the chat.
                el_chat_scroll = self.wait_until(
                    driver, 60,
                    ec.presence_of_element_located((
                        By.CSS_SELECTOR,
                        "div.b-chats__scrollbar.m-custom-scrollbar.b-chat__messages.m-native-custom-scrollbar.m-scrollbar-y.m-scroll-behavior-auto"
                    )))
                self.wait_until(driver, 1)
                el_chat_scroll.send_keys(Keys.HOME)
                self.wait_until(driver, 5)

                har = _harproxy.har
                _reg_str = r'/api2/v2/chats/\d+/messages'
                data_json = self.scan_for_all_requests(
                    har, _har_ref, _reg_str)
                if data_json:
                    self.write_debug(data_json)
                    _add_entries(_join_lists(data_json))

            if entries:
                return self.playlist_result(list(entries.values()),
                                            "Onlyfans:" + account,
                                            "Onlyfans:" + account)
            else:
                raise ExtractorError("no entries")

        except ExtractorError:
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f'{repr(e)} \n{"!!".join(lines)}')
            raise ExtractorError(repr(e))
        finally:
            # Release proxy, server and browser in that order, skipping what
            # was never created; each step runs even when an earlier one
            # raises.
            try:
                if _harproxy is not None:
                    _harproxy.close()
            finally:
                try:
                    if _server is not None:
                        _server.stop()
                finally:
                    if driver is not None:
                        self.rm_driver(driver)
Пример #38
0
def retrieve_har():
    """Load the page in Firefox behind a proxy and save the recorded HAR.

    Reads the module-level names ``har_name``, ``har_save_path``, ``path``
    (browsermob-proxy executable), ``ff_profile`` and ``url``.  After the
    page is loaded the element named 'vplayer' inside frame 0 is clicked;
    when that fails it is tried once more after 5 seconds.  If a click
    succeeded the HAR is written to ``<har_save_path>/<har_name>.har``;
    otherwise the process exits via ``sys.exit()``.

    The proxy server and the browser are shut down in every case.
    """
    print("Retrieving .har file using generated url...")

    har_name_ex = har_name + ".har"
    complete_har_path = os.path.join(har_save_path, har_name_ex)

    def _click_player():
        # Click the video player inside frame 0; report whether it worked.
        try:
            driver.switch_to.frame(0)
            driver.find_element_by_name('vplayer').click()
        except Exception:
            return False
        return True

    server = Server(path)
    server.start()
    driver = None
    clicked = False
    try:
        proxy = server.create_proxy()

        # Firefox is used; a Chrome variant was tried and did not work.
        profile = webdriver.FirefoxProfile(ff_profile)
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)

        proxy.new_har(har_name, options={'captureHeaders': True})
        driver.get(url)

        clicked = _click_player()
        if not clicked:
            print("Couldn't click player!")
            print("Trying again in 5 seconds...")

            time.sleep(5)

            clicked = _click_player()
            if not clicked:
                print("Not able to click the video player")

        # Give up only when both attempts failed; a successful retry goes on
        # to export the HAR like a successful first attempt.
        if clicked:
            time.sleep(1)

            # indent=4 puts the .har content on separate lines
            result = json.dumps(proxy.har, ensure_ascii=False, indent=4)

            with open(complete_har_path, 'w') as har_file:
                har_file.write(str(result))
    finally:
        # Stop the server and the driver, also when something went wrong
        try:
            server.stop()
        finally:
            if driver is not None:
                driver.quit()

    if not clicked:
        time.sleep(3)
        sys.exit()
Пример #39
0
    def _get_videos_from_subs(self, url):
        """Return the videos listed first on the ``/videos`` page of a profile.

        A browsermob-proxy server is started (under
        ``OnlyFansActSubslistIE._LOCK``) on a port derived from
        ``OnlyFansActSubslistIE._NUM``, the profile's videos page is loaded
        through it, and the first ``posts/videos?`` response found in the HAR
        is turned into entries.  When a video id is seen more than once, the
        copy with the longer ``duration`` is kept.

        Args:
            url: profile URL; its last path segment is taken as the account.

        Returns:
            A list with one entry per video id.

        Raises:
            ExtractorError: when the profile is not found, when no entries
                are collected, or wrapping any other exception.
        """
        _url_videos = f"{url}/videos"
        # Pre-set so that the clean-up in ``finally`` is safe whichever step
        # fails.
        _server = None
        _harproxy = None
        driver = None

        try:
            self.report_extraction(_url_videos)
            with OnlyFansActSubslistIE._LOCK:
                _server_port = 18080 + 100 * OnlyFansActSubslistIE._NUM
                OnlyFansActSubslistIE._NUM += 1
                _candidate = Server(
                    path=
                    "/Users/antoniotorres/Projects/async_downloader/browsermob-proxy-2.1.4/bin/browsermob-proxy",
                    options={'port': _server_port})
                _candidate.start({'log_path': '/dev', 'log_file': 'null'})
                # Only a server that really started gets stopped later.
                _server = _candidate
                _host = 'localhost'
                _port = _server_port + 1
                _harproxy = _server.create_proxy({'port': _port})

            driver = self.get_driver(host=_host,
                                     port=_port,
                                     msg=f'[{_url_videos}]')
            self.send_driver_request(driver, self._SITE_URL)
            for cookie in OnlyFansActSubslistIE._COOKIES:
                driver.add_cookie(cookie)

            self.send_driver_request(driver, url)
            res = self.wait_until(driver, 60, error404_or_found())
            if not res or res[0] == "error404":
                raise ExtractorError(
                    f"[{_url_videos}] User profile doesnt exists")
            account = url.split("/")[-1]
            _har_ref = f"har_actsubs_{account}"
            _harproxy.new_har(options={
                'captureHeaders': False,
                'captureContent': True
            },
                              ref=_har_ref,
                              title=_har_ref)
            self.send_driver_request(driver, _url_videos)
            self.wait_until(
                driver, 60,
                ec.presence_of_all_elements_located(
                    (By.CLASS_NAME, "b-photos__item.m-video-item")))

            har = _harproxy.har
            data_json = self.scan_for_request(har, _har_ref, "posts/videos?")
            entries = {}
            if data_json:
                self.write_debug(data_json)
                for info_json in data_json.get('list') or ():
                    _entry = self._extract_from_json(info_json,
                                                     user_profile=account)
                    if not _entry:
                        continue
                    for _video in _entry:
                        # Per id, keep the copy with the longer duration.
                        _known = entries.get(_video['id'])
                        if _known is None or _video.get(
                                'duration', 1) > _known.get('duration', 0):
                            entries[_video['id']] = _video

            if not entries: raise ExtractorError(f"[{_url_videos}] no entries")
            return list(entries.values())

        except ExtractorError:
            raise
        except Exception as e:
            lines = traceback.format_exception(*sys.exc_info())
            self.to_screen(f'[{_url_videos}] {repr(e)} \n{"!!".join(lines)}')
            raise ExtractorError(f'[{_url_videos}] {repr(e)}')
        finally:
            # Release proxy, server and browser in that order, skipping what
            # was never created; each step runs even when an earlier one
            # raises.
            try:
                if _harproxy is not None:
                    _harproxy.close()
            finally:
                try:
                    if _server is not None:
                        _server.stop()
                finally:
                    if driver is not None:
                        self.rm_driver(driver)
Пример #40
0
class HarTrapper:
    """Record HAR data for repeated visits to one page and export it as CSV.

    On construction the settings are read from ``config.ini``, a BrowserMob
    proxy server is started, a proxy is created on it, and a Chrome driver is
    launched whose traffic is routed through that proxy.
    """

    def __init__(self):
        # Set up settings from the config file.
        config = configparser.ConfigParser()
        config.read('config.ini')

        capture = config['CAPTURE_N_HAR_FILES']
        self.PAGE = capture['PAGE']
        self.NAME = capture['NAME']
        self.URL = capture['URL']
        self.PATH = capture['PATH']
        self.USERNAME = capture['USERNAME']
        self.PASSWORD = capture['PASSWORD']
        self.N = capture['N']

        # Filled by build_df() with the most recent recording.
        self.df = None

        self.server = Server(config['HAR']['SERVER_PATH'])
        self.server.start()
        self.proxy = self._start_proxy()
        self.driver = self._start_chrome_driver()

    def capture_n_har_files(self):
        """Log in, then record and export ``N`` HAR captures of the page action.

        Steps:
            Open ``self.URL`` and log in via ``go_to_pages``.
            Wait until the ``c_user`` cookie is present.
            ``int(self.N)`` times: drop non-session cookies, start a new HAR
            named ``self.NAME``, run ``page_func``, build the DataFrame and
            write it to ``<self.PATH>/_<i>.csv``.
            Quit the browser and stop the proxy server (also on failure).

        All settings come from the instance attributes loaded in ``__init__``;
        the method takes no arguments.
        """
        # Local import so this block does not rely on a module-level import.
        import time

        print('Working...')

        try:
            # Open the configured start URL.
            self.driver.get(self.URL)

            # Log in and navigate to the 'Pages' section.
            self.go_to_pages()

            # Wait for the 'c_user' cookie. implicitly_wait() only configures
            # the element-lookup timeout and does not pause, so sleep
            # explicitly instead of spinning.
            while self.driver.get_cookie('c_user') is None:
                print('No c_user.')
                time.sleep(2)

            # Click on the PAGE and close the tab N times.
            for i in range(int(self.N)):
                self.clear_cache()

                self.proxy.new_har(self.NAME)
                self.page_func()
                self.build_df()
                self.export_df(self.PATH + f'/_{i}.csv')

                print(f'{i}')
        finally:
            # Finish the session even when a step above raised.
            try:
                self.driver.quit()
            finally:
                self.server.stop()

    def _start_proxy(self):
        """Create a proxy on the running server to capture HAR data.

        Retries for as long as the server refuses the connection, printing
        each ``ConnectionError``.

        :return: The new server proxy.
        """
        while True:
            try:
                return self.server.create_proxy()
            except requests.exceptions.ConnectionError as e:
                print(e)

    def _start_chrome_driver(self) -> "webdriver.Chrome":
        """Start a Chrome browser whose traffic goes through the proxy.

        Notifications are disabled through the profile preferences, the page
        load timeout is set to 999 seconds and all cookies are deleted before
        the driver is returned. Headless mode is currently not enabled.

        :return: Google Chrome driver object.
        """
        chrome_options = webdriver.ChromeOptions()
        prefs = {"profile.default_content_setting_values.notifications": 2}
        chrome_options.add_experimental_option("prefs", prefs)
        chrome_options.set_capability('proxy', {'httpProxy': f'{self.proxy.proxy}',
                                                'noProxy': '',
                                                'proxyType': 'manual',
                                                'sslProxy': f'{self.proxy.proxy}'})

        driver = webdriver.Chrome(chrome_options=chrome_options)
        driver.set_page_load_timeout(999)
        driver.delete_all_cookies()

        return driver

    def clear_cache(self):
        """Delete every browser cookie except ``c_user`` and ``xs``.

        Despite the name, only cookies are removed; the two kept cookies are
        left so the logged-in session survives between captures.
        """
        keep = ('c_user', 'xs')

        for cookie in self.driver.get_cookies():
            name = cookie['name']
            if name not in keep:
                self.driver.delete_cookie(name)

    def export_har(self):
        """Export the current HAR recording to ``./har_fit.json``."""
        with open('./har_fit.json', 'w') as file:
            json.dump(self.proxy.har, file)

    def export_df(self, path):
        """Export the instance DataFrame to a csv file.

        :param path: Destination csv file path.
        """
        self.df.to_csv(path)

    def go_to_pages(self):
        """Fill in the login form, submit it and click the 'Pages' entry.

        Each stage is retried until it succeeds; Selenium lookup, timeout and
        interaction errors are printed and the stage is attempted again.
        """
        timeout = 5

        email_xpath = '//input[@id="email"] | //input[@name="email"]'
        pass_xpath = '//input[@id="pass"] | //input[@name="pass"]'
        login_xpath = '//input[@value="Log In"] | //button[@name="login"]'
        pages_xpath = '//div[text()="Pages"]'

        # Wait until all three login form elements are present.
        while True:
            try:
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, email_xpath)))
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, login_xpath)))
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, pass_xpath)))
                break

            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('Login ElementNotInteractableException.')

        self.driver.find_element_by_xpath(email_xpath).send_keys(self.USERNAME)
        self.driver.find_element_by_xpath(pass_xpath).send_keys(self.PASSWORD)
        self.driver.find_element_by_xpath(login_xpath).click()

        # Wait for the 'Pages' entry and click it.
        while True:
            try:
                WebDriverWait(self.driver, timeout).until(
                    EC.presence_of_element_located((By.XPATH, pages_xpath)))

                self.driver.find_element_by_xpath(pages_xpath).click()
                break

            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')

            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')

            except selenium.common.exceptions.ElementNotInteractableException:
                print('ElementNotInteractableException')

    def page_func(self):
        """Perform the recorded page action.

        Clicks "Liked Pages", clicks the span whose text equals ``self.PAGE``,
        then closes the second window handle and switches back to the window
        that was current when the method started.
        """
        timeout = 5

        liked_xpath = '//*[text() = "Liked Pages"]'
        page_xpath = '//span[text()="' + self.PAGE + '"]'
        # Remember the current window so we can return to it afterwards.
        pages_window = self.driver.current_window_handle

        while True:
            try:
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, liked_xpath)))
                self.driver.find_element_by_xpath(liked_xpath).click()
                break

            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('ElementNotInteractableException')

        while True:
            try:
                WebDriverWait(self.driver, timeout).until(EC.presence_of_element_located((By.XPATH, page_xpath)))
                self.driver.find_element_by_xpath(page_xpath).click()
                break

            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('ElementNotInteractableException')

        # NOTE(review): an IndexError from window_handles[1] (second window
        # not open yet) is not among the retried exceptions - confirm whether
        # that should be retried as well.
        while True:
            try:
                new_window = self.driver.window_handles[1]
                self.driver.switch_to_window(new_window)
                self.driver.close()
                self.driver.switch_to_window(pages_window)
                break

            except selenium.common.exceptions.NoSuchElementException:
                print('Login NoSuchElementException.')
            except selenium.common.exceptions.TimeoutException:
                print('Login TimeoutException.')
            except selenium.common.exceptions.ElementNotInteractableException:
                print('Login ElementNotInteractableException.')

    def build_df(self):
        """Flatten every HAR entry into one row and store them as a DataFrame.

        Nested dicts and lists are flattened into dotted column names (see
        ``_add_to_dict``). The result is stored in ``self.df``; nothing is
        returned.
        """
        rows = []

        for entry in self.proxy.har['log']['entries']:
            row = {}

            for k, v in entry.items():
                if isinstance(v, (dict, list)):
                    self._add_to_dict(row, k + '.', v)
                else:
                    row[k] = v

            rows.append(row)

        self.df = pd.DataFrame(rows)

    def _add_to_dict(self, __, k, v):
        """Flatten the nested value ``v`` into ``__`` under the key prefix ``k``.

        Dict members are keyed by their own key; list members are keyed by
        their position, so ``{'h': [{'name': 'x'}]}`` with prefix ``'r.'``
        yields ``{'r.h.0.name': 'x'}``.

        :param __: Target flat dictionary, updated in place.
        :param k: Key prefix, already ending with ``'.'``.
        :param v: A dict or list to flatten.
        """
        if isinstance(v, list):
            # Key list elements by index: unpacking each element as a
            # (key, value) pair would split a dict element into its key names.
            items = ((str(index), item) for index, item in enumerate(v))
        else:
            items = v.items()

        for kk, vv in items:
            if isinstance(vv, (dict, list)):
                self._add_to_dict(__, k + kk + '.', vv)
            else:
                __[k + kk] = vv
Пример #41
0
def start_local_proxy():
    """Start a BrowserMob server and open one proxy on it.

    The server executable is taken from ``PATH_TO_BROWSER_MOB_PROXY``.

    :return: ``(proxy, server)`` tuple; the caller is responsible for
        stopping the server.
    """
    mob_server = Server(PATH_TO_BROWSER_MOB_PROXY)
    mob_server.start()
    return mob_server.create_proxy(), mob_server
Пример #42
0
class browseWeb:
    """Drive a Chrome browser through a BrowserMob proxy and inspect its requests.

    The constructor starts the proxy server, creates a proxy, launches Chrome
    pointed at that proxy and begins a HAR recording named "Listener" with
    header capture enabled.
    """

    # Headers sent with the HEAD requests used to look up missing MIME types.
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    # (substring of the MIME type, bucket name); checked in order, first match
    # wins, anything unmatched goes to "unknown".
    _MIME_BUCKETS = (
        ("image", "img"),
        ("css", "css"),
        ("font", "font"),
        ("javascript", "js"),
        ("json", "json"),
        ("html", "doc"),
        ("plain", "doc"),
        ("octet-stream", "binary"),
    )

    def __init__(self, root="C:/xampp/htdocs/webscrape/", folder=""):
        """Start the proxy server and the proxied Chrome browser.

        :param root: Base output directory.
        :param folder: Sub-folder appended to ``root``.
        """
        self.url = ""
        self.root = root + folder + "/"

        # Initialise state that is otherwise only created by the setters /
        # getRequests, so the getters never hit a missing attribute.
        self.element = None
        self.elements = []
        self.request = None

        # Start browsermob proxy
        self.server = Server(
            r"C:\webdrivers\browsermob-proxy\bin\browsermob-proxy")
        self.server.start()
        self.proxy = self.server.create_proxy()

        # Setup Chrome webdriver - note: does not seem to work with headless On
        options = webdriver.ChromeOptions()
        options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        # Point the browser at our browsermob proxy so it can track requests.
        options.add_argument('--proxy-server=%s' % self.proxy.proxy)

        self.w = webdriver.Chrome(r'C:/webdrivers/chromedriver.exe',
                                  chrome_options=options)
        # Request listener
        self.proxy.new_har("Listener", options={'captureHeaders': True})

        print("Browser and Server initialized...")

    #------------------------------------------------------
    #	GETTERS
    #------------------------------------------------------

    def getCode(self):
        """Return the HTML source of the current page."""
        return self.w.page_source

    def getElement(self):
        """Return the element last stored by ``setElement`` (``None`` if unset)."""
        return self.element

    def getElements(self):
        """Return the elements last stored by ``setElements`` (empty if unset)."""
        return self.elements

    def getElementCode(self, element=None):
        """Return the outer HTML of ``element``, or of the stored element."""
        target = element if element else self.element
        return target.get_attribute('outerHTML')

    def getTypeParam(self, by):
        """Translate a short locator name into a Selenium ``By`` constant.

        :param by: One of "id", "class", "name", "tag", "text", "css".
        :return: The matching ``By`` value, or ``None`` for an unknown name.
        """
        locators = {
            "id": By.ID,
            "class": By.CLASS_NAME,
            "name": By.NAME,
            "tag": By.TAG_NAME,
            "text": By.PARTIAL_LINK_TEXT,
            "css": By.CSS_SELECTOR,
        }
        return locators.get(by)

    @classmethod
    def _bucket_for(cls, mime):
        """Return the request bucket name for the MIME type string ``mime``."""
        for needle, bucket in cls._MIME_BUCKETS:
            if needle in mime:
                return bucket
        return "unknown"

    @staticmethod
    def _count_requests(requestsList):
        """Store per-bucket counts and their sum under ``requestsList["total"]``.

        The "total" entry itself is excluded from the counting so the grand
        total only reflects actual requests.
        """
        counts = {
            key: len(items)
            for key, items in requestsList.items() if key != "total"
        }
        counts["total"] = sum(counts.values())
        requestsList["total"] = counts
        return requestsList

    def _lookup_mime_type(self, url, status, mime):
        """Ask the server for the content type of ``url`` with a HEAD request.

        Returns the ``content-type`` header as a string; on a handled network
        error the incoming ``mime`` is returned unchanged. Any other
        ``RequestException`` terminates the process, as before.
        """
        try:
            print("--> (" + status + " - " + mime +
                  ")Getting mimeType  ->  " + url)
            header = requests.head(url,
                                   allow_redirects=True,
                                   headers=self.headers)
            mime = str(header.headers.get('content-type'))
            print("--> ( " + mime + " )")

        except requests.exceptions.Timeout as e:
            print("Error Timeout: ", e)
        except requests.exceptions.ConnectionError as e:
            print("Error Connecting: ", e)
        except requests.exceptions.TooManyRedirects as e:
            print("Error, too many redirects", e)
        except requests.exceptions.HTTPError as e:
            print("Http Error", e)
        except requests.exceptions.RequestException as e:
            # catastrophic error. bail.
            print(e)
            sys.exit(1)

        return mime

    def getRequests(self):
        """Classify the recorded requests by MIME type.

        The raw HAR entries are written to ``<root>logs/har.json``. Requests
        are de-duplicated by URL without query string (the last one wins),
        responses whose status contains "204" are ignored, and empty / plain /
        "None" MIME types are resolved with a HEAD request.

        :return: Dict of bucket name -> list of ``[url, mime]`` plus a
            "total" dict with the per-bucket counts and the grand total. The
            same dict is stored in ``self.request``.
        """
        dataCollected = {}

        requestsList = {
            "js": [],
            "css": [],
            "img": [],
            "font": [],
            "json": [],
            "doc": [],
            "binary": [],
            "unknown": [],
            "total": {}
        }

        # Get list of all requests
        requestLog = self.proxy.har['log']["entries"]

        createFolder(self.root + "logs")
        writeFile(data=beauty(requestLog), file=self.root + "logs/har.json")
        print("\n[ ! ] HAR Stored! ")

        for entry in requestLog:
            if 'request' not in entry:
                continue

            # Getting referers
            referer = ""
            for item in entry['request']['headers']:
                if item["name"] == "Referer":
                    referer = item["value"]

            url = entry['request']['url']

            # Key by the URL with any "?..." parameters removed.
            dataCollected[url.split("?")[0]] = {
                "url": url,
                "type": entry['response']['content']['mimeType'],
                "size": entry['response']['bodySize'],
                "method": entry['request']['method'],
                "status": entry['response']['status'],
                "referer": referer
            }

        # Classifying requests
        for record in dataCollected.values():
            # A missing mimeType is treated like an empty one.
            mime = record["type"] or ""
            url = record["url"]
            status = str(record["status"])

            # Skip responses without content.
            if "204" in status:
                continue

            # Resolve empty, plain or "None" MIME types via a HEAD request.
            if mime == "" or "plain" in mime or "None" in mime:
                mime = self._lookup_mime_type(url, status, mime)

            requestsList[self._bucket_for(mime)].append([url, mime])

        # Counting requests
        self._count_requests(requestsList)

        self.request = requestsList

        return requestsList

    #------------------------------------------------------
    #	SETTERS
    #------------------------------------------------------

    def setUrl(self, url):
        """Store the URL used by the "start" command."""
        self.url = url

    def setElement(self, by, name):
        """Find one element, remember it and return it.

        :param by: Short locator name understood by ``getTypeParam``.
        :param name: Locator value.
        """
        byParam = self.getTypeParam(by)
        self.element = self.w.find_element(byParam, name)
        return self.element

    def setElements(self, by, name):
        """Find all matching elements, remember them and return them.

        :param by: Short locator name understood by ``getTypeParam``.
        :param name: Locator value.
        """
        byParam = self.getTypeParam(by)
        self.elements = self.w.find_elements(byParam, name)
        return self.elements

    #------------------------------------------------------
    #	FUNCTIONS
    #------------------------------------------------------

    def browseURL(self, url):
        """Load ``url`` with 30 second implicit-wait and page-load timeouts.

        On a page-load timeout the elapsed time is printed and loading is
        stopped with ``window.stop()``.
        """
        print("--> Loading web  ->  " + url)

        t = time.time()
        self.w.implicitly_wait(30)
        self.w.set_page_load_timeout(30)

        try:
            self.w.get(url)

        except TimeoutException:
            print("Error loading something")
            print('Time consuming:', time.time() - t)
            self.w.execute_script("window.stop();")

        print("--> Loaded... ")

    #-------------------------------------------------------

    def click(self, selector=""):
        """Click the element matching ``selector``, or the stored element."""
        if selector:
            self.w.find_element_by_css_selector(selector).click()
        else:
            self.element.click()

    def enter(self, selector=""):
        """Send ENTER to the element matching ``selector``, or the stored one."""
        if selector:
            self.w.find_element_by_css_selector(selector).send_keys(Keys.ENTER)
        else:
            self.element.send_keys(Keys.ENTER)

    def text(self, selector="", text=""):
        """Type ``text`` into the element matching ``selector``, or the stored one."""
        if selector:
            self.w.find_element_by_css_selector(selector).send_keys(text)
        else:
            self.element.send_keys(text)

    def exit(self):
        """Stop the proxy server, close the browser and kill leftover processes."""
        self.server.stop()
        print("Closed Proxy Server...")
        self.w.close()
        print("Closed Browser...")
        self.w.quit()
        print("Quited app...")

        # Killing java and webdriver process
        os.system("taskkill /f /im java.exe")
        os.system("taskkill /f /im chromedriver.exe")
        print("Process killed...")

    # -------------------------------------------------------

    def waitEnableItem(self, delay=10):
        """Wait up to ``delay`` seconds for ``#getData`` to be clickable, then click it."""
        try:
            # wait for button to be enabled
            WebDriverWait(self.w, delay).until(
                EC.element_to_be_clickable((By.ID, 'getData')))
            button = self.w.find_element_by_id('getData')
            button.click()

        except TimeoutException:
            print('Loading took too much time!')

    def waitLoadedItem(self, delay=10, cssSelector=""):
        """Wait up to ``delay`` seconds for ``cssSelector`` to be present.

        e.g.: ``cssSelector = '#dataTarget > div'``
        """
        try:
            # wait for data to be loaded
            WebDriverWait(self.w, delay).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, cssSelector)))
        except TimeoutException:
            print('Loading took too much time!')

    # -------------------------------------------------------

    def command(self, data):
        """Run a textual command.

        ``"1"`` / ``"2"`` are shortcuts for "start" / "getrequests". Commands
        are matched by substring, in this order: "start", "getrequests",
        "click <css selector>", "getcode".
        """
        library = {"1": "start", "2": "getrequests"}

        # Translate a numeric shortcut into its command name.
        if data.isdigit():
            data = library.get(data, data)

        if "start" in data:
            self.browseURL(self.url)

        elif "getrequests" in data:
            self.getRequests()
            print("-" * 30)
            printo(self.request)
            print("-" * 30)

        elif "click" in data:
            array = data.split(" ")
            if len(array) > 1:
                self.click(array[1])

        elif "getcode" in data:
            code = self.getCode()
            print(code)
Пример #43
0
class SeleniumTestCase(unittest.TestCase):
    """Base test case that provides a Selenium browser in ``self.client``.

    Configuration is read from environment variables at class-definition
    time. Depending on the flags the browser is a local Chrome/Firefox
    (optionally routed through a BrowserMob proxy) or a remote
    CrossBrowserTesting session.
    """
    client = None
    server = None
    proxy = None
    use_proxy = bool(int(os.environ.get('USE_PROXY', 0)))
    visitor_site_url = os.environ.get('VISITOR_SITE_URL',
                                      'http://visitor.angieslist.com')
    legacy_url = os.environ.get('LEGACY_URL', 'http://qatools.angieslist.com')
    browser_clients = os.environ.get('BROWSER_CLIENTS', 'Chrome').split(',')
    test_browser = int(os.environ.get('TEST_BROWSER', 0))
    test_client = os.environ.get('TEST_CLIENT', 'Mac OSX 10.10')
    browsermob_path = os.environ.get(
        'BROWSERMOB_PATH', './../browsermob-proxy/bin/browsermob-proxy')
    browsermob_port = int(os.environ.get('BROWSERMOB_PORT', '9090'))
    browsermob_host = os.environ.get('BROWSERMOB_HOST', '127.0.0.1')
    test_legacy_user = os.environ.get('LEGACY_USER', '')
    test_legacy_password = os.environ.get('LEGACY_PASSWORD', '')
    cbt_user = os.environ.get('CBT_USER', '')
    cbt_key = os.environ.get('CBT_KEY', '')
    cbt_flag = bool(int(os.environ.get('CBT_FLAG', 1)))
    char_key = None
    caps = {}

    def _cbt_hub_url(self):
        """Return the CrossBrowserTesting hub URL with the credentials embedded."""
        return "http://%s:%s@hub.crossbrowsertesting.com:80/wd/hub" % (
            self.cbt_user, self.cbt_key)

    def _remote_browser(self):
        """Open a remote CrossBrowserTesting session using ``self.caps``."""
        browser = webdriver.Remote(desired_capabilities=self.caps,
                                   command_executor=self._cbt_hub_url())
        browser.implicitly_wait(20)
        return browser

    def _stop_proxy_server(self):
        """Stop the BrowserMob server if one was started."""
        if self.server:
            self.server.stop()
            self.server = None

    def setUp(self):
        """Start the optional proxy and create the browser for this test.

        Raises ``NotImplementedError`` when the configured browser name is
        not an attribute of ``webdriver``; skips the test when the browser
        cannot be created.
        """
        method_name = self.browser_clients[self.test_browser]
        if self.use_proxy:
            self.server = Server(self.browsermob_path, {
                'host': self.browsermob_host,
                'port': self.browsermob_port
            })
            self.server.start()
            self.proxy = self.server.create_proxy()

        if self.cbt_flag:
            self.api_session = requests.Session()
            self.api_session.auth = (self.cbt_user, self.cbt_key)
            self.test_result = 'fail'
            # Work on a per-test copy so the class-level dict is not shared
            # and mutated across tests.
            self.caps = dict(self.caps)
            self.caps['name'] = self.id() + ' ' + str(datetime.datetime.now())
            self.caps['build'] = '1.0'
            self.caps['browserName'] = method_name
            self.caps['platform'] = self.test_client
            self.caps['screenResolution'] = '1366x768'
            self.caps['record_video'] = 'true'
            self.caps['record_network'] = 'true'
            self.caps['loggingPrefs'] = {'performance': 'INFO'}
        try:
            client_method = getattr(webdriver, method_name)
        except AttributeError:
            # tearDown is not run when setUp raises, so release the proxy here.
            self._stop_proxy_server()
            raise NotImplementedError(
                "Class `{}` does not implement `{}`".format(
                    webdriver.__class__.__name__, method_name))

        try:
            # Copy: the DesiredCapabilities entries are shared class-level dicts.
            d = getattr(DesiredCapabilities, method_name.upper()).copy()
            d['loggingPrefs'] = {
                'browser': 'ALL',
                'driver': 'ALL',
                'performance': 'ALL'
            }
            if method_name == 'Chrome':
                ch_profile = webdriver.ChromeOptions()
                ch_profile.perfLoggingPrefs = {
                    'enableNetwork': True,
                    'traceCategories': 'performance, devtools.network'
                }
                ch_profile.add_argument('incognito')
                ch_profile.add_argument('disable-extensions')
                ch_profile.add_argument('auto-open-devtools-for-tabs')
                ch_profile.add_argument('disable-browser-side-navigation')

                if self.use_proxy:
                    ch_profile.add_argument(
                        '--proxy-server=http://%s' %
                        self.proxy.selenium_proxy().httpProxy)

                if self.cbt_flag:
                    browser = self._remote_browser()
                else:
                    browser = client_method(desired_capabilities=d,
                                            chrome_options=ch_profile)
            elif method_name == 'Firefox':
                fp = webdriver.FirefoxProfile()
                if self.use_proxy:
                    fp.set_proxy(self.proxy.selenium_proxy())
                if self.cbt_flag:
                    browser = self._remote_browser()
                else:
                    browser = client_method(capabilities=d, firefox_profile=fp)
            else:
                browser = client_method()

            browser.set_window_size(2000, 1400)

            self.client = browser
        except Exception:
            print('Web browser not available')
            # tearDown is not run for a test skipped in setUp.
            self._stop_proxy_server()
            self.skipTest('Browser not available')

        time.sleep(1)

    def tearDown(self):
        """Close the browser, stop the proxy and report the score to CBT."""
        try:
            if self.client:
                self.client.close()
        finally:
            self._stop_proxy_server()
        # Only talk to the remote session when one was actually created.
        if self.cbt_flag and self.client:
            self.client.quit()
            self.api_session.put(
                'https://crossbrowsertesting.com/api/v3/selenium/' +
                self.client.session_id,
                data={
                    'action': 'set_score',
                    'score': self.test_result
                })

    def isElementPresent(self, cssSelector):
        """Return ``True`` when an element matching ``cssSelector`` exists."""
        try:
            self.client.find_element_by_css_selector(cssSelector)
        except NoSuchElementException:
            return False
        return True

    def prompt_with_timeout(self, prompt, time_limit):
        """Print ``prompt`` and wait up to ``time_limit`` seconds for one key.

        A background thread reads a single character from stdin; the key is
        polled once per second.

        :return: The character read, or ``None`` when the time limit passed.
        """
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        _thread.start_new_thread(self.keypress, ())
        print(prompt)
        for i in range(0, time_limit):
            self.char_key = None
            sleep(1)
            if self.char_key is not None:
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
                char = self.char_key
                self.char_key = None
                try:
                    _thread.exit()
                except SystemExit:
                    pass
                return char
        # Restore the terminal settings saved before the reader thread started.
        termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
        print("Continuing...")
        self.char_key = None
        return None

    def getch(self):
        """Read one character from stdin in raw mode and restore the terminal."""
        fd = sys.stdin.fileno()
        old_settings = termios.tcgetattr(fd)
        try:
            tty.setraw(sys.stdin.fileno())
            ch = sys.stdin.read(1)
        finally:
            termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
        return ch

    def keypress(self):
        """Store the next typed character in ``self.char_key``."""
        self.char_key = self.getch()

    # Helper method to use after an event triggers a new page load
    # @param old_page - client.find_element_by_tag_name('html') grabbed BEFORE new page call
    # @param timeout - int seconds
    @contextmanager
    def wait_for_new_page_load(self, old_page, timeout=30):
        """Context manager: after the body runs, wait until ``old_page`` is stale."""
        yield
        WebDriverWait(self.client, timeout).until(staleness_of(old_page))
Пример #44
0
class HarProfiler:
    """Context manager that records HAR files for a URL via BrowserMob + Firefox.

    Entering the context starts the optional virtual display and the
    BrowserMob proxy server; leaving it stops both. ``load_page`` records one
    HAR for a first load and, optionally, one for a second ("cached") load.
    """

    def __init__(self, config, url):
        """Read the settings from ``config`` and derive labels/file names.

        :param config: Mapping with the keys ``har_dir``, ``browsermob_dir``,
            ``label_prefix``, ``virtual_display``, ``virtual_display_size_x``
            and ``virtual_display_size_y``.
        :param url: URL used to build the HAR label (slugified).
        """
        self.har_dir = config['har_dir']
        self.browsermob_dir = config['browsermob_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        # Created in __enter__; kept as None so __exit__ can tell whether
        # they were ever started.
        self.display = None
        self.server = None

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # Both file names share one timestamp so a pair can be matched up.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def _stop_display(self):
        """Stop the virtual display if one was started."""
        if self.display is not None:
            log.info('stopping virtual display')
            self.display.stop()
            self.display = None

    def __enter__(self):
        """Start the virtual display (if configured) and the proxy server."""
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        try:
            self.server = Server('{}/bin/browsermob-proxy'.format(
                self.browsermob_dir)
            )
            self.server.start()
        except Exception:
            # __exit__ is not called when __enter__ raises.
            self._stop_display()
            raise
        return self

    def __exit__(self, type, value, traceback):
        """Stop the proxy server and the virtual display, if they were started."""
        try:
            if self.server is not None:
                log.info('stopping browsermob proxy')
                self.server.stop()
        finally:
            self._stop_display()

    def _make_proxied_webdriver(self):
        """Create a proxy on the server and a Firefox driver that uses it.

        :return: ``(driver, proxy)`` tuple.
        """
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write ``har`` as JSON into ``har_dir``.

        :param har: HAR data to serialise.
        :param cached: Use the "cached" file name instead of the regular one.
        """
        # exist_ok avoids failing when the directory appears concurrently.
        os.makedirs(self.har_dir, exist_ok=True)
        har_name = self.cached_har_name if cached else self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        # ensure_ascii=False emits raw non-ASCII text, so fix the encoding
        # instead of depending on the platform default.
        with open(os.path.join(self.har_dir, har_name), 'w',
                  encoding='utf-8') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def load_page(self, url, run_cached=True):
        """Load ``url`` through the proxy and save the recorded HAR.

        :param url: Page to load.
        :param run_cached: Also load the page a second time in the same
            browser session and save that recording under the cached name.
        """
        driver, proxy = self._make_proxied_webdriver()
        try:
            proxy.new_har(self.label)
            log.info('loading page: {}'.format(url))
            driver.get(url)
            self._save_har(proxy.har)

            if run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(url))
                driver.get(url)
                self._save_har(proxy.har, cached=True)
        finally:
            # Release the browser and the proxy port even when a load fails.
            try:
                driver.quit()
            finally:
                proxy.close()

    def slugify(self, text):
        """Lower-case ``text`` and join its alphanumeric runs with ``-``."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
Пример #45
0
class Get_url:
    """Drive Chrome through a BrowserMob proxy and collect matching request URLs.

    The browser window is dragged around with ``pag`` while the proxy records
    every request; ``get_req_url`` then filters the recording.
    """
    PROXY_PATH = path.abspath(
        r"D:\Anaconda3\browsermob-proxy-2.1.4\bin/browsermob-proxy.bat")
    CHROME_PATH = path.abspath(r"D:\Anaconda3\chromedriver.exe")
    # Chrome preferences passed to the driver as the "prefs" option.
    CHROME_OPTIONS = {"profile.managed_default_content_settings.images": 1}

    def __init__(self, url):
        """Remember the page ``url`` that ``open_web`` will load."""
        self.url = url

    def open_web(self, win_size=(800, 800)):
        """Resize the browser window to ``win_size`` and load ``self.url``."""
        self.driver.set_window_size(*win_size)
        self.driver.get(self.url)

    def win_setting(self):
        """Click the ``BMap_smcbg in`` control six times, one second apart."""
        enlarge_element = self.driver.find_element_by_xpath(
            "//div[@class='BMap_smcbg in']")
        for _ in range(6):
            time.sleep(1)
            enlarge_element.click()

    def initProxy(self):
        """Step 4: start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.PROXY_PATH)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def initWeb(self):
        """Start Chrome behind the proxy, open the page and click the zoom control."""
        chromeSettings = webdriver.ChromeOptions()
        chromeSettings.add_argument('--proxy-server={host}:{port}'.format(
            host="localhost", port=self.proxy.port))
        chromeSettings.add_experimental_option("prefs", self.CHROME_OPTIONS)
        self.driver = webdriver.Chrome(executable_path=self.CHROME_PATH,
                                       chrome_options=chromeSettings)
        self.open_web()
        self.win_setting()

    def move(self, x_offset, y_offset, mode_str):
        """Drag the page in a serpentine pattern with the mouse.

        ``int(x_offset / 256)`` horizontal drags are performed per row and
        ``int(y_offset / 256)`` rows are covered. Even rows go in the mode's
        horizontal direction, odd rows go back the other way, and each row is
        followed by one vertical drag.

        Args:
            x_offset: Horizontal extent; one drag per 256 units.
            y_offset: Vertical extent; one row per 256 units.
            mode_str: ``'up_left'``, ``'up_right'`` or ``'down_left'``; any
                other value behaves as ``'down_right'``.
        """
        # direction -> (press position, release position) in screen pixels
        drags = {
            'left': ((20, 150), (276, 150)),
            'right': ((276, 150), (20, 150)),
            'down': ((20, 406), (20, 150)),
            'up': ((20, 150), (20, 406)),
        }
        opposite = {'left': 'right', 'right': 'left'}
        messages = {'left': '向左移动', 'right': '向右移动'}
        # mode -> (horizontal direction of even rows, vertical direction)
        modes = {
            'up_left': ('left', 'up'),
            'up_right': ('right', 'up'),
            'down_left': ('left', 'down'),
        }

        def drag(direction, times):
            """Repeat one press-drag-release gesture ``times`` times."""
            (start_x, start_y), (end_x, end_y) = drags[direction]
            for _ in range(times):
                pag.moveTo(start_x, start_y, 0.5)
                pag.dragTo(end_x, end_y, 1)

        x_n = int(x_offset / 256)
        y_n = int(y_offset / 256)
        first, vertical = modes.get(mode_str, ('right', 'down'))

        for row in range(y_n):
            if row % 2 == 0:
                drag(first, x_n)
                print(messages[first])
            else:
                drag(opposite[first], x_n)
            drag(vertical, 1)

    def start(self):
        """Step 8: start proxy and browser, then sweep the page interactively.

        After every sweep the next mode is read from stdin; the loop ends as
        soon as the answer is not one of the four known modes. Any exception
        is printed rather than raised.
        """
        try:
            self.initProxy()
            self.initWeb()
            print('初始化完成')
            self.proxy.new_har('monitor', options={'captureContent': True})
            time.sleep(2)
            next_step = 'down_right'
            while next_step in [
                    'up_left', 'up_right', 'down_left', 'down_right'
            ]:
                self.move(2560, 2560, next_step)
                # After each sweep the map has to be moved back to the
                # starting point by hand, with the scale set to 50 m.
                next_step = input(
                    '输入采集模式(up_left/up_right/down_left/down_right):')

        except Exception as err:
            print(err)

    def get_req_url(self, targetUrl):
        """Return the recorded requests whose URL fully matches ``targetUrl``.

        Args:
            targetUrl: Regular expression matched with ``re.fullmatch``
                against each unquoted request URL.

        Returns:
            A list of dicts with keys ``x_code``, ``y_code`` and ``url``, or
            None when the proxy has recorded no entries at all. Entries whose
            URL cannot be decoded are reported with ``print`` and skipped.
        """
        # Read the HAR once: every access of proxy.har asks the proxy again,
        # so the emptiness check and the loop could otherwise see different
        # data.
        entries = self.proxy.har['log']['entries']
        if not entries:
            return None

        req_list = []
        for loop_record in entries:
            req_url = urllib.parse.unquote(loop_record["request"]['url'])
            try:
                if re.fullmatch(targetUrl, req_url):
                    # The third '='-separated piece carries both codes,
                    # wrapped in fixed marker strings.
                    p_str = req_url.split("=")[2].replace(
                        'E9FA;C92E98O5K?CDI8A',
                        '').replace('3N5L?3K8:', '')
                    x_code, y_code = p_str.split(';EK9FJE2>C')
                    req_list.append({
                        'x_code': x_code,
                        'y_code': y_code,
                        'url': req_url,
                    })
            except Exception as err:
                print(err)
                continue
        return req_list

    def quit(self):
        """Step 9: shut down the browser, then the proxy and its server.

        Stopping the proxy server process may fail; ``OSError`` from that
        part is ignored.
        """
        try:
            self.driver.close()
            self.driver.quit()
        finally:
            # Run the proxy/server teardown even when the browser is already
            # gone, so a driver error no longer leaks the proxy process.
            try:
                self.proxy.close()
                self.server.process.terminate()
                self.server.process.wait()
                self.server.process.kill()
            except OSError:
                pass
Пример #46
0
from browsermobproxy import Server
from selenium import webdriver

# Start a BrowserMob proxy server, browse one page through it with Firefox
# and print the recorded HAR.
server = Server("/root/Desktop/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy")
server.start()
driver = None
try:
    proxy = server.create_proxy()

    profile = webdriver.FirefoxProfile()
    profile.set_proxy(proxy.selenium_proxy())
    driver = webdriver.Firefox(firefox_profile=profile)

    proxy.new_har("google")
    driver.get("http://www.google.co.uk")
    test = proxy.har  # returns a HAR JSON blob

    # Single-argument call form: valid on both Python 2 and Python 3.
    print(test)
finally:
    # Shut everything down even if the page load failed.
    try:
        if driver is not None:
            driver.quit()
    finally:
        server.stop()
Пример #47
0
    def run_webdriver(self, start_url, port, config, download_dir):
        """Browse ``start_url`` in Firefox through a BrowserMob proxy.

        A proxy server is started on ``port``. The proxy forces the
        User-Agent header and injects a Referer header into requests that
        lack one. Firefox is configured to download into ``download_dir``
        without prompting, ``self.analyse_page`` is run against the browser,
        and the recorded HAR is wrapped in ``Har``.

        The module-level globals ``useragent`` and ``referer`` are rebound
        from ``config`` (falling back to built-in defaults).

        Errors raised while browsing are logged, not re-raised. Browser,
        proxy and server are shut down exactly once in every case.

        Args:
            start_url: Parsed URL; ``hostname`` names the HAR and the object
                is handed to ``analyse_page``.
            port: Port option passed to the BrowserMob ``Server``.
            config: Object providing ``referer``, ``useragent``, ``url`` and
                ``firefoxprofile``.
            download_dir: Directory Firefox saves downloads to.
        """
        global useragent
        global referer
        urllib3_logger = logging.getLogger('urllib3')
        urllib3_logger.setLevel(logging.DEBUG)
        logging.info("Starting WebRunner")
        firefox_profile = None
        server = None
        proxy = None
        har = None
        # Pre-bound so the cleanup below can tell whether Firefox was started.
        webdriver = None

        if config.referer:
            referer = config.referer
        else:
            referer = 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
                config.url, config.url)

        if config.useragent:
            useragent = config.useragent
        else:
            useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

        logging.debug("Running with UserAgent: {}".format(useragent))
        logging.debug("Running with Referer: {}".format(referer))
        logging.debug("Checking URL: {}".format(config.url))

        server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
        server.start()
        proxy = server.create_proxy()
        proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})

        # NOTE(review): referer is spliced into the script unescaped; a value
        # containing a double quote would break the interceptor.
        request_js = (
            'var referer = request.getProxyRequest().getField("Referer");'
            'addReferer(request);'
            'function addReferer(r){'
            'if (! referer ) {'
            'r.addRequestHeader("Referer","' + referer + '");'
            '}'
            'return;'
            '}')
        proxy.request_interceptor(request_js)
        if config.firefoxprofile:
            firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
        else:
            firefox_profile = FirefoxProfile()

        logging.debug("Using profile {}".format(firefox_profile.path))

        firefox_profile.set_preference("security.OCSP.enabled", 0)
        firefox_profile.set_preference("browser.download.folderList", 2)
        firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
        firefox_profile.set_preference("browser.download.dir", download_dir)
        # Adjacent literals instead of a backslash continuation inside the
        # string: the continuation embedded the next line's indentation in
        # the middle of "application/msword".
        firefox_profile.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "application/x-xpinstall;application/x-zip;"
            "application/x-zip-compressed;application/octet-stream;"
            "application/zip;application/pdf;application/msword;"
            "text/plain;application/octet")
        firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_proxy(proxy.selenium_proxy())
        try:
            webdriver = WebDriver(firefox_profile)
            proxy.new_har(start_url.hostname,
                          options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
            self.analyse_page(webdriver, start_url)
            har = proxy.har
            logging.info("Stopping WebRunner")
        except Exception as e:
            logging.error(e)
        finally:
            # One teardown for both outcomes. The proxy is closed before the
            # server that hosts it is stopped, and the browser is only quit
            # if it was actually created.
            try:
                proxy.close()
            finally:
                try:
                    if webdriver is not None:
                        webdriver.quit()
                finally:
                    server.stop()

        if har is not None:
            try:
                har = Har(har)
            except Exception as e:
                logging.error(e)
Пример #48
0
class Brower_scan():
    """Submit username/password combinations through a real login form.

    Headless Chrome is routed through a BrowserMob proxy, the login form is
    filled for every combination, and the responses are read back from the
    HAR recorded by the proxy. All of this runs from the constructor.
    """

    def __init__(self):
        """Parse arguments, start proxy and browser, run the scan, report, clean up."""
        self.response_result = []
        self.result = {}
        self.args = self.init__args()
        self.init_browsermobproxy()
        self.init_chrome()
        self.init_dict_list()
        self.result_handing()
        self.end_env()

    def init__args(self):
        """Print the banner and return the parsed command-line arguments."""
        print("""
                         ____. ____________________             __                 
                        |    |/   _____/\    _____/____ _______|  |  ___
                        |    |\_____  \  |    __) \__  \\\\_   __|  | /  /
                        |    |  ____|  | |    |    /    \ | |  |  |/  /
                    /\__|    |/        | |    |   /  __  \| |  |  |   \   
                    \________/_______  / |__  /  (____   /__|  |__|_  _\   
                                     \/      \/        \/           \/        
            
    Author:0xAXSDD By Gamma安全实验室
    version:1.0
    explain:这是一款用户绕过前端js加密进行密码爆破的工具,你无需在意js加密的细节,只需要输入你想要爆破url,已经username输入框的classname,password输入框的classname,点击登录框classname,爆破用户名,密码字典等就可,暂时不支持带验证码校验的爆破
    例子:
    只爆破密码:python JsFak.py -u url -user admin -Pd password.txt -cu user_classname -cp pass_classname -l login_classname
    爆破密码和用户:python main.py -ud username.txt -pd password.txt -cu user_classname -cp user_classname -l user_classname -u url
    详情功能参考  -h
        
    注意:如果遇到的classname  带空格  请用""括起来 Sever服务默认的是8080端口,如果需要修改,直接点Sever类修改,并指定参数-p
                """)
        parser = argparse.ArgumentParser(
            description=
            'Use your browser to automatically call JS encryption to encrypt your payload'
        )
        parser.add_argument("-u",
                            "--url",
                            metavar='url',
                            required=True,
                            help="Js encryption is required url")
        parser.add_argument("-cu",
                            "--class-user",
                            metavar='class_user',
                            required=True,
                            help="The class name of the Username tag.")
        parser.add_argument("-cp",
                            "--class-passwd",
                            metavar='class_passwd',
                            required=True,
                            help="The class name of the Password tag.")
        # The help text used to be a copy of the -cp one.
        parser.add_argument("-l",
                            "--class-login",
                            metavar='class_login',
                            required=True,
                            help="The class name of the Login button.")
        parser.add_argument("-ud",
                            "--Username-dict",
                            metavar='Username_dict',
                            help="Username dict file")
        parser.add_argument("-pd",
                            "--Password-dict",
                            metavar='Password_dict',
                            required=True,
                            help="Password dict file")
        parser.add_argument("-user",
                            "--username",
                            metavar='username',
                            help="username")
        parser.add_argument("-f",
                            "--out-file",
                            metavar='out_file',
                            help="out - file name")
        parser.add_argument("-p",
                            "--port",
                            metavar="port",
                            help="designated port")
        return parser.parse_args()

    def init_dict_list(self):
        """Load the word lists, submit every combination, then collect responses.

        With a username dictionary every username is tried with every
        password; otherwise the single ``--username`` value is used.
        """
        print(
            "------------------------------------开始扫描!--------------------------------------\n"
        )
        # Read the password list once instead of once per username. This also
        # guarantees self.password_list exists when the username list is empty.
        with open(self.args.Password_dict, "r") as f:
            self.password_list = f.readlines()

        if self.args.Username_dict is not None:
            with open(self.args.Username_dict, "r") as f_u:
                self.username_list = f_u.readlines()
            for username in self.username_list:
                for password in self.password_list:
                    self.fill_out_a_form(username.replace('\n', ''),
                                         password.replace('\n', ''))
        else:
            for password in self.password_list:
                self.fill_out_a_form(self.args.username,
                                     password.replace('\n', ''))
        self.wget_response()

    def init_browsermobproxy(self):
        """Start the BrowserMob server and prepare proxied headless Chrome options.

        Returns:
            0 when the ``Server`` object cannot be created, otherwise None.
        """
        try:
            self.server = Server(
                "browsermob-proxy-2.1.4\\bin\\browsermob-proxy.bat")
        except Exception:
            print("browsermob-proxy 服务启动失败!请查看输入路径是否正确,或者端口是否被占用!\n")
            return 0
        self.server.start()
        self.proxy = self.server.create_proxy()
        self.chrome_options = Options()
        self.chrome_options.add_argument('--proxy-server={0}'.format(
            self.proxy.proxy))
        self.chrome_options.add_argument('--headless')

    def init_chrome(self):
        """Launch Chrome, start HAR recording and open the target URL.

        Returns:
            0 when anything in that sequence fails, otherwise None.
        """
        try:
            self.chrome = webdriver.Chrome(chrome_options=self.chrome_options,
                                           executable_path='chromedriver.exe')
            self.proxy.new_har("ht_list2", options={'captureContent': True})
            self.chrome.get(self.args.url)
        except Exception:
            print("Chrome浏览器启动失败!请检查是否安装了chrome浏览器\n")
            return 0

    def fill_out_a_form(self, username, password):
        """Type the credentials into the form and press RETURN on the login element.

        Returns:
            0 when any element lookup or interaction fails, otherwise None.
        """
        selector = "[class='{0}']".format
        try:
            user_box = self.chrome.find_element_by_css_selector(
                selector(self.args.class_user))
            user_box.clear()
            user_box.send_keys(username)

            passwd_box = self.chrome.find_element_by_css_selector(
                selector(self.args.class_passwd))
            passwd_box.clear()
            passwd_box.send_keys(password)

            self.chrome.find_element_by_css_selector(
                selector(self.args.class_login)).send_keys(Keys.RETURN)
        except Exception:
            print("Please check that the className entered is correct!\n")
            return 0

    def wget_response(self):
        """Collect response bodies of login requests from the recorded HAR.

        Every request URL is printed. For URLs containing both "password"
        and "username" the response text is appended to
        ``self.response_result``. ``self.result`` then pairs the password
        list with those responses position by position.
        """
        result = self.proxy.har
        for entry in result['log']['entries']:
            _url = entry['request']['url']
            print(_url)
            if "password" in _url and "username" in _url:
                # body returned by the login endpoint
                self.response_result.append(
                    entry['response']['content']['text'])
        # NOTE(review): positional pairing only lines up when exactly one
        # login request is recorded per password, in order - confirm.
        self.result = dict(zip(self.password_list, self.response_result))

    def result_handing(self):
        """Write one report row per user/password result to the file or stdout.

        Rows are appended to ``--out-file`` when it is given, otherwise
        printed.
        """
        if self.args.Username_dict is not None:
            users = self.username_list
        else:
            users = [self.args.username]

        rows = [
            "账号:{user}密码:{key} :结果:{result}".format(
                user=user, key=key, result=value)
            for user in users
            for key, value in self.result.items()
        ]

        if self.args.out_file is not None:
            if rows:
                # Open the report once rather than once per row.
                with open(self.args.out_file, "a", encoding="utf-8") as f:
                    f.writelines(rows)
        else:
            for row in rows:
                print(row)

    def end_env(self):
        """Stop server and browser, then force-kill whatever holds the proxy port.

        The port defaults to 8080 when ``--port`` was not given.

        Returns:
            1 on success, 0 when no PID could be parsed from ``netstat``.
        """
        try:
            self.server.stop()
            self.chrome.quit()
            if self.args.port is None:
                self.args.port = 8080
            print(self.args.port)
            find_netstat = os.popen(
                "netstat -ano | findstr {port}".format(port=self.args.port))
            # Fifth whitespace-separated field of the first match is the PID.
            pid = find_netstat.read().split()[4]
            kail_pid = os.popen("taskkill /F /PID {PID}".format(PID=pid))
            print(kail_pid.read())
            return 1
        except IndexError:
            return 0
Пример #49
0
class Monitor(object):
    """Chrome + BrowserMob proxy helper for inspecting the requests a page makes.

    Step 3: configure the chromedriver and browsermob-proxy paths. Full paths
    are required, otherwise browsermob-proxy cannot start its service. The
    Chrome preference below is meant to block images; change it if images
    have to be captured.
    """
    PROXY_PATH = path.abspath(
        "F:/rudder/py/accountApi/utils/browsermob-proxy-2.1.1/bin/browsermob-proxy.bat"
    )
    CHROME_PATH = path.abspath("F:/rudder/py/accountApi/utils/chromedriver")
    CHROME_OPTIONS = {"profile.managed_default_content_settings.images": 2}
    # Selects which zfuncs "already registered" probe getPhoneApi uses.
    canFoundInText = False

    def __init__(self):
        """Nothing is initialised here; call ``Start`` to set things up."""
        pass

    def initProxy(self):
        """Step 4: start BrowserMob and create a proxy that blocks CSS and images.

        Blacklisted requests are answered with status 200, which speeds up
        page loads (Chrome's own image setting sometimes fails to apply).
        """
        self.server = Server(self.PROXY_PATH)
        self.server.start()
        self.proxy = self.server.create_proxy()
        self.proxy.blacklist([
            "http://.*/.*.css.*", "http://.*/.*.jpg.*", "http://.*/.*.png.*",
            "http://.*/.*.gif.*"
        ], 200)

    def initChrome(self):
        """Step 5: start Chrome with the BrowserMob proxy as its proxy server."""
        chromeSettings = webdriver.ChromeOptions()
        chromeSettings.add_argument('--proxy-server={host}:{port}'.format(
            host="localhost", port=self.proxy.port))
        chromeSettings.add_experimental_option("prefs", self.CHROME_OPTIONS)
        self.driver = webdriver.Chrome(executable_path=self.CHROME_PATH,
                                       chrome_options=chromeSettings)

    def genNewRecord(self, name="monitor", options=None):
        """Step 6: start a new HAR recording.

        Args:
            name: Name of the recording.
            options: ``new_har`` options; defaults to
                ``{'captureContent': True}``.
        """
        # None replaces the former mutable dict default.
        if options is None:
            options = {'captureContent': True}
        self.proxy.new_har(name, options=options)

    def getContentText(self, targetUrl):
        """Step 7: print every recorded HAR entry.

        NOTE(review): matching entries against ``targetUrl`` (a regular
        expression) and returning the response text is disabled in this
        version, so ``targetUrl`` is unused.

        Returns:
            Always None.
        """
        # Fetch the HAR once; each access of proxy.har queries the proxy.
        for loop_record in self.proxy.har['log']['entries'] or []:
            try:
                print(loop_record)
            except Exception as err:
                print(err)
                continue
        return None

    def Start(self):
        """Step 8: start the proxy, then Chrome. Errors are printed, not raised."""
        try:
            self.initProxy()
            self.initChrome()
        except Exception as err:
            print(err)

    def Quit(self):
        """Step 9: shut down the browser, then the proxy and its server.

        Stopping the proxy server process may fail; ``OSError`` from that
        part is ignored.
        """
        try:
            self.driver.close()
            self.driver.quit()
        finally:
            # Still tear down proxy and server when closing the browser fails.
            try:
                self.proxy.close()
                self.server.process.terminate()
                self.server.process.wait()
                self.server.process.kill()
            except OSError:
                pass

    def getPageContent(self):
        """Placeholder; only prints 666."""
        print(666)

    def getPhoneApi(self):
        """Probe the page's "is this phone number registered" check.

        Returns:
            False when no server is set, -1 when no phone-number input box is
            found, otherwise None after printing the outcome. The -2/-3/-4
            codes of the original design are not produced by this version.
        """
        if self.server is None:
            return False
        # locate the phone-number input box
        element = zfuncs.z_get_input_element_by_key_phone(self.driver)
        if element == False:
            print("未查找到手机号码输入框")
            return -1
        # request URL used to recognise the lookup request
        req_url = self.get_phone_api_url(element)
        if req_url == False:
            # Nothing fired on input; try submitting instead.
            element = zfuncs.z_get_input_element_by_key_submit(self.driver)
            req_url = self.get_click(element)
            if req_url == False:
                print("未发现请求包")
        # decide whether the page reports "already registered"
        line = "1300000"
        if self.canFoundInText == False:
            is_exist = zfuncs.z_get_isexists_by_key_exists(self.driver)
        else:
            is_exist = zfuncs.z_get_element_by_key_exists(self.driver)
        if is_exist != False:
            print(line + ":发现已注册")
        else:
            print(line + ":未发现已注册")

    def get_phone_api_url(self, element):
        """Return the URL (without query string) requested after typing a phone number.

        Returns:
            The URL up to the first '?', or False when no server is set or
            no matching request was recorded.
        """
        if self.server is None:
            return False
        # Fixed test number. The random number generated earlier in the
        # original was overwritten immediately, so it is no longer computed.
        test_phone = '13000000000'
        entry = self.find_entry_by_string(element, test_phone)

        if entry == False:
            print("输入后未发起请求")
            return False

        url = entry['request']['url']
        return url.split("?")[0]

    def find_entry_by_string(self, element, keystr):
        """Type ``keystr`` into ``element`` and return the HAR entry mentioning it.

        Returns:
            The matching entry, or False when none was recorded.
        """
        # all requests recorded after the input was filled in
        entries = self.get_entries(element, keystr)
        # was any of them triggered by our input?
        entry = self.find_har_by_string(entries, keystr)
        if entry == False:
            print("未发起网络请求")
            return False
        print("发现填写后会发起网络请求")

        return entry

    def get_entries(self, element, keystr):
        """Replace the content of ``element`` with ``keystr`` and return HAR entries.

        Returns:
            The proxy's HAR entries two seconds after tabbing out of the
            field, or False when ``element`` is False.
        """
        if element == False:  # no input box was found on this page
            return False
        if element.get_attribute('name') != '':
            print("Input name: " + element.get_attribute('name'))
        if element.get_attribute('id') != '':
            print("Input id: " + element.get_attribute('id'))
        print("填写测试字符串:" + keystr)
        element.send_keys(Keys.CONTROL + "a")
        element.send_keys(keystr)
        element.send_keys(Keys.TAB)
        time.sleep(2)  # wait for the request to finish and the page to update
        return self.proxy.har['log']['entries']

    def find_har_by_string(self, arr, keystr):
        """Return the last item of ``arr`` whose JSON dump contains ``keystr``.

        Returns:
            The matching item, or False when ``arr`` is not a list or nothing
            matches.
        """
        if not isinstance(arr, list):
            print("feilist")
            return False
        # newest entries first
        for item in reversed(arr):
            if json.dumps(item).find(keystr) != -1:
                return item
        return False

    def id_generator(self,
                     size=6,
                     chars=string.ascii_lowercase + string.digits):
        """Return ``size`` characters drawn at random from ``chars``."""
        return ''.join(random.choice(chars) for _ in range(size))

    def get_click(self, element):
        """Click ``element`` and return the HAR entries recorded two seconds later.

        Returns:
            The list of HAR entries, or False when ``element`` is False.
        """
        if element == False:
            return False

        element.click()
        print("触发点击事件")
        time.sleep(2)  # wait for the request to finish and the page to update
        return self.proxy.har['log']['entries']
Пример #50
0
def getCookies():
    """Open the Taobao login page in proxied Chrome and return the session cookies.

    A BrowserMob server is started from the ``browsermob-proxy`` launcher
    next to the script (``sys.path[0]``) and Chrome is pointed at a proxy
    created on it. After the page is opened the function waits before
    reading the cookies - presumably so the login can be completed by hand
    in the browser window.

    Returns:
        A dict mapping cookie name to cookie value.

    Raises:
        SystemExit: When the browser holds no cookies after the wait.
    """
    # Build the path once so the printed value is the one actually used.
    # The old diagnostic contained '\b' (a backspace escape) and named a
    # different file.
    bmp_path = sys.path[0] + '\\browsermob-proxy'
    print(bmp_path)
    server = Server(bmp_path)
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))

        driver = webdriver.Chrome(chrome_options=chrome_options)

        proxy.new_har("taobao")

        time.sleep(3)
        driver.get('https://login.taobao.com/member/login.jhtml')

        time.sleep(3)
        # NOTE(review): automated credential entry and slider handling used
        # to live here as commented-out code; it was removed because it
        # embedded account details.
        time.sleep(20)

        cookies = {elem['name']: elem['value'] for elem in driver.get_cookies()}
        if cookies:
            print("get Cookies Successful!!!")
        else:
            print("登陆失败")
            sys.exit()
        return cookies
    finally:
        # Runs on success, on errors and on sys.exit(): close the browser and
        # stop the proxy server, which previously was never stopped.
        try:
            if driver is not None:
                driver.quit()
        finally:
            server.stop()
Пример #51
0
class ApiCrawler(object):
    """Record the requests a page makes and log the ones hitting target hosts.

    Pages are loaded in Chrome through a BrowserMob proxy. Matching requests
    are appended to a CSV file named ``<netloc>-<today>.csv``.
    """

    def __init__(self, target, supported_methods=('GET', 'POST')):
        """Store the URL fragments to look for and the accepted HTTP methods.

        Args:
            target: One substring or an iterable of substrings; a request is
                kept when its URL contains any of them.
            supported_methods: HTTP methods that are kept.
        """
        # A bare string used to be split into single characters by list(),
        # which made almost every URL match.
        if isinstance(target, str):
            target = [target]
        self.target = list(target)
        self.supported_methods = supported_methods
        self.browser_mob = 'C:/browsermob-proxy-2.1.4/bin/browsermob-proxy'  #Path to browsermob
        self.server = None
        self.proxy = None
        self.driver = None
        self.current_har = None

    def __start_server(self):
        """Start the BrowserMob server and create a proxy on it."""
        self.server = Server(self.browser_mob)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def __start_driver(self):
        """Start Chrome configured to use the proxy."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--proxy-server={}".format(
            self.proxy.proxy))
        self.driver = webdriver.Chrome(chrome_options=chrome_options)

    def __start_all(self):
        """Start the proxy server, then the browser."""
        self.__start_server()
        self.__start_driver()

    def __create_har_no_interaction(self, title, url):
        """Load ``url`` under a new HAR named ``title`` and return that HAR."""
        self.proxy.new_har(title)
        self.driver.get(url)
        # Fetch once so the stored HAR and the returned HAR are the same
        # object.
        self.current_har = self.proxy.har
        return self.current_har

    def __parse_har(self):
        """Extract rows for the target requests from ``self.current_har``.

        Returns:
            A list of ``[url, method, status, redirect_url] + params`` rows,
            one per entry whose method is supported, whose URL contains a
            target and for which ``parse_results`` returns something truthy.
        """
        response = []
        for entry in self.current_har['log']['entries']:
            request = entry['request']
            if request['method'] not in self.supported_methods:
                continue
            url = request['url']
            if not any(target in url for target in self.target):
                continue
            params = parse_results(url)
            if params:
                response.append([
                    url,
                    request['method'],
                    entry['response']['status'],
                    entry['response']['redirectURL'],
                ] + params)
        return response

    def __write_to_csv(self, url, results):
        """Append ``results`` to ``<netloc>-<today>.csv``, prefixing the netloc."""
        parsed = urlparse(url)

        # NOTE(review): the csv module recommends opening the file with
        # newline='' (Python 3) to avoid blank rows on Windows - confirm the
        # target interpreter before adding it.
        with open('{}-{}.csv'.format(parsed.netloc, date.today()),
                  'a') as file:
            writer = csv.writer(file, dialect='excel')
            writer.writerows([parsed.netloc] + item for item in results)

    def __stop_all(self):
        """Quit the browser and stop the proxy server, whichever were started."""
        try:
            if self.driver is not None:
                self.driver.quit()
        finally:
            if self.server is not None:
                self.server.stop()

    def single_page(self, url):
        """Crawl one page and append its matching requests to the CSV file."""
        try:
            self.__start_all()
            self.__create_har_no_interaction('N/A', url)
            results = self.__parse_har()
            self.__write_to_csv(url, results)
        finally:
            # Release browser and proxy even when the crawl fails.
            self.__stop_all()

    def list_of_pages(self, url_list):
        """Crawl every URL in ``url_list``; pages that fail are skipped."""
        try:
            self.__start_all()
            for url in url_list:
                try:
                    self.__create_har_no_interaction('N/A', url)
                    results = self.__parse_har()
                    self.__write_to_csv(url, results)
                except Exception:
                    # Best effort per page. Unlike the former bare except
                    # this lets KeyboardInterrupt/SystemExit through.
                    continue
        finally:
            self.__stop_all()
Пример #52
0
class ProxyManager():
    """Own a BrowserMob server/client pair and record API traffic into a HAR file."""
    __BMP = "/Users/hari.ramachandran/PycharmProjects/raw/utils/bin/browsermob-proxy.bat"
    STOP = False

    def __init__(self):
        """Create the (not yet started) BrowserMob server object."""
        self.__server = Server(ProxyManager.__BMP)
        self.__client = None
        self._thread = None
        # Set to True by stop_sniffing() to end the wait in sniff_api().
        self.stop = False
        self.har_file_name = None

    def start_server(self):
        """Start the BrowserMob server and return it."""
        self.__server.start()
        return self.__server

    def start_client(self):
        """Create a proxy that trusts all servers and return it."""
        self.__client = self.__server.create_proxy(
            params={"trustAllServers": "true"})
        return self.__client

    @property
    def client(self):
        """The proxy client, or None before ``start_client`` was called."""
        return self.__client

    @property
    def server(self):
        """The BrowserMob server object."""
        # This used to return the client by mistake.
        return self.__server

    def sniff_api(self, host=None, port=9440):
        """Record traffic for ``https://host:port`` until ``stop_sniffing`` is called.

        Chrome is opened on the address through the proxy; the method then
        polls ``self.stop`` once per second. When told to stop, the HAR is
        written to ``raw_har_<unix time>.har`` and that base name is stored
        in ``self.har_file_name``. Server and browser are shut down in every
        case.

        Args:
            host: Host name inserted into the URL.
            port: Port inserted into the URL.
        """
        server = self.start_server()
        driver = None
        try:
            client = self.start_client()
            target = "https://{}:{}".format(host, port)
            client.new_har(target,
                           options={
                               "captureHeaders": "true",
                               "captureContent": "true"
                           })
            options = webdriver.ChromeOptions()
            options.add_argument("--proxy-server={}".format(client.proxy))
            driver = webdriver.Chrome(chrome_options=options)
            driver.get(target)
            while not self.stop:
                time.sleep(1)
            self.har_file_name = "raw_har_{}".format(int(time.time()))
            har_data = json.dumps(client.har, indent=4)
            with open("{}.har".format(self.har_file_name), 'w') as har_file:
                har_file.write(har_data)
        finally:
            try:
                server.stop()
            finally:
                if driver is not None:
                    driver.quit()

    def stop_sniffing(self):
        """Ask a running ``sniff_api`` call to finish."""
        self.stop = True

    def get_valid_api(self):
        """Run ``ApiFilter`` over the last written HAR file and return its result."""
        filterer = ApiFilter(har_file="{}.har".format(self.har_file_name),
                             mouse_listener=None)
        return filterer.api_filter()

    def segregate_api(self, valid_apis):
        """Not implemented yet."""
        pass
Пример #53
0
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException, ElementNotVisibleException
from browsermobproxy import Server
import urllib.parse

# Launch a local BrowserMob Proxy; everything after start() is wrapped so the
# Java process (and the browser, once created) is always shut down.
server = Server(
    r"C:\Users\Administrator\Desktop\browsermob-proxy-2.1.0-beta-6\bin\browsermob-proxy.bat"
)
server.start()
driver = None
try:
    proxy = server.create_proxy()
    proxy.new_har()

    chrome_options = webdriver.ChromeOptions()
    # proxy.proxy is already a bare "host:port" string, which is exactly what
    # --proxy-server expects. Running it through urlparse(...).path is unsafe:
    # on Python 3.9+ "host:port" is parsed as scheme="host", path="port", so
    # Chrome would be handed only the port number.
    url = proxy.proxy
    chrome_options.add_argument('--proxy-server=%s' % url)
    driver = webdriver.Chrome(executable_path=r"c:\chromedriver.exe",
                              chrome_options=chrome_options)
    driver.get("http://v.pptv.com/show/61uPDHTaSojradE.html")

    # Dump the captured HAR for the page load.
    print(proxy.har)
finally:
    try:
        if driver is not None:
            driver.quit()
    finally:
        server.stop()
Пример #54
0
class Fetcher:
    """Load a page through a BrowserMob proxy and return the captured HAR.

    Typical use: call set_remote_server() or start_local_server(), then
    set_firefox(), then run() one or more times, and finally close().
    """

    def __init__(self):
        self.server = None
        self.proxy = None
        self.browser = None
        self.driver = None
        # Set by set_firefox(); initialised here so the attribute always exists.
        self.profile = None

    def set_remote_server(self, host, port):
        """Defines an already running proxy server for gathering
        includes and content
        """
        self.server = RemoteServer(host, port)
        self.proxy = self.server.create_proxy()

    def start_local_server(self, binpath=None):
        """Starts a local instance of BrowserMob.

        Keyword Arguments:
        binpath -- The full path, including the binary name to the
        browsermob-proxy binary. Defaults to the bundled 2.1.0-beta-4
        launcher under the current working directory.
        """
        if binpath is None:
            binpath = "{0}/browsermob-proxy-2.1.0-beta-4/bin/browsermob-proxy".format(
                getcwd())

        self.server = Server(binpath)
        self.server.start()
        self.proxy = self.server.create_proxy()

    def set_firefox(self):
        """Sets the Webdriver for Firefox, routed through the proxy."""
        self.profile = webdriver.FirefoxProfile()
        self.profile.set_proxy(self.proxy.selenium_proxy())
        self.driver = webdriver.Firefox(firefox_profile=self.profile)

    def run(self, site, name='fetch'):
        """Runs an instance of the Fetcher. Requires that either
        set_remote_server() or start_local_server() has been previously
        called, and that a webdriver has been set.

        Keyword Arguments:
        site -- The URL of the site to load.
        name -- Name of the resulting HAR.

        Returns the HAR dict with the page source added under the 'dom' key,
        or None (after printing a message) when the proxy or webdriver has
        not been set up.
        """
        try:
            self.proxy.headers(
                {'Via': None}
            )  # TODO: Need to override BrowserMob to remove the Via Header - https://github.com/lightbody/browsermob-proxy/issues/213
            self.proxy.new_har(name,
                               options={
                                   'captureHeaders': True,
                                   'captureContent': True,
                                   'captureBinaryContent': True
                               })
            self.driver.get(site)

            har = self.proxy.har
            har['dom'] = self.driver.page_source
            return har
        except AttributeError:
            # print(...) with a single argument behaves the same on Python 2
            # and Python 3, unlike the bare print statement.
            print("[!] FAILED: Ensure you have set a Webdriver")

    def close(self):
        """Stop the local BrowserMob server (if any) and close the browser.

        The old code called ``self.proxy.stop()``; the proxy client has no
        such method, so the resulting AttributeError was swallowed and a
        locally started server was never shut down. The server object is the
        one that owns the process, so stop that instead. A server object
        without ``stop()`` (presumably the remote-server case) is left alone.
        """
        stop_server = getattr(self.server, 'stop', None)
        if stop_server is not None:
            stop_server()

        try:
            self.driver.close()
        except AttributeError:
            print('[!] Driver not found')
Пример #55
0
class HarProfiler:
    """Profile a page load through BrowserMob Proxy and save the HAR to disk.

    Meant to be used as a context manager: entering starts the optional
    virtual display and the BrowserMob server, leaving stops both again.
    """

    def __init__(self, config, url, login_first=False):
        """Read settings from *config* and derive labels/file names for *url*.

        Args:
            config: Mapping with the keys 'browsermob_dir', 'har_dir',
                'label_prefix', 'run_cached', 'virtual_display',
                'virtual_display_size_x' and 'virtual_display_size_y';
                'login_user' and 'login_password' are optional.
            url: Page to profile.
            login_first: Log in before loading the page when true.
        """
        self.url = url
        self.login_first = login_first

        self.login_user = config.get('login_user')
        self.login_password = config.get('login_password')

        self.browsermob_dir = config['browsermob_dir']
        self.har_dir = config['har_dir']
        self.label_prefix = config['label_prefix'] or ''
        self.run_cached = config['run_cached']
        self.virtual_display = config['virtual_display']
        self.virtual_display_size_x = config['virtual_display_size_x']
        self.virtual_display_size_y = config['virtual_display_size_y']

        self.label = '{}{}'.format(self.label_prefix, self.slugify(url))
        self.cached_label = '{}-cached'.format(self.label)

        # One timestamp shared by both file names so a run's files pair up.
        epoch = time.time()
        self.har_name = '{}-{}.har'.format(self.label, epoch)
        self.cached_har_name = '{}-{}.har'.format(self.cached_label, epoch)

    def __enter__(self):
        """Start the virtual display (if configured) and the proxy server."""
        if self.virtual_display:
            log.info('starting virtual display')
            self.display = Display(visible=0, size=(
                self.virtual_display_size_x,
                self.virtual_display_size_y
            ))
            self.display.start()

        log.info('starting browsermob proxy')
        try:
            self.server = Server('{}/bin/browsermob-proxy'.format(
                self.browsermob_dir)
            )
            self.server.start()
        except Exception:
            # __exit__ is not called when __enter__ raises, so the display
            # started above has to be torn down here.
            if self.virtual_display:
                self.display.stop()
            raise
        return self

    def __exit__(self, type, value, traceback):
        """Stop the proxy server, then the virtual display (if any)."""
        log.info('stopping browsermob proxy')
        try:
            self.server.stop()
        finally:
            # Stop the display even when stopping the server failed.
            if self.virtual_display:
                log.info('stopping virtual display')
                self.display.stop()

    def _make_proxied_webdriver(self):
        """Return ``(driver, proxy)``: a Firefox driver routed via a new proxy."""
        proxy = self.server.create_proxy()
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        return (driver, proxy)

    def _save_har(self, har, cached=False):
        """Write *har* as JSON into har_dir, creating the directory if needed.

        The cached file name is used when *cached* is true.
        """
        if not os.path.isdir(self.har_dir):
            os.makedirs(self.har_dir)
        if cached:
            har_name = self.cached_har_name
        else:
            har_name = self.har_name

        log.info('saving HAR file: {}'.format(har_name))
        with open(os.path.join(self.har_dir, har_name), 'w') as f:
            json.dump(har, f, indent=2, ensure_ascii=False)

    def _login(self, driver):
        """Log in on the edX login page with the configured credentials.

        Raises:
            RuntimeError: if the user name or the password is not configured.
        """
        log.info('logging in...')

        error_msg = 'must specify login credentials in yaml config file'
        if self.login_user is None or self.login_password is None:
            raise RuntimeError(error_msg)

        driver.get('https://courses.edx.org/login')

        # handle both old and new style logins
        try:
            email_field = driver.find_element_by_id('email')
            password_field = driver.find_element_by_id('password')
        except NoSuchElementException:
            email_field = driver.find_element_by_id('login-email')
            password_field = driver.find_element_by_id('login-password')
        email_field.send_keys(self.login_user)
        password_field.send_keys(self.login_password)
        password_field.submit()

    def _add_page_event_timings(self, driver, har):
        """Fill onContentLoad/onLoad of the first HAR page from the browser.

        Both values are taken from ``window.performance.timing`` relative to
        ``navigationStart``. Returns the same *har* object, modified in place.
        """
        jscript = textwrap.dedent("""
            var performance = window.performance || {};
            var timings = performance.timing || {};
            return timings;
            """)
        timings = driver.execute_script(jscript)
        har['log']['pages'][0]['pageTimings']['onContentLoad'] = (
            timings['domContentLoadedEventEnd'] - timings['navigationStart']
        )
        har['log']['pages'][0]['pageTimings']['onLoad'] = (
            timings['loadEventEnd'] - timings['navigationStart']
        )
        return har

    def load_page(self):
        """Load the URL (optionally a second, cached time) and save the HARs.

        The browser is always quit afterwards. Creating the webdriver happens
        outside the try block: if that step fails there is no driver to quit,
        and the original error must not be masked by an UnboundLocalError
        from the cleanup code.
        """
        driver, proxy = self._make_proxied_webdriver()
        try:
            if self.login_first:
                self._login(driver)

            proxy.new_har(self.label)
            log.info('loading page: {}'.format(self.url))
            driver.get(self.url)
            har = self._add_page_event_timings(driver, proxy.har)
            self._save_har(har)

            if self.run_cached:
                proxy.new_har(self.cached_label)
                log.info('loading cached page: {}'.format(self.url))
                driver.get(self.url)
                har = self._add_page_event_timings(driver, proxy.har)
                self._save_har(har, cached=True)
        finally:
            driver.quit()

    def slugify(self, text):
        """Lower-case *text* and join its alphanumeric runs with '-'."""
        pattern = re.compile(r'[^a-z0-9]+')
        slug = '-'.join(word for word in pattern.split(text.lower()) if word)
        return slug
Пример #56
0
########################################################################################################################

app = Flask(__name__)

# Set up BrowserMob proxy
for proc in psutil.process_iter():
    # Kill BrowserMob if it happens to already be running
    try:
        if proc.name() == 'browsermob-proxy':
            proc.kill()
    except psutil.NoSuchProcess:
        # The process exited between being listed and being inspected or
        # killed; that race must not stop the app from starting.
        continue
browsermob_options = {'port': 8090}
browsermob_server = Server(path=BROWSERMOB_PROXY_PATH,
                           options=browsermob_options)
browsermob_server.start()
# NOTE(review): fixed one-second pauses, presumably to let the server and the
# proxy finish initialising -- confirm whether they are still required.
time.sleep(1)
proxy = browsermob_server.create_proxy()
time.sleep(1)

# Set up the Selenium driver for headless Chrome, routed through the proxy
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('headless')
chrome_options.add_argument('proxy-server={0}'.format(proxy.proxy))
# Start: "Pen testing" options
chrome_options.add_argument('disable-web-security')
chrome_options.add_argument('allow-running-insecure-content')
chrome_options.add_argument('disable-client-side-phishing-detection')
chrome_options.add_argument('disable-extensions')
chrome_options.add_argument('disable-offer-store-unmasked-wallet-cards')
chrome_options.add_argument('disable-offer-upload-credit-cards')
chrome_options.add_argument('disable-popup-blocking')
chrome_options.add_argument('disable-signin-promo')
Пример #57
0
    def run_webdriver(self, start_url, port, config, download_dir):
        """
        Run Selenium WebDriver (Firefox) behind a BrowserMob proxy.

        Starts a BrowserMob server on *port*, forces the User-Agent and a
        fallback Referer on proxied requests, builds a Firefox profile,
        records a HAR while ``self.analyse_page()`` runs, and then shuts the
        proxy, server and browser down.

        Arguments:
        start_url    -- object whose ``hostname`` names the HAR; it is also
                        passed to ``analyse_page`` (presumably a urlparse
                        result -- confirm against the caller).
        port         -- port for the BrowserMob server.
        config       -- settings object; ``referer``, ``useragent``, ``url``
                        and ``firefoxprofile`` are read from it.
        download_dir -- directory Firefox saves downloads into.

        Any exception raised while the browser is running is logged and
        swallowed after cleanup.
        """
        driver = None
        urllib3_logger = logging.getLogger('urllib3')
        urllib3_logger.setLevel(logging.DEBUG)

        self.logger.info("Starting WebRunner")

        # Fall back to a Google-search referer / a fixed Firefox user agent
        # when the config does not provide one.
        referer = config.referer or 'http://www.google.com/search?q={}+&oq={}&oe=utf-8&rls=org.mozilla:en-US:official&client=firefox-a&channel=fflb&gws_rd=cr'.format(
            config.url, config.url)
        useragent = config.useragent or 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:24.0) Gecko/20100101 Firefox/24.0'

        self.logger.debug("Running with UserAgent: {}".format(useragent))
        self.logger.debug("Running with Referer: {}".format(referer))
        self.logger.debug("Checking URL: {}".format(config.url))

        server = Server("lib/browsermob/bin/browsermob-proxy", {'port': port})
        server.start()
        proxy = server.create_proxy()
        proxy.headers({'User-Agent': useragent, 'Accept-Encoding': "", 'Connection': 'Close'})
        # Interceptor script: add the Referer header only to requests that do
        # not already carry one.
        # NOTE(review): the referer is spliced into the script unescaped; a
        # value containing a double quote would break the script.
        request_js = (
            'var referer = request.getProxyRequest().getField("Referer");'
            'addReferer(request);'
            'function addReferer(r){'
            'if (! referer ) {'
            'r.addRequestHeader("Referer","' + referer + '");'
            '}'
            'return;'
            '}')
        proxy.request_interceptor(request_js)

        if config.firefoxprofile is not None and os.path.isdir(config.firefoxprofile):
            self.logger.debug("Using existing firefox profile")
            firefox_profile = FirefoxProfile(profile_directory=config.firefoxprofile)
        else:
            firefox_profile = FirefoxProfile()

        self.logger.debug("Using profile {}".format(firefox_profile.path))

        firefox_profile.set_preference("security.OCSP.enabled", 0)
        firefox_profile.set_preference("browser.download.folderList", 2)
        firefox_profile.set_preference("browser.download.manager.showWhenStarting", False)
        firefox_profile.set_preference("browser.download.dir", download_dir)
        firefox_profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                                       "application/x-xpinstall;application/x-zip;application/x-zip-compressed;application/octet-stream;application/zip;application/pdf;application/msword;text/plain;application/octet")
        firefox_profile.set_preference("browser.helperApps.alwaysAsk.force", False)
        firefox_profile.set_preference("security.mixed_content.block_active_content", False)
        firefox_profile.set_preference("security.mixed_content.block_display_content", False)
        firefox_profile.set_preference("extensions.blocklist.enabled", False)
        firefox_profile.set_preference("network.proxy.type", 1)
        firefox_profile.set_proxy(proxy.selenium_proxy())
        firefox_profile.set_preference("webdriver.log.file", "/tmp/ff.log")
        firefox_profile.set_preference("webdriver.log.driver", "DEBUG")
        firefox_profile.set_preference("browser.newtabpage.enhanced", False)
        firefox_profile.set_preference("browser.newtabpage.enabled", False)
        firefox_profile.set_preference("browser.newtabpage.directory.ping", "")
        firefox_profile.set_preference("browser.newtabpage.directory.source", "")
        firefox_profile.set_preference("browser.search.geoip.url", "")

        try:
            self.xvfb.start()
            # Copy: DesiredCapabilities.FIREFOX is a shared class-level dict,
            # so editing it in place would leak 'loggingPrefs' into every
            # other user of it in this process.
            capabilities = DesiredCapabilities.FIREFOX.copy()
            capabilities['loggingPrefs'] = {'browser':'ALL'}
            # Prefer a Firefox binary shipped inside the profile directory.
            if os.path.exists("{}/firefox".format(firefox_profile.path)):
                binary = FirefoxBinary("{}/firefox".format(firefox_profile.path))
            else:
                binary = FirefoxBinary("/usr/bin/firefox")
            driver = WebDriver(capabilities=capabilities, firefox_profile=firefox_profile, firefox_binary=binary)
            proxy.new_har(start_url.hostname,
                          options={"captureHeaders": "true", "captureContent": "true", "captureBinaryContent": "true"})
            self.analyse_page(driver, start_url)
            for entry in driver.get_log('browser'):
                self.logger.info("Firefox: {}".format(entry))
            har = proxy.har
            self.logger.info("Stopping WebRunner")
            proxy.close()
            server.stop()
            driver.quit()
            # NOTE(review): the wrapped HAR is not returned or stored in the
            # code visible here, and xvfb is only stopped on the error path
            # below -- confirm both are intended.
            har = Har(har)
        except Exception as e:
            self.logger.error(e)
            proxy.close()
            if driver:
                driver.quit()
            self.xvfb.stop()
            server.stop()
Пример #58
0
class ProxyManager:
    """
    Detect http requests via a BrowserMob proxy.

    Wraps a BrowserMob ``Server`` (bound to port 9090) and the proxy client
    created on it, and keeps track of the webdriver binary that matches the
    selected browser.
    """
    __PARENT_DIR = dirname(dirname(abspath(__file__)))
    __OS_NAME = platform.system()
    __DRIVER_PATH = ''

    GOOGLE_CHROME = "google-chrome"
    CHROME = "chrome"
    CHROMIUM = "chromium"
    CHROMIUM_BROWSER = "chromium-browser"
    MOZILLA = "mozilla"
    FIREFOX = "firefox"

    detectManager = DetectManager()

    def __init__(self):
        """Pick the platform-specific BrowserMob launcher and build the server."""
        self.detectManager.createDriver()
        if ProxyManager.__OS_NAME in ('Linux', 'Darwin'):
            self.__BMP = self.__PARENT_DIR + "/browsermob-proxy-2.1.4/bin/browsermob-proxy"
            self.__DRIVER_PATH = self.__PARENT_DIR + "/drivers/" + self.__getDriverName(
                '')
        elif ProxyManager.__OS_NAME == 'Windows':
            # Raw string: in a normal literal every "\b" is a backspace
            # character, which silently corrupted this path.
            self.__BMP = self.__PARENT_DIR + r"\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat"
        # NOTE(review): on any other platform __BMP is never set and the
        # Server() call below fails with AttributeError; on Windows the
        # driver path stays '' until selectBrowser() is called.

        self.__server = Server(self.__BMP, {'port': 9090})
        self.__client = None

    def start_server(self):
        """Start the BrowserMob server and return the server object."""
        self.__server.start()
        return self.__server

    def start_client(self):
        """Create a proxy client on the server, store it and return it."""
        # The BrowserMob option is spelled "trustAllServers"; the previous
        # "trustAllServer" did not match it.
        self.__client = self.__server.create_proxy(
            params={"trustAllServers": "true"})
        return self.__client

    @property
    def client(self):
        """Proxy client created by start_client(); None until it has run."""
        return self.__client

    @property
    def server(self):
        """BrowserMob server object built in __init__."""
        return self.__server

    @property
    def driverPath(self):
        """Path of the webdriver binary for the currently selected browser."""
        return self.__DRIVER_PATH

    def selectBrowser(self, browserName):
        """Point driverPath at the driver registered for *browserName*."""
        self.__DRIVER_PATH = self.__PARENT_DIR + "/drivers/" + self.__getDriverName(
            browserName)

    def __getDriverName(self, browserName):
        """
        If selected value is None or is empty then get driver of the first installed browser.
        Else get driver of the selected browser.
        """
        if browserName is None or browserName == '':
            return next(iter(self.detectManager.installedBrowser.values()))
        else:
            return self.detectManager.installedBrowser[browserName]