Пример #1
0
def initializeChrome():
    """Start a BrowserMob proxy server and a Chrome window routed through it.

    The created objects are published through the module-level globals
    ``server``, ``proxy`` and ``driver``.

    The proxy binary is first looked up at the system-wide install path; if
    constructing the server from it fails, the copy bundled under
    ``./binaries`` is used instead. Chrome is first started without an
    explicit chromedriver path; if that fails, the bundled
    ``./binaries/chromedriver.exe`` is passed explicitly.
    """
    global driver
    global server
    global proxy
    server_options = {'port': 8090}
    try:
        server = Server(path="/usr/local/bin/skillshare-dl/browsermob-proxy",
                        options=server_options)
    except Exception:
        # Previously a bare ``except:``, which also swallowed
        # KeyboardInterrupt/SystemExit. Ordinary failures fall back to the
        # bundled binary.
        bundled_bmp = Path(
            "./binaries/browsermob-proxy-2.1.4/bin/browsermob-proxy"
        ).absolute()
        server = Server(path=str(PureWindowsPath(bundled_bmp)),
                        options=server_options)
    server.start()
    proxy = server.create_proxy()
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))

    try:
        driver = webdriver.Chrome(options=chrome_options)
    except Exception:
        # Fall back to the chromedriver shipped next to the script.
        path_to_chromedriver = str(
            Path("./binaries/chromedriver.exe").absolute())
        driver = webdriver.Chrome(path_to_chromedriver, options=chrome_options)
    print('initialized Chrome window!')
Пример #2
0
def create_server(browser, websites):
    """Start a BrowserMob server, run the configured session, then close it.

    Args:
        browser: Passed through unchanged to ``configure_server``.
        websites: Passed through unchanged to ``configure_server``.
    """
    print("=====> Creating New Server - Please Wait... <=====")
    server = Server(browsermob_proxy_path)
    server.start()
    proxy = server.create_proxy()
    try:
        configure_server(proxy, browser, websites)
    finally:
        # Run the teardown even when the session raises, so the server that
        # was started above is not left behind.
        close_server(proxy, server)
def CaptureNetworkTraffic(url, server_ip, headers, file_path):
    """Load ``url`` in Firefox through BrowserMob Proxy and save the HAR.

    The HAR records the headers, cookies and HTTP calls made by the browser
    while the page loads.

    Args:
        url: Page URL to open.
        server_ip: Address that the hard-coded example hosts are remapped to.
        headers: Dictionary of headers to set on the proxy.
        file_path: File the HAR is written to, as JSON.
    """
    port = {'port': 9090}
    server = Server("G:\\browsermob\\bin\\browsermob-proxy",
                    port)  # Path to the BrowserMobProxy
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()
        for host in ("www.example.com", "www.example1.com",
                     "www.example2.com"):
            proxy.remap_hosts(host, server_ip)
        proxy.headers(headers)
        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())
        driver = webdriver.Firefox(firefox_profile=profile)
        proxy.new_har("google",
                      {'captureHeaders': 'True', 'captureContent': 'True'})
        driver.get(url)
        # The HAR is fetched from the running proxy server, so it has to be
        # read BEFORE the server is stopped (the original re-read it after).
        har = proxy.har
    finally:
        server.stop()
        if driver is not None:
            driver.quit()
    with open(file_path, 'w') as har_file:
        json.dump(har, har_file)
Пример #4
0
def initializeChrome():
    """Start the bundled BrowserMob proxy and a Chrome window that uses it.

    Publishes the created objects through the module-level globals
    ``server``, ``proxy`` and ``driver``. The user is prompted for their
    Windows account name so Chrome can be started on their existing
    "User Data" directory.
    """
    global driver
    global server
    global proxy
    bmp_binary = Path(
        "./binaries/browsermob-proxy-2.1.4/bin/browsermob-proxy").absolute()
    server = Server(path=str(PureWindowsPath(bmp_binary)),
                    options={'port': 8090})
    server.start()
    proxy = server.create_proxy()
    # Defined but not applied to the options below.
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
    username_windows = input(
        "Please enter your username the way it's written in C:/Users/ - it's needed to locate your Chrome user data."
    )
    profile_dir = Path(
        f'C:/Users/{username_windows}/AppData/Local/Google/Chrome/User Data')

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument(f"--proxy-server={proxy.proxy}")
    chrome_options.add_experimental_option("excludeSwitches",
                                           ['enable-automation'])
    chrome_options.add_argument(f'--user-data-dir={profile_dir}')

    chromedriver_binary = Path("./binaries/chromedriver.exe").absolute()
    driver = webdriver.Chrome(str(chromedriver_binary),
                              options=chrome_options)
    print('initialized Chrome window!')
Пример #5
0
def test2():
    """Create a BrowserMob ``Server`` handle on port 9999 and open a proxy.

    NOTE(review): ``start()` is never called on the server here, so this
    presumably expects a proxy server to already be listening - confirm.
    """
    bmp_server = Server(
        '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy',
        {'port': 9999},
    )

    # One-second pauses on either side of the proxy creation.
    time.sleep(1)
    bmp_proxy = bmp_server.create_proxy()
    time.sleep(1)
Пример #6
0
    def __init__(self, root="C:/xampp/htdocs/webscrape/", folder=""):
        """Start BrowserMob Proxy and a Chrome driver that records requests.

        Args:
            root: Base directory; combined with ``folder`` into ``self.root``.
            folder: Sub-folder appended to ``root``.
        """
        self.url = ""
        self.root = root + folder + "/"

        # BrowserMob proxy: server process first, then a proxy on top of it.
        bmp_server = Server(
            r"C:\webdrivers\browsermob-proxy\bin\browsermob-proxy")
        self.server = bmp_server
        bmp_server.start()
        self.proxy = bmp_server.create_proxy()

        # Chrome webdriver - note: does not seem to work with headless On.
        chrome_opts = webdriver.ChromeOptions()
        chrome_opts.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        # Point Chrome at BrowserMob so that it can track the requests.
        proxy_switch = '--proxy-server=%s' % self.proxy.proxy
        chrome_opts.add_argument(proxy_switch)

        self.w = webdriver.Chrome(r'C:/webdrivers/chromedriver.exe',
                                  chrome_options=chrome_opts)

        # Request listener: start a HAR that also captures headers.
        har_options = {'captureHeaders': True}
        self.proxy.new_har("Listener", options=har_options)

        print("Browser and Server initialized...")
Пример #7
0
def setupdevices():
    """
    Description:
        Sets up the BrowserMob proxy, starts a HAR on it, and creates a
        Firefox Selenium driver that is routed through the proxy.

    Usage:
        driver, proxy, server = setupdevices()

    Inputs:
        NA

    Output:
        Tuple of (Selenium driver, Browsermob proxy, Browsermob server)
    """
    # Proxy side: server process, proxy, and the HAR recording.
    bmp_server = Server("############/browsermob-proxy-2.0-beta-9/bin/browsermob-proxy")
    bmp_server.start()
    bmp_proxy = bmp_server.create_proxy()

    firefox_profile = webdriver.FirefoxProfile()
    firefox_profile.set_proxy(bmp_proxy.selenium_proxy())
    bmp_proxy.new_har("________")

    # Browser side: Firefox using the proxied profile.
    firefox = webdriver.Firefox(firefox_profile=firefox_profile)

    return firefox, bmp_proxy, bmp_server
Пример #8
0
def get_driver(request, cmdopt_browser, cmdopt_window):
    """Fixture to create, return and close a WebDriver behind BrowserMob.

    Args:
        request: pytest ``request`` object; used to register finalizers.
        cmdopt_browser: ``"ie"``, ``"firefox"`` or ``"chrome"``.
        cmdopt_window: ``"headless"`` selects headless mode for Firefox and
            Chrome; any other value gives a normal window.

    Returns:
        The created driver. For non-headless Chrome this is an
        ``EventFiringWebDriver`` whose finalizer also dumps the browser log
        to a timestamped file and prints the HAR. For an unknown browser the
        string ``"unsupported browser"`` is returned.
    """
    server = Server(
        "C:\\Program Files (x86)\\browsermob-proxy\\bin\\browsermob-proxy",
        {"port": 9090})
    server.start()
    # Registered first so that it runs last (pytest runs finalizers in
    # reverse registration order): the proxy server is stopped for every
    # branch, after the browser has been closed and the HAR has been read.
    request.addfinalizer(server.stop)
    proxy = server.create_proxy()
    # proxy.proxy is already "host:port"; running it through urlparse gives
    # version-dependent results, so it is used directly.
    proxy_arg = '--proxy-server={}'.format(proxy.proxy)

    if cmdopt_browser == "ie":
        driver = webdriver.Ie()
        request.addfinalizer(driver.quit)
        return driver

    if cmdopt_browser == "firefox":
        options = Firefox_options()
        if cmdopt_window == "headless":
            options.add_argument("--headless")
        options.add_argument(proxy_arg)
        # The non-headless branch used to build these options and then
        # start Firefox without them.
        driver = webdriver.Firefox(firefox_options=options)
        proxy.new_har()
        request.addfinalizer(driver.quit)
        return driver

    if cmdopt_browser == "chrome":
        if cmdopt_window == "headless":
            options = Chrome_options()
            options.headless = True
            options.add_argument(proxy_arg)
            driver = webdriver.Chrome(chrome_options=options)
            proxy.new_har()
            request.addfinalizer(driver.quit)
            return driver

        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument(proxy_arg)
        # Copy so the shared DesiredCapabilities.CHROME dict is not mutated.
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        driver = webdriver.Chrome(desired_capabilities=capabilities,
                                  chrome_options=chrome_options)
        proxy.new_har()
        ef_driver = EventFiringWebDriver(driver, MyListener())

        def fin():
            """Dump the browser console log and HAR, then close the browser."""
            log_timestamp = str(datetime.datetime.now())[0:-4].replace(
                '-', '.').replace(' ', '_').replace(':', '.')
            browserlog_filename = log_timestamp + '_browser_log_file.log'
            try:
                with open(browserlog_filename, 'w') as browserlogfile:
                    print('-------------------------')
                    for i in ef_driver.get_log('browser'):
                        print(i)
                        browserlogfile.write(str(i) + '\n')
                print(proxy.har)
            finally:
                # Was ``ef_driver.quit`` without parentheses, i.e. a no-op.
                ef_driver.quit()

        request.addfinalizer(fin)
        return ef_driver

    return "unsupported browser"
Пример #9
0
def proxy():
    """Yield a BrowserMob proxy with a HAR titled ``project_har`` started.

    The backing server is started before the proxy is created and is
    stopped when the generator is resumed or closed, including when an
    exception is thrown into it.
    """
    server = Server("/home/sergey/repositories/browsermob-proxy-2.1.4-bin/browsermob-proxy-2.1.4/bin/browsermob-proxy")
    server.start()
    try:
        bmp_proxy = server.create_proxy()
        bmp_proxy.new_har(title='project_har')
        yield bmp_proxy
    finally:
        server.stop()
Пример #10
0
def main():
	"""Run the deployment UI tests behind a BrowserMob proxy.

	Each test helper returns a truthy value on success; the number of
	failed tests becomes the process exit status. When the project
	deployment test fails, the captured HAR is written to ``deploy.har``.
	The test driver and the proxy server are shut down even if a test
	raises.
	"""
	import json  # local import: needed only for the HAR dump below

	s = Server('/home/creature/browsermob/bin/browsermob-proxy', {'port': 1337})
	s.start()
	failcount = 0
	try:
		proxy = s.create_proxy({'port': 1338})
		d = tu.newProxytest(proxy)
		try:
			proxy.new_har(options={'captureHeaders': False, 'captureContent': True})
			if not deployOneTest(d):
				failcount += 1
			if not deleteTest(d):
				failcount += 1
			if not projectDeployTest(d):
				failcount += 1
				# HAR is a JSON format; str(dict) would write a Python repr
				# that HAR tools cannot read.
				with open('deploy.har', 'w') as out:
					json.dump(proxy.har, out)
			# test all services from multiple projects showing up in services
			if not multiDeployTest(d):
				failcount += 1
			# test that stopping services from services page removes them from project deployments
			if not cleanupTest(d):
				failcount += 1
		finally:
			tu.endtest(d)
	finally:
		s.stop()
	sys.exit(failcount)
Пример #11
0
def login(weburl, user, passwd, proxy_location, webdriver_location):
    """Log in to the site in Chrome while recording traffic with BrowserMob.

    Args:
        weburl: URL of the login page.
        user: Account name typed into the first login field.
        passwd: Password typed into the second login field.
        proxy_location: Directory containing ``browsermob-proxy.bat``.
        webdriver_location: Directory containing ``chromedriver``.

    Returns:
        Tuple ``(browser, wait, proxy, content_id)`` where ``wait`` is a
        10-second ``WebDriverWait`` on the browser and ``content_id`` is the
        id prefix of the page shown after logging in.
    """
    server = Server(proxy_location + r'\browsermob-proxy.bat')
    server.start()
    proxy = server.create_proxy()
    proxy.new_har(options={'captureContent': True, 'captureHeaders': True})
    chrome_options = Options()
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument('--proxy-server={}'.format(proxy.proxy))
    browser = webdriver.Chrome(webdriver_location + r'\chromedriver',
                               options=chrome_options)
    browser.set_page_load_timeout(30)
    wait = WebDriverWait(browser, 10)

    def page_id():
        """Return the id of the ``div.z-page`` element, underscores removed."""
        return pq(browser.page_source)('div.z-page').attr('id').replace(
            '_', '')

    # Log in. The function's own arguments are used here; the original body
    # ignored them and read the outside names login_url/username/password.
    browser.get(weburl)
    # The element ids are regenerated on every page load; the fields are
    # addressed as <page id> + a fixed suffix.
    id0 = page_id()
    browser.find_element_by_xpath('//*[@id="{}b"]'.format(id0)).send_keys(
        user)  # id + "b"
    browser.find_element_by_xpath('//*[@id="{}c"]'.format(id0)).send_keys(
        passwd)  # id + "c"
    browser.find_element_by_xpath('//*[@id="{}g"]'.format(id0)).click()  # id + "g"
    time.sleep(4)
    content_id = page_id()  # id prefix of the main page
    print(id0)
    print(content_id)
    return browser, wait, proxy, content_id
Пример #12
0
def reloadHeaderAndCookie():
    """Sign in to LifeMiles in proxied Chrome and trigger a flight search.

    A HAR named ``lifemiles`` capturing headers and cookies is started on
    the proxy after sign-in, then the search form is filled in and the
    search button is clicked (via JavaScript if the normal click fails).

    NOTE(review): ``server.start()`` is only called after the proxy has
    been created and used, so this presumably relies on a BrowserMob server
    that is already running - confirm.
    NOTE(review): both the origin and the destination field receive "SFO" -
    confirm this is intended.
    """
    global httpArchive
    browsermob_path = '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy'
    server = Server(browsermob_path)

    proxy = server.create_proxy()

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
    browser = webdriver.Chrome(options=chrome_options)
    options = {'captureHeaders': True, 'captureCookies': True}

    signIn(browser)
    proxy.new_har("lifemiles", options=options)
    url = "https://www.lifemiles.com/fly/find"

    time.sleep(2)
    browser.get(url)
    time.sleep(5)

    originID = "ar_bkr_fromairport"
    browser.find_element_by_id(originID).send_keys("SFO")
    time.sleep(2)
    destinationID = "ar_bkr_toairport"
    browser.find_element_by_id(destinationID).send_keys("SFO")
    time.sleep(2)

    server.start()
    searchClass = "Booker_bookerActionButtonSmall__3Fh2d"
    try:
        browser.find_element_by_class_name(searchClass).click()
    except Exception:
        # Was ``except Exception, e:``, which is a SyntaxError on Python 3;
        # the bound exception was never used.
        print("was not able to click search button, try javascript")
        browser.execute_script(
            "document.querySelector('.{0}').click()".format(searchClass))
Пример #13
0
def download_file(url, file_name):
    """Find the embedded vhx.tv video on ``url`` and download it.

    The page is opened in headless Chrome behind BrowserMob Proxy; the HAR
    is searched for the first request whose URL contains
    ``https://embed.vhx.tv/videos`` and that URL is handed to the local
    ``./youtube-dl`` binary.

    Args:
        url: Page that embeds the video.
        file_name: Output name without extension; ``.mp4`` is appended.

    Raises:
        IndexError: If no matching request was captured.
    """
    server = Server('./browsermob-proxy')  # Local path to BMP
    server.start()
    driver = None
    try:
        # The proxy records a HAR containing the URLs the media is loaded
        # from.
        proxy = server.create_proxy()
        chrome_options = Options()
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument('--headless')
        driver = webdriver.Chrome(executable_path='./chromedriver',
                                  options=chrome_options)
        proxy.new_har('filename')
        driver.get(url)
        save = proxy.har
    finally:
        server.stop()
        if driver is not None:
            driver.quit()

    embedded_links = [
        entry['request']['url'] for entry in save['log']['entries']
        if "https://embed.vhx.tv/videos" in entry['request']['url']
    ]
    if not embedded_links:
        # Same exception type the bare ``[...][0]`` raised, with a message.
        raise IndexError(
            "no https://embed.vhx.tv/videos request captured for {}".format(
                url))
    embedded_link = embedded_links[0]

    # Explicit option/value pairs; the original relied on implicit string
    # concatenation ("-f" "best...") caused by missing commas.
    subprocess.call([
        "./youtube-dl",
        "-f", "best[height=540]",
        "-o", "{}.mp4".format(file_name),
        "--ignore-errors",
        embedded_link,
    ])
Пример #14
0
def browser_and_proxy():
    """Yield ``(browser, proxy)``: headless Chrome behind BrowserMob Proxy.

    A HAR with content capture is started on the proxy before the browser
    is created. When the generator is finalized, the browser is quit and
    the proxy server is stopped.
    """
    bmp_server = Server(config.BROWSERMOB_PATH)
    bmp_server.start()
    bmp_proxy = bmp_server.create_proxy()
    bmp_proxy.new_har(options={'captureContent': True})

    # Chrome options: proxy, image preference, then the headless switches.
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('--proxy-server=%s' % bmp_proxy.proxy)
    chrome_opts.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2})
    for switch in ('--headless', '--no-sandbox', '--disable-gpu'):
        chrome_opts.add_argument(switch)

    caps = DesiredCapabilities.CHROME.copy()
    caps.update(acceptSslCerts=True, acceptInsecureCerts=True)

    chrome = webdriver.Chrome(options=chrome_opts,
                              desired_capabilities=caps,
                              executable_path=config.CHROME_PATH)

    try:
        yield chrome, bmp_proxy
    finally:
        chrome.quit()
        bmp_server.stop()
Пример #15
0
def getAuthKey():
    """Log in to emofid through a recording proxy and return the auth header.

    Credentials and the proxy binary path are read from ``config``. The
    captured HAR is written to ``data.json`` and searched for the
    ``Authorization`` header of the ``checkuser`` request.

    Returns:
        The ``Authorization`` header value, or ``''`` if it was not found.
    """
    username = config['Users']['username']
    password = config['Users']['password']
    proxyServerAddress = config['config']['proxyserverpath']

    # Creating Proxy server
    server = Server(proxyServerAddress)
    server.start()
    browser = None
    try:
        proxy = server.create_proxy()
        proxy.whitelist(regexp='*emofid.com*', status_code=123)
        proxy.new_har(title="mofid",
                      options={
                          'captureContent': False,
                          'captureBinaryContent': False,
                          'captureHeaders': True
                      })

        # Creating browser
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_argument("--proxy-server={0}".format(proxy.proxy))
        browser = webdriver.Chrome(chrome_options=chrome_options)

        url = "https://account.emofid.com/Login?returnUrl=%2Fconnect%2Fauthorize%2Fcallback%3Fclient_id%3Deasy2_client_pkce%26redirect_uri%3Dhttps%253A%252F%252Fd.easytrader.emofid.com%252Fauth-callback%26response_type%3Dcode%26scope%3Deasy2_api%2520openid%26state%3Df8ff796b1d994e0d8f6fa1f6e878f165%26code_challenge%3D7qf19ieakAg4BvrDkBTHbr5h7_A0BSvci7dtp-0ZUWY%26code_challenge_method%3DS256%26response_mode%3Dquery"
        browser.get(url)

        userFiled = browser.find_element_by_xpath('//*[@id="Username"]')
        userFiled.clear()
        userFiled.send_keys(username)

        passwordFiled = browser.find_element_by_xpath('//*[@id="Password"]')
        passwordFiled.clear()
        passwordFiled.send_keys(password, Keys.RETURN)

        element = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 "/html/body/app-root/d-release-notes/div/div/button")))
        element.click()

        try:
            browser.find_element_by_xpath(
                '//*[@id="root"]/main/div[2]/div[1]/ul[2]/li[1]/span/i').click()
        except Exception:
            # Best effort: this extra click is optional. Narrowed from a
            # bare ``except:`` so Ctrl-C is no longer swallowed.
            print('Error')

        # Snapshot the HAR while the proxy server is still running; the
        # original read it a second time after server.stop().
        har = proxy.har
    finally:
        if browser is not None:
            browser.quit()
        server.stop()

    with open('data.json', 'w') as outfile:
        json.dump(har, outfile)

    tree = Tree(har)
    authKey = ''
    result = tree.execute(
        "$.log.entries.request[@.url is 'https://d11.emofid.com/easy/api/account/checkuser'].headers"
    )
    for entry in result:
        for e in entry:
            if e['name'] == 'Authorization':
                authKey = e["value"]
    return authKey
Пример #16
0
    def __start_bmproxy(self, config):
        """Start the BrowserMob Proxy server if network recording is enabled.

        The proxy's ``bin`` directory is looked up either directly inside
        the configured tools directory or inside its highest-sorting
        ``browsermob*`` sub-directory.

        Args:
            config: Configuration object; read through ``config.value``.

        Raises:
            Exception: If the proxy package cannot be located or the server
                fails to start.
        """
        # BrowserMob
        from browsermobproxy import Server
        from arjuna.tpi.constant import ArjunaOption
        capture_traffic = config.value(ArjunaOption.BROWSER_NETWORK_RECORDER_ENABLED)
        if capture_traffic:
            bmproxy_dir = config.value(ArjunaOption.TOOLS_BMPROXY_DIR)
            sub_dirs = os.listdir(bmproxy_dir)
            bmproxy_bin_dir = None
            if "bin" in sub_dirs:
                bmproxy_bin_dir = os.path.join(bmproxy_dir, "bin")
            else:
                # Reverse sort so that the last version is picked.
                # (The loop previously iterated the undefined name
                # ``sub_dir`` and raised NameError.)
                for d in sorted(sub_dirs, reverse=True):
                    if d.startswith("browsermob"):
                        bmproxy_bin_dir = os.path.join(bmproxy_dir, d, "bin")
                        break

            if bmproxy_bin_dir is None:
                raise Exception("Network recording is enabled in configuration. There was an error in creating proxy server/server using BrowserMob Proxy. Could not find proxy package at {}".format(bmproxy_dir))

            if platform.system().lower() == "windows":
                exe = "browsermob-proxy.bat"
            else:
                exe = "browsermob-proxy"
            bmproxy_exe_path = os.path.join(bmproxy_bin_dir, exe)

            try:
                self.__bmproxy_server = Server(bmproxy_exe_path)
                self.__bmproxy_server.start()
            except ProxyServerError as e:
                raise Exception("Network recording is enabled in configuration. There was an error in creating proxy server/server using BrowserMob Proxy. Fix and retry. Error message: {}".format(str(e))) from e
Пример #17
0
 def __init__(self, results, reports, **kwargs):
     """Store the scan state and start BrowserMob Proxy plus Chrome.

     ``kwargs`` must contain ``cookie`` (a ``k=v; k=v`` string) and
     ``browsermobproxy`` (path of the proxy binary); the mere presence of a
     ``headless`` key switches Chrome to headless mode.
     """
     self.results = results
     self.reports = reports
     self.args = kwargs
     self.listen_port = 9760
     self.lock = threading.Lock()
     self.vulnerable = []
     self.server = None

     # Parse the cookie string; fragments without '=' are skipped.
     self.cookies = {}
     for fragment in self.args['cookie'].split(';'):
         name, separator, value = fragment.strip().partition('=')
         if separator:
             self.cookies[name] = value

     # Create proxy server
     logging.info('Starting browsermobproxy server...')
     bmp_server = self.proxy_server = Server(self.args['browsermobproxy'])
     bmp_server.start()
     self.proxy = bmp_server.create_proxy()
     logging.info('Browsermobproxy server started')

     # Create Chrome engine
     logging.info('Creating Selenium Chrome webdriver...')
     opts = self.chrome_options = webdriver.ChromeOptions()
     opts.add_argument('--proxy-server={}'.format(self.proxy.proxy))
     if 'headless' in self.args:
         opts.add_argument('--headless')
     opts.add_argument('--disable-gpu')
     opts.add_argument("--disable-extensions")
     self.driver = webdriver.Chrome(chrome_options=opts)
     logging.info('Selenium Chrome webdriver created')
Пример #18
0
def reloadHeaderAndCookie():
    """Capture united.com request headers/cookies into a HAR file.

    Frees port 8090, starts BrowserMob Proxy on it, drives Chrome (stored
    in the global ``browser``) to the home page and then to a fixed award
    search URL, and writes the recorded HAR to ``latestUnitedAuth.json``.
    """
    killPortProcess(8090)
    global browser
    bmp_server = Server(
        '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy',
        {'port': 8090})
    bmp_server.start()
    time.sleep(10)
    bmp_proxy = bmp_server.create_proxy()
    time.sleep(1)

    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument("--proxy-server={0}".format(bmp_proxy.proxy))
    browser = webdriver.Chrome(options=chrome_opts)

    home_url = "https://www.united.com/en/us"
    search_url = "https://www.united.com/ual/en/US/flight-search/book-a-flight/results/awd?f=SFO&t=PVG&d=2021-11-07&tt=1&at=1&sc=7&px=1&taxng=1&newHP=True&idx=1"

    bmp_proxy.new_har(
        "united", options={'captureHeaders': True, 'captureCookies': True})
    browser.get(home_url)
    time.sleep(5)

    browser.get(search_url)

    print("click close button to get actual results")
    time.sleep(5)
    print("we now save all auths")
    # Snapshot of everything recorded so far, as a HAR JSON blob.
    recorded_har = bmp_proxy.har
    with open('latestUnitedAuth.json', 'w') as har_file:
        json.dump(recorded_har, har_file)
    time.sleep(10)
    bmp_server.stop()
    browser.quit()
Пример #19
0
def get_dependencies(url):
    """Return the distinct URLs requested while loading ``url``.

    The page is opened in headless Firefox behind a BrowserMob proxy and
    the request URLs are taken from the recorded HAR. The order of the
    returned list is not defined (duplicates are removed through a set).
    """
    clean_opened_processes()
    bmp_server = Server("browsermob-proxy/bin/browsermob-proxy")
    bmp_server.start()
    time.sleep(0.5)

    bmp_proxy = bmp_server.create_proxy()
    time.sleep(0.5)

    firefox_options = Options()
    firefox_options.headless = True

    firefox_profile = webdriver.FirefoxProfile()
    firefox_profile.set_proxy(bmp_proxy.selenium_proxy())

    firefox = webdriver.Firefox(options=firefox_options,
                                firefox_profile=firefox_profile)
    bmp_proxy.new_har("captured_elems")
    firefox.get(url)
    time.sleep(3)

    # Collect the request URL of every HAR entry before shutting down.
    requested_urls = []
    for entry in bmp_proxy.har["log"]["entries"]:
        requested_urls.append(entry["request"]["url"])

    bmp_server.stop()
    firefox.quit()

    # Remove duplicates.
    return list(set(requested_urls))
Пример #20
0
    def __init__(self, url, username, password):
        """Start BrowserMob Proxy and a Chrome browser routed through it.

        Args:
            url: Stored on the instance as ``self.url``.
            username: Stored on the instance as ``self.username``.
            password: Stored on the instance as ``self.password``.
        """
        self.bearer = None
        # Enable browser logging. Work on a copy so the shared
        # DesiredCapabilities.CHROME dict is not modified for every other
        # Chrome driver created in this process.
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['loggingPrefs'] = {'browser': 'ALL'}

        self.server = Server("BrowserMob\\bin\\browsermob-proxy")
        cli.print_info("Starting proxy server...")
        self.server.start()
        self.proxy = self.server.create_proxy()
        cli.add_to_print("OK\n\n")
        cli.print_warning("NOTE: Connections will show as INSECURE!\n")

        options = webdriver.ChromeOptions()
        self.url = url
        options.add_argument("--proxy-server={0}".format(self.proxy.proxy))

        cli.print_info("Browser started\n")
        self.browser = Chrome(options=options,
                              executable_path="./chromedriver.exe",
                              desired_capabilities=capabilities)
        # The trailing ``del password`` / ``del username`` were dropped:
        # they only unbound the local names; the values stay on ``self``.
        self.username = username
        self.password = password
Пример #21
0
def reloadHeaderAndCookie():
    """Create a BrowserMob ``Server`` handle on port 9999 and open a proxy.

    NOTE(review): ``start()`` is never called on the server here, so this
    presumably expects a proxy server to already be listening - confirm.
    """
    bmp_server = Server(
        '/usr/local/browsermob-proxy-2.1.4/bin/browsermob-proxy',
        {'port': 9999},
    )

    # One-second pauses on either side of the proxy creation.
    time.sleep(1)
    bmp_proxy = bmp_server.create_proxy()
    time.sleep(1)
Пример #22
0
    def __init__(self,
                 url,
                 har_name,
                 browsermob_proxy_location,
                 selector_dictionary=None,
                 default_timeout=None,
                 firefox_binary=None,
                 highlight=False,
                 geckodriver="geckodriver"):
        """Start BrowserMob Proxy and Firefox, begin a HAR and open ``url``.

        Args:
            url: Page opened once the browser is up.
            har_name: Name given to the HAR started on the proxy.
            browsermob_proxy_location: Path of the BrowserMob Proxy binary.
            selector_dictionary: Stored as ``self.selector_dictionary``.
            default_timeout: Stored as ``self.default_timeout``; ``None``
                becomes 30.
            firefox_binary: Passed to ``webdriver.Firefox``.
            highlight: Stored as ``self.highlight``.
            geckodriver: Passed to ``webdriver.Firefox`` as the driver
                executable path.
        """
        self.selector_dictionary = selector_dictionary
        self.default_timeout = default_timeout if default_timeout is not None else 30
        self.highlight = highlight

        # Kill any browsermob-proxy left over from a previous run.
        for proc in psutil.process_iter():
            try:
                if proc.name() == "browsermob-proxy":
                    proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process exited while we were iterating, or it is not
                # ours to inspect/kill; neither should abort start-up.
                continue

        options = {'port': 8090}
        self.server = Server(path=browsermob_proxy_location, options=options)

        self.server.start()
        time.sleep(1)
        self.proxy = self.server.create_proxy()
        time.sleep(1)

        profile = webdriver.FirefoxProfile()
        selenium_proxy = self.proxy.selenium_proxy()
        profile.set_proxy(selenium_proxy)
        self.driver = webdriver.Firefox(firefox_profile=profile,
                                        firefox_binary=firefox_binary,
                                        executable_path=geckodriver)
        self.proxy.new_har(har_name)
        self.driver.get(url)
Пример #23
0
def print_hi():
    """Open baidu.com behind BrowserMob Proxy and print captured bodies.

    The whole HAR is printed first, followed by the response body text of
    every entry that has one. The browser and the proxy server are shut
    down afterwards, also when an error occurs.
    """
    server = Server(r'D:\exchange_data\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        # Set the driver options.
        chrome_options = Options()
        chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))

        driver = webdriver.Chrome(chrome_options=chrome_options)
        url = 'https://www.baidu.com/'
        proxy.new_har('fund', options={'captureHeaders': True, 'captureContent': True})
        driver.get(url)

        result = proxy.har
        print(result)

        for entry in result['log']['entries']:
            # To pick out one data endpoint, filter on the request URL
            # here, e.g. ``"lsjz?callback=" in entry['request']['url']``.
            _content = entry['response']['content'].get('text')
            if _content is None:
                # Responses without a body carry no 'text' key; indexing it
                # directly raised KeyError.
                continue
            # Body returned by the endpoint.
            print(_content)
    finally:
        server.stop()
        if driver is not None:
            driver.quit()
Пример #24
0
 def init_browser(self):
     """Start BrowserMob Proxy and headless Chrome, then log in to the site.

     Reads ``login_url``, ``username`` and ``password`` from the enclosing
     scope.

     Returns:
         Tuple ``(browser, proxy, wait)`` where ``wait`` is a 30-second
         ``WebDriverWait`` on the browser.
     """
     # Proxy that listens to the responses.
     bmp_server = Server(r'C:\python_job\khedu-test\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
     bmp_server.start()
     proxy = bmp_server.create_proxy()
     proxy.new_har(options={'captureContent': True, 'captureHeaders': True})

     # -- chromedriver --
     chrome_options = Options()
     for switch in ('--ignore-certificate-errors',
                    '--proxy-server={0}'.format(proxy.proxy),
                    '--headless'):  # headless mode
         chrome_options.add_argument(switch)
     browser = webdriver.Chrome(r'C:\python_job\khedu-test\chromedriver.exe', options=chrome_options)
     browser.maximize_window()
     time.sleep(1)  # resizing the window needs a moment
     browser.set_page_load_timeout(30)
     wait = WebDriverWait(browser, 30)
     browser.get(login_url)

     # -- login --
     # The page gets a random id on every load; the form elements are
     # addressed as that id plus a fixed one-letter suffix.
     id0 = pq(browser.page_source)('div.z-page').attr('id').replace('_', '')

     def element(suffix):
         return browser.find_element_by_xpath(
             '//*[@id="{}{}"]'.format(id0, suffix))

     element('b').send_keys(username)
     element('c').send_keys(password)
     element('g').click()
     time.sleep(2)  # not so fast
     browser.refresh()  # the page gets stuck on a spinner and needs a reload
     time.sleep(2)
     return browser, proxy, wait
Пример #25
0
def run(dep, arr, arr_date, num, china):
    """Schedule one crawl task per route/date and run them to completion.

    A task is created for every combination of date, departure city and
    arrival city where the two cities differ and at least one of them is
    not in ``china``.

    Args:
        dep: Iterable of departure cities.
        arr: Iterable of arrival cities.
        arr_date: Iterable of dates.
        num: Maximum number of tasks allowed to run concurrently.
        china: Container of cities; routes with both ends in it are
            skipped.
    """
    server = Server(path)  # path of the server launch script
    server.start()
    try:
        tasks = []
        semaphore = asyncio.Semaphore(num)  # limits the concurrency
        i = 0
        for date in arr_date:
            for departureCity in dep:
                for arrivalCity in arr:
                    if departureCity == arrivalCity:
                        continue
                    if departureCity in china and arrivalCity in china:
                        continue
                    url = search_url(departureCity, arrivalCity, date)
                    print(departureCity + "--" + arrivalCity +
                          "开始爬取数据..." + str(i))
                    i = i + 1
                    # Retries until the task could be scheduled.
                    # NOTE(review): a failure that repeats on every attempt
                    # makes this loop forever - confirm that is acceptable.
                    while True:
                        try:
                            c = get_request(url, server, semaphore,
                                            departureCity, arrivalCity,
                                            date)
                            task = asyncio.ensure_future(c)
                            task.add_done_callback(callback)
                            tasks.append(task)
                            break
                        except Exception as e:
                            print(e)
        loop = asyncio.get_event_loop()
        try:
            # asyncio.wait() raises ValueError on an empty collection.
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))
        finally:
            loop.close()
    finally:
        # Stop the proxy server even when scheduling or a task failed.
        print("server closed")
        server.stop()
Пример #26
0
    def _start_ProxyHelper(self, options, proxy_port=None):
        """
        Start the proxy helper that is used to listen to HTTP requests.

        :param options: Chrome browser launch options; the proxy switch is
            added to this object
        :param proxy_port: proxy port number; ``GF.proxy_port()`` is used
            when it is None
        :return: the ``options`` object that was passed in
        """
        assert self._proxyHelper is None, "代理已开启"

        # Fall back to the default port when none was given.
        if proxy_port is None:
            proxy_port = GF.proxy_port()

        # Start the proxy server and open a proxy on it.
        bmp_server = Server(GF.proxy_path(), {'port': proxy_port})
        bmp_server.start({'log_path': GF.cache_path()})
        bmp_proxy = bmp_server.create_proxy()

        # Start HAR recording and route the browser through the proxy.
        har_options = {'captureContent': True, 'captureHeaders': True}
        bmp_proxy.new_har(options=har_options)
        options.add_argument('--proxy-server={0}'.format(bmp_proxy.proxy))

        # Start the proxy log, then the proxy helper itself.
        self.init_proxy_log()
        self._proxyHelper = ProxyHelper(self, bmp_proxy)

        return options
Пример #27
0
    def __init__(self, browsermobDirectory, headless=False):
        """Open TikTok trending behind a proxy to find a request signature.

        Sets ``self.signature`` from the recorded ``share/item/list``
        request (if one is seen) and ``self.hashtag`` to the titles of the
        trending hashtags found on the page. The browser and the proxy
        server are shut down before returning, also when an error occurs.

        NOTE(review): ``self.signature`` is left unset when no matching
        request is recorded, and the value is read from a fixed
        query-string position (index 6) - confirm both are acceptable.

        Args:
            browsermobDirectory: Path of the BrowserMob Proxy binary.
            headless: Run Firefox headless when True.
        """
        # Imports
        print(
            "New class reference, finding valid signature. This might take a minute."
        )
        from browsermobproxy import Server
        import psutil
        import time
        from selenium import webdriver
        from selenium.webdriver.firefox.options import Options

        # Kills any browsermob-proxy
        for proc in psutil.process_iter():
            try:
                if proc.name() == "browsermob-proxy":
                    proc.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process exited while we were iterating, or it is not
                # ours to inspect/kill; neither should abort start-up.
                continue

        server = Server(path=browsermobDirectory, options={'port': 8090})
        server.start()
        driver = None
        try:
            time.sleep(1)
            proxy = server.create_proxy()
            time.sleep(1)

            # Creates FF profile
            profile = webdriver.FirefoxProfile()
            profile.set_proxy(proxy.selenium_proxy())
            options = Options()
            if headless == True:
                options.headless = True
            driver = webdriver.Firefox(firefox_profile=profile,
                                       options=options)

            # Records FF Har
            proxy.new_har("list")
            driver.get("https://www.tiktok.com/en/trending")
            data = proxy.har
            for element in data['log']['entries']:
                request_url = element['request']['url']
                if ("https://m.tiktok.com/share/item/list?" in request_url
                        or "https://www.tiktok.com/share/item/list?"
                        in request_url):
                    print("Found signature, continuing.")
                    self.signature = element['request']['queryString'][6]['value']

            # Get Trending hashtags
            hashtags = driver.find_elements_by_xpath(
                '//h3[@class="_list_item_title"]/a')
            hashtagArray = [
                hashtag.get_attribute('title') for hashtag in hashtags
            ]
        finally:
            server.stop()
            if driver is not None:
                driver.quit()

        self.hashtag = hashtagArray
        self.headless = headless
        self.browsermobDirectory = browsermobDirectory
Пример #28
0
def locate(url, search_parameters=config.SEARCH_PARAMETERS):
    """Record the requests made by ``url`` and pick out streams/subtitles.

    The page is opened in Firefox behind BrowserMob Proxy for five seconds.
    For every recorded request and every search parameter: if the
    parameter occurs in the URL (query string excluded) the request counts
    as a stream; otherwise, if that URL contains ``.vtt``, ``.srt`` or
    ``.ass``, it counts as a subtitle.

    Args:
        url: Page to open.
        search_parameters: Substrings that identify stream URLs.

    Returns:
        Tuple ``(streams, subtitles)``; each is a list of unique dicts with
        the keys ``method``, ``url`` and ``headers``.
    """
    server = Server(config.BROWSERMOB_PROXY)
    server.start()
    try:
        proxy = server.create_proxy()
        options = Options()
        options.headless = config.HEADLESS
        profile = webdriver.FirefoxProfile(config.FIREFOX_PROFILE)
        profile.set_proxy(proxy.selenium_proxy())
        browser = webdriver.Firefox(firefox_profile=profile, options=options)
        try:
            proxy.new_har('source', options={'captureHeaders': True})
            browser.get(url)
            sleep(5)
            # The HAR is fetched from the running proxy server, so read it
            # before the server is stopped (the original read it after).
            entries = proxy.har['log']['entries']
        finally:
            browser.close()
    finally:
        server.stop()

    subtitle_markers = ('.vtt', '.srt', '.ass')
    streams = []
    subtitles = []
    for entry in entries:
        recorded = entry['request']
        request = {
            'method': recorded['method'],
            'url': recorded['url'],
            'headers': {x['name']: x['value'] for x in recorded['headers']},
        }
        url_path = recorded['url'].split('?')[0]
        is_subtitle = any(marker in url_path for marker in subtitle_markers)
        for param in search_parameters:
            if param in url_path:
                if request not in streams:
                    streams.append(request)
            elif is_subtitle:
                if request not in subtitles:
                    subtitles.append(request)

    # Remove the log files the tools leave in the working directory.
    for log_name in ('bmp.log', 'geckodriver.log', 'server.log'):
        try:
            os.remove(os.path.join(os.getcwd(), log_name))
        except FileNotFoundError:
            pass
    return streams, subtitles
Пример #29
0
def getToken():
    """Open the company detail page behind a proxy and return an access token.

    While the page shows the "verification expired" dialog, its first
    button is clicked and the page is refreshed. The token is then read
    from the headers of the recorded ``caDetailList`` API requests.

    NOTE(review): the token is taken from a fixed header position (index 4)
    of the request - confirm the header order is stable.

    Returns:
        One of the non-empty tokens that were captured.

    Raises:
        IndexError: If no token was captured.
    """
    server = Server(r'F:\browsermob-proxy-2.1.4\bin\browsermob-proxy.bat')
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        chrome_options = Options()
        chrome_options.add_argument('--proxy-server={0}'.format(proxy.proxy))

        driver = webdriver.Chrome(chrome_options=chrome_options)
        base_url = "http://jzsc.mohurd.gov.cn/data/company/detail?id=C5C5C4C3C5C2C7C7C5C5C0C2C7CCC7C7C5C6"
        proxy.new_har("douyin",
                      options={
                          'captureHeaders': True,
                          'captureContent': True
                      })
        driver.get(base_url)
        while '验证已过期,是否重新重新进行验证或停留在当前页面?' in driver.page_source:
            driver.find_element_by_xpath(
                '//*[@id="app"]/div/header/div[5]/div/div[3]/div/button[1]'
            ).click()
            time.sleep(2.5)
            driver.refresh()
            time.sleep(3)
        result = proxy.har
    finally:
        # Shut everything down even when the page interaction fails.
        server.stop()
        if driver is not None:
            driver.quit()

    token = set()
    for entry in result['log']['entries']:
        _url = entry['request']['url']
        if "api/webApi/dataservice/query/comp/caDetailList?qyId" in str(_url):
            _accessToken = entry['request']['headers'][4]['value']
            if _accessToken != '':
                token.add(_accessToken)
    if not token:
        # Same exception type ``list(token)[0]`` raised, with a message.
        raise IndexError("no access token was captured from caDetailList")
    return next(iter(token))
Пример #30
0
def fetch_har_by_url(url, segments, index):
    """Load ``url`` in Firefox behind BrowserMob Proxy and return the HAR.

    Headers, content and binary content are captured. After the page has
    loaded, the proxy is given up to 10 s to see 2 s without traffic, so
    that requests fired after the load event are recorded too.

    Args:
        url: Page to load; also used as the HAR name.
        segments: Unused in this function.
        index: Unused in this function.

    Returns:
        The HAR as returned by the proxy.
    """
    project_dir = os.path.dirname(__file__)
    bpm_path = os.path.join(project_dir,
                            "browsermob-proxy-2.1.1/bin/browsermob-proxy")

    server = Server(bpm_path)
    server.start()
    driver = None
    try:
        proxy = server.create_proxy()

        profile = webdriver.FirefoxProfile()
        profile.set_proxy(proxy.selenium_proxy())

        driver = webdriver.Firefox(firefox_profile=profile,
                                   executable_path=os.path.join(
                                       project_dir, _geckodriver))

        proxy.new_har(url,
                      options={
                          'captureHeaders': True,
                          'captureContent': True,
                          'captureBinaryContent': True
                      })

        driver.get(url)
        # Wait for the traffic to settle AFTER navigating; calling this
        # before driver.get() (as before) only waited on an idle proxy.
        proxy.wait_for_traffic_to_stop(2000, 10000)

        har = proxy.har
    finally:
        server.stop()
        if driver is not None:
            driver.quit()

    return har