def proxied_driver(addresses, driver_type="chrome", co=None):
    """Create a Selenium driver routed through a randomly chosen proxy.

    Args:
        addresses: Non-empty sequence of proxy addresses. One is picked at
            random and used for both HTTP and SSL traffic.
        driver_type: Either "chrome" or "firefox".
        co: Browser options object forwarded to the driver (may be None).

    Returns:
        The newly created webdriver instance.

    Raises:
        AssertionError: If ``addresses`` is empty or ``driver_type`` is not
            one of the supported values.
    """
    assert len(addresses) > 0, "At least one proxy address must be provided"
    assert driver_type in ["chrome", "firefox"], "driver_type must be chrome or firefox"

    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    addr = random.choice(addresses)
    prox.http_proxy = addr
    prox.ssl_proxy = addr

    if driver_type == "chrome":
        # Work on a copy: add_to_capabilities mutates the dict it is given,
        # and the class-level template is shared by every other caller.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)
        driver = webdriver.Chrome(chrome_options=co,
                                  desired_capabilities=capabilities)
    elif driver_type == "firefox":
        capabilities = DesiredCapabilities.FIREFOX.copy()
        prox.add_to_capabilities(capabilities)
        driver = webdriver.Firefox(
            firefox_options=co,
            desired_capabilities=capabilities,
            executable_path="/usr/local/bin/geckodriver")
    return driver
def main():
    """Entry point of the command that makes it convenient to install the proxy certificate.

    Runs ``runProxy`` on a free port in a daemon thread, opens Chrome
    configured to use ``localhost:<port>`` as its HTTP proxy, loads
    http://mitm.it/ and then sleeps until the timeout elapses (or the user
    interrupts). The optional first command-line argument overrides the
    default timeout of 600 seconds.
    """
    commandArgs = sys.argv[1:]
    # Parse the timeout first so a malformed argument fails before a browser
    # is launched.
    timeout = int(str(commandArgs[0])) if len(commandArgs) > 0 else 600

    proxyPort = findFreePort()
    proxyThread = threading.Thread(target=runProxy,
                                   args=[proxyPort],
                                   daemon=True)
    proxyThread.start()

    # Copy the template so the logging/proxy settings do not leak into the
    # shared class-level dict.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    capabilities['loggingPrefs'] = {'browser': 'ALL'}

    proxyConfig = Proxy()
    proxyConfig.proxy_type = ProxyType.MANUAL
    proxyConfig.http_proxy = f"localhost:{proxyPort}"
    proxyConfig.add_to_capabilities(capabilities)

    driver = webdriver.Chrome(desired_capabilities=capabilities)
    try:
        driver.get("http://mitm.it/")
        print(
            "Please kill the command with Ctrl-C or (Cmd-C on macOS) when you are finished installing the certificates. "
            f"Timeout in {timeout} seconds..."
        )
        time.sleep(timeout)
    finally:
        # Runs on timeout and on Ctrl-C alike, so the browser is not left behind.
        driver.quit()
def test1():
    """Fetch ``url`` through a fixed HTTP proxy with PhantomJS and print the page source.

    ``url`` is read from the enclosing module scope.
    """
    from selenium.webdriver.common.proxy import Proxy, ProxyType
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': 'http://117.78.51.231:3128'
    })
    desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    proxy.add_to_capabilities(desired_capabilities)

    driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities)
    try:
        driver.get(url)
        html = driver.page_source
        print(html)
    finally:
        # Always release the browser process, even when the page load fails.
        driver.quit()
def main(proxy, company_name):
    """Open the search site in Chrome, run the captcha cracker and parse the result page.

    Args:
        proxy: Proxy address used to build the proxy capabilities.
        company_name: Value handed to ``BaseGeetestCrack.crack``.

    Returns:
        Always None. The page source is passed to ``html_parser`` when the
        ``search-result`` element is found.
    """
    begin = time.time()

    # NOTE(review): these capabilities are prepared but never handed to the
    # Chrome driver below — confirm whether the proxy is meant to be applied.
    proxies = Proxy({'proxyType': ProxyType.MANUAL, 'httpProxy': proxy})
    desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    proxies.add_to_capabilities(desired_capabilities)

    driver = webdriver.Chrome(
        executable_path=
        r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get("http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml")
        cracker = BaseGeetestCrack(driver)
        cracker.crack(company_name)
        try:
            driver.find_element_by_class_name('search-result')
            html_parser(driver.page_source)
        except Exception as e:
            print('can not find search-result', e)
    finally:
        # Close exactly once on every path; the previous layout closed the
        # window a second time when html_parser raised.
        driver.close()
    print('耗时: ', time.time() - begin)
    return None
def create_instances(self):
    """Start one PhantomJS driver per entry in ``self.proxies``.

    Each driver gets the entry's address as HTTP, SOCKS and SSL proxy, a
    resource timeout of ``self.max_delay_limit`` seconds (passed in ms), and
    is appended to ``self.multi_instances``.
    """
    for _proxy in self.proxies:
        proxy = _proxy.proxy

        service_args = []
        if proxy.username and proxy.password:
            service_args.append('--proxy-auth={}:{}'.format(
                proxy.username, proxy.password))

        # Fresh copy per driver: the class-level template is shared, so
        # mutating it would leak one proxy's settings into the next driver.
        capabilities = DesiredCapabilities.PHANTOMJS.copy()
        capabilities[
            'phantomjs.page.settings.resourceTimeout'] = self.max_delay_limit * 1000

        address = '{}:{}'.format(proxy.ip, proxy.port)
        _proxy_ = Proxy()
        _proxy_.proxy_type = ProxyType.MANUAL
        _proxy_.http_proxy = address
        _proxy_.socks_proxy = address
        _proxy_.ssl_proxy = address
        _proxy_.add_to_capabilities(capabilities)

        driver = webdriver.PhantomJS(
            service_args=service_args,
            desired_capabilities=capabilities,
            service_log_path='/tmp/ghostdriver.log')
        driver.set_window_size(1120, 1080)
        driver.set_page_load_timeout(self.max_delay_limit)
        self.multi_instances.append(driver)
def initialize(driver_url):
    """Create a remote webdriver session with the configured capabilities.

    When ``Config.DESIRED_CAPABILITIES`` is empty, Chrome options with a
    Japanese ``intl.accept_languages`` preference are used. Otherwise each
    ``key=value`` entry is merged into the capabilities dict. Proxy settings
    from ``Config`` are added when any of them is set.

    Args:
        driver_url: Command executor URL of the remote webdriver.

    Returns:
        The ``webdriver.Remote`` instance.
    """
    if len(Config.DESIRED_CAPABILITIES) == 0:
        options = webdriver.ChromeOptions()
        options.add_experimental_option('prefs',
                                        {'intl.accept_languages': 'ja_JP'})
        cap = options.to_capabilities()
    else:
        cap = {}
        for entry in Config.DESIRED_CAPABILITIES:
            # Split on the first "=" only so values may themselves contain "=".
            k, v = entry.split("=", 1)
            k = k.strip("\"'")
            v = maybe_bool(v.strip("\"'"))
            merge(cap, construct_dict(k, v))

    if Config.HTTP_PROXY or Config.HTTPS_PROXY or Config.NO_PROXY:
        proxy = Proxy()
        proxy.sslProxy = Config.HTTPS_PROXY
        proxy.httpProxy = Config.HTTP_PROXY
        proxy.noProxy = Config.NO_PROXY
        proxy.proxyType = ProxyType.MANUAL
        proxy.add_to_capabilities(cap)

    driver = webdriver.Remote(command_executor=driver_url,
                              desired_capabilities=cap)
    return driver
def get_browser():
    """Return a Chrome driver that sends HTTP, SOCKS and SSL traffic through 127.0.0.1:8080.

    The chromedriver path is stored in the ``webdriver.chrome.driver``
    environment variable (macOS or Linux location under the user's home).
    Certificate errors are ignored.
    """
    # @todo add windows path
    home = os.path.expanduser("~")
    if platform.system() == "Darwin":
        os.environ["webdriver.chrome.driver"] = (
            home +
            '/Library/Application Support/ZAP/webdriver/macos/64/chromedriver')
    else:
        os.environ["webdriver.chrome.driver"] = (
            home + '/.ZAP/webdriver/linux/64/chromedriver')

    proxy = Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = "127.0.0.1:8080"
    proxy.socks_proxy = "127.0.0.1:8080"
    proxy.ssl_proxy = "127.0.0.1:8080"

    # Copy the template so the proxy entry does not stick to the shared
    # class-level dict.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_argument("--test-type")

    return webdriver.Chrome(
        executable_path=os.environ["webdriver.chrome.driver"],
        chrome_options=options,
        desired_capabilities=capabilities)
def __init__(self, proxy_select):
    """Start Chrome (images disabled) and open the main page.

    Args:
        proxy_select: When truthy, a proxy and a random user agent are
            obtained from ``proxy_rotator()`` and applied to the browser.
    """
    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    self.path = r'ChromeDriver\chromedriver'
    capabilities = dict(DesiredCapabilities.CHROME)

    chrome_options = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)

    if proxy_select:
        proxyrotator = proxy_rotator()
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': proxyrotator['proxy'],
            'ftpProxy': '',
            'sslProxy': '',
            'noProxy': ''
        })
        proxy.add_to_capabilities(capabilities)
        # Concatenate, do not join: str.join would insert "user-agent="
        # between every character of the user agent string.
        chrome_options.add_argument("user-agent=" +
                                    proxyrotator['randomUserAgent'])

    self.driver = webdriver.Chrome(self.path,
                                   desired_capabilities=capabilities,
                                   options=chrome_options)
    self.driver.set_window_position(0, 0)
    self.driver.set_window_size(1024, 800)
    self.driver.set_page_load_timeout(600)

    self.main_url = 'https://es.wallapop.com/'
    self.driver.get(self.main_url)
    self.error = False
    self.listado_ids = []
    print('Object created')
def get_phantomjs_browser(
        self,
        proxy=None,
        timeout=15,
):
    """Create a PhantomJS browser.

    :param proxy: String "ip:port"; used as HTTP, SOCKS and SSL proxy when given
    :param timeout: Int, page-load timeout passed to the driver
    :return: the PhantomJS browser object
    """
    # Copy the template: the user agent, image setting and proxy written
    # below must not persist in the shared class-level dict.
    capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    capabilities['phantomjs.page.settings.userAgent'] = random.choice(
        USER_AGENTS)
    capabilities["phantomjs.page.settings.loadImages"] = False

    if proxy:
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = proxy
        prox.socks_proxy = proxy
        prox.ssl_proxy = proxy
        prox.add_to_capabilities(capabilities)

    browser = webdriver.PhantomJS(desired_capabilities=capabilities)
    browser.maximize_window()
    browser.set_page_load_timeout(timeout)
    return browser
def proxy_driver(PROXIES, co=co):
    """Build a Chrome driver that uses the last proxy in ``PROXIES``.

    When ``PROXIES`` is empty it is refilled from ``get_proxies()`` until it
    holds at least one entry.

    Args:
        PROXIES: Sequence of proxy addresses.
        co: Chrome options forwarded to the driver.

    Returns:
        The created Chrome webdriver.
    """
    prox = Proxy()
    ua = UserAgent()

    # Keep refilling until there is something to pick from.
    while not PROXIES:
        print("--- Proxies used up (%s)" % len(PROXIES))
        PROXIES = get_proxies()
    pxy = PROXIES[-1]

    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = pxy
    prox.ssl_proxy = pxy

    capabilities = dict(DesiredCapabilities.CHROME)
    capabilities["chrome.page.settings.userAgent"] = ua.random
    prox.add_to_capabilities(capabilities)

    return webdriver.Chrome(
        "chromedriver.exe",
        options=co,
        desired_capabilities=capabilities,
        service_args=['--ssl-protocol=any', '--ignore-ssl-errors=true'],
    )
def __call__(self):
    """Open ``URL`` in headless Chrome through the authenticated proxy and click matching links.

    Every ``<a>`` element whose text satisfies
    ``has_concurrences(KEY_WORDS, text)`` is clicked.
    """
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': f"http://{PROXY_USER}:{PROXY_PASSWORD}@{PROXY_HOST}:{PROXY_PORT}"
    })

    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_argument("--headless")
    options.add_argument(f"user-agent={generate_user_agent()}")

    # Copy so the proxy (which embeds credentials) is not written into the
    # shared class-level capabilities dict.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    driver = webdriver.Chrome("./chromedriver",
                              chrome_options=options,
                              desired_capabilities=capabilities)
    try:
        driver.get(URL)
        elements = driver.find_elements_by_tag_name("a")
        for element in elements:
            if has_concurrences(KEY_WORDS, element.text):
                element.click()
    finally:
        # Close the window even when navigation or a click fails.
        driver.close()
def get_webdriver():
    """Return a headless Chrome driver configured from ``Settings``.

    Proxy selection:
      * no proxy configured at all -> direct connection;
      * SOCKS proxy configured -> passed via the ``--proxy-server`` switch;
      * otherwise the HTTP and/or HTTPS proxy is set through capabilities.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('window-size=1920x1080')
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')

    # Configure Proxy option
    proxy = Proxy()
    # All three settings must be checked; previously https_proxy was tested
    # twice, so an HTTP-only proxy was silently ignored.
    no_proxy_configured = (Settings.socks_proxy is None
                           and Settings.http_proxy is None
                           and Settings.https_proxy is None)
    if no_proxy_configured:
        proxy.proxy_type = ProxyType.DIRECT
    else:
        proxy.proxy_type = ProxyType.MANUAL
        if Settings.socks_proxy is not None:
            options.add_argument("--proxy-server=" +
                                 Settings.get_full_socks_proxy())
        else:
            if Settings.http_proxy is not None:
                proxy.http_proxy = Settings.http_proxy
            if Settings.https_proxy is not None:
                proxy.ssl_proxy = Settings.https_proxy

    # Configure capabilities on a copy so the shared template stays untouched.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    return webdriver.Chrome(options=options, desired_capabilities=capabilities)
def tt1():
    """Load a dianping.com shop page through a random proxy with PhantomJS and print the review filter links.

    The proxy address comes from ``redis_conn1()``. The page source is
    printed when it contains '403 Forbidden'.
    """
    desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'www.dianping.com',
    }
    # .items() and single-argument print() behave the same on Python 2 and 3.
    for key, value in headers.items():
        desired_capabilities['phantomjs.page.customHeaders.{}'.format(key)] = value
    desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
        'AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 '
        'Safari/604.1.38')

    ip_port = random.choice(redis_conn1())
    print(ip_port)
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': '%s' % ip_port  # proxy IP and port
    })
    proxy.add_to_capabilities(desired_capabilities)
    print(desired_capabilities)

    # Formatting the whole list into the URL produced ".../shop/['…', '…']".
    # NOTE(review): picking one id at random is an assumption — confirm intent.
    shop_id = random.choice(['76964345', '15855144'])

    driver = webdriver.PhantomJS(desired_capabilities=desired_capabilities)
    try:
        driver.set_page_load_timeout(10)
        driver.get("http://www.dianping.com/shop/%s" % shop_id)
        list1 = driver.find_elements_by_xpath(
            '//div[@class="comment-condition J-comment-condition Fix"]/div/span/a')
        for l in list1:
            print(l.text)
        if '403 Forbidden' in driver.page_source:
            print(driver.page_source)
    finally:
        driver.close()
def selenium_soup(url):
    """Load ``url`` in PhantomJS through a random proxy and parse the page with BeautifulSoup.

    Args:
        url: Address to load.

    Returns:
        The capabilities dict used for the driver.
        NOTE(review): the parsed ``bsObj`` is built but not returned; the
        function name suggests it may have been the intended result —
        confirm with callers before changing.
    """
    service_args = [
        '--load-images=no',
        '--disk-cache=yes',
        '--ignore-ssl-errors=true',
        '--ssl-protocol=any',
    ]

    # Create a fresh desired-capabilities dict.
    dcp = DesiredCapabilities.PHANTOMJS.copy()
    # Set the browser's user agent header.
    dcp["phantomjs.page.settings.userAgent"] = get_random_headers()
    # Add the proxy address to the capabilities.
    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': get_random_httpip()['HTTP'][7:-1]
    })
    proxy.add_to_capabilities(dcp)

    driver = webdriver.PhantomJS(executable_path="phantomjs",
                                 desired_capabilities=dcp,
                                 service_args=service_args)
    try:
        driver.get(url)
        bsObj = BeautifulSoup(driver.page_source, "lxml")
    finally:
        # Close the browser even if the page fails to load or parse.
        driver.close()
    return dcp
def get_browser(PROXY):
    """Open a remote Chrome session through the proxy named by the LUMINATI_* environment variables.

    Args:
        PROXY: Ignored; the value is rebuilt from ``LUMINATI_HOST`` and
            ``LUMINATI_PORT``.

    Returns:
        The remote webdriver, after loading https://lumtest.com/myip.json and
        printing the page source.
    """
    import os

    luminati_host = os.environ.get('LUMINATI_HOST')
    luminati_port = os.environ.get('LUMINATI_PORT')
    PROXY = 'http://' + luminati_host + ':' + luminati_port
    print(PROXY)

    proxy = Proxy()
    proxy.http_proxy = PROXY
    proxy.ftp_proxy = PROXY
    proxy.sslProxy = PROXY
    proxy.no_proxy = "localhost"  # etc... ;)
    proxy.proxy_type = ProxyType.MANUAL

    # Copy so the proxy entry is not stored in the shared class-level dict.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    driver = webdriver.Remote("http://172.20.128.1:4444/wd/hub",
                              desired_capabilities=capabilities)
    url = 'https://lumtest.com/myip.json'
    driver.get(url)
    print(driver.page_source)
    return driver
def get_driver(proxy=False, login=False):
    """Create a Chrome driver, optionally proxied and logged in.

    Args:
        proxy: When truthy, HTTP traffic goes through 10.211.55.4:808.
        login: When truthy, the login form at ``login_url`` is filled with
            ``username``/``password`` and submitted.

    Returns:
        The Chrome webdriver.
    """
    # Always start from a copy so a proxied call cannot affect later
    # non-proxied calls through the shared class-level dict.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    if proxy:
        # set up proxy
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = "10.211.55.4:808"
        prox.add_to_capabilities(capabilities)

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument(
        '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    )
    chrome_options.add_argument('--window-size=1024,768')

    driver = webdriver.Chrome('./chromedriver',
                              chrome_options=chrome_options,
                              desired_capabilities=capabilities)

    if login:
        driver.get(login_url)
        name = driver.find_element_by_xpath(xpaths.login_username)
        name.send_keys(username)
        pw = driver.find_element_by_xpath(xpaths.login_password)
        pw.send_keys(password)
        # Separate name so the ``login`` flag is not shadowed.
        login_button = driver.find_element_by_xpath(xpaths.login_btn)
        login_button.click()
    return driver
def __open_browser(context):
    """Start Chrome for ``context``, using the proxy on localhost:8888 when one answers.

    Args:
        context: Object whose ``config.userdata['chromedriver_path']`` may
            name the chromedriver executable; the driver is stored on
            ``context.driver``.

    Returns:
        The created Chrome webdriver.
    """
    chrm = context.config.userdata['chromedriver_path']
    driver_kwargs = {}
    if chrm:
        driver_kwargs['executable_path'] = chrm

    # If there is a proxy, we'll use it. Otherwise, we won't. Only the probe
    # is guarded, so a failure to start the browser is no longer masked.
    try:
        requests.get("http://localhost:8888", timeout=0.01)
        proxy_available = True
    except requests.exceptions.RequestException:
        proxy_available = False

    if proxy_available:
        PROXY = "localhost:8888"
        proxy = Proxy()
        proxy.proxy_type = ProxyType.MANUAL
        proxy.http_proxy = PROXY
        # Copy so the shared class-level template is not modified.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        proxy.add_to_capabilities(capabilities)
        driver_kwargs['desired_capabilities'] = capabilities

    context.driver = webdriver.Chrome(**driver_kwargs)
    return context.driver
def initialize_browser() -> object:
    """Create a Chrome driver that sends HTTP and SSL traffic through 127.0.0.1:24001.

    In development (``is_development()``) the local ``./bin/chromedriver.exe``
    is used; otherwise Chrome runs headless with /tmp-based data directories.

    Returns:
        The Chrome webdriver.
    """
    chrome_options = Options()
    chrome_options.add_argument('--window-size=1920x1080')
    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument(
        'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 '
        'Safari/537.36')

    proxy_url = "127.0.0.1:24001"
    proxy = Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = proxy_url
    proxy.ssl_proxy = proxy_url

    # Copy so the proxy entry does not persist in the shared template.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    if is_development():
        return webdriver.Chrome('./bin/chromedriver.exe',
                                chrome_options=chrome_options,
                                desired_capabilities=capabilities)

    for switch in (
            '--data-path=/tmp/data-path',
            '--homedir=/tmp',
            '--disk-cache-dir=/tmp/cache-dir',
            '--user-data-dir=/tmp/user-data',
            '--hide-scrollbars',
            '--enable-logging',
            '--log-level=0',
            '--v=99',
            '--single-process',
            '--disable-dev-shm-usage',
            '--headless',
            '--no-sandbox',
            '--disable-gpu',
    ):
        chrome_options.add_argument(switch)
    return webdriver.Chrome(chrome_options=chrome_options,
                            desired_capabilities=capabilities)
def __get_driver_desired_capabilities(self):
    """Return Chrome capabilities with a manual HTTP proxy at 103.109.58.245:8080.

    A copy of the Chrome template is returned so neither this method nor its
    callers modify the shared class-level dict.
    """
    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = "103.109.58.245:8080"

    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    prox.add_to_capabilities(capabilities)
    return capabilities
def CreateBrowser(proxy, head=False, window=False, ua=False):
    """Create a Chrome browser.

    Args:
        proxy: SSL proxy address; falsy for a direct connection.
        head: When falsy the browser runs headless.
        window: Window size string ("width,height"); defaults to 1920,1080.
        ua: When truthy a Chrome user agent from ``UserAgent()`` is used.

    Returns:
        The Chrome webdriver.
    """
    driver_kwargs = {}
    if proxy:
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.ssl_proxy = proxy
        # Copy so later proxy-less browsers do not inherit this proxy via
        # the shared class-level dict.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)
        driver_kwargs['desired_capabilities'] = capabilities

    # Set random user agent
    opts = Options()
    if ua:
        agent = UserAgent().chrome
        opts.add_argument("user-agent=" + agent)
    if not head:
        opts.add_argument("--headless")
    if window:
        opts.add_argument("--window-size=" + window)
    else:
        opts.add_argument("--window-size=%s" % "1920,1080")

    _browser = webdriver.Chrome(options=opts, **driver_kwargs)
    return _browser
def openDriver(self):
    """Start Firefox, open the host and base pages, submit the basic search and parse the results.

    NOTE(review): the proxy capabilities are built but the driver is created
    without them (the variant passing them is disabled) — confirm intent.
    """
    # The proxy entry is optional.
    proxy_settings = {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': '60.217.132.244:8060',
    }
    # Build a fresh desired-capabilities dict.
    desired_capabilities = DesiredCapabilities.FIREFOX.copy()
    Proxy(proxy_settings).add_to_capabilities(desired_capabilities)

    self.driver = webdriver.Firefox(executable_path=self.firefoxPath)
    browser = self.driver
    browser.get(self.host)
    time.sleep(10)
    browser.get(self.baseUrl)
    browser.maximize_window()
    browser.implicitly_wait(10)

    try:
        waiter = WebDriverWait(browser, 10, 0.5)
        waiter.until(lambda x: x.find_element_by_id("searchStr"))
    except Exception as e:
        print(e)
    time.sleep(8)

    searchInput = browser.find_element_by_id("searchStr")
    if searchInput is None:
        return
    time.sleep(4)
    browser.find_element_by_id("basic_searchData").click()
    self.parseSearchPage(self.domain)
def __init__(self, proxy_select):
    """Start Chrome (images disabled) and open the login page.

    Args:
        proxy_select: When truthy, an HTTP proxy obtained from
            ``proxy_rotator()`` is applied to the browser.
    """
    from selenium import webdriver
    from selenium.webdriver.common.proxy import Proxy, ProxyType
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

    # Raw string: "\c" is not a valid escape sequence in a normal literal
    # (the resulting value is unchanged).
    self.path = r'ChromeDriver\chromedriver'
    capabilities = dict(DesiredCapabilities.CHROME)
    if proxy_select:
        proxyrotator = proxy_rotator()
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': proxyrotator['proxy'],
        })
        proxy.add_to_capabilities(capabilities)

    chrome_options = webdriver.ChromeOptions()
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)
    chrome_options.page_load_strategy = 'normal'

    self.driver = webdriver.Chrome(self.path,
                                   desired_capabilities=capabilities,
                                   options=chrome_options)
    self.driver.set_window_position(0, 0)
    self.driver.set_window_size(1024, 800)
    self.driver.set_page_load_timeout(600)

    self.login_url = 'https://com.vibbo.com/usrarea/login/'
    self.driver.get(self.login_url)
    self.error = False
    self.listado_url_products = []
    print('Object created')
def __init__(self, proxy):
    """Initialize the web driver.

    Args:
        proxy: When truthy, the address from ``self.get_proxy()`` is used as
            the SSL proxy; otherwise Chrome starts with the no-sandbox and
            disable-dev-shm-usage switches.
    """
    if proxy:
        PROXY = self.get_proxy()
        custom_proxy = Proxy()
        custom_proxy.proxy_type = ProxyType.MANUAL
        custom_proxy.ssl_proxy = PROXY
        # Copy so instances created without a proxy do not inherit this one
        # through the shared class-level dict.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        custom_proxy.add_to_capabilities(capabilities)
        self.driver = webdriver.Chrome(
            ChromeDriverManager().install(),
            desired_capabilities=capabilities
        )
    else:
        options = Options()
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        self.driver = webdriver.Chrome(
            ChromeDriverManager().install(),
            chrome_options=options
        )
def load_page(source):
    """Load ``source`` in Chrome through the proxy at 127.0.0.1:9090 and print timing figures.

    "Back End" is responseStart - navigationStart and "Front End" is
    domComplete - responseStart, both read from window.performance.timing.

    Args:
        source: URL to load.
    """
    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = "127.0.0.1:9090"
    prox.socks_proxy = "127.0.0.1:9090"
    prox.ssl_proxy = "127.0.0.1:9090"

    # Copy so the proxy entry does not persist in the shared template.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    prox.add_to_capabilities(capabilities)

    driver = webdriver.Chrome(desired_capabilities=capabilities)
    try:
        driver.get(source)
        navigationStart = driver.execute_script(
            "return window.performance.timing.navigationStart")
        responseStart = driver.execute_script(
            "return window.performance.timing.responseStart")
        domComplete = driver.execute_script(
            "return window.performance.timing.domComplete")

        backendPerformance = responseStart - navigationStart
        frontendPerformance = domComplete - responseStart
        # Single-argument print() produces the same output on Python 2 and 3.
        print("Back End: %s" % backendPerformance)
        print("Front End: %s" % frontendPerformance)
    finally:
        # Release the browser even when loading or scripting fails.
        driver.quit()
def get_capabilities(self):
    """Return Firefox capabilities, with a proxy section when a proxy applies.

    The profile's proxy is used when it is set and active; otherwise a random
    active default proxy from ``models.Proxy`` is chosen, if any exists.
    """
    capabilities = DesiredCapabilities.FIREFOX.copy()

    proxy = self.profile.proxy
    if proxy is None or not proxy.active:
        # Fall back to a random default proxy.
        candidates = models.Proxy.objects.filter(active=True,
                                                 default=True).all()
        proxy = random.choice(candidates) if candidates else None

    if not proxy:
        return capabilities

    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    if proxy.proxy_type == proxy.HTTP:
        for field in ('http_proxy', 'ssl_proxy', 'ftp_proxy'):
            setattr(prox, field, f'{proxy.ip}:{proxy.port}')
    elif proxy.proxy_type == proxy.SOCKS:
        prox.socks_proxy = f'{proxy.ip}:{proxy.port}'
        prox.socks_username = proxy.username
        prox.socks_password = proxy.password
    prox.add_to_capabilities(capabilities)
    return capabilities
def getGoogleChromeDriver(fullproxy):
    """Create an incognito Chrome driver using the proxy named in ``fullproxy``.

    Args:
        fullproxy: String whose first space-separated token is the proxy
            address; it is used as HTTP, SOCKS and SSL proxy.

    Returns:
        The Chrome webdriver, or None when anything fails (the error is
        reported through ``LogError``).
    """
    try:
        proxy = fullproxy.split(' ')[0]
        conditionalPrint("proxy used : " + proxy)
        WINDOW_SIZE = "1920,1080"

        option = webdriver.ChromeOptions()
        option.add_argument("--incognito")
        if hideBrowser == "YES":
            option.add_argument("--headless")
        option.add_argument("--window-size=%s" % WINDOW_SIZE)

        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = proxy
        prox.socks_proxy = proxy
        prox.ssl_proxy = proxy

        # Copy so one call's proxy does not persist in the shared
        # class-level dict for the next call.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)

        browser = webdriver.Chrome(
            executable_path="C:\\webdrivers\\chromedriver.exe",
            chrome_options=option,
            desired_capabilities=capabilities)
        return browser
    except Exception:
        LogError(traceback, "fullproxy = " + fullproxy)
        return None
def get_browser(url, proxy=""):
    """Open ``url`` in Chrome, optionally through a proxy.

    Args:
        url: Page to navigate to.
        proxy: "ip_addr:port" of the proxy, or "" for a direct connection.

    Returns:
        The Chrome webdriver (left open for the caller).
    """
    from selenium import webdriver
    from selenium.webdriver.common.proxy import Proxy, ProxyType

    if proxy == "":
        # replace with .Firefox(), or with the browser of your choice
        browser = webdriver.Chrome()
    else:
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        # Proxy has no "https_proxy" field; assigning it only created an
        # unused attribute and the proxy was never applied. HTTPS traffic is
        # configured through ssl_proxy; http_proxy covers plain HTTP.
        prox.http_proxy = proxy
        prox.ssl_proxy = proxy

        # Copy so the proxy does not leak into the shared template.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)
        browser = webdriver.Chrome(desired_capabilities=capabilities)

    browser.get(url)  # navigate to the page
    return browser
def prepare_driver():
    """
    Configure the proxy settings and open the browser.

    NOTE(review): the proxy capabilities and Chrome options are built and
    printed, but the driver is created without them — confirm intent.

    :return: webdriver
    """
    headless_proxy = "127.0.0.1:3128"
    proxy_settings = {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': headless_proxy,
        'ftpProxy': headless_proxy,
        'sslProxy': headless_proxy,
        'noProxy': ''
    }
    proxy = Proxy(proxy_settings)

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option(
        "prefs", {"profile.managed_default_content_settings.images": 2})

    capabilities = dict(DesiredCapabilities.CHROME)
    proxy.add_to_capabilities(capabilities)
    print(capabilities)

    # Creating the Chrome webdriver opens the browser; it is the return value.
    driver = webdriver.Chrome('./drivers/chromedriver_linux/chromedriver')
    return driver
def __init__(self, songUrl, proxy_url):
    """Initialise the song record and start a headless Chrome driver.

    Args:
        songUrl: URL of the song page.
        proxy_url: Proxy address used for HTTP and SSL traffic.
    """
    self.songUrl = songUrl
    self.id = -1
    self.name = ''
    self.album_id = -1
    self.comments_num = -1
    self.similar_song_ids = []
    self.artists = []

    chrome_options = Options()

    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = proxy_url
    prox.ssl_proxy = proxy_url

    # Previously the proxy was written into the shared class-level dict and
    # never handed to the driver. Use a private copy and pass it explicitly.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    prox.add_to_capabilities(capabilities)

    chrome_options.add_argument('--headless')
    if config_is_ubuntu:
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')

    self.driver = webdriver.Chrome(config_chrome_path,
                                   options=chrome_options,
                                   desired_capabilities=capabilities)
def openDriver(self):
    """Start Firefox through a fixed HTTP proxy and parse every URL from ``getProQuestStaryUrls``.

    For each entry, an item with ``url``, ``pid`` and ``domain`` is built,
    the detail page is opened and ``parseSearchPage`` is called, followed by
    a random 4-6 second pause.
    """
    # The proxy entry is optional.
    proxy_settings = {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': '114.99.10.66:61234',
    }
    # Build a fresh desired-capabilities dict.
    desired_capabilities = DesiredCapabilities.FIREFOX.copy()
    Proxy(proxy_settings).add_to_capabilities(desired_capabilities)

    self.driver = webdriver.Firefox(
        executable_path=self.firefoxPath,
        desired_capabilities=desired_capabilities)
    self.driver.get(self.host)
    time.sleep(6)

    urlList = self.getProQuestStaryUrls()
    print("list :: ", urlList)

    for position in range(len(urlList)):
        entry = urlList[position]
        # The first 21 characters of the stored URL are dropped.
        detailUrl = entry[1][21:]
        domain = entry[2]
        pid = entry[0]
        item = {'url': detailUrl, 'pid': pid, 'domain': domain}

        self.driver.get(detailUrl)
        self.driver.maximize_window()
        self.driver.implicitly_wait(10)
        self.parseSearchPage(item)
        time.sleep(random.randint(4, 6))
def __init__(self, email, password, category):
    """Set up logging, the link list, a proxy and the Chrome driver.

    Args:
        email: Stored on the instance.
        password: Stored on the instance.
        category: Passed to ``link_generator.generate`` to build the links.

    A failure while creating the driver is logged as critical and
    ``self.driver`` is left unset.
    """
    super().__init__()
    self.email = email
    self.password = password
    self.category = category
    self.count = 1

    logging.basicConfig(
        handlers=[logging.FileHandler('./Logs/scraper.log', 'w', 'utf-8')],
        format=': %(asctime)s : %(levelname)s : %(message)s : ',
    )
    self.logger = logging.getLogger()
    self.logger.setLevel(logging.INFO)

    self.links = link_generator.generate(self.category)
    self.proxy = proxy_generator.get_proxy()
    self.scrapedData = []

    try:
        prox = Proxy()
        prox.proxy_type = ProxyType.MANUAL
        prox.http_proxy = self.proxy
        # Copy so each instance's proxy stays out of the shared template.
        capabilities = webdriver.DesiredCapabilities.CHROME.copy()
        prox.add_to_capabilities(capabilities)
        self.driver = webdriver.Chrome(ChromeDriverManager().install(),
                                       options=chrome_settings,
                                       desired_capabilities=capabilities)
    except Exception as e:
        self.logger.critical("Driver Error: " + str(e))
def test_what_things_look_like(self):
    """The client adds the same capabilities as a plain Proxy pointing at its port."""
    firefox_template = (
        selenium.webdriver.common.desired_capabilities.DesiredCapabilities.FIREFOX)

    via_client = copy.deepcopy(firefox_template)
    self.client.add_to_capabilities(via_client)

    via_proxy = copy.deepcopy(firefox_template)
    reference = Proxy({'httpProxy': 'localhost:%d' % self.client.port})
    reference.add_to_capabilities(via_proxy)

    assert via_client == via_proxy
def testCanAddAutodetectProxyToDesiredCapabilities(self):
    """An auto-detect proxy is written under the "proxy" capability with type AUTODETECT."""
    autodetect = Proxy()
    autodetect.auto_detect = self.AUTODETECT_PROXY["autodetect"]

    actual = {}
    autodetect.add_to_capabilities(actual)

    expected_proxy = self.AUTODETECT_PROXY.copy()
    expected_proxy.update({"proxyType": "AUTODETECT"})
    self.assertEqual({"proxy": expected_proxy}, actual)
def testCanAddPACProxyToDesiredCapabilities(self):
    """A PAC proxy is written under the "proxy" capability with type PAC."""
    pac = Proxy()
    pac.proxy_autoconfig_url = self.PAC_PROXY["proxyAutoconfigUrl"]

    actual = {}
    pac.add_to_capabilities(actual)

    expected_proxy = self.PAC_PROXY.copy()
    expected_proxy.update({"proxyType": "PAC"})
    self.assertEqual({"proxy": expected_proxy}, actual)
def testCanAddPACProxyToDesiredCapabilities(self):
    """Adding a proxy with an autoconfig URL yields a PAC proxy capability."""
    pac_url = self.PAC_PROXY['proxyAutoconfigUrl']
    proxy_under_test = Proxy()
    proxy_under_test.proxy_autoconfig_url = pac_url

    produced = {}
    proxy_under_test.add_to_capabilities(produced)

    wanted = {'proxy': self.PAC_PROXY.copy()}
    wanted['proxy']['proxyType'] = 'PAC'
    self.assertEqual(wanted, produced)
def testCanAddAutodetectProxyToDesiredCapabilities(self):
    """Adding a proxy with auto_detect set yields an AUTODETECT proxy capability."""
    detect_flag = self.AUTODETECT_PROXY['autodetect']
    proxy_under_test = Proxy()
    proxy_under_test.auto_detect = detect_flag

    produced = {}
    proxy_under_test.add_to_capabilities(produced)

    wanted = {'proxy': self.AUTODETECT_PROXY.copy()}
    wanted['proxy']['proxyType'] = 'AUTODETECT'
    self.assertEqual(wanted, produced)
def testCanAddAutodetectProxyToDesiredCapabilities():
    """An auto-detect proxy serialises to the module-level AUTODETECT_PROXY plus its type."""
    subject = Proxy()
    subject.auto_detect = AUTODETECT_PROXY['autodetect']

    result = {}
    subject.add_to_capabilities(result)

    proxy_section = AUTODETECT_PROXY.copy()
    proxy_section.update({'proxyType': 'AUTODETECT'})
    expected = {'proxy': proxy_section}
    assert expected == result
def testCanAddPACProxyToDesiredCapabilities():
    """A PAC proxy serialises to the module-level PAC_PROXY plus its type."""
    subject = Proxy()
    subject.proxy_autoconfig_url = PAC_PROXY['proxyAutoconfigUrl']

    result = {}
    subject.add_to_capabilities(result)

    proxy_section = PAC_PROXY.copy()
    proxy_section.update({'proxyType': 'PAC'})
    expected = {'proxy': proxy_section}
    assert expected == result
def testCanAddToDesiredCapabilities(self):
    """Setting only http_proxy produces a manual proxy capability with that address."""
    http_only = Proxy()
    http_only.http_proxy = 'some.url:1234'

    produced = {}
    http_only.add_to_capabilities(produced)

    wanted_proxy = {'proxyType': 'manual', 'httpProxy': 'some.url:1234'}
    self.assertEqual({'proxy': wanted_proxy}, produced)
def testCanAddManualProxyToDesiredCapabilities(self):
    """A fully populated manual proxy is written under the "proxy" capability with type MANUAL."""
    settings = self.MANUAL_PROXY
    manual = Proxy()
    # (attribute on Proxy, key in MANUAL_PROXY), assigned in this order.
    assignments = (
        ("http_proxy", "httpProxy"),
        ("ftp_proxy", "ftpProxy"),
        ("no_proxy", "noProxy"),
        ("sslProxy", "sslProxy"),
        ("socksProxy", "socksProxy"),
        ("socksUsername", "socksUsername"),
        ("socksPassword", "socksPassword"),
    )
    for attribute, key in assignments:
        setattr(manual, attribute, settings[key])

    actual = {}
    manual.add_to_capabilities(actual)

    expected_proxy = self.MANUAL_PROXY.copy()
    expected_proxy["proxyType"] = "MANUAL"
    self.assertEqual({"proxy": expected_proxy}, actual)
def __init__(self, host="127.0.0.1", server="./selenium-server.jar"):
    """Start the Selenium server if needed and open a remote Firefox session.

    Args:
        host: Value used for the HTTP, FTP, SSL and no-proxy settings.
        server: Path to the Selenium server jar; it is started when the file
            exists and ``_server_started()`` reports no running server.

    Raises:
        SeleniumServerError: If the remote session cannot be reached.
    """
    if server and path.isfile(server) and not _server_started():
        self.selenium = _Selenium(server)

    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': host,
        'ftpProxy': host,
        'sslProxy': host,
        'noProxy': host
    })
    # Copy so the proxy is not stored in the shared class-level dict.
    caps = webdriver.DesiredCapabilities.FIREFOX.copy()
    proxy.add_to_capabilities(caps)

    try:
        self.driver = webdriver.Remote(desired_capabilities=caps)
    except URLError:
        raise SeleniumServerError
def testCanAddManualProxyToDesiredCapabilities(self):
    """Every manual proxy field set on Proxy shows up in the generated capability."""
    source = self.MANUAL_PROXY
    proxy_under_test = Proxy()
    # Attribute names on the left are assigned from the matching keys, in order.
    for attribute, key in (
            ('http_proxy', 'httpProxy'),
            ('ftp_proxy', 'ftpProxy'),
            ('no_proxy', 'noProxy'),
            ('sslProxy', 'sslProxy'),
            ('socksProxy', 'socksProxy'),
            ('socksUsername', 'socksUsername'),
            ('socksPassword', 'socksPassword'),
    ):
        setattr(proxy_under_test, attribute, source[key])

    produced = {}
    proxy_under_test.add_to_capabilities(produced)

    wanted = {'proxy': self.MANUAL_PROXY.copy()}
    wanted['proxy']['proxyType'] = 'MANUAL'
    self.assertEqual(wanted, produced)
def __init__(self, host="127.0.0.1", server="./selenium-server.jar"):
    """Start the Selenium server if nothing listens on port 4444 and open a remote Firefox session.

    Args:
        host: Value used for the HTTP, FTP, SSL and no-proxy settings.
        server: Path to the Selenium server jar; it is started when the file
            exists and no connection to 127.0.0.1:4444 can be made.

    Raises:
        SeleniumServerError: If the remote session cannot be reached.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 when something accepts the connection.
        port = sock.connect_ex(("127.0.0.1", 4444)) == 0
    finally:
        # The probe socket was previously never closed.
        sock.close()

    if server and path.isfile(server) and not port:
        self.selenium = _Selenium(server)

    proxy = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': host,
        'ftpProxy': host,
        'sslProxy': host,
        'noProxy': host
    })
    # Copy so the proxy is not stored in the shared class-level dict.
    caps = webdriver.DesiredCapabilities.FIREFOX.copy()
    proxy.add_to_capabilities(caps)

    try:
        self.driver = webdriver.Remote(desired_capabilities=caps)
    except URLError:
        raise SeleniumServerError
def testCanInitEmptyProxy():
    """Check the defaults of a freshly constructed ``Proxy``.

    The proxy type must be ``UNSPECIFIED``, every string setting empty and
    ``auto_detect`` exactly ``False``; the capabilities it contributes must
    contain only the ``'UNSPECIFIED'`` proxy type.
    """
    proxy = Proxy()

    assert ProxyType.UNSPECIFIED == proxy.proxy_type

    # String settings that must all start out empty, checked in this order.
    empty_by_default = (
        'http_proxy',
        'ftp_proxy',
        'no_proxy',
        'sslProxy',
        'socksProxy',
        'socksUsername',
        'socksPassword',
    )
    for attribute in empty_by_default:
        assert '' == getattr(proxy, attribute)

    assert proxy.auto_detect is False
    assert '' == proxy.proxy_autoconfig_url

    actual = {}
    proxy.add_to_capabilities(actual)

    expected = {'proxy': {'proxyType': 'UNSPECIFIED'}}
    assert expected == actual
def testCanInitEmptyProxy(self):
    """Check the defaults of a freshly constructed ``Proxy``.

    The proxy type must be ``UNSPECIFIED``, every string setting empty and
    ``auto_detect`` equal to ``False``; the capabilities it contributes must
    contain only the ``'UNSPECIFIED'`` proxy type.
    """
    proxy = Proxy()

    self.assertEqual(ProxyType.UNSPECIFIED, proxy.proxy_type)

    # String settings that must all start out empty, checked in this order.
    empty_by_default = (
        'http_proxy',
        'ftp_proxy',
        'no_proxy',
        'sslProxy',
        'socksProxy',
        'socksUsername',
        'socksPassword',
    )
    for attribute in empty_by_default:
        self.assertEqual('', getattr(proxy, attribute))

    self.assertEqual(False, proxy.auto_detect)
    self.assertEqual('', proxy.proxy_autoconfig_url)

    actual = {}
    proxy.add_to_capabilities(actual)

    expected = {'proxy': {'proxyType': 'UNSPECIFIED'}}
    self.assertEqual(expected, actual)
def _setup_driver(self):
    """Instantiate ``self.driver`` for the browser class in ``self.DRIVER``.

    Proxy settings are read from ``self.proxy`` (keys ``'http'`` and
    ``'https'``) and merged into the capabilities returned by
    ``self._build_capabilities()``. Options come from
    ``self._build_options()`` and get the headless flag from
    ``self.HEADLESS``. When ``self.WINDOW_SIZE`` is set, the window is
    resized after the driver is created.

    Raises:
        NotImplementedError: If ``self.DRIVER`` is not ``webdriver.Firefox``,
            ``webdriver.Chrome`` or ``webdriver.PhantomJS``.
    """
    proxy = Proxy()
    has_proxy = False
    if 'http' in self.proxy:
        proxy.http_proxy = self.proxy['http']
        has_proxy = True
    if 'https' in self.proxy:
        proxy.ssl_proxy = self.proxy['https']
        has_proxy = True
    if not has_proxy:
        # Selenium's manual-proxy setters switch the type to MANUAL on their
        # own and refuse to run once a different type has been chosen, so
        # DIRECT must only be selected when no proxy is configured.
        proxy.proxy_type = ProxyType.DIRECT

    capa = self._build_capabilities()
    proxy.add_to_capabilities(capa)

    options = self._build_options()
    # TODO some browsers don't need headless
    # TODO handle different proxy setting?
    options.set_headless(self.HEADLESS)

    if self.DRIVER is webdriver.Firefox:
        if self.responses_dirname and not os.path.isdir(self.responses_dirname):
            os.makedirs(self.responses_dirname)

        options.profile = DirFirefoxProfile(self.responses_dirname)
        if self.responses_dirname:
            capa['profile'] = self.responses_dirname
        self.driver = self.DRIVER(options=options, capabilities=capa)
    elif self.DRIVER is webdriver.Chrome:
        self.driver = self.DRIVER(options=options, desired_capabilities=capa)
    elif self.DRIVER is webdriver.PhantomJS:
        if self.responses_dirname:
            if not os.path.isdir(self.responses_dirname):
                os.makedirs(self.responses_dirname)
            log_path = os.path.join(self.responses_dirname, 'selenium.log')
        else:
            # No responses directory: log to a temporary file that is kept
            # on disk (delete=False) after this call returns.
            log_path = NamedTemporaryFile(
                prefix='weboob_selenium_', suffix='.log', delete=False
            ).name

        self.driver = self.DRIVER(desired_capabilities=capa, service_log_path=log_path)
    else:
        raise NotImplementedError()

    if self.WINDOW_SIZE:
        self.driver.set_window_size(*self.WINDOW_SIZE)
def testCanAddToDesiredCapabilities(self):
    """Check that proxy settings are written into a capabilities dict.

    HTTP, FTP, no-proxy and SSL settings plus an ``autodetect`` attribute are
    set on a fresh ``Proxy``; after ``add_to_capabilities`` the dict must hold
    them under ``'proxy'`` together with a ``'MANUAL'`` proxy type.
    """
    desired_capabilities = {}

    proxy = Proxy()
    proxy.http_proxy = 'some.url:1234'
    proxy.ftp_proxy = 'ftp.proxy:1234'
    proxy.no_proxy = 'localhost, foo.localhost'
    proxy.sslProxy = 'ssl.proxy:1234'
    proxy.autodetect = 'True'
    proxy.add_to_capabilities(desired_capabilities)

    expected_capabilities = {
        'proxy': {
            'proxyType': 'MANUAL',
            'httpProxy': 'some.url:1234',
            'ftpProxy': 'ftp.proxy:1234',
            'noProxy': 'localhost, foo.localhost',
            'sslProxy': 'ssl.proxy:1234',
            'autodetect': 'True',
        }
    }

    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('descap %s' % (desired_capabilities,))
    self.assertEqual(expected_capabilities, desired_capabilities)
def get_browser(user_agent, proxy_address, cert_path):
    """Set up a Selenium browser with given user agent, proxy and SSL cert.

    The browser is chosen by ``settings.CAPTURE_BROWSER`` (``'PhantomJS'``,
    ``'Firefox'`` or ``'Chrome'``). Every branch routes traffic through
    ``proxy_address``.

    Args:
        user_agent: User-agent string; only applied in the PhantomJS branch.
        proxy_address: Address of the proxy the browser should use.
        cert_path: SSL certificates path; only passed to PhantomJS.

    Returns:
        The started webdriver, with the implicit wait and page-load timeout
        set from ``ELEMENT_DISCOVERY_TIMEOUT`` and ``ROBOTS_TXT_TIMEOUT``.

    Raises:
        AssertionError: If ``settings.CAPTURE_BROWSER`` is not one of the
            supported values.
    """
    # PhantomJS
    if settings.CAPTURE_BROWSER == 'PhantomJS':
        desired_capabilities = dict(DesiredCapabilities.PHANTOMJS)
        desired_capabilities["phantomjs.page.settings.userAgent"] = user_agent
        browser = webdriver.PhantomJS(
            executable_path=getattr(settings, 'PHANTOMJS_BINARY', 'phantomjs'),
            desired_capabilities=desired_capabilities,
            service_args=[
                "--proxy=%s" % proxy_address,
                "--ssl-certificates-path=%s" % cert_path,
                "--ignore-ssl-errors=true",
                "--local-url-access=false",
                "--local-storage-path=.",
            ],
            service_log_path=settings.PHANTOMJS_LOG)

    # Firefox
    elif settings.CAPTURE_BROWSER == 'Firefox':
        desired_capabilities = dict(DesiredCapabilities.FIREFOX)
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': proxy_address,
            'ftpProxy': proxy_address,
            'sslProxy': proxy_address,
        })
        proxy.add_to_capabilities(desired_capabilities)
        profile = webdriver.FirefoxProfile()
        profile.accept_untrusted_certs = True
        profile.assume_untrusted_cert_issuer = True
        browser = webdriver.Firefox(
            capabilities=desired_capabilities,
            firefox_profile=profile)

    # Chrome
    elif settings.CAPTURE_BROWSER == 'Chrome':
        # http://blog.likewise.org/2015/01/setting-up-chromedriver-and-the-selenium-webdriver-python-bindings-on-ubuntu-14-dot-04/
        download_dir = os.path.abspath('./downloads')
        # The directory survives between calls, so only create it when it is
        # missing; an unconditional mkdir fails on every call after the first.
        if not os.path.isdir(download_dir):
            os.mkdir(download_dir)
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--proxy-server=%s' % proxy_address)
        chrome_options.add_argument('--test-type')
        chrome_options.add_experimental_option("prefs", {
            "profile.default_content_settings.popups": "0",
            "download.default_directory": download_dir,
            "download.prompt_for_download": "false",
        })
        desired_capabilities = chrome_options.to_capabilities()
        desired_capabilities["acceptSslCerts"] = True

        # for more detailed progress updates
        # desired_capabilities["loggingPrefs"] = {'performance': 'INFO'}
        # then:
        # performance_log = browser.get_log('performance')

        browser = webdriver.Chrome(desired_capabilities=desired_capabilities)

    else:
        # Raised explicitly (not via `assert`) so the check still fires when
        # Python runs with -O; the exception type is unchanged.
        raise AssertionError("Invalid value for CAPTURE_BROWSER.")

    browser.implicitly_wait(ELEMENT_DISCOVERY_TIMEOUT)
    browser.set_page_load_timeout(ROBOTS_TXT_TIMEOUT)

    return browser
def start_client(self):
    """Create the webdriver selected by ``self.driver`` as ``self.selenium``.

    Capabilities are parsed from the ``name:value`` strings in
    ``self.capabilities``: all-digit values become ``int`` and
    ``true``/``false`` (any case) become ``bool``. When both
    ``self.proxy_host`` and ``self.proxy_port`` are set, an HTTP/SSL proxy is
    added to the capabilities. If ``self.event_listener`` is set and the
    driver is not already an ``EventFiringWebDriver``, it is wrapped in one.

    Raises:
        AttributeError: For the ``REMOTE`` driver, when ``self.browser_name``
            is not an attribute of ``webdriver.DesiredCapabilities`` or when
            creating the remote driver raises ``AttributeError``.
    """
    def invalid_browser_error():
        # Build the error listing every browser name DesiredCapabilities has.
        valid_browsers = [attr for attr in dir(webdriver.DesiredCapabilities)
                          if not attr.startswith('__')]
        return AttributeError(
            "Invalid browser name: '%s'. Valid options are: %s" %
            (self.browser_name, ', '.join(valid_browsers)))

    capabilities = {}
    for c in self.capabilities:
        # Split on the first colon only so that values may contain ':'.
        name, value = c.split(':', 1)
        # handle integer capabilities
        if value.isdigit():
            value = int(value)
        # handle boolean capabilities
        elif value.lower() in ['true', 'false']:
            value = value.lower() == 'true'
        capabilities[name] = value

    if self.proxy_host and self.proxy_port:
        proxy = Proxy()
        proxy.http_proxy = '%s:%s' % (self.proxy_host, self.proxy_port)
        proxy.ssl_proxy = proxy.http_proxy
        proxy.add_to_capabilities(capabilities)

    profile = None
    driver_name = self.driver.upper()

    if driver_name == 'REMOTE':
        # Look the browser up inside a try so that an unknown name yields
        # the descriptive error instead of a bare getattr failure.
        try:
            browser_capabilities = getattr(webdriver.DesiredCapabilities,
                                           self.browser_name.upper())
        except AttributeError:
            raise invalid_browser_error()
        capabilities.update(browser_capabilities)

        if json.loads(self.chrome_options) or self.extension_paths:
            # NOTE(review): this replaces (rather than extends) everything
            # collected so far, including any proxy -- confirm intended.
            capabilities = self.create_chrome_options(
                self.chrome_options,
                self.extension_paths).to_capabilities()
        if self.browser_name.upper() == 'FIREFOX':
            profile = self.create_firefox_profile(
                self.firefox_preferences,
                self.profile_path,
                self.extension_paths)
        if self.browser_version:
            capabilities['version'] = self.browser_version
        capabilities['platform'] = self.platform.upper()
        executor = 'http://%s:%s/wd/hub' % (self.host, self.port)
        try:
            self.selenium = webdriver.Remote(
                command_executor=executor,
                desired_capabilities=capabilities or None,
                browser_profile=profile)
        except AttributeError:
            raise invalid_browser_error()

    elif driver_name == 'CHROME':
        options = None
        if self.chrome_options or self.extension_paths:
            options = self.create_chrome_options(
                self.chrome_options,
                self.extension_paths)
        if self.chrome_path:
            self.selenium = webdriver.Chrome(
                executable_path=self.chrome_path,
                chrome_options=options,
                desired_capabilities=capabilities or None)
        else:
            self.selenium = webdriver.Chrome(
                chrome_options=options,
                desired_capabilities=capabilities or None)

    elif driver_name == 'FIREFOX':
        binary = FirefoxBinary(self.firefox_path) if self.firefox_path else None
        profile = self.create_firefox_profile(
            self.firefox_preferences,
            self.profile_path,
            self.extension_paths)
        self.selenium = webdriver.Firefox(
            firefox_binary=binary,
            firefox_profile=profile,
            capabilities=capabilities or None)

    elif driver_name == 'IE':
        self.selenium = webdriver.Ie()

    elif driver_name == 'PHANTOMJS':
        self.selenium = webdriver.PhantomJS()

    elif driver_name == 'OPERA':
        capabilities.update(webdriver.DesiredCapabilities.OPERA)
        self.selenium = webdriver.Opera(
            executable_path=self.opera_path,
            desired_capabilities=capabilities)

    elif driver_name == 'BROWSERSTACK':
        from cloud import BrowserStack
        self.cloud = BrowserStack()
        self.selenium = self.cloud.driver(
            self.test_id, capabilities, self.options)

    elif driver_name == 'SAUCELABS':
        from cloud import SauceLabs
        self.cloud = SauceLabs()
        self.selenium = self.cloud.driver(
            self.test_id, capabilities, self.options, self.keywords)

    else:
        # Any other value is used, with its original case, as the name of a
        # driver class on the webdriver module.
        self.selenium = getattr(webdriver, self.driver)()

    if self.event_listener is not None and not isinstance(self.selenium, EventFiringWebDriver):
        self.selenium = EventFiringWebDriver(self.selenium, self.event_listener())
def start_webdriver_client(self):
    """Create the webdriver selected by ``self.driver`` as ``self.selenium``.

    Capabilities start from the JSON object in ``self.capabilities`` (when
    set). When both ``self.proxy_host`` and ``self.proxy_port`` are set, an
    HTTP/SSL proxy is added to them.

    Raises:
        AttributeError: For the ``REMOTE`` driver, when ``self.browser_name``
            is not an attribute of ``webdriver.DesiredCapabilities`` or when
            creating the remote driver raises ``AttributeError``.
    """
    def invalid_browser_error():
        # Build the error listing every browser name DesiredCapabilities has.
        valid_browsers = [attr for attr in dir(webdriver.DesiredCapabilities)
                          if not attr.startswith('__')]
        return AttributeError(
            "Invalid browser name: '%s'. Valid options are: %s" %
            (self.browser_name, ', '.join(valid_browsers)))

    capabilities = {}
    if self.capabilities:
        capabilities.update(json.loads(self.capabilities))

    if self.proxy_host and self.proxy_port:
        proxy = Proxy()
        proxy.http_proxy = '%s:%s' % (self.proxy_host, self.proxy_port)
        proxy.ssl_proxy = proxy.http_proxy
        proxy.add_to_capabilities(capabilities)

    profile = None
    driver_name = self.driver.upper()

    if driver_name == 'REMOTE':
        # Look the browser up inside a try so that an unknown name yields
        # the descriptive error instead of a bare getattr failure.
        try:
            browser_capabilities = getattr(webdriver.DesiredCapabilities,
                                           self.browser_name.upper())
        except AttributeError:
            raise invalid_browser_error()
        capabilities.update(browser_capabilities)

        if json.loads(self.chrome_options) or self.extension_paths:
            # NOTE(review): this replaces (rather than extends) everything
            # collected so far, including any proxy -- confirm intended.
            capabilities = self.create_chrome_options(
                self.chrome_options,
                self.extension_paths).to_capabilities()
        if self.browser_name.upper() == 'FIREFOX':
            profile = self.create_firefox_profile(
                self.firefox_preferences,
                self.profile_path,
                self.extension_paths)
        if self.browser_version:
            capabilities['version'] = self.browser_version
        capabilities['platform'] = self.platform.upper()
        executor = 'http://%s:%s/wd/hub' % (self.host, self.port)
        try:
            self.selenium = webdriver.Remote(
                command_executor=executor,
                desired_capabilities=capabilities or None,
                browser_profile=profile)
        except AttributeError:
            raise invalid_browser_error()

    elif driver_name == 'CHROME':
        options = None
        if self.chrome_options or self.extension_paths:
            options = self.create_chrome_options(
                self.chrome_options,
                self.extension_paths)
        if self.chrome_path:
            self.selenium = webdriver.Chrome(
                executable_path=self.chrome_path,
                chrome_options=options,
                desired_capabilities=capabilities or None)
        else:
            self.selenium = webdriver.Chrome(
                chrome_options=options,
                desired_capabilities=capabilities or None)

    elif driver_name == 'FIREFOX':
        binary = FirefoxBinary(self.firefox_path) if self.firefox_path else None
        profile = self.create_firefox_profile(
            self.firefox_preferences,
            self.profile_path,
            self.extension_paths)
        self.selenium = webdriver.Firefox(
            firefox_binary=binary,
            firefox_profile=profile,
            capabilities=capabilities or None)

    elif driver_name == 'IE':
        self.selenium = webdriver.Ie()

    elif driver_name == 'OPERA':
        capabilities.update(webdriver.DesiredCapabilities.OPERA)
        self.selenium = webdriver.Opera(
            executable_path=self.opera_path,
            desired_capabilities=capabilities)

    else:
        # Any other value is used, with its original case, as the name of a
        # driver class on the webdriver module.
        self.selenium = getattr(webdriver, self.driver)()