def __init__(self):
    """Store the crawl settings and start a PhantomJS driver.

    The page range and referer URL are copied from the module-level
    ``START_PAGE``, ``END_PAGE`` and ``REFER_FIRST`` names.  When ``config``
    exposes ``PHANTOMJS_PATH`` it is handed to PhantomJS as the executable
    path; otherwise PhantomJS is created with its default arguments.
    """
    self.start_page = START_PAGE
    self.end_page = END_PAGE
    self.weixin_url = REFER_FIRST

    # Only pass executable_path when the config actually defines one.
    driver_kwargs = {}
    if hasattr(config, 'PHANTOMJS_PATH'):
        driver_kwargs['executable_path'] = config.PHANTOMJS_PATH
    self.driver = PhantomJS(**driver_kwargs)
def __init__(self):
    """Store the crawl settings, start PhantomJS and open the Mongo collection.

    The page range and referer URL are copied from the module-level
    ``START_PAGE``, ``END_PAGE`` and ``REFER_FIRST`` names.  When ``config``
    exposes ``PHANTOMJS_PATH`` it is handed to PhantomJS as the executable
    path; otherwise PhantomJS is created with its default arguments.
    """
    self.start_page = START_PAGE
    self.end_page = END_PAGE
    self.weixin_url = REFER_FIRST

    # Only pass executable_path when the config actually defines one.
    driver_kwargs = {}
    if hasattr(config, 'PHANTOMJS_PATH'):
        driver_kwargs['executable_path'] = config.PHANTOMJS_PATH
    self.driver = PhantomJS(**driver_kwargs)

    mongo = MongoClient(HOST, PORT)
    self.client = mongo
    self.collection = mongo[DB][COLLECTION]
    # ``self.uids`` is defined outside this block; its value is captured once.
    self.all_uids = self.uids
def create_driver(self):
    """Create the Selenium driver for this account and store it in ``self.driver``.

    The Firefox branch is hard-wired on (``if 1``).  It launches Firefox with
    the profile directory ``~/.mozilla/firefox/<account name>``.  The
    PhantomJS branch is kept as a manual fallback and sends traffic through
    the HTTP proxy taken from ``self.account['Proxy']`` (``host:port`` prefix).
    """
    if 1:
        caps = DesiredCapabilities().FIREFOX.copy()
        profile_path = (path.expanduser('~') + '/.mozilla/firefox/'
                        + self.account['name'])
        caps['moz:firefoxOptions'] = {
            "args": ["-profile", profile_path],  # geckodriver 0.18+
        }
        profile = FirefoxProfile(profile_path)
        self.driver = Firefox(profile, capabilities=caps)
    else:
        # PhantomJS, see https://github.com/detro/ghostdriver
        # Copy the capabilities so the user-agent override does not leak
        # into the shared DesiredCapabilities.PHANTOMJS dict.
        caps = DesiredCapabilities().PHANTOMJS.copy()
        caps["phantomjs.page.settings.userAgent"] = \
            'Mozilla/5.0 (X11; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0'
        proxy_host_port = ':'.join(self.account['Proxy'].split(':')[:2])
        service_args = [
            '--proxy={}'.format(proxy_host_port),
            '--proxy-type=http',
        ]
        print(service_args)
        # The PhantomJS driver takes ``desired_capabilities``; the previous
        # ``capabilities=`` keyword is not accepted by its constructor.
        self.driver = PhantomJS(service_args=service_args,
                                desired_capabilities=caps)
        # NOTE(review): the original layout was lost; the resize is assumed
        # to belong to the PhantomJS branch only -- confirm.
        self.driver.set_window_size(1120, 550)
def init_phantom(self):
    """Start the PhantomJS driver for this crawl and store it in ``self.phantom``.

    Cookie and log files share the prefix ``self.prefixfiles``, built from
    the scrapyd ``logs_dir``, ``HYPHE_PROJECT``, the spider name and the job
    id.  A proxy argument is added only when ``PROXY`` is set and does not
    start with ``':'``.
    """
    prefix = os.path.join(scrapyd_config().get('logs_dir'), HYPHE_PROJECT,
                          self.name, self.crawler.settings['JOBID'])
    self.prefixfiles = prefix
    self.log("Using path %s for PhantomJS crawl" % prefix, log.INFO)

    # Command-line switches for the phantomjs binary (proxy first, if any).
    phantom_args = []
    if PROXY and not PROXY.startswith(':'):
        phantom_args.append('--proxy=%s' % PROXY)
    phantom_args.extend([
        '--cookies-file=%s-phantomjs-cookie.txt' % prefix,
        '--ignore-ssl-errors=true',
        '--load-images=false',
    ])

    # Work on a private copy of the default PhantomJS capabilities.
    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    self.capabilities = capabilities
    capabilities['phantomjs.page.settings.userAgent'] = self.user_agent
    capabilities['takesScreenshot'] = False
    capabilities['phantomjs.page.settings.javascriptCanCloseWindows'] = False
    capabilities['phantomjs.page.settings.javascriptCanOpenWindows'] = False

    self.phantom = PhantomJS(
        executable_path=PHANTOM['PATH'],
        service_args=phantom_args,
        desired_capabilities=capabilities,
        service_log_path="%s-phantomjs.log" % prefix,
    )
    self.phantom.implicitly_wait(10)
    self.phantom.set_page_load_timeout(60)
    self.phantom.set_script_timeout(self.ph_timeout + 15)
def get_driver(self):
    """Start a new browser and return its WebDriver.

    The browser is chosen by the ``browser`` option in the ``selenium``
    section of ``self.config``.  Supported values are ``firefox`` (which
    also reads the ``firefox_path`` option), ``chrome`` and ``phantomjs``.

    Raises:
        RuntimeError: if the configured browser is none of the above.
    """
    browser_name = self.config.get('selenium', 'browser')

    if browser_name == 'firefox':
        from selenium.webdriver import Firefox
        browser_path = self.config.get('selenium', 'firefox_path')
        browser_binary = FirefoxBinary(browser_path)
        return Firefox(firefox_binary=browser_binary)

    if browser_name == 'chrome':
        from selenium.webdriver import Chrome
        return Chrome()

    if browser_name == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()

    # Name the rejected value so a misconfiguration is easy to spot.
    raise RuntimeError(
        'Unsupported/unknown browser: {!r}'.format(browser_name))
def test_plotly(remove_build):
    """Build a Bowtie app with a Plotly view and check that it is served.

    The layout is built into a ``build`` directory next to this file, the
    generated ``src/server.py`` is started as a subprocess, and PhantomJS
    checks the page title at ``http://localhost:9991``.
    """
    viz = Plotly()
    ctrl = Nouislider()
    ctrl2 = Button()

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'build')
    layout = Layout(directory=path)
    layout.add(viz)
    layout.add_sidebar(ctrl)
    layout.add_sidebar(ctrl2)
    layout.subscribe(callback, ctrl.on_change)
    layout.subscribe(callback, ctrl2.on_click)
    layout.build()

    # Copy the environment so the PYTHONPATH tweak stays local to the server
    # process instead of mutating os.environ for the whole test run.
    env = os.environ.copy()
    env['PYTHONPATH'] = '{}:{}'.format(os.getcwd(),
                                       os.environ.get('PYTHONPATH', ''))
    server = subprocess.Popen(os.path.join(path, 'src/server.py'), env=env)
    try:
        # Give the server time to start listening.
        time.sleep(5)
        driver = PhantomJS()
        try:
            driver.get('http://localhost:9991')
            assert driver.title == 'Bowtie App'
        finally:
            driver.quit()
    finally:
        # Always stop the server, even when the assertion fails.
        server.kill()
def __init__(self, url, phantomjs=None, resolution=None, ya_class=None,
             screen_path=None, screen_pattern=None, csv_path=None):
    """Configure the instance and start a PhantomJS driver.

    Every optional argument falls back to a module-level default when it is
    falsy.

    Args:
        url: Target URL, stored as given.
        phantomjs: Path to the phantomjs binary; must be an existing file.
        resolution: Two-item list or tuple used as the window size.
        ya_class: Stored in ``self.ya_class``.
        screen_path: Stored in ``self.screen_path``.
        screen_pattern: File-name pattern; must contain ``%s``.
        csv_path: Path of the statistics CSV file.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    self.url = url

    self.phantomjs = phantomjs if phantomjs else DEFAULT_PHANTOMJS
    assert os.path.isfile(self.phantomjs), "phantomjs не найден"

    if not resolution:
        resolution = FULLHD
    assert isinstance(resolution, (list, tuple))
    assert len(resolution) == 2

    self.ya_class = ya_class if ya_class else DEFAULT_YA_CLASS
    self.screen_path = screen_path if screen_path else PATH

    self.screen_pattern = screen_pattern if screen_pattern else '%s.png'
    assert '%s' in self.screen_pattern

    self.csv_path = csv_path if csv_path else os_join(PATH, 'statistic.csv')

    browser = PhantomJS(self.phantomjs)
    self.driver = browser
    browser.set_window_size(*resolution)
def get_selenium(**kwargs):
    """Return a WebDriver chosen by the ``TEST_SELENIUM_DRIVER`` setting.

    ``chrome`` and ``chrome_headless`` give a Chrome driver, ``phantomjs`` a
    PhantomJS driver, and anything else (including the default ``firefox``)
    a Firefox driver.  For Chrome, the ``CHROME_DRIVER_PATH`` environment
    variable, when set, is used as the chromedriver path.

    Args:
        **kwargs: Extra Chrome options, added as ``key=val`` or as a bare
            ``key`` when the value is ``None``.
    """
    driver = getattr(settings, 'TEST_SELENIUM_DRIVER', 'firefox')

    if driver == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()

    if driver not in ('chrome', 'chrome_headless'):
        from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
        return FirefoxDriver()

    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver

    options = Options()
    if driver == 'chrome_headless':
        options.add_argument('headless')
        options.add_argument('disable-gpu')
        # NOTE(review): the original layout was lost; the kwargs loop is
        # assumed to sit inside the headless branch -- confirm.
        for key, val in kwargs.items():
            if val is None:
                options.add_argument('{key}'.format(key=key))
            else:
                options.add_argument('{key}={val}'.format(key=key, val=val))

    driver_path = os.environ.get('CHROME_DRIVER_PATH', None)
    if driver_path is None:
        return ChromeDriver(options=options)
    return ChromeDriver(driver_path, options=options)
def get_crawler(
        self, dynamic,
        phantomjs_path="/Users/mac/Desktop/Web scraping/phantomjs-2.1.1-macosx/bin/phantomjs"):
    """Return a PhantomJS driver for dynamic pages, or ``None`` otherwise.

    Args:
        dynamic: Truthy when the page needs a JavaScript-capable crawler.
        phantomjs_path: Path to the phantomjs binary.  It defaults to the
            location that used to be hard-coded, so existing callers are
            unaffected while other machines can pass their own path.

    Returns:
        A ``PhantomJS`` instance when ``dynamic`` is truthy, else ``None``.
    """
    if not dynamic:
        return None
    return PhantomJS(phantomjs_path)
def scrape_statuses(self):
    """Load ``MTA_URL`` in PhantomJS and append one entry per line to ``self.lines``.

    The rendered page is parsed with BeautifulSoup.  For every name in
    ``LINES`` the result of ``self.get_line`` is appended to ``self.lines``.
    """
    headless_browser = PhantomJS()
    try:
        headless_browser.get(MTA_URL)
        page_source = headless_browser.page_source
    finally:
        # Shut the phantomjs process down; it was previously left running.
        headless_browser.quit()

    soup = BeautifulSoup(page_source, "html.parser")
    for line_name in LINES:
        self.lines.append(self.get_line(soup, line_name))
def main():
    """Fetch the user's Steam library, look up sales and write them to ``games.txt``.

    Configuration comes from ``read_config_values``.  The sales text returned
    by ``fetch_sales`` is written to ``games.txt`` in the working directory.
    """
    steam_id, api, return_amount, user_categories = read_config_values()
    print("SteamID:", steam_id)
    print("API key:", api)
    print("Return amount:", return_amount)
    if user_categories:
        check_user_categories_validity(user_categories)
        print("Categories:", "; ".join(user_categories))
    print()

    print("Fetching your Steam library..")
    user_library = fetch_user_library(api, steam_id)
    print("Found {} in your library.".format(len(user_library)))

    print("Opening PhantomJS..")
    driver = PhantomJS(cwd + r"\dependencies\phantomJS\phantomjs.exe",
                       service_log_path=cwd + r"\dependencies\phantomJS\ghostdriver.log")
    try:
        print("Opening SteamDB..")
        output = fetch_sales(driver, user_library, return_amount,
                             user_categories)
    finally:
        # Quit even when fetch_sales fails so phantomjs.exe is not orphaned.
        driver.quit()

    with open("games.txt", 'w', encoding='utf-8') as file:
        file.write(output)
    input("\nDone. I also wrote the games to a text file.")
def get_driver(self):
    """Start a new browser and return its WebDriver.

    The browser is chosen by the ``browser`` option in the ``selenium``
    section of ``self.config``.  Supported values are ``firefox``,
    ``chrome`` and ``phantomjs``.

    Raises:
        RuntimeError: if the configured browser is none of the above.
    """
    browser_name = self.config.get('selenium', 'browser')

    if browser_name == 'firefox':
        from selenium.webdriver import Firefox
        browser_binary = FirefoxBinary()
        driver = Firefox(firefox_binary=browser_binary)
        # Workaround for http://stackoverflow.com/a/42770761/489916
        driver._is_remote = False
        return driver

    if browser_name == 'chrome':
        from selenium.webdriver import Chrome
        return Chrome()

    if browser_name == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()

    # Name the rejected value so a misconfiguration is easy to spot.
    raise RuntimeError(
        'Unsupported/unknown browser: {!r}'.format(browser_name))
def __init__(self):
    """Start PhantomJS from the binary that sits next to this module.

    The module directory is printed, the request counter ``self.req`` is
    reset to 0, and the driver gets a 3-second implicit wait.  The
    ghostdriver log is sent to the null device.
    """
    app_root = os.path.dirname(os.path.abspath(__file__))
    print(app_root)

    self.req = 0

    phantomjs_binary = "{}/phantomjs".format(app_root)
    self.driver = PhantomJS(phantomjs_binary,
                            service_log_path=os.path.devnull)
    self.driver.implicitly_wait(3)
def __init__(self):
    """Set up a maximized PhantomJS driver and the cnstock.com site details.

    The parent initializer is called with ``init=False``.  A 15-second
    ``WebDriverWait`` bound to the driver is kept in ``self.wait``.
    """
    super().__init__(init=False)

    self.driver = PhantomJS()
    self.driver.maximize_window()
    self.wait = WebDriverWait(self.driver, 15)

    # Site being scraped and its display name.
    self.url, self.name = 'http://www.cnstock.com/', '中国证券网'
def __init__(self):
    """Start PhantomJS with a Firefox-on-Mac user agent.

    The driver binary is taken from the relative path
    ``phantomjs-2.1.1-windows\\bin\\phantomjs.exe``.
    """
    # Override the user agent on a private copy of the default capabilities.
    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities.update({
        "phantomjs.page.settings.userAgent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 ",
    })

    self.driver = PhantomJS(
        executable_path=r'phantomjs-2.1.1-windows\bin\phantomjs.exe',
        desired_capabilities=capabilities,
    )
def phantomjs_driver(request, capabilities, driver_path):
    """Return a WebDriver using a PhantomJS instance.

    Args:
        request: Not used by this function.
        capabilities: Desired capabilities; passed on only when truthy.
        driver_path: Path to the phantomjs executable; passed on unless it
            is ``None``.
    """
    options = {}
    if driver_path is not None:
        options['executable_path'] = driver_path
    if capabilities:
        options['desired_capabilities'] = capabilities
    return PhantomJS(**options)
def main():
    """Download the OpenVPN configs listed on the first usable mirror.

    An optional first command-line argument sets the global ``HEAD``, the
    maximum number of table rows to process; a non-numeric value falls back
    to 10.  Mirrors from ``read_mirrors`` are probed in order.  The first
    one that is reachable and accepted by ``update_mirrors`` is used.  If no
    mirror passes ``update_mirrors``, the last reachable one is used.

    Configs are saved through ``get_ovpn`` as ``<country>/<index>``, where
    the index counts up from 0 per country.

    Raises:
        Warning: if no mirror could be reached at all.
    """
    global HEAD
    if len(sys.argv) > 1:
        try:
            HEAD = int(sys.argv[1])
        except ValueError:
            HEAD = 10

    # Probe the mirror list.
    reachable = False
    for mirror in read_mirrors():
        print("Testing:", mirror)
        try:
            # Only reachability matters, so close the response right away.
            request.urlopen(mirror).close()
        except Exception:
            print("Testing on", mirror, "failed")
            continue
        # Record only reachable mirrors.  Previously a later unreachable
        # mirror could overwrite the URL that was then scraped.
        reachable = True
        cururl = mirror
        try:
            update_mirrors(cururl)
        except Exception:
            continue
        break

    if not reachable:
        raise Warning('All mirrors unavailable!')
    print('Available mirror:', cururl)

    # Scrape the VPN table.
    countries = dict()
    dr = PhantomJS()
    try:
        dr.get(cururl)
        page = Selector(text=dr.page_source)\
            .xpath('.//td[@id="vpngate_inner_contents_td"]/'
                   'table[@id="vg_hosts_table_id"]//tr')
        if HEAD < len(page):
            page = page[:HEAD]
        print('Pagelen:', len(page))

        for vpn in page:
            # Skip header rows.
            if len(vpn.xpath('./td[@class="vg_table_header"]')) > 0:
                continue
            row = vpn.xpath('./td')
            country = row[0].xpath('./text()').extract_first()
            country = '_'.join(country.split(' '))
            ovpn = row[6].xpath('./a/@href').extract_first()
            if not ovpn:
                continue
            if country in countries:
                countries[country] += 1
            else:
                # First config for this country: start at 0 and make sure
                # the target directory exists.
                countries[country] = 0
                if not os.path.exists(country):
                    os.mkdir(country)
            get_ovpn(url=cururl + ovpn,
                     save_to=country + '/' + str(countries[country]))
    finally:
        # Stop phantomjs even when scraping or a download fails.
        dr.quit()
def getHtmlSource(url, time=10):
    """Return the page source of ``url`` as rendered by PhantomJS.

    PhantomJS runs with SSL errors ignored, any SSL protocol allowed and web
    security disabled.

    Args:
        url: Address to load.
        time: Timeout in seconds handed to ``WebDriverWait``.

    Returns:
        The driver's ``page_source`` after ``get`` returns.
    """
    driver = PhantomJS(service_args=[
        '--ignore-ssl-errors=true',
        '--ssl-protocol=any',
        '--web-security=false',
    ])
    try:
        driver.get(url)
        # NOTE(review): this only constructs the wait object; without an
        # ``.until(...)`` call nothing is waited for.  Kept to preserve
        # behavior -- confirm which condition was intended.
        WebDriverWait(driver, timeout=time)
        return driver.page_source
    finally:
        # Each call used to leave a phantomjs process running.
        driver.quit()
def get_selenium():
    """Return a WebDriver chosen by the ``TEST_SELENIUM_DRIVER`` setting.

    ``chrome`` gives a Chrome driver and ``phantomjs`` a PhantomJS driver.
    Any other value, including the default ``firefox``, gives a Firefox
    driver.
    """
    driver = getattr(settings, 'TEST_SELENIUM_DRIVER', 'firefox')

    if driver == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()

    if driver == 'chrome':
        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        return ChromeDriver()

    from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
    return FirefoxDriver()
class SeleniumMiddleware:
    """Middleware that fetches each request through a PhantomJS browser."""

    # One driver created when the class body runs, shared by all instances.
    driver = PhantomJS()

    def process_request(self, request, spider):
        """Load ``request.url`` in the browser and return the rendered page.

        The driver is also attached to the spider as ``spider.driver``.

        Returns:
            An ``HtmlResponse`` built from the browser's current URL and
            page source, encoded as UTF-8.
        """
        browser = self.driver
        spider.driver = browser
        browser.get(request.url)
        return HtmlResponse(
            browser.current_url,
            body=browser.page_source,
            encoding='utf-8',
            request=request,
        )
def get_selenium_driver(driver_name):
    """Return a new WebDriver for ``driver_name``.

    Args:
        driver_name: One of ``'chrome'``, ``'phantomjs'`` or ``'firefox'``.

    Raises:
        ValueError: if ``driver_name`` is not one of the supported names.
    """
    if driver_name == 'chrome':
        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        return ChromeDriver()
    if driver_name == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()
    if driver_name == 'firefox':
        from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
        return FirefoxDriver()
    # Message typo fixed ("Unkown"); the rejected value is now reported too.
    raise ValueError('Unknown driver name: {!r}'.format(driver_name))
def __init__(self):
    """Start a PhantomJS browser and set the default timeout.

    The driver binary is ``./drivers/phantomjs`` and the port comes from
    ``free_port()``.
    """
    driver_options = {
        'executable_path': './drivers/phantomjs',
        'port': free_port(),
    }
    self.browser = PhantomJS(**driver_options)
    self.timeout = 5  # seconds
def setUp(self):
    """Start a new browser instance for each test.

    The browser comes from ``SELENIUM_BROWSER`` (default
    ``settings.SELENIUM_DEFAULT_BROWSER``).  ``SELENIUM_HOST`` selects the
    Sauce Labs driver.  ``SELENIUM_COMMAND_EXECUTOR`` sends chrome/firefox
    to a remote hub.  Unrecognised browsers fall back to local Chrome.
    """
    self._screenshot_number = 1
    self.browser = os.getenv('SELENIUM_BROWSER',
                             settings.SELENIUM_DEFAULT_BROWSER)

    requested_executor = os.getenv('SELENIUM_COMMAND_EXECUTOR', '')
    executor_was_set_explicitly = requested_executor != ''
    command_executor = requested_executor or 'http://127.0.0.1:4444/wd/hub'

    # Browsers that only need a remote driver with a stock capability set.
    # Values are DesiredCapabilities attribute names, resolved when used.
    plain_remote_browsers = {
        'htmlunit': 'HTMLUNITWITHJS',
        'opera': 'OPERA',
        'iexplore': 'INTERNETEXPLORER',
        # safari requires a Safari extension built from source and installed
        'safari': 'SAFARI',
    }

    browser = self.browser
    if os.getenv('SELENIUM_HOST'):
        self.sel = self.sauce_labs_driver()
    elif executor_was_set_explicitly and browser in ('chrome', 'firefox'):
        if browser == 'chrome':
            caps = DesiredCapabilities.CHROME
            browser_profile = None
        else:
            caps = DesiredCapabilities.FIREFOX
            browser_profile = self.get_firefox_profile()
        self.sel = RemoteWebDriver(command_executor=command_executor,
                                   desired_capabilities=caps,
                                   browser_profile=browser_profile)
    elif browser == 'firefox':
        self.sel = Firefox(self.get_firefox_profile())
    elif browser in plain_remote_browsers:
        self.sel = RemoteWebDriver(
            command_executor=command_executor,
            desired_capabilities=getattr(DesiredCapabilities,
                                         plain_remote_browsers[browser]))
    elif browser in ['ios', 'ipad', 'ipod', 'iphone']:
        capabilities = {
            'app': 'safari',
            'browserName': '',
            'device': 'iPhone Simulator',
            'os': 'iOS 6.1'
        }
        self.sel = RemoteWebDriver(
            command_executor=self.appium_command_executor(),
            desired_capabilities=capabilities)
    elif browser == 'phantomjs':
        self.sel = PhantomJS(service_args=['--debug=true',
                                           '--webdriver-loglevel=DEBUG'])
    else:
        self.sel = Chrome()

    self.sel.set_page_load_timeout(settings.SELENIUM_PAGE_LOAD_TIMEOUT)
    # Give the browser a little time; Firefox throws random errors if you
    # hit it too soon
    time.sleep(1)
def main():
    """Check every IP from ``get_ip_list`` and save each result.

    One PhantomJS driver is shared by all lookups.  After each address the
    result is printed and saved, then the loop sleeps ``WAIT_TIME`` seconds.
    """
    ip_list = get_ip_list()
    browser = PhantomJS(executable_path=get_phantomjs_path())
    try:
        for ip_address in ip_list:
            print('=' * 30)
            print('Checking: {}'.format(ip_address))
            ip, status, listing_risk = crawler(ip_address, browser)
            print('{} | {} | {}'.format(ip, status, listing_risk))
            print('=' * 30)
            save_result(ip, status, listing_risk)
            # wait
            print('\nWait {} seconds until next!\n'.format(WAIT_TIME))
            sleep(WAIT_TIME)
    finally:
        # The driver was never shut down before; stop phantomjs on any exit.
        browser.quit()
def setUp(self):
    """Create the browser plus a user, a provider and 20 advertisements."""
    self.driver = PhantomJS()

    self.user = admin = User.objects.create_user(
        'admin', '*****@*****.**', 'pass')
    admin.save()

    self.provider = provider = Provider(name='provider', user=admin)
    provider.save()

    self.provider_adverts = mommy.make(
        Advertisement, _quantity=20, provider=provider)
def onegoogolePR(self, url):
    """Return the PageRank of a single URL as looked up on pr.chinaz.com.

    Args:
        url: Address typed into the lookup form.

    Returns:
        The first digit found in the result image's ``src`` attribute, as a
        string, or ``'暂无数据'`` when the result cannot be read.
    """
    prUrl = 'http://pr.chinaz.com'  # Google PR lookup page
    driver = PhantomJS()
    try:
        driver.get(prUrl)
        driver.find_element_by_id('PRAddress').send_keys(url)
        driver.find_element_by_class_name('search-write-btn').click()
        try:
            imgsrc = driver.find_element_by_css_selector(
                'span#pr>img').get_attribute('src')
            pr = search(r'\d', imgsrc).group()
        except Exception:
            # Missing element or no digit in the image URL: report "no
            # data".  KeyboardInterrupt/SystemExit are no longer swallowed.
            pr = '暂无数据'
    finally:
        # Quit even when navigation or the form lookup raises.
        driver.quit()
    return pr
def PhantomJS(cls):
    """Build and return a PhantomJS driver with a desktop Chrome user agent.

    The disk cache is enabled, image loading is disabled and SSL errors are
    ignored.  The ghostdriver log goes to ``./log/ghostdriver.log``.
    """
    # NOTE(review): the inner ``PhantomJS`` must resolve to the Selenium
    # class rather than to this function; that holds only if this def lives
    # inside a class body -- confirm.
    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36")

    service_args = [
        '--disk-cache=true',  # enable the disk cache
        '--load-images=false',  # do not load images
        '--ignore-ssl-errors=true',
    ]

    # NOTE(review): executable_path names geckodriver.exe, which looks like
    # the Firefox driver rather than a phantomjs binary -- confirm.
    return PhantomJS(
        executable_path='./geckodriver.exe',
        desired_capabilities=capabilities,
        service_args=service_args,
        service_log_path='./log/ghostdriver.log',
    )
def __init__(self, login, password, userAgent=LINUX_USER_AGENT):
    """Store the credentials and start a 1366x768 PhantomJS session.

    :param login: stored in ``self.login``
    :param password: stored in ``self.password``
    :param userAgent: user-agent string the browser should report
    """
    self.login, self.password = login, password

    # Override the user agent on a private copy of the default capabilities.
    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities.update({"phantomjs.page.settings.userAgent": userAgent})

    self.driver = PhantomJS(desired_capabilities=capabilities)
    self.driver.set_window_size(1366, 768)
def phantomjs(self, exe_path=None, disable_log=True,
              log_path='logs/ghostdriver.log'):
    """Create a PhantomJS driver and store it in ``self.driver``.

    Warnings raised while the driver is constructed (PhantomJS support is
    deprecated in Selenium) are suppressed.

    Args:
        exe_path: Path to the phantomjs binary; when falsy the driver's own
            default is used.
        disable_log: When true, pass ``--webdriver-loglevel=NONE``.
        log_path: File that receives the ghostdriver log.

    On ``WebDriverException`` the error message is logged and
    ``self.driver`` is set to ``None``.
    """
    import warnings

    service_args = ['--webdriver-loglevel=NONE'] if disable_log else []
    driver_kwargs = {
        'service_args': service_args,
        'service_log_path': log_path,
    }
    if exe_path:
        driver_kwargs['executable_path'] = exe_path

    try:
        # Scoped suppression; this replaces the old global monkey-patch of
        # ``warnings.warn`` and restores the filters on exit.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.driver = PhantomJS(**driver_kwargs)
    except WebDriverException as e:
        logger.error(e.msg)
        self.driver = None
def _get_phantomjs_browser(self):
    """Create and return a PhantomJS driver.

    The binary path comes from ``settings.PHANTOMJS_EXECUTABLE_PATH``.  A
    warning is logged when ``settings.BROWSER_HEADLESS`` is false.

    Raises:
        BrowserNotFound: if the driver cannot be created for any reason.
    """
    logger.info('Loading PhantomJS Web Driver')
    if not settings.BROWSER_HEADLESS:
        # Log message typo fixed ("PhamtomJS run just in").
        logger.warning('PhantomJS runs only in headless mode')

    try:
        browser = PhantomJS(
            executable_path=settings.PHANTOMJS_EXECUTABLE_PATH)
    except Exception:
        logger.exception('Error on load PhantomJS browser.')
        raise BrowserNotFound(browser=PHANTOMJS)

    logger.info('PhantomJS Web Driver loaded')
    return browser