Пример #1
0
    def __init__(self):
        """Store the paging constants and start a PhantomJS driver.

        ``START_PAGE``, ``END_PAGE`` and ``REFER_FIRST`` are copied onto the
        instance.  The driver receives an explicit ``executable_path`` only
        when ``config`` defines ``PHANTOMJS_PATH``; otherwise PhantomJS is
        created with its default arguments.
        """
        self.start_page = START_PAGE
        self.end_page = END_PAGE
        self.weixin_url = REFER_FIRST

        # Collect the optional constructor arguments first so the driver is
        # instantiated in exactly one place.
        driver_options = {}
        if hasattr(config, 'PHANTOMJS_PATH'):
            driver_options['executable_path'] = config.PHANTOMJS_PATH
        self.driver = PhantomJS(**driver_options)
Пример #2
0
    def __init__(self):
        """Store the paging constants, start PhantomJS and open MongoDB.

        The driver receives an explicit ``executable_path`` only when
        ``config`` defines ``PHANTOMJS_PATH``.  A ``MongoClient`` is opened
        on ``HOST``/``PORT`` and the ``DB``/``COLLECTION`` collection is kept
        on the instance.  ``self.uids`` is read once and cached as
        ``self.all_uids`` (``uids`` is defined outside this block).
        """
        self.start_page = START_PAGE
        self.end_page = END_PAGE
        self.weixin_url = REFER_FIRST

        # Collect the optional constructor arguments first so the driver is
        # instantiated in exactly one place.
        driver_options = {}
        if hasattr(config, 'PHANTOMJS_PATH'):
            driver_options['executable_path'] = config.PHANTOMJS_PATH
        self.driver = PhantomJS(**driver_options)

        self.client = MongoClient(HOST, PORT)
        database = self.client[DB]
        self.collection = database[COLLECTION]
        self.all_uids = self.uids
Пример #3
0
    def create_driver(self, use_firefox=True):
        """Create the Selenium driver for this account as ``self.driver``.

        Args:
            use_firefox: When true (the default, which matches the branch
                that used to be hard-wired with ``if 1:``) a Firefox driver
                is started with the profile directory named after
                ``self.account['name']``.  When false a PhantomJS driver is
                started behind the proxy taken from ``self.account['Proxy']``.
        """
        if use_firefox:
            caps = DesiredCapabilities().FIREFOX.copy()

            profile_path = path.expanduser(
                '~') + '/.mozilla/firefox/' + self.account['name']

            caps['moz:firefoxOptions'] = {
                "args": ["-profile", profile_path],  # geckodriver 0.18+
            }

            profile = FirefoxProfile(profile_path)
            self.driver = Firefox(profile, capabilities=caps)
            return

        # PhantomJS, see https://github.com/detro/ghostdriver
        # Copy the capabilities: PHANTOMJS is a shared dict and must not be
        # modified in place (the Firefox branch already copies its own).
        caps = DesiredCapabilities().PHANTOMJS.copy()
        caps["phantomjs.page.settings.userAgent"] = \
            'Mozilla/5.0 (X11; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0'
        # Only the first two ':'-separated fields of the proxy setting are
        # passed on to PhantomJS.
        proxy_fields = self.account['Proxy'].split(':')[:2]
        service_args = [
            '--proxy={}'.format(':'.join(proxy_fields)),
            '--proxy-type=http',
        ]
        print(service_args)
        self.driver = PhantomJS(service_args=service_args,
                                capabilities=caps)
        self.driver.set_window_size(1120, 550)
Пример #4
0
 def init_phantom(self):
     """Start the PhantomJS driver used by this spider as ``self.phantom``.

     A per-job file prefix is built from the scrapyd ``logs_dir``, the
     project name, the spider name and the ``JOBID`` setting; the cookie
     file and the PhantomJS service log are both derived from it.  The
     global ``PROXY`` is passed on unless it is empty or starts with ``:``.
     """
     self.prefixfiles = os.path.join(
         scrapyd_config().get('logs_dir'),
         HYPHE_PROJECT,
         self.name,
         self.crawler.settings['JOBID'],
     )
     message = "Using path %s for PhantomJS crawl" % self.prefixfiles
     self.log(message, log.INFO)

     # Command-line switches for the PhantomJS process.
     proxy_args = []
     if PROXY and not PROXY.startswith(':'):
         proxy_args = ['--proxy=%s' % PROXY]
     phantom_args = proxy_args + [
         '--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles,
         '--ignore-ssl-errors=true',
         '--load-images=false',
     ]

     # Start from a private copy of the stock PhantomJS capabilities.
     self.capabilities = dict(DesiredCapabilities.PHANTOMJS)
     self.capabilities.update({
         'phantomjs.page.settings.userAgent': self.user_agent,
         'takesScreenshot': False,
         'phantomjs.page.settings.javascriptCanCloseWindows': False,
         'phantomjs.page.settings.javascriptCanOpenWindows': False,
     })

     service_log = "%s-phantomjs.log" % self.prefixfiles
     self.phantom = PhantomJS(executable_path=PHANTOM['PATH'],
                              service_args=phantom_args,
                              desired_capabilities=self.capabilities,
                              service_log_path=service_log)

     # Timeouts: element lookup, page load, and script execution.
     self.phantom.implicitly_wait(10)
     self.phantom.set_page_load_timeout(60)
     self.phantom.set_script_timeout(self.ph_timeout + 15)
Пример #5
0
    def get_driver(self):
        """Start a new browser and return its WebDriver.

        The browser is selected by the ``browser`` option of the ``selenium``
        config section.  Supported values are ``firefox`` (which also reads
        ``firefox_path`` from the same section), ``chrome`` and ``phantomjs``.
        The driver classes are imported lazily, only for the chosen browser.

        Raises:
            RuntimeError: if the configured browser is none of the above.
        """
        selected = self.config.get('selenium', 'browser')

        if selected == 'firefox':
            from selenium.webdriver import Firefox

            firefox_path = self.config.get('selenium', 'firefox_path')
            return Firefox(firefox_binary=FirefoxBinary(firefox_path))
        elif selected == 'chrome':
            from selenium.webdriver import Chrome

            return Chrome()
        elif selected == 'phantomjs':
            from selenium.webdriver import PhantomJS

            return PhantomJS()
        else:
            raise RuntimeError('Unsupported/unknown browser')
Пример #6
0
def test_plotly(remove_build):
    """Build a small Plotly app, serve it, and check the page title.

    The layout is built into a ``build`` directory next to this file, the
    generated ``src/server.py`` is started as a subprocess, and PhantomJS
    loads ``http://localhost:9991``.
    """
    viz = Plotly()
    ctrl = Nouislider()
    ctrl2 = Button()

    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'build')
    layout = Layout(directory=path)
    layout.add(viz)
    layout.add_sidebar(ctrl)
    layout.add_sidebar(ctrl2)
    layout.subscribe(callback, ctrl.on_change)
    layout.subscribe(callback, ctrl2.on_click)
    layout.build()

    # Work on a copy so the test does not modify this process's environment.
    env = os.environ.copy()
    env['PYTHONPATH'] = '{}:{}'.format(os.getcwd(),
                                       os.environ.get('PYTHONPATH', ''))
    server = subprocess.Popen(os.path.join(path, 'src/server.py'), env=env)
    try:
        # Give the server time to start listening.
        time.sleep(5)

        driver = PhantomJS()
        try:
            driver.get('http://localhost:9991')
            assert driver.title == 'Bowtie App'
        finally:
            driver.quit()
    finally:
        # Always stop the server, even when the assertion above fails.
        server.kill()
        server.wait()
Пример #7
0
    def __init__(self,
                 url,
                 phantomjs=None,
                 resolution=None,
                 ya_class=None,
                 screen_path=None,
                 screen_pattern=None,
                 csv_path=None):
        """Store the settings and start a PhantomJS driver.

        Every optional argument falls back to a module-level default when it
        is falsy: ``DEFAULT_PHANTOMJS``, ``FULLHD``, ``DEFAULT_YA_CLASS``,
        ``PATH``, ``'%s.png'`` and ``statistic.csv`` under ``PATH``.

        Raises:
            AssertionError: if the PhantomJS executable is not a file, if
                ``resolution`` is not a two-item list/tuple, or if
                ``screen_pattern`` contains no ``%s`` placeholder.
        """
        self.url = url

        self.phantomjs = phantomjs if phantomjs else DEFAULT_PHANTOMJS
        # The assertion message is Russian for "phantomjs not found".
        assert os.path.isfile(self.phantomjs), "phantomjs не найден"

        window_size = resolution if resolution else FULLHD
        assert isinstance(window_size, (list, tuple))
        assert len(window_size) == 2

        self.ya_class = ya_class if ya_class else DEFAULT_YA_CLASS
        self.screen_path = screen_path if screen_path else PATH

        self.screen_pattern = screen_pattern if screen_pattern else '%s.png'
        assert '%s' in self.screen_pattern

        self.csv_path = csv_path if csv_path else os_join(PATH, 'statistic.csv')

        self.driver = PhantomJS(self.phantomjs)
        width, height = window_size
        self.driver.set_window_size(width, height)
Пример #8
0
def get_selenium(**kwargs):
    """Return a WebDriver chosen by ``settings.TEST_SELENIUM_DRIVER``.

    ``chrome`` and ``chrome_headless`` yield a Chrome driver, ``phantomjs``
    a PhantomJS driver, and anything else (including the default
    ``firefox``) a Firefox driver.

    ``kwargs`` are turned into Chrome command-line arguments, but only in
    ``chrome_headless`` mode: ``key=val`` when the value is not ``None``,
    bare ``key`` otherwise.  When the ``CHROME_DRIVER_PATH`` environment
    variable is set it is passed to the Chrome driver as its first argument.
    """
    driver_name = getattr(settings, 'TEST_SELENIUM_DRIVER', 'firefox')

    if driver_name == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()

    if driver_name not in ('chrome', 'chrome_headless'):
        from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
        return FirefoxDriver()

    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
    options = Options()
    if driver_name == 'chrome_headless':
        for flag in ('headless', 'disable-gpu'):
            options.add_argument(flag)
        for key, val in kwargs.items():
            if val is None:
                argument = '{key}'.format(key=key)
            else:
                argument = '{key}={val}'.format(key=key, val=val)
            options.add_argument(argument)

    driver_path = os.environ.get('CHROME_DRIVER_PATH', None)
    if driver_path is None:
        return ChromeDriver(options=options)
    return ChromeDriver(driver_path, options=options)
Пример #9
0
 def get_crawler(self, dynamic):
     """Return a PhantomJS driver when ``dynamic`` is truthy, else ``None``.

     The PhantomJS executable path is hard-coded below.
     """
     if not dynamic:
         return None
     phantomjs_binary = (
         "/Users/mac/Desktop/Web scraping/phantomjs-2.1.1-macosx/bin/phantomjs"
     )
     return PhantomJS(phantomjs_binary)
Пример #10
0
 def scrape_statuses(self):
     """Load ``MTA_URL`` in PhantomJS and append one entry per line name.

     The rendered page source is parsed with BeautifulSoup; for every name
     in ``LINES`` the result of ``self.get_line`` is appended to
     ``self.lines``.
     """
     headless_browser = PhantomJS()
     try:
         headless_browser.get(MTA_URL)
         page_source = headless_browser.page_source
     finally:
         # Always shut the PhantomJS process down; it used to be left
         # running after every call.
         headless_browser.quit()

     soup = BeautifulSoup(page_source, "html.parser")
     for line_name in LINES:
         self.lines.append(self.get_line(soup, line_name))
Пример #11
0
def main():
    """Read the config, fetch the Steam library and write the sales report.

    The configuration values are printed, the user's library is fetched,
    PhantomJS is started from the bundled ``dependencies`` directory, and the
    text returned by ``fetch_sales`` is written to ``games.txt``.
    """
    steam_id, api, return_amount, user_categories = read_config_values()
    print("SteamID:", steam_id)
    print("API key:", api)
    print("Return amount:", return_amount)
    if user_categories:
        check_user_categories_validity(user_categories)
        print("Categories:", "; ".join(user_categories))
    print()

    print("Fetching your Steam library..")
    user_library = fetch_user_library(api, steam_id)
    print("Found {} in your library.".format(len(user_library)))

    print("Opening PhantomJS..")
    driver = PhantomJS(cwd + r"\dependencies\phantomJS\phantomjs.exe",
                       service_log_path=cwd +
                       r"\dependencies\phantomJS\ghostdriver.log")
    try:
        print("Opening SteamDB..")
        output = fetch_sales(driver, user_library, return_amount,
                             user_categories)
    finally:
        # Quit even when fetching fails, so no PhantomJS process is left
        # behind.
        driver.quit()

    with open("games.txt", 'w', encoding='utf-8') as file:
        file.write(output)
    input("\nDone. I also wrote the games to a text file.")
Пример #12
0
    def get_driver(self):
        """Start a new browser and return its WebDriver.

        The browser is selected by the ``browser`` option of the ``selenium``
        config section.  Supported values are ``firefox``, ``chrome`` and
        ``phantomjs``; the driver classes are imported lazily, only for the
        chosen browser.

        Raises:
            RuntimeError: if the configured browser is none of the above.
        """
        selected = self.config.get('selenium', 'browser')

        if selected == 'firefox':
            from selenium.webdriver import Firefox

            firefox = Firefox(firefox_binary=FirefoxBinary())
            # Workaround for http://stackoverflow.com/a/42770761/489916
            firefox._is_remote = False
            return firefox
        elif selected == 'chrome':
            from selenium.webdriver import Chrome

            return Chrome()
        elif selected == 'phantomjs':
            from selenium.webdriver import PhantomJS

            return PhantomJS()
        else:
            raise RuntimeError('Unsupported/unknown browser')
Пример #13
0
 def __init__(self):
     """Start PhantomJS from the binary that sits next to this file.

     The directory of this source file is printed, the request counter
     ``self.req`` starts at zero, the PhantomJS service log is sent to the
     null device, and element lookups get a 3 second implicit wait.
     """
     app_root = os.path.dirname(os.path.abspath(__file__))
     print(app_root)
     self.req = 0
     phantomjs_binary = app_root + "/phantomjs"
     self.driver = PhantomJS(phantomjs_binary,
                             service_log_path=os.path.devnull)
     self.driver.implicitly_wait(3)
Пример #14
0
 def __init__(self):
     """Initialise the base class with ``init=False`` and start PhantomJS.

     The browser window is maximised and a 15 second ``WebDriverWait`` is
     bound to the driver.  ``url`` and ``name`` identify the target site
     (the name is the Chinese title "China Securities Network").
     """
     super().__init__(init=False)
     self.driver = browser = PhantomJS()
     browser.maximize_window()
     self.wait = WebDriverWait(browser, 15)
     self.url = 'http://www.cnstock.com/'
     self.name = '中国证券网'
Пример #15
0
 def __init__(self):
     """Start PhantomJS with a Firefox-on-macOS user agent.

     The executable is looked up at a path relative to the current working
     directory.
     """
     # Override the user agent on a private copy of the stock capabilities.
     user_agent = (
         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 "
     )
     capabilities = dict(DesiredCapabilities.PHANTOMJS)
     capabilities["phantomjs.page.settings.userAgent"] = user_agent
     phantomjs_binary = r'phantomjs-2.1.1-windows\bin\phantomjs.exe'
     self.driver = PhantomJS(executable_path=phantomjs_binary,
                             desired_capabilities=capabilities)
Пример #16
0
def phantomjs_driver(request, capabilities, driver_path):
    """Return a WebDriver using a PhantomJS instance.

    ``capabilities`` is forwarded as ``desired_capabilities`` only when it
    is truthy, and ``driver_path`` as ``executable_path`` only when it is
    not ``None``.  ``request`` is not used here.
    """
    options = {'desired_capabilities': capabilities} if capabilities else {}
    if driver_path is not None:
        options.update(executable_path=driver_path)
    return PhantomJS(**options)
Пример #17
0
def main():
    """Pick a reachable mirror and download the .ovpn files it lists.

    An optional first command-line argument sets the global ``HEAD`` (the
    maximum number of table rows to process); a non-integer value falls back
    to 10.  Each mirror from ``read_mirrors()`` is probed in turn until one
    both answers and lets ``update_mirrors`` succeed.  The host table of the
    chosen mirror is rendered with PhantomJS, and every row that carries a
    config link is saved as ``<country>/<index>`` via ``get_ovpn``.

    Raises:
        Warning: if no mirror could be opened at all.
    """
    global HEAD
    if len(sys.argv) > 1:
        try:
            HEAD = int(sys.argv[1])
        except ValueError:
            HEAD = 10

    # Probe the mirrors.  ``cururl`` only ever holds a mirror that actually
    # answered, so a later failing mirror can no longer overwrite it.
    cururl = None
    for mirror in read_mirrors():
        print("Testing:", mirror)
        try:
            res = request.urlopen(mirror)
            res.close()
        except Exception:
            print("Testing on", mirror, "failed")
            continue
        cururl = mirror
        try:
            update_mirrors(cururl)
        except Exception:
            # Reachable, but the mirror list could not be refreshed from it;
            # keep looking for a better one.
            continue
        break

    if cururl is None:
        raise Warning('All mirrors unavailable!')
    print('Available mirror:', cururl)

    # Get the VPN table.
    countries = dict()
    dr = PhantomJS()
    try:
        dr.get(cururl)
        page = Selector(text=dr.page_source)\
            .xpath('.//td[@id="vpngate_inner_contents_td"]/'
                   'table[@id="vg_hosts_table_id"]//tr')

        if HEAD < len(page):
            page = page[:HEAD]

        print('Pagelen:', len(page))

        for vpn in page:
            if len(vpn.xpath('./td[@class="vg_table_header"]')) > 0:
                continue

            row = vpn.xpath('./td')
            if len(row) < 7:
                # Not a host row; the country and link cells are missing.
                continue
            country = row[0].xpath('./text()').extract_first()
            ovpn = row[6].xpath('./a/@href').extract_first()
            if not country or not ovpn:
                continue
            country = '_'.join(country.split(' '))

            # Files are numbered per country, starting at 0; the directory
            # is created the first time a country is seen.
            if country in countries:
                countries[country] += 1
            else:
                countries[country] = 0
                if not os.path.exists(country):
                    os.mkdir(country)
            get_ovpn(url=cururl + ovpn,
                     save_to=country + '/' + str(countries[country]))
    finally:
        # Shut PhantomJS down even when scraping or a download fails.
        dr.quit()
def getHtmlSource(url, time=10):
    """Load ``url`` in PhantomJS and return the rendered page source.

    PhantomJS is started with SSL errors ignored, any SSL protocol allowed
    and web security disabled.

    Args:
        url: Address of the page to load.
        time: Kept for backward compatibility.  It was only ever used to
            construct a ``WebDriverWait`` object that was never waited on,
            so it has never influenced the result.

    Returns:
        The page source as reported by the driver after ``get`` returns.
    """
    driver = PhantomJS(service_args=[
        '--ignore-ssl-errors=true', '--ssl-protocol=any',
        '--web-security=false'
    ])
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # Every call starts a new PhantomJS process; make sure it is stopped.
        driver.quit()
Пример #19
0
def get_selenium():
    """Return a WebDriver chosen by ``settings.TEST_SELENIUM_DRIVER``.

    ``chrome`` yields a Chrome driver and ``phantomjs`` a PhantomJS driver;
    any other value, including the default ``firefox``, yields a Firefox
    driver.  Only the selected driver class is imported.
    """
    driver_name = getattr(settings, 'TEST_SELENIUM_DRIVER', 'firefox')

    if driver_name == 'chrome':
        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        return ChromeDriver()

    if driver_name == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()

    from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
    return FirefoxDriver()
Пример #20
0
class SeleniumMiddleware:
    """Middleware that fetches each request through a PhantomJS browser."""

    # Created once, when the class body is executed, and shared by every
    # instance of the middleware.
    driver = PhantomJS()

    def process_request(self, request, spider):
        """Load ``request.url`` in the browser and return the rendered page.

        The driver is also exposed to the spider as ``spider.driver``.  The
        returned ``HtmlResponse`` carries the browser's current URL and page
        source, encoded as UTF-8, and is tied to the original request.
        """
        browser = self.driver
        spider.driver = browser
        browser.get(request.url)
        return HtmlResponse(
            browser.current_url,
            body=browser.page_source,
            encoding='utf-8',
            request=request,
        )
Пример #21
0
def get_selenium_driver(driver_name):
    """Return a new WebDriver for ``driver_name``.

    Supported names are ``chrome``, ``phantomjs`` and ``firefox``.  The
    driver class is imported lazily, only for the requested browser.

    Raises:
        ValueError: if ``driver_name`` is not one of the supported names.
    """
    if driver_name == 'chrome':
        from selenium.webdriver.chrome.webdriver import WebDriver as ChromeDriver
        return ChromeDriver()
    elif driver_name == 'phantomjs':
        from selenium.webdriver import PhantomJS
        return PhantomJS()
    elif driver_name == 'firefox':
        from selenium.webdriver.firefox.webdriver import WebDriver as FirefoxDriver
        return FirefoxDriver()
    # Name the offending value so a misconfiguration is easy to spot.
    raise ValueError('Unknown driver name: {!r}'.format(driver_name))
Пример #22
0
    def __init__(self):
        """
        Start the PhantomJS browser and set the default timeout.

        The executable is taken from ``./drivers/phantomjs`` and the driver
        is bound to the port returned by ``free_port()``.

        ARGS:
            None
        RETURNS:
            None
        """
        phantom_options = {
            'executable_path': './drivers/phantomjs',
            'port': free_port(),
        }
        self.browser = PhantomJS(**phantom_options)
        self.timeout = 5  # seconds
Пример #23
0
 def setUp(self):
     """Start a new browser instance for each test.

     The browser comes from the ``SELENIUM_BROWSER`` environment variable
     (default ``settings.SELENIUM_DEFAULT_BROWSER``).  The driver is chosen
     in this order of precedence:

     1. ``SELENIUM_HOST`` set: the Sauce Labs driver.
     2. ``SELENIUM_COMMAND_EXECUTOR`` set and browser is chrome/firefox:
        a remote driver on that executor.
     3. Otherwise a driver picked by browser name; unknown names fall back
        to a local Chrome.
     """
     self._screenshot_number = 1
     self.browser = os.getenv('SELENIUM_BROWSER',
                              settings.SELENIUM_DEFAULT_BROWSER)
     explicit_executor = os.getenv('SELENIUM_COMMAND_EXECUTOR', '')
     executor_was_set_explicitly = explicit_executor != ''
     command_executor = explicit_executor or 'http://127.0.0.1:4444/wd/hub'

     # Browsers that are always driven through the remote executor, mapped
     # to the name of their DesiredCapabilities attribute.  The attribute is
     # looked up only for the browser actually selected.
     remote_capability_names = {
         'htmlunit': 'HTMLUNITWITHJS',
         'opera': 'OPERA',
         'iexplore': 'INTERNETEXPLORER',
         # Safari requires an extension built from source and installed.
         'safari': 'SAFARI',
     }

     if os.getenv('SELENIUM_HOST'):
         self.sel = self.sauce_labs_driver()
     elif executor_was_set_explicitly and self.browser in ('chrome', 'firefox'):
         if self.browser == 'chrome':
             caps, browser_profile = DesiredCapabilities.CHROME, None
         else:
             caps = DesiredCapabilities.FIREFOX
             browser_profile = self.get_firefox_profile()
         self.sel = RemoteWebDriver(command_executor=command_executor,
                                    desired_capabilities=caps,
                                    browser_profile=browser_profile)
     elif self.browser == 'firefox':
         self.sel = Firefox(self.get_firefox_profile())
     elif self.browser in remote_capability_names:
         caps = getattr(DesiredCapabilities,
                        remote_capability_names[self.browser])
         self.sel = RemoteWebDriver(command_executor=command_executor,
                                    desired_capabilities=caps)
     elif self.browser in ['ios', 'ipad', 'ipod', 'iphone']:
         simulator_caps = {
             'app': 'safari',
             'browserName': '',
             'device': 'iPhone Simulator',
             'os': 'iOS 6.1'
         }
         self.sel = RemoteWebDriver(
             command_executor=self.appium_command_executor(),
             desired_capabilities=simulator_caps)
     elif self.browser == 'phantomjs':
         phantom_args = ['--debug=true', '--webdriver-loglevel=DEBUG']
         self.sel = PhantomJS(service_args=phantom_args)
     else:
         self.sel = Chrome()

     self.sel.set_page_load_timeout(settings.SELENIUM_PAGE_LOAD_TIMEOUT)
     # Give the browser a little time; Firefox throws random errors if you
     # hit it too soon
     time.sleep(1)
Пример #24
0
def main():
    """Check every IP from ``get_ip_list()`` and save each result.

    One PhantomJS driver is shared by all lookups.  For each address the
    values returned by ``crawler`` are printed and passed to
    ``save_result``, followed by a pause of ``WAIT_TIME`` seconds.
    """
    ip_list = get_ip_list()
    # Named ``driver`` rather than ``webdriver`` so it cannot be confused
    # with the selenium module of the same name.
    driver = PhantomJS(executable_path=get_phantomjs_path())
    try:
        for ip_address in ip_list:
            print('=' * 30)
            print('Checking: {}'.format(ip_address))
            ip, status, listing_risk = crawler(ip_address, driver)
            print('{} | {} | {}'.format(ip, status, listing_risk))
            print('=' * 30)
            save_result(ip, status, listing_risk)
            # wait
            print('\nWait {} seconds until next!\n'.format(WAIT_TIME))
            sleep(WAIT_TIME)
    finally:
        # The driver was never shut down before; stop PhantomJS on success
        # and on failure alike.
        driver.quit()
Пример #25
0
    def setUp(self):
        """Start PhantomJS and create the fixtures used by the tests.

        Creates an ``admin`` user, a provider owned by that user, and twenty
        ``Advertisement`` objects for that provider via ``mommy.make``.
        """
        self.driver = PhantomJS()

        self.user = User.objects.create_user('admin', '*****@*****.**', 'pass')
        self.user.save()

        self.provider = provider = Provider(name='provider', user=self.user)
        provider.save()

        self.provider_adverts = mommy.make(
            Advertisement,
            _quantity=20,
            provider=provider,
        )
Пример #26
0
 def onegoogolePR(self, url):
     '''Return the PR value for a single URL.

     The URL is submitted to the lookup form on pr.chinaz.com and the first
     digit found in the result image's ``src`` is returned as a string.
     When no result can be read, the placeholder string '暂无数据'
     ("no data yet") is returned instead.
     '''
     prUrl = 'http://pr.chinaz.com'  # Google PR lookup address
     driver = PhantomJS()
     try:
         driver.get(prUrl)
         driver.find_element_by_id('PRAddress').send_keys(url)
         driver.find_element_by_class_name('search-write-btn').click()
         try:
             imgsrc = driver.find_element_by_css_selector(
                 'span#pr>img').get_attribute('src')
             pr = search(r'\d', imgsrc).group()
         except Exception:
             # Missing element, or no digit in the image URL.
             pr = '暂无数据'
     finally:
         # Stop PhantomJS even when loading the page or the form fails.
         driver.quit()
     return pr
Пример #27
0
 def PhantomJS(cls):
     """Create and return a PhantomJS driver with a Chrome user agent.

     NOTE(review): ``executable_path`` points at ``./geckodriver.exe``,
     which by its name looks like the Firefox driver rather than a
     PhantomJS binary - confirm this is intended.
     """
     capabilities = dict(DesiredCapabilities.PHANTOMJS)
     capabilities["phantomjs.page.settings.userAgent"] = ("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36")
     # Proxy support is currently switched off; the original kept these
     # lines commented out:
     #   proxy = get_proxy()
     #   '--proxy={}'.format(proxy)   (proxy address)
     #   '--proxy-type=http'          (proxy type)
     service_args = [
         '--disk-cache=true',  # enable the disk cache
         '--load-images=false',  # do not load images
         '--ignore-ssl-errors=true',
     ]
     return PhantomJS(executable_path='./geckodriver.exe',
                      desired_capabilities=capabilities,
                      service_args=service_args,
                      service_log_path='./log/ghostdriver.log')
    def __init__(self, login, password, userAgent=LINUX_USER_AGENT):
        '''
        Store the credentials and start a PhantomJS driver.

        :param login: stored as ``self.login``
        :param password: stored as ``self.password``
        :param userAgent: user agent string the browser reports
        '''
        self.login, self.password = login, password

        # Override the user agent on a private copy of the stock
        # capabilities, then open a 1366x768 window.
        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities.update({"phantomjs.page.settings.userAgent": userAgent})
        self.driver = PhantomJS(desired_capabilities=capabilities)
        self.driver.set_window_size(1366, 768)
Пример #29
0
    def phantomjs(self, exe_path=None, disable_log=True, log_path='logs/ghostdriver.log'):
        """Start a PhantomJS driver and store it as ``self.driver``.

        Args:
            exe_path: Path of the PhantomJS executable.  When falsy, no
                ``executable_path`` is passed to the driver.
            disable_log: When true, ``--webdriver-loglevel=NONE`` is passed
                to PhantomJS.
            log_path: Destination of the PhantomJS service log.

        On ``WebDriverException`` the error message is logged and
        ``self.driver`` is set to ``None``; nothing is raised.
        """
        import warnings

        service_args = ['--webdriver-loglevel=NONE'] if disable_log else []

        driver_kwargs = {
            'service_args': service_args,
            'service_log_path': log_path,
        }
        if exe_path:
            driver_kwargs['executable_path'] = exe_path

        try:
            # PhantomJS support is deprecated in selenium and warns on every
            # start.  Silence warnings only for the constructor call, and
            # restore the previous filters afterwards, instead of replacing
            # ``warnings.warn`` for the whole process.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                self.driver = PhantomJS(**driver_kwargs)
        except WebDriverException as e:
            logger.error(e.msg)
            self.driver = None
Пример #30
0
    def _get_phantomjs_browser(self):
        """Create and return a PhantomJS driver.

        The executable is taken from ``settings.PHANTOMJS_EXECUTABLE_PATH``.
        A warning is logged when ``settings.BROWSER_HEADLESS`` is false.

        Raises:
            BrowserNotFound: if the driver cannot be created for any reason;
                the original error is logged with its traceback first.
        """
        logger.info('Loading PhantomJS Web Driver')

        if not settings.BROWSER_HEADLESS:
            logger.warning('PhantomJS runs only in headless mode')

        try:
            browser = PhantomJS(
                executable_path=settings.PHANTOMJS_EXECUTABLE_PATH)
        except Exception:
            logger.exception('Error on load PhantomJS browser.')
            raise BrowserNotFound(browser=PHANTOMJS)

        logger.info('PhantomJS Web Driver loaded')

        return browser