Пример #1
0
def read_cookies():
    """Open mobile Twitter in headless Chrome with cookies restored from disk.

    Cookies are loaded from ``qrsncookies.txt`` (JSON list of cookie dicts),
    added to the session after the first page load, and then the home page
    is requested.

    Returns:
        A ``(driver, c_service)`` tuple. Neither object is closed here; both
        are handed back to the caller.

    Raises:
        Any exception from setup is re-raised after the browser and the
        driver service have been shut down.
    """
    c_service = Service('/opt/google/chrome/chromedriver')
    c_service.command_line_args()  # return value is discarded
    c_service.start()

    driver = None
    try:
        option = webdriver.ChromeOptions()
        option.add_argument("--headless")
        option.add_argument("--no-sandbox")
        driver = webdriver.Chrome(chrome_options=option)

        # Load the site first, then attach the stored cookies to the session.
        driver.get("https://mobile.twitter.com")
        with open("qrsncookies.txt", "r") as fp:
            cookies = json.load(fp)
        for cookie in cookies:
            # The 'expiry' field is dropped before the cookie is added.
            cookie.pop('expiry', None)
            driver.add_cookie(cookie)

        driver.get("https://mobile.twitter.com/home")
        # print() with a single argument behaves the same on Python 2 and 3.
        print(driver.title)
        time.sleep(20)
    except Exception:
        # Do not leave a browser / driver process behind on failure.
        if driver is not None:
            driver.quit()
        c_service.stop()
        raise
    return driver, c_service
Пример #2
0
def main():
    """Print the California cities where the CVS page lists available slots.

    Starts a chromedriver service, opens the CVS COVID-19 vaccine page,
    selects "California" and prints every city whose status text is
    "Available". The browser and the driver service are always shut down,
    even when a lookup fails.
    """
    service = Service('/Users/catemiller/vax-finder/chromedriver')
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            # CVS
            print("Searching CVS")

            driver.get('https://www.cvs.com/immunizations/'
                       'covid-19-vaccine?icid=coronavirus-lp-nav-vaccine')
            driver.find_element_by_link_text("California").click()

            cities = driver.find_elements_by_xpath('.//span[@class = "city"]')
            status = driver.find_elements_by_xpath(
                './/span[@class = "status"]')

            print("    Appointments are available in the following cities:")
            # zip pairs the two lists positionally and cannot index past the
            # end of the shorter one.
            for city, state in zip(cities, status):
                if state.text == "Available":
                    print("        " + city.text)

            print("    To book an appointment: ")
            print('        https://www.cvs.com/vaccine/'
                  'intake/store/covid-screener/covid-qns')
        finally:
            driver.quit()
    finally:
        service.stop()
 def initialize_web_driver():
     """Start chromedriver once, open and close a session, return its path.

     Returns:
         The hard-coded chromedriver executable path.
     """
     DRIVER_PATH = r"C:\Users\jerem\Desktop\chromedriver.exe"
     service = Service(DRIVER_PATH)
     service.start()
     try:
         wd = webdriver.Remote(service.service_url)
         wd.quit()
     finally:
         # Stop the driver process explicitly instead of leaving it running.
         service.stop()
     return DRIVER_PATH
Пример #4
0
def get_movie_url(url):
    """Map each episode title on a page to the ``src`` of its player iframe.

    Args:
        url: Address passed to ``ask_url`` to fetch the episode-list HTML.

    Returns:
        dict of episode link text -> ``src`` attribute of the element with
        id ``fed-play-iframe`` on that episode's page. Titles and links are
        paired positionally; pairing stops at the shorter of the two lists.
    """
    html = ask_url(url)
    doc = etree.HTML(html)
    # href and text of every episode anchor in the visible play list
    anchors = ('//div[@class="fed-play-item fed-drop-item fed-visible"]'
               '//ul[@class="fed-part-rows"]/li/a')
    all_url = doc.xpath(anchors + '/@href')
    all_title = doc.xpath(anchors + '/text()')

    # Use selenium to read the src of the iframe on each episode page
    c_service = Service('/usr/bin/chromedriver')
    c_service.command_line_args()  # return value is discarded
    c_service.start()
    data_dict = {}
    try:
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        option.add_argument('--no-sandbox')
        option.add_argument('--disable-dev-shm-usage')
        browser = webdriver.Chrome('/usr/bin/chromedriver', options=option)
        try:
            # A distinct loop name keeps the ``url`` parameter intact.
            for title, href in zip(all_title, all_url):
                browser.get('https://kuyun.tv' + href)
                iframe = browser.find_element_by_id('fed-play-iframe')
                data_dict[title] = iframe.get_attribute('src')
        finally:
            browser.quit()
    finally:
        c_service.stop()
    return data_dict
Пример #5
0
class WebTestModule:
    """Browser session wrapper for web tests, configured via ``ConfigLoader``.

    Creating an instance starts a chromedriver service, opens a remote
    session against it and loads either ``initPage`` or the configured URL.
    """

    def __init__(self,initPage:str = None):
        """Read the configuration and open the browser.

        Args:
            initPage: Page to open first; when ``None`` the configured URL
                is opened instead.
        """
        configLoader = ConfigLoader()
        self.url=configLoader.getUrl()
        self.driverPath=configLoader.getChromeDriverPath()
        self.initWebdriver(initPage)

    def initWebdriver(self,initPage:str):
        """Start the driver service, create the session and load a page."""
        self.service = Service(self.driverPath)
        self.service.start()
        capabilities = {'acceptSslCerts': True}
        try:
            self.driver = webdriver.Remote(self.service.service_url,capabilities)
        except Exception:
            # No session was created, so nothing else will stop the service.
            self.service.stop()
            raise
        self.driver.implicitly_wait(5)
        if initPage is None:
            self.driver.get(self.url)
        else:
            self.driver.get(initPage)

    def login(self,username:str,password:str):
        """Fill the login form, submit it and return a ``HomePage`` for self."""
        usernameTextField=self.driver.find_element_by_id('usernameText')
        usernameTextField.send_keys(username)
        passwordTextField=self.driver.find_element_by_id('passwordText')
        passwordTextField.send_keys(password)
        loginButton=self.driver.find_element_by_xpath("//input[@value='login']")
        loginButton.click()
        return HomePage(self)

    def logout(self):
        """Click the element with id ``logoutNav``."""
        self.driver.find_element_by_id('logoutNav').click()

    def close(self):
        """Quit the browser and stop the driver service started for it."""
        try:
            self.driver.quit()
        finally:
            self.service.stop()
Пример #6
0
def before_all(context):
    """Start the Opera driver, a local httpbin server and the extension page.

    Args:
        context: Test context object; receives ``driver_service``,
            ``server`` and ``browser`` attributes.
    """
    service = Service('drivers\\operadriver.exe')
    service.start()
    # Keep the service reachable for the whole run so it is not finalized
    # when this function returns and can be stopped during teardown.
    context.driver_service = service
    test_extension = "..\\restman-0.3.nex"

    # Close the file deterministically; decode so the value is text, which
    # the JSON-encoded capabilities payload requires on Python 3.
    with open(test_extension, 'rb') as extension_file:
        b64ext = base64.b64encode(extension_file.read()).decode('ascii')

    capabilities = {
        'operaOptions': {
            'binary': 'C:\\Program Files (x86)\\Opera Next\\30.0.1835.49\\opera.exe',
            'extensions': [b64ext],
        },
        'Proxy': {
            'proxyType': 'system'
        }
    }
    # Start web server (httpbin FTW!)
    context.server = subprocess.Popen([sys.executable, '-m', 'httpbin.core'])

    # Create browser
    context.browser = webdriver.Remote(service.service_url, capabilities)

    # Start extension
    context.browser.get('chrome-extension://fohkgjiaiapkkjjchddmhaaaghjakfeg/index.html')
    time.sleep(1)   # Wait for app to load
Пример #7
0
def punch(StudentId, Name):
    """Submit the check-in form on xsc.sicau.edu.cn for one student.

    The text of the ``code-box`` element is read from the page and typed
    back into the ``codeInput`` field together with the student id and name.

    Args:
        StudentId: Value typed into the ``StudentId`` field.
        Name: Value typed into the ``Name`` field.
    """
    c_service = Service('/Users/wq//Downloads/chromedriver')
    c_service.command_line_args()  # return value is discarded
    c_service.start()
    try:
        driver = webdriver.Chrome(
            '/Users/wq/Downloads/chromedriver')  # use the Chrome browser
        try:
            def fill(xpath, value):
                # Click the field, then look it up again and type into it.
                driver.find_element_by_xpath(xpath).click()
                driver.find_element_by_xpath(xpath).send_keys(value)

            driver.get('http://xsc.sicau.edu.cn/SPCP')  # open the site
            # Elements are located by XPath
            text = driver.find_element_by_xpath('//*[@id="code-box"]').text
            fill('//*[@id="StudentId"]', StudentId)
            fill('//*[@id="Name"]', Name)
            fill('//*[@id="codeInput"]', text)
            driver.find_element_by_xpath('//*[@id="Submit"]').click()
            driver.find_element_by_xpath('//*[@id="platfrom2"]').click()
            try:
                driver.find_element_by_xpath('//*[@id="ckCLS"]').click()
                driver.find_element_by_xpath(
                    '//*[@id="SaveBtnDiv"]/button').click()
            except Exception:
                # Fallback path: click the link in the layer dialog instead.
                driver.find_element_by_xpath(
                    '//*[@id="layui-layer1"]/div[3]/a').click()
        finally:
            driver.quit()
    finally:
        c_service.stop()
Пример #8
0
def dasked_get_images(url):
    """Collect image links from a page and pair each with a unique name.

    Args:
        url: Page to load; its last ``/``-separated segment prefixes names.

    Returns:
        list of ``(name, link)`` tuples. ``name`` is
        ``<segment>_<32 hex chars of a uuid4>``; ``link`` is ``https:`` plus
        the part of the ``srcset`` attribute after ``"900w, "`` with
        ``"1080w"`` removed. Links containing ``/logo_`` or ``-logo-`` are
        skipped and duplicates collapse.
    """
    service = Service(DRIVER_PATH)
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            # NOTE: process-wide switch to an unverified default HTTPS context.
            ssl._create_default_https_context = ssl._create_unverified_context

            title = url.split("/")[-1]
            driver.get(url)

            img_set = set()
            for image in driver.find_elements_by_tag_name("img"):
                try:
                    srcset = image.get_attribute("srcset")
                except Exception:
                    # Best effort: an unreadable element is skipped.
                    continue
                if not srcset:
                    # <img> without a srcset attribute has nothing to parse.
                    continue

                cdn_link = srcset.split("900w, ")[-1].replace(
                    "1080w", "").strip()
                if not cdn_link:
                    continue
                if "/logo_" in cdn_link or "-logo-" in cdn_link:
                    continue
                img_set.add(f"https:{cdn_link}")

            return [(title + "_" + uuid.uuid4().hex, link)
                    for link in img_set]
        finally:
            driver.quit()
    finally:
        service.stop()
Пример #9
0
def get_product_links():
    """Collect product links from pages 1-7 of the worldwidecorals catalogue.

    Also makes sure the ``./wwc/`` output directory exists.

    Returns:
        list of ``href`` values of every ``grid-product__link`` element, in
        page order.
    """
    base_url = "https://worldwidecorals.com"
    output_path = './wwc/'

    service = Service(DRIVER_PATH)
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            # NOTE: process-wide switch to an unverified default HTTPS context.
            ssl._create_default_https_context = ssl._create_unverified_context

            # Creating the directory once is enough; it does not vary by page.
            os.makedirs(output_path, exist_ok=True)

            # get all product links from all pages
            links = []
            for page in range(1, 8):
                driver.get(f'{base_url}/collections/all?page={page}')

                __scroll_down_page(driver)

                links.extend(
                    element.get_attribute("href") for element in
                    driver.find_elements_by_class_name('grid-product__link')
                )
            return links
        finally:
            driver.quit()
    finally:
        service.stop()
Пример #10
0
    def GetThread(self):
        """Collect the status ids linked from the first tweet's page.

        Opens ``https://twitter.com/<screen_name>/status/<FirstTweetID>`` in
        headless Chrome, waits until the page title contains the user's
        name, and appends the part after ``/status/`` of every matching
        link (as a string) to ``self.Tweets``.

        Returns:
            ``self.Tweets`` after sorting it in place.
        """
        from selenium import webdriver

        from selenium.webdriver.support.ui import WebDriverWait
        from selenium.webdriver.support import expected_conditions as EC

        from selenium.webdriver.chrome.service import Service
        from selenium.webdriver.chrome.options import Options

        chrome_options = Options()
        chrome_options.add_argument("--headless")

        service = Service('./chromedriver')
        service.start()
        try:
            driver = webdriver.Remote(service.service_url, desired_capabilities=chrome_options.to_capabilities())
            try:
                driver.implicitly_wait(10)
                driver.get("https://twitter.com/{user}/status/{tweet_id}".format(user=self.USER.screen_name, tweet_id=self.FirstTweetID))
                try:
                    WebDriverWait(driver, 10).until(EC.title_contains(self.USER.name))
                    links = driver.find_elements_by_xpath('//a[contains(@href, "{}/status")]'.format(self.USER.screen_name))
                    for link in links:
                        self.Tweets.append(link.get_attribute('href').split('/status/')[-1])
                except Exception:
                    # Any failure while waiting or scraping is reported the
                    # same way and leaves self.Tweets with what was gathered.
                    print("Loading took too much time!")
            finally:
                # Runs even when the initial page load raises.
                driver.quit()
        finally:
            service.stop()

        self.Tweets.sort()

        return self.Tweets
def crawler(url):
    """Print the text of the first ``<p>`` inside every ``container`` element.

    Args:
        url: Address to crawl; it is passed through ``correct_url`` first.
    """
    service = Service('driver/chromedriver')
    service.start()
    try:
        # pass url to check if it's correct
        url = correct_url(url)
        driver = webdriver.Remote(service.service_url)
        try:
            driver.get(url)

            # crawler starts here
            for hover_element in driver.find_elements_by_class_name('container'):
                print(hover_element.find_element_by_css_selector('p').text)

            # keep the browser open for 10 seconds before closing it
            time.sleep(10)
        finally:
            driver.quit()
    finally:
        service.stop()
Пример #12
0
def parse(url):
    """Load ``url`` in Chrome and return the page source.

    The browser and the chromedriver service are shut down before
    returning, including when loading the page fails.
    """
    service = Service('/usr/local/bin/chromedriver')
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            driver.get(url)
            return driver.page_source
        finally:
            driver.quit()
    finally:
        service.stop()
Пример #13
0
def init_driver():
    """Start chromedriver and return a remote session bound to it.

    Returns:
        The WebDriver session, with a 40 second implicit wait configured.
    """
    # Locate Driver in system. Raw string: the backslashes are literal path
    # separators, not escape sequences.
    Path = r"C:\SeleniumDrivers\chromedriver.exe"
    service = Service(Path)
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
    except Exception:
        service.stop()
        raise
    # NOTE(review): selenium's Service appears to stop its process when the
    # object is finalized; holding a reference on the driver keeps the
    # service alive as long as the returned session - confirm against the
    # installed selenium version.
    driver._chromedriver_service = service
    driver.implicitly_wait(40)
    return driver
def parse(url):
    """Load ``url`` in Chrome, wait two minutes, return the page source.

    The browser is created through ``ChromeDriverManager().install()``; the
    separately started chromedriver service is stopped before returning.
    """
    service = Service('/usr/local/bin/chromedriver')
    service.start()
    try:
        driver = webdriver.Chrome(ChromeDriverManager().install())
        try:
            driver.get(url)
            time.sleep(120)
            return driver.page_source
        finally:
            driver.quit()
    finally:
        # This service is started here and used nowhere else, so it is
        # stopped here as well.
        service.stop()
Пример #15
0
 def _get_selenium_service(cls) -> Service:
     """Return the driver service cached on ``cls``, starting it on first use.

     Raises:
         AttributeError: if ``ChromeDriverLoader.driver_path`` is empty when
             the service has to be created.
     """
     try:
         return getattr(cls, '__selenium_service')
     except AttributeError:
         pass  # nothing cached yet: create and start the service below

     driver_path = ChromeDriverLoader.driver_path
     if not driver_path:
         raise AttributeError('Get empty driver path.')
     started = Service(driver_path)
     started.start()
     setattr(cls, '__selenium_service', started)
     return getattr(cls, '__selenium_service')
Пример #16
0
def startWebDriverService():
    """ Starts the web driver as a service, this improves performance and does not require a manual start
    of the chrome driver.

    Returns:
        A remote WebDriver session connected to the started service.

    TODO: Make it actually work.
    """
    service = Service(D_PATH_CHROMEDRIVER)
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
    except Exception:
        service.stop()
        raise
    # NOTE(review): presumably the reason for the TODO above - once this
    # function returns nothing references the service, and selenium's Service
    # appears to stop its process when finalized. Holding it on the driver
    # ties its lifetime to the session; confirm against the installed
    # selenium version.
    driver._chromedriver_service = service
    return driver
Пример #17
0
class DownloadDataSet:
    """Drives a Chrome session through a dataset download that needs sign-in."""

    # Absolute XPaths of the page elements the flow interacts with.
    _DOWNLOAD_XPATH = (
        "/html/body/main/div[1]/div/div[5]/div[2]/div[1]/div/div/div[2]/div[2]/div[1]/div[2]/a[1]/div/span"
    )
    _SIGNIN_XPATH = "/html/body/main/div[1]/div[1]/div/form/div[2]/div/div[1]/a/li"
    _EMAIL_INPUT_XPATH = (
        "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[1]/div/form/span/section/div/div/div[1]/div/div[1]/div/div[1]/input"
    )
    _AFTER_EMAIL_XPATH = (
        "/html/body/div[1]/div[1]/div[2]/div/div[2]/div/div/div[2]/div/div[2]/div/div[1]/div/div/span/span"
    )

    def construct(self):
        """Start the chromedriver service and open a remote session on it."""
        # chromedriver executable located on the desktop
        self.service = Service('/home/abilashbodapati/Desktop/chromedriver')
        self.service.start()
        self.driver = webdriver.Remote(self.service.service_url)

    def startChromeService(self, url):
        """Load ``url`` in the existing session and maximize the window."""
        browser = self.driver
        browser.get(url)
        browser.maximize_window()

    def _click_then_wait(self, xpath, seconds):
        """Click the element at ``xpath`` and sleep for ``seconds``."""
        self.driver.find_element_by_xpath(xpath).click()
        time.sleep(seconds)

    def downloadData(self):
        """Click Download, go through the sign-in steps, click Download again.

        Expects the dataset page to be loaded already.
        """
        # Download button, then the Signin button
        self._click_then_wait(self._DOWNLOAD_XPATH, 4)
        self._click_then_wait(self._SIGNIN_XPATH, 5)

        # Type the address into the sign-in form and continue
        email_field = self.driver.find_element_by_xpath(self._EMAIL_INPUT_XPATH)
        email_field.send_keys('*****@*****.**')
        time.sleep(20)
        self._click_then_wait(self._AFTER_EMAIL_XPATH, 35)

        # Download button once more, now that the session is signed in
        self._click_then_wait(self._DOWNLOAD_XPATH, 1.75)

    def moveDataToFolder(self):
        """Unzip the downloaded bundle archive into ``./Datasets``."""
        unzip_command = (
            'unzip -q /home/abilashbodapati/Downloads/*_bundle_archive.zip -d ./Datasets'
        )
        os.system(unzip_command)
def request_raw_html_from_wiki() -> str:
    """Return the page source of ``stardew_fish_url`` loaded in headless Chrome.

    The browser and the chromedriver service are shut down before
    returning, including when loading the page fails.
    """
    service = Service('./chromedriver/chromedriver')
    service.start()
    try:
        options = Options()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        driver = webdriver.Remote(service.service_url, options=options)
        try:
            driver.get(stardew_fish_url)
            return driver.page_source
        finally:
            driver.quit()
    finally:
        service.stop()
    def init_against_external_service(cls):
        """Start chromedriver as a service and build ``cls`` around a session.

        Returns:
            ``cls(driver)`` where ``driver`` is a ``Remote`` session with
            Chrome capabilities connected to the started service.
        """
        service = Service(CHROMEDRIVER_BINARY_PATH)
        service.start()
        service_url = service.service_url

        # # for manually-run server
        # service_url = 'http://localhost:9515'

        try:
            driver = Remote(service_url, DesiredCapabilities.CHROME)
        except Exception:
            # No session exists, so nothing else would stop the service.
            service.stop()
            raise
        # NOTE(review): selenium's Service appears to stop its process when
        # finalized; keeping a reference on the driver keeps the service
        # alive for the session's lifetime - confirm against the installed
        # selenium version.
        driver._chromedriver_service = service

        return cls(driver)
Пример #20
0
def get_driver():
    """Return the shared Chrome driver, creating it on first use.

    Chrome arguments and driver service arguments are derived from optional
    attributes on ``webium.settings``; a missing or falsy attribute leaves
    the Chrome flag off and selects the "no"/"false" service argument.

    Returns:
        The module-level ``_driver_instance``.
    """
    global _driver_instance
    # Return the cached driver before doing any work, so repeated calls do
    # not start another chromedriver service.
    if _driver_instance:
        return _driver_instance

    settings = webium.settings
    chromedriver_path = settings.chromedriverpath
    # NOTE(review): this service is started but the driver below is created
    # from executable_path rather than from the service URL - confirm
    # whether it is still needed.
    c_service = Service(chromedriver_path)
    c_service.command_line_args()  # return value is discarded
    c_service.start()

    # (settings attribute, Chrome argument added when it is truthy).
    # "chrome_handless" is the attribute name read at runtime, kept as is.
    chrome_flags = (
        ("chrome_nosandbox", '--no-sandbox'),
        ("chrome_disable_shmusage", '--disable-dev-shm-usage'),
        ("chrome_ignore_certificate_errors", '--ignore-certificate-errors'),
        ("chrome_disable_gpu", '--disable-gpu'),
        ("chrome_disable_plugins", '--disable-plugins'),
        ("chrome_handless", '--headless'),
    )
    chrome_options = Options()
    for setting_name, flag in chrome_flags:
        if getattr(settings, setting_name, False):
            chrome_options.add_argument(flag)

    # (settings attribute, argument when truthy, argument otherwise)
    service_switches = (
        ("service_load_images", '--load-images=yes', '--load-images=no'),
        ("service_disk_cache", '--disk-cache=yes', '--disk-cache=no'),
        ("service_ignore_ssl_errors", '--ignore-ssl-errors=true',
         '--ignore-ssl-errors=false'),
    )
    service_args = [
        enabled if getattr(settings, setting_name, False) else disabled
        for setting_name, enabled, disabled in service_switches
    ]

    _driver_instance = webdriver.Chrome(chrome_options=chrome_options,
                                        service_args=service_args,
                                        executable_path=chromedriver_path)
    _driver_instance.implicitly_wait(settings.implicit_timeout)
    return _driver_instance
Пример #21
0
def initializeDriver(browser_name='google',
                     driver='/opt/google/chrome/chromedriver'):
    """Create a headless Chrome driver together with a started driver service.

    Args:
        browser_name: Browser to create; only ``'google'`` is supported.
        driver: Path to the chromedriver executable.

    Returns:
        A ``(browser, c_service)`` tuple.

    Raises:
        ValueError: if ``browser_name`` is not ``'google'``.
    """
    print('initializeDriver: create a ' + browser_name + ' driver')
    if browser_name != 'google':
        # Fail with a clear message instead of an UnboundLocalError at the
        # return statement.
        raise ValueError('unsupported browser_name: ' + repr(browser_name))

    c_service = Service(driver)
    c_service.command_line_args()  # return value is discarded
    c_service.start()
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        browser = webdriver.Chrome(executable_path=driver,
                                   chrome_options=options)
    except Exception:
        # The caller never receives the service, so stop it here.
        c_service.stop()
        raise
    return browser, c_service
Пример #22
0
    def setUp(self):
        """Start the Chromium driver service and register its shutdown."""
        super(ChromiumWebDriverFixture, self).setUp()
        # Deferred import so selenium is only needed when the fixture is used.
        from selenium.webdriver.chrome.service import Service as ChromeService

        driver_binary = "/usr/lib/chromium-browser/chromedriver"
        driver_port = 4444
        chrome_service = ChromeService(driver_binary, driver_port)

        # The chrome driver needs LD_LIBRARY_PATH to locate its libraries, so
        # the variable is put in place before the service is started.
        library_path = EnvironmentVariable(
            "LD_LIBRARY_PATH", "/usr/lib/chromium-browser/libs")
        self.useFixture(library_path)
        chrome_service.start()

        # The service is stopped again during cleanup.
        self.addCleanup(chrome_service.stop)
class ChromeDriverManager(DriverManager):
    """Chrome implementation of ``DriverManager``.

    The chromedriver location is the directory of this file up to the first
    ``\\base`` segment, followed by the ``BrowserPath`` environment setting.
    """

    __chservice = None

    @staticmethod
    def _driver_path():
        """Return the chromedriver path built from this file's location."""
        cur_dir_path = os.path.dirname(os.path.realpath(__file__))
        return cur_dir_path.split(
            sep='\\base')[0] + QEEnvironment.get_environment_dict().get(
                'BrowserPath')

    @staticmethod
    def _chrome_options():
        """Return Chrome options with the three flags used by every session."""
        chrome_option = webdriver.ChromeOptions()
        chrome_option.add_argument("--disable-infobars")
        chrome_option.add_argument("--start-maximized")
        chrome_option.add_argument("--disable-popup-blocking")
        return chrome_option

    def launch_browser(self):
        """Open Chrome directly (no service) and load the Cogmento UI."""
        chromedriver = self._driver_path()
        os.environ["webdriver.chrome.driver"] = chromedriver
        driver = webdriver.Chrome(chromedriver, options=self._chrome_options())
        driver.get('https://ui.cogmento.com/')

    def start_service(self):
        """Start the chromedriver service unless one was started already.

        Failures are reported by printing the traceback; nothing is raised.
        """
        try:
            if self.__chservice is None:
                self.__chservice = Service(self._driver_path())
                self.__chservice.start()
                print('Service is started')
        except Exception:
            # print_exc() writes the traceback itself and returns None, so
            # its result must not be passed to print().
            traceback.print_exc()

    def stop_service(self):
        """Stop the service if it was started and is still connectable."""
        if self.__chservice is not None and self.__chservice.is_connectable():
            print('Stop service')
            self.__chservice.stop()

    def create_driver(self):
        """Create the remote driver and its event-firing wrapper.

        Sets ``self.driver`` and ``self.edriver``, applies the configured
        ``ImplicitWait`` and loads the configured ``URL``. Requires
        ``start_service`` to have succeeded beforehand.
        """
        capabilities = self._chrome_options().to_capabilities()
        self.driver = webdriver.Remote(self.__chservice.service_url,
                                       desired_capabilities=capabilities)
        self.edriver = EventFiringWebDriver(self.driver, EventListener())
        self.edriver.implicitly_wait(
            QEEnvironment.get_environment_dict().get('ImplicitWait'))
        self.edriver.get(QEEnvironment.get_environment_dict().get('URL'))
Пример #24
0
def getcook():
    """Log in through headless Chrome and return the session cookies.

    Returns:
        The list returned by ``driver.get_cookies()`` right after the login
        form is submitted, so other requests can reuse the session instead
        of logging in each time.
    """
    loginurl = 'http://113.57.169.227:8088/ccps/login.jsp'  # login page
    path = r'd:\chromedriver.exe'

    c_service = Service(path)
    c_service.command_line_args()  # return value is discarded
    c_service.start()
    try:
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')

        driver = webdriver.Chrome(executable_path=path, options=chrome_options)
        try:
            driver.get(loginurl)  # request the login page
            driver.find_element_by_id('wcode').clear()  # clear the user name box
            driver.find_element_by_id('wcode').send_keys(u'WHBK100')  # type the user name
            driver.find_element_by_id('password').clear()  # clear the password box
            driver.find_element_by_id('password').send_keys(u'')  # type the password

            submit = driver.find_element_by_link_text("登录")
            # Captcha handling is switched off: the placeholder below is
            # always empty, so the prompt is never shown.
            captcha = []
            if captcha:
                captcha_field = driver.find_element_by_id('captcha_field')  # captcha input box
                text = input("请输入验证码:")  # read the captcha from the console
                captcha_field.send_keys(text)  # forward it to the browser
            submit.click()  # submit the form with or without a captcha

            cookies = driver.get_cookies()  # read the cookies

            time.sleep(1)
        finally:
            driver.quit()
    finally:
        c_service.stop()
    return cookies
Пример #25
0
def get_urls(xingqi, A):
    """Collect odds-page URLs for the table rows whose third cell matches.

    Loads the score page in headless Chrome, clicks a tab, scrolls to the
    bottom twice so all rows are loaded, then parses the page source.

    Args:
        xingqi: Text searched for inside the third cell of each row
            (presumably a weekday label - verify against the caller).
        A: Mapping updated in place with ``url -> third cell text`` for
            every matching row.

    Returns:
        list of the matching URLs, in table order.
    """
    # The browser runs in the background and does not always close cleanly,
    # so the driver process is controlled through an explicit service.
    # Raw string: the backslashes are path separators, not escapes.
    c_service = Service(
        r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
    c_service.command_line_args()  # return value is discarded
    c_service.start()
    try:
        # Chrome's own headless mode
        opt = Options()
        opt.add_argument('--headless')
        browser = webdriver.Chrome(chrome_options=opt)
        try:
            wait = WebDriverWait(browser, 10)  # wait up to 10 seconds
            browser.get('http://www.qqshidao.com/index.php?c=home&a=bifen')
            time.sleep(3)
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="app"]/div[6]/div/span[6]')))
            submit.click()
            time.sleep(2)
            # Scroll far enough to reach the bottom of the page.
            js = 'var q=document.documentElement.scrollTop=100000'
            # With many matches the first scroll only loads more rows...
            browser.execute_script(js)
            time.sleep(1)
            # ...so scroll once more to load the rest.
            browser.execute_script(js)
            time.sleep(2)

            yuanma = browser.page_source
        finally:
            browser.quit()
    finally:
        c_service.stop()

    s = etree.HTML(yuanma)
    urls = []
    for row in s.xpath('//*[@id="app"]/div[7]/div/table/tbody/tr[@data-fid]'):
        cell_text = row.xpath('./td[3]/text()')
        if not cell_text:
            # A row without text in its third cell cannot match.
            continue
        xingqiji = cell_text[0]
        if xingqi in xingqiji:
            url = 'http://www.qqshidao.com/index.php?c=odds&a=betfair&fid={}'.format(
                row.attrib['data-fid'])
            A[url] = xingqiji
            urls.append(url)

    return urls
def main():
    """Pick a random line from ``jokes.txt`` and post it as a Facebook status.

    Exits with status 1 when the jokes file cannot be read.
    """
    # seed cause random
    random.seed()

    # get from file a pun to share
    try:
        with open("jokes.txt", 'r') as file:
            # Read at least one line: a count of zero would leave read_data
            # undefined. The last line read is the one that gets posted.
            for _ in range(random.randrange(1, 76)):
                read_data = file.readline()
    except (IOError, OSError):
        print('Could not open file.')
        exit(1)

    # Facebook
    service = Service(
        '/python-selenium/chromedriver')  #path to the chromedriver
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            driver.get('https://www.facebook.com')

            username = driver.find_element_by_id("email")
            password = driver.find_element_by_id("pass")
            submit = driver.find_element_by_id("loginbutton")

            # facebook login
            username.send_keys("YOUR-EMAIL")
            password.send_keys(pw)
            time.sleep(3.2)  # wait for a bit
            submit.click()

            time.sleep(4.8)  # wait for a bit

            # setup the post to share a joke
            status = driver.find_element_by_xpath(
                "//textarea[@name='xhpc_message']")
            status.send_keys(str(read_data))
            postbutton = driver.find_element_by_xpath(
                "//button[contains(.,'Post')]")
            # post!
            postbutton.click()
            time.sleep(7.8)  # wait for a bit
        finally:
            driver.quit()
    finally:
        service.stop()
Пример #27
0
class Headless2:
    """Fetches pages through headless Chrome sessions from asyncio code.

    One chromedriver service is started per instance; every ``get`` call
    opens its own session against it, limited to ``pool_size`` concurrent
    calls by a semaphore.
    """

    def __init__(self, loop=None, pool_size=10):
        """Start the driver service.

        Args:
            loop: Event loop used to run the blocking browser work;
                defaults to ``asyncio.get_event_loop()``.
            pool_size: Maximum number of concurrent ``get`` calls.
        """
        self.loop = loop if loop else asyncio.get_event_loop()
        self.driver_options = webdriver.ChromeOptions()
        self.driver_options.add_argument('headless')

        self.sem = asyncio.Semaphore(pool_size)
        # init_pool() fills this queue; it has to exist before that call.
        self.driver_pool = asyncio.Queue()
        self.service = Service('chromedriver')
        self.service.start()

    def _new_driver(self):
        """Open a new remote session against the instance's service."""
        return webdriver.Remote(
            self.service.service_url,
            desired_capabilities=self.driver_options.to_capabilities())

    async def init_pool(self, pool_size):
        """Create ``pool_size`` sessions and put them on ``driver_pool``."""
        for _ in range(pool_size):
            await self.driver_pool.put(self._new_driver())

    async def get(self, url, locator=None, timeout=10):
        """Load ``url`` in a fresh session and return its page source.

        Args:
            url: Address to load.
            locator: Optional locator tuple; when given, wait until matching
                elements are present, otherwise wait for the document's
                ``readyState`` to be ``complete``.
            timeout: Seconds to wait for that condition.

        Returns:
            The page source, or ``None`` when loading or waiting failed
            (the error is printed).
        """
        def _get(_driver, _url):
            wait = WebDriverWait(_driver, timeout, poll_frequency=1)
            try:
                _driver.delete_all_cookies()
                _driver.get(_url)
                if locator:
                    wait.until(EC.presence_of_all_elements_located(locator))
                else:
                    wait.until(lambda x: x.execute_script(
                        'return document.readyState;') == 'complete')
                return _driver.page_source
            except Exception as e:
                print(url, e)

            return None

        async with self.sem:
            print(url)
            driver = self._new_driver()
            try:
                # _get is a blocking, synchronous function: run it in the
                # loop's default executor and await that future. Its plain
                # return value cannot be awaited directly.
                html = await self.loop.run_in_executor(None, _get, driver, url)
            finally:
                driver.quit()
            print(url)
            return html
Пример #28
0
def GetEpubFromHaoDoo(pLink, code, book):
    """Download a book's epub through the browser and rename the file.

    Args:
        pLink: Page that carries the epub download button.
        code: Base name of the downloaded file (``<code>.epub``).
        book: Base name the file is renamed to (``<book>.epub``).

    Blocks until ``/home/raylex/Downloads/<code>.epub`` exists, then changes
    the working directory to the downloads folder and renames the file.
    """
    service = Service('/usr/bin/chromedriver')
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            driver.implicitly_wait(100)  # seconds
            driver.get(pLink)
            submitElement = driver.find_element_by_xpath(
                '//input[@value="下載 epub 檔"]')
            submitElement.click()
            btn_in_modal_locator = (By.ID, 'okButton')
            wait = WebDriverWait(driver, 100)
            btn_in_modal = wait.until(
                EC.element_to_be_clickable(btn_in_modal_locator))
            btn_in_modal.click()
            file_path = '/home/raylex/Downloads/' + code + '.epub'
            # Poll once per second until the download shows up on disk.
            while not os.path.exists(file_path):
                time.sleep(1)
            os.chdir('/home/raylex/Downloads')
            os.rename(code + '.epub', book + '.epub')
        finally:
            driver.quit()
    finally:
        service.stop()
    return
Пример #29
0
class StartScrape():
    """Context manager yielding a headless Chromium session.

    Entering starts a chromedriver service and opens a remote session on
    it; leaving quits the session and stops the service.
    """

    def __enter__(self):
        """Start the service, open the session and return the driver."""
        self.chrome_path = '/usr/bin/chromium-browser'
        self.chromedriver_path = '/usr/lib/chromium/chromedriver'
        self.o = Options()
        self.o.binary_location = self.chrome_path
        self.o.add_argument('--headless')
        self.o.add_argument('--disable-gpu')
        self.o.add_argument('--no-sandbox')
        self.o.add_argument('--window-size=1200x600')
        self.s = Service(executable_path=self.chromedriver_path)
        self.s.start()
        try:
            self.driver = webdriver.Remote(
                self.s.service_url,
                desired_capabilities=self.o.to_capabilities()
            )
        except Exception:
            # __exit__ is not called when __enter__ raises, so the service
            # has to be stopped here.
            self.s.stop()
            raise
        return self.driver

    def __exit__(self, exception_type, exception_value, traceback):
        """Quit the session and stop the driver service."""
        try:
            self.driver.quit()
        finally:
            self.s.stop()
Пример #30
0
def download_by_webdriver(url, charset='utf-8', proxy=None, user_agent=None):
    """Download a page with headless Chrome.

    Args:
        url: Address to load.
        charset: Encoding the page source is round-tripped through;
            characters that cannot be encoded are dropped.
        proxy: Optional value for Chrome's ``proxy-server`` argument.
        user_agent: Optional user agent; a fixed Chrome 71 / macOS string is
            used when omitted.

    Returns:
        ``(page_source, final_url)``, or ``(None, None)`` when anything
        fails (the error is printed). The browser and the driver service
        are shut down in both cases.
    """
    print("[download_by_webdriver]: begin download the link %s" % url)
    p_content, current_url = None, None
    c_service = None
    driver = None
    try:
        # Browser settings
        options = webdriver.ChromeOptions()
        # Chrome headless mode
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # Use Chinese as the browser language
        options.add_argument('lang=zh_CN.UTF-8')
        # Proxy
        if proxy:
            print("[download_by_webdriver]: use proxy %s" % proxy)
            options.add_argument('proxy-server=' + proxy)
        # User agent header
        if user_agent:
            options.add_argument('user-agent=' + user_agent)
        else:
            options.add_argument(
                'user-agent=' + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) '
                                'AppleWebKit/537.36 (KHTML, like Gecko) '
                                'Chrome/71.0.3578.98 Safari/537.36')
        # Driver service
        c_service = Service('/usr/local/bin/chromedriver')
        c_service.command_line_args()  # return value is discarded
        c_service.start()
        driver = webdriver.Chrome(chrome_options=options)
        driver.get(url)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # NOTE(review): both timeouts are configured after the page was
        # already requested - confirm whether they were meant to precede
        # driver.get().
        driver.implicitly_wait(10)
        driver.set_page_load_timeout(15)
        page = driver.page_source.encode(charset, "ignore").decode(charset, 'ignore')
        p_content, current_url = page, driver.current_url
    except Exception as e:
        print("[download_by_webdriver]:", e)
        p_content, current_url = None, None
    finally:
        # Release the browser and the driver process on every path; cleanup
        # errors are reported but do not escape.
        for resource, closer in ((driver, 'quit'), (c_service, 'stop')):
            if resource is None:
                continue
            try:
                getattr(resource, closer)()
            except Exception as e:
                print("[download_by_webdriver]:", e)
    return p_content, current_url
Пример #31
0
def download_by_webdriver(url, charset='utf-8'):
    """Download a page with headless Chrome.

    Args:
        url: Address to load.
        charset: Encoding the page source is round-tripped through;
            characters that cannot be encoded are dropped.

    Returns:
        ``(page_source, final_url)``, or ``(None, None)`` when anything
        fails (the error is printed). The browser and the driver service
        are shut down in both cases.
    """
    print("[download_by_webdriver]: begin download the link %s" % url)
    content, current_url = None, None
    c_service = None
    driver = None
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        c_service = Service('/usr/local/bin/chromedriver')
        c_service.command_line_args()  # return value is discarded
        c_service.start()
        driver = webdriver.Chrome(chrome_options=chrome_options)
        driver.get(url)
        driver.implicitly_wait(10)
        page = driver.page_source.encode(charset, "ignore").decode(
            charset, 'ignore')
        content, current_url = page, driver.current_url
    except Exception as e:
        print("[download_by_webdriver]:", e)
        content, current_url = None, None
    finally:
        # Release the browser and the driver process on every path; cleanup
        # errors are reported but do not escape.
        for resource, closer in ((driver, 'quit'), (c_service, 'stop')):
            if resource is None:
                continue
            try:
                getattr(resource, closer)()
            except Exception as e:
                print("[download_by_webdriver]:", e)
    return content, current_url
Пример #32
0
def get_product_links():
    """Collect product links from every page of the all-livestock category.

    Pages are requested in order starting at 1; scraping stops after the
    first page that yields fewer than 24 product links.

    Returns:
        set of links that start with ``<base_url>/product/``.
    """
    service = Service(DRIVER_PATH)
    service.start()
    try:
        driver = webdriver.Remote(service.service_url)
        try:
            # NOTE: process-wide switch to an unverified default HTTPS context.
            ssl._create_default_https_context = ssl._create_unverified_context

            logging.info(f"Starting scraping of links for: {base_url}")

            # get all product links from all pages
            i = 1
            products_per_page = 24
            product_prefix = f"{base_url}/product/"
            links = []
            while True:
                logging.info(f"Getting links on page # {i}")
                driver.get(
                    f'{base_url}/product-category/all-livestock/page/{i}/')

                hrefs = (
                    element.get_attribute("href")
                    for element in driver.find_elements_by_tag_name('a')
                )
                # Anchors without an href yield None; skip them instead of
                # calling startswith on None.
                current_page_links = [
                    link for link in hrefs
                    if link and link.startswith(product_prefix)
                ]

                logging.info(
                    f"Found {len(current_page_links)} products on page # {i}")

                i += 1
                links.extend(current_page_links)

                if len(current_page_links) < products_per_page:
                    logging.info("No more links available")
                    break

            return set(links)
        finally:
            driver.quit()
    finally:
        service.stop()