# NOTE(review): multi-line code collapsed onto one physical line by whitespace
# loss, and truncated mid-expression at the trailing `'raw_price': ' '.join(` —
# reproduced byte-for-byte below; recover the original line breaks from VCS
# before restructuring.
# Visible logic: for each (category, url) in urls_ctgs_dict, iterate up to 10
# listing pages (page offset = p * 60 in the URL), lazily initialising the
# "saucey" driver session once; each page is cached to disk via fpath_namer and
# only fetched when the cache file is absent. Cached HTML is then parsed with
# lxml, each schema.org/Product node yields a product URL (unwrapping
# redirect-style `?url=` query parameters via parse_qs/urlsplit, then
# clean_url), and a dict of product fields is stored in `products[produrl]`.
# Assumes `driver`, `parser`, `root_url`, `shop_id`, `products`, `categories`
# are defined earlier in the file — TODO confirm.
# Categories scraping for ctg, url in urls_ctgs_dict.items(): categories[ctg] = [] number_of_pdcts_in_ctg = 0 for p in range(10): print(ctg, p) fpath = fpath_namer(shop_id, 'ctg', ctg, p) if not op.exists(fpath): if not saucey_was_initialised: init_saucey(driver) saucey_was_initialised = True driver.get(url.format(page=p * 60)) driver.wait_for_xpath('//*[@itemtype="http://schema.org/Product"]', timeout=10) driver.smooth_scroll(sleep_time=0.3) driver.save_page(fpath, scroll_to_bottom=True) # Parsing tree = etree.parse(BytesIO(open(fpath, 'rb').read()), parser=parser) for li in tree.xpath('//*[@itemtype="http://schema.org/Product"]'): produrl = "".join(li.xpath('.//a[@itemprop="url"]/@href')) produrl = parse_qs( urlsplit(produrl).query)['url'][0] if 'url' in parse_qs( urlsplit(produrl).query) else produrl produrl = clean_url(produrl, root_url) products[produrl] = { 'pdct_name_on_eretailer': " ".join(''.join( li.xpath('.//*[@itemprop="name"]//text()')).split()), 'raw_price': ' '.join(
# NOTE(review): multi-line code collapsed onto one physical line by whitespace
# loss, and truncated at the end — the final `if brm.find_brand(...)['brand']
# in mh_brands:` has no body visible. Reproduced byte-for-byte below; recover
# the original formatting from VCS before editing.
# Visible logic, first section (keyword searches, seijoishii.com): for each
# keyword, fetch the search-results URL with Selenium only if page 0 is not
# already cached, save up to 2 pages to disk via fpath_namer, and delegate
# parsing to kw_parsing(), which updates both `searches` and `products`.
# Carries an original `# TODO : modify URL` on kw_search_url.
# Visible logic, second section (product pages): iterate the deduplicated,
# sorted product URLs and use BrandMatcher.find_brand(..., special_country='JP')
# to filter products whose brand is in mh_brands — the action taken on a match
# is in the truncated part. Assumes `keywords`, `searches`, `products`,
# `driver`, `shop_id`, `mh_brands` are defined earlier in the file — TODO confirm.
# # KW searches scrapping ############ ###################################### # KW searches Scraping - with requests - one page per search kw_search_url = "https://www.seijoishii.com/s?search_word={kw}&x=0&y=0" # TODO : modify URL for kw in keywords: searches[kw] = [] number_of_pdcts_in_kw_search = 0 if not op.exists(fpath_namer(shop_id, 'search', kw, 0)): driver.get(kw_search_url.format(kw=kw)) for p in range(2): fpath = fpath_namer(shop_id, 'search', kw, p) if not op.exists(fpath): sleep(2) driver.smooth_scroll() driver.save_page(fpath, scroll_to_bottom=True) searches, products = kw_parsing(fpath, kw, searches, products) print(kw, len(searches[kw])) ###################################### # # Product pages scraping ########### ###################################### # Download the pages - with selenium brm = BrandMatcher() for url in sorted(list(set(products))): d = products[url] if brm.find_brand(d['pdct_name_on_eretailer'], special_country='JP')['brand'] in mh_brands:
# NOTE(review): multi-line code collapsed onto one physical line by whitespace
# loss, and truncated at BOTH edges — it opens mid-way through the
# urls_ctgs_dict literal (only the 'rum' and 'liquor' entries are visible) and
# ends mid-way through the products[produrl] dict literal. Reproduced
# byte-for-byte below; recover the original formatting from VCS before editing.
# Visible logic (Morrisons groceries category scraper): for each category URL,
# fetch a single page (range(1)) with Selenium unless already cached to disk
# via fpath_namer, then parse the cached HTML with lxml. Product tiles are
# selected with the XPath '//ul[contains(@class, "fops-shelf")]/li[...]';
# tiles missing a link href or an <h4> name are skipped. Product links are
# unwrapped from redirect-style `?url=` query parameters (parse_qs/urlsplit)
# and normalised with clean_url before being keyed into `products`.
# Assumes `driver`, `parser`, `root_url`, `shop_id`, `categories`, `products`
# are defined earlier in the file — TODO confirm.
'rum': 'https://groceries.morrisons.com/webshop/getCategories.do?tags=%7C105651%7C103120%7C105916%7C151520&Asidebar=1', 'liquor': 'https://groceries.morrisons.com/webshop/getCategories.do?tags=%7C105651%7C103120%7C105916%7C151516&Asidebar=1', } for ctg, url in urls_ctgs_dict.items(): categories[ctg] = [] number_of_pdcts_in_ctg = 0 for p in range(1): fpath = fpath_namer(shop_id, 'ctg', ctg, p) if not op.exists(fpath): driver.get(url) sleep(2) driver.smooth_scroll(10) driver.save_page(fpath, scroll_to_bottom=True) tree = etree.parse(BytesIO(open(fpath, 'rb').read()), parser=parser) for li in tree.xpath( '//ul[contains(@class, "fops-shelf")]/li[@class="fops-item"]'): if not li.xpath('.//a/@href') or not li.xpath('.//h4//text()'): continue produrl = li.xpath('.//a/@href')[0] produrl = parse_qs( urlsplit(produrl).query)['url'][0] if 'url' in parse_qs( urlsplit(produrl).query) else produrl produrl = clean_url(produrl, root_url) products[produrl] = { 'pdct_name_on_eretailer': li.xpath('.//h4//text()')[0].strip(),