# Example #1
# 0
# Create the Edge WebDriver session from the "M.exe" driver binary and
# navigate it to `url`.
web = Edge(executable_path="M.exe")
# NOTE(review): `url` is not defined in the visible part of this script, and
# `html` is not read again below -- confirm both against the full file.
html = web.get(url)
time.sleep(1)  # fixed one-second pause before interacting with the page
web.find_element_by_xpath(
    '//*[@id="J_filter"]/div[1]/div[1]/a[2]').click()  # click the sort-by-sales link
# web.find_elements_by_class_name('fs-tit').click
time.sleep(1)  # fixed one-second pause after the click

# for i in range(1000,20000,1000):
#     js = "var q=document.documentElement.scrollTop={}".format(i)
#     web.execute_script(js)
#     time.sleep(1)
#
# web.execute_script("document.documentElement.scrollTop=0")

# Collect the <li> children of the element with class "gl-warp".  The locator
# contains a child combinator, i.e. it is a CSS selector, so it is looked up
# as one instead of being passed to the class-name locator.
goods = web.find_elements_by_css_selector('.gl-warp > li')

# enumerate() supplies the 1-based ID so that every item gets a distinct value.
for j, i in enumerate(goods, start=1):
    # One record per list item: ID, title, price and publisher.
    results = {}
    results['ID'] = j
    results['书名'] = i.find_element_by_xpath('.//div[@class="p-name"]//em').text
    time.sleep(1)
    # The currency sign is appended after the numeric price text.
    results['价格'] = i.find_element_by_xpath(
        './/div[@class="p-price"]//i').text + "¥"
    time.sleep(1)
    results['出版社'] = i.find_element_by_xpath(
        './/div[@class="p-shopnum"]//a').text

    # NOTE(review): `table` and `keys` are not used in the visible part of the
    # loop -- presumably they feed an INSERT statement; confirm.
    table = 'books'
    keys = ', '.join(results.keys())
from selenium.webdriver import Edge
import time
from bs4 import BeautifulSoup

# Create the Edge WebDriver session from the msedgedriver binary at this path.
driver = Edge(
    'C:\\Users\\cakarst\\IdeaProjects\\Documentation Web Scrapping\\msedgedriver.exe'
)
# Navigate to the documentation page to scrape.
# NOTE(review): the URL path uses doubled slashes ("//en-us//sql//...");
# confirm whether that is intentional.
driver.get(
    'https://docs.microsoft.com//en-us//sql//t-sql//data-types//date-transact-sql?view=sql-server-ver15'
)

# Every element carrying the 'tree-item' class; passed to click_buttons() below.
left_nav_buttons = driver.find_elements_by_class_name('tree-item')


def click_buttons(buttonList):
    """Expand every visible, collapsed tree node in *buttonList*, recursively.

    For each element that is displayed and whose ``aria-expanded`` attribute
    equals the string ``"false"``, the element's ``tree-expander`` child is
    clicked through JavaScript on the module-level ``driver``.  The
    ``tree-item`` elements found under that element are then processed the
    same way, followed by a 10 ms pause.

    Args:
        buttonList: Iterable of element objects exposing ``is_displayed``,
            ``get_attribute``, ``find_element_by_class_name`` and
            ``find_elements_by_class_name``.

    Returns:
        None.
    """
    for button in buttonList:
        # Skip anything that is hidden or not in the collapsed state.
        if not button.is_displayed():
            continue
        if button.get_attribute('aria-expanded') != "false":
            continue

        # Click via JavaScript rather than through the element's own click().
        driver.execute_script(
            "arguments[0].click();",
            button.find_element_by_class_name('tree-expander'))

        # Descend into the tree items located under this node.
        click_buttons(button.find_elements_by_class_name('tree-item'))
        time.sleep(.01)


#######################################################
# Run the expansion pass over the navigation items collected above.
click_buttons(left_nav_buttons)

# Parse the browser's current page source using the 'html.parser' backend.
bs = BeautifulSoup(driver.page_source, 'html.parser')
for link in bs.find_all(class_="tree-item is-leaf"):