def login():
    """Open the JD login page and start a WeChat login.

    Loads the JD passport login page, pauses briefly, and clicks the
    "微信" (WeChat) link when the page offers one.  The user is then given
    six seconds to finish logging in before the function returns.  If the
    link is not present, a notice is printed and the function returns
    without waiting.

    Uses the ``browser`` WebDriver object defined outside this function.
    """
    browser.get("https://passport.jd.com/uc/login?ltype=logout&ReturnUrl=https://global.jd.com/")
    time.sleep(2)  # pause after navigation before looking for the link

    # find_element_* raises NoSuchElementException rather than returning a
    # falsy value, so it cannot be used as an existence check.  The plural
    # form returns an empty list when nothing matches.
    wechat_links = browser.find_elements_by_link_text("微信")
    if not wechat_links:
        print("WeChat login link not found")
        return

    wechat_links[0].click()
    print("Please login within 6 sec")
    time.sleep(6)
def _extract_signals(item_html, signal_rules, name_rule):
    """Parse one news item into a ``{stock_name: signal_text}`` mapping.

    ``signal_rules`` are tried in order and the first one that matches
    supplies the signal text; ``name_rule`` collects every stock name in the
    item.  An empty dict is returned when either part is missing.
    """
    signal = []
    for rule in signal_rules:
        signal = re.findall(rule, item_html, re.S)
        if signal:
            break
    stock_names = re.findall(name_rule, item_html, re.S)
    if not stock_names or not signal:
        return {}
    # Every stock listed under the item shares the item's first signal text.
    return {name: signal[0] for name in stock_names}


def openurl(url, num):
    """Scrape stock names and their signal text from a news feed page.

    Opens ``url`` in a new Chrome session, parses the news items already on
    the page, and then keeps clicking the "home-news-footer" element to load
    more items until at least ``num`` distinct stock names were collected.

    Args:
        url: Address of the page to scrape.
        num: Number of distinct stock names to collect before stopping.

    Returns:
        dict mapping stock name to the text that follows the item's
        ``icon-lihao`` / ``icon-likong`` image.  A name seen in several items
        keeps the value from the last item processed.  The dict may hold more
        than ``num`` entries, or fewer if the page stops yielding new items.

    The browser is always closed, including when an exception propagates.
    """
    # All patterns are matched against the markup as re-serialised by
    # ``pq`` (presumably pyquery -- confirm), not the raw page source.
    item_rule = r'<div class="news-item-container">(.*?)<div data-v-00b2e9bc=""/>'

    # Items present on first load: ``src`` precedes the ``data-v-*`` attribute.
    initial_signal_rules = (
        r'<img src="/img/icon-lihao.png" data-v-6c26747a=""/>(.*?)<!----></span>',
        r'<img src="/img/icon-likong.png" data-v-6c26747a=""/>(.*?)</span>',
    )
    initial_name_rule = r'<span class="stock-group-item-name" data-v-f97d9694="">(.*?)</span>'

    # Items appended after clicking the footer: the attribute order is swapped.
    loaded_signal_rules = (
        r'<img data-v-6c26747a="" src="/img/icon-lihao.png"/>(.*?)<!----></span>',
        r'<img data-v-6c26747a="" src="/img/icon-likong.png"/>(.*?)</span>',
    )
    loaded_name_rule = r'<span data-v-f97d9694="" class="stock-group-item-name">(.*?)</span>'

    # Give up after this many consecutive clicks that add no news item, so an
    # exhausted feed cannot keep the loop running forever.
    max_stalled_clicks = 5

    dic = {}
    browser = webdriver.Chrome()  # open the browser
    try:
        browser.get(url)  # navigate to the target site
        data = str(pq(browser.page_source))  # normalised page markup
        print(data)

        datalist = re.findall(item_rule, data, re.S)
        for item_html in datalist:
            found = _extract_signals(item_html, initial_signal_rules, initial_name_rule)
            for name, signal in found.items():
                dic[name] = signal
                print("正在爬取第", len(dic) + 1, "个请稍等.....")

        processed = len(datalist)  # number of items already parsed
        stalled_clicks = 0
        while len(dic) < num and stalled_clicks < max_stalled_clicks:
            browser.find_element_by_class_name("home-news-footer").click()
            time.sleep(1)  # wait for the additional items to be rendered
            data = str(pq(browser.page_source))
            datalist = re.findall(item_rule, data, re.S)

            # Only parse the items that appeared since the previous pass.
            for item_html in datalist[processed:]:
                dic.update(_extract_signals(item_html, loaded_signal_rules, loaded_name_rule))

            if len(datalist) > processed:
                stalled_clicks = 0
            else:
                stalled_clicks += 1
            processed = len(datalist)

            if len(dic) < num:
                print("正在爬取第", len(dic) + 1, "个请稍等.....")
    finally:
        browser.quit()

    print("爬取完毕!!")
    return dic
def _parse_job_row(li):
    """Read the CSV fields of one job-listing ``<li>`` element.

    Returns ``[title, address, company, salary, exp, edu, job_type]``.
    """
    # Job title.
    title = li.find_element_by_css_selector(
        "div.list_item_top > div.position > div.p_top > a > h3").text
    # Address: the part of the location text before the "·" separator.
    address = li.find_element_by_css_selector(
        "div.list_item_top > div.position > div.p_top > a > span > em").text.split("·")[0]
    # Company name.
    company = li.find_element_by_css_selector(
        "div.list_item_top > div.company > div.company_name > a").text
    # Salary, experience and education share one space-separated line.
    parts = li.find_element_by_css_selector(
        "div.list_item_top > div.position > div.p_bot > div").text.split(" ")
    salary = parts[0]
    exp = parts[1].replace("经验", "")
    edu = parts[-1]
    # Category: the text before the first "/" in the "industry" element.
    job_type = li.find_element_by_class_name("industry").text.split("/")[0].strip()
    return [title, address, company, salary, exp, edu, job_type]


def getDate():
    """Search the site for "数据分析" jobs and append every result to a CSV file.

    Opens ``url`` with the ``browser`` WebDriver (both defined outside this
    function), dismisses the city chooser, submits the search, and then walks
    through the result pages.  Each listing is appended as one row to
    ``lagou_date.cvs`` with the columns title, address, company, salary,
    experience, education and category.

    Paging stops once the page source contains ``pager_next_disabled``; that
    final page is scraped before stopping.
    """
    # 3. Send the request.
    browser.get(url)
    # 4. Pick the nationwide option in the city chooser.
    browser.find_element_by_css_selector("#changeCityBox > p.checkTips > a").click()
    # 5. Type the search keyword into the search box.
    browser.find_element_by_css_selector("#search_input").send_keys("数据分析")
    # 6. Click the search button.
    browser.find_element_by_css_selector("#search_button").click()
    time.sleep(3)

    # 7. Page through the results automatically.
    page_no = 1  # current page number
    row_no = 1   # running row counter (not reset between pages)

    # Open the output file once instead of once per row.
    with open("lagou_date.cvs", "a", encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        while True:
            rows = browser.find_elements_by_css_selector("#s_position_list > ul > li")
            for li in rows:
                writer.writerow(_parse_job_row(li))
                print("第{}页第{}条下载成功".format(page_no, row_no))
                row_no += 1

            # Check for the disabled "next" button only AFTER scraping, so
            # the last page is not skipped.
            if 'pager_next_disabled' in browser.page_source:
                break

            # Go to the next page.
            browser.find_element_by_class_name("pager_next").click()
            page_no += 1
            time.sleep(3)
from selenium import webdriver
import time
import math
# NOTE(review): this name is rebound to the WebDriver instance below, so the
# import looks accidental -- confirm before removing it.
from idlelib import browser
from selenium.webdriver.support.ui import Select

# Script: read the two numbers shown on the page, pick their sum in the
# <select> drop-down, and submit the form.
link = "http://suninjuly.github.io/selects1.html"

# Start the driver outside the ``try`` block: if start-up fails there is no
# session to close, and the ``finally`` clause must not run ``quit()`` on
# something that is not a WebDriver.
browser = webdriver.Chrome()
try:
    browser.get(link)

    # The operands are the text of the elements with ids "num1" and "num2".
    num1 = browser.find_element_by_id("num1")
    x = num1.text
    num2 = browser.find_element_by_id("num2")
    y = num2.text

    # Option values are strings, so convert the integer sum back to text.
    z = str(int(x) + int(y))

    select = Select(browser.find_element_by_tag_name("select"))
    select.select_by_value(z)

    button = browser.find_element_by_css_selector("button.btn")
    button.click()
finally:
    # Keep the window open for 15 seconds before closing the browser.
    time.sleep(15)
    browser.quit()
def picking():
    """Navigate the ``browser`` WebDriver to the JD cart page, then pause."""
    cart_url = "https://cart.jd.com/cart.action#none"
    # Open your cart.
    browser.get(cart_url)
    # Pause for two seconds after the navigation.
    time.sleep(2)