def login(): global b b = Browser(driver_name="chrome") for i in range(0,3): b.visit(gift_url) b.find_by_id("ptLoginBtn").click() sleep(1) with b.get_iframe('loginFrame') as iframe: iframe.find_by_id('u').fill(username[i]) iframe.find_by_id('p').fill(passwd[i]) iframe.find_by_id('go').click() sleep(1) b.find_by_tag("a")[2].click() sleep(1) #大区,需要改成自己的,这是大地飞鹰 b.find_by_xpath('//select[@id="area1ContentId_wuxia"]/option[@value="7609516"]')._element.click() sleep(1) #服务器,需要改成自己的,这是藏锋谷 b.find_by_xpath('//select[@id="areaContentId_wuxia"]/option[@value="2002"]')._element.click() sleep(1) b.find_by_id("confirmButtonId_wuxia").click() sleep(1) b.get_alert().dismiss() sleep(1) b.find_by_id("ptLogoutBtn").click() sleep(5) print u"领取完毕" sleep(3)
def qianpiao():
    """Drive the browser through the ticket purchase flow.

    Rebinds the module-level browser ``b``, logs in via ``login()``, opens
    ``zhangxueyou_url``, picks the first selectable session starting at the
    module-level ``index``, fills in ``ticket_num`` and keeps clicking the
    save-order button until the URL contains ``pay_oreder``.
    """
    global b
    b = Browser(driver_name="chrome")
    b.visit(login_url)
    login()
    b.visit(zhangxueyou_url)

    sessions = b.find_by_name(u"2017[A CLASSIC TOUR学友.经典] 世界巡回演唱会—扬州站")
    print(len(sessions))

    position = index
    while position < len(sessions):
        candidate = sessions[position]
        candidate.check()
        # Class 'on' means the session was actually selected; stop there.
        if candidate['class'] == 'on':
            break
        sleep(0.5)
        b.find_by_xpath(u'//*[@id="JqueBoxer"]/div[1]/a').click()
        position += 1

    # Being bounced back to the login page means we have to log in again.
    if login_url in b.url:
        login()

    b.find_by_xpath(u'//*[@id="JreltList"]/ul/li[3]/dl/input').fill(ticket_num)
    b.find_by_xpath(u"/html/body/div[12]/div[2]/div[3]/div[5]/a").click()
    b.find_by_xpath(u"//*[@id='zidTabs']/li[2]").check()
    b.find_by_xpath(u'//*[@id="payNew-conts-show"]/div[2]/ul/li[2]/input').click()

    # Hammer the submit button until we land on the payment page.
    while pay_oreder not in b.url:
        b.find_by_id(u'saveOrder').click()
def supreme(url, keyword, color):
    """Open a product page, pick a colour, add to cart and start checkout.

    Args:
        url: Page to visit after the operator presses Enter.
        keyword: Partial link text identifying the product.
        color: One of 'Black', 'Red', 'Blue', 'Navy' or 'Green'; any other
            value leaves the default colour selected.

    Once the checkout form is present (an element with value 'USA' exists),
    four threads are started to fill in the form via ``AutoInfo``,
    ``AutoAddress``, ``AutoCardInfoOne`` and ``AutoCardInfoTwo``, all
    sharing the same browser object.
    """
    supported_colors = ('Black', 'Red', 'Blue', 'Navy', 'Green')

    def wait_for(finder, *args):
        # Busy-poll until the lookup returns a non-empty element list.
        while True:
            found = finder(*args)
            if not found.is_empty():
                return found

    b = Browser(driver_name="chrome")
    input("Press Enter to continue...")
    b.visit(url)
    b.find_link_by_partial_text(keyword).click()

    # Choose colour. The original 'Blue' and 'Navy' XPath literals were
    # missing their closing quote and could never match; building the
    # expression from one template keeps all colours consistent.
    if color in supported_colors:
        b.find_by_xpath(
            "/html/body//a[@data-style-name='%s']" % color).click()

    checkout_href = "https://www.supremenewyork.com/checkout"

    wait_for(b.find_by_value, "add to cart")
    b.find_by_value("add to cart").click()

    wait_for(b.find_link_by_href, checkout_href)
    b.find_link_by_href(checkout_href).click()

    # The checkout form is ready once the country selector shows up.
    wait_for(b.find_by_value, 'USA')

    for filler in (AutoInfo, AutoAddress, AutoCardInfoOne, AutoCardInfoTwo):
        Thread(target=filler, args=[b]).start()
def webscrabe(self,string,account,password,index):
    """Look up a student record and pass name/department to ``typename``.

    Logs in to the NUK academic site with ``account``/``password``, submits
    ``string`` in the ``No`` field of the student lookup form, reads the
    name and department cells from the result table, and forwards them to
    ``self.typename(index, name, department)``.

    The browser is always closed, even if a lookup step raises. The
    original ``borwser.quit`` lacked parentheses and never ran.
    """
    borwser = Browser("chrome")
    try:
        borwser.visit("https://aca.nuk.edu.tw/Aca/login.asp")
        borwser.find_by_name("Account").first.fill(account)
        borwser.find_by_name("Password").first.fill(password)
        borwser.find_by_name("B1").click()

        borwser.visit("https://aca.nuk.edu.tw/SN2/Basic/Student.asp")
        borwser.find_by_name("No").first.fill(string)
        borwser.find_by_name("B3").click()

        name = borwser.find_by_xpath("//tbody[1]/tr[2]/td[3]").text
        department = borwser.find_by_xpath("//tbody[1]/tr[2]/td[5]").text
        self.typename(index,name,department)
    finally:
        borwser.quit()
class SuperApply():
    """Browser automation for logging in to vipkid.com.cn and booking."""

    def __init__(self, username, password):
        """Store the credentials and open a Chrome browser session."""
        self.username = username
        self.password = password
        self.browser = Browser(driver_name="chrome")
        self.url = "https://www.vipkid.com.cn/login"

    def login(self):
        """Visit the login page, fill in the credentials and submit."""
        self.browser.visit(self.url)
        self.browser.find_by_xpath("//input[@type='text']").first.fill(self.username)
        self.browser.find_by_xpath("//input[@type='password']").first.fill(self.password)
        login_button = self.browser.find_by_xpath("//span[@class='login-button']")
        login_button.click()

    def close_popup(self):
        """Close the user-task modal if it is present (best effort).

        Any ordinary error (e.g. the popup is absent) is ignored, but
        KeyboardInterrupt/SystemExit are no longer swallowed.
        """
        try:
            close_button = self.browser.find_by_xpath("//div[@class='user-task-modal-close']")
            print(type(close_button))
            print(dir(close_button))
            close_button.click()
        except Exception:
            # Deliberately best-effort: no popup simply means nothing to close.
            pass

    def book(self):
        """Follow the pre-schedule link."""
        book_link = self.browser.find_link_by_href("/parent/preschedule")
        book_link.click()
def test():
    """Ad-hoc check used during development; not run in normal operation.

    Opens a Weibo search result page and prints the first "like" count text
    found in every result card.
    """
    like_xpath = '//div[@class="card-act"]//a[@title="赞"]/em/text()'
    page = (
        "https://s.weibo.com/weibo/%25E5%25B0%25B1%25E5%25"
        "BC%2580%25E5%25A7%258B%25E5%25A4%25A7%25E5%25B9%2585?topnav=1&wvr=6&b=1"
    )

    browser = Browser(executable_path="../driver/firefox.exe")
    browser.visit(page)

    for card in browser.find_by_xpath('//div[@class="card"]'):
        likes = etree.HTML(card.html).xpath(like_xpath)
        print(likes[0])
def enterprise(self):
    """Build a plain-text digest from the article at ``self.text``.

    Loads the URL stored in ``self.text`` in a headless browser and walks
    its ``<strong>`` elements: the first starts the digest with a dated
    title, the second is emitted as the summary, later ones as a numbered
    list. A ``<strong>`` whose text is '财报信息' ends the list and the
    paragraph at position ``2 * n`` inside ``#js_content`` is appended as
    the fiscal section. The source URL is appended last.

    Returns:
        The assembled digest string.
    """
    driver = Browser(driver_name=BROWSER['SPLINTER']['NAME'],
                     executable_path=BROWSER['SPLINTER']['PATH'],
                     headless=True)
    try:
        driver.visit(self.text)
        fiscal = False
        n = 0
        # Start from an empty digest so pages with no <strong> items, or
        # with the fiscal header first, no longer raise UnboundLocalError.
        content = ''
        items = driver.find_by_tag('strong')
        for seq, item in enumerate(items):
            n += 1
            if item.text == '财报信息':
                fiscal = True
                break
            if seq == 0:
                content = '全球企业动态[%s]\n\n' % datetime.now().date()
            elif seq == 1:
                content += '概要:\n%s\n' % item.text
            else:
                content += '\n%d. %s\n' % (seq-1, item.text)
        if fiscal:
            content += '\n财报信息:\n' + driver.find_by_xpath(
                '//*[@id="js_content"]/p[%s]' % str(n*2)).text
        content += '\n\n' + self.text
        return content
    finally:
        # Always release the headless browser process.
        driver.quit()
class huoche(object):
    """Automates ticket booking on kyfw.12306.cn with a splinter browser."""

    driver_name=''
    executable_path=''
    # Account name and password
    username = u"*****@*****.**"
    passwd = u"13785255141x"
    # Station cookie values (escaped station name + station code); look up
    # your own. These carry the codes WHN and BDP.
    starts = u"%u6B66%u6C49%2CWHN"
    ends = u"%u4FDD%u5B9A%2CBDP"
    # Travel date, formatted like 2018-01-19
    dtime = u"2018-02-07"
    # Which row of the result list to book (1-based); 0 means scan the list
    # and apply the time/type filter below.
    order = 21
    # Passenger names
    users = [u"乔稳"]
    # Seat class
    xb = u"二等座"
    pz=u"成人票"
    # Only consider trains whose hour (0-24) lies in [date_tic, date_tic_end]
    date_tic=12
    date_tic_end=18
    # Accepted train type prefixes (K, G, D, Z, ...)
    car_types=["K","G"]

    # URLs
    ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
    login_url = "https://kyfw.12306.cn/otn/login/init"
    initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
    buy="https://kyfw.12306.cn/otn/confirmPassenger/initDc"
    login_url='https://kyfw.12306.cn/otn/login/init'

    def __init__(self):
        """Select Chrome and the chromedriver location."""
        self.driver_name='chrome'
        self.executable_path='C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe'

    def login(self):
        """Fill in the credentials and wait until the user finishes login.

        The captcha is solved by hand; this polls once a second until the
        browser reaches ``initmy_url``.
        """
        self.driver.visit(self.login_url)
        self.driver.fill("loginUserDTO.user_name", self.username)
        self.driver.fill("userDTO.password", self.passwd)
        print (u"等待验证码,自行输入...")
        while True:
            if self.driver.url != self.initmy_url:
                sleep(1)
            else:
                break

    def _is_wanted(self, hour, train_code):
        """Return True if a train matches the hour window and type filter.

        Args:
            hour: Hour taken from the time shown in the result row.
            train_code: Train number text, e.g. ``"G101"``; its first
                character is matched against ``car_types``.
        """
        return (self.date_tic <= hour <= self.date_tic_end
                and train_code[0] in self.car_types)

    def start(self):
        """Open the browser, log in, and loop on the query page until booked."""
        self.driver=Browser(driver_name=self.driver_name,executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print (u"购票页面开始...")
            # Preload the query form through cookies
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.reload()
            count=0
            if self.order!=0:
                # Fixed row: keep querying and clicking the order-th button
                # until the page navigates away from the query URL.
                while self.driver.url==self.ticket_url:
                    self.driver.find_by_text(u"查询").click()
                    sleep(2)
                    count += 1
                    print (u"循环点击查询... 第 %s 次" % count)
                    try:
                        self.driver.find_by_text(u"预订")[self.order - 1].click()
                    except Exception as e:
                        print (e)
                        print (u"还没开始预订")
                        continue
            else:
                # Filtered scan: book the first row matching hour and type.
                while self.driver.url == self.ticket_url:
                    self.driver.find_by_text(u"查询").click()
                    count += 1
                    print (u"循环点击查询... 第 %s 次" % count)
                    try:
                        for t in self.driver.find_by_xpath("//*[@id='queryLeftTable']/tr/td/div/div[3]/strong[1]"):
                            tl = t.text.split(":")
                            car=t.find_by_xpath("../..//div[1]/a")[0]
                            # The original read the misspelt attribute
                            # ``data_tic_end``, which raised AttributeError
                            # for every row so nothing was ever booked.
                            if self._is_wanted(int(tl[0]), car.text):
                                ti_id = t.find_by_xpath("../../../..")[0]["id"]
                                elem = self.driver.find_by_xpath("//*[@id='" + ti_id + "']/td[13]/a")
                                if len(elem) > 0:
                                    elem[0].click()
                                    sleep(1)
                        if count > 1000:
                            print("超过最大次数")
                            break
                    except Exception as e:
                        print (e)
                        print (u"还没开始预订 %s" %count)
                        continue
            print (u"开始预订...")
            sleep(1)
            print (u'开始选择用户...')
            for user in self.users:
                self.driver.find_by_text(user).last.click()
            print (u"提交订单...")
            sleep(1)
            # Submit the order
            self.driver.find_by_id('submitOrder_id').click()
            sleep(1.5)
            print (u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()
        except Exception as e:
            print (e)
class get_ticket(object):
    """12306 ticket grabber with login watchdog and e-mail notification."""

    login_status = False
    driver_name = ''
    executable_path = ''
    # Account name and password
    username = u""
    passwd = u""
    # Station cookie values (escaped station name + station code)
    starts = u"%u4E0A%u6D77%2CSHH"
    ends = u"%u592A%u539F%2CTYV"
    # Travel date, formatted like 2018-01-19
    dtime = u"2018-01-19"
    # Train type filter values; iterated element by element in working()
    # and qulifier()
    type = ''
    # Train row to book; 0 means click from the top down
    order = 0
    # Passenger names
    users = []
    # Seat class
    xb = u"二等座"
    pz = u"成人票"
    # Stays True until a booking succeeds; start() loops while it is True
    is_fail = True
    # Accepted departure hour window
    starttime = 8
    endtime = 24
    # Pause in seconds after a scan that found nothing to book
    duration = 1.5
    info = ''
    count = 0

    # URLs
    ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
    login_url = "https://kyfw.12306.cn/otn/login/init"
    initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
    buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"
    login_url = 'https://kyfw.12306.cn/otn/login/init'

    def cli(self):
        """Query remaining tickets from the command-line args and print a table.

        Reads ``<from>``, ``<to>`` and ``<date>`` via docopt from the module
        docstring, calls the leftTicket query endpoint and prints one row
        per train.
        """
        arguments = docopt(__doc__)
        from_station = stations.get(arguments['<from>'])
        to_station = stations.get(arguments['<to>'])
        date = arguments['<date>']
        url = 'https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date={}' \
              '&leftTicketDTO.from_station={}' \
              '&leftTicketDTO.to_station={}' \
              '&purpose_codes=ADULT'.format(
                  date, from_station, to_station
              )
        r = requests.get(url, verify=False)
        rows = r.json()['data']['result']
        station_map = r.json()['data']['map']
        headers = '车次 车站 时间 历时 商务 一等 二等 软卧 硬卧 软座 硬座 无座'.split()
        pt = PrettyTable()
        pt._set_field_names(headers)
        for row in rows:
            fields = row.split('|')
            # NOTE(review): the original per-field comments did not line up
            # with the header order above (e.g. fields[24] was labelled
            # soft seat but lands in the hard-sleeper column) -- confirm the
            # index-to-seat-class mapping against the live API.
            pretty_row = [
                fields[3],                                               # train number
                station_map[fields[6]] + '-' + station_map[fields[7]],  # stations
                fields[8] + '-' + fields[9],                             # depart-arrive time
                fields[10],                                              # duration
                fields[30],
                fields[31],
                fields[32],
                fields[23],
                fields[24],
                fields[28],
                fields[29],
                fields[26],
            ]
            pt.add_row(pretty_row)
        print(pt)

    def __init__(self):
        """Select Chrome and the chromedriver location."""
        self.driver_name = 'chrome'
        self.executable_path = '/usr/local/bin/chromedriver'

    def login(self):
        """Fill in the credentials, then wait for the manual captcha login.

        Retries filling the form until it succeeds, then polls once a second
        until the browser reaches ``initmy_url`` and sets ``login_status``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        # Fill in the credentials; the form may not be rendered yet.
        filled = False
        while not filled:
            try:
                self.driver.fill("loginUserDTO.user_name", self.username)
                self.driver.fill("userDTO.password", self.passwd)
                filled = True
                print(u"等待验证码,自行输入...")
            except Exception:
                print("无法填入账号密码!")
                # Pause before retrying instead of spinning at full speed.
                sleep(1)
        while True:
            if self.driver.url != self.initmy_url:
                sleep(1)
            else:
                self.login_status = True
                break

    def working(self):
        """Run one booking attempt; sets ``is_fail = False`` on success.

        Returns early (leaving ``is_fail`` untouched) when the session is
        logged out, the queue exceeds the remaining tickets, no tickets are
        left, or the confirmation reports a failure.
        """
        try:
            self.driver.visit(self.ticket_url)
            print(u"购票页面开始...")
            # Preload the query form through cookies
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.cookies.add({"_jc_save_wfdc_flag": "dc"})
            self.driver.reload()
            # Tick the train type checkboxes
            for i in self.type:
                self.driver.find_by_xpath("//input[@value='%s']" % i).click()
            # Only show trains that can be booked
            self.driver.find_by_xpath("//input[@id='%s']" % 'avail_ticket').click()
            sleep(1)
            while self.driver.url == self.ticket_url:
                if not self.login_status:
                    return
                self.driver.find_by_text(u"查询").click()
                self.count += 1
                print(u"已抢票... 第 %s 次" % self.count)
                try:
                    enable_buttons = self.driver.find_by_xpath("//tbody[@id = 'queryLeftTable']/tr/td/a[contains(text(),'预订')]")
                    self.qulifier(enable_buttons)
                except Exception as e:
                    print(e)
                    print(u"界面出错,重新开始抢票 %s" % self.count)
                    continue
            print(u'开始选择用户...')
            for user in self.users:
                self.driver.find_by_text(user).last.click()
            print(u"提交订单...")
            self.driver.find_by_id('submitOrder_id').click()
            sleep(0.5)
            # Check whether the tickets are already gone
            if len(self.driver.find_by_xpath("//p[@id = 'sy_ticket_num_id']/font[starts-with(text(),'目前排队人数已经超过余票张数')]")) > 0:
                return
            elif int(re.sub(r"\D", "", self.driver.find_by_xpath("//p[@id = 'sy_ticket_num_id']").text)) == 0:
                return
            # Send the notification e-mail in the background
            _thread = threading.Thread(target=self.play_sound)
            _thread.daemon = True
            _thread.start()
            # A captcha may be required at this point
            self.driver.find_by_id('qr_submit_id').click()
            sleep(2)
            if len(self.driver.find_by_xpath("//div[@class = 'tit'][contains(text(),'失败')]")) > 0:
                return
            else:
                self.is_fail = False
        except Exception as e:
            print(e)

    def qulifier(self, check_buttons):
        """Click the first bookable row that passes the seat and time filters.

        Args:
            check_buttons: Element list of the visible booking links; only
                its length/order is used, to index the matching table rows.

        Returns:
            True if a booking link was clicked, otherwise False (after
            sleeping ``duration`` seconds).
        """
        tr_obj = self.driver.find_by_xpath("//tbody[@id = 'queryLeftTable']/tr/td/a[contains(text(),'预订')]/../..")
        for index, value in enumerate(check_buttons):
            tr_info = re.split(' |\n', tr_obj[index].text)
            timehour = int(tr_info[3].split(':')[0])
            train_num = tr_info[0]
            is_class_qulify = False
            for i in self.type:
                # ``sit`` is defined elsewhere in the module; it is indexed
                # by train type and yields column indexes into tr_info.
                for j in sit[i]:
                    if tr_info[j] != '无' and tr_info[j] != '--':
                        is_class_qulify = True
                        print(" ".join(tr_info))
                        break
            if not is_class_qulify:
                continue
            # Check the departure hour window
            if self.starttime <= timehour <= self.endtime:
                print(train_num)
                find_button = self.driver.find_by_xpath(
                    "//tbody[@id = 'queryLeftTable']/tr[starts-with(@id,'ticket')][contains(@id,'%s')]/td/a[contains(text(),'预订')]" % train_num)
                if len(find_button) > 0:
                    find_button.click()
                else:
                    self.driver.find_by_xpath(
                        "//tbody[@id = 'queryLeftTable']/tr[starts-with(@id,'ticket')]/td/a[contains(text(),'预订')]")[0].click()
                self.info = tr_info
                sleep(0.5)
                return True
        sleep(self.duration)
        return False

    def start(self):
        """Open the browser, start the login watchdog and loop until booked."""
        self.driver = Browser(driver_name=self.driver_name)
        self.driver.driver.set_window_size(1400, 1000)
        # Background job that checks every 3 minutes whether we got logged out
        scheduler = BackgroundScheduler()
        scheduler.add_job(self.watch_login, 'interval', minutes=3)
        scheduler.start()
        while self.is_fail:
            # Log in first when the session is not (or no longer) logged in
            if not self.login_status:
                self.login()
            self.working()
        print(u"确认选座...")

    def play_sound(self):
        """E-mail the first passenger name plus the booked row details."""
        sender_obj = sentEmail.Sender()
        sender_obj.mail(self.users[0] + " ".join(self.info))

    def watch_login(self):
        """Refresh ``login_status``; e-mail a notice when logged out.

        Retries until one check completes without raising.
        """
        print("start mission")
        start_watch = True
        while start_watch:
            try:
                if 'tk' in self.driver._cookie_manager.all():
                    print("----------登陆-----------")
                    self.login_status = True
                else:
                    if len(self.driver.find_by_xpath("//a[@id = 'login_user'][contains(text(),'登录')]")) > 0:
                        print("----------未登录---------")
                        self.login_status = False
                        # Notify by e-mail that the session was logged out
                        sender_obj = sentEmail.Sender()
                        sender_obj.mail("登录状态以退出")
                start_watch = False
            except Exception as e:
                print(e)
                start_watch = True
        print("end mission")
class WeiboSpider(object):
    """Scrapes Weibo search results for a keyword and saves them to Excel."""

    page_count = 0  # number of posts scraped from the current page
    all_count = 0  # total number of posts scraped
    save_data = OrderedDict({  # column name -> list of scraped values
        "昵称": [],
        "微博正文": [],
        "微博链接": [],
        "时间": [],
        "收藏数": [],
        "转发数": [],
        "评论数": [],
        "点赞数": [],
        "设备": []
    })
    xpath_dict = {  # column name -> xpath evaluated inside one result card
        '昵称': '//div[@class="info"]//a[@class="name"]/text()',
        '微博正文': '//div[@class="content"]/p[@node-type="feed_list_content"]//text()',
        '微博链接': '//div[@class="content"]/p[@class="from"]/a[1]/@href',
        '时间': '//div[@class="content"]/p[@class="from"]/a[1]/text()',
        '收藏数': '//a[@action-type="feed_list_favorite"]/text()',
        '转发数': '//a[@action-type="feed_list_forward"]/text()',
        '评论数': '//a[@action-type="feed_list_comment"]/text()',
        '点赞数': '//div[@class="card-act"]//a[@action-type="feed_list_like"]//em/text()',
        '设备': '//div[@class="content"]/p/a[@rel="nofollow"]/text()'
    }

    def __init__(self, keyword, start_time, end_time, sleep_time=10,
                 username=None, password=None):
        """Configure a spider.

        Args:
            keyword: Search keyword.
            start_time: Start of the time range, ``%Y-%m-%d-%H``.
            end_time: End of the time range, same format; a falsy value
                means "now" when the search URL is built.
            sleep_time: Minimum pause in seconds between result pages.
            username: Weibo account name, pre-filled on the login form.
            password: Weibo password, pre-filled on the login form.
        """
        self.username = username
        self.password = password
        self.browser = None
        self.browser_name = "firefox"
        self.driver_path = "../driver/firefoxdriver.exe"
        self.base_url = "https://s.weibo.com/"  # Weibo search home page
        # The query parameter is ``timescope``; the literal had been
        # corrupted to ``×cope`` (``&times`` decoded as an HTML entity).
        self.search_url = 'https://s.weibo.com/weibo/{keyword}' \
                          '&timescope=custom:{start_time}:{end_time}&refer=g'
        self.keyword = keyword
        self.sleep_time = sleep_time
        self.start_time = start_time
        self.end_time = end_time
        self.base_sava_path = '../files/'  # output directory
        # Tolerate a missing end_time here, as refactor_date() already does.
        self.save_file_name = (
            self.keyword + self.start_time + "~" + (self.end_time or ""))
        # Give every spider its own result lists; the class-level template
        # would otherwise be shared (and mixed) between instances.
        self.save_data = OrderedDict(
            (column, []) for column in type(self).save_data)

    def refactor_date(self, start_time, end_time):
        """Normalise the time range; a falsy ``end_time`` becomes now."""
        self.start_time = datetime.strptime(
            start_time, "%Y-%m-%d-%H").strftime("%Y-%m-%d-%H")
        if end_time:
            self.end_time = datetime.strptime(
                end_time, "%Y-%m-%d-%H").strftime("%Y-%m-%d-%H")
        else:
            self.end_time = datetime.now().strftime('%Y-%m-%d-%H')

    def get_search_url(self):
        """Return the search URL for the keyword and normalised time range."""
        self.refactor_date(self.start_time, self.end_time)
        return self.search_url.format(keyword=self.keyword,
                                      start_time=self.start_time,
                                      end_time=self.end_time)

    def login(self):
        """Open the browser and wait until the user has logged in to Weibo.

        Login completion is detected by the browser URL changing away from
        the URL seen while the login dialog is open.
        """
        self.browser = Browser(driver_name=self.browser_name,
                               executable_path=self.driver_path,
                               service_log_path='../files/log.log')
        self.browser.visit(self.base_url)
        self.browser.click_link_by_text('登录')
        # Pre-fill whatever credentials were supplied
        if self.username is not None:
            self.browser.fill("username", self.username)
        if self.password is not None:
            self.browser.fill("password", self.password)
        print("请在打开的浏览器中登录........")
        time.sleep(2)  # give the page time to load
        logining_url = self.browser.url
        # On a slow network the URL may not have changed to the login URL yet
        while logining_url == self.base_url:
            time.sleep(2)
            logining_url = self.browser.url
        # Wait for the URL to change again, which signals a finished login
        while self.browser.url == logining_url:
            time.sleep(2)
        print("已成功登录,开始抓取信息.......")

    def search(self):
        """Navigate to the search result page."""
        self.browser.visit(self.get_search_url())

    def get_card_data(self, card, xapth_dict: dict):
        """Extract every configured column from one result card.

        Appends exactly one value per column to ``save_data`` ('0' for a
        missing count, '' for any other missing field) and increments
        ``page_count``.
        """
        etree_html = etree.HTML(card.html)
        number = ['收藏数', '转发数', '评论数', '点赞数']
        self.page_count += 1
        for key, xpath in xapth_dict.items():
            data = etree_html.xpath(xpath)
            column = self.save_data[key]
            if not data:
                column.append('0' if key in number else '')
            elif key in number:
                column.append(Utils.get_num(data[0]))
            elif key == '时间':
                column.append(Utils.get_date(data[0]).strftime('%Y-%m-%d'))
            elif key == '微博正文':
                column.append(
                    ''.join(data).replace(' ', '').replace('\n', ''))
            else:
                column.append(data[0])

    def download_data(self):
        """Walk up to 50 result pages, collecting every card on each.

        Ctrl-C stops the crawl early but keeps what was collected so far.
        """
        self.browser.execute_script(
            "window.scrollTo(0,document.body.scrollHeight)")  # scroll to bottom
        try:
            self.browser.click_link_by_text('查看全部搜索结果')
        except ElementDoesNotExist:
            pass
        page_index = 1
        while page_index <= 50:  # Weibo serves at most 50 result pages
            try:
                # Clicking "next" on the last page can wrap around to page 1;
                # compare against the page number in the URL to detect that.
                real_page = re.findall(r'page=(\d+)', self.browser.url)
                if real_page:
                    real_page = int(real_page[0])
                else:
                    real_page = 1
                if real_page < page_index:
                    break
                print('正在抓取第%s页内容:' % page_index, end='')
                self.browser.execute_script(
                    "window.scrollTo(0,document.body.scrollHeight)")
                cards = self.browser.find_by_xpath(
                    '//div[@class="card"]')  # every post on the page
            except KeyboardInterrupt:
                print('中途退出抓取,正则保存中.....')
                break
            for card in cards:
                try:
                    self.get_card_data(card, self.xpath_dict)
                except KeyboardInterrupt:
                    pass
            try:
                print('本页抓取了%s条数据,模拟等待中.....' % self.page_count)
                self.all_count += self.page_count
                self.page_count = 0
                # Randomised pause to look like a human reader
                sleep_time = random.randint(self.sleep_time,
                                            self.sleep_time + 5)
                time.sleep(sleep_time)
                try:
                    self.browser.click_link_by_text('下一页')
                    page_index += 1
                except ElementDoesNotExist:
                    break
            except KeyboardInterrupt:
                print('中途退出抓取,正则保存中.....')
                break

    def save(self):
        """Write the collected data to an ``.xlsx`` file; report failures."""
        print('--------------------------------------------')
        print('本次共抓取了%s条数据' % self.all_count)
        try:
            data = pandas.DataFrame(self.save_data)
            file_path = self.base_sava_path + self.save_file_name + '.xlsx'
            data.to_excel(file_path, index=False)
            print('文件正在保存...', end='\n\n')
        except Exception as e:
            print('文件保存失败!!!', end='\n\n')
            print(e)

    def close(self):
        """Close the browser."""
        self.browser.quit()

    @staticmethod
    def test():
        """Ad-hoc development check; not run in normal operation."""
        browser = Browser(executable_path="../driver/firefox.exe")
        browser.visit(
            "https://s.weibo.com/weibo/%25E5%25B0%25B1%25E5%25"
            "BC%2580%25E5%25A7%258B%25E5%25A4%25A7%25E5%25B9%2585?topnav=1&wvr=6&b=1"
        )
        cards = browser.find_by_xpath('//div[@class="card"]')
        for c in cards:
            etree_html = etree.HTML(c.html)
            a = etree_html.xpath(
                '//div[@class="card-act"]//a[@title="赞"]/em/text()')
            print(a[0])
def function():
    """Step through the multi-page appointment form in a Chrome browser.

    Opens the module-level ``url``, reads the captcha text from stdin, then
    fills and submits each page of the form in sequence with fixed values.
    """
    browser = Browser('chrome')

    def click(xpath):
        # Every submit/navigation step is "find by xpath, then click".
        browser.find_by_xpath(xpath).click()

    image_button = '//*[@id="ctl00_plhMain_ImageButton"]'
    applicant = "ctl00$plhMain$repAppVisaDetails$ctl01$"

    browser.visit(url)

    # Captcha is typed in by the operator.
    captcha = input()
    browser.fill("ctl00$plhMain$MyCaptchaControl1", captcha)
    browser.choose("ctl00$plhMain$cboVisaCategory", "Beijing")
    click('//input[@type="submit"]')

    # Number of applicants and visa category.
    browser.fill("ctl00$plhMain$tbxNumOfApplicants", 2)
    browser.choose("ctl00$plhMain$cboVisaCategory", "17")
    click('//*[@id="ctl00_plhMain_ControlsTable"]/tbody/tr[8]/td/a[1]/input')

    # E-mail address.
    browser.fill("name", "*****@*****.**")
    click(image_button)

    # Password and its confirmation.
    browser.fill("ctl00$plhMain$txtPassword1", "Aa123123")
    browser.fill('ctl00$plhMain$txtCnfPassword1', "Aa123123")
    click(image_button)

    browser.fill("ctl00$plhMain$txtEmail", "*****@*****.**")
    # NOTE(review): the field name is empty in the original -- presumably a
    # password field whose name was never filled in; confirm.
    browser.fill("", "Aa123123")
    click(image_button)

    # Applicant details: passport number, title, names, phone.
    browser.fill(applicant + "tbxPassportNo", "G11111111")
    browser.choose(applicant + "cboTitle", "MR.")
    for suffix, text in (("tbxFName", "伟"),
                         ("tbxLName", "阿"),
                         ("tbxSTDCode", "+86"),
                         ("tbxMobileNumber", "12345678900")):
        browser.fill(applicant + suffix, text)
    click('//*[@id="ctl00_plhMain_btnSubmit"]')

    # Pick a date cell, then a time slot, then submit.
    click('//*[@id="ctl00_plhMain_cldAppointment"]/tbody/tr[5]/td[5]/a')
    click('//*[@id="ctl00_plhMain_gvSlot_ctl02_lnkTimeSlot"]')
    click('//*[@id="ctl00_plhMain_ImageButton1"]')
def scrape():
    """Collect Mars news, weather, facts and images into one dict.

    Returns:
        A dict with the keys ``news_title``, ``news_paragraph``,
        ``current_weather``, ``featured_img``, ``mars_fact`` (an HTML
        table) and ``hemisphere_images`` (a list of
        ``{'img_url', 'title'}`` dicts).

    The headless browser is always closed, even when a page cannot be
    parsed.
    """
    # Latest news title and paragraph text from the NASA Mars News site
    url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    result = soup.find('div', class_='content_title')
    news_title = result.find('a').text
    result_p = soup.find('div', class_='image_and_description_container')
    news_p = result_p.find('div', class_='rollover_description_inner').text

    # Most recent tweet of the Mars weather account
    url = "https://twitter.com/marswxreport?lang=en"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    result = soup.find_all('div', class_='js-tweet-text-container')[0]
    mars_weather = result.find('p').contents[0]

    # Facts table (diameter, mass, ...) parsed with pandas
    url = "https://space-facts.com/mars/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find_all('table')
    df = pd.read_html(str(table))[0]
    df = df.rename(columns={0: '', 1: 'value'})
    df.set_index('', inplace=True)
    mars_fact_html = df.to_html()

    executable_path = {'executable_path': '/Users/jingc/chromedriver'}
    browser = Browser('chrome', **executable_path, headless=True)
    try:
        # High-resolution image of each hemisphere from the USGS site
        url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
        browser.visit(url)
        hemisphere_image_urls = []
        soup = BeautifulSoup(browser.html, 'html.parser')
        for result in soup.find_all('div', class_='item'):
            newurl = 'https://astrogeology.usgs.gov' + result.find('a')['href']
            browser.visit(newurl)
            soup = BeautifulSoup(browser.html, 'html.parser')
            sample = soup.find('div', class_='downloads')
            title = soup.find('h2', class_="title").text
            hemisphere_image_urls.append({
                'img_url': sample.find('a')['href'],
                'title': title.replace(' Enhanced', ''),
            })
            time.sleep(1)

        # Current featured Mars image: open the full image, then "more info"
        url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
        browser.visit(url)
        search_button = browser.find_by_xpath('//*[@id="full_image"]')[0]
        search_button.click()
        time.sleep(2)
        moreinfo_button = browser.find_link_by_partial_text('more info')
        moreinfo_button.click()
        time.sleep(2)
        featuredimg_soup = BeautifulSoup(browser.html, 'html.parser')
        featured_img_url = 'https://www.jpl.nasa.gov' + featuredimg_soup.find(
            'figure', class_='lede').a['href']
    finally:
        browser.quit()

    mars_dict = {
        'news_title': news_title,
        'news_paragraph': news_p,
        'current_weather': mars_weather,
        'featured_img': featured_img_url,
        'mars_fact': mars_fact_html,
        'hemisphere_images': hemisphere_image_urls
    }
    return mars_dict
class TestCase(ptc.PloneTestCase):
    """Plone test case driving a real Firefox browser through splinter."""

    def __init__(self):
        # NOTE(review): the base-class initialiser is not called and no
        # methodName argument is accepted -- confirm this is intentional.
        self.browser = Browser(driver_name='firefox')
        self.host, self.port = startZServer()

    def _portal_url(self, path=None):
        """Return the portal root URL, or the URL of ``path`` below it."""
        root = 'http://%s:%s/plone' % (self.host, self.port)
        if path is None:
            return root
        return '%s/%s' % (root, path)

    def _open_add_user_form(self, fullname, username, email, password):
        """Open the "add user" form in Site Setup and fill in every field."""
        self.portal_click_a_personaltool('Site Setup')
        self.browser.click_link_by_text('Users and Groups')
        self.browser.find_by_name('form.button.AddUser').first.click()
        for field, value in (('form.fullname', fullname),
                             ('form.username', username),
                             ('form.email', email),
                             ('form.password', password),
                             ('form.password_ctl', password)):
            self.browser.fill(field, '%s' % (value))

    def afterSetUp(self):
        """Open the portal root before each test."""
        self.browser.visit(self._portal_url())

    def beforeTearDown(self):
        """Close the browser after each test."""
        self.browser.quit()

    def portal_visit(self, url):
        """Visit ``url`` relative to the portal root."""
        self.browser.visit(self._portal_url(url))

    def portal_home(self):
        """Visit the portal home page."""
        self.browser.visit(self._portal_url(''))

    def portal_login(self, user, password):
        """Log in through the login form."""
        self.portal_visit('login_form')
        self.browser.fill('__ac_name', user)
        self.browser.fill('__ac_password', password)
        self.browser.find_by_name('submit').first.click()

    def portal_login_as_owner(self):
        """Log in with the portal owner credentials."""
        self.portal_login(user=portal_owner, password=default_password)

    def portal_logout(self):
        """Log out of the portal."""
        self.portal_visit('logout')

    def portal_search(self, search_word):
        """Type ``search_word`` in the search box and submit."""
        self.browser.fill('SearchableText','%s' % (search_word))
        self.browser.find_by_css('.searchButton').first.click()

    def portal_navigate_submenu(self, option):
        """Click the content-view tab whose link text equals ``option``."""
        tab = "//li[contains(@id, 'contentview')]/a[text()='%s']" % (option)
        self.browser.find_by_xpath(tab).first.click()

    def portal_click_a_personaltool(self, personaltool):
        """Open the dashboard link, then click the named personal tool."""
        self.browser.click_link_by_href(self._portal_url('dashboard'))
        self.browser.click_link_by_text('%s' % (personaltool))

    def portal_add_user(self, fullname, username, email, password):
        """Register a new user through Site Setup."""
        self._open_add_user_form(fullname, username, email, password)
        self.browser.find_by_id('form.actions.register').first.click()

    def portal_add_user_as_manager(self, fullname, username, email, password):
        """Register a new user with the first group checkbox ticked."""
        self._open_add_user_form(fullname, username, email, password)
        self.browser.find_by_id('form.groups.0').first.click()
        self.browser.find_by_id('form.actions.register').first.click()

    def portal_change_user_role(self, username, new_role):
        """Tick ``new_role`` on the row of ``username`` and save."""
        self.portal_click_a_personaltool('Site Setup')
        self.browser.click_link_by_text('Users and Groups')
        role_box = "//tr[*/input[@value='%s']]//input[@value='%s']" % (username, new_role)
        self.browser.find_by_xpath(role_box).first.click()
        self.browser.find_by_name('form.button.Modify').first.click()

    def portal_click_enable_content_types(self):
        """Open the "add new" menu."""
        self.browser.find_by_css('a[title="Add new items inside this item"]').first.click()

    def portal_add_content_type(self, type):
        """Open the "add new" menu and pick the entry named ``type``."""
        self.portal_click_enable_content_types()
        self.browser.click_link_by_text('%s' % (type))

    def portal_click_content_item_action(self):
        """Open the "actions" menu of the current content item."""
        self.browser.find_by_css('a[title="Actions for the current content item"]').first.click()

    def portal_add_item_action(self, type):
        """Open the "actions" menu and pick the entry named ``type``."""
        self.portal_click_content_item_action()
        self.browser.click_link_by_text('%s' % (type))

    def portal_list_states(self):
        """Open the workflow state menu."""
        self.browser.find_by_css('a[title="Change the state of this item"]').first.click()

    def portal_modify_state_to(self, state):
        """Open the workflow state menu and pick ``state``."""
        self.portal_list_states()
        self.browser.click_link_by_text('%s' % (state))
class huoche(object):
    """Books a ticket on kyfw.12306.cn, including seat class and seat pick."""

    driver_name=''
    executable_path=''
    # Account name and password
    username = u''
    passwd = u''
    # Station cookie values (escaped station name + station code); look up
    # your own. These carry the codes BXP and YBF.
    starts = u'%u5317%u4EAC%u897F%2CBXP'
    ends = u'%u6708%u5C71%2CYBF'
    # Travel date, formatted like 2018-01-19
    dtime = u'2018-02-09'
    # Train type: G D Z T K QT
    train_type = ''
    # Which row of the result list to book (1-based); 0 clicks them in order
    order = 10
    # Passenger names
    users = [u'', u'']
    # Index of the seat-class <option> to click
    xb = 0
    # Seat id: 1A 1B 1C 1D 1F
    seat_num = '1F'
    pz=u'成人票'

    # URLs
    ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'
    login_url = 'https://kyfw.12306.cn/otn/login/init'
    initmy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'
    buy='https://kyfw.12306.cn/otn/confirmPassenger/initDc'
    login_url='https://kyfw.12306.cn/otn/login/init'

    def __init__(self):
        """Select Chrome and the chromedriver location."""
        self.driver_name='chrome'
        self.executable_path='/usr/bin/chromedriver'

    def login(self):
        """Fill in the credentials and wait for the manual captcha login."""
        self.driver.visit(self.login_url)
        self.driver.fill('loginUserDTO.user_name', self.username)
        self.driver.fill('userDTO.password', self.passwd)
        print(u'等待验证码,自行输入...')
        # Poll until the browser reaches the post-login page.
        while self.driver.url != self.initmy_url:
            sleep(1)

    def start(self):
        """Open the browser, log in, query until a booking link works, order."""
        self.driver=Browser(driver_name=self.driver_name,executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print(u'购票页面开始...')
            # Preload the query form through cookies
            for cookie_name, cookie_value in (
                    ('_jc_save_fromStation', self.starts),
                    ('_jc_save_toStation', self.ends),
                    ('_jc_save_fromDate', self.dtime)):
                self.driver.cookies.add({cookie_name: cookie_value})
            self.driver.reload()

            # Tick the train type checkbox when one is configured
            if self.train_type != '':
                self.driver.find_by_xpath('//input[@value="'+self.train_type+'"]').click()

            count=0
            pick_fixed_row = self.order != 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text(u'查询').click()
                count += 1
                if pick_fixed_row:
                    print(u'单次循环点击查询... 第 %s 次' % count)
                    try:
                        self.driver.find_by_text(u'预订')[self.order - 1].click()
                    except Exception as e:
                        print(u'抢票未开始')
                else:
                    print(u'列表循环点击查询... 第 %s 次' % count)
                    try:
                        for link in self.driver.find_by_text(u'预订'):
                            link.click()
                            sleep(1)
                    except Exception as e:
                        print(u'抢票未开始 %s' %count)

            print(u'开始预订...')
            sleep(1)

            print(u'开始选择用户...')
            for passenger in self.users:
                self.driver.find_by_text(passenger).last.click()

            print(u'开始选择席位...')
            seat_options = self.driver.find_by_id('seatType_1').find_by_tag('option')
            seat_options[self.xb].click()

            print(u'提交订单...')
            self.driver.find_by_id('submitOrder_id').click()

            # Seat picking is only done for train type 'G'
            if self.train_type == 'G':
                print(u'开始选择座位...')
                seat_panel = self.driver.find_by_xpath('//div[@id="erdeng1"]')
                seat_panel.find_by_id(self.seat_num).click()
                print(u'确认选择座位号为' + self.seat_num + '...')
            print(u'确认订单')

            sleep(1.5)
            self.driver.find_by_id('qr_submit_id').click()
            print(u'购票成功...')
        except Exception as e:
            print(e)
class LemonLemon_douyin(object):
    """Download the Douyin videos whose share links are listed in a text file."""

    def __init__(self, width=500, height=300):
        """Start a headless Chrome session with a desktop user agent.

        ``width`` and ``height`` are accepted for backward compatibility;
        this method does not use them.
        """
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name="chrome", options=chrome_options, headless=True)

    def get_video_urls(self, input_f):
        """Read the video links from ``input_f``.

        Only lines starting with ``http`` are used; every other line is
        ignored. As a side effect ``self.date_today`` is set to a
        ``<date>_<hour>:<minute>`` stamp that prefixes the generated names.

        Parameters:
            input_f: path of a text file with one link per line
        Returns:
            video_names: generated file names (``<stamp>_<n>.mp4``, n from 1)
            video_urls: the links, without surrounding whitespace
        """
        video_names = []
        video_urls = []
        now_date = datetime.datetime.now()
        self.date_today = (str(now_date.date()) + "_" + str(now_date.hour) + ":" +
                           str(now_date.minute))
        with open(input_f) as f:
            for line in f:
                if line.startswith("http"):
                    # Drop the trailing newline so it never ends up in a request.
                    video_urls.append(line.strip())
                    video_names.append(
                        self.date_today + "_" + str(len(video_urls)) + ".mp4")
        return video_names, video_urls

    def get_download_url(self, video_url):
        """Return the watermarked download address for a share page.

        Parameters:
            video_url: address of the video share page
        Returns:
            download_url: first entry of the page's ``play_addr`` URL list
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, "lxml")
        # The metadata is a JS literal ("var data = [...]") in the last script.
        script = bf.find_all("script")[-1]
        video_url_js = re.findall(r"var data = \[(.+)\];", str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html["video"]["play_addr"]["url_list"][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name`` and print the progress.

        Parameters:
            video_url: address of the video share page
            video_name: path of the file to write
            watermark_flag: when true the watermark-free address is used
        Raises:
            IOError: when the server does not answer with HTTP 200, so that
                the caller can record the link as failed
        """
        size = 0
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            if response.status_code != 200:
                raise IOError("unexpected HTTP status %s for %s"
                              % (response.status_code, video_url))
            chunk_size = 1024
            # The header may be missing; progress is then simply not shown.
            content_size = int(response.headers.get("content-length", 0))
            sys.stdout.write(" [文件大小]:%0.2f MB\n" % (content_size / chunk_size / 1024))
            with open(video_name, "wb") as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)
                    file.flush()
                    if content_size:
                        sys.stdout.write(
                            " [下载进度]:%.2f%%" % float(size / content_size * 100) + "\r")
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Return the watermark-free download address of a video.

        The address is obtained by submitting ``video_url`` to
        douyin.iiilab.com in the browser session and reading the first
        link of the result.

        Parameters:
            video_url: address of the watermarked video
        Returns:
            the ``href`` of the first link in the result block
        """
        self.driver.visit("http://douyin.iiilab.com/")
        self.driver.find_by_tag("input").fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, "lxml")
        return bf.find("a").get("href")

    def run(self, input_f):
        """Download every link listed in ``input_f``.

        Files go to a ``douyin_download<stamp>`` directory under the current
        directory. Links that fail are collected and written, one per line,
        to ``error_url.txt``.

        Parameters:
            input_f: path of the text file holding the links
        Returns:
            None
        """
        error_url = []
        video_names, video_urls = self.get_video_urls(input_f)
        out_dir = "douyin_download" + self.date_today
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        print("视频下载中:共有%d个作品!\n" % len(video_urls))
        for num, (video_url, raw_name) in enumerate(zip(video_urls, video_names)):
            print(" 解析第%d个视频链接 [%s] 中,请稍后!\n" % (num + 1, video_url))
            # Random pause between downloads.
            random_wait = random.uniform(3, 5)
            print("waiting...", random_wait)
            time.sleep(random_wait)
            # Path separators must not appear in a file name; strip both kinds.
            video_name = raw_name.replace("\\", "").replace("/", "")
            try:
                self.video_downloader(video_url, os.path.join(out_dir, video_name))
            except Exception:
                print("**************************")
                print("ERROR", video_url)
                error_url.append(video_url)
            print("\n")
        self.driver.close()
        with open("error_url.txt", "w") as f:
            f.writelines(url + "\n" for url in error_url)
        print("下载完成!")
        print("出错数量:", len(error_url))
class Ticket(object):
    """Config-driven 12306 ticket hunter built on a splinter browser.

    The settings come from an INI file with the sections ``STATIONCOOKIE``
    (station name -> cookie value), ``GLOBAL`` (username, password, browser)
    and ``TICKET`` (stations, trains, seat types, dates, passengers, ...).
    """

    def __init__(self, config_file):
        """Read ``config_file``, start the browser and load all settings."""
        # Config parser setup.
        self.config_file = config_file
        self.settings = configparser.ConfigParser()
        self.settings._interpolation = configparser.ExtendedInterpolation()
        self.settings.read(self.config_file)
        # Environment setup.
        # NOTE(review): the browser is started before the config is parsed,
        # so the configured ``browser`` value does not affect this session.
        self.brower = 'chrome'
        self.b = Browser(driver_name=self.brower)
        self.station = {}
        self.url = "https://kyfw.12306.cn/otn/leftTicket/init"
        # Seat classes; the position in this list is used as the index of
        # the matching <td> of a result row.
        self.ticket_index = [
            '', u'商务座', u'特等座', u'一等座', u'二等座', u'高级软卧',
            u'软卧', u'硬卧', u'软座', u'硬座', u'无座'
        ]
        self.username = ''
        self.password = ''
        self.date_format = '%Y-%m-%d'
        self.tolerance = -1
        self.blacklist = {}
        self.date = []
        self.isStudent = False
        self.success = 0
        self.find_ticket = 0
        # Defaults must be in place BEFORE the config is parsed; resetting
        # playmusic afterwards would discard the ``alarm`` setting.
        self.playmusic = False
        self.count = 0
        self.config_parser()

    def ConfigSectionMap(self, section):
        """Return the options of ``section`` as a dict.

        An option whose value cannot be read is reported and mapped to None.
        """
        dict1 = {}
        for option in self.settings.options(section):
            try:
                dict1[option] = self.settings.get(section, option)
            except Exception:
                print("exception on %s!" % option)
                dict1[option] = None
        return dict1

    def daterange(self, start_date, end_date):
        """Yield every day from ``start_date`` to ``end_date``, both included."""
        for n in range(int((end_date - start_date).days) + 1):
            yield start_date + timedelta(n)

    def config_parser(self):
        """Load station cookies and booking options; exit on invalid config."""
        if self.retrieve_station_dict() == -1:
            sys.exit()
        if self.retrieve_book_options() == -1:
            sys.exit()

    def retrieve_station_dict(self):
        """Copy the ``STATIONCOOKIE`` section into ``self.station``."""
        dict_helper = self.ConfigSectionMap('STATIONCOOKIE')
        for name, value in dict_helper.items():
            self.station[name] = value

    def retrieve_book_options(self):
        """Load the ``GLOBAL`` and ``TICKET`` sections into attributes.

        Returns:
            -1 when ``range_query`` is ``Y`` but the date range is invalid,
            None otherwise.
        """
        login_info = self.ConfigSectionMap('GLOBAL')
        self.username = login_info['username'].strip()
        self.password = login_info['password'].strip()
        self.brower = login_info['browser']

        book_settings = self.ConfigSectionMap('TICKET')
        self.fromStation = [station.strip()
                            for station in book_settings['from_station'].split(',')]
        self.toStation = [station.strip()
                          for station in book_settings['to_station'].split(',')]
        trains = [train.strip() for train in book_settings['trains'].split(',')]
        # An empty option means "any train".
        if len(trains) == 1 and trains[0] == '':
            self.trains = []
        else:
            self.trains = trains
        self.ticket_type = [_type.strip()
                            for _type in book_settings['ticket_type'].split(',')]

        date = [d.strip() for d in book_settings['date'].split(',')]
        if book_settings['range_query'].strip() == 'Y':
            # Range mode: the first two entries are start and end date.
            if len(date) < 2:
                print("未设置正确的起至时间")
                return -1
            start_date = datetime.strptime(date[0], self.date_format)
            end_date = datetime.strptime(date[1], self.date_format)
            if end_date < start_date:
                # The message used to state the opposite of this condition.
                print("查询截止日期不可早于开始日期!")
                return -1
            for single_date in self.daterange(start_date, end_date):
                self.date.append(single_date.strftime(self.date_format))
        else:
            self.date = date

        if book_settings['student'].strip() == 'Y':
            self.isStudent = True
        self.tolerance = int(book_settings['tolerance'])
        self.people = [people.strip()
                       for people in book_settings['people'].split(',')]
        if book_settings['alarm'].strip() == 'Y':
            self.playmusic = True

    def login(self):
        """Open the site, fill in the credentials and drop into the debugger."""
        self.b.visit(self.url)
        self.b.find_by_text(u"登录").click()
        self.b.fill("loginUserDTO.user_name", self.username)
        self.b.fill("userDTO.password", self.password)
        # Execution pauses here until the debugger session is continued;
        # presumably the captcha is solved by hand meanwhile -- TODO confirm.
        pdb.set_trace()

    def page_has_loaded(self):
        """Return True once ``document.readyState`` is ``complete``."""
        page_state = self.b.evaluate_script("document.readyState")
        return page_state == 'complete'

    def switch_to_order_page(self):
        """Wait for the ticket-booking link to appear and click it."""
        order = []
        while len(order) == 0:
            order = self.b.find_by_text(u"车票预订")
        order[0].click()

    def checkTicket(self, date, fromStation, toStation):
        """Query one date/route and click the first acceptable booking link.

        Sets ``self.find_ticket`` to 1 when a booking link was clicked.

        Returns:
            the ``datatran`` value of the last train examined (None when no
            train was examined), or ``(0, '')`` when a configured seat type
            is not listed in ``self.ticket_index``.
        """
        print('date: %s, from %s, to %s' % (date, fromStation, toStation))
        self.b.cookies.add({"_jc_save_fromDate": date})
        self.b.cookies.add({"_jc_save_fromStation": self.station[fromStation]})
        self.b.cookies.add({"_jc_save_toStation": self.station[toStation]})
        self.b.cookies.all()
        self.b.reload()
        if self.isStudent:
            self.b.find_by_text(u'学生').click()
        self.b.find_by_text(u"查询").click()

        all_trains = []
        while not self.page_has_loaded():
            continue
        while len(all_trains) == 0:
            all_trains = self.b.find_by_id('queryLeftTable').find_by_tag('tr')

        # Bound up front so the return below can never hit an unbound name.
        this_train = None
        for k, train in enumerate(all_trains):
            tds = train.find_by_tag('td')
            if not (tds and len(tds) >= 10):
                continue
            has_ticket = tds.last.find_by_tag('a')
            if len(has_ticket) == 0:
                continue
            if k + 1 < len(all_trains):
                # The train number is read from the row after this one.
                this_train = all_trains[k + 1]['datatran']
                if len(self.trains) != 0 and this_train not in self.trains:
                    continue
                if (self.tolerance != -1 and this_train in self.blacklist
                        and self.blacklist[this_train] >= self.tolerance):
                    print(u"%s 失败 %d 次, 跳过" % (this_train, self.blacklist[this_train]))
                    continue
            for cat in self.ticket_type:
                if cat in self.ticket_index:
                    i = self.ticket_index.index(cat)
                else:
                    print('无效的席别信息')
                    return 0, ''
                if tds[i].text != u'无' and tds[i].text != '--':
                    if tds[i].text != u'有':
                        print(u'%s 的 %s 有余票 %s 张!' % (this_train, cat, tds[i].text))
                    else:
                        print(u'%s 的 %s 有余票若干张!' % (this_train, cat))
                    self.find_ticket = 1
                    tds.last.click()
                    break
            if self.find_ticket:
                break
        return this_train

    def book(self):
        """Tick the configured passengers on the order page; always returns 1."""
        while not self.page_has_loaded():
            continue
        if len(self.people) == 0:
            print('没有选择乘车人!')
            return 1
        more = []
        while len(more) == 0:
            more = self.b.find_by_text(u"更多")
        more[0].click()
        person = []
        people = self.people
        # Wait until the first passenger's label is present.
        while len(person) == 0:
            person = self.b.find_by_xpath(
                '//ul[@id="normal_passenger_id"]/li/label[contains(text(),"%s")]' % people[0])
        for p in people:
            self.b.find_by_xpath(
                '//ul[@id="normal_passenger_id"]/li/label[contains(text(),"%s")]' % p).click()
            # Confirm the dialog that may open after selecting a passenger.
            if self.b.find_by_xpath('//div[@id="dialog_xsertcj"]').visible:
                self.b.find_by_xpath(
                    '//div[@id="dialog_xsertcj"]/div/div/div/a[text()="确认"]').click()
        return 1

    def ring(self):
        """Play the alarm sound and show an image until the window is closed."""
        pygame.mixer.pre_init(64000, -16, 2, 4096)
        pygame.init()
        pygame.display.init()
        screen = pygame.display.set_mode([300, 300])
        pygame.time.delay(1000)  # wait one second for the mixer to initialise
        pygame.mixer.music.load("media/sound.ogg")
        pygame.mixer.music.play()
        running = True
        img = pygame.image.load("media/img.jpg")
        while running:
            screen.blit(img, (0, 0))
            pygame.display.flip()
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
        pygame.quit()
        return 1

    def executor(self):
        """Main loop: search, book, then ask the user whether it worked.

        On ``N`` the train is added to the blacklist counter and the search
        starts over; on ``Y`` the loop ends and the session is logged out.
        """
        self.login()
        self.switch_to_order_page()
        this_train = None
        while self.success == 0:
            self.find_ticket = 0
            while self.find_ticket == 0:
                for date in self.date:
                    try:
                        self.count += 1
                        print("Try %d times" % self.count)
                        for fromStation in self.fromStation:
                            for toStation in self.toStation:
                                this_train = self.checkTicket(date, fromStation, toStation)
                                if self.find_ticket:
                                    break
                            if self.find_ticket:
                                break
                        if self.find_ticket:
                            break
                    except KeyboardInterrupt:
                        self.b.find_by_text(u'退出').click()
                        sys.exit()
                    except Exception:
                        # Page glitches are expected while polling; retry.
                        continue
            # Book the ticket for the configured passengers.
            self.find_ticket = 0
            while self.find_ticket == 0:
                try:
                    self.find_ticket = self.book()
                except KeyboardInterrupt:
                    self.b.find_by_text(u'退出').click()
                    sys.exit()
                except Exception:
                    continue
            if self.playmusic:
                self.ring()
            print("订票成功了吗?(Y/N)")
            input_var = ''
            while input_var == '':
                input_var = sys.stdin.read(1)
                if input_var == 'Y' or input_var == 'y':
                    self.success = 1
                elif input_var == 'N' or input_var == 'n':
                    if this_train in self.blacklist:
                        self.blacklist[this_train] += 1
                    else:
                        self.blacklist[this_train] = 1
                    print(u"%s 失败 %d 次" % (this_train, self.blacklist[this_train]))
                    self.b.back()
                    self.b.reload()
                else:
                    # Anything else (including the newline): ask again.
                    input_var = ''
                    continue
        self.b.find_by_text(u'退出').click()
from splinter.browser import Browser

# Open a locally saved HTML page (file:// URL) in a Chrome session.
browser = Browser(driver_name="chrome")
url = "file:///C:/Users/lc/Desktop/vipkids/%E9%A2%84%E7%BA%A6%E8%AF%BE%E7%A8%8B%20-%20VIPKID%E5%9C%A8%E7%BA%BF%E5%B0%91%E5%84%BF%E8%8B%B1%E8%AF%AD.html"
browser.visit(url)
# Select the <td> cells of the table row whose <th> text is '15:30'.
row = browser.find_by_xpath("//tr[th='15:30']/td")
# Print the type of the first matching cell.
print(type(row[0]))
# row[0].click()
class but_ticket_obj(object):
    """Book a 12306 ticket through a splinter-driven Chrome session."""

    # id of the result-table cell whose value shows the availability of the
    # wanted seat; override it on the class or the instance for another train.
    ticket_cell_id = 'YZ_02000K718602'

    def __init__(self, username, password, order, passengers, dtime, starts, ends):
        """Store the booking parameters.

        Parameters:
            username: account user name
            password: account password
            order: 1-based index of the booking link to click; 0 = the first
            passengers: passenger names, as shown on the order page
            dtime: travel date (value of the ``_jc_save_fromDate`` cookie)
            starts: departure station cookie value
            ends: arrival station cookie value
        """
        self.username = username
        self.passwd = password
        self.order = order
        self.passengers = passengers
        self.starts = starts
        self.ends = ends
        self.dtime = dtime
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'  # login page
        self.initMy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'  # page shown after login
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'  # ticket query page
        self.driver_name = 'chrome'
        # Raw string: the backslashes are path separators, not escapes.
        self.executable_path = r'F:\cro\chromedriver.exe'

    def loginOn(self):
        """Fill in the credentials and wait until the user is logged in.

        The captcha is entered by hand; the method returns once the browser
        URL equals ``initMy_url``.
        """
        self.driver.visit(self.login_url)
        self.driver.fill('loginUserDTO.user_name', self.username)
        self.driver.fill('userDTO.password', self.passwd)
        print("该到输入验证码了......")
        while self.driver.url != self.initMy_url:
            sleep(1)

    def buy_ticket(self):
        """Run the complete booking flow.

        Returns:
            True when the final confirmation was clicked, False when any
            step after opening the ticket page raised an exception.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1200, 800)
        self.loginOn()
        self.driver.visit(self.ticket_url)
        try:
            # Pre-fill the query form through cookies.
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.reload()

            count = 0
            while self.driver.url == self.ticket_url:
                count += 1
                self.driver.find_by_text(u"查询").click()
                print(u"循环点击查询... 第 %s 次" % count)
                sleep(1)
                # Availability shown in the watched cell.
                tarVal = self.driver.find_by_id(self.ticket_cell_id).value
                print(tarVal)
                if tarVal == u'无':
                    continue
                book_links = self.driver.find_by_text(u"预订")
                if self.order != 0:
                    book_links[self.order - 1].click()
                else:
                    book_links[0].click()
                break

            print(u"开始预订...")
            sleep(1)
            print(u'开始选择用户...')
            for user in self.passengers:
                self.driver.find_by_text(user).click()
                # Confirm the dialog that may open after picking a passenger.
                if self.driver.find_by_id("dialog_xsertcj_ok"):
                    self.driver.find_by_id("dialog_xsertcj_ok").click()
            print(u"提交订单...")
            # Ticket kind: 1 adult, 2 child, 3 student, 4 disabled.
            self.driver.find_by_xpath(
                '//select[@id="ticketType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            # Seat class: 1 hard seat, 3 hard sleeper, 4 soft sleeper,
            # 9 business, O (letter o, not zero) second class, M first class.
            self.driver.find_by_xpath(
                '//select[@id="seatType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            print(u"提交订单...")
            self.driver.find_by_id('submitOrder_id').click()
            sleep(1.5)
            print(u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()
            return True
        except Exception:
            # Report failure to the caller, but let KeyboardInterrupt and
            # SystemExit propagate so the run can still be aborted.
            return False
def splinter_scrape_bf(city, state):
    """Scrape bringfido.com lodging reviews for a city into the database.

    Every results page of ``<city>_<state>_us`` is walked, each listed hotel
    is visited and its reviews are collected. The rows are appended to the
    ``bf_reviews`` and ``bf_hotels`` tables (hotels de-duplicated on
    ``business_id``).

    Parameters:
        city: city name as used in the site URL (lower-cased here)
        state: state abbreviation as used in the site URL (lower-cased here)
    Returns:
        None
    """
    city = city.lower()
    state = state.lower()
    br = Browser()
    citystate_string = city + '_' + state + '_us/'
    url = 'http://www.bringfido.com/lodging/city/' + citystate_string
    br.visit(url)
    page = 1
    npages = len(br.find_by_xpath('//*[@id="results_paging_controls_bottom"]/span'))
    # Each column is listed once; a duplicated label would yield duplicated
    # columns in the frames written to the database.
    columns = ['hotel_id', 'hotel_img_url', 'hotel_url', 'hotel_name',
               'hotel_address', 'hotel_city', 'hotel_state', 'hotel_rating',
               'hotel_latitude', 'hotel_longitude', 'review_count',
               'business_id', 'review_id', 'user_id', 'username',
               'review_title', 'review_text', 'review_rating', 'review_date']
    frames = []
    while page == 1 or page < npages:
        print('*' * 70)
        print('Now on page {}'.format(page))
        archive_links = br.find_by_xpath('//*[@id="results_list"]/div')

        # First pass: collect what the listing shows, because visiting a
        # hotel page navigates away from the results.
        hotel_names = []
        links = []
        biz_ids = []
        hotel_img_urls = []
        for lnk in archive_links:
            hotel_names.append(lnk.find_by_xpath('div[2]/h1/a').value)
            links.append(lnk.find_by_xpath('div/h1/a')['href'])
            hotel_img_urls.append(
                lnk.find_by_xpath('div/div[@class="photo_inner"]/a/img')['src'])
            biz_ids.append(lnk['id'].split('_')[-1])

        # Second pass: one row per review, carrying the data of ITS hotel.
        rows = []
        for hotel_id, link in enumerate(links):
            print('*' * 75)
            print('Now on {}: {}'.format(hotel_id, link))
            print('*' * 75)
            br.visit(link)
            # Address block: first line is the street address, second line
            # is "city, state, country+zip".
            details = br.find_by_xpath('//*[@class="address"]').text.split('\n')
            address = details[0]
            csczip = details[1]
            # Separate names keep the function parameters intact.
            hotel_city, hotel_state, _zipcode = csczip.strip().split(',')

            reviews = br.find_by_xpath('//*[@class="review_container"]')
            print(reviews)
            print('')
            hotel_rows = []
            for rev in reviews:
                # The rating is the first character of the image file name.
                rating = rev.find_by_xpath('div[2]/img')['src'].split('/')[-1][0:1]
                hotel_rows.append({
                    # Position of the hotel on this results page.
                    'hotel_id': hotel_id,
                    'hotel_img_url': hotel_img_urls[hotel_id],
                    'hotel_url': link,
                    'hotel_name': hotel_names[hotel_id],
                    'hotel_address': address,
                    'hotel_city': hotel_city,
                    'hotel_state': hotel_state,
                    'hotel_latitude': None,
                    'hotel_longitude': None,
                    'business_id': biz_ids[hotel_id],
                    'review_id': 0,
                    'user_id': 0,
                    'username': rev.find_by_xpath('div/div[2]').text,
                    'review_title': rev.find_by_xpath('div/div[1]').text,
                    'review_text': rev.find_by_xpath('div/div[3]').text,
                    'review_rating': rating,
                })
                print(rating)
            if hotel_rows:
                # Hotel-level aggregates are computed from this hotel only.
                mean_rating = np.mean([int(r['review_rating']) for r in hotel_rows])
                for r in hotel_rows:
                    r['hotel_rating'] = mean_rating
                    r['review_count'] = len(hotel_rows)
                rows.extend(hotel_rows)

        print('Number of new titles: {}'.format(len(rows)))
        print('Number of new ratings: {}'.format(len(rows)))
        df = pd.DataFrame(rows, columns=columns)
        print('new entries from this page: {}'.format(len(df)))
        frames.append(df)

        page += 1
        if page < npages:
            # Go back to the listing and retry until the pager button for
            # the next page is present.
            page_timeout = True
            while page_timeout:
                br.visit(url)
                time.sleep(1)
                print('Now scraping page {} of {}'.format(page, npages))
                button = br.find_by_id('page_' + str(page))
                print(button)
                if len(button) > 0:
                    button.click()
                    page_timeout = False

    # DataFrame.append is gone in pandas 2.x; concatenate once at the end.
    if frames:
        bigdf = pd.concat(frames, ignore_index=True)
    else:
        bigdf = pd.DataFrame(columns=columns)

    bigdf_reviews = bigdf[['hotel_id', 'review_id', 'business_id', 'user_id',
                           'username', 'review_title', 'review_text',
                           'review_rating']].copy()
    bigdf_hotels = bigdf[['hotel_id', 'hotel_url', 'hotel_img_url', 'hotel_name',
                          'hotel_address', 'hotel_city', 'hotel_state',
                          'hotel_rating', 'hotel_latitude', 'hotel_longitude',
                          'business_id', 'review_count']].copy()
    bigdf_hotels.drop_duplicates(subset='business_id', inplace=True)
    # Both id columns are written as NULL.
    bigdf_hotels['hotel_id'] = None
    bigdf_reviews['review_id'] = None
    print('Number of bf reviews to add: {}'.format(len(bigdf_reviews)))
    engine = cadb.connect_aws_db(write_unicode=True)
    bigdf_reviews.to_sql('bf_reviews', engine, if_exists='append', index=False)
    bigdf_hotels.to_sql('bf_hotels', engine, if_exists='append', index=False)
class LemonLemon_douyin(object):
    """Download Douyin videos described by dict-literal lines in a text file."""

    def __init__(self, width=500, height=300):
        """Start a headless Chrome session and create the ``Tools`` helper.

        ``width`` and ``height`` are accepted for backward compatibility;
        this method does not use them.
        """
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name="chrome", options=chrome_options, headless=True)
        self.tool = Tools()

    def get_video_urls(self, input_f):
        """Read the video descriptions from ``input_f``.

        Every non-blank line must be a Python dict literal with the keys
        ``ID``, ``des_md5`` and ``link``. As a side effect
        ``self.date_today`` is set to a ``<date>_<hour>:<minute>`` stamp.

        Parameters:
            input_f: path of the input file
        Returns:
            video_names: file names of the form ``<ID>_<des_md5>.mp4``
            video_urls: the ``link`` values
        """
        import ast  # local import: only this method needs it

        video_names = []
        video_urls = []
        now_date = datetime.datetime.now()
        self.date_today = (str(now_date.date()) + "_" + str(now_date.hour) + ":" +
                           str(now_date.minute))
        with open(input_f) as f:
            for line in f:
                if not line.strip():
                    continue
                # SECURITY: this used to be eval(line), which executes
                # arbitrary code found in the input file. literal_eval only
                # accepts plain Python literals.
                info = ast.literal_eval(line.strip())
                video_urls.append(info["link"])
                video_names.append(info["ID"] + "_" + info["des_md5"] + ".mp4")
        return video_names, video_urls

    def get_download_url(self, video_url):
        """Return the watermarked download address for a share page.

        Parameters:
            video_url: address of the video share page
        Returns:
            download_url: first entry of the page's ``play_addr`` URL list
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, "lxml")
        # The metadata is a JS literal ("var data = [...]") in the last script.
        script = bf.find_all("script")[-1]
        video_url_js = re.findall(r"var data = \[(.+)\];", str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html["video"]["play_addr"]["url_list"][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name`` and print the progress.

        Parameters:
            video_url: address of the video share page
            video_name: path of the file to write
            watermark_flag: when true the watermark-free address is used
        Raises:
            IOError: when the server does not answer with HTTP 200, so that
                the caller records the link as failed instead of successful
        """
        size = 0
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            if response.status_code != 200:
                raise IOError("unexpected HTTP status %s for %s"
                              % (response.status_code, video_url))
            chunk_size = 1024
            # The header may be missing; progress is then simply not shown.
            content_size = int(response.headers.get("content-length", 0))
            sys.stdout.write(" [文件大小]:%0.2f MB\n" % (content_size / chunk_size / 1024))
            with open(video_name, "wb") as out:
                for data in response.iter_content(chunk_size=chunk_size):
                    out.write(data)
                    size += len(data)
                    out.flush()
                    if content_size:
                        sys.stdout.write(
                            " [下载进度]:%.2f%%" % float(size / content_size * 100) + "\r")
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Return the watermark-free download address of a video.

        The address is obtained by submitting ``video_url`` to
        douyin.iiilab.com in the browser session and reading the first
        link of the result.

        Parameters:
            video_url: address of the watermarked video
        Returns:
            the ``href`` of the first link in the result block
        """
        self.driver.visit("http://douyin.iiilab.com/")
        self.driver.find_by_tag("input").fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, "lxml")
        return bf.find("a").get("href")

    def run(self, input_f):
        """Download every video listed in ``input_f``.

        Files are stored as ``<parent of cwd>/DOWNLOAD/<ID>/<name>``; files
        that already exist are skipped. Links that fail are collected and
        written, one per line, to ``error_url.txt``.

        Parameters:
            input_f: path of the input file
        Returns:
            None
        """
        error_url = []
        video_names, video_urls = self.get_video_urls(input_f)
        download_root = os.path.join(
            os.path.abspath(os.path.dirname(os.getcwd())), "DOWNLOAD")
        if not os.path.exists(download_root):
            os.mkdir(download_root)
        print("视频下载中:共有%d个作品!\n" % len(video_urls))
        for num, (video_url, video_name) in enumerate(zip(video_urls, video_names)):
            print(" 解析第%d个视频链接 [%s] 中,请稍后!\n" % (num + 1, video_url))
            # Random pause between downloads.
            random_wait = random.uniform(3, 5)
            print("waiting...", random_wait)
            time.sleep(random_wait)

            # One sub-directory per ID (the part before the first "_").
            ID = video_name.split("_")[0]
            target_dir = os.path.join(download_root, ID)
            if not os.path.isdir(target_dir):
                os.mkdir(target_dir)

            video_file = os.path.join(target_dir, video_name)
            if os.path.exists(video_file):
                print(video_name + "文件已存在")
                continue
            try:
                self.video_downloader(video_url, video_file)
                self.tool.writeToFile(video_name, "SuccessDownload")
            except Exception:
                print("**************************")
                print("ERROR", video_url)
                error_url.append(video_url)
            print("\n")
        # NOTE(review): the browser is deliberately left open here (the
        # close() call was already disabled); confirm who closes it.
        # self.driver.close()
        with open("error_url.txt", "w") as f:
            f.writelines(url + "\n" for url in error_url)
        print("下载完成!")
        print("出错数量:", len(error_url))
class Scrapy12306():
    """Grab a 12306 ticket using the settings stored in ``12306.conf``."""

    def __init__(self):
        """Read ``12306.conf`` and open the configured URL in the browser.

        The ``config`` section must provide ``url``, ``username``,
        ``password``, ``browser`` and ``date``.
        """
        config = ConfigParser.ConfigParser()
        # Close the file once it is parsed instead of leaking the handle.
        with codecs.open("12306.conf", "r", "utf-8") as conf_file:
            # readfp() is the legacy name of read_file(); use what exists.
            load = getattr(config, "read_file", None) or config.readfp
            load(conf_file)
        self.url = config.get("config", "url")
        self.username = config.get("config", "username")
        self.password = config.get("config", "password")
        self.bType = config.get("config", "browser")
        # self._from = config.get("config","_from")
        # self._to = config.get("config","_to")
        self._from = u'北京'
        self._to = u'邯郸'
        self.date = config.get("config", "date")
        print("browser :%s url:%s" % (self.bType, self.url))
        self.browser = Browser(self.bType)
        self.browser.visit(self.url)

    def login(self):
        """Log in with the configured credentials.

        Quits the browser and exits the process when user name or password
        is empty. The 10 second pause precedes the click on the login
        button -- presumably to let the user solve the captcha; confirm.

        Returns:
            True when the element ``link_4_myTicket`` is found afterwards.
        """
        if self.username == "" or self.password == "":
            print("error:username and password must not be empty")
            self.browser.quit()
            sys.exit(-1)
        self.browser.fill("loginUserDTO.user_name", self.username)
        self.browser.fill("userDTO.password", self.password)
        time.sleep(10)
        self.browser.find_by_id("loginSub").click()
        loginFlag = self.browser.find_by_id("link_4_myTicket")
        return not loginFlag.is_empty()

    def getTicket(self):
        """Query until a bookable train shows up, then submit the order.

        Only the booking links at positions 11..20 of the result list are
        considered; a link counts as bookable when it has class ``btn72``.
        """
        self.browser.find_by_xpath("//a[@href='/otn/leftTicket/init']")[0].click()
        # The query form is pre-filled through cookies.
        self.browser.cookies.add({"_jc_save_fromStation": "%u5317%u4EAC%2CBJP"})
        self.browser.cookies.add({'_jc_save_toStation': '%u90AF%u90F8%2CHDP'})
        self.browser.cookies.add({"_jc_save_fromDate": self.date})

        # Reload until the query button is present.
        isload = False
        while not isload:
            self.browser.reload()
            isload = self.browser.is_element_present_by_id("query_ticket", wait_time=10)
            if not isload:
                time.sleep(30)
        self.browser.find_by_id("query_ticket").click()
        print(self.browser.cookies.all())

        flag = False
        while not flag:
            try:
                stations = self.browser.find_by_text(u"预订")
                station = None
                # Bound the range by the number of results: a short list
                # must lead to a new query, not to an IndexError.
                for i in range(11, min(21, len(stations))):
                    station = stations[i]
                    flag = station.has_class("btn72")
                    if flag:
                        break
                if not flag:
                    self.browser.find_by_id("query_ticket").click()
                    time.sleep(2)
                    continue
                station.click()
                time.sleep(1)
            except Exception as e:
                print(e)
                flag = False
                # Pause before retrying so a persistent error cannot turn
                # this loop into a busy spin.
                time.sleep(2)

        # Select the two passengers: via the label when it exists, via the
        # check box itself otherwise.
        p1 = self.browser.find_by_xpath("//label[@for='normalPassenger_0']")
        p2 = self.browser.find_by_xpath("//label[@for='normalPassenger_3']")
        print('p1,p2:%s %s' % (p1, p2))
        if p1.is_empty():
            self.browser.find_by_id("normalPassenger_0")[0].click()
        else:
            p1[0].click()
        if p2.is_empty():
            self.browser.find_by_id("normalPassenger_3")[0].click()
        else:
            p2[0].click()
        self.browser.evaluate_script("confirm('抢到票了帅哥,订票吗')")
        time.sleep(15)
        self.browser.find_by_id("submitOrder_id").click()
class TestEngine(object):
    """Run scripted UI actions against a site in a splinter browser.

    Every test runs in its own thread. Class-level settings (see
    :meth:`set_config`) control the pauses and whether the mouse hovers
    over an element before it is clicked.
    """

    __sleep_time = 2
    __mouse_over = True
    __mouse_over_sleep = 1

    def __init__(self, browser_name, execute_path=None):
        """Open a full-screen browser.

        Parameters:
            browser_name: splinter driver name
            execute_path: optional path of the driver executable
        """
        if execute_path is None:
            self.__browser = Browser(browser_name, fullscreen=True)
        else:
            self.__browser = Browser(browser_name, executable_path=execute_path,
                                     fullscreen=True)
        self.__quit = False

    @staticmethod
    def set_config(config):
        """Update the class-wide settings from ``config``.

        Recognised keys are ``sleep_time``, ``mouse_over`` and
        ``mouse_over_sleep``; a missing (None) key resets the default.
        """
        TestEngine.__sleep_time = 2 if config.get("sleep_time") is None else config.get("sleep_time")
        TestEngine.__mouse_over = True if config.get("mouse_over") is None else config.get("mouse_over")
        TestEngine.__mouse_over_sleep = 1 if config.get("mouse_over_sleep") is None else config.get("mouse_over_sleep")

    def test_list_acts(self, domain, action_list, back_fun=None, result_back=None):
        """Run every action object of ``action_list`` in a new thread."""
        thread_deal = threading.Thread(target=self.__test_list_thread,
                                       args=(domain, action_list, back_fun, result_back),
                                       name="TestEngine deal tester")
        thread_deal.start()

    def test_deal(self, domain, action_obj, back_fun=None, result_back=None):
        """Run a single action object in a new thread."""
        thread_deal = threading.Thread(target=self.__test_do_thread,
                                       args=(domain, action_obj, back_fun, result_back),
                                       name="TestEngine deal tester")
        thread_deal.start()

    def quit(self):
        """Close the browser and remember that the engine has quit."""
        self.__quit = True
        self.__browser.quit()

    def is_quited(self):
        """Return True once :meth:`quit` has been called."""
        return self.__quit

    def __finish(self, wait_close, back_fun):
        """Wait ``wait_close`` seconds, then call ``back_fun`` or quit."""
        if wait_close != 0:
            sleep(wait_close)
        if back_fun is None:
            self.quit()
        else:
            back_fun()

    def __test_list_thread(self, domain, action_list, back_fun=None, result_back=None):
        """Thread body of :meth:`test_list_acts`."""
        try:
            for action in action_list:
                self.__test_do(domain, action, result_back)
        except Exception as e:
            raise Exception("[Error code] deal test list failed, error code=%s" % e)
        finally:
            # An empty list has no first action to read waitClose from.
            wait_close = action_list[0].waitClose if action_list else 0
            self.__finish(wait_close, back_fun)

    def __test_do_thread(self, domain, action_obj, back_fun=None, result_back=None):
        """Thread body of :meth:`test_deal`."""
        try:
            self.__test_do(domain, action_obj, result_back)
        except Exception as e:
            raise Exception("[Error code] deal test failed, error code=%s" % e)
        finally:
            self.__finish(action_obj.waitClose, back_fun)

    def __test_do(self, domain, action_obj, result_back=None):
        """Visit the page of ``action_obj`` and execute its forms and actions.

        With forms, the first action is executed once after each form has
        been filled in, and the remaining actions run afterwards in order.
        Without forms, all actions run in order.
        """
        test_url = domain + action_obj.urlPath
        self.__browser.visit(test_url)
        action_list = TesterActionData().dict_to_list(action_obj.actionList)
        if action_obj.forms is not None:
            form_action = action_list[0] if action_list else None
            forms = TesterForms().dict_to_list(action_obj.forms)
            for form in forms:
                params = TesterFormData().dict_to_list(form.params)
                for param in params:
                    value = param.formElValue
                    # Only byte strings need decoding; text is used as is.
                    if isinstance(value, bytes):
                        value = value.decode("utf-8")
                    self.__set_value(int(param.formType), param.formElName,
                                     value, int(param.index))
                    sleep(TestEngine.__sleep_time)
                if form_action is not None:
                    self.__deal_action(form_action, result_back)
                    sleep(action_obj.sleepTime)
            for action_deal in action_list[1:]:
                self.__deal_action(action_deal, result_back)
                sleep(action_obj.sleepTime)
        else:
            for action_deal in action_list:
                self.__deal_action(action_deal, result_back)
                sleep(action_obj.sleepTime)

    def __set_value(self, form_type, el_name, el_value, index):
        """Put ``el_value`` into the ``index``-th element found for ``el_name``."""
        elements = self.__event_element(form_type, el_name)
        element = elements[index]
        if element['type'] in ['text', 'password', 'tel'] or element.tag_name == 'textarea':
            element.value = el_value
        elif element['type'] == 'checkbox':
            if el_value:
                element.check()
            else:
                element.uncheck()
        elif element['type'] == 'radio':
            element.click()
        elif element._element.tag_name == 'select':
            element.find_by_value(el_value).first._element.click()
        else:
            element.value = el_value

    def __event_element(self, el_type, el_value):
        """Find elements using the lookup strategy named by ``el_type``.

        Raises:
            ValueError: when the strategy is not supported
        """
        ele_type = EL_TYPE.value(el_type)
        if ele_type == "id":
            return self.__browser.find_by_id(el_value)
        elif ele_type == "name":
            return self.__browser.find_by_name(el_value)
        elif ele_type == "tag":
            return self.__browser.find_by_tag(el_value)
        elif ele_type == "value":
            return self.__browser.find_by_value(el_value)
        elif ele_type == "selector":
            return self.__browser.find_by_xpath(el_value)
        elif ele_type == "css":
            return self.__browser.find_by_css(el_value)
        else:
            raise ValueError("Test Engine can't deal the element type:%s, el_type:%s"
                             % (ele_type, el_type))

    def __deal_action(self, action_data, result_back=None):
        """Execute one action, then report the expected-text check.

        ``result_back`` (when given) receives a ``TesterResult`` saying
        whether ``action_data.testerResult`` is present on the page.
        """
        action_type = ACTION_TYPE.value(action_data.action)
        # After a page jump, work on the window that was opened last.
        self.__browser.windows.current = self.__browser.windows[-1]
        if action_type not in ("click", "double click", "right click",
                               "mouse over", "mouse out", "select"):
            raise Exception("don't find action for action:%s" % action_data.action)
        target = self.__event_element(action_data.elType,
                                      action_data.elValue)[int(action_data.index)]
        if action_type == "click":
            self.__mouse_of_click(target)
        elif action_type == "double click":
            self.__mouse_of_double_click(target)
        elif action_type == "right click":
            self.__mouse_of_right_click(target)
        elif action_type == "mouse over":
            target.mouse_over()
        elif action_type == "mouse out":
            target.mouse_out()
        else:
            target.select()

        if action_data.testerResult is None or result_back is None:
            return
        try:
            sleep(3)
            result_back(TesterResult(action_data.testerResult,
                                     self.__browser.is_text_present(action_data.testerResult)))
        except Exception:
            result_back(TesterResult(action_data.testerResult, False))

    def __hover(self, event_deal_obj):
        """Hover over the element first when ``mouse_over`` is enabled."""
        if TestEngine.__mouse_over:
            event_deal_obj.mouse_over()
            sleep(TestEngine.__mouse_over_sleep)

    def __mouse_of_click(self, event_deal_obj):
        """Left-click, optionally hovering first."""
        self.__hover(event_deal_obj)
        event_deal_obj.click()

    def __mouse_of_right_click(self, event_deal_obj):
        """Right-click, optionally hovering first."""
        self.__hover(event_deal_obj)
        # Without hover this used to fall back to a plain left click.
        event_deal_obj.right_click()

    def __mouse_of_double_click(self, event_deal_obj):
        """Double-click, optionally hovering first."""
        self.__hover(event_deal_obj)
        # Without hover this used to fall back to a single click.
        event_deal_obj.double_click()
# Log in.
b.find_by_id('object_id').fill(username)
b.find_by_id('password').fill(pwd)
# NOTE(review): this only looks the element up; nothing is ticked.
b.find_by_name('keep_pwd')
b.find_by_id('confirmLogin').click()
time.sleep(1)

# Search for the component and open its detail page.
b.visit('http://sdp.nd/main.html')
b.find_by_id('appSearchTxt').fill(component_name)
# Wait for the detail link to show up. The former test "... is []" compared
# identity with a fresh list, was always False and therefore never waited.
while b.find_link_by_partial_href(
        '/modules/mobileComponent/detail.html?appId').is_empty():
    print('.')
    time.sleep(0.5)
b.find_link_by_partial_href(
    '/modules/mobileComponent/detail.html?appId').click()

# Choose the branch.
time.sleep(0.5)
b.find_by_xpath('/html/body/div[4]/a[2]').first.click()
b.find_by_xpath('//*[@id="appDeatil"]/div[1]/span[2]/span').first.click()
b.find_by_id(env_id).first.find_by_tag('a').click()
b.find_by_id('publish').first.click()

# Version number and description.
b.find_by_id('version').fill(versionNo)
b.find_by_id('versionDesc').fill(detail)
# b.find_by_xpath('//*[@id="gitlist"]/div').first.click()
# b.find_by_id('publishConfirm').click()
b.find_by_xpath('//*[@id="Jprice"]/li[6]').click() b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a') time.sleep(5) loop(b) except Exception as e: b.reload() time.sleep(1) loop(b) b = Browser(driver_name='chrome') b.visit(url) login(b) b.visit(order_url) b.find_by_xpath('//*[@id="Jprice"]/li[6]').click() b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a').click() time.sleep(5) while True: loop(b) if b.title=='【五月天郑州演唱会门票】2017五月天 LIFE [ 人生无限公司 ] 巡回演唱会—郑州站-永乐票务': b.find_by_xpath('//*[@id="Jprice"]/li[6]').click() b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a') time.sleep(2) loop(b) else: print('SUCCESS!') break
class Ticket(object):
    """Config-driven 12306 ticket finder that drives a splinter browser.

    Settings are read from an INI file with the sections ``STATIONCOOKIE``,
    ``GLOBAL`` and ``TICKET``.
    """

    def __init__(self, config_file):
        """Read ``config_file``, open the browser and load all options."""
        # Config parser setting.
        self.config_file = config_file
        self.settings = configparser.ConfigParser()
        self.settings._interpolation = configparser.ExtendedInterpolation()
        self.settings.read(self.config_file, 'utf-8')
        # Environment setting.
        # NOTE(review): the browser is created before the config is parsed,
        # so the configured 'browser' value never affects which driver starts.
        self.brower = 'chrome'
        self.b = Browser(driver_name=self.brower)
        self.station = {}
        self.url = "https://kyfw.12306.cn/otn/leftTicket/init"
        # Seat classes; the list position is the matching table column.
        self.ticket_index = [
            '', u'商务座', u'特等座', u'一等座', u'二等座', u'高级软卧',
            u'软卧', u'硬卧', u'软座', u'硬座', u'无座'
        ]
        self.username = ''
        self.password = ''
        self.date_format = '%Y-%m-%d'
        self.tolerance = -1
        self.blacklist = {}
        self.date = []
        self.isStudent = False
        self.success = 0
        self.find_ticket = 0
        # Defaults must be set BEFORE config_parser(); previously playmusic
        # was reset to False afterwards, so the 'alarm' option was ignored.
        self.playmusic = False
        self.count = 0
        self.config_parser()

    def ConfigSectionMap(self, section):
        """Return the options of ``section`` as a dict (None on read error)."""
        dict1 = {}
        for option in self.settings.options(section):
            try:
                dict1[option] = self.settings.get(section, option)
            except Exception:
                print("exception on %s!" % option)
                dict1[option] = None
        return dict1

    def daterange(self, start_date, end_date):
        """Yield every day from ``start_date`` to ``end_date`` inclusive."""
        for n in range(int((end_date - start_date).days) + 1):
            yield start_date + timedelta(n)

    def config_parser(self):
        """Load station and booking options; exit the process on failure."""
        if self.retrieve_station_dict() == -1:
            sys.exit()
        if self.retrieve_book_options() == -1:
            sys.exit()

    def retrieve_station_dict(self):
        """Copy the STATIONCOOKIE section into ``self.station``."""
        dict_helper = self.ConfigSectionMap('STATIONCOOKIE')
        for name, value in dict_helper.items():
            self.station[name] = value

    def retrieve_book_options(self):
        """Load login and booking options.

        Returns -1 when the date range is invalid, otherwise None.
        """
        login_info = self.ConfigSectionMap('GLOBAL')
        self.username = login_info['username'].strip()
        self.password = login_info['password'].strip()
        self.brower = login_info['browser']

        book_settings = self.ConfigSectionMap('TICKET')
        self.fromStation = [s.strip() for s in book_settings['from_station'].split(',')]
        self.toStation = [s.strip() for s in book_settings['to_station'].split(',')]
        trains = [t.strip() for t in book_settings['trains'].split(',')]
        # An empty 'trains' option means "any train".
        if len(trains) == 1 and trains[0] == '':
            self.trains = []
        else:
            self.trains = trains
        self.ticket_type = [t.strip() for t in book_settings['ticket_type'].split(',')]

        dates = [d.strip() for d in book_settings['date'].split(',')]
        if book_settings['range_query'].strip() == 'Y':
            if len(dates) < 2:
                print("未设置正确的起至时间")
                return -1
            start_date = datetime.strptime(dates[0], self.date_format)
            end_date = datetime.strptime(dates[1], self.date_format)
            if end_date < start_date:
                # The message used to state the opposite of the check.
                print("查询截止日期不可早于开始日期!")
                return -1
            for single_date in self.daterange(start_date, end_date):
                self.date.append(single_date.strftime(self.date_format))
        else:
            self.date = dates

        if book_settings['student'].strip() == 'Y':
            self.isStudent = True
        self.tolerance = int(book_settings['tolerance'])
        self.people = [p.strip() for p in book_settings['people'].split(',')]
        if book_settings['alarm'].strip() == 'Y':
            self.playmusic = True

    def login(self):
        """Open the login form, fill the credentials and pause in pdb."""
        self.b.visit(self.url)
        self.b.find_by_text(u"登录").click()
        self.b.fill("loginUserDTO.user_name", self.username)
        self.b.fill("userDTO.password", self.password)
        # Drops into the debugger so the operator can finish logging in.
        pdb.set_trace()

    def page_has_loaded(self):
        """Return True once ``document.readyState`` is 'complete'."""
        page_state = self.b.evaluate_script("document.readyState")
        return page_state == 'complete'

    def switch_to_order_page(self):
        """Poll for the ticket-booking link and click it."""
        order = []
        while len(order) == 0:
            order = self.b.find_by_text(u"车票预订")
        order[0].click()

    def checkTicket(self, date, fromStation, toStation):
        """Query one date/route and click the first train with a wanted seat.

        Sets ``self.find_ticket`` to 1 on a hit.  Returns the train id last
        examined (None if no row was examined), or ``(0, '')`` when a
        configured seat class is unknown.
        """
        print('date: %s, from %s, to %s' % (date, fromStation, toStation))
        self.b.cookies.add({"_jc_save_fromDate": date})
        self.b.cookies.add({"_jc_save_fromStation": self.station[fromStation]})
        self.b.cookies.add({"_jc_save_toStation": self.station[toStation]})
        self.b.cookies.all()
        self.b.reload()
        if self.isStudent:
            self.b.find_by_text(u'学生').click()
        self.b.find_by_text(u"查询").click()

        all_trains = []
        while self.page_has_loaded() == False:
            continue
        while len(all_trains) == 0:
            all_trains = self.b.find_by_id('queryLeftTable').find_by_tag('tr')

        # Initialised so that the return below cannot hit an unbound local.
        this_train = None
        for k, train in enumerate(all_trains):
            tds = train.find_by_tag('td')
            if tds and len(tds) >= 10:
                has_ticket = tds.last.find_by_tag('a')
                if len(has_ticket) != 0:
                    # The train id is read from the row following this one.
                    if k + 1 < len(all_trains):
                        this_train = all_trains[k + 1]['datatran']
                    if len(self.trains) != 0 and this_train not in self.trains:
                        continue
                    if (self.tolerance != -1 and this_train in self.blacklist
                            and self.blacklist[this_train] >= self.tolerance):
                        print(u"%s 失败 %d 次, 跳过" % (this_train, self.blacklist[this_train]))
                        continue
                    for cat in self.ticket_type:
                        if cat in self.ticket_index:
                            i = self.ticket_index.index(cat)
                        else:
                            print('无效的席别信息')
                            return 0, ''
                        if tds[i].text != u'无' and tds[i].text != '--':
                            if tds[i].text != u'有':
                                print(u'%s 的 %s 有余票 %s 张!' % (this_train, cat, tds[i].text))
                            else:
                                print(u'%s 的 %s 有余票若干张!' % (this_train, cat))
                            self.find_ticket = 1
                            tds.last.click()
                            break
            if self.find_ticket:
                break
        return this_train

    def book(self):
        """Tick every configured passenger on the order page; returns 1."""
        while self.page_has_loaded() == False:
            continue
        if len(self.people) == 0:
            print('没有选择乘车人!')
            return 1
        more = []
        while len(more) == 0:
            more = self.b.find_by_text(u"更多")
        more[0].click()

        passenger_xpath = '//ul[@id="normal_passenger_id"]/li/label[contains(text(),"%s")]'
        people = self.people
        # Wait until the passenger list has rendered.
        person = []
        while len(person) == 0:
            person = self.b.find_by_xpath(passenger_xpath % people[0])
        for p in people:
            self.b.find_by_xpath(passenger_xpath % p).click()
            # Confirm the dialog that may appear after ticking a passenger.
            if self.b.find_by_xpath('//div[@id="dialog_xsertcj"]').visible:
                self.b.find_by_xpath('//div[@id="dialog_xsertcj"]/div/div/div/a[text()="确认"]').click()
        return 1

    def ring(self):
        """Play the alarm sound and show an image until the window is closed."""
        pygame.mixer.pre_init(64000, -16, 2, 4096)
        pygame.init()
        pygame.display.init()
        screen = pygame.display.set_mode([300, 300])
        pygame.time.delay(1000)  # wait one second for the mixer to initialise
        pygame.mixer.music.load("media/sound.ogg")
        pygame.mixer.music.play()
        running = True
        img = pygame.image.load("media/img.jpg")
        while running:
            screen.blit(img, (0, 0))
            pygame.display.flip()
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
        pygame.quit()
        return 1

    def executor(self):
        """Main loop: log in, search until a ticket is found, book, confirm."""
        self.login()
        self.switch_to_order_page()
        this_train = None
        while self.success == 0:
            self.find_ticket = 0
            while self.find_ticket == 0:
                for date in self.date:
                    try:
                        self.count += 1
                        print("Try %d times" % self.count)
                        for fromStation in self.fromStation:
                            for toStation in self.toStation:
                                this_train = self.checkTicket(date, fromStation, toStation)
                                if self.find_ticket:
                                    break
                            if self.find_ticket:
                                break
                        if self.find_ticket:
                            break
                    except KeyboardInterrupt:
                        self.b.find_by_text(u'退出').click()
                        sys.exit()
                    except Exception:
                        # Best effort: page glitches are retried on the next date.
                        continue

            # Book the ticket for the configured passengers.
            self.find_ticket = 0
            while self.find_ticket == 0:
                try:
                    self.find_ticket = self.book()
                except KeyboardInterrupt:
                    self.b.find_by_text(u'退出').click()
                    sys.exit()
                except Exception:
                    continue
            if self.playmusic:
                self.ring()

            print("订票成功了吗?(Y/N)")
            input_var = ''
            while input_var == '':
                input_var = sys.stdin.read(1)
                if input_var == 'Y' or input_var == 'y':
                    self.success = 1
                elif input_var == 'N' or input_var == 'n':
                    # Count the failure so the train can be skipped later.
                    if this_train in self.blacklist:
                        self.blacklist[this_train] += 1
                    else:
                        self.blacklist[this_train] = 1
                    print(u"%s 失败 %d 次" % (this_train, self.blacklist[this_train]))
                    self.b.back()
                    self.b.reload()
                else:
                    input_var = ''
                    continue
        self.b.find_by_text(u'退出').click()
class Hujiang(object):
    """Books Hujiang lesson slots through a splinter-driven browser."""

    driver_name = ''
    executable_path = ''
    # Account credentials.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration instead.
    username = u"Peters_mom"
    passwd = u"wydycg98"
    # URLs: course calendar, login page, and the page shown after login.
    ticket_url = "https://class.hujiang.com/talk/kids/calendar?classId=17380104"
    login_url = "https://login.hujiang.com/"
    initmy_url = "https://my.hujiang.com/account/"
    # Column indexes (in the page) of the weekdays to book.
    indexs = []
    # Row indexes of the time slots to book: 41 is 20:00, 42 is 20:30.
    times = ['41', '42']

    # Today's weekday label -> column indexes of the two days to book.
    _WEEKDAY_INDEXES = {
        u'周一': ['2', '5'],
        u'周二': ['1', '4'],
        u'周三': ['7', '3'],
        u'周四': ['6', '2'],
        u'周五': ['5', '1'],
        u'周六': ['4', '7'],
        u'周日': ['3', '6'],
    }

    def __init__(self):
        self.driver_name = 'chrome'
        print("Welcome To Use The Tool")

    @classmethod
    def _indexes_for_weekday(cls, weekday):
        """Return the column indexes for ``weekday`` (['0', '0'] if unknown)."""
        return list(cls._WEEKDAY_INDEXES.get(weekday, ['0', '0']))

    def login(self):
        """Submit the login form and wait until the account page is shown."""
        try:
            self.driver.visit(self.login_url)
            self.driver.fill("username", self.username)
            self.driver.fill("password", self.passwd)
            self.driver.find_by_tag("button").click()
            print("登陆成功")
            # Poll once per second until the post-login URL is reached.
            while self.driver.url != self.initmy_url:
                sleep(1)
        except Exception as e:
            print(e)

    def start(self):
        """Open the browser, log in, work out the target days and book."""
        self.driver = Browser(driver_name=self.driver_name)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        print("查询课程开始....")
        self.driver.visit(self.ticket_url)
        try:
            self.driver.find_link_by_text("按老师预约").click()
            print("找第一个老师并预约")
            self.driver.find_link_by_text(u"预约").click()
            weekday = self.driver.find_by_xpath(
                '//div[@class="_3fqOfVfQ"]').first.value
            self.indexs = self._indexes_for_weekday(weekday)
            # These used `print("...", format(a, b))`, which never filled the
            # placeholders; `.format` is what was intended.
            print("周二index={0}, 周五index={1}".format(self.indexs[0], self.indexs[1]))
            print("20:00 time={0}, 20:30 time={1}".format(self.times[0], self.times[1]))
        except Exception as e:
            print(e)
        self.subByTeacher()

    def subByTeacherPage(self):
        """Try every wanted day/time slot on the current page.

        Returns True after the first successful booking, else False.
        """
        for index in self.indexs:
            for slot in self.times:
                slot_xpath = ('//*[@id="timeList"]/ul/li[' + index +
                              ']/div/div[' + slot + ']/a')
                bookable = self.driver.find_by_xpath(slot_xpath)
                if not bookable:
                    continue
                try:
                    print("找到老师并预约对应时间index={0},time={1}".format(index, slot))
                    bookable.click()
                    self.driver.find_link_by_text("确认预约").click()
                    confirm = self.driver.find_by_xpath(
                        '//*[@id="root"]/div/div[2]/div/div/div[2]/div/div[5]/div/div[2]/div[3]/div[2]/a[1]'
                    )
                    confirm.click()
                    print("预约成功")
                    return True
                except Exception as e:
                    print(e)
        return False

    def subByTeacher(self):
        """Book on the current page, falling back to the next page once.

        Returns True if a slot was booked, otherwise False.  (The original
        returned None when the first page succeeded.)
        """
        if self.subByTeacherPage():
            return True
        print("找到下一页")
        try:
            nextpage = self.driver.find_by_xpath(
                '//div[@class="js_next trace_1 _1jM3tRIW"]')
            nextpage.click()
        except Exception as e:
            print(e)
        return self.subByTeacherPage()

    def subClass(self):
        """Try the first three teachers in turn."""
        print("查询课程开始....")
        for i in range(3):
            self.driver.visit(self.ticket_url)
            print("查询第{0}个老师的课程:".format(str(i)))
            self.driver.find_link_by_text(u"预约")[i].click()
            self.subByTeacher()
class DouYin(object):
    """Downloads a Douyin user's videos, optionally without the watermark."""

    def __init__(self, width=500, height=300):
        """Start a headless Chrome session (``width``/``height`` are unused)."""
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name='chrome',
                              executable_path='F:/chromedriver_win32',
                              options=chrome_options,
                              headless=True)

    @staticmethod
    def _safe_filename(name):
        """Return ``name`` with both kinds of path separator removed."""
        return name.replace('\\', '').replace('/', '')

    def get_video_urls(self, user_id):
        """Look up a user and list their videos.

        Parameters:
            user_id: the user ID to search for.
        Returns:
            (video_names, video_urls, nickname)
        """
        video_names = []
        video_urls = []
        unique_id = ''
        # Repeat the search until the first hit is exactly the requested user.
        # NOTE(security): verify=False disables TLS certificate checks.
        while unique_id != user_id:
            search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
            req = requests.get(url=search_url, verify=False)
            html = json.loads(req.text)
            user_info = html['user_list'][0]['user_info']
            aweme_count = user_info['aweme_count']
            uid = user_info['uid']
            nickname = user_info['nickname']
            unique_id = user_info['unique_id']

        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (
            uid, aweme_count)
        req = requests.get(url=user_url, verify=False)
        html = json.loads(req.text)
        i = 1
        for each in html['aweme_list']:
            share_desc = each['share_info']['share_desc']
            # Videos carrying only the default description get a numbered name.
            if '抖音-原创音乐短视频社区' == share_desc:
                video_names.append(str(i) + '.mp4')
                i += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(each['share_info']['share_url'])
        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """Return the (watermarked) download address found on a share page."""
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')
        script = bf.find_all('script')[-1]
        # Raw string: same pattern as before, without the invalid-escape warning.
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        return video_html['video']['play_addr']['url_list'][0]

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name``.

        Parameters:
            video_url: share-page address of the video.
            video_name: destination file path.
            watermark_flag: when True, fetch the watermark-free version.
        """
        size = 0
        if watermark_flag == True:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            chunk_size = 1024
            # Only successful responses are expected to carry the length
            # header; it used to be read before the status check.
            if response.status_code == 200:
                content_size = int(response.headers['content-length'])
                sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
                with open(video_name, "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
                        size += len(data)
                        file.flush()
                        sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Return a watermark-free download address via douyin.iiilab.com."""
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    def run(self):
        """Prompt for a user ID and download all of that user's videos."""
        self.hello()
        user_id = input('请输入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        if nickname not in os.listdir():
            os.mkdir(nickname)
        print('视频下载中:共有%d个作品!\n' % len(video_urls))
        for num, video_url in enumerate(video_urls):
            print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num + 1, video_url))
            # Strip BOTH separators; the old if/elif removed only one kind, so
            # a name containing both still broke the output path.
            video_name = self._safe_filename(video_names[num])
            self.video_downloader(video_url, os.path.join(nickname, video_name))
            print('\n')
        print('下载完成!')

    def hello(self):
        """Print the welcome banner."""
        print('*' * 100)
        print('\t\t\t\t抖音App视频下载小助手')
        print('\t\t作者:Jack Cui')
        print('*' * 100)
class tp(object):
    """Grabs an exam seat on wb.zk789.cn and e-mails a notice when done."""

    driver_name = ''
    executable_path = ''
    # Login user name and password.
    username = "******"
    passwd = "?"
    # City and district option values.
    starts = "sc01"
    ends = "sc0108"
    # URLs.
    ticket_url = "http://wb.zk789.cn/ApplyExamination/EditApplyExamination.aspx"
    login_url = "http://wb.zk789.cn/Login.aspx"
    initmy_url = "http://wb.zk789.cn/Notice.aspx"
    relay_url = "http://wb.zk789.cn/Main.aspx"

    def __init__(self):
        self.driver_name = 'chrome'
        self.executable_path = 'D:/ProgramData/chromedriver'

    def _build_message(self):
        """Set the mail fields on self and return the MIMEText to send."""
        self.msg_from = '*****@*****.**'  # sender address
        self.msg_to = '*****@*****.**'  # recipient address
        self.subject = "已抢到座位,速来付款"  # subject
        self.content = "来自新——邮箱"
        msg = MIMEText(self.content)
        msg['Subject'] = self.subject
        msg['From'] = self.msg_from
        msg['To'] = self.msg_to
        return msg

    def send(self):
        """E-mail the "seat grabbed" notice; failures are printed, not raised."""
        # Kept in a local: the original stored this in self.passwd and thereby
        # overwrote the site login password.
        # NOTE(review): SMTP authorisation code hard-coded in source.
        auth_code = 'ndnatjdouhdkbbha'
        msg = self._build_message()
        s = None
        try:
            s = smtplib.SMTP_SSL("smtp.qq.com", 465)  # mail server and port
            s.login(self.msg_from, auth_code)
            s.sendmail(self.msg_from, self.msg_to, msg.as_string())
            print("发送成功")
        except Exception:
            print("发送失败")
        finally:
            # `s` stays None when the connection itself failed; the original
            # raised NameError here in that case.
            if s is not None:
                s.quit()

    def login(self):
        """Fill the login form and wait for the operator to pass the captcha."""
        self.driver.visit(self.login_url)
        self.driver.fill("ctl00$ContentPlaceHolder1$TBZjhm", self.username)
        self.driver.fill("ctl00$ContentPlaceHolder1$TBPassWord", self.passwd)
        print("等待验证码,自行输入...")
        while self.driver.url != self.initmy_url:
            sleep(1)

    def start(self):
        """Run the whole flow; schedules a Windows shutdown at the end.

        NOTE(review): nesting was reconstructed from flattened source --
        verify the branch structure against the original file.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        # Accept the agreement.
        sleep(3)
        self.driver.find_by_xpath('//input[@type="checkbox"]').click()
        self.driver.find_by_xpath('//input[@type="submit"]').click()
        sleep(2)
        self.driver.find_by_xpath('//a[@id="ctl00_ContentPlaceHolder1_lbtnBKEdit"]').click()
        i = 0
        while True:
            if self.driver.url == self.ticket_url:
                try:
                    print("考位页面开始...")
                    # Load the selection form.
                    sleep(2)
                    self.driver.select('ctl00$ContentPlaceHolder1$aecEdit$fvData$ddlKSDSZ', self.starts)
                    # Alternate between the two districts on successive passes.
                    if i == 0:
                        print('选择成华区')
                        self.driver.select('ctl00$ContentPlaceHolder1$aecEdit$fvData$ddlKSQX', self.ends)
                        i += 1
                    else:
                        print('选择双流区')
                        self.driver.select('ctl00$ContentPlaceHolder1$aecEdit$fvData$ddlKSQX', 'sc0122')
                        i -= 1
                    self.driver.find_by_xpath('//input[@id="btnAddCourse"]').first.click()
                    print('进入科目选择')
                    sleep(2)
                    # NOTE(review): compares an element value against int 0;
                    # confirm the value's type so the else branch is reachable.
                    if self.driver.find_by_xpath('//*[@id="msg_statistic_pay"]').first.value != 0:
                        sleep(2)
                        with self.driver.get_iframe(0) as iframe:
                            print('选择科目')
                            iframe.find_by_xpath('//*[@id="gvData_ctl03_cbSelect"]').first.check()
                            sleep(1)
                            self.driver.find_by_xpath('//*[@id="btnConfirm"]').first.click()
                        sleep(2)
                        self.driver.find_by_xpath('// *[ @ id = "ext-gen29"]').first.click()
                    else:
                        self.send()
                        break
                except Exception:
                    # Narrowed from a bare except so Ctrl-C still stops the loop.
                    print('程序崩溃了,刷新中')
                    self.driver.reload()
                    sleep(5)
            else:
                print('服务器无响应,刷新中')
                self.driver.quit()
                self.start()
        os.system('shutdown -s -t 60')
        print('60s 后关机')
class BuyTicket(object):
    """Automates 12306 ticket purchase with a splinter-driven browser."""

    # Seat class name -> index of its <td> in a result row.
    _SEAT_TYPE_INDEX = {
        '商务座特等座': 1,
        '一等座': 2,
        '二等座': 3,
        '高级软卧': 4,
        '软卧': 5,
        '动卧': 6,
        '硬卧': 7,
        '软座': 8,
        '硬座': 9,
        '无座': 10,
        '其他': 11,
    }
    # Column used when the seat class is not recognised (hard sleeper).
    _DEFAULT_SEAT_TYPE_INDEX = 7

    def __init__(self, username, passwd, order, passengers, seatType,
                 ticketType, daytime, starts, ends):
        """Store the booking parameters.

        order: 1-based position of the train to book; 0 scans rows top-down.
        seatType / ticketType: option values chosen on the order page.
        daytime: travel date, formatted like 2018-02-05.
        starts / ends: station cookie values for origin and destination.
        """
        self.username = username
        self.passwd = passwd
        self.order = order
        self.passengers = passengers
        self.seatType = seatType
        self.ticketType = ticketType
        self.daytime = daytime
        self.starts = starts
        self.ends = ends
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'
        self.initMy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'
        self.confirm_url = 'https://kyfw.12306.cn/otn/confirmPassenger/initDc'
        # Browser name and driver location.
        self.driver_name = 'chrome'
        self.executable_path = '/usr/local/bin/chromedriver'
        # The original looked up the undefined name `seat_type` here and so
        # raised NameError on construction; the parameter is `seatType`.
        self.seat_type_index = self._SEAT_TYPE_INDEX.get(
            seatType, self._DEFAULT_SEAT_TYPE_INDEX)

    def send_sms(self, mobile, sms_info):
        """Send an SMS notice through the ihuyi HTTP API; returns the raw reply."""
        host = "106.ihuyi.com"
        sms_send_uri = "/webservice/sms.php?method=Submit"
        # NOTE(review): API account and password are hard-coded in source.
        account = "C59782899"
        pass_word = "19d4d9c0796532c7328e8b82e2812655"
        params = parse.urlencode({
            'account': account,
            'password': pass_word,
            'content': sms_info,
            'mobile': mobile,
            'format': 'json'
        })
        headers = {
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain"
        }
        conn = httplib2.HTTPConnectionWithTimeout(host, port=80, timeout=30)
        try:
            conn.request("POST", sms_send_uri, params, headers)
            response = conn.getresponse()
            response_str = response.read()
        finally:
            # Close the connection even when the request fails.
            conn.close()
        return response_str

    def login(self):
        """Fill the login form and wait until the operator completes login."""
        self.driver.visit(self.login_url)
        self.driver.fill("loginUserDTO.user_name", self.username)
        self.driver.fill("userDTO.password", self.passwd)
        print('请手动输入验证码...')
        # No automatic captcha solving: poll until the account page appears.
        while self.driver.url != self.initMy_url:
            sleep(1)

    def start_buy(self):
        """Query, pick a train, select passengers and seats, submit the order.

        Repeats the whole flow until an order is reported as successful.
        NOTE(review): nesting was reconstructed from flattened source --
        verify the branch structure against the original file.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1000, 800)
        self.login()
        exit_state = 0
        while exit_state == 0:
            self.driver.visit(self.ticket_url)
            try:
                print("开始刷票....")
                # Preload the query form through cookies.
                self.driver.cookies.add({"_jc_save_fromStation": self.starts})
                self.driver.cookies.add({"_jc_save_toStation": self.ends})
                self.driver.cookies.add({"_jc_save_fromDate": self.daytime})
                self.driver.reload()
                count = 0
                if self.order != 0:
                    # Book the train at a fixed position in the result list.
                    while self.driver.url == self.ticket_url:
                        self.driver.find_by_text("查询").click()
                        count = count + 1
                        print("第 %d 次点击查询..." % count)
                        try:
                            self.driver.find_by_text("预订")[self.order - 1].click()
                            sleep(1.5)
                        except Exception as e:
                            print(e)
                            print("预订失败...")
                            continue
                else:
                    # Scan rows top-down for the first with the wanted seat.
                    while self.driver.url == self.ticket_url:
                        self.driver.find_by_text("查询").click()
                        count += 1
                        print("第 %d 次点击查询..." % count)
                        state = 0
                        sleep(1)
                        try:
                            for i in self.driver.find_by_xpath(
                                    "//tbody[@id='queryLeftTable']/tr/td/a[contains(text(), '预订')]"
                            ):
                                current_tr = i.find_by_xpath("./../..")
                                seat_text = current_tr.find_by_tag('td')[
                                    self.seat_type_index].text
                                if seat_text == '--':
                                    print('无此座位类型出售,继续尝试...')
                                elif seat_text == '无':
                                    print('无票,继续尝试...')
                                else:
                                    print('查询到火车票')
                                    state = 1
                                    i.click()
                                    break
                            sleep(1)
                            if state == 1:
                                break
                        except Exception as e:
                            print(e)
                            print("预订失败...")
                            continue
                print("开始预订...")
                if self.driver.find_by_id("content_defaultwarningAlert_hearder"
                                          ).text.strip() == '当前时间不可以订票':
                    print('当前时间不可以订票')
                    sys.exit(1)
                # Wait for the passenger confirmation page.
                while self.driver.url != self.confirm_url:
                    sleep(1)
                if self.driver.is_text_present('splinter.readthedocs.io'):
                    print("开始选择乘客...")
                if self.driver.url == self.confirm_url:
                    for p in self.passengers:
                        pg = self.driver.find_by_xpath(
                            "//ul[@id='normal_passenger_id']/li/label[contains(text(), '"
                            + p + "')]")
                        pg.last.click()
                    print("乘客选择完毕...\n")
                    print("开始选座...")
                    sleep(1)
                    # One seat/ticket selector pair per passenger, numbered
                    # from 1.  The original popped self.passengers here, which
                    # emptied the caller's list and broke any retry.
                    for i in range(1, len(self.passengers) + 1):
                        seat_id_string = "seatType_" + str(i)
                        ticket_id_string = "ticketType_" + str(i)
                        self.driver.find_by_xpath(
                            '//select[@id="%s"]/option[@value="%s"]' %
                            (seat_id_string, self.seatType)).first._element.click()
                        self.driver.find_by_xpath(
                            '//select[@id="%s"]//option[@value="%s"]' %
                            (ticket_id_string, self.ticketType)).first._element.click()
                        sleep(1)
                    self.driver.find_by_id("submitOrder_id").click()
                    sleep(1.5)
                    print("选座完毕...\n")
                    print("提交订单...")
                    sleep(1)
                    self.driver.find_by_id("qr_submit_id").click()
                    if self.driver.is_text_present('splinter.readthedocs.io'):
                        print('正在检查余票...')
                    if self.driver.find_by_id("orderResultInfo_id").find_by_tag(
                            'div').text.strip() == '出票失败!':
                        print(
                            self.driver.find_by_id(
                                "orderResultInfo_id").find_by_tag('p')[0].text)
                    else:
                        print('火车票订票成功,请速度支付订单!\n')
                        exit_state = 1
                        input('按任意键继续:')
            except Exception as e:
                print(e)
                exit_state = 0
class BrushTicket(object):
    """Polls 12306 for tickets and books them (new-site URLs)."""

    # Seat class name -> (index of its <td> in a result row, option value).
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }

    def __init__(self, passengers, from_time, from_station, to_station,
                 numbers, seat_type, receiver_email):
        """Store the booking parameters and open the browser.

        passengers: passenger names; numbers: train numbers to watch;
        from_time: travel date; receiver_email: address notified on success.
        """
        self.passengers = passengers
        self.from_station = from_station
        self.to_station = to_station
        self.from_time = from_time
        self.numbers = numbers
        self.seat_type_index, self.seat_type_value = self._seat_type_info(seat_type)
        self.receiver_email = receiver_email
        # Main pages of the new 12306 site.
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    @classmethod
    def _seat_type_info(cls, seat_type):
        """Return (column index, option value); unknown names map to (7, 3)."""
        return cls._SEAT_TYPES.get(seat_type, (7, 3))

    def do_login(self):
        """Open the login page and wait for the operator to log in."""
        self.driver.visit(self.login_url)
        sleep(1)
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def start_brush(self):
        """Query repeatedly and book the first watched train with a seat."""
        self.driver.driver.maximize_window()
        self.do_login()
        self.driver.visit(self.ticket_url)
        try:
            print('开始刷票……')
            # Preload the query form through cookies.
            self.driver.cookies.add(
                {"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    for number in self.numbers:
                        # The seat cells are in the row preceding the one
                        # that carries the train's datatran attribute.
                        current_tr = self.driver.find_by_xpath(
                            '//tr[@datatran="' + number +
                            '"]/preceding-sibling::tr[1]')
                        if not current_tr:
                            # Report the train actually being looked up; the
                            # original formatted the whole list here.
                            print('不存在当前车次【%s】,已结束当前刷票,请重新开启!' % number)
                            sys.exit(1)
                        seat_text = current_tr.find_by_tag('td')[
                            self.seat_type_index].text
                        if seat_text == '--':
                            print('无此座位类型出售,已结束当前刷票,请重新开启!')
                            sys.exit(1)
                        elif seat_text == '无':
                            print('无票,继续尝试……')
                            sleep(1)
                        else:
                            # Seats available: try to book.
                            print('刷到票了(余票数:' + str(seat_text) + '),开始尝试预订……')
                            current_tr.find_by_css('td.no-br>a')[0].click()
                            sleep(1)
                            key_value = 1
                            for p in self.passengers:
                                print('开始选择用户……')
                                self.driver.find_by_text(p).last.click()
                                print('开始选择席别……')
                                if self.seat_type_value != 0:
                                    self.driver.find_by_xpath(
                                        "//select[@id='seatType_" +
                                        str(key_value) + "']/option[@value='" +
                                        str(self.seat_type_value) +
                                        "']").first.click()
                                key_value += 1
                                sleep(0.2)
                                # Names ending in ')' trigger a confirm dialog.
                                if p[-1] == ')':
                                    self.driver.find_by_id(
                                        'dialog_xsertcj_ok').click()
                            print('正在提交订单……')
                            self.driver.find_by_id('submitOrder_id').click()
                            sleep(2)
                            # Any text here means the submission was rejected.
                            submit_false_info = self.driver.find_by_id(
                                'orderResultInfo_id')[0].text
                            if submit_false_info != '':
                                print(submit_false_info)
                                self.driver.find_by_id(
                                    'qr_closeTranforDialog_id').click()
                                sleep(0.2)
                                self.driver.find_by_id('preStep_id').click()
                                sleep(0.3)
                                continue
                            print('正在确认订单……')
                            self.driver.find_by_id('qr_submit_id').click()
                            print('预订成功,请及时前往支付……')
                            self.send_mail(self.receiver_email,
                                           '恭喜您,抢到票了,请及时前往12306支付订单!')
                except Exception as error_info:
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def send_mail(self, receiver_address, content):
        """E-mail ``content`` as an HTML notice to ``receiver_address``."""
        host = 'smtp.163.com'
        port = 25
        sender = '*****@*****.**'  # sender mailbox
        # NOTE(review): client authorisation code hard-coded in source.
        pwd = 'Dy93082'
        receiver = receiver_address
        body = '<h2>温馨提醒:</h2><p>' + content + '</p>'
        msg = MIMEText(body, 'html', _charset="utf-8")
        msg['subject'] = '抢票成功通知!'
        msg['from'] = sender
        msg['to'] = receiver
        # The context manager issues QUIT and closes the socket; the original
        # never closed the connection.
        with smtplib.SMTP(host, port) as s:
            s.login(sender, pwd)
            s.sendmail(sender, receiver, msg.as_string())
class Buy_Tickets(object):
    """Books one hard-coded 12306 train once its availability cell shows seats."""

    def __init__(self, username, passwd, order, passengers, dtime, starts, ends):
        """Store the booking parameters.

        order: 1-based position of the booking link to click; 0 clicks the first.
        dtime: travel date; starts / ends: station cookie values.
        """
        self.username = username
        self.passwd = passwd
        self.order = order
        self.passengers = passengers
        self.starts = starts
        self.ends = ends
        self.dtime = dtime
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'  # login page
        self.initMy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'  # shown after login
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'  # ticket query page
        self.driver_name = 'chrome'
        # Raw string: same value as before, without the invalid '\c' escape.
        self.executable_path = r'F:\cro\chromedriver.exe'

    def login(self):
        """Fill the login form and wait until the operator completes login."""
        self.driver.visit(self.login_url)
        self.driver.fill('loginUserDTO.user_name', self.username)
        self.driver.fill('userDTO.password', self.passwd)
        print('请输入验证码...')
        while self.driver.url != self.initMy_url:
            sleep(1)

    def start_buy(self):
        """Run the purchase flow; returns 1 on completion, -1 on any error."""
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1200, 800)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print(u"购票页面开始...")
            # Preload the query form through cookies.
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                count += 1
                self.driver.find_by_text(u"查询").click()
                print(u"循环点击查询... 第 %s 次" % count)
                sleep(1)
                # Availability cell of one specific train/seat, by element id.
                tarVal = self.driver.find_by_id('YZ_0h000K705705').value
                print(tarVal)
                if tarVal == u'有':
                    if self.order != 0:
                        self.driver.find_by_text(u"预订")[self.order - 1].click()
                    else:
                        self.driver.find_by_text(u"预订")[0].click()
                    break
            print(u"开始预订...")
            sleep(1)
            print(u'开始选择用户...')
            # The original iterated self.users, which is never assigned; the
            # names are stored as self.passengers.
            for user in self.passengers:
                self.driver.find_by_text(user).click()
            print(u"提交订单...")
            sleep(1)
            # Ticket kind: 1 adult, 2 child, 3 student, 4 disabled.
            self.driver.find_by_xpath(
                '//select[@id="ticketType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            # Seat: 1 hard seat, 3 hard sleeper, 4 soft sleeper, 9 business,
            # O (letter) second class, M first class.
            self.driver.find_by_xpath(
                '//select[@id="seatType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            # Submit the order.
            print(u"提交订单...")
            self.driver.find_by_id('submitOrder_id').click()
            sleep(1.5)
            print(u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()
            return 1
        except Exception as e:
            print(e)
            return -1
class BrushTicket(object):
    """Ticket-grabbing bot for 12306 configured from ``conf/*.conf`` files."""

    # Seat name -> (index of its <td> in a result row,
    #               <option value> of the seat <select> on the order page).
    # A value of 0 means "leave the seat <select> at its default".
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Used when the configured seat name is not in the table (hard sleeper).
    _DEFAULT_SEAT_TYPE = (7, 3)

    @staticmethod
    def _parse_passengers(raw):
        """Split a comma-separated ``name`` config value into a list of names."""
        names = raw.replace(',', ',').split(',')
        return [name.strip() for name in names if name.strip()]

    def __init__(self, passengers_number):
        """Load trip settings and start the browser.

        Args:
            passengers_number: index of the section in ``conf/12306.conf``
                that describes the trip to book.
        """
        cp = ConfigParser()
        cp.read("conf/city.conf", encoding='UTF-8')
        city = cp.sections()[0]
        # Option name -> value for the first section of city.conf.
        self.city_dict = {key: cp.get(city, key) for key in cp.options(city)}

        cp = ConfigParser()
        cp.read("conf/12306.conf", encoding='UTF-8')
        pessenger = cp.sections()[passengers_number]
        # The raw option is one string; iterating it directly would yield
        # single characters, so turn it into a list of names first.
        self.passengers = self._parse_passengers(cp.get(pessenger, 'name'))
        # Station names in the config are translated through city_dict.
        self.from_station = self.city_dict[cp.get(pessenger, 'from_station')]
        self.to_station = self.city_dict[cp.get(pessenger, 'to_station')]
        self.from_time = cp.get(pessenger, 'from_time')
        self.number = cp.get(pessenger, 'coach_number')
        seat_type = cp.get(pessenger, 'seat_type')
        self.seat_type_index, self.seat_type_value = self._SEAT_TYPES.get(
            seat_type, self._DEFAULT_SEAT_TYPE)
        self.receiver_email = cp.get(pessenger, 'email')
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        # chromedriver downloads:
        # https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    def do_login(self):
        """Open the login page and wait until the user has logged in.

        Login is done by hand (QR code or account); this polls the browser
        URL once per second until it equals ``self.init_my_url``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def _try_book(self, current_tr):
        """Fill and submit the order form; return True once it is confirmed.

        Returns False when the site reports a submit error, after closing the
        error dialog and stepping back to the query page.
        """
        current_tr.find_by_css('td.no-br>a')[0].click()
        sleep(1)
        key_value = 1
        for p in self.passengers:
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            print('开始选择席别……')
            if self.seat_type_value != 0:
                self.driver.find_by_xpath(
                    "//select[@id='seatType_" + str(key_value)
                    + "']/option[@value='" + str(self.seat_type_value)
                    + "']").first.click()
            key_value += 1
            sleep(0.2)
            # Names ending in ")" trigger an extra confirmation dialog.
            if p[-1] == ')':
                self.driver.find_by_id('dialog_xsertcj_ok').click()
        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # A non-empty result box means the submit was rejected.
        submit_false_info = self.driver.find_by_id('orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return False
        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        print('预订成功,请及时前往支付……')
        return True

    def start_brush(self):
        """Log in, then poll the query page until the ticket is booked.

        Calls ``sys.exit(1)`` when the configured train does not exist or
        never sells the configured seat type. Other errors are printed and
        polling continues.
        """
        self.driver.driver.maximize_window()
        self.do_login()
        self.driver.visit(self.ticket_url)
        try:
            print('开始刷票……')
            # The query form is pre-filled from these cookies after a reload.
            self.driver.cookies.add(
                {"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            # A confirmed order navigates away from ticket_url, ending the loop.
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    current_tr = self.driver.find_by_xpath(
                        '//tr[@datatran="' + self.number
                        + '"]/preceding-sibling::tr[1]')
                    if not current_tr:
                        print('不存在当前车次【%s】,已结束当前刷票,请重新开启!'
                              % self.number)
                        sys.exit(1)
                    seat_text = current_tr.find_by_tag('td')[
                        self.seat_type_index].text
                    if seat_text == '--':
                        print('无此座位类型出售,已结束当前刷票,请重新开启!')
                        sys.exit(1)
                    if seat_text == '无':
                        print('无票,继续尝试……')
                        sleep(1)
                        continue
                    print('刷到票了(余票数:' + str(seat_text)
                          + '),开始尝试预订……')
                    if not self._try_book(current_tr):
                        continue
                    title = '抢票票成功!!'
                    self.send_mail(
                        self.receiver_email,
                        '恭喜您,抢到了' + self.from_time
                        + '的车票,请及时前往12306支付订单!', title)
                except Exception as error_info:
                    # SystemExit is not an Exception, so sys.exit still exits.
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def send_mail(self, receiver_address, content, title):
        """Mail *content* with subject *title* to the fixed and given address.

        Args:
            receiver_address: extra recipient besides the hard-coded one.
            content: message body, sent as UTF-8 plain text.
            title: message subject.
        """
        mail_host = 'smtp.163.com'
        mail_user = '******'
        # For some providers this is an authorization code, not the password.
        mail_pass = '******'
        sender = '*****@*****.**'
        receivers_list = ['*****@*****.**', receiver_address]
        email = Email(mail_host, mail_user, mail_pass)
        for receiver in receivers_list:
            # A fresh connection and message are built for every recipient.
            smtpObj = email.getConnection()
            message = MIMEText(str(content), 'plain', 'utf-8')
            message['Subject'] = title
            message['From'] = sender
            message['To'] = receiver
            email.send_email(smtpObj, sender, receiver, message)
# Open the 12306 portal; the page title is printed whether or not the visit
# succeeds, and any error from the visit still propagates.
try:
    browse.visit("http://www.12306.cn")
finally:
    print(browse.title)

# Follow the link to the booking site, which opens in a second window.
browse.click_link_by_href("https://kyfw.12306.cn/otn/index/init")
browse.windows.current = browse.windows[1]

# Open the station picker, dump the page for inspection, then click the two
# station entries (codes SHH and ZZF) in turn.
browse.find_by_id("from_station_imageB")[0].click()
print(browse.html)
from_entry = browse.find_by_xpath("//li[@data='SHH']")
from_entry[0].click()
to_entry = browse.find_by_xpath("//li[@data='ZZF']")
to_entry[0].click()

# Run the ticket search.
search_button = browse.find_by_id("a_search_ticket")
search_button.click()
class DouYin(object):
    """Download every video posted by a Douyin user."""

    def __init__(self, width = 500, height = 300):
        """Start a headless Chrome used for the watermark-removal site.

        Args:
            width: unused; kept so existing callers keep working.
            height: unused; kept so existing callers keep working.
        """
        chrome_options = Options()
        chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
        self.driver = Browser(driver_name='chrome', executable_path='D:/chromedriver', options=chrome_options, headless=True)

    @staticmethod
    def _sanitize_name(name):
        """Remove both path separators so *name* is a single file name."""
        return name.replace('\\', '').replace('/', '')

    def get_video_urls(self, user_id):
        """Look up a user and list their videos.

        Args:
            user_id: the user's Douyin ID, as typed by the operator.

        Returns:
            ``(video_names, video_urls, nickname)``: file names, share-page
            URLs (same order) and the user's nickname.
        """
        video_names = []
        video_urls = []
        unique_id = ''
        # NOTE(review): repeats the same search until the first hit's
        # unique_id equals user_id; it never ends if the first hit is a
        # different user, and raises IndexError on an empty user_list.
        while unique_id != user_id:
            search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
            req = requests.get(url = search_url, verify = False)
            user_info = json.loads(req.text)['user_list'][0]['user_info']
            aweme_count = user_info['aweme_count']
            uid = user_info['uid']
            nickname = user_info['nickname']
            unique_id = user_info['unique_id']
        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
        req = requests.get(url = user_url, verify = False)
        html = json.loads(req.text)
        i = 1
        for each in html['aweme_list']:
            share_desc = each['share_info']['share_desc']
            # Videos that only carry the site's default description get a
            # running number as their name instead.
            if '抖音-原创音乐短视频社区' == share_desc:
                video_names.append(str(i) + '.mp4')
                i += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(each['share_info']['share_url'])
        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """Return the watermarked download URL found on a share page.

        Args:
            video_url: share-page URL of the video.
        """
        req = requests.get(url = video_url, verify = False)
        bf = BeautifulSoup(req.text, 'lxml')
        # The video metadata is a JSON array literal in the last <script>.
        script = bf.find_all('script')[-1]
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        return video_html['video']['play_addr']['url_list'][0]

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to *video_name*, printing progress.

        Args:
            video_url: share-page URL of the video.
            video_name: path of the file to write.
            watermark_flag: when true, fetch the watermark-free version via
                ``remove_watermark``; otherwise the watermarked one.
        """
        size = 0
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        with closing(requests.get(video_url, stream=True, verify = False)) as response:
            chunk_size = 1024
            # Non-200 responses are skipped silently, as before.
            if response.status_code == 200:
                # Read only on success, so an error response without the
                # header no longer raises KeyError.
                content_size = int(response.headers['content-length'])
                sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
                with open(video_name, "wb") as file:
                    for data in response.iter_content(chunk_size = chunk_size):
                        file.write(data)
                        size += len(data)
                        file.flush()
                        sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Return a watermark-free download URL via douyin.iiilab.com.

        Args:
            video_url: share-page URL of the video.
        """
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    def run(self):
        """Ask for a user ID and download all of that user's videos.

        Files are written into a directory named after the user's nickname,
        created in the current working directory if missing.
        """
        self.hello()
        user_id = input('请输入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        if nickname not in os.listdir():
            os.mkdir(nickname)
        print('视频下载中:共有%d个作品!\n' % len(video_urls))
        for num, (raw_name, url) in enumerate(zip(video_names, video_urls), start=1):
            print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, url))
            # Strip both separators; previously a name containing both kept
            # its "/" and produced a path into a non-existent directory.
            video_name = self._sanitize_name(raw_name)
            self.video_downloader(url, os.path.join(nickname, video_name))
            print('\n')
        print('下载完成!')

    def hello(self):
        """Print the welcome banner."""
        print('*' * 100)
        print('\t\t\t\t抖音App视频下载小助手')
        print('\t\t作者:Jack Cui')
        print('*' * 100)
class Amazon(object):
    """Scripted Amazon session: search, register, add payment, buy, wishlist."""

    @staticmethod
    def _random_token(length):
        """Return *length* distinct random ASCII letters/digits as a string."""
        return "".join(random.sample(string.ascii_letters + string.digits, length))

    def __init__(self, keyword):
        """Generate throwaway account data, reserve a proxy row, open Chrome.

        Args:
            keyword: search term typed into the Amazon search box.

        Raises:
            RuntimeError: when ``register_proxy_ips`` has no live proxy row.
        """
        self.url = "https://www.amazon.com"
        self.name = self._random_token(6)
        self.email = self._random_token(16) + "@163.com"
        self.password = self._random_token(8)
        self.keyword = keyword
        self.chrome_options = Options()
        logging.basicConfig(filename='amazon.log', level=logging.DEBUG)
        self.conn = pymysql.connect(host='192.168.0.211', port=6033, user='******', password='******', db='amazon')
        self.cs = self.conn.cursor()
        # Take the newest live proxy and mark it used.
        self.cs.execute('select id,ip,port from register_proxy_ips where is_alived = 1 order by id desc limit 1')
        self.proxy_ip = self.cs.fetchone()
        if self.proxy_ip is None:
            raise RuntimeError('no live proxy available in register_proxy_ips')
        # Parameter binding instead of string formatting.
        self.cs.execute('update register_proxy_ips set is_alived = 0 where id = %s',
                        (self.proxy_ip[0],))
        self.conn.commit()
        logging.debug(self.proxy_ip)
        # NOTE: the proxy is reserved but not applied to the browser:
        # self.chrome_options.add_argument('--proxy-server=http://{host}:{port}'.format(host=self.proxy_ip[1], port=self.proxy_ip[2]))
        self.browser = Browser('chrome', user_agent=generate_user_agent(device_type='desktop'))

    def run(self):
        """Execute the whole scenario.

        Searches up to six times (five minutes apart) for a sponsored
        listing. If none appears the browser is closed and the run stops;
        otherwise the listing is opened and the account steps follow.
        """
        self.acess()
        self.search_kw()
        for _ in range(6):
            ad_list = self.browser.find_by_xpath('//li[contains(@class, "AdHolder")]')
            if ad_list:
                ad_list[0].find_by_tag('h2').click()
                break
            time.sleep(300)
            self.search_kw()
        else:
            # No ad found: stop here instead of driving a closed browser.
            self.browser.quit()
            return
        self.register()
        self.add_pay_method()
        self.add_address()
        self.buy_goods()
        self.add_list()

    def acess(self):
        """Open the Amazon home page; log and re-raise if the visit fails."""
        try:
            self.browser.visit(self.url)
        except Exception:
            # Previously this message was logged on every call, even success.
            logging.error("代理IP失效,请求不成功")
            raise

    def search_kw(self):
        """Search for ``self.keyword`` and drag the results to the footer."""
        self.browser.find_by_xpath('//input[@id="twotabsearchtextbox"]').first.fill(self.keyword)
        self.browser.find_by_xpath('//input[@value="Go"]').click()
        draggble = self.browser.find_by_xpath('//*[@id="searchTemplate"]')
        target = self.browser.find_by_xpath('//*[@id="footer"]')
        draggble.drag_and_drop(target)

    def register(self):
        """Create an account from the generated name, e-mail and password."""
        self.browser.find_by_xpath('//*[@id="nav-link-accountList"]').click()
        self.browser.find_by_xpath('//*[@id="createAccountSubmit"]').click()
        self.browser.find_by_xpath('//input[@id="ap_customer_name"]').first.fill(self.name)
        self.browser.find_by_xpath('//input[@id="ap_email"]').first.fill(self.email)
        self.browser.find_by_xpath('//input[@id="ap_password"]').first.fill(self.password)
        self.browser.find_by_xpath('//input[@id="ap_password_check"]').first.fill(self.password)
        self.browser.find_by_xpath('//input[@class="a-button-input"]').click()

    def add_pay_method(self):
        """Add a credit card using values from the module-level ``paras``."""
        self.browser.find_by_xpath('//*[@id="nav-link-accountList"]').click()
        self.browser.find_by_xpath('//*[@id="a-page"]/div[3]/div/div[2]/div[2]/a/div').click()
        self.browser.fill('ppw-accountHolderName', paras['card_name'])
        self.browser.fill('addCreditCardNumber', paras['card_num'])
        # Force the expiry <select>s to display so their options can be clicked.
        self.browser.execute_script('document.getElementsByName("ppw-expirationDate_month")[0].style.display="block"')
        self.browser.execute_script('document.getElementsByName("ppw-expirationDate_year")[0].style.display="block"')
        self.browser.find_by_text('04').click()
        self.browser.find_by_text('2019').click()
        self.browser.find_by_name('ppw-widgetEvent:AddCreditCardEvent').click()

    def add_address(self):
        """Fill in the billing address from the module-level ``paras``."""
        self.browser.fill('ppw-fullName', paras['fullname'])
        self.browser.fill('ppw-line1', paras['line'])
        self.browser.fill('ppw-city', paras['city'])
        self.browser.fill('ppw-stateOrRegion', paras['stateOrRegion'])
        self.browser.fill('ppw-postalCode', paras['postalCode'])
        self.browser.fill('ppw-phoneNumber', paras['phoneNumber'])
        self.browser.find_by_name('ppw-widgetEvent:AddAddressEvent').click()

    def buy_goods(self):
        """Add the recently viewed item to the cart and place the order.

        Steps that depend on optional page elements are best-effort: their
        failures are logged and the flow continues.
        """
        self.browser.find_by_xpath('//*[@id="nav-recently-viewed"]').click()
        self.browser.find_by_xpath('//*[@id="asin_list"]/div[1]/div/a/div[1]/span/div').click()
        try:
            self.browser.find_by_xpath('//*[@id="add-to-cart-button"]').click()
            self.browser.find_by_xpath('//*[@id="smartShelfAddToCartNative"]').click()
        except Exception as e:
            logging.error(e)
        try:
            # Optional overlay: close it if it shows up.
            time.sleep(5)
            self.browser.is_element_present_by_xpath('//i[@class="a-icon a-icon-close"]', wait_time=30)
            self.browser.find_by_xpath('//i[@class="a-icon a-icon-close"]').click()
        except Exception as e:
            logging.debug(e)
        try:
            # Optional "view cart" link.
            self.browser.find_by_xpath('//a[@id="hlb-view-cart-announce"]').click()
        except Exception as e:
            logging.debug(e)
        # Reveal the quantity <select>, pick its "10" option, then type 999
        # into the quantity box and apply it.
        self.browser.execute_script('document.getElementsByName("quantity")[0].style.display="block"')
        self.browser.find_by_value('10').click()
        self.browser.find_by_name('quantityBox').clear()
        self.browser.fill('quantityBox', 999)
        self.browser.find_by_xpath('//span[@id="a-autoid-1"]').click()
        self.browser.find_by_xpath('//div[@class="sc-proceed-to-checkout"]').click()
        self.browser.find_by_xpath('//*[@id="address-book-entry-0"]/div[2]/span/a').click()
        self.browser.find_by_xpath('//*[@id="shippingOptionFormId"]/div[3]/div/div/span[1]').click()
        self.browser.find_by_xpath('//*[@id="order-summary-container"]/div/div/div').click()
        try:
            # Optional Prime upsell page.
            self.browser.find_by_xpath('//a[contains(@class, "prime-nothanks-button")]').click()
        except Exception as e:
            logging.debug(e)
        self.browser.find_by_xpath('//span[contains(@class, "place-order-button-link")]').click()

    def add_list(self):
        """Add the recently viewed item to a list, then close the browser."""
        self.browser.find_by_xpath('//*[@id="nav-recently-viewed"]').click()
        self.browser.find_by_xpath('//*[@id="asin_list"]/div[1]/div/a/div[1]/span/div').click()
        self.browser.find_by_xpath('//input[@title="Add to List"]').click()
        if self.browser.is_element_present_by_text('Add to your list', wait_time=10):
            self.browser.find_by_xpath('//*[@id="WLHUC_result"]/form/div[2]/span[3]/span').click()
        self.browser.find_by_xpath('//i[@class="a-icon a-icon-close"]').click()
        time.sleep(2)
        self.browser.quit()
}
# Start Chrome; `executable_path` is unpacked as keyword arguments (its
# definition ends with the closing brace above).
browser = Browser('chrome', **executable_path)
#browser = Browser('chrome')

# Login: grab the submit button first, fill the two credential fields by
# form name (placeholder values), then click.
browser.visit('https://kyfw.12306.cn/otn/login/init')
loginBtn = browser.find_by_id('loginSub')
browser.fill('loginUserDTO.user_name', 'username')
browser.fill('userDTO.password', 'password')
loginBtn.click()

# Search ticket: wait, then open the booking page through its menu link.
#browser.visit('https://kyfw.12306.cn/otn/leftTicket/init')
time.sleep(4)
browser.find_by_xpath('//li[@id="selectYuding"]/a').click()
# Rewrite the `type` attribute of the two station inputs and make the date
# field writable before filling them below.
# NOTE(review): presumably these inputs are hidden and this change exposes
# them to fill() - confirm against the page markup.
browser.evaluate_script(
    'document.getElementById("fromStation").setAttribute("type","visiable")')
browser.evaluate_script(
    'document.getElementById("toStation").setAttribute("type","visiable")')
browser.evaluate_script('document.getElementById("train_date").readOnly=false')
browser.fill('leftTicketDTO.from_station', 'TJP')
browser.fill('leftTicketDTO.to_station', 'MHL')
browser.fill('leftTicketDTO.train_date', '2017-11-30')
# Hide the first "cal-wrap" element before clicking the query button.
browser.evaluate_script(
    'document.getElementsByClassName("cal-wrap")[0].style.display="none"')
#browser.find_by_text('查询').click()
time.sleep(2)
browser.find_by_text('查询').click()
#browser.reload()
#browser.find_by_text('预订')[3].click()
def splinter_scrape_ta_hotels(city_url="", city="new_haven", state="ct", write_to_db=False, max_pages=20): """PURPOSE: To """ # this only needs to be done at the very beginning br = Browser() if city_url == "": city_url = get_city(city.lower() + "_" + state.lower()) print("using the following url:") print("{}".format(city_url)) # city_url = "http://www.tripadvisor.com/Hotels-g31310-Phoenix_Arizona-Hotels.html" ##################################################### # do not edit below this line ##################################################### # more_pages is used to keep track if there is more # than one page of hotel results for the given city more_pages = True # scraping will start on page 1 of the hotel results page = 1 # open the URL in a browser object: br.visit(city_url) # find the div to enter the date range. This is needed to get pricing info: date_bar = br.find_by_xpath('//*[contains(@class, "meta_date_wrapper")]') # find the check in calendar span: cin_btn = date_bar.find_by_xpath('span[contains(@class, "meta_date_field check_in")]/span')[0] # now click the check_in span to activate it cin_btn.click() # select the right calendar div (next month) rightcal = br.find_by_xpath('//div[contains(@class, "month")]')[1] # now select the third Friday of next month as the check in date fri_btn = rightcal.find_by_xpath("table/tbody/tr[3]/td[6]/div") # and click it fri_btn.click() # now choose the next day (saturday) as the check out date cout_btn = date_bar.find_by_xpath('span[contains(@class, "meta_date_field check_out")]/span')[0] cout_btn.click() leftcal = br.find_by_xpath('//div[contains(@class, "month")]')[0] sat_btn = leftcal.find_by_xpath("table/tbody/tr[3]/td[7]/div") sat_btn.click() print("Dates selected.") # wait a few seconds for ta to retrieve prices time.sleep(5) # get the city and state info loclist = br.find_by_xpath('//*[contains(@id, "BREADCRUMBS")]') locstring = loclist.text.split(u"\u203a") hotel_city = city = locstring[2].lower() hotel_state = 
re.findall("\w+ \(([A-Z][A-Z])\)", locstring[1])[0].lower() # create a pandas dataframe that will be used for writing # the results to the DB: columns = [ "hotel_id", "hotel_url", "hotel_img_url", "hotel_name", "hotel_address", "hotel_city", "hotel_state", "hotel_rating", "hotel_latitude", "hotel_longitude", "hotel_price", "business_id", "review_count", "dog_review_count", ] bigdf = pd.DataFrame(columns=columns) # create some lists to fill w. the results from each page hotel_names = [] links = [] img_url = [] hotel_price = [] business_id = [] print("starting scraper loop.") while more_pages and page <= max_pages: print("*" * 75) print("Now scraping page {} of {} of the hotel results".format(page, max_pages)) print("*" * 75) # get all the review divs print("waiting a few seconds before scraping...") time.sleep(np.random.uniform(8, 20)) listing_div = br.find_by_xpath('//*[contains(@class, "hotels_lf_condensed")]') xsts1 = br.is_element_present_by_xpath('//*[contains(@class, "photo_booking")]', wait_time=1) xsts2 = br.is_element_present_by_xpath('//*[contains(@class, "property_details")]', wait_time=1) xsts3 = br.is_element_present_by_xpath( '//*[contains(@class, "prw_rup")]/div/div/div/div[@class="headerContents"]/div[contains(@class, "price")]', wait_time=1, ) while len(listing_div) < 1 or not xsts1 or not xsts2 or not xsts3: print("now waiting for DOIs to return") time.sleep(5) listing_div = br.find_by_xpath('//*[contains(@class, "hotels_lf_condensed")]') xsts1 = br.is_element_present_by_xpath('//*[contains(@class, "photo_booking")]', wait_time=1) xsts2 = br.is_element_present_by_xpath('//*[contains(@class, "property_details")]', wait_time=1) xsts3 = br.is_element_present_by_xpath( '//*[contains(@class, "prw_rup")]/div/div/div/div[@class="headerContents"]/div[contains(@class, "price")]', wait_time=1, ) print("# of listings: {}".format(len(listing_div))) print("photo_booking exists: {}".format(xsts1)) print("property_details exists: {}".format(xsts2)) print("prw_up 
exists: {}".format(xsts3)) print("Number of hotel listings on this page: {}".format(len(listing_div))) df = pd.DataFrame(columns=columns) for listing in listing_div: try: biz_id = re.findall("hotel_(\d+)", listing["id"]) if len(biz_id) > 0: biz_id = biz_id[0] else: biz_id = None print("business_id: {}".format(biz_id)) business_id.append(biz_id) except: print("!" * 80) print("biz_id DOES NOT EXIST!") print("!" * 80) business_id.append(None) try: prop = listing.find_by_xpath('div/div/div/div[contains(@class, "property_details")]') except: print("!" * 80) print("prop DIV DOES NOT EXIST!") print("!" * 80) try: title = prop.find_by_xpath('div/div[@class="listing_title"]') print(title.text) hotel_names.append(title.text) except: print("!" * 80) print("TITLE DIV DOES NOT EXIST!") print("!" * 80) hotel_names.append(None) try: hotel_link = title.find_by_xpath("a")["href"] print(hotel_link) links.append(hotel_link) except: print("!" * 80) print("hotel_link DOES NOT EXIST!") print("!" * 80) links.append(None) try: hotel_img = prop.find_by_xpath('div[@class="photo_booking"]/div/div/a/img')["src"] print("Hotel img URL: {}".format(hotel_img)) img_url.append(hotel_img) except: print("!" * 80) print("hotel_img DIV DOES NOT EXIST!") print("!" * 80) img_url.append(None) try: price_text = prop.find_by_xpath( 'div[contains(@class, "prw_rup")]/div/div/div/div[@class="headerContents"]/div[contains(@class, "price")]' ).text price = re.findall("(\d+)", price_text)[0] print("Price: ${}".format(price)) hotel_price.append(price) except: print("!" * 80) print("price DIV DOES NOT EXIST!") print("!" 
* 80) hotel_price.append(None) print("*" * 50) if len(hotel_names) > 0: print("len of hotel_names: {}".format(len(hotel_names))) print("len of hotel_price: {}".format(len(hotel_price))) print("len of img_url: {}".format(len(img_url))) print("len of business_id: {}".format(len(business_id))) print("len of hotel_city: {}".format(len(hotel_city))) print("len of hotel_state: {}".format(len(hotel_state))) df["hotel_name"] = hotel_names df["hotel_price"] = hotel_price df["hotel_img_url"] = img_url df["hotel_url"] = links df["business_id"] = business_id df["hotel_city"] = hotel_city df["hotel_state"] = hotel_state bigdf = bigdf.append(df) # update the page number page += 1 # if more pages are desired, look for a "next" button if page <= max_pages: nxt_btn = br.find_by_xpath( '//div[contains(@class, "deckTools")]/div[contains(@class, "unified")]/a[contains(@class, "next")]' ) # if there is a next button, click it # else exit the while loop if len(nxt_btn) > 0: nxt_btn.click() else: more_pages = False if write_to_db: engine = cadb.connect_aws_db(write_unicode=True) bigdf = remove_ad_hotels(bigdf) remove_duplicate_hotels(bigdf, city, engine) bigdf.to_sql("ta_hotels", engine, if_exists="append", index=False)
return b else: b.visit(order_url) b.find_by_xpath('//*[@id="Jprice"]/li[6]').click() b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a') time.sleep(5) loop(b) except Exception as e: b.reload() time.sleep(1) loop(b) b = Browser(driver_name='chrome') b.visit(url) login(b) b.visit(order_url) b.find_by_xpath('//*[@id="Jprice"]/li[6]').click() b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a').click() time.sleep(5) while True: loop(b) if b.title == '【五月天郑州演唱会门票】2017五月天 LIFE [ 人生无限公司 ] 巡回演唱会—郑州站-永乐票务': b.find_by_xpath('//*[@id="Jprice"]/li[6]').click() b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a') time.sleep(2) loop(b) else: print('SUCCESS!') break
### # Copyright (c) Rice University 2012-13 # This software is subject to # the provisions of the GNU Affero General # Public License version 3 (AGPLv3). # See LICENCE.txt for details. ### from splinter.browser import Browser browser = Browser() browser.visit("http://www.office.mikadosoftware.com") browser.click_link_by_href("/login") openidbox = browser.find_by_xpath("""id('column2')/form/p/input[1]""").first openidbox.fill("https://www.google.com/accounts/o8/id") browser.find_by_value("Sign in").first.click() # id="Email" # id="Passwd" browser.find_by_id("Email").first.fill("*****@*****.**") browser.find_by_id("Passwd").first.fill("empathy1") browser.find_by_value("Sign in").first.click() browser.uncheck("remember_choices") browser.find_by_value("Allow").first.click() print browser.title print for z in browser.cookies.driver.get_cookies(): if z["domain"] == u"www.office.mikadosoftware.com" and z["name"] == u"session":
from splinter.browser import Browser

# Log in to the office site through Google OpenID and print the resulting
# session cookie.
# NOTE(review): the account password below is hard-coded in source; it
# should come from the environment or a prompt.
browser = Browser()
browser.visit('http://www.office.mikadosoftware.com')
browser.click_link_by_href('/login')

# Submit Google's OpenID endpoint as the identity provider.
openidbox = browser.find_by_xpath('''id('column2')/form/p/input[1]''').first
openidbox.fill('https://www.google.com/accounts/o8/id')
browser.find_by_value('Sign in').first.click()

# Google sign-in form: fields with id "Email" and "Passwd".
browser.find_by_id("Email").first.fill('*****@*****.**')
browser.find_by_id("Passwd").first.fill('empathy1')
browser.find_by_value('Sign in').first.click()

# Consent page: do not remember the choice, then allow.
browser.uncheck('remember_choices')
browser.find_by_value('Allow').first.click()

# Single-argument print calls behave the same on Python 2 and 3.
print(browser.title)
print("")
for z in browser.cookies.driver.get_cookies():
    if z['domain'] == u'www.office.mikadosoftware.com' and z['name'] == u'session':
        print(z['value'])
class BrushTicket(object):
    """Ticket-grabbing bot for 12306 configured through constructor arguments."""

    # Seat name -> (index of its <td> in a result row,
    #               <option value> of the seat <select> on the order page).
    # A value of 0 means "leave the seat <select> at its default".
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Used when the seat name is not in the table (hard sleeper).
    _DEFAULT_SEAT_TYPE = (7, 3)

    def __init__(self, passengers, from_time, from_station, to_station,
                 numbers, seat_type, receiver_mobile, receiver_email):
        """Store the trip settings and start the browser.

        Args:
            passengers: iterable of passenger names as shown on the order page.
            from_time: travel date, stored in the ``_jc_save_fromDate`` cookie.
            from_station: departure station cookie value.
            to_station: arrival station cookie value.
            numbers: acceptable train numbers; each is normalised with
                ``str.capitalize``.
            seat_type: seat name, one of the keys of ``_SEAT_TYPES``.
            receiver_mobile: phone number for the SMS notification.
            receiver_email: address for the e-mail notification.
        """
        self.passengers = passengers
        self.from_station = from_station
        self.to_station = to_station
        self.numbers = [number.capitalize() for number in numbers]
        self.from_time = from_time
        self.seat_type_index, self.seat_type_value = self._SEAT_TYPES.get(
            seat_type, self._DEFAULT_SEAT_TYPE)
        self.receiver_mobile = receiver_mobile
        self.receiver_email = receiver_email
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        # chromedriver downloads:
        # https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    def do_login(self):
        """Open the login page and wait until the user has logged in.

        Login is done by hand (QR code or account); this polls the browser
        URL once per second until it equals ``self.init_my_url``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def _try_book(self, current_tr):
        """Fill and submit the order form; return True once it is confirmed.

        Returns False when the site reports a submit error, after closing the
        error dialog and stepping back to the query page.
        """
        current_tr.find_by_css('td.no-br>a')[0].click()
        sleep(1)
        key_value = 1
        for p in self.passengers:
            # "name()" is shorthand for the student entry "name(学生)".
            if '()' in p:
                p = p[:-1] + '学生' + p[-1:]
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            print('开始选择席别……')
            if self.seat_type_value != 0:
                self.driver.find_by_xpath(
                    "//select[@id='seatType_" + str(key_value)
                    + "']/option[@value='" + str(self.seat_type_value)
                    + "']").first.click()
            key_value += 1
            sleep(0.2)
            # Names ending in ")" trigger an extra confirmation dialog.
            if p[-1] == ')':
                self.driver.find_by_id('dialog_xsertcj_ok').click()
        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # A non-empty result box means the submit was rejected.
        submit_false_info = self.driver.find_by_id('orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return False
        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        print('预订成功,请及时前往支付……')
        return True

    def start_brush(self):
        """Log in, then poll the query page until one wanted train is booked.

        On success a mail and an SMS are sent and the process exits with
        status 0. Errors during a polling round are printed and polling
        continues.
        """
        self.driver.driver.maximize_window()
        self.do_login()
        self.driver.visit(self.ticket_url)
        sleep(1)
        try:
            print('开始刷票……')
            # The query form is pre-filled from these cookies after a reload.
            self.driver.cookies.add(
                {"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    # Walk every result row via its departure-time cell.
                    for start_time in self.driver.find_by_css('.start-t'):
                        current_time = start_time.text
                        current_tr = start_time.find_by_xpath('ancestor::tr')
                        if not current_tr:
                            print('当前车次异常')
                            continue
                        car_no = current_tr.find_by_css('.number').text
                        if car_no not in self.numbers:
                            continue
                        label = car_no + '(' + current_time + ')'
                        seat_text = current_tr.find_by_tag('td')[
                            self.seat_type_index].text
                        if seat_text == '--':
                            print('%s无此座位类型出售,继续……' % (label, ))
                            sleep(0.2)
                        elif seat_text == '无':
                            print('%s无票,继续尝试……' % (label, ))
                            sleep(0.2)
                        else:
                            print(label + '刷到票了(余票数:' + str(seat_text)
                                  + '),开始尝试预订……')
                            if not self._try_book(current_tr):
                                continue
                            self.send_mail(self.receiver_email,
                                           '恭喜您,抢到票了,请及时前往12306支付订单!')
                            self.send_sms(
                                self.receiver_mobile,
                                '您的验证码是:8888。请不要把验证码泄露给其他人。')
                            # SystemExit is not an Exception, so the handlers
                            # below do not swallow it.
                            sys.exit(0)
                except Exception as error_info:
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def send_sms(self, mobile, sms_info):
        """Send *sms_info* to *mobile* through the ihuyi test SMS gateway.

        Returns:
            The raw response body returned by the gateway.
        """
        host = "106.ihuyi.com"
        sms_send_uri = "/webservice/sms.php?method=Submit"
        # NOTE(review): gateway credentials are hard-coded in source.
        account = "C59782899"
        pass_word = "19d4d9c0796532c7328e8b82e2812655"
        params = parse.urlencode({
            'account': account,
            'password': pass_word,
            'content': sms_info,
            'mobile': mobile,
            'format': 'json'
        })
        headers = {
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain"
        }
        conn = httplib2.HTTPConnectionWithTimeout(host, port=80, timeout=30)
        try:
            conn.request("POST", sms_send_uri, params, headers)
            response = conn.getresponse()
            return response.read()
        finally:
            # Close the socket even when the request fails.
            conn.close()

    def send_mail(self, receiver_address, content):
        """Send an HTML notification mail containing *content*.

        Args:
            receiver_address: recipient address.
            content: text placed inside the message body paragraph.
        """
        host = 'smtp.163.com'
        port = 25
        sender = '*****@*****.**'
        # Client authorization code, not the mailbox login password.
        # NOTE(review): hard-coded in source.
        pwd = 'FatBoy666'
        receiver = receiver_address
        body = '<h2>温馨提醒:</h2><p>' + content + '</p>'
        msg = MIMEText(body, 'html', _charset="utf-8")
        msg['subject'] = '抢票成功通知!'
        msg['from'] = sender
        msg['to'] = receiver
        # The context manager issues QUIT and closes the connection, which
        # the previous code never did.
        with smtplib.SMTP(host, port) as s:
            s.login(sender, pwd)
            s.sendmail(sender, receiver, msg.as_string())
def huoche():
    """Drive a Chrome browser through the train-ticket query and booking flow.

    Steps, in order:
      1. Open ``ticket_url`` and call ``login()`` for as long as the page
         still shows the "登录" text (or until the URL equals ``initmy_url``).
      2. Reload ``ticket_url`` with the from/to/date query cookies set and
         apply the train-number and seat filters.
      3. Click "查询" repeatedly while the browser stays on ``ticket_url``:
         with ``order != 0`` the ``order``-th "预订" link is clicked each
         round; with ``order == 0`` the seat cell matching ``seat``/``tnum``
         is read and the train's last-column control is clicked once the
         cell is neither "--" nor "无".
      4. Tick every passenger named in the comma-separated ``pa``.

    Reads the module-level names ``ticket_url``, ``initmy_url``, ``starts``,
    ``ends``, ``dtime``, ``tnum``, ``seatcn``, ``seat``, ``order``, ``pa`` and
    ``usernamecn``, and rebinds the global browser ``b``.

    Exceptions raised after the login phase are reported with a traceback
    and not re-raised.
    """
    global b
    b = Browser(driver_name="chrome")
    b.visit(ticket_url)

    while b.is_text_present(u"登录"):
        sleep(1)
        login()
        if b.url == initmy_url:
            break

    try:
        print(u"购票页面")
        # Jump back to the ticket page.
        b.visit(ticket_url)

        # Preload the query parameters through cookies.
        b.cookies.add({"_jc_save_fromStation": starts})
        b.cookies.add({"_jc_save_toStation": ends})
        b.cookies.add({"_jc_save_fromDate": dtime})
        b.reload()

        # Open "more options" and restrict the search by train and seat type.
        b.find_by_text(u"更多选项").click()
        sleep(1)
        b.find_by_id("inp-train").fill(tnum)
        b.find_by_id("add-train").click()
        b.find_by_text(u"请选择")[1].click()
        b.find_by_text(seatcn)[1].click()
        b.execute_script("$.closeSelectSeat()")
        sleep(2)

        count = 0
        # Query in a loop and try to book.
        print("order is %s" % order)
        if order != 0:
            while b.url == ticket_url:
                b.find_by_text(u"查询").click()
                count += 1
                print(u"循环点击查询... 第 %s 次" % count)
                sleep(5)
                try:
                    b.find_by_text(u"预订")[order - 1].click()
                except Exception:
                    # Booking link not available yet; query again.
                    print(u"还没开始预订")
        else:
            seat_cell_xpath = ("//td[contains(@id,'" + seat + "')]"
                               "[contains(@id,'" + tnum + "')]")
            book_script = ("$('a:contains(" + tnum + ")').closest('tr')"
                           ".children('td:last').children().click()")
            while b.url == ticket_url:
                b.find_by_text(u"查询").click()
                count += 1
                sleep(2)
                remaining = b.find_by_xpath(seat_cell_xpath).text
                print(u"循环点击查询... 第 %s 次 tickets count is %s"
                      % (count, remaining))
                try:
                    # "--" means the seat type is not sold, "无" means sold out.
                    if remaining != "--" and remaining != u"无":
                        b.execute_script(book_script)
                        break
                except Exception:
                    print(u"还没开始预订")
        sleep(3)

        passengers = pa.split(",")
        # Retry until one full pass over the passengers completes without an
        # error. Exception (not a bare except) keeps Ctrl-C working here.
        while True:
            try:
                for p in passengers:
                    # NOTE(review): the account holder's own name uses the
                    # second match, presumably because it also appears
                    # elsewhere on the page — confirm.
                    idx = 1 if p == usernamecn else 0
                    if not b.find_by_text(p)[idx].checked:
                        b.find_by_text(p)[idx].check()
                    print(u"选择乘客:%s" % p)
                break
            except Exception:
                print(u"努力选中陛下的乘客信息中~~~")
                sleep(0.5)

        print(u"能做的都做了.....不再对浏览器进行任何操作")
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()