示例#1
0
def login():
    """Claim the gift for the first three configured accounts.

    A Chrome session is created and stored in the module-level ``b``. For each
    account index 0, 1 and 2 the function opens ``gift_url``, signs in through
    the ``loginFrame`` iframe using ``username[i]`` / ``passwd[i]``, selects
    the hard-coded region and server, confirms, dismisses the resulting alert
    and logs out again.
    """
    global b
    b = Browser(driver_name="chrome")
    for i in range(0, 3):
        b.visit(gift_url)
        b.find_by_id("ptLoginBtn").click()
        sleep(1)
        # The credential form lives inside an iframe.
        with b.get_iframe('loginFrame') as iframe:
            iframe.find_by_id('u').fill(username[i])
            iframe.find_by_id('p').fill(passwd[i])
            iframe.find_by_id('go').click()
        sleep(1)
        b.find_by_tag("a")[2].click()
        sleep(1)
        # Region: replace the option value with your own (this one is "Da Di Fei Ying").
        b.find_by_xpath('//select[@id="area1ContentId_wuxia"]/option[@value="7609516"]')._element.click()
        sleep(1)
        # Server: replace the option value with your own (this one is "Cang Feng Gu").
        b.find_by_xpath('//select[@id="areaContentId_wuxia"]/option[@value="2002"]')._element.click()
        sleep(1)
        b.find_by_id("confirmButtonId_wuxia").click()
        sleep(1)
        b.get_alert().dismiss()
        sleep(1)
        b.find_by_id("ptLogoutBtn").click()
        sleep(5)
    # Parenthesised call: valid on Python 3 and prints the same text on Python 2.
    print(u"领取完毕")
    sleep(3)
示例#2
0
def qianpiao():
    """Walk through the ticket purchase flow for the configured concert.

    Opens a Chrome session in the module-level ``b``, logs in, opens
    ``zhangxueyou_url`` and, starting at ``index``, checks each session entry
    until one reports the CSS class ``on``. It then fills in ``ticket_num``,
    clicks through the order pages and keeps clicking ``saveOrder`` until
    ``pay_oreder`` appears in the browser URL.
    """
    global b
    b = Browser(driver_name="chrome")
    b.visit(login_url)
    login()
    b.visit(zhangxueyou_url)

    sessions = b.find_by_name(u"2017[A CLASSIC TOUR学友.经典] 世界巡回演唱会—扬州站")
    total = len(sessions)
    print(total)
    for position in range(index, total):
        candidate = sessions[position]
        candidate.check()
        if candidate['class'] == 'on':
            # This entry is selected; stop scanning.
            break
        sleep(0.5)
        b.find_by_xpath(u'//*[@id="JqueBoxer"]/div[1]/a').click()

    # Log in again if the site sent us back to the login page.
    if login_url in b.url:
        login()

    quantity_box = b.find_by_xpath(u'//*[@id="JreltList"]/ul/li[3]/dl/input')
    quantity_box.fill(ticket_num)

    b.find_by_xpath(u"/html/body/div[12]/div[2]/div[3]/div[5]/a").click()

    b.find_by_xpath(u"//*[@id='zidTabs']/li[2]").check()

    b.find_by_xpath(u'//*[@id="payNew-conts-show"]/div[2]/ul/li[2]/input').click()

    # Keep submitting until the browser URL contains ``pay_oreder``.
    while True:
        if pay_oreder in b.url:
            break
        b.find_by_id(u'saveOrder').click()
示例#3
0
def supreme(url, keyword, color):
    """Add an item to the cart, open checkout and start the form-filling threads.

    Args:
        url: Page to open first (string).
        keyword: Partial link text of the product to click (string).
        color: Style name to select. Only 'Black', 'Red', 'Blue', 'Navy' and
            'Green' trigger a click; any other value leaves the default style.

    The function busy-waits for the "add to cart" button, the checkout link
    and the 'USA' value to appear, then starts one thread each for
    ``AutoInfo``, ``AutoAddress``, ``AutoCardInfoOne`` and ``AutoCardInfoTwo``,
    passing the browser to every one of them. It returns without joining them.
    """
    b = Browser(driver_name="chrome")
    input("Press Enter to continue...")
    b.visit(url)
    b.find_link_by_partial_text(keyword).click()

    # Choose the colour. Building the XPath from one template guarantees the
    # attribute value is always properly quoted.
    if color in ('Black', 'Red', 'Blue', 'Navy', 'Green'):
        b.find_by_xpath("/html/body//a[@data-style-name='%s']" % color).click()

    # Poll until the "add to cart" button exists, then click it.
    while b.find_by_value("add to cart").is_empty():
        pass
    b.find_by_value("add to cart").click()

    # Poll until the checkout link exists, then follow it.
    checkout_href = "https://www.supremenewyork.com/checkout"
    while b.find_link_by_href(checkout_href).is_empty():
        pass
    b.find_link_by_href(checkout_href).click()

    # Poll until an element with value 'USA' is present on the page.
    while b.find_by_value('USA').is_empty():
        pass

    # Start the form fillers; they all share the same browser.
    for filler in (AutoInfo, AutoAddress, AutoCardInfoOne, AutoCardInfoTwo):
        Thread(target=filler, args=[b]).start()
示例#4
0
    def webscrabe(self,string,account,password,index):
        """Look up a student record and pass the result to ``self.typename``.

        Logs in at the NUK academic site with ``account`` / ``password``,
        searches the student page for ``string``, reads the name and
        department cells of the result table and calls
        ``self.typename(index, name, department)``.

        The browser is always closed, including when a step raises.
        """
        browser = Browser("chrome")
        try:
            browser.visit("https://aca.nuk.edu.tw/Aca/login.asp")
            browser.find_by_name("Account").first.fill(account)
            browser.find_by_name("Password").first.fill(password)
            browser.find_by_name("B1").click()
            browser.visit("https://aca.nuk.edu.tw/SN2/Basic/Student.asp")
            browser.find_by_name("No").first.fill(string)
            browser.find_by_name("B3").click()
            name = browser.find_by_xpath("//tbody[1]/tr[2]/td[3]").text
            department = browser.find_by_xpath("//tbody[1]/tr[2]/td[5]").text
            self.typename(index,name,department)
        finally:
            # ``quit`` must be called; the bare attribute reference was a no-op.
            browser.quit()
示例#5
0
文件: vipkids.py 项目: chlv/vipkids
class SuperApply():
    """Drive a Chrome session against the VIPKID parent site.

    Creating an instance opens the browser immediately; ``login`` signs in
    with the stored credentials.
    """

    def __init__(self, username, password):
        """Store the credentials and open a Chrome browser."""
        self.username = username
        self.password = password
        self.browser = Browser(driver_name="chrome")
        self.url = "https://www.vipkid.com.cn/login"

    def login(self):
        """Open the login page, fill in both fields and click the login button."""
        self.browser.visit(self.url)
        self.browser.find_by_xpath("//input[@type='text']").first.fill(self.username)
        self.browser.find_by_xpath("//input[@type='password']").first.fill(self.password)
        login_button = self.browser.find_by_xpath("//span[@class='login-button']")
        login_button.click()

    def close_popup(self):
        """Try to close the task popup; failures are ignored on purpose."""
        try:
            close_button = self.browser.find_by_xpath("//div[@class='user-task-modal-close']")
            # Diagnostic output kept from the original implementation.
            print(type(close_button))
            print(dir(close_button))
            close_button.click()
        except Exception:
            # Best effort: a missing popup is not an error. Catching Exception
            # rather than everything lets Ctrl-C and SystemExit propagate.
            pass

    def book(self):
        """Click the link that points to the pre-schedule page."""
        book_link = self.browser.find_link_by_href("/parent/preschedule")
        book_link.click()
示例#6
0
    def test():
        """Manual check helper; not executed during a normal run.

        Opens a fixed Weibo search page and prints the like count of every
        card that has one. Cards without a like count are skipped, and the
        browser is closed when the helper finishes or fails.
        """

        browser = Browser(executable_path="../driver/firefox.exe")
        try:
            browser.visit(
                "https://s.weibo.com/weibo/%25E5%25B0%25B1%25E5%25"
                "BC%2580%25E5%25A7%258B%25E5%25A4%25A7%25E5%25B9%2585?topnav=1&wvr=6&b=1"
            )

            for card in browser.find_by_xpath('//div[@class="card"]'):
                likes = etree.HTML(card.html).xpath(
                    '//div[@class="card-act"]//a[@title="赞"]/em/text()')
                # The XPath yields nothing when the counter has no text node.
                if likes:
                    print(likes[0])
        finally:
            browser.quit()
示例#7
0
    def enterprise(self):
        """Build a text digest from the page at ``self.text``.

        Every ``<strong>`` element is read in order. The first one is replaced
        by a dated header, the second becomes the summary, and later ones
        become numbered items. Reading stops at an element whose text is
        '财报信息'; in that case the paragraph at position ``n * 2`` inside
        ``#js_content`` is appended, where ``n`` is the number of elements
        read. The source URL is always appended last.

        Returns:
            The assembled digest as a string.
        """
        driver = Browser(driver_name=BROWSER['SPLINTER']['NAME'], executable_path=BROWSER['SPLINTER']['PATH'], headless=True)
        try:
            driver.visit(self.text)
            fiscal = False
            n = 0
            # Start empty so the later ``+=`` is safe when the page has no
            # <strong> elements or opens directly with the fiscal marker.
            content = ''

            for seq, item in enumerate(driver.find_by_tag('strong')):
                n += 1
                if item.text == '财报信息':
                    fiscal = True
                    break
                if seq == 0:
                    content = '全球企业动态[%s]\n\n' % datetime.now().date()
                elif seq == 1:
                    content += '概要:\n%s\n' % item.text
                else:
                    content += '\n%d. %s\n' % (seq-1, item.text)

            if fiscal:
                content += '\n财报信息:\n' + driver.find_by_xpath('//*[@id="js_content"]/p[%s]' % str(n*2)).text
        finally:
            # Always release the headless browser.
            driver.quit()

        content += '\n\n' + self.text
        return content
示例#8
0
class huoche(object):
    """Semi-automatic 12306 train ticket booking through a splinter browser.

    All settings are class attributes. ``start()`` runs the whole flow: open
    the browser, log in (the captcha is entered by hand), repeat the ticket
    query until a booking link can be clicked, select the passengers and
    submit the order.
    """
    driver_name = ''
    executable_path = ''
    # Account name and password.
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration instead of keeping them in source.
    username = u"*****@*****.**"
    passwd = u"13785255141x"
    # Station cookie values; these have to be looked up by hand.
    # The two below decode to Wuhan (WHN) and Baoding (BDP).
    starts = u"%u6B66%u6C49%2CWHN"
    ends = u"%u4FDD%u5B9A%2CBDP"
    # Travel date, formatted like 2018-01-19.
    dtime = u"2018-02-07"
    # 1-based position of the train to book in the result list;
    # 0 switches to filtering by departure hour and train type.
    order = 21
    # Passenger names.
    users = [u"乔稳"]
    # Seat class and ticket type.
    xb = u"二等座"
    pz = u"成人票"
    # Accepted departure hour range (0-24, both ends inclusive).
    date_tic = 12
    date_tic_end = 18
    # Accepted train type letters (K, G, D, Z, ...).
    car_types = ["K", "G"]
    # URLs.
    ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
    login_url = "https://kyfw.12306.cn/otn/login/init"
    initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
    buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"

    def __init__(self):
        """Select the Chrome driver and the path to its executable."""
        self.driver_name = 'chrome'
        self.executable_path = 'C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe'

    def login(self):
        """Fill in the credentials and wait until the account page is shown.

        The captcha has to be solved manually in the browser window; the
        method polls once per second until the URL equals ``initmy_url``.
        """
        self.driver.visit(self.login_url)
        self.driver.fill("loginUserDTO.user_name", self.username)
        self.driver.fill("userDTO.password", self.passwd)
        print (u"等待验证码,自行输入...")
        while self.driver.url != self.initmy_url:
            sleep(1)

    def _matches_filter(self, hour, train_type):
        """Return True if ``hour`` and ``train_type`` satisfy the configured filter."""
        return (self.date_tic <= hour <= self.date_tic_end
                and train_type in self.car_types)

    def _query_by_order(self):
        """Re-query until the booking link at position ``order`` can be clicked."""
        count = 0
        while self.driver.url == self.ticket_url:
            self.driver.find_by_text(u"查询").click()
            sleep(2)
            count += 1
            print (u"循环点击查询... 第 %s 次" % count)
            try:
                self.driver.find_by_text(u"预订")[self.order - 1].click()
            except Exception as e:
                print (e)
                print (u"还没开始预订")

    def _query_by_filter(self):
        """Re-query and click the booking link of every train passing the filter.

        Stops when the browser leaves the query page or after more than 1000
        queries.
        """
        count = 0
        while self.driver.url == self.ticket_url:
            self.driver.find_by_text(u"查询").click()
            count += 1
            print (u"循环点击查询... 第 %s 次" % count)
            try:
                # Each <strong> holds a departure time such as "12:30".
                for t in self.driver.find_by_xpath("//*[@id='queryLeftTable']/tr/td/div/div[3]/strong[1]"):
                    hour = int(t.text.split(":")[0])
                    car = t.find_by_xpath("../..//div[1]/a")[0]
                    # The first character of the train number is its type letter.
                    if self._matches_filter(hour, car.text[0]):
                        ti_id = t.find_by_xpath("../../../..")[0]["id"]
                        elem = self.driver.find_by_xpath("//*[@id='" + ti_id + "']/td[13]/a")
                        if len(elem) > 0:
                            elem[0].click()
                            sleep(1)
                if count > 1000:
                    print("超过最大次数")
                    break
            except Exception as e:
                print (e)
                print (u"还没开始预订 %s" %count)

    def start(self):
        """Run the whole booking flow; any error is printed and ends the run."""
        self.driver = Browser(driver_name=self.driver_name, executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print (u"购票页面开始...")
            # Preload the query form through the site's cookies.
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})

            self.driver.reload()

            if self.order != 0:
                self._query_by_order()
            else:
                self._query_by_filter()

            print (u"开始预订...")
            sleep(1)
            print (u'开始选择用户...')
            for user in self.users:
                self.driver.find_by_text(user).last.click()

            print (u"提交订单...")
            sleep(1)
            # Submit the order.
            self.driver.find_by_id('submitOrder_id').click()

            sleep(1.5)
            print (u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()

        except Exception as e:
            print (e)
示例#9
0
class get_ticket(object):
    """12306 ticket grabber built on a splinter browser.

    Settings are class attributes. ``start()`` opens the browser, schedules a
    periodic login check and repeats ``working()`` until a booking attempt
    clears ``is_fail``. ``cli()`` prints the remaining tickets of a route as
    a table.
    """
    login_status = False
    driver_name = ''
    executable_path = ''
    # Account name and password.
    username = u""
    passwd = u""
    # Station cookie values.
    starts = u"%u4E0A%u6D77%2CSHH"
    ends = u"%u592A%u539F%2CTYV"
    # Travel date, formatted like 2018-01-19.
    dtime = u"2018-01-19"
    # Train types; iterated item by item in working() and qulifier().
    type = ''
    # Train position; 0 means click from top to bottom.
    order = 0
    # Passenger names. NOTE(review): this list is shared by all instances
    # unless an instance assigns its own.
    users = []
    # Seat class and ticket type.
    xb = u"二等座"
    pz = u"成人票"
    # Stays True until a booking attempt succeeds; start() loops on it.
    is_fail = True
    # Accepted departure hour range.
    starttime = 8
    endtime = 24
    # Pause in seconds after a query round that found nothing.
    duration = 1.5

    info = ''

    count = 0

    # URLs.
    ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
    login_url = "https://kyfw.12306.cn/otn/login/init"
    initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
    buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"

    @staticmethod
    def _pretty_row(fields, station_map):
        """Return the table row for one '|'-split result record.

        The values follow the header order used by ``cli``: train, stations,
        time, duration, business, first class, second class, soft sleeper,
        hard sleeper, soft seat, hard seat, no seat.
        """
        return [
            fields[3],
            station_map[fields[6]] + '-' + station_map[fields[7]],
            fields[8] + '-' + fields[9],
            fields[10],
            fields[30],
            fields[31],
            fields[32],
            fields[23],  # soft sleeper
            fields[28],  # hard sleeper
            fields[24],  # soft seat
            fields[29],  # hard seat
            fields[26],  # no seat
        ]

    def cli(self):
        """Query the remaining tickets for the docopt arguments and print a table."""
        arguments = docopt(__doc__)
        from_station = stations.get(arguments['<from>'])
        to_station = stations.get(arguments['<to>'])
        date = arguments['<date>']
        url = 'https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date={}' \
              '&leftTicketDTO.from_station={}' \
              '&leftTicketDTO.to_station={}' \
              '&purpose_codes=ADULT'.format(
            date, from_station, to_station
        )

        r = requests.get(url, verify=False)
        data = r.json()['data']
        rows = data['result']
        station_map = data['map']

        pt = PrettyTable()
        pt.field_names = '车次 车站 时间 历时 商务 一等 二等 软卧 硬卧 软座 硬座 无座'.split()
        for row in rows:
            pt.add_row(self._pretty_row(row.split('|'), station_map))

        print(pt)

    def __init__(self):
        """Select the Chrome driver and the path to its executable."""
        self.driver_name = 'chrome'
        self.executable_path = '/usr/local/bin/chromedriver'

    def login(self):
        """Fill in the credentials and wait until the account page is reached.

        Filling is retried until it succeeds. The captcha is solved by hand;
        the method polls once per second until the URL equals ``initmy_url``
        and then sets ``login_status``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        filled = False
        while not filled:
            try:
                self.driver.fill("loginUserDTO.user_name", self.username)
                self.driver.fill("userDTO.password", self.passwd)
                filled = True
                print(u"等待验证码,自行输入...")
            except Exception:
                print("无法填入账号密码!")

        while self.driver.url != self.initmy_url:
            sleep(1)
        self.login_status = True

    def working(self):
        """Run one booking attempt; ``is_fail`` is cleared only on success.

        Returns early when the login is lost, when the queue exceeds the
        remaining tickets, when no tickets remain, or when the confirmation
        dialog reports a failure. Any exception is printed and swallowed so
        that ``start()`` can retry.
        """
        try:
            self.driver.visit(self.ticket_url)
            print(u"购票页面开始...")
            # Preload the query form through the site's cookies.
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.cookies.add({"_jc_save_wfdc_flag": "dc"})
            self.driver.reload()

            # Tick the requested train types.
            for i in self.type:
                self.driver.find_by_xpath("//input[@value='%s']" % i).click()

            # Show bookable trains only.
            self.driver.find_by_xpath("//input[@id='%s']" % 'avail_ticket').click()

            sleep(1)

            while self.driver.url == self.ticket_url:

                if not self.login_status:
                    return

                self.driver.find_by_text(u"查询").click()
                self.count += 1
                print(u"已抢票... 第 %s 次" % self.count)
                try:
                    enable_buttons = self.driver.find_by_xpath("//tbody[@id = 'queryLeftTable']/tr/td/a[contains(text(),'预订')]")
                    self.qulifier(enable_buttons)

                except Exception as e:
                    print(e)
                    print(u"界面出错,重新开始抢票 %s" % self.count)
                    continue

            print(u'开始选择用户...')
            for user in self.users:
                self.driver.find_by_text(user).last.click()

            print(u"提交订单...")
            self.driver.find_by_id('submitOrder_id').click()
            sleep(0.5)

            # Stop if the tickets are already gone.
            if len(self.driver.find_by_xpath("//p[@id = 'sy_ticket_num_id']/font[starts-with(text(),'目前排队人数已经超过余票张数')]")) > 0:
                return
            elif int(re.sub(r"\D", "", self.driver.find_by_xpath("//p[@id = 'sy_ticket_num_id']").text)) == 0:
                return

            # Send the notification mail in the background.
            _thread = threading.Thread(target=self.play_sound)
            _thread.daemon = True
            _thread.start()

            # A captcha may be required at this point.
            self.driver.find_by_id('qr_submit_id').click()
            sleep(2)
            if len(self.driver.find_by_xpath("//div[@class = 'tit'][contains(text(),'失败')]")) > 0:
                return
            else:
                self.is_fail = False

        except Exception as e:
            print(e)

    def qulifier(self, check_buttons):
        """Click the booking link of the first row that passes all filters.

        A row qualifies when one of the seat columns listed in ``sit`` for a
        requested type is neither '无' nor '--' and its departure hour lies
        within ``starttime``..``endtime``.

        Returns:
            True after a click; False (after sleeping ``duration`` seconds)
            when no row qualified.
        """
        tr_obj = self.driver.find_by_xpath("//tbody[@id = 'queryLeftTable']/tr/td/a[contains(text(),'预订')]/../..")
        for index, _ in enumerate(check_buttons):
            tr_info = re.split(' |\n', tr_obj[index].text)
            timehour = int(tr_info[3].split(':')[0])
            train_num = tr_info[0]

            is_class_qulify = False
            for i in self.type:
                for j in sit[i]:
                    if tr_info[j] != '无' and tr_info[j] != '--':
                        is_class_qulify = True
                        print(" ".join(tr_info))
                        break

            if not is_class_qulify:
                continue

            # Check the departure hour.
            if self.starttime <= timehour <= self.endtime:
                print(train_num)
                find_button = self.driver.find_by_xpath(
                    "//tbody[@id = 'queryLeftTable']/tr[starts-with(@id,'ticket')][contains(@id,'%s')]/td/a[contains(text(),'预订')]" % train_num)
                if len(find_button) > 0:
                    find_button.click()
                else:
                    # Fall back to the first bookable row.
                    self.driver.find_by_xpath(
                        "//tbody[@id = 'queryLeftTable']/tr[starts-with(@id,'ticket')]/td/a[contains(text(),'预订')]")[0].click()

                self.info = tr_info
                sleep(0.5)
                return True

        sleep(self.duration)
        return False

    def start(self):
        """Open the browser, start the login watcher and loop until booked."""
        self.driver = Browser(driver_name=self.driver_name)
        self.driver.driver.set_window_size(1400, 1000)

        # Check every three minutes whether the session is still logged in.
        scheduler = BackgroundScheduler()
        scheduler.add_job(self.watch_login, 'interval', minutes=3)
        scheduler.start()

        while self.is_fail:
            if not self.login_status:
                self.login()
            self.working()

        print(u"确认选座...")

    def play_sound(self):
        """Mail the first passenger name followed by the booked row details."""
        sender_obj = sentEmail.Sender()
        sender_obj.mail(self.users[0] + " ".join(self.info))

    def watch_login(self):
        """Refresh ``login_status`` from the browser state.

        A 'tk' cookie counts as logged in. Otherwise, if the page shows the
        login link, the status is cleared and a notification mail is sent.
        The check is retried until it completes without an exception.
        """
        print("start mission")
        start_watch = True
        while start_watch:
            try:
                if 'tk' in self.driver.cookies.all():
                    print("----------登陆-----------")
                    self.login_status = True
                else:
                    if len(self.driver.find_by_xpath("//a[@id = 'login_user'][contains(text(),'登录')]")) > 0:
                        print("----------未登录---------")
                        self.login_status = False
                        # Notify by mail that the session has been logged out.
                        sender_obj = sentEmail.Sender()
                        sender_obj.mail("登录状态以退出")

                start_watch = False
            except Exception as e:
                print(e)
                start_watch = True

        print("end mission")
示例#10
0
class WeiboSpider(object):
    """Scrape Weibo search results for a keyword within a time range.

    Typical flow: ``login()``, ``search()``, ``download_data()``, ``save()``,
    ``close()``. Scraped values are collected column by column in
    ``self.save_data`` and written to an Excel file by ``save()``.
    """
    page_count = 0  # number of records scraped on the current page
    all_count = 0  # total number of records scraped
    # Column template. Every instance gets its own copy in __init__ so that
    # results are not shared between spiders.
    save_data = OrderedDict({
        "昵称": [],
        "微博正文": [],
        "微博链接": [],
        "时间": [],
        "收藏数": [],
        "转发数": [],
        "评论数": [],
        "点赞数": [],
        "设备": []
    })

    xpath_dict = {  # XPath used to extract each column from a card
        '昵称': '//div[@class="info"]//a[@class="name"]/text()',
        '微博正文':
        '//div[@class="content"]/p[@node-type="feed_list_content"]//text()',
        '微博链接': '//div[@class="content"]/p[@class="from"]/a[1]/@href',
        '时间': '//div[@class="content"]/p[@class="from"]/a[1]/text()',
        '收藏数': '//a[@action-type="feed_list_favorite"]/text()',
        '转发数': '//a[@action-type="feed_list_forward"]/text()',
        '评论数': '//a[@action-type="feed_list_comment"]/text()',
        '点赞数':
        '//div[@class="card-act"]//a[@action-type="feed_list_like"]//em/text()',
        '设备': '//div[@class="content"]/p/a[@rel="nofollow"]/text()'
    }

    def __init__(self,
                 keyword,
                 start_time,
                 end_time,
                 sleep_time=10,
                 username=None,
                 password=None):
        """Store the search parameters.

        Args:
            keyword: Search keyword.
            start_time: Range start, formatted as ``%Y-%m-%d-%H``.
            end_time: Range end in the same format; a falsy value makes
                ``refactor_date`` use the current time.
            sleep_time: Lower bound in seconds of the pause between pages.
            username: Optional Weibo user name to pre-fill.
            password: Optional Weibo password to pre-fill.
        """
        self.username = username
        self.password = password

        self.browser = None
        self.browser_name = "firefox"
        self.driver_path = "../driver/firefoxdriver.exe"  # browser driver

        self.base_url = "https://s.weibo.com/"  # Weibo search home page
        self.search_url = 'https://s.weibo.com/weibo/{keyword}' \
                          '&timescope=custom:{start_time}:{end_time}&refer=g'
        self.keyword = keyword
        self.sleep_time = sleep_time
        self.start_time = start_time
        self.end_time = end_time

        self.base_sava_path = '../files/'  # output directory
        # ``end_time`` may be None (open-ended range); use an empty suffix
        # then instead of failing on the concatenation.
        self.save_file_name = (self.keyword + self.start_time + "~"
                               + (self.end_time or ''))

        # Private result columns for this instance.
        self.save_data = OrderedDict(
            (column, []) for column in type(self).save_data)

    def refactor_date(self, start_time, end_time):
        """Normalise both range ends to ``%Y-%m-%d-%H``; default the end to now."""
        self.start_time = datetime.strptime(
            start_time, "%Y-%m-%d-%H").strftime("%Y-%m-%d-%H")
        if end_time:
            self.end_time = datetime.strptime(
                end_time, "%Y-%m-%d-%H").strftime("%Y-%m-%d-%H")
        else:
            self.end_time = datetime.now().strftime('%Y-%m-%d-%H')

    def get_search_url(self):
        """Return the search URL for the keyword and normalised time range."""
        self.refactor_date(self.start_time, self.end_time)

        return self.search_url.format(keyword=self.keyword,
                                      start_time=self.start_time,
                                      end_time=self.end_time)

    def login(self):
        """Open the browser and wait until the user has logged in.

        The credentials are pre-filled when given; the login itself is
        completed by hand. Success is detected through URL changes.
        """
        self.browser = Browser(driver_name=self.browser_name,
                               executable_path=self.driver_path,
                               service_log_path='../files/log.log')

        self.browser.visit(self.base_url)
        self.browser.click_link_by_text('登录')

        if self.username is not None:
            self.browser.fill("username", self.username)
        if self.password is not None:
            self.browser.fill("password", self.password)

        print("请在打开的浏览器中登录........")

        time.sleep(2)  # give the browser time to load
        logining_url = self.browser.url  # URL shown while logging in

        # On a slow network the login URL may not be there yet.
        while logining_url == self.base_url:
            time.sleep(2)
            logining_url = self.browser.url

        # Leaving the login URL is taken as proof of a successful login.
        while 1:
            if self.browser.url != logining_url:
                break
            time.sleep(2)
        print("已成功登录,开始抓取信息.......")

    def search(self):
        """Navigate to the search result page."""

        self.browser.visit(self.get_search_url())

    def get_card_data(self, card, xapth_dict: dict):
        """Extract every column from one result card into ``self.save_data``.

        Missing numeric columns are stored as '0', other missing columns as
        an empty string, so all columns keep the same length.
        """

        etree_html = etree.HTML(card.html)
        number = ['收藏数', '转发数', '评论数', '点赞数']

        self.page_count += 1

        for key in xapth_dict.keys():
            xpath = xapth_dict.get(key)
            data = etree_html.xpath(xpath)

            if data:
                if key in number:
                    self.save_data[key].append(Utils.get_num(data[0]))

                elif key == '时间':
                    self.save_data[key].append(
                        Utils.get_date(data[0]).strftime('%Y-%m-%d'))

                elif key == '微博正文':
                    content = ''.join(data).replace(' ', '').replace('\n', '')
                    self.save_data[key].append(content)

                else:
                    self.save_data[key].append(data[0])

            else:
                if key in number:
                    self.save_data[key].append('0')

                else:
                    self.save_data[key].append('')

    def download_data(self):
        """Scrape up to 50 result pages, pausing between pages.

        Ctrl-C outside the per-card loop stops the scraping early so that
        the data collected so far can still be saved.
        """
        self.browser.execute_script(
            "window.scrollTo(0,document.body.scrollHeight)")  # scroll to the bottom

        try:
            self.browser.click_link_by_text('查看全部搜索结果')
        except ElementDoesNotExist:
            pass

        page_index = 1
        while page_index <= 50:  # Weibo shows at most 50 result pages

            try:
                # Clicking "next" on the last page can jump back to page 1;
                # comparing with the page number in the URL detects that.
                real_page = re.findall(r'page=(\d+)', self.browser.url)
                if real_page:
                    real_page = int(real_page[0])
                else:
                    real_page = 1
                if real_page < page_index:
                    break

                print('正在抓取第%s页内容:' % page_index, end='')
                self.browser.execute_script(
                    "window.scrollTo(0,document.body.scrollHeight)")

                cards = self.browser.find_by_xpath(
                    '//div[@class="card"]')  # all posts on the page

            except KeyboardInterrupt:
                print('中途退出抓取,正则保存中.....')
                break

            for card in cards:
                try:
                    self.get_card_data(card, self.xpath_dict)
                except KeyboardInterrupt:
                    pass

            try:
                print('本页抓取了%s条数据,模拟等待中.....' % self.page_count)
                self.all_count += self.page_count
                self.page_count = 0

                # Random pause based on the configured sleep time.
                sleep_time = random.randint(self.sleep_time,
                                            self.sleep_time + 5)

                time.sleep(sleep_time)  # imitate a user reading the page

                try:
                    self.browser.click_link_by_text('下一页')
                    page_index += 1

                except ElementDoesNotExist:
                    break

            except KeyboardInterrupt:
                print('中途退出抓取,正则保存中.....')
                break

    def save(self):
        """Write the collected data to an Excel file; failures are printed."""

        print('--------------------------------------------')
        print('本次共抓取了%s条数据' % self.all_count)

        try:
            data = pandas.DataFrame(self.save_data)
            file_path = self.base_sava_path + self.save_file_name + '.xlsx'

            data.to_excel(file_path, index=False)
            print('文件正在保存...', end='\n\n')

        except Exception as e:
            print('文件保存失败!!!', end='\n\n')
            print(e)

    def close(self):
        """Close the browser."""
        self.browser.quit()

    @staticmethod
    def test():
        """Manual check helper; not executed during a normal run.

        Prints the like count of every card on a fixed search page. Cards
        without a like count are skipped and the browser is always closed.
        """

        browser = Browser(executable_path="../driver/firefox.exe")
        try:
            browser.visit(
                "https://s.weibo.com/weibo/%25E5%25B0%25B1%25E5%25"
                "BC%2580%25E5%25A7%258B%25E5%25A4%25A7%25E5%25B9%2585?topnav=1&wvr=6&b=1"
            )

            for card in browser.find_by_xpath('//div[@class="card"]'):
                likes = etree.HTML(card.html).xpath(
                    '//div[@class="card-act"]//a[@title="赞"]/em/text()')
                if likes:
                    print(likes[0])
        finally:
            browser.quit()
示例#11
0
def function():
    """Click through the multi-page appointment form in a Chrome browser.

    Opens ``url``, reads the captcha text from standard input and then fills
    and submits each page in turn with the hard-coded values below.
    """
    browser = Browser('chrome')
    browser.visit(url)

    def press(xpath):
        # Locate by XPath and click.
        browser.find_by_xpath(xpath).click()

    image_button = '//*[@id="ctl00_plhMain_ImageButton"]'

    # The captcha is typed in by the operator.
    captcha = input()

    # Page 1: captcha, submission location, submit.
    browser.fill("ctl00$plhMain$MyCaptchaControl1", captcha)
    browser.choose("ctl00$plhMain$cboVisaCategory", "Beijing")
    press('//input[@type="submit"]')

    # Page 2: number of applicants, visa category, submit.
    browser.fill("ctl00$plhMain$tbxNumOfApplicants", 2)
    browser.choose("ctl00$plhMain$cboVisaCategory", "17")
    press('//*[@id="ctl00_plhMain_ControlsTable"]/tbody/tr[8]/td/a[1]/input')

    # Page 3: e-mail address, submit.
    browser.fill("name", "*****@*****.**")
    press(image_button)

    # Page 4: password and confirmation, submit.
    browser.fill("ctl00$plhMain$txtPassword1", "Aa123123")
    browser.fill('ctl00$plhMain$txtCnfPassword1', "Aa123123")
    press(image_button)

    # Page 4 (continued): e-mail address and password, submit.
    browser.fill("ctl00$plhMain$txtEmail", "*****@*****.**")
    # NOTE(review): the field name is empty here - presumably a password
    # field whose name was never filled in; confirm against the page.
    browser.fill("", "Aa123123")
    press(image_button)

    # Page 5: applicant details, submit.
    details = "ctl00$plhMain$repAppVisaDetails$ctl01$"
    browser.fill(details + "tbxPassportNo", "G11111111")  # passport number
    browser.choose(details + "cboTitle", "MR.")  # title
    browser.fill(details + "tbxFName", "伟")  # given name
    browser.fill(details + "tbxLName", "阿")  # family name
    browser.fill(details + "tbxSTDCode", "+86")  # dialling code
    browser.fill(details + "tbxMobileNumber", "12345678900")  # phone number
    press('//*[@id="ctl00_plhMain_btnSubmit"]')

    # Pick the date, pick the time slot, final submit.
    press('//*[@id="ctl00_plhMain_cldAppointment"]/tbody/tr[5]/td[5]/a')
    press('//*[@id="ctl00_plhMain_gvSlot_ctl02_lnkTimeSlot"]')
    press('//*[@id="ctl00_plhMain_ImageButton1"]')
示例#12
0
def _fetch_soup(url):
    """Download ``url`` with requests and parse it with the html.parser backend."""
    return BeautifulSoup(requests.get(url).text, 'html.parser')


def _latest_news():
    """Return (title, teaser text) of the first article on the NASA Mars news page."""
    soup = _fetch_soup(
        "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    )
    title = soup.find('div', class_='content_title').find('a').text
    container = soup.find('div', class_='image_and_description_container')
    teaser = container.find('div', class_='rollover_description_inner').text
    return title, teaser


def _current_weather():
    """Return the leading content node of the first tweet on the Mars weather feed."""
    soup = _fetch_soup("https://twitter.com/marswxreport?lang=en")
    first_tweet = soup.find_all('div', class_='js-tweet-text-container')[0]
    return first_tweet.find('p').contents[0]


def _facts_table_html():
    """Return the first table of the Mars facts page rendered as HTML."""
    soup = _fetch_soup("https://space-facts.com/mars/")
    df = pd.read_html(str(soup.find_all('table')))[0]
    # Column 0 becomes the unnamed index, column 1 the value column.
    df = df.rename(columns={0: '', 1: 'value'})
    df.set_index('', inplace=True)
    return df.to_html()


def _hemisphere_images(browser):
    """Visit every hemisphere page and return a list of {'img_url', 'title'} dicts."""
    browser.visit(
        'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    )
    listing = BeautifulSoup(browser.html, 'html.parser')

    hemispheres = []
    for item in listing.find_all('div', class_='item'):
        browser.visit('https://astrogeology.usgs.gov' + item.find('a')['href'])
        page = BeautifulSoup(browser.html, 'html.parser')
        hemispheres.append({
            'img_url': page.find('div', class_='downloads').find('a')['href'],
            'title': page.find('h2', class_="title").text.replace(' Enhanced', ''),
        })
        time.sleep(1)  # pause between page loads
    return hemispheres


def _featured_image_url(browser):
    """Open the featured image's detail page on the JPL site and return its URL."""
    browser.visit('https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')
    browser.find_by_xpath('//*[@id="full_image"]')[0].click()

    time.sleep(2)
    browser.find_link_by_partial_text('more info').click()
    time.sleep(2)
    detail = BeautifulSoup(browser.html, 'html.parser')
    return 'https://www.jpl.nasa.gov' + detail.find(
        'figure', class_='lede').a['href']


def scrape():
    """Collect Mars news, weather, facts and images from several sites.

    Returns:
        dict with the keys 'news_title', 'news_paragraph', 'current_weather',
        'featured_img', 'mars_fact' (HTML table) and 'hemisphere_images'
        (list of dicts with 'img_url' and 'title').
    """
    news_title, news_p = _latest_news()
    mars_weather = _current_weather()
    mars_fact_html = _facts_table_html()

    executable_path = {'executable_path': '/Users/jingc/chromedriver'}
    browser = Browser('chrome', **executable_path, headless=True)
    try:
        hemisphere_image_urls = _hemisphere_images(browser)
        featured_img_url = _featured_image_url(browser)
    finally:
        # Release the headless browser even if a page could not be parsed.
        browser.quit()

    return {
        'news_title': news_title,
        'news_paragraph': news_p,
        'current_weather': mars_weather,
        'featured_img': featured_img_url,
        'mars_fact': mars_fact_html,
        'hemisphere_images': hemisphere_image_urls
    }
示例#13
0
class TestCase(ptc.PloneTestCase):
    """Browser-driven helper base class for functional tests of a Plone site.

    Every instance owns a Firefox session (``self.browser``) and the address
    returned by ``startZServer()`` (``self.host``, ``self.port``).  The
    ``portal_*`` methods drive the Plone user interface through that browser.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base test case, then start the browser and ZServer.

        All arguments (normally just ``methodName``) are forwarded to the
        base class.  Test loaders build cases as ``TestCase(methodName)``, so
        an ``__init__`` that accepts nothing cannot be loaded at all.
        """
        super(TestCase, self).__init__(*args, **kwargs)
        self.browser = Browser(driver_name='firefox')
        self.host, self.port = startZServer()

    def _portal_root(self):
        """Return the URL of the portal root, without a trailing slash."""
        return 'http://%s:%s/plone' % (self.host, self.port)

    def _fill_new_user_form(self, fullname, username, email, password):
        """Open Site Setup > Users and Groups > Add User and fill the form.

        The form is left unsubmitted so that callers can tick extra options
        before registering the user.
        """
        self.portal_click_a_personaltool('Site Setup')
        self.browser.click_link_by_text('Users and Groups')
        self.browser.find_by_name('form.button.AddUser').first.click()
        self.browser.fill('form.fullname', '%s' % fullname)
        self.browser.fill('form.username', '%s' % username)
        self.browser.fill('form.email', '%s' % email)
        # The password is typed twice: once in the field, once in its
        # confirmation control.
        self.browser.fill('form.password', '%s' % password)
        self.browser.fill('form.password_ctl', '%s' % password)

    def afterSetUp(self):
        """Visit the portal root."""
        self.browser.visit(self._portal_root())

    def beforeTearDown(self):
        """Shut the browser session down."""
        self.browser.quit()

    def portal_visit(self, url):
        """Visit ``url``, a path relative to the portal root."""
        self.browser.visit('%s/%s' % (self._portal_root(), url))

    def portal_home(self):
        """Visit the portal home page."""
        self.browser.visit(self._portal_root() + '/')

    def portal_login(self, user, password):
        """Log in through ``login_form`` with the given credentials."""
        self.portal_visit('login_form')
        self.browser.fill('__ac_name', user)
        self.browser.fill('__ac_password', password)
        self.browser.find_by_name('submit').first.click()

    def portal_login_as_owner(self):
        """Log in with ``portal_owner`` / ``default_password``."""
        self.portal_login(user=portal_owner, password=default_password)

    def portal_logout(self):
        """Visit the ``logout`` page."""
        self.portal_visit('logout')

    def portal_search(self, search_word):
        """Type ``search_word`` in the search box and press the search button."""
        self.browser.fill('SearchableText', '%s' % search_word)
        self.browser.find_by_css('.searchButton').first.click()

    def portal_navigate_submenu(self, option):
        """Click the content-view tab whose link text is exactly ``option``."""
        tab_xpath = "//li[contains(@id, 'contentview')]/a[text()='%s']" % option
        self.browser.find_by_xpath(tab_xpath).first.click()

    def portal_click_a_personaltool(self, personaltool):
        """Click the dashboard link, then the link labelled ``personaltool``."""
        self.browser.click_link_by_href(self._portal_root() + '/dashboard')
        self.browser.click_link_by_text('%s' % personaltool)

    def portal_add_user(self, fullname, username, email, password):
        """Register a new user through the Users and Groups control panel."""
        self._fill_new_user_form(fullname, username, email, password)
        self.browser.find_by_id('form.actions.register').first.click()

    def portal_add_user_as_manager(self, fullname, username, email, password):
        """Register a new user with the first group checkbox ticked.

        NOTE(review): ``form.groups.0`` is presumably the manager group, as
        the method name suggests -- confirm against the form markup.
        """
        self._fill_new_user_form(fullname, username, email, password)
        self.browser.find_by_id('form.groups.0').first.click()
        self.browser.find_by_id('form.actions.register').first.click()

    def portal_change_user_role(self, username, new_role):
        """Toggle the ``new_role`` checkbox on ``username``'s row and save."""
        self.portal_click_a_personaltool('Site Setup')
        self.browser.click_link_by_text('Users and Groups')
        # Pick the input valued ``new_role`` inside the table row that also
        # contains an input valued ``username``.
        role_xpath = "//tr[*/input[@value='%s']]//input[@value='%s']" % (username, new_role)
        self.browser.find_by_xpath(role_xpath).first.click()
        self.browser.find_by_name('form.button.Modify').first.click()

    def portal_click_enable_content_types(self):
        """Open the "Add new items inside this item" menu."""
        self.browser.find_by_css('a[title="Add new items inside this item"]').first.click()

    def portal_add_content_type(self, type):
        """Open the add menu and click the entry labelled ``type``."""
        self.portal_click_enable_content_types()
        self.browser.click_link_by_text('%s' % type)

    def portal_click_content_item_action(self):
        """Open the "Actions for the current content item" menu."""
        self.browser.find_by_css('a[title="Actions for the current content item"]').first.click()

    def portal_add_item_action(self, type):
        """Open the actions menu and click the entry labelled ``type``."""
        self.portal_click_content_item_action()
        self.browser.click_link_by_text('%s' % type)

    def portal_list_states(self):
        """Open the "Change the state of this item" menu."""
        self.browser.find_by_css('a[title="Change the state of this item"]').first.click()

    def portal_modify_state_to(self, state):
        """Open the state menu and click the transition labelled ``state``."""
        self.portal_list_states()
        self.browser.click_link_by_text('%s' % state)
示例#14
0
class huoche(object):
    """Scripted 12306 ticket purchase driven through a splinter browser.

    Every setting is a class attribute meant to be edited before running;
    ``start()`` is the entry point.
    """

    # Browser driver name and driver binary path; both are set in __init__.
    driver_name = ''
    executable_path = ''

    # Account user name and password.
    username = u''
    passwd = u''

    # Station cookie values, to be looked up by hand.  The original note
    # says these two are Beijing and Zhengzhou -- TODO confirm, the escaped
    # names do not obviously match.
    starts = u'%u5317%u4EAC%u897F%2CBXP'
    ends = u'%u6708%u5C71%2CYBF'

    # Travel date, formatted like 2018-01-19.
    dtime = u'2018-02-09'

    # Train type filter: G D Z T K QT ('' leaves the filter untouched).
    train_type = ''

    # Which "book" button to press, counted from 1; 0 presses each button
    # in turn from top to bottom.
    order = 10

    # Passenger names.
    users = [u'', u'']

    # Index of the seat-class option to select:
    #   0 second class   1 first class   2 business class
    #   0 hard sleeper   1 hard seat     2 soft sleeper
    xb = 0

    # Seat id: 1A 1B 1C 1D 1F
    seat_num = '1F'

    # Ticket kind label (adult ticket); not referenced by the methods below.
    pz = u'成人票'

    # URLs
    ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'
    login_url = 'https://kyfw.12306.cn/otn/login/init'
    initmy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'
    buy = 'https://kyfw.12306.cn/otn/confirmPassenger/initDc'

    def __init__(self):
        """Select Chrome and the chromedriver binary to drive it with."""
        self.driver_name = 'chrome'
        self.executable_path = '/usr/bin/chromedriver'

    def login(self):
        """Fill in the credentials, then wait for the post-login page.

        The captcha is typed by the operator; the method polls the browser
        URL once per second until it equals ``initmy_url``.
        """
        self.driver.visit(self.login_url)
        self.driver.fill('loginUserDTO.user_name', self.username)
        self.driver.fill('userDTO.password', self.passwd)
        print(u'等待验证码,自行输入...')
        while self.driver.url != self.initmy_url:
            sleep(1)

    def start(self):
        """Open the browser, log in, then query and book until confirmed.

        Any exception raised after the ticket page has been opened is
        printed and swallowed.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print(u'购票页面开始...')
            # Preload the query form through cookies, then reload the page
            # so that they take effect.
            for cookie in ({'_jc_save_fromStation': self.starts},
                           {'_jc_save_toStation': self.ends},
                           {'_jc_save_fromDate': self.dtime}):
                self.driver.cookies.add(cookie)
            self.driver.reload()

            # Tick the train-type filter when one is configured.
            if self.train_type != '':
                type_box = '//input[@value="' + self.train_type + '"]'
                self.driver.find_by_xpath(type_box).click()

            # Keep querying for as long as the browser stays on the ticket
            # page; a successful click on a "book" button navigates away.
            attempts = 0
            pick_one = self.order != 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text(u'查询').click()
                attempts += 1
                if pick_one:
                    print(u'单次循环点击查询... 第 %s 次' % attempts)
                else:
                    print(u'列表循环点击查询... 第 %s 次' % attempts)
                try:
                    if pick_one:
                        self.driver.find_by_text(u'预订')[self.order - 1].click()
                    else:
                        for button in self.driver.find_by_text(u'预订'):
                            button.click()
                            sleep(1)
                except Exception:
                    # Booking has not opened yet (or the click failed):
                    # report and query again.
                    if pick_one:
                        print(u'抢票未开始')
                    else:
                        print(u'抢票未开始 %s' % attempts)

            print(u'开始预订...')
            sleep(1)

            print(u'开始选择用户...')
            for passenger in self.users:
                self.driver.find_by_text(passenger).last.click()

            print(u'开始选择席位...')
            seat_options = self.driver.find_by_id('seatType_1').find_by_tag('option')
            seat_options[self.xb].click()

            print(u'提交订单...')
            self.driver.find_by_id('submitOrder_id').click()

            # A seat is picked explicitly only for G trains.
            if self.train_type == 'G':
                print(u'开始选择座位...')
                seat_area = self.driver.find_by_xpath('//div[@id="erdeng1"]')
                seat_area.find_by_id(self.seat_num).click()
                print(u'确认选择座位号为' + self.seat_num + '...')
            print(u'确认订单')
            sleep(1.5)
            self.driver.find_by_id('qr_submit_id').click()
            print(u'购票成功...')
        except Exception as err:
            print(err)
class LemonLemon_douyin(object):
    """Batch downloader for Douyin videos listed, one URL per line, in a file."""

    def __init__(self, width=500, height=300):
        """Start the headless Chrome session used by ``remove_watermark``.

        Parameters:
            width, height: unused; kept so that existing callers still work.
        """
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name="chrome",
                              options=chrome_options,
                              headless=True)

    def get_video_urls(self, input_f):
        """Read the video URLs to download from a text file.

        Also stores the current timestamp in ``self.date_today``; it names
        the videos here and the download directory in ``run``.

        Parameters:
            input_f: path of a text file; only lines starting with "http"
                are used.
        Returns:
            video_names: generated file names, ``<date_today>_<n>.mp4``
            video_urls: the URLs in file order, without surrounding whitespace
        """
        now_date = datetime.datetime.now()
        self.date_today = "{}_{}:{}".format(now_date.date(), now_date.hour,
                                            now_date.minute)
        video_names = []
        video_urls = []
        with open(input_f) as f:
            for line in f:
                if not line.startswith("http"):
                    continue
                # Drop the line terminator: it would otherwise travel with
                # the URL into the HTTP request and the browser form.
                video_urls.append(line.strip())
                video_names.append("{}_{}.mp4".format(self.date_today,
                                                      len(video_urls)))
        return video_names, video_urls

    def get_download_url(self, video_url):
        """Extract the play address embedded in a video page.

        Parameters:
            video_url: address of the video page
        Returns:
            download_url: first entry of ``video.play_addr.url_list`` in the
                ``var data = [...]`` literal of the page's last script tag
        Raises:
            IndexError: the page has no script tag or no ``var data`` literal.
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, "lxml")
        script = bf.find_all("script")[-1]
        video_url_js = re.findall(r"var data = \[(.+)\];", str(script))[0]
        video_html = json.loads(video_url_js)
        return video_html["video"]["play_addr"]["url_list"][0]

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name``, printing progress to stdout.

        Parameters:
            video_url: address of the watermarked video page
            video_name: destination file path
            watermark_flag: when true, download the watermark-free version
                resolved by ``remove_watermark``; otherwise use the address
                returned by ``get_download_url``
        Returns:
            None
        Raises:
            IOError: the server did not answer the download with HTTP 200.
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        chunk_size = 1024
        with closing(requests.get(video_url, stream=True,
                                  verify=False)) as response:
            if response.status_code != 200:
                # Fail loudly so that run() records the URL as an error
                # instead of counting a skipped download as a success.
                raise IOError("unexpected HTTP status %s for %s" %
                              (response.status_code, video_url))
            # The header is optional; 0 means "size unknown".
            content_size = int(response.headers.get("content-length", 0))
            sys.stdout.write("  [文件大小]:%0.2f MB\n" %
                             (content_size / chunk_size / 1024))
            size = 0
            with open(video_name, "wb") as out:
                for data in response.iter_content(chunk_size=chunk_size):
                    out.write(data)
                    size += len(data)
                    if content_size:
                        sys.stdout.write("  [下载进度]:%.2f%%" %
                                         float(size / content_size * 100) +
                                         "\r")
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Resolve the watermark-free download address of a video.

        Submits ``video_url`` to the form on http://douyin.iiilab.com/ in the
        headless browser and reads the link from the result block.

        Parameters:
            video_url: address of the watermarked video
        Returns:
            the ``href`` of the first link in the result block
        """
        self.driver.visit("http://douyin.iiilab.com/")
        self.driver.find_by_tag("input").fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, "lxml")
        return bf.find("a").get("href")

    def run(self, input_f):
        """Download every video listed in ``input_f`` and log the failures.

        Videos are saved under ``douyin_download<date_today>``.  The URLs
        that could not be downloaded are written, one per line, to
        ``error_url.txt`` in the current directory.

        Parameters:
            input_f: path of the text file holding the video URLs
        Returns:
            None
        """
        error_url = []
        video_names, video_urls = self.get_video_urls(input_f)
        download_dir = "douyin_download" + self.date_today
        os.makedirs(download_dir, exist_ok=True)
        print("视频下载中:共有%d个作品!\n" % len(video_urls))
        try:
            for num, (video_url, video_name) in enumerate(
                    zip(video_urls, video_names), 1):
                print("  解析第%d个视频链接 [%s] 中,请稍后!\n" % (num, video_url))
                # Random pause between downloads.
                random_wait = random.uniform(3, 5)
                print("waiting...", random_wait)
                time.sleep(random_wait)
                # Path separators are not allowed inside a file name.
                video_name = video_name.replace("\\", "").replace("/", "")
                try:
                    self.video_downloader(
                        video_url, os.path.join(download_dir, video_name))
                except Exception:
                    # Best effort: remember the failure and carry on with
                    # the next video.  Ctrl-C is deliberately not caught.
                    print("**************************")
                    print("ERROR", video_url)
                    error_url.append(video_url)
                print("\n")
        finally:
            # Always end the browser session, even when interrupted.
            self.driver.quit()
        with open("error_url.txt", "w") as f:
            f.writelines(url + "\n" for url in error_url)
        print("下载完成!")
        print("出错数量:", len(error_url))
示例#16
0
class Ticket(object):
    """Poll 12306 for the tickets described in an INI file and book them.

    The configuration file provides three sections: ``STATIONCOOKIE``
    (station name -> cookie value), ``GLOBAL`` (username, password, browser)
    and ``TICKET`` (stations, trains, seat classes, dates, passengers...).
    ``executor()`` is the entry point.
    """

    def __init__(self, config_file):
        """Load ``config_file`` and start the browser it names.

        Parameters:
            config_file: path of the INI configuration file.
        """
        ## config parser setting
        self.config_file = config_file
        self.settings = configparser.ConfigParser(
            interpolation=configparser.ExtendedInterpolation())
        self.settings.read(self.config_file)
        ## defaults; config_parser() below overrides the configurable ones
        self.brower = 'chrome'
        self.station = {}
        self.url = "https://kyfw.12306.cn/otn/leftTicket/init"
        # Seat classes; the position in this list is used as the column
        # index of that class in a row of the result table.
        self.ticket_index = [
            '',
            u'商务座',
            u'特等座',
            u'一等座',
            u'二等座',
            u'高级软卧',
            u'软卧',
            u'硬卧',
            u'软座',
            u'硬座',
            u'无座',
        ]
        self.username = ''
        self.password = ''
        self.date_format = '%Y-%m-%d'
        self.tolerance = -1
        self.blacklist = {}
        self.date = []
        self.isStudent = False
        self.success = 0
        self.find_ticket = 0
        self.playmusic = False
        self.count = 0
        # Read the configuration only once every default is in place, so
        # that nothing it sets (playmusic, brower, ...) is overwritten, and
        # start the browser afterwards so that the configured one is used.
        self.config_parser()
        self.b = Browser(driver_name=self.brower)

    def ConfigSectionMap(self, section):
        """Return the options of ``section`` as a dict.

        An option whose value cannot be read is reported on stdout and
        mapped to ``None``.
        """
        dict1 = {}
        for option in self.settings.options(section):
            try:
                dict1[option] = self.settings.get(section, option)
            except Exception:
                print("exception on %s!" % option)
                dict1[option] = None
        return dict1

    def daterange(self, start_date, end_date):
        """Yield every day from ``start_date`` to ``end_date``, both included."""
        for n in range(int((end_date - start_date).days) + 1):
            yield start_date + timedelta(n)

    def config_parser(self):
        """Load stations and booking options; exit when either reports -1."""
        if self.retrieve_station_dict() == -1:
            sys.exit()
        if self.retrieve_book_options() == -1:
            sys.exit()

    def retrieve_station_dict(self):
        """Copy the ``STATIONCOOKIE`` section into ``self.station``."""
        for name, value in self.ConfigSectionMap('STATIONCOOKIE').items():
            self.station[name] = value

    def retrieve_book_options(self):
        """Load the ``GLOBAL`` and ``TICKET`` sections into attributes.

        Returns:
            -1 when ``range_query`` is ``Y`` but ``date`` does not hold a
            valid start/end pair; ``None`` otherwise.
        """
        login_info = self.ConfigSectionMap('GLOBAL')
        self.username = login_info['username'].strip()
        self.password = login_info['password'].strip()
        # Keep the default browser when the option is empty.
        browser_name = (login_info['browser'] or '').strip()
        if browser_name:
            self.brower = browser_name

        book_settings = self.ConfigSectionMap('TICKET')
        self.fromStation = [station.strip() for station in book_settings['from_station'].split(',')]
        self.toStation = [station.strip() for station in book_settings['to_station'].split(',')]
        trains = [train.strip() for train in book_settings['trains'].split(',')]
        # An empty option means "any train".
        if len(trains) == 1 and trains[0] == '':
            self.trains = []
        else:
            self.trains = trains
        self.ticket_type = [_type.strip() for _type in book_settings['ticket_type'].split(',')]

        dates = [d.strip() for d in book_settings['date'].split(',')]
        if book_settings['range_query'].strip() == 'Y':
            # Range query: the first two dates are the inclusive bounds.
            if len(dates) < 2:
                print("未设置正确的起至时间")
                return -1
            start_date = datetime.strptime(dates[0], self.date_format)
            end_date = datetime.strptime(dates[1], self.date_format)
            if end_date < start_date:
                print("查询截止日期不可早于开始日期!")
                return -1
            for single_date in self.daterange(start_date, end_date):
                self.date.append(single_date.strftime(self.date_format))
        else:
            self.date = dates

        if book_settings['student'].strip() == 'Y':
            self.isStudent = True
        self.tolerance = int(book_settings['tolerance'])
        self.people = [people.strip() for people in book_settings['people'].split(',')]
        if book_settings['alarm'].strip() == 'Y':
            self.playmusic = True

    def login(self):
        """Open the site, fill in the credentials and pause in the debugger.

        NOTE(review): ``pdb.set_trace()`` is presumably a manual pause that
        lets the operator solve the captcha before continuing -- confirm.
        """
        self.b.visit(self.url)
        self.b.find_by_text(u"登录").click()
        self.b.fill("loginUserDTO.user_name", self.username)
        self.b.fill("userDTO.password", self.password)
        pdb.set_trace()

    def page_has_loaded(self):
        """Return True once ``document.readyState`` is ``complete``."""
        page_state = self.b.evaluate_script("document.readyState")
        return page_state == 'complete'

    def switch_to_order_page(self):
        """Wait for the ticket-booking link to appear, then click it."""
        order = []
        while len(order) == 0:
            order = self.b.find_by_text(u"车票预订")
        order[0].click()

    def checkTicket(self, date, fromStation, toStation):
        """Query one date/route and click the first bookable matching row.

        Sets ``self.find_ticket`` to 1 when a row offering one of the
        configured seat classes has been clicked.

        Returns:
            The train identifier last read from the table (``''`` when no
            row carried one).  When a configured seat class is not listed in
            ``ticket_index`` the tuple ``(0, '')`` is returned instead.
        """
        print('date: %s, from %s, to %s' % (date, fromStation, toStation))
        self.b.cookies.add({"_jc_save_fromDate": date})
        self.b.cookies.add({"_jc_save_fromStation": self.station[fromStation]})
        self.b.cookies.add({"_jc_save_toStation": self.station[toStation]})
        self.b.cookies.all()
        self.b.reload()
        if self.isStudent:
            self.b.find_by_text(u'学生').click()
        self.b.find_by_text(u"查询").click()
        while not self.page_has_loaded():
            continue
        all_trains = []
        while len(all_trains) == 0:
            all_trains = self.b.find_by_id('queryLeftTable').find_by_tag('tr')
        # Bound up front: the loop below may never assign it.
        this_train = ''
        for k, train in enumerate(all_trains):
            tds = train.find_by_tag('td')
            if tds and len(tds) >= 10:
                # A link in the last cell marks the row as bookable.
                has_ticket = tds.last.find_by_tag('a')
                if len(has_ticket) != 0:
                    if k + 1 < len(all_trains):
                        # The train identifier is carried by the next row.
                        this_train = all_trains[k + 1]['datatran']
                        if len(self.trains) != 0 and this_train not in self.trains:
                            continue
                        if (self.tolerance != -1
                                and this_train in self.blacklist
                                and self.blacklist[this_train] >= self.tolerance):
                            print(u"%s 失败 %d 次, 跳过" % (this_train, self.blacklist[this_train]))
                            continue
                    for cat in self.ticket_type:
                        if cat in self.ticket_index:
                            i = self.ticket_index.index(cat)
                        else:
                            print('无效的席别信息')
                            return 0, ''
                        cell_text = tds[i].text
                        if cell_text != u'无' and cell_text != '--':
                            if cell_text != u'有':
                                print(u'%s 的 %s 有余票 %s 张!' % (this_train, cat, cell_text))
                            else:
                                print(u'%s 的 %s 有余票若干张!' % (this_train, cat))
                            self.find_ticket = 1
                            tds.last.click()
                            break
            if self.find_ticket:
                break
        return this_train

    def book(self):
        """Tick every configured passenger on the order page.

        Returns:
            1, also when no passenger is configured.
        """
        while not self.page_has_loaded():
            continue
        if len(self.people) == 0:
            print('没有选择乘车人!')
            return 1
        more = []
        while len(more) == 0:
            more = self.b.find_by_text(u"更多")
        more[0].click()
        people = self.people
        passenger_xpath = '//ul[@id="normal_passenger_id"]/li/label[contains(text(),"%s")]'
        # Wait until the passenger list has been rendered.
        person = []
        while len(person) == 0:
            person = self.b.find_by_xpath(passenger_xpath % people[0])
        for p in people:
            self.b.find_by_xpath(passenger_xpath % p).click()
            # Confirm the dialog that may pop up after ticking a passenger.
            if self.b.find_by_xpath('//div[@id="dialog_xsertcj"]').visible:
                self.b.find_by_xpath('//div[@id="dialog_xsertcj"]/div/div/div/a[text()="确认"]').click()
        return 1

    def ring(self):
        """Play ``media/sound.ogg`` and show ``media/img.jpg`` until closed.

        Returns:
            1 once the window has been closed.
        """
        pygame.mixer.pre_init(64000, -16, 2, 4096)
        pygame.init()
        pygame.display.init()
        screen = pygame.display.set_mode([300, 300])
        pygame.time.delay(1000)  # wait one second for the mixer to initialise
        pygame.mixer.music.load("media/sound.ogg")
        pygame.mixer.music.play()
        running = True
        img = pygame.image.load("media/img.jpg")
        while running:
            screen.blit(img, (0, 0))
            pygame.display.flip()
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
        pygame.quit()
        return 1

    def _logout_and_exit(self):
        """Click the logout link and terminate the program."""
        self.b.find_by_text(u'退出').click()
        sys.exit()

    def executor(self):
        """Log in, then search and book until the operator confirms success."""
        self.login()
        self.switch_to_order_page()
        # Bound up front so that the blacklist bookkeeping below never
        # meets an undefined name.
        this_train = ''
        while self.success == 0:
            # Search every date and route until a bookable row is clicked.
            self.find_ticket = 0
            while self.find_ticket == 0:
                for date in self.date:
                    try:
                        self.count += 1
                        print("Try %d times" % self.count)
                        for fromStation in self.fromStation:
                            for toStation in self.toStation:
                                this_train = self.checkTicket(date, fromStation, toStation)
                                if self.find_ticket:
                                    break
                            if self.find_ticket:
                                break
                        if self.find_ticket:
                            break
                    except KeyboardInterrupt:
                        self._logout_and_exit()
                    except Exception:
                        # Best effort: a failed query moves on to the next date.
                        continue
            # book ticket for target people
            self.find_ticket = 0
            while self.find_ticket == 0:
                try:
                    self.find_ticket = self.book()
                except KeyboardInterrupt:
                    self._logout_and_exit()
                except Exception:
                    continue
            if self.playmusic:
                self.ring()
            print("订票成功了吗?(Y/N)")
            input_var = ''
            while input_var == '':
                input_var = sys.stdin.read(1)
                if input_var == 'Y' or input_var == 'y':
                    self.success = 1
                elif input_var == 'N' or input_var == 'n':
                    # Count the failure against this train, then go back to
                    # the query page for another round.
                    self.blacklist[this_train] = self.blacklist.get(this_train, 0) + 1
                    print(u"%s 失败 %d 次" % (this_train, self.blacklist[this_train]))
                    self.b.back()
                    self.b.reload()
                else:
                    # Anything else (such as the newline) is ignored.
                    input_var = ''
                    continue
        self.b.find_by_text(u'退出').click()
示例#17
0
from splinter.browser import Browser

# Open a Chrome session driven by splinter.
browser = Browser(driver_name="chrome")
# The page under inspection is an HTML file saved on the local disk
# (file:// URL), not a live site.
url = "file:///C:/Users/lc/Desktop/vipkids/%E9%A2%84%E7%BA%A6%E8%AF%BE%E7%A8%8B%20-%20VIPKID%E5%9C%A8%E7%BA%BF%E5%B0%91%E5%84%BF%E8%8B%B1%E8%AF%AD.html"
browser.visit(url)
# Select the <td> cells of the table row that has a <th> child whose text
# is '15:30'.
row = browser.find_by_xpath("//tr[th='15:30']/td")
# Show which Python type splinter returns for the first matched cell.
print(type(row[0]))
# Clicking that cell is left disabled:
# row[0].click()
示例#18
0
class but_ticket_obj(object):
    """Scripted 12306 ticket purchase driven through a splinter browser."""

    def __init__(self, username, password, order, passengers, dtime, starts,
                 ends, ticket_cell_id='YZ_02000K718602'):
        """Store the account, journey and browser settings.

        Parameters:
            username: account user name
            password: account password
            order: which "book" button to press, counted from 1; 0 presses
                the first one
            passengers: passenger names to tick on the order page
            dtime: travel date, stored in the ``_jc_save_fromDate`` cookie
            starts: departure value for the ``_jc_save_fromStation`` cookie
            ends: arrival value for the ``_jc_save_toStation`` cookie
            ticket_cell_id: id of the page element whose value tells whether
                the wanted ticket is available; defaults to the id that used
                to be hard-coded
        """
        self.username = username
        self.passwd = password
        self.order = order
        self.passengers = passengers
        self.starts = starts
        self.ends = ends
        self.dtime = dtime
        self.ticket_cell_id = ticket_cell_id
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'  # login page
        self.initMy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'  # page shown after a successful login
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'  # ticket query page
        self.driver_name = 'chrome'
        # Raw string: the backslashes are path separators, not escapes.
        self.executable_path = r'F:\cro\chromedriver.exe'

    def loginOn(self):
        """Fill in the credentials, then wait for the post-login page.

        The captcha is typed by the operator; the method polls the browser
        URL once per second until it equals ``initMy_url``.
        """
        self.driver.visit(self.login_url)
        self.driver.fill('loginUserDTO.user_name', self.username)  # user name field, located by name
        self.driver.fill('userDTO.password', self.passwd)  # password field, located by name
        print("该到输入验证码了......")
        while self.driver.url != self.initMy_url:
            sleep(1)

    def buy_ticket(self):
        """Log in, wait for the wanted ticket, then submit the order.

        Returns:
            True when the final confirmation button has been clicked; False
            when any step after opening the ticket page raised (the
            traceback is printed).
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1200, 800)
        self.loginOn()
        self.driver.visit(self.ticket_url)
        try:
            # Preload the query form through cookies, then reload the page
            # so that they take effect.
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                count += 1
                self.driver.find_by_text(u"查询").click()
                print(u"循环点击查询... 第 %s 次" % count)
                sleep(1)
                # Value of the availability cell of the wanted ticket.
                tarVal = self.driver.find_by_id(self.ticket_cell_id).value
                print(tarVal)
                if tarVal != u'无':
                    button_index = self.order - 1 if self.order != 0 else 0
                    self.driver.find_by_text(u"预订")[button_index].click()
                    break
            print(u"开始预订...")
            sleep(1)
            print(u'开始选择用户...')
            for user in self.passengers:
                self.driver.find_by_text(user).click()
                # Dismiss the confirmation dialog when one is present.
                confirm = self.driver.find_by_id("dialog_xsertcj_ok")
                if confirm:
                    confirm.click()
            print(u"提交订单...")
            # Ticket type option values: 1 adult, 2 child, 3 student, 4 disabled.
            self.driver.find_by_xpath(
                '//select[@id="ticketType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            # Seat class option values: 1 hard seat, 3 hard sleeper,
            # 4 soft sleeper, 9 business class, O (the letter, not zero)
            # second class, M first class.
            self.driver.find_by_xpath(
                '//select[@id="seatType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            print(u"提交订单...")
            self.driver.find_by_id('submitOrder_id').click()
            sleep(1.5)
            print(u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()
            return True
        except Exception:
            # Report the failure instead of hiding it; Ctrl-C and
            # SystemExit are no longer swallowed.
            import traceback
            traceback.print_exc()
            return False
示例#19
0
def splinter_scrape_bf(city, state):
    """Scrape BringFido hotels and reviews for a city and append them to the DB.

    Opens the BringFido lodging results for ``city``/``state``, walks the
    result pages, visits every hotel link found on a page and collects one
    row per review.  The collected rows are split into a reviews table and a
    hotels table (de-duplicated on ``business_id``) and appended to the
    ``bf_reviews`` and ``bf_hotels`` SQL tables via ``cadb.connect_aws_db``.

    Args:
        city: City name; lower-cased and embedded in the listing URL.
        state: State code; lower-cased and embedded in the listing URL.

    Returns:
        None.  Results are written to the database.
    """
    city = city.lower()
    state = state.lower()
    citystate_string = city+'_'+state+'_us/'
    url = 'http://www.bringfido.com/lodging/city/'+citystate_string

    # Each column appears exactly once; duplicate labels would yield duplicate
    # columns when the hotel/review subsets are selected below.
    columns = ['hotel_id',
               'hotel_img_url',
               'hotel_url',
               'hotel_name',
               'hotel_address',
               'hotel_city',
               'hotel_state',
               'hotel_rating',
               'hotel_latitude',
               'hotel_longitude',
               'review_count',
               'business_id',
               'review_id',
               'user_id',
               'username',
               'review_title',
               'review_text',
               'review_rating',
               'review_date']

    # Upper bound on reloads while waiting for a page button to appear.
    max_page_attempts = 10

    page_frames = []
    br = Browser()
    try:
        br.visit(url)

        page = 1
        npages = len(br.find_by_xpath('//*[@id="results_paging_controls_bottom"]/span'))

        while (page == 1 or page < npages):
            print('*'*70)
            print('Now on page {}'.format(page))
            archive_links = br.find_by_xpath('//*[@id="results_list"]/div')

            # Read everything needed from the listing before navigating
            # away from it to the individual hotel pages.
            hotels = []
            for lnk in archive_links:
                hotels.append({
                    'hotel_name': lnk.find_by_xpath('div[2]/h1/a').value,
                    'hotel_url': lnk.find_by_xpath('div/h1/a')['href'],
                    'hotel_img_url': lnk.find_by_xpath(
                        'div/div[@class="photo_inner"]/a/img')['src'],
                    'business_id': lnk['id'].split('_')[-1],
                })

            rows = []
            for hotel_id, hotel in enumerate(hotels):
                link = hotel['hotel_url']
                print('*'*75)
                print('Now on {}: {}'.format(hotel_id, link))
                print('*'*75)
                br.visit(link)

                # Address block: first line is the street address, second
                # line holds "city, state, <country/zip>".
                details = br.find_by_xpath('//*[@class="address"]').text.split('\n')
                address = details[0]
                csczip = details[1]
                hotel_city, hotel_state, _zipcode = csczip.strip().split(',')

                reviews = br.find_by_xpath('//*[@class="review_container"]')

                print(reviews)
                print('')
                hotel_rows = []
                for rev in reviews:
                    title = rev.find_by_xpath('div/div[1]').text
                    author = rev.find_by_xpath('div/div[2]').text
                    text = rev.find_by_xpath('div/div[3]').text
                    # The rating is the first character of the image file name.
                    rating = rev.find_by_xpath('div[2]/img')['src'].split('/')[-1][0:1]
                    hotel_rows.append({
                        'hotel_id': hotel_id,
                        'hotel_img_url': hotel['hotel_img_url'],
                        'hotel_url': link,
                        'hotel_name': hotel['hotel_name'],
                        'hotel_address': address,
                        'hotel_city': hotel_city,
                        'hotel_state': hotel_state,
                        'hotel_latitude': None,
                        'hotel_longitude': None,
                        'business_id': hotel['business_id'],
                        'review_id': 0,
                        'user_id': 0,
                        'username': author,
                        'review_title': title,
                        'review_text': text,
                        'review_rating': rating,
                    })
                    print(rating)

                # Rating and review count are per hotel, so compute them from
                # this hotel's reviews only.
                if hotel_rows:
                    mean_rating = np.mean(
                        [int(row['review_rating']) for row in hotel_rows])
                    for row in hotel_rows:
                        row['hotel_rating'] = mean_rating
                        row['review_count'] = len(hotel_rows)
                rows.extend(hotel_rows)

            print('Number of new titles: {}'.format(len(rows)))
            print('Number of new ratings: {}'.format(len(rows)))

            df = pd.DataFrame(rows, columns=columns)
            print('new entries from this page: {}'.format(len(df)))
            page_frames.append(df)

            page += 1
            if page < npages:
                # Reload the listing and click through to the next page,
                # retrying a bounded number of times if the button is missing.
                for _attempt in range(max_page_attempts):
                    br.visit(url)
                    time.sleep(1)
                    print('Now scraping page {} of {}'.format(page, npages))
                    button = br.find_by_id('page_'+str(page))
                    print(button)
                    if len(button) > 0:
                        button.click()
                        break
                else:
                    print('Could not open page {}; stopping pagination'.format(page))
                    break
    finally:
        # Always release the browser, even when scraping fails part-way.
        br.quit()

    bigdf = pd.concat(page_frames, ignore_index=True)

    bigdf_reviews = bigdf[['hotel_id', 'review_id', 'business_id', 'user_id',
                          'username', 'review_title', 'review_text', 'review_rating']].copy()

    bigdf_hotels = bigdf[['hotel_id', 'hotel_url', 'hotel_img_url', 'hotel_name',
                          'hotel_address', 'hotel_city', 'hotel_state', 'hotel_rating',
                          'hotel_latitude', 'hotel_longitude', 'business_id', 'review_count']].copy()

    bigdf_hotels.drop_duplicates(subset='business_id', inplace=True)
    # The id columns are written as NULL.
    # NOTE(review): presumably the database assigns them -- confirm against the table schema.
    bigdf_hotels['hotel_id'] = None
    bigdf_reviews['review_id'] = None

    print('Number of bf reviews to add: {}'.format(len(bigdf_reviews)))

    engine = cadb.connect_aws_db(write_unicode=True)
    bigdf_reviews.to_sql('bf_reviews', engine, if_exists='append', index=False)
    bigdf_hotels.to_sql('bf_hotels', engine, if_exists='append', index=False)
示例#20
0
class LemonLemon_douyin(object):
    """Download Douyin videos listed in an input file.

    A headless Chrome session (splinter ``Browser``) is opened on
    construction; :meth:`remove_watermark` uses it to resolve download links.
    """

    def __init__(self, width=500, height=300):
        """Start the headless browser and the ``Tools`` helper.

        ``width`` and ``height`` are accepted for compatibility but are not
        used by this class.
        """
        # Headless browser with a desktop Chrome user agent.
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name="chrome",
                              options=chrome_options,
                              headless=True)

        self.tool = Tools()

    def get_video_urls(self, input_f):
        """Read video file names and links from ``input_f``.

        Every non-blank line is evaluated as a Python expression that must
        yield a mapping with the keys ``"ID"``, ``"des_md5"`` and ``"link"``.
        Also stores a ``<date>_<hour>:<minute>`` stamp in ``self.date_today``.

        Parameters:
            input_f: path of the input file
        Returns:
            video_names: list of names built as ``<ID>_<des_md5>.mp4``
            video_urls: list of the ``link`` values, in file order
        """
        video_names = []
        video_urls = []
        now_date = datetime.datetime.now()
        self.date_today = (str(now_date.date()) + "_" + str(now_date.hour) +
                           ":" + str(now_date.minute))

        with open(input_f) as f:
            for line in f:
                # Blank lines carry no record and would make eval() fail.
                if not line.strip():
                    continue
                # SECURITY: eval() executes whatever the input file contains.
                # Only feed trusted files; if the lines are plain literals,
                # ast.literal_eval would be the safe replacement.
                info = eval(line)
                video_urls.append(info["link"])
                video_names.append(info["ID"] + "_" + info["des_md5"] + ".mp4")

        return video_names, video_urls

    def get_download_url(self, video_url):
        """Resolve the (watermarked) download address of a video page.

        Fetches ``video_url``, takes the last ``<script>`` element, extracts
        the JSON object assigned to ``var data = [...]`` and returns the
        first entry of its ``video.play_addr.url_list``.

        Parameters:
            video_url: address of the video page
        Returns:
            download_url: address the video can be downloaded from
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, "lxml")
        script = bf.find_all("script")[-1]
        # Raw string so that "\[" is a regex escape, not a string escape.
        video_url_js = re.findall(r"var data = \[(.+)\];", str(script))[0]
        video_html = json.loads(video_url_js)
        download_url = video_html["video"]["play_addr"]["url_list"][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name`` and print progress.

        Parameters:
            video_url: address of the video page
            video_name: path of the file to write
            watermark_flag: when true the link is resolved through
                :meth:`remove_watermark`, otherwise through
                :meth:`get_download_url`
        Raises:
            requests.HTTPError: when the server answers with a 4xx/5xx status
        """
        size = 0
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        with closing(requests.get(video_url, stream=True,
                                  verify=False)) as response:
            # Report HTTP errors to the caller instead of silently writing
            # nothing; run() would otherwise log the video as downloaded.
            response.raise_for_status()
            chunk_size = 1024
            # The header may be absent; 0 means "size unknown".
            content_size = int(response.headers.get("content-length", 0))
            if response.status_code == 200:
                sys.stdout.write("  [文件大小]:%0.2f MB\n" %
                                 (content_size / chunk_size / 1024))

                with open(video_name, "wb") as out:
                    for data in response.iter_content(chunk_size=chunk_size):
                        out.write(data)
                        size += len(data)
                        out.flush()

                        # A percentage is only meaningful with a known size.
                        if content_size:
                            sys.stdout.write("  [下载进度]:%.2f%%" %
                                             float(size / content_size * 100) +
                                             "\r")
                            sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Resolve a download link through douyin.iiilab.com.

        Submits ``video_url`` on the site with the browser session and reads
        the link out of the first result paragraph.

        Parameters:
            video_url: address of the video page
        Returns:
            the ``href`` of the first ``<a>`` in the result
        """
        self.driver.visit("http://douyin.iiilab.com/")
        self.driver.find_by_tag("input").fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, "lxml")
        return bf.find("a").get("href")

    def run(self, input_f):
        """Download every video listed in ``input_f``.

        Files go to ``DOWNLOAD/<ID>/`` next to the current working
        directory's parent; existing files are skipped.  URLs whose download
        failed are written to ``error_url.txt``, one per line.

        Parameters:
            input_f: path of the input file (see :meth:`get_video_urls`)
        Returns:
            None
        """
        error_url = []
        video_names, video_urls = self.get_video_urls(input_f)

        download_dir = os.path.join(
            os.path.abspath(os.path.dirname(os.getcwd())), "DOWNLOAD")

        if not os.path.exists(download_dir):
            os.mkdir(download_dir)

        print("视频下载中:共有%d个作品!\n" % len(video_urls))
        for num, (video_name, video_url) in enumerate(zip(video_names, video_urls)):
            print("  解析第%d个视频链接 [%s] 中,请稍后!\n" % (num + 1, video_url))
            # Random pause between downloads.
            random_wait = random.uniform(3, 5)
            print("waiting...", random_wait)
            time.sleep(random_wait)

            ID = video_name.split("_")[0]

            # One sub-directory per ID.
            target_dir = os.path.join(download_dir, ID)
            if not os.path.isdir(target_dir):
                os.mkdir(target_dir)

            # Skip videos that were already downloaded.
            video_file = os.path.join(target_dir, video_name)
            if not os.path.exists(video_file):
                try:
                    self.video_downloader(video_url, video_file)
                    self.tool.writeToFile(video_name, "SuccessDownload")
                except Exception:
                    # Best effort: remember the failure and carry on.  Not a
                    # bare except, so Ctrl-C still stops the run.
                    print("**************************")
                    print("ERROR", video_url)
                    error_url.append(video_url)
                print("\n")
            else:
                print(video_name + "文件已存在")

        # NOTE: the browser session is left open here.
        with open("error_url.txt", "w") as f:
            f.write("\n".join(error_url))
        print("下载完成!")

        print("出错数量:", len(error_url))
示例#21
0
class Scrapy12306():
    """Browser automation for logging in to 12306 and booking a ticket.

    Settings are read from the ``[config]`` section of ``12306.conf``; the
    browser is opened on the configured URL at construction time.
    """

    def __init__(self):
        """Read ``12306.conf`` and open the configured browser on the URL."""
        config = ConfigParser.ConfigParser()
        # Close the config file as soon as it has been parsed.
        with codecs.open("12306.conf", "r", "utf-8") as conf_file:
            config.readfp(conf_file)
        self.url = config.get("config","url")
        self.username = config.get("config","username")
        self.password = config.get("config","password")
        self.bType = config.get("config","browser")
        # NOTE: departure and arrival are fixed here rather than read from
        # the "_from"/"_to" config options.
        self._from = u'北京'
        self._to = u'邯郸'
        self.date = config.get("config","date")

        print("browser :%s url:%s" % (self.bType,self.url))
        self.browser = Browser(self.bType)
        self.browser.visit(self.url)

    def login(self):
        """Fill in the credentials and submit the login form.

        Exits the process with status -1 when username or password is empty.

        Returns:
            True when the ``link_4_myTicket`` element is found after
            submitting, False otherwise.
        """
        if self.username=="" or self.password=="":
            print("error:username and password must not be empty")
            self.browser.quit()
            sys.exit(-1)
        self.browser.fill("loginUserDTO.user_name",self.username)
        self.browser.fill("userDTO.password",self.password)
        # NOTE(review): presumably leaves time to solve the captcha by hand -- confirm.
        time.sleep(10)
        self.browser.find_by_id("loginSub").click()
        loginFlag = self.browser.find_by_id("link_4_myTicket")
        return not loginFlag.is_empty()


    def getTicket(self):
        """Query tickets until a bookable train shows up, then start the order.

        The route is set through cookies with fixed station codes and the
        configured date.  The query is repeated until one of the "预订"
        buttons at positions 11-20 carries the ``btn72`` class; that button
        is clicked, two passengers are ticked, a confirmation dialog is
        shown and the order is submitted.
        """
        self.browser.find_by_xpath("//a[@href='/otn/leftTicket/init']")[0].click()
        # The query form is pre-filled through cookies instead of typing
        # into the station/date inputs.
        self.browser.cookies.add({"_jc_save_fromStation":"%u5317%u4EAC%2CBJP"})
        self.browser.cookies.add({'_jc_save_toStation':'%u90AF%u90F8%2CHDP'})
        self.browser.cookies.add({"_jc_save_fromDate":self.date})

        # Reload until the query button is present, then run the first query.
        isload = False
        while not isload:
            self.browser.reload()
            isload = self.browser.is_element_present_by_id("query_ticket",wait_time=10)
            if not isload:
                time.sleep(30)
                continue

            self.browser.find_by_id("query_ticket").click()

        print(self.browser.cookies.all())
        flag = False
        while not flag:
            try:
                stations = self.browser.find_by_text(u"预订")
                station = None
                # Only look at buttons that actually exist; indexing past the
                # end used to raise and retry forever without re-querying.
                for i in range(11, min(21, len(stations))):
                    station = stations[i]
                    flag = station.has_class("btn72")

                    if flag:
                        break

                if not flag:
                    # Nothing bookable yet: query again after a short pause.
                    self.browser.find_by_id("query_ticket").click()
                    time.sleep(2)
                    continue
                station.click()
                time.sleep(1)
            except Exception as e:
                print(e)
                # Pause before retrying so a persistent error does not spin.
                time.sleep(1)
                flag = False

        p1 = self.browser.find_by_xpath("//label[@for='normalPassenger_0']")
        p2 = self.browser.find_by_xpath("//label[@for='normalPassenger_3']")
        print('p1,p2:%s %s' % (p1,p2))

        # Tick each passenger through its label, or through the input itself
        # when no label is found.
        if p1.is_empty():
            self.browser.find_by_id("normalPassenger_0")[0].click()
        else:
            p1[0].click()

        if p2.is_empty():
            self.browser.find_by_id("normalPassenger_3")[0].click()
        else:
            p2[0].click()
        self.browser.evaluate_script("confirm('抢到票了帅哥,订票吗')")
        time.sleep(15)
        self.browser.find_by_id("submitOrder_id").click()
示例#22
0
class TestEngine(object):
    """Runs scripted UI actions against a page in a splinter browser.

    Class-level settings (changed through :meth:`set_config`):
    sleep after each form value, whether to hover before clicking, and the
    pause between hovering and clicking.
    """
    __sleep_time = 2
    __mouse_over = True
    __mouse_over_sleep = 1

    def __init__(self, browser_name, execute_path=None):
        """Open a full-screen browser, optionally with an explicit driver path."""
        if execute_path is None:
            self.__browser = Browser(browser_name, fullscreen=True)
        else:
            self.__browser = Browser(browser_name, executable_path=execute_path, fullscreen=True)
        self.__quit = False

    @staticmethod
    def set_config(config):
        """Update the class-level settings from ``config``.

        Keys ``sleep_time``, ``mouse_over`` and ``mouse_over_sleep`` are
        read; a missing or ``None`` value resets the setting to its default
        (2, True and 1 respectively).
        """
        TestEngine.__sleep_time = 2 if config.get("sleep_time") is None else config.get("sleep_time")
        TestEngine.__mouse_over = True if config.get("mouse_over") is None else config.get("mouse_over")
        TestEngine.__mouse_over_sleep = 1 if config.get("mouse_over_sleep") is None else config.get("mouse_over_sleep")

    def test_list_acts(self, domain, action_list, back_fun=None, result_back=None):
        """Run every action object of ``action_list`` on a new thread."""
        thread_deal = threading.Thread(target=self.__test_list_thread, args=(domain, action_list, back_fun, result_back), name="TestEngine deal tester")
        thread_deal.start()

    def test_deal(self, domain, action_obj, back_fun=None, result_back=None):
        """Run a single action object on a new thread."""
        thread_deal = threading.Thread(target=self.__test_do_thread, args=(domain, action_obj, back_fun, result_back), name="TestEngine deal tester")
        thread_deal.start()

    def quit(self):
        """Mark the engine as quit and close the browser."""
        self.__quit = True
        self.__browser.quit()

    def is_quited(self):
        """Return True once :meth:`quit` has been called."""
        return self.__quit

    def __finish(self, wait_close, back_fun):
        """Close down after a run: wait, then call ``back_fun`` or quit.

        Nothing happens when ``wait_close`` is 0.
        """
        if wait_close != 0:
            sleep(wait_close)

            if back_fun is None:
                self.quit()
            else:
                back_fun()

    def __test_list_thread(self, domain, action_list, back_fun=None, result_back=None):
        """Thread body of :meth:`test_list_acts`.

        The close-down wait is taken from the first action object.
        """
        try:
            for action in action_list:
                self.__test_do(domain, action, result_back)
        except Exception as e:
            raise Exception("[Error code] deal test list failed, error code=%s" % e)
        finally:
            # An empty list has no first action to take waitClose from.
            if action_list:
                self.__finish(action_list[0].waitClose, back_fun)


    def __test_do_thread(self, domain, action_obj, back_fun=None, result_back=None):
        """Thread body of :meth:`test_deal`."""
        try:
            self.__test_do(domain, action_obj, result_back)
        except Exception as e:
            raise Exception("[Error code] deal test failed, error code=%s" % e)
        finally:
            self.__finish(action_obj.waitClose, back_fun)


    def __test_do(self, domain, action_obj, result_back=None):
        """Visit ``domain + action_obj.urlPath`` and play its forms/actions."""
        test_url = domain+action_obj.urlPath
        self.__browser.visit(test_url)

        # With forms, the first action is run once after each form has been
        # filled in; the remaining actions then run in order.
        action_list = TesterActionData().dict_to_list(action_obj.actionList)
        if action_obj.forms is not None:
            form_action = action_list[0] if action_list else None

            forms = TesterForms().dict_to_list(action_obj.forms)
            for form in forms:
                params = TesterFormData().dict_to_list(form.params)
                for param in params:
                    self.__set_value(int(param.formType), param.formElName, param.formElValue.decode("utf-8"), int(param.index))
                    sleep(TestEngine.__sleep_time)

                if form_action is not None:
                    self.__deal_action(form_action, result_back)

                sleep(action_obj.sleepTime)

            remaining = action_list[1:]
        else:
            remaining = action_list

        for action_deal in remaining:
            self.__deal_action(action_deal, result_back)
            sleep(action_obj.sleepTime)


    def __set_value(self, form_type, el_name, el_value, index):
        """Set ``el_value`` on the ``index``-th element matching ``el_name``.

        Text-like inputs and textareas get the value assigned, checkboxes
        are checked/unchecked by the value's truthiness, radios are clicked
        and selects have the option with that value clicked.
        """
        elements = self.__event_element(form_type, el_name)
        element = elements[index]
        if element['type'] in ['text', 'password', 'tel'] or element.tag_name == 'textarea':
            element.value = el_value
        elif element['type'] == 'checkbox':
            if el_value:
                element.check()
            else:
                element.uncheck()
        elif element['type'] == 'radio':
            element.click()
        elif element._element.tag_name == 'select':
            element.find_by_value(el_value).first._element.click()
        else:
            element.value = el_value


    def __event_element(self, el_type, el_value):
        """Find elements by the lookup kind encoded in ``el_type``.

        Raises:
            ValueError: when the lookup kind is not supported.
        """
        ele_type = EL_TYPE.value(el_type)

        finders = {
            "id": self.__browser.find_by_id,
            "name": self.__browser.find_by_name,
            "tag": self.__browser.find_by_tag,
            "value": self.__browser.find_by_value,
            # "selector" lookups are XPath expressions.
            "selector": self.__browser.find_by_xpath,
            "css": self.__browser.find_by_css,
        }
        if ele_type not in finders:
            raise ValueError("Test Engine can't deal the element type:%s, el_type:%s" % (ele_type, el_type))
        return finders[ele_type](el_value)


    def __deal_action(self, action_data, result_back=None):
        """Perform one action and report its expected-text check.

        When ``action_data.testerResult`` and ``result_back`` are both set,
        ``result_back`` receives a ``TesterResult`` telling whether that text
        is present on the page three seconds after the action.

        Raises:
            Exception: when the action kind is not supported.
        """
        action_type = ACTION_TYPE.value(action_data.action)

        # Work on the most recently opened window, in case an earlier action
        # opened a new page.
        self.__browser.windows.current = self.__browser.windows[-1]

        if action_type not in ("click", "double click", "right click",
                               "mouse over", "mouse out", "select"):
            raise Exception("don't find action for action:%s" % action_data.action)

        target = self.__event_element(action_data.elType, action_data.elValue)[int(action_data.index)]
        if action_type == "click":
            self.__mouse_of_click(target)
        elif action_type == "double click":
            self.__mouse_of_double_click(target)
        elif action_type == "right click":
            self.__mouse_of_right_click(target)
        elif action_type == "mouse over":
            target.mouse_over()
        elif action_type == "mouse out":
            target.mouse_out()
        else:
            target.select()

        try:
            if action_data.testerResult is not None and result_back is not None:
                sleep(3)
                result_back(TesterResult(action_data.testerResult, self.__browser.is_text_present(action_data.testerResult)))
        except Exception:
            # Report the check as failed; there is nobody to report to when
            # no callback was given.
            if result_back is not None:
                result_back(TesterResult(action_data.testerResult, False))


    def __hover_first(self, event_deal_obj):
        """Hover over the element and pause, when hovering is enabled."""
        if TestEngine.__mouse_over:
            event_deal_obj.mouse_over()
            sleep(TestEngine.__mouse_over_sleep)


    def __mouse_of_click(self, event_deal_obj):
        """Click the element, hovering first when enabled."""
        self.__hover_first(event_deal_obj)
        event_deal_obj.click()


    def __mouse_of_right_click(self, event_deal_obj):
        """Right-click the element, hovering first when enabled."""
        self.__hover_first(event_deal_obj)
        event_deal_obj.right_click()


    def __mouse_of_double_click(self, event_deal_obj):
        """Double-click the element, hovering first when enabled."""
        self.__hover_first(event_deal_obj)
        event_deal_obj.double_click()
示例#23
0
# Log in.
b.find_by_id('object_id').fill(username)
b.find_by_id('password').fill(pwd)
# NOTE(review): the result of this lookup is discarded -- presumably the
# "keep password" box was meant to be checked here; confirm.
b.find_by_name('keep_pwd')
b.find_by_id('confirmLogin').click()
time.sleep(1)
b.visit('http://sdp.nd/main.html')

b.find_by_id('appSearchTxt').fill(component_name)

# Wait until the component's detail link shows up.  The previous test
# (`... is []`) compared identities and was therefore never true, so the
# script never actually waited.
while len(b.find_link_by_partial_href(
        '/modules/mobileComponent/detail.html?appId')) == 0:
    print('.')
    time.sleep(0.5)

b.find_link_by_partial_href(
    '/modules/mobileComponent/detail.html?appId').click()

# Select the branch.
time.sleep(0.5)
b.find_by_xpath('/html/body/div[4]/a[2]').first.click()

b.find_by_xpath('//*[@id="appDeatil"]/div[1]/span[2]/span').first.click()
b.find_by_id(env_id).first.find_by_tag('a').click()
b.find_by_id('publish').first.click()

# Version number and description.
b.find_by_id('version').fill(versionNo)
b.find_by_id('versionDesc').fill(detail)

# The final confirmation step is left disabled:
# b.find_by_xpath('//*[@id="gitlist"]/div').first.click()
# b.find_by_id('publishConfirm').click()
示例#24
0
            b.find_by_xpath('//*[@id="Jprice"]/li[6]').click()
            b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a')
            time.sleep(5)
            loop(b)
    except Exception as e:
        b.reload()
        time.sleep(1)
        loop(b)



# Elements used on the order page, and the title of the concert page.
_PRICE_XPATH = '//*[@id="Jprice"]/li[6]'
_BUY_LINK_XPATH = '/html/body/div[11]/div[2]/div[4]/div[5]/a'
_CONCERT_TITLE = '【五月天郑州演唱会门票】2017五月天 LIFE [ 人生无限公司 ] 巡回演唱会—郑州站-永乐票务'

b = Browser(driver_name='chrome')
b.visit(url)
login(b)
b.visit(order_url)
b.find_by_xpath(_PRICE_XPATH).click()
b.find_by_xpath(_BUY_LINK_XPATH).click()
time.sleep(5)
while True:
    loop(b)
    # Leaving the concert page is treated as success.
    if b.title != _CONCERT_TITLE:
        print('SUCCESS!')
        break
    # Still on the concert page: pick the price again and retry.
    b.find_by_xpath(_PRICE_XPATH).click()
    # NOTE(review): this lookup is not clicked, unlike the first pass
    # above -- confirm whether a .click() is missing.
    b.find_by_xpath(_BUY_LINK_XPATH)
    time.sleep(2)
    loop(b)


示例#25
0
class Ticket(object):
    """12306 ticket watcher configured from an INI file.

    The config supplies the station cookie values (``STATIONCOOKIE``), the
    login data and browser (``GLOBAL``) and the search options (``TICKET``).
    :meth:`executor` runs the whole flow.
    """

    def __init__(self, config_file):
        """Read ``config_file``, apply its options and open the browser.

        Exits the process when the booking options are invalid.
        """
        ## config parser setting
        self.config_file = config_file
        self.settings = configparser.ConfigParser()
        self.settings._interpolation = configparser.ExtendedInterpolation()
        self.settings.read(self.config_file,'utf-8')
        ## environment setting
        self.brower='chrome'
        self.station={}
        self.url = "https://kyfw.12306.cn/otn/leftTicket/init"
        # Seat classes; the position in this list is the column index of the
        # seat class in a result row.
        self.ticket_index = [
                            '',
                            u'商务座',
                            u'特等座',
                            u'一等座',
                            u'二等座',
                            u'高级软卧',
                            u'软卧',
                            u'硬卧',
                            u'软座',
                            u'硬座',
                            u'无座'
                            ]
        self.username = ''
        self.password = ''
        self.date_format='%Y-%m-%d'
        self.tolerance = -1
        self.blacklist = {}
        self.date = []
        self.isStudent = False
        self.success = 0
        self.find_ticket = 0
        self.playmusic = False
        self.count = 0
        # Every default is set before the config is applied, so that options
        # read from the file (e.g. the alarm switch) are not reset afterwards.
        self.config_parser()
        # Opened last so that the browser named in the config is the one used.
        self.b = Browser(driver_name=self.brower)

    def ConfigSectionMap(self,section):
        """Return the options of ``section`` as a dict.

        An option that cannot be read is reported and mapped to ``None``.
        """
        dict1 = {}
        for option in self.settings.options(section):
            try:
                dict1[option] = self.settings.get(section, option)
            except Exception:
                print("exception on %s!" % option)
                dict1[option] = None
        return dict1

    def daterange(self, start_date, end_date):
        """Yield every date from ``start_date`` to ``end_date`` inclusive."""
        for n in range(int ((end_date - start_date).days) + 1):
            yield start_date + timedelta(n)

    def config_parser(self):
        """Load stations and booking options; exit when either reports -1."""
        if self.retrieve_station_dict() == -1:
            sys.exit()
        if self.retrieve_book_options() == -1:
            sys.exit()

    def retrieve_station_dict(self):
        """Copy the ``STATIONCOOKIE`` section into ``self.station``."""
        dict_helper=self.ConfigSectionMap('STATIONCOOKIE')
        for name, value in dict_helper.items():
            self.station[name]=value

    def retrieve_book_options(self):
        """Read login data and search options from the config.

        Returns:
            -1 when a date range is requested but invalid, otherwise None.
        """
        login_info=self.ConfigSectionMap('GLOBAL')
        self.username = login_info['username'].strip()
        self.password = login_info['password'].strip()
        self.brower = login_info['browser']
        book_settings = self.ConfigSectionMap('TICKET')
        self.fromStation = [ station.strip() for station in book_settings['from_station'].split(',')]
        self.toStation = [ station.strip() for station in book_settings['to_station'].split(',')]
        trains = [ train.strip() for train in book_settings['trains'].split(',')]
        # An empty "trains" option means "any train".
        if len(trains) == 1 and trains[0] == '':
            self.trains = []
        else:
            self.trains =  trains
        self.ticket_type =[ _type.strip() for _type in book_settings['ticket_type'].split(',')]
        rangeQuery = book_settings['range_query'].strip()
        if rangeQuery == 'Y':
            # Range mode: "date" holds the first and last day to search.
            date = [ d.strip() for d in book_settings['date'].split(',')]
            if len(date) < 2:
                print("未设置正确的起至时间")
                return -1
            else:
                start_date = datetime.strptime(date[0],self.date_format)
                end_date = datetime.strptime(date[1],self.date_format)
                if end_date < start_date:
                    # The message now matches the check: the end date must
                    # not be earlier than the start date.
                    print("查询截止日期不可早于开始日期!")
                    return -1
                for single_date in self.daterange(start_date, end_date):
                    self.date.append(single_date.strftime(self.date_format))
        else:
            self.date = [ d.strip() for d in book_settings['date'].split(',')]
        if book_settings['student'].strip() == 'Y':
            self.isStudent = True
        self.tolerance = int(book_settings['tolerance'])
        self.people = [ people.strip() for people in book_settings['people'].split(',') ]
        if book_settings['alarm'].strip() == 'Y':
            self.playmusic = True

    def login(self):
        """Open the site and fill in the login form."""
        self.b.visit(self.url)
        self.b.find_by_text(u"登录").click()
        self.b.fill("loginUserDTO.user_name",self.username)
        self.b.fill("userDTO.password",self.password)
        # Stops in the debugger until the user continues.
        # NOTE(review): presumably a pause for completing the login by hand -- confirm.
        pdb.set_trace()

    def page_has_loaded(self):
        """Return True when ``document.readyState`` is ``complete``."""
        page_state = self.b.evaluate_script("document.readyState")
        return page_state == 'complete'

    def switch_to_order_page(self):
        """Wait for the "车票预订" link and click it."""
        order = []
        while len(order) == 0:
            order = self.b.find_by_text(u"车票预订")
        order[0].click()

    def checkTicket(self, date, fromStation, toStation):
        """Query one date/route and click the first acceptable train.

        A train is acceptable when its row has a booking link, it is in
        ``self.trains`` (if any are configured), it has not failed
        ``self.tolerance`` times, and one of the wanted seat classes is
        available.  ``self.find_ticket`` is set to 1 on a hit.

        Returns:
            The train identifier last read from the result table (``''``
            when none was read), or ``(0, '')`` when a configured seat class
            is unknown.
        """
        print('date: %s, from %s, to %s'%(date, fromStation, toStation))
        self.b.cookies.add({"_jc_save_fromDate":date})
        self.b.cookies.add({"_jc_save_fromStation":self.station[fromStation]})
        self.b.cookies.add({"_jc_save_toStation":self.station[toStation]})
        self.b.reload()
        if self.isStudent:
            self.b.find_by_text(u'学生').click()
        self.b.find_by_text(u"查询").click()
        all_trains = []
        while self.page_has_loaded() == False:
            continue
        while len(all_trains) == 0:
            all_trains = self.b.find_by_id('queryLeftTable').find_by_tag('tr')
        # Defined up front: it is returned even when no row qualifies.
        this_train = ''
        for k, train in enumerate(all_trains):
            tds = train.find_by_tag('td')
            if tds and len(tds) >= 10:
                has_ticket= tds.last.find_by_tag('a')
                if len(has_ticket) != 0:
                    if k + 1 < len(all_trains):
                        # The train identifier is stored on the following row.
                        this_train = all_trains[k+1]['datatran']
                        if len(self.trains) != 0 and this_train not in self.trains:
                            continue
                        if self.tolerance != -1 and this_train in self.blacklist and self.blacklist[this_train] >= self.tolerance:
                            print(u"%s 失败 %d 次, 跳过"%(this_train, self.blacklist[this_train]))
                            continue
                    for cat in self.ticket_type:
                        if cat in self.ticket_index:
                            i = self.ticket_index.index(cat)
                        else:
                            print('无效的席别信息')
                            return 0, ''
                        if tds[i].text != u'无' and tds[i].text != '--':
                            if tds[i].text != u'有':
                                print(u'%s 的 %s 有余票 %s 张!'%(this_train, cat ,tds[i].text))
                            else:
                                print(u'%s 的 %s 有余票若干张!'%(this_train, cat))
                            self.find_ticket = 1
                            tds.last.click()
                            break
            if self.find_ticket:
                break
        return this_train

    def book(self):
        """Tick every configured passenger on the order page.

        Returns:
            1 in all cases (also when no passenger is configured).
        """
        while self.page_has_loaded() == False:
            continue
        if len(self.people) == 0:
            print('没有选择乘车人!')
            return 1
        more = []
        while len(more) == 0:
            more = self.b.find_by_text(u"更多")
        more[0].click()
        person = []
        people = self.people
        # Wait until the passenger list has been rendered.
        while len(person) == 0:
            person=self.b.find_by_xpath('//ul[@id="normal_passenger_id"]/li/label[contains(text(),"%s")]'%people[0])
        for p in people:
            self.b.find_by_xpath('//ul[@id="normal_passenger_id"]/li/label[contains(text(),"%s")]'%p).click()
            # Confirm the dialog that may pop up after ticking a passenger.
            if self.b.find_by_xpath('//div[@id="dialog_xsertcj"]').visible:
                self.b.find_by_xpath('//div[@id="dialog_xsertcj"]/div/div/div/a[text()="确认"]').click()
        # Return only after all passengers have been handled.
        return 1

    def ring(self):
        """Play the alarm sound and show an image until the window is closed.

        Returns:
            1 once the pygame window has been closed.
        """
        pygame.mixer.pre_init(64000, -16, 2, 4096)
        pygame.init()
        pygame.display.init()
        screen=pygame.display.set_mode([300,300])
        # Wait one second so the mixer can finish initialising.
        pygame.time.delay(1000)
        pygame.mixer.music.load("media/sound.ogg")
        pygame.mixer.music.play()
        running = True
        img=pygame.image.load("media/img.jpg")
        while running:
            screen.blit(img,(0,0))
            pygame.display.flip()
            for event in pygame.event.get():
                if event.type==pygame.QUIT:
                    running = False
        pygame.quit ()
        return 1

    def executor(self):
        """Run the whole flow: log in, search, book, and ask for the outcome.

        Searching and booking are retried until the user answers "Y"; a "N"
        answer counts a failure for the train in ``self.blacklist``.
        """
        self.login()
        self.switch_to_order_page()
        # Defined up front so that the failure bookkeeping below never meets
        # an unbound name.
        this_train = ''
        while self.success == 0:
            self.find_ticket = 0
            while self.find_ticket == 0:
                for date in self.date:
                    try:
                        self.count += 1
                        print("Try %d times" % self.count)
                        for fromStation in self.fromStation:
                            for toStation in self.toStation:
                                this_train = self.checkTicket(date, fromStation, toStation)
                                if self.find_ticket:
                                    break
                            if self.find_ticket:
                                break
                        if self.find_ticket:
                            break
                    except KeyboardInterrupt:
                        self.b.find_by_text(u'退出').click()
                        sys.exit()
                    except Exception:
                        # Best effort: move on to the next date.
                        continue
            # book ticket for target people
            self.find_ticket = 0
            while self.find_ticket == 0:
                try:
                    self.find_ticket = self.book()
                except KeyboardInterrupt:
                    self.b.find_by_text(u'退出').click()
                    sys.exit()
                except Exception:
                    continue
            if self.playmusic:
                self.ring()
            print("订票成功了吗?(Y/N)")
            input_var = ''
            while input_var == '':
                input_var= sys.stdin.read(1)
                if input_var == 'Y' or input_var == 'y':
                    self.success = 1
                elif input_var == 'N' or input_var == 'n':
                    if this_train in self.blacklist:
                        self.blacklist[this_train] += 1
                    else:
                        self.blacklist[this_train] = 1
                    print(u"%s 失败 %d 次"%(this_train, self.blacklist[this_train]))
                    self.b.back()
                    self.b.reload()
                else:
                    # Any other character: ask again.
                    input_var = ''
                    continue
        self.b.find_by_text(u'退出').click()
示例#26
0
class Hujiang(object):
    """Book Hujiang one-to-one kids' lessons through a splinter-driven browser.

    Typical use is ``Hujiang().start()``: log in, open the lesson calendar,
    choose the first teacher and click the first bookable slot that matches
    the wanted day columns (``indexs``) and time rows (``times``).
    """
    driver_name = ''
    executable_path = ''
    # Account user name / password.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from the environment or an untracked config file.
    username = u"Peters_mom"
    passwd = u"wydycg98"
    # Lesson calendar page that is queried for free slots.
    ticket_url = "https://class.hujiang.com/talk/kids/calendar?classId=17380104"
    # Login page.
    login_url = "https://login.hujiang.com/"
    # Account page; reaching it is treated as "login finished".
    initmy_url = "https://my.hujiang.com/account/"
    # Calendar column numbers (as strings) of the days to book; set by start().
    indexs = []
    # Calendar row numbers of the time slots to book: row 41 is the 20:00
    # lesson, row 42 the 20:30 lesson.
    times = ['41', '42']

    # Today's weekday label -> calendar columns of [Tuesday, Friday]
    # (e.g. on Monday, Tuesday is column 2 and Friday is column 5).
    _WEEKDAY_INDEXES = {
        u'周一': ['2', '5'],
        u'周二': ['1', '4'],
        u'周三': ['7', '3'],
        u'周四': ['6', '2'],
        u'周五': ['5', '1'],
        u'周六': ['4', '7'],
        u'周日': ['3', '6'],
    }
    # Placeholder columns used when the weekday label is not recognised.
    _INVALID_INDEXES = ['0', '0']

    def __init__(self):
        """Select the Chrome driver and greet the user."""
        self.driver_name = 'chrome'
        print("Welcome To Use The Tool")

    def login(self):
        """Submit the login form and wait until the account page is shown.

        Browser errors are printed and swallowed (best effort), so the caller
        carries on even when the login did not go through.
        """
        try:
            self.driver.visit(self.login_url)
            self.driver.fill("username", self.username)
            self.driver.fill("password", self.passwd)
            self.driver.find_by_tag("button").click()
            # Poll until the browser has been redirected to the account page.
            while self.driver.url != self.initmy_url:
                sleep(1)
            # Report success only once the redirect has actually happened.
            print("登陆成功")
        except Exception as e:
            print(e)

    def start(self):
        """Open the browser, log in and try to book with the first teacher."""
        self.driver = Browser(driver_name=self.driver_name)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()

        print("查询课程开始....")
        self.driver.visit(self.ticket_url)
        try:
            self.driver.find_link_by_text("按老师预约").click()
            print("找第一个老师并预约")
            self.driver.find_link_by_text(u"预约").click()

            weekday = self.driver.find_by_xpath(
                '//div[@class="_3fqOfVfQ"]').first.value
            # Copy the table entry so later mutation cannot touch the table.
            self.indexs = list(
                self._WEEKDAY_INDEXES.get(weekday, self._INVALID_INDEXES))
            print("周二index={0}, 周五index={1}".format(
                self.indexs[0], self.indexs[1]))
            print("20:00 time={0}, 20:30 time={1}".format(
                self.times[0], self.times[1]))
        except Exception as e:
            print(e)
        self.subByTeacher()

    def subByTeacherPage(self):
        """Try every wanted (day column, time row) cell on the current page.

        Returns:
            True as soon as one slot was booked, False when no wanted slot
            on this page could be booked.
        """
        for index in self.indexs:
            for slot in self.times:
                # A bookable cell contains a link; other cells have no <a>.
                xpath = ('//*[@id="timeList"]/ul/li[' + index +
                         ']/div/div[' + slot + ']/a')
                bookable = self.driver.find_by_xpath(xpath)
                if not bookable:
                    continue
                try:
                    print("找到老师并预约对应时间index={0},time={1}".format(
                        index, slot))
                    bookable.click()
                    self.driver.find_link_by_text("确认预约").click()
                    self.driver.find_by_xpath(
                        '//*[@id="root"]/div/div[2]/div/div/div[2]/div/div[5]/div/div[2]/div[3]/div[2]/a[1]'
                    ).click()
                    print("预约成功")
                    return True
                except Exception as e:
                    # A failed click is not fatal: move on to the next cell.
                    print(e)
        # Only give up after every day column has been examined.
        return False

    def subByTeacher(self):
        """Book on the current calendar page, falling back to the next page.

        Returns:
            True if a slot was booked on either page, otherwise False.
        """
        if self.subByTeacherPage():
            return True

        print("找到下一页")
        try:
            self.driver.find_by_xpath(
                '//div[@class="js_next trace_1 _1jM3tRIW"]').click()
        except Exception as e:
            print(e)
        return self.subByTeacherPage()

    def subClass(self):
        """Try to book with each of the first three teachers in turn."""
        print("查询课程开始....")
        for i in range(3):
            self.driver.visit(self.ticket_url)
            print("查询第{0}个老师的课程:".format(str(i)))
            self.driver.find_link_by_text(u"预约")[i].click()
            self.subByTeacher()
示例#27
0
class DouYin(object):
    """Download the videos posted by one Douyin account.

    A headless Chrome session (splinter) is used only to resolve
    watermark-free links; everything else goes through ``requests``.

    NOTE(review): every HTTP request is made with ``verify=False``, i.e. TLS
    certificates are not checked.
    """

    def __init__(self, width=500, height=300):
        """Start the headless browser.

        Parameters:
            width: not used by this class; kept for backward compatibility.
            height: not used by this class; kept for backward compatibility.
        """
        chrome_options = Options()
        chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"'
        )
        self.driver = Browser(driver_name='chrome',
                              executable_path='F:/chromedriver_win32',
                              options=chrome_options,
                              headless=True)

    @staticmethod
    def _sanitize_name(name):
        """Return *name* with every path separator (``\\`` and ``/``) removed."""
        return name.replace('\\', '').replace('/', '')

    def get_video_urls(self, user_id):
        """Look up an account and list the share URLs of its videos.

        Parameters:
            user_id: the account ID to search for.

        Returns:
            video_names: list of file names, one per video.
            video_urls: list of share URLs, parallel to ``video_names``.
            nickname: the account's nickname.
        """
        video_names = []
        video_urls = []
        unique_id = ''

        # Repeat the search until its first hit is exactly the requested ID.
        # NOTE(review): there is no retry limit or back-off here; an ID that
        # never comes back as the first hit keeps this loop running.
        while unique_id != user_id:
            search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
            req = requests.get(url=search_url, verify=False)
            user_info = json.loads(req.text)['user_list'][0]['user_info']
            aweme_count = user_info['aweme_count']
            uid = user_info['uid']
            nickname = user_info['nickname']
            unique_id = user_info['unique_id']

        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (
            uid, aweme_count)
        req = requests.get(url=user_url, verify=False)
        html = json.loads(req.text)

        # Videos that only carry the default description get a running number.
        unnamed = 1
        for each in html['aweme_list']:
            share_info = each['share_info']
            share_desc = share_info['share_desc']
            if share_desc == '抖音-原创音乐短视频社区':
                video_names.append(str(unnamed) + '.mp4')
                unnamed += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(share_info['share_url'])

        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """Resolve a share URL to the (watermarked) video download URL.

        Parameters:
            video_url: share URL of the video.

        Returns:
            The first entry of the page's ``play_addr`` URL list.
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')
        # The video metadata is embedded as ``var data = [...]`` in the last
        # <script> element of the page.
        script = bf.find_all('script')[-1]
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        return video_html['video']['play_addr']['url_list'][0]

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to *video_name*, printing progress to stdout.

        Parameters:
            video_url: share URL of the video.
            video_name: path of the file to write.
            watermark_flag: when true the watermark-free link is resolved
                through ``remove_watermark``; otherwise the regular download
                URL is used.

        Returns:
            None. Nothing is written when the server does not answer 200.
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        chunk_size = 1024
        with closing(requests.get(video_url, stream=True,
                                  verify=False)) as response:
            # Check the status before touching headers that an error
            # response may not carry.
            if response.status_code != 200:
                return

            # A missing header is treated as "size unknown" (0).
            content_size = int(response.headers.get('content-length', 0))
            sys.stdout.write('  [文件大小]:%0.2f MB\n' %
                             (content_size / chunk_size / 1024))

            size = 0
            with open(video_name, "wb") as out:
                for data in response.iter_content(chunk_size=chunk_size):
                    out.write(data)
                    size += len(data)
                    # A percentage only makes sense when the size is known.
                    if content_size:
                        sys.stdout.write('  [下载进度]:%.2f%%' %
                                         float(size / content_size * 100) +
                                         '\r')
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Resolve a share URL to a watermark-free download URL.

        The URL is submitted to douyin.iiilab.com in the headless browser and
        the link found in the result panel is returned.

        Parameters:
            video_url: share URL of the video.

        Returns:
            The ``href`` of the first link in the result panel.
        """
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath(
            '//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    def run(self):
        """Ask for an account ID and download all of its videos.

        The files are written into a directory named after the account's
        nickname, created in the current working directory if needed.
        """
        self.hello()
        user_id = input('请输入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)

        if not os.path.isdir(nickname):
            os.mkdir(nickname)

        print('视频下载中:共有%d个作品!\n' % len(video_urls))
        for num, (raw_name, url) in enumerate(zip(video_names, video_urls), 1):
            print('  解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, url))
            # Descriptions are free text; drop anything that would be read
            # as a directory separator before using them as a file name.
            video_name = self._sanitize_name(raw_name)
            self.video_downloader(url, os.path.join(nickname, video_name))
            print('\n')
        print('下载完成!')

    def hello(self):
        """Print the welcome banner."""
        print('*' * 100)
        print('\t\t\t\t抖音App视频下载小助手')
        print('\t\t作者:Jack Cui')
        print('*' * 100)
示例#28
0
class tp(object):
    """Poll the exam registration site wb.zk789.cn for a seat.

    ``start()`` logs in (the captcha is typed by the user), opens the
    registration form and keeps alternating between two districts while
    trying to add a subject; ``send()`` mails a notification.
    """
    driver_name = ''
    executable_path = ''
    # Site login: user name and password.
    username = "******"
    passwd = "?"
    # City code and district code chosen in the form's two drop-downs.
    starts = "sc01"
    ends = "sc0108"
    # Registration form page.
    ticket_url = "http://wb.zk789.cn/ApplyExamination/EditApplyExamination.aspx"
    # Login page.
    login_url = "http://wb.zk789.cn/Login.aspx"
    # Page shown once the login went through.
    initmy_url = "http://wb.zk789.cn/Notice.aspx"
    relay_url = "http://wb.zk789.cn/Main.aspx"

    def __init__(self):
        """Select Chrome and the location of its driver binary."""
        self.driver_name = 'chrome'
        self.executable_path = 'D:/ProgramData/chromedriver'

    def send(self):
        """Mail the "seat grabbed" notification through smtp.qq.com.

        Failures are reported on stdout ("发送失败") and never raised.
        """
        self.msg_from = '*****@*****.**'  # sender address
        # Authorisation code of the sender mailbox. It is deliberately kept
        # in a local: storing it in ``self.passwd`` would overwrite the site
        # login password used by login().
        # NOTE(review): the code is hard-coded in source.
        mail_auth_code = 'ndnatjdouhdkbbha'
        self.msg_to = '*****@*****.**'  # recipient address

        self.subject = "已抢到座位,速来付款"
        self.content = "来自新——邮箱"
        msg = MIMEText(self.content)
        msg['Subject'] = self.subject
        msg['From'] = self.msg_from
        msg['To'] = self.msg_to

        server = None
        try:
            server = smtplib.SMTP_SSL("smtp.qq.com", 465)
            server.login(self.msg_from, mail_auth_code)
            server.sendmail(self.msg_from, self.msg_to, msg.as_string())
            print("发送成功")
        except Exception:
            print("发送失败")
        finally:
            # The connection may never have been opened, or may already be
            # gone; neither case should raise out of a best-effort notifier.
            if server is not None:
                try:
                    server.quit()
                except smtplib.SMTPException:
                    pass

    def login(self):
        """Fill in the credentials and wait for the user to pass the captcha."""
        self.driver.visit(self.login_url)
        self.driver.fill("ctl00$ContentPlaceHolder1$TBZjhm", self.username)
        self.driver.fill("ctl00$ContentPlaceHolder1$TBPassWord", self.passwd)
        print("等待验证码,自行输入...")
        # The captcha is entered by hand; poll until the site redirects.
        while self.driver.url != self.initmy_url:
            sleep(1)

    def start(self):
        """Open the browser, log in and poll the registration form.

        The loop ends only through ``send()`` + ``break``; Ctrl-C now also
        interrupts it because only ``Exception`` is caught inside the loop.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        # Tick the agreement checkbox and submit it.
        sleep(3)
        self.driver.find_by_xpath('//input[@type="checkbox"]').click()
        self.driver.find_by_xpath('//input[@type="submit"]').click()
        sleep(2)
        self.driver.find_by_xpath(
            '//a[@id="ctl00_ContentPlaceHolder1_lbtnBKEdit"]').click()

        # Alternate between the configured district and 'sc0122' on
        # successive attempts.
        use_configured_district = True
        while True:
            if self.driver.url != self.ticket_url:
                print('服务器无响应,刷新中')
                self.driver.quit()
                # NOTE(review): this restarts the whole flow recursively and,
                # once the nested call returns, schedules a Windows shutdown
                # while the outer loop keeps running — confirm this is
                # intended.
                self.start()
                os.system('shutdown -s -t 60')
                print('60s 后关机')
                continue

            try:
                print("考位页面开始...")
                # Give the form time to load.
                sleep(2)
                self.driver.select(
                    'ctl00$ContentPlaceHolder1$aecEdit$fvData$ddlKSDSZ',
                    self.starts)
                if use_configured_district:
                    print('选择成华区')
                    self.driver.select(
                        'ctl00$ContentPlaceHolder1$aecEdit$fvData$ddlKSQX',
                        self.ends)
                else:
                    print('选择双流区')
                    self.driver.select(
                        'ctl00$ContentPlaceHolder1$aecEdit$fvData$ddlKSQX',
                        'sc0122')
                use_configured_district = not use_configured_district
                self.driver.find_by_xpath(
                    '//input[@id="btnAddCourse"]').first.click()

                print('进入科目选择')

                sleep(2)
                # NOTE(review): ``value`` is compared with the integer 0; if
                # splinter returns a string here this test is always true
                # and the notification branch below is never reached —
                # confirm the intended comparison.
                if self.driver.find_by_xpath(
                        '//*[@id="msg_statistic_pay"]').first.value != 0:
                    sleep(2)
                    with self.driver.get_iframe(0) as iframe:
                        print('选择科目')
                        iframe.find_by_xpath(
                            '//*[@id="gvData_ctl03_cbSelect"]').first.check()
                        sleep(1)
                        self.driver.find_by_xpath(
                            '//*[@id="btnConfirm"]').first.click()
                        sleep(2)
                    self.driver.find_by_xpath(
                        '// *[ @ id = "ext-gen29"]').first.click()
                else:
                    self.send()
                    break
            except Exception:
                print('程序崩溃了,刷新中')
                self.driver.reload()
                sleep(5)
示例#29
0
class BuyTicket(object):
    """Query 12306 for train tickets and book them through a splinter browser.

    ``start_buy()`` logs in (the captcha is typed by the user), polls the
    query page until a train can be booked, selects passengers and seats,
    and submits the order; the whole cycle is repeated until it succeeds.
    """

    # Seat type name -> index of its <td> in a result row.
    _SEAT_TYPE_COLUMNS = {
        '商务座特等座': 1,
        '一等座': 2,
        '二等座': 3,
        '高级软卧': 4,
        '软卧': 5,
        '动卧': 6,
        '硬卧': 7,
        '软座': 8,
        '硬座': 9,
        '无座': 10,
        '其他': 11,
    }
    # Column used when the seat type name is missing or unknown (硬卧).
    _DEFAULT_SEAT_COLUMN = 7

    def __init__(self, username, passwd, order, passengers, seatType,
                 ticketType, daytime, starts, ends, seat_type=None):
        """Store the booking parameters.

        Parameters:
            username: 12306 user name.
            passwd: 12306 password.
            order: 1-based position of the train to book in the result
                list; 0 scans the rows from top to bottom.
            passengers: list of passenger names.
            seatType: ``value`` of the seat <option> to select on the order
                page.
            ticketType: ``value`` of the ticket-type <option> to select.
            daytime: travel date, formatted like 2018-02-05.
            starts: departure station cookie value.
            ends: arrival station cookie value.
            seat_type: seat type name (e.g. '二等座') that decides which
                result column is checked when ``order`` is 0. When omitted,
                a module-level ``seat_type`` is used if one exists,
                otherwise the 硬卧 column.
        """
        self.username = username
        self.passwd = passwd
        self.order = order
        self.passengers = passengers
        self.seatType = seatType
        self.ticketType = ticketType
        self.daytime = daytime
        self.starts = starts
        self.ends = ends

        self.login_url = 'https://kyfw.12306.cn/otn/login/init'
        self.initMy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'
        self.confirm_url = 'https://kyfw.12306.cn/otn/confirmPassenger/initDc'
        # Browser to drive and the path of its driver binary.
        self.driver_name = 'chrome'
        self.executable_path = '/usr/local/bin/chromedriver'

        if seat_type is None:
            # This code used to read a bare ``seat_type`` name that is not a
            # parameter, which only worked when such a module-level global
            # existed; keep honouring that global instead of raising
            # NameError.
            seat_type = globals().get('seat_type')
        self.seat_type_index = self._SEAT_TYPE_COLUMNS.get(
            seat_type, self._DEFAULT_SEAT_COLUMN)

    def send_sms(self, mobile, sms_info):
        """Send an SMS through the ihuyi gateway and return the raw response.

        Parameters:
            mobile: destination phone number.
            sms_info: message text.

        NOTE(review): the gateway account and password are hard-coded.
        """
        host = "106.ihuyi.com"
        sms_send_uri = "/webservice/sms.php?method=Submit"
        account = "C59782899"
        pass_word = "19d4d9c0796532c7328e8b82e2812655"
        params = parse.urlencode({
            'account': account,
            'password': pass_word,
            'content': sms_info,
            'mobile': mobile,
            'format': 'json'
        })
        headers = {
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain"
        }
        conn = httplib2.HTTPConnectionWithTimeout(host, port=80, timeout=30)
        try:
            conn.request("POST", sms_send_uri, params, headers)
            return conn.getresponse().read()
        finally:
            # Release the socket even when the request fails.
            conn.close()

    def login(self):
        """Fill in the credentials and wait for the user to pass the captcha."""
        self.driver.visit(self.login_url)
        self.driver.fill("loginUserDTO.user_name", self.username)
        self.driver.fill("userDTO.password", self.passwd)
        print('请手动输入验证码...')
        # The captcha is solved by hand; poll until the site redirects.
        while self.driver.url != self.initMy_url:
            sleep(1)

    def _query_fixed_train(self):
        """Keep querying and clicking the ``order``-th "预订" link until the page changes."""
        count = 0
        while self.driver.url == self.ticket_url:
            self.driver.find_by_text("查询").click()
            count += 1
            print("第 %d 次点击查询..." % count)
            try:
                self.driver.find_by_text("预订")[self.order - 1].click()
                sleep(1.5)
            except Exception as e:
                print(e)
                print("预订失败...")

    def _query_any_train(self):
        """Keep querying and book the first row with tickets in the wanted seat column."""
        count = 0
        while self.driver.url == self.ticket_url:
            self.driver.find_by_text("查询").click()
            count += 1
            print("第 %d 次点击查询..." % count)
            sleep(1)
            try:
                for link in self.driver.find_by_xpath(
                        "//tbody[@id='queryLeftTable']/tr/td/a[contains(text(), '预订')]"
                ):
                    # The row that contains this booking link.
                    current_tr = link.find_by_xpath("./../..")
                    cell = current_tr.find_by_tag('td')[
                        self.seat_type_index].text
                    if cell == '--':
                        print('无此座位类型出售,继续尝试...')
                    elif cell == '无':
                        print('无票,继续尝试...')
                    else:
                        print('查询到火车票')
                        link.click()
                        return
                    sleep(1)
            except Exception as e:
                print(e)
                print("预订失败...")

    def _choose_seats(self):
        """Select seat and ticket type for every passenger row on the order page."""
        # Iterate by position instead of popping names off the list: the
        # passenger list must survive so that a retry can select them again.
        for position in range(1, len(self.passengers) + 1):
            seat_id_string = "seatType_" + str(position)
            ticket_id_string = "ticketType_" + str(position)
            self.driver.find_by_xpath(
                '//select[@id="%s"]/option[@value="%s"]' %
                (seat_id_string, self.seatType)).first._element.click()
            self.driver.find_by_xpath(
                '//select[@id="%s"]//option[@value="%s"]' %
                (ticket_id_string, self.ticketType)).first._element.click()
            sleep(1)

    def start_buy(self):
        """Run the whole booking flow, retrying until an order is placed.

        Any exception raised by the browser during one attempt is printed
        and the attempt is restarted from the query page. The process exits
        with status 1 when the site reports that booking is closed.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1000, 800)
        self.login()

        exit_state = 0
        while exit_state == 0:
            self.driver.visit(self.ticket_url)
            try:
                print("开始刷票....")
                # The query form is pre-filled from these cookies on reload.
                self.driver.cookies.add({"_jc_save_fromStation": self.starts})
                self.driver.cookies.add({"_jc_save_toStation": self.ends})
                self.driver.cookies.add({"_jc_save_fromDate": self.daytime})
                self.driver.reload()

                if self.order != 0:
                    self._query_fixed_train()
                else:
                    self._query_any_train()

                print("开始预订...")
                if self.driver.find_by_id("content_defaultwarningAlert_hearder"
                                          ).text.strip() == '当前时间不可以订票':
                    print('当前时间不可以订票')
                    sys.exit(1)
                # Wait for the passenger confirmation page.
                while self.driver.url != self.confirm_url:
                    sleep(1)
                if self.driver.is_text_present('splinter.readthedocs.io'):
                    print("开始选择乘客...")
                if self.driver.url == self.confirm_url:
                    for p in self.passengers:
                        pg = self.driver.find_by_xpath(
                            "//ul[@id='normal_passenger_id']/li/label[contains(text(), '"
                            + p + "')]")
                        pg.last.click()
                print("乘客选择完毕...\n")

                print("开始选座...")
                sleep(1)
                self._choose_seats()
                self.driver.find_by_id("submitOrder_id").click()
                sleep(1.5)
                print("选座完毕...\n")

                print("提交订单...")
                sleep(1)
                self.driver.find_by_id("qr_submit_id").click()
                if self.driver.is_text_present('splinter.readthedocs.io'):
                    print('正在检查余票...')
                if self.driver.find_by_id("orderResultInfo_id").find_by_tag(
                        'div').text.strip() == '出票失败!':
                    print(
                        self.driver.find_by_id(
                            "orderResultInfo_id").find_by_tag('p')[0].text)
                else:
                    print('火车票订票成功,请速度支付订单!\n')
                    exit_state = 1
                    input('按任意键继续:')
            except Exception as e:
                print(e)
                exit_state = 0
示例#30
0
文件: test.py 项目: waxi-i/reptilia
class BrushTicket(object):
    """Poll 12306 for tickets on given trains, book them and mail a notice."""

    # Seat type name -> (index of its <td> in a result row, value of the
    # matching <option> on the order page).
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Used for unknown seat type names (same as 硬卧).
    _DEFAULT_SEAT_TYPE = (7, 3)

    def __init__(self, passengers, from_time, from_station, to_station,
                 numbers, seat_type, receiver_email):
        """Store the booking parameters and open the browser.

        Parameters:
            passengers: passenger names as shown on the order page.
            from_time: travel date cookie value.
            from_station: departure station cookie value.
            to_station: arrival station cookie value.
            numbers: train numbers to look for, in order of preference.
            seat_type: seat type name, e.g. '二等座'; unknown names fall
                back to 硬卧.
            receiver_email: address that receives the success notice.
        """
        self.passengers = passengers
        self.from_station = from_station
        self.to_station = to_station
        self.from_time = from_time
        self.numbers = numbers
        self.seat_type_index, self.seat_type_value = self._seat_type_info(
            seat_type)
        self.receiver_email = receiver_email
        # Main pages of the current 12306 site.
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        # Driver downloads: https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    @classmethod
    def _seat_type_info(cls, seat_type):
        """Return ``(column index, option value)`` for a seat type name."""
        return cls._SEAT_TYPES.get(seat_type, cls._DEFAULT_SEAT_TYPE)

    def do_login(self):
        """Open the login page and wait until the user has logged in by hand."""
        self.driver.visit(self.login_url)
        sleep(1)
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def _book(self, current_tr):
        """Book the train in *current_tr* for all passengers.

        Returns:
            True when the order was confirmed, False when the site rejected
            the submission (the dialog is closed and the browser is sent
            back one step so polling can continue).
        """
        current_tr.find_by_css('td.no-br>a')[0].click()
        sleep(1)
        for key_value, p in enumerate(self.passengers, 1):
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            print('开始选择席别……')
            # Option value 0 (二等座) is left at whatever the page preselects.
            if self.seat_type_value != 0:
                self.driver.find_by_xpath(
                    "//select[@id='seatType_" + str(key_value) +
                    "']/option[@value='" + str(self.seat_type_value) +
                    "']").first.click()
            sleep(0.2)
            # Names ending in ')' bring up an extra dialog that has to be
            # confirmed.
            if p[-1] == ')':
                self.driver.find_by_id('dialog_xsertcj_ok').click()

        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # A non-empty result text means the submission was rejected.
        submit_false_info = self.driver.find_by_id(
            'orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return False

        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        print('预订成功,请及时前往支付……')
        try:
            self.send_mail(self.receiver_email,
                           '恭喜您,抢到票了,请及时前往12306支付订单!')
        except Exception as error_info:
            # The ticket is booked; a failed notice must not restart polling.
            print(error_info)
        return True

    def start_brush(self):
        """Log in and poll the query page until one of the trains is booked.

        Exits the process with status 1 when a requested train is not
        listed or does not sell the wanted seat type. Other errors are
        printed and polling goes on while the browser stays on the query
        page.
        """
        self.driver.driver.maximize_window()
        self.do_login()
        self.driver.visit(self.ticket_url)
        try:
            print('开始刷票……')
            # The query form is pre-filled from these cookies on reload.
            self.driver.cookies.add(
                {"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    for number in self.numbers:
                        current_tr = self.driver.find_by_xpath(
                            '//tr[@datatran="' + number +
                            '"]/preceding-sibling::tr[1]')
                        if not current_tr:
                            print('不存在当前车次【%s】,已结束当前刷票,请重新开启!' % number)
                            sys.exit(1)

                        remaining = current_tr.find_by_tag('td')[
                            self.seat_type_index].text
                        if remaining == '--':
                            print('无此座位类型出售,已结束当前刷票,请重新开启!')
                            sys.exit(1)
                        if remaining == '无':
                            print('无票,继续尝试……')
                            sleep(1)
                            continue

                        print('刷到票了(余票数:' + str(remaining) +
                              '),开始尝试预订……')
                        if self._book(current_tr):
                            # Done: do not go on to the remaining trains.
                            return
                except Exception as error_info:
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def send_mail(self, receiver_address, content):
        """Send an HTML notification mail through smtp.163.com.

        Parameters:
            receiver_address: recipient address.
            content: text placed in the message body.

        NOTE(review): the sender address and client authorisation code are
        hard-coded.
        """
        host = 'smtp.163.com'
        port = 25
        sender = '*****@*****.**'  # sender mailbox
        pwd = 'Dy93082'  # client authorisation code, not the login password
        receiver = receiver_address
        body = '<h2>温馨提醒:</h2><p>' + content + '</p>'
        msg = MIMEText(body, 'html', _charset="utf-8")
        msg['subject'] = '抢票成功通知!'
        msg['from'] = sender
        msg['to'] = receiver
        s = smtplib.SMTP(host, port)
        try:
            s.login(sender, pwd)
            s.sendmail(sender, receiver, msg.as_string())
        finally:
            # Always release the connection, even when sending failed.
            try:
                s.quit()
            except smtplib.SMTPException:
                s.close()
示例#31
0
class Buy_Tickets(object):
    """Automate logging in to 12306 and booking a ticket with a splinter browser.

    The browser is created by ``start_buy``; ``login`` expects
    ``self.driver`` to exist already.
    """

    def __init__(self, username, passwd, order, passengers, dtime, starts,
                 ends):
        """Store the booking parameters.

        Args:
            username: Account name typed into the login form.
            passwd: Account password typed into the login form.
            order: 1-based position of the "预订" link to click; 0 clicks
                the first link as well.
            passengers: Iterable of passenger names; each one is clicked by
                its text on the order page.
            dtime: Value stored in the ``_jc_save_fromDate`` cookie.
            starts: Value stored in the ``_jc_save_fromStation`` cookie.
            ends: Value stored in the ``_jc_save_toStation`` cookie.
        """
        self.username = username
        self.passwd = passwd
        self.order = order
        self.passengers = passengers
        self.starts = starts
        self.ends = ends
        self.dtime = dtime
        self.login_url = 'https://kyfw.12306.cn/otn/login/init'  # login page
        # URL the browser lands on after a successful login.
        self.initMy_url = 'https://kyfw.12306.cn/otn/index/initMy12306'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init'  # ticket query page
        self.driver_name = 'chrome'
        # Raw string: "\c" is not a valid escape sequence, so the value is
        # unchanged but no invalid-escape warning is emitted.
        self.executable_path = r'F:\cro\chromedriver.exe'

    def login(self):
        """Fill in the credentials and wait until the login has completed.

        The captcha has to be solved manually in the browser window; the
        method polls once per second until the browser URL equals
        ``self.initMy_url``.
        """
        self.driver.visit(self.login_url)
        # Both fields are located by their ``name`` attribute.
        self.driver.fill('loginUserDTO.user_name', self.username)
        self.driver.fill('userDTO.password', self.passwd)
        print('请输入验证码...')
        while self.driver.url != self.initMy_url:
            sleep(1)

    def start_buy(self):
        """Log in, poll the query page and submit an order.

        Returns:
            1 when the order confirmation button was clicked, -1 when any
            exception was raised after the ticket page had been opened (the
            exception is printed, not re-raised).
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1200, 800)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print(u"购票页面开始...")
            # Pre-load the query form through cookies, then reload so the
            # page picks them up.
            self.driver.cookies.add({"_jc_save_fromStation": self.starts})
            self.driver.cookies.add({"_jc_save_toStation": self.ends})
            self.driver.cookies.add({"_jc_save_fromDate": self.dtime})
            self.driver.reload()

            count = 0
            while self.driver.url == self.ticket_url:
                count += 1
                self.driver.find_by_text(u"查询").click()
                print(u"循环点击查询... 第 %s 次" % count)
                sleep(1)
                # Hard-coded id of the availability cell of one specific
                # train/seat; its value is u'有' when tickets are available.
                tarVal = self.driver.find_by_id('YZ_0h000K705705').value
                print(tarVal)
                if tarVal == u'有':
                    # order is 1-based; 0 falls back to the first link.
                    index = self.order - 1 if self.order != 0 else 0
                    self.driver.find_by_text(u"预订")[index].click()
                    break

            print(u"开始预订...")
            sleep(1)
            print(u'开始选择用户...')
            # The constructor stores the names in ``self.passengers``; the
            # previously used ``self.users`` attribute never existed.
            for user in self.passengers:
                self.driver.find_by_text(user).click()
            print(u"提交订单...")
            sleep(1)
            # Ticket type: 1 adult, 2 child, 3 student, 4 disabled.
            self.driver.find_by_xpath(
                '//select[@id="ticketType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            # Seat type: 1 hard seat, 3 hard sleeper, 4 soft sleeper,
            # 9 business, O (letter) second class, M first class.
            self.driver.find_by_xpath(
                '//select[@id="seatType_1"]/option[@value="1"]'
            )._element.click()
            sleep(1)
            print(u"提交订单...")
            self.driver.find_by_id('submitOrder_id').click()
            sleep(1.5)
            print(u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()
            return 1
        except Exception as e:
            print(e)
            return -1
示例#32
0
class BrushTicket(object):
    """Poll the 12306 ticket page and try to book as soon as a seat is on sale."""

    # Seat type name -> (index of the <td> in the result row,
    #                    <option> value on the order page).
    # An option value of 0 makes the booking step leave the seat <select>
    # untouched.
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Used when the configured seat type is not in the table.
    _DEFAULT_SEAT_TYPE = (7, 3)

    def __init__(self, passengers_number):
        """Load the configuration and open the browser.

        Args:
            passengers_number: Index of the section in ``conf/12306.conf``
                that describes the booking to make.
        """
        # Station name -> station value, taken from the first section of
        # conf/city.conf.
        cp = ConfigParser()
        cp.read("conf/city.conf", encoding='UTF-8')
        city = cp.sections()[0]
        self.city_dict = {key: cp.get(city, key) for key in cp.options(city)}

        cp = ConfigParser()
        cp.read("conf/12306.conf", encoding='UTF-8')
        pessenger = cp.sections()[passengers_number]

        # Passenger names as a list (the raw option is a single string).
        self.passengers = self._parse_passengers(cp.get(pessenger, 'name'))
        # Departure and arrival stations, translated through city_dict.
        self.from_station = self.city_dict[cp.get(pessenger, 'from_station')]
        self.to_station = self.city_dict[cp.get(pessenger, 'to_station')]
        # Travel date.
        self.from_time = cp.get(pessenger, 'from_time')
        # Train number.
        self.number = cp.get(pessenger, 'coach_number')
        self.seat_type_index, self.seat_type_value = self._SEAT_TYPES.get(
            cp.get(pessenger, 'seat_type'), self._DEFAULT_SEAT_TYPE)
        # Notification address.
        self.receiver_email = cp.get(pessenger, 'email')
        # Main pages of the 12306 web site.
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        # Browser driver; chromedriver downloads:
        # https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    @staticmethod
    def _parse_passengers(raw):
        """Split the configured ``name`` option into a list of passenger names.

        Names may be separated by an ASCII or a full-width comma; blanks
        around a name and empty entries are dropped.  A value without any
        comma yields a one-element list.
        """
        # NOTE(review): the comma-separated format is an assumption about
        # conf/12306.conf; confirm against the real configuration file.
        parts = raw.replace(',', ',').split(',')
        return [name.strip() for name in parts if name.strip()]

    def do_login(self):
        """Open the login page and wait until the user has logged in.

        Login is done manually (QR code or account); the method polls once
        per second until the browser URL equals ``self.init_my_url``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def _select_passengers(self):
        """Tick every passenger on the order page and choose the seat type."""
        for key_value, p in enumerate(self.passengers, start=1):
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            print('开始选择席别……')
            if self.seat_type_value != 0:
                self.driver.find_by_xpath(
                    "//select[@id='seatType_" + str(key_value) +
                    "']/option[@value='" + str(self.seat_type_value) +
                    "']").first.click()
            sleep(0.2)
            # Names ending in ')' need an extra dialog to be confirmed
            # (presumably the student-ticket notice -- TODO confirm).
            if p[-1] == ')':
                self.driver.find_by_id('dialog_xsertcj_ok').click()

    def _submit_order(self):
        """Submit and confirm the order.

        Returns:
            True when the confirmation button was clicked, False when the
            site reported an error and the page was stepped back.
        """
        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # A non-empty result text means the submission was rejected.
        submit_false_info = self.driver.find_by_id(
            'orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return False
        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        return True

    def start_brush(self):
        """Log in, then query repeatedly and book when a ticket shows up.

        Exits the process with status 1 when the configured train is not
        listed or the seat type is not sold on it.  Any other exception is
        printed and the polling continues.
        """
        self.driver.driver.maximize_window()
        self.do_login()
        self.driver.visit(self.ticket_url)
        try:
            print('开始刷票……')
            # Pre-load the query form through cookies.
            self.driver.cookies.add(
                {"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    # The row holding the seat cells directly precedes the
                    # row carrying the train number in ``datatran``.
                    current_tr = self.driver.find_by_xpath(
                        '//tr[@datatran="' + self.number +
                        '"]/preceding-sibling::tr[1]')
                    if not current_tr:
                        print('不存在当前车次【%s】,已结束当前刷票,请重新开启!' % self.number)
                        sys.exit(1)
                    remaining = current_tr.find_by_tag('td')[
                        self.seat_type_index].text
                    if remaining == '--':
                        print('无此座位类型出售,已结束当前刷票,请重新开启!')
                        sys.exit(1)
                    if remaining == '无':
                        print('无票,继续尝试……')
                        sleep(1)
                        continue
                    # Tickets available: try to book.
                    print('刷到票了(余票数:' + str(remaining) + '),开始尝试预订……')
                    current_tr.find_by_css('td.no-br>a')[0].click()
                    sleep(1)
                    self._select_passengers()
                    if not self._submit_order():
                        continue
                    print('预订成功,请及时前往支付……')
                    title = '抢票票成功!!'
                    self.send_mail(
                        self.receiver_email, '恭喜您,抢到了' +
                        self.from_time + '的车票,请及时前往12306支付订单!', title)
                except Exception as error_info:
                    # sys.exit() raises SystemExit, which is not caught here.
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def send_mail(self, receiver_address, content, title):
        """Send a plain-text notification to the fixed address and the receiver.

        Args:
            receiver_address: Additional recipient of the notification.
            content: Message body; converted with ``str()``.
            title: Subject line.
        """
        mail_host = 'smtp.163.com'
        # 163 account name.
        mail_user = '******'
        # Password (an authorization code for some providers).
        mail_pass = '******'
        # Sender address.
        sender = '*****@*****.**'
        # Every address in this list gets its own message.
        receivers_list = ['*****@*****.**', receiver_address]

        email = Email(mail_host, mail_user, mail_pass)

        for receiver in receivers_list:
            smtpObj = email.getConnection()
            message = MIMEText(str(content), 'plain', 'utf-8')
            message['Subject'] = title
            message['From'] = sender
            message['To'] = receiver
            email.send_email(smtpObj, sender, receiver, message)
示例#33
0
# Open the 12306 portal.  The page title is printed whether or not the visit
# succeeds; an exception from visit() still propagates afterwards.
try:
    browse.visit("http://www.12306.cn")
finally:
    print(browse.title)

browse.click_link_by_href("https://kyfw.12306.cn/otn/index/init")

# Make the second browser window the current one.
browse.windows.current = browse.windows[1]

# Alternative ways of entering the stations that were tried and disabled:
# browse.find_by_id("fromStationText")[0].fill("上海")
# browse.find_by_id("toStationText")[0].fill("郑州")
# browse.fill("leftTicketDTO.from_station_name","上海")
# browse.fill("leftTicketDTO.to_station_name","郑州")

# Open the station picker and dump the page for inspection.
browse.find_by_id("from_station_imageB")[0].click()
print(browse.html)

# Click the departure entry first, then the arrival entry.
for station_xpath in ("//li[@data='SHH']", "//li[@data='ZZF']"):
    btn = browse.find_by_xpath(station_xpath)
    btn[0].click()

# Start the search.
btn = browse.find_by_id("a_search_ticket")
btn.click()
示例#34
0
class DouYin(object):
    """Download the videos published by a Douyin user."""

    def __init__(self, width = 500, height = 300):
        """Create the headless Chrome browser used for watermark removal.

        Args:
            width: Accepted for compatibility; not used.
            height: Accepted for compatibility; not used.
        """
        chrome_options = Options()
        chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
        self.driver = Browser(driver_name='chrome', executable_path='D:/chromedriver', options=chrome_options, headless=True)

    def get_video_urls(self, user_id):
        """Look up a user and collect the share URLs of their videos.

        Args:
            user_id: Douyin ID to search for.

        Returns:
            A tuple ``(video_names, video_urls, nickname)``: the file names
            to save under, the matching share URLs, and the user's nickname.
        """
        video_names = []
        video_urls = []
        unique_id = ''
        search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
        # NOTE(review): the same request is repeated until the first search
        # hit carries the requested ID, so an ID that never appears first
        # loops forever -- consider bounding the retries.
        while unique_id != user_id:
            req = requests.get(url = search_url, verify = False)
            html = json.loads(req.text)
            user_info = html['user_list'][0]['user_info']
            aweme_count = user_info['aweme_count']
            uid = user_info['uid']
            nickname = user_info['nickname']
            unique_id = user_info['unique_id']
        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
        req = requests.get(url = user_url, verify = False)
        html = json.loads(req.text)
        i = 1
        for each in html['aweme_list']:
            share_desc = each['share_info']['share_desc']
            # Videos carrying only this fixed description get a running
            # number as their file name instead.
            if '抖音-原创音乐短视频社区' == share_desc:
                video_names.append(str(i) + '.mp4')
                i += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(each['share_info']['share_url'])

        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """Return the download URL of the watermarked video.

        Args:
            video_url: Share page URL of the video.

        Returns:
            The first entry of the video's ``play_addr`` URL list, parsed
            from the ``var data = [...]`` assignment in the page's last
            ``<script>`` element.
        """
        req = requests.get(url = video_url, verify = False)
        bf = BeautifulSoup(req.text, 'lxml')
        script = bf.find_all('script')[-1]
        # Raw string: "\[" is a regex escape, not a Python string escape.
        video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
        video_html = json.loads(video_url_js)
        return video_html['video']['play_addr']['url_list'][0]

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """Download one video to ``video_name``, printing the progress.

        Nothing is written when the server does not answer with status 200.

        Args:
            video_url: Share page URL of the video.
            video_name: Path of the file to write.
            watermark_flag: When true, resolve the watermark-free URL via
                ``remove_watermark``; otherwise use ``get_download_url``.
        """
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)
        with closing(requests.get(video_url, stream=True, verify = False)) as response:
            if response.status_code != 200:
                return
            chunk_size = 1024
            # A missing header is treated as "size unknown" (0).
            content_size = int(response.headers.get('content-length', 0))
            sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

            size = 0
            with open(video_name, "wb") as file:
                for data in response.iter_content(chunk_size = chunk_size):
                    file.write(data)
                    size += len(data)
                    # A percentage can only be shown for a known total size;
                    # the guard also avoids a division by zero.
                    if content_size:
                        sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                        sys.stdout.flush()

    def remove_watermark(self, video_url):
        """Return the watermark-free download URL for a video.

        The share URL is submitted to the form on douyin.iiilab.com and the
        ``href`` of the first link in the result is returned.

        Args:
            video_url: Share page URL of the video.
        """
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    @staticmethod
    def _sanitize_name(name):
        """Remove both kinds of path separator from a file name."""
        return name.replace('\\', '').replace('/', '')

    def run(self):
        """Ask for a user ID and download all of that user's videos.

        The files are stored in a directory named after the user's
        nickname, created in the current working directory when missing.
        """
        self.hello()
        user_id = input('请输入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        if not os.path.exists(nickname):
            os.mkdir(nickname)
        print('视频下载中:共有%d个作品!\n' % len(video_urls))
        for num, (video_url, raw_name) in enumerate(zip(video_urls, video_names), start=1):
            print('  解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, video_url))
            # Strip every separator: a name containing both "\" and "/"
            # used to keep the "/" and then failed to open.
            video_name = self._sanitize_name(raw_name)
            self.video_downloader(video_url, os.path.join(nickname, video_name))
            print('\n')

        print('下载完成!')

    def hello(self):
        """Print the welcome banner."""
        print('*' * 100)
        print('\t\t\t\t抖音App视频下载小助手')
        print('\t\t作者:Jack Cui')
        print('*' * 100)
示例#35
0
class Amazon(object):
    """Scripted amazon.com browser session: search, register, pay, order."""

    def __init__(self, keyword):
        """Generate random account data, reserve a proxy row and open the browser.

        Args:
            keyword: Search term typed into the Amazon search box.
        """
        self.url = "https://www.amazon.com"
        alphabet = string.ascii_letters + string.digits
        self.name = "".join(random.sample(alphabet, 6))
        self.email = "".join(random.sample(alphabet, 16)) + "@163.com"
        self.password = "".join(random.sample(alphabet, 8))
        self.keyword = keyword
        self.chrome_options = Options()
        logging.basicConfig(filename='amazon.log', level=logging.DEBUG)
        self.conn = pymysql.connect(host='192.168.0.211', port=6033, user='******', password='******', db='amazon')
        self.cs = self.conn.cursor()
        # Take the newest proxy that is still marked alive and mark it used.
        self.cs.execute('select id,ip,port from register_proxy_ips where is_alived = 1 order by id desc limit 1')
        self.proxy_ip = self.cs.fetchone()
        if self.proxy_ip is None:
            # No row left: nothing to mark.  The browser below is started
            # without a proxy in either case.
            logging.warning('no alive proxy found in register_proxy_ips')
        else:
            # Parameterized query instead of string formatting.
            self.cs.execute('update register_proxy_ips set is_alived = 0 where id = %s', (self.proxy_ip[0],))
            self.conn.commit()
        logging.debug(self.proxy_ip)
        # self.chrome_options.add_argument('--proxy-server=http://{host}:{port}'.format(host=self.proxy_ip[1], port=self.proxy_ip[2]))
        self.browser = Browser('chrome', user_agent=generate_user_agent(device_type='desktop'))

    def run(self):
        """Run the whole flow; give up when no ad listing shows up.

        The search is tried six times with a five minute pause after each
        miss.  When no ad listing was found the browser is closed and the
        method returns without running the remaining steps.
        """
        self.acess()
        self.search_kw()
        for _ in range(6):
            ad_list = self.browser.find_by_xpath('//li[contains(@class, "AdHolder")]')
            if ad_list:
                ad_list[0].find_by_tag('h2').click()
                break
            time.sleep(300)
            self.search_kw()
        else:
            # Every attempt failed: the browser is gone after quit(), so the
            # following steps must not run.
            self.browser.quit()
            return
        self.register()
        self.add_pay_method()
        self.add_address()
        self.buy_goods()
        self.add_list()

    def acess(self):
        """Open the start page; log an error only when the visit fails.

        Raises:
            Exception: Whatever ``browser.visit`` raised, after logging it.
        """
        try:
            self.browser.visit(self.url)
        except Exception:
            logging.error("代理IP失效,请求不成功")
            raise

    def search_kw(self):
        """Search for the keyword and drag the result area onto the footer."""
        self.browser.find_by_xpath('//input[@id="twotabsearchtextbox"]').first.fill(self.keyword)
        self.browser.find_by_xpath('//input[@value="Go"]').click()
        # NOTE(review): presumably used to scroll through the results --
        # confirm the intent of this drag.
        draggable = self.browser.find_by_xpath('//*[@id="searchTemplate"]')
        target = self.browser.find_by_xpath('//*[@id="footer"]')
        draggable.drag_and_drop(target)
        # time.sleep(random.randint(1, 6))

    def register(self):
        """Create a new account with the generated name, e-mail and password."""
        self.browser.find_by_xpath('//*[@id="nav-link-accountList"]').click()
        self.browser.find_by_xpath('//*[@id="createAccountSubmit"]').click()
        self.browser.find_by_xpath('//input[@id="ap_customer_name"]').first.fill(self.name)
        self.browser.find_by_xpath('//input[@id="ap_email"]').first.fill(self.email)
        self.browser.find_by_xpath('//input[@id="ap_password"]').first.fill(self.password)
        self.browser.find_by_xpath('//input[@id="ap_password_check"]').first.fill(self.password)
        self.browser.find_by_xpath('//input[@class="a-button-input"]').click()

    def add_pay_method(self):
        """Add the credit card described by the module-level ``paras`` mapping."""
        self.browser.find_by_xpath('//*[@id="nav-link-accountList"]').click()
        self.browser.find_by_xpath('//*[@id="a-page"]/div[3]/div/div[2]/div[2]/a/div').click()
        self.browser.fill('ppw-accountHolderName', paras['card_name'])
        self.browser.fill('addCreditCardNumber', paras['card_num'])
        # Make the expiry-date <select> elements visible so that their
        # options can be clicked.
        self.browser.execute_script('document.getElementsByName("ppw-expirationDate_month")[0].style.display="block"')
        self.browser.execute_script('document.getElementsByName("ppw-expirationDate_year")[0].style.display="block"')
        self.browser.find_by_text('04').click()
        self.browser.find_by_text('2019').click()
        self.browser.find_by_name('ppw-widgetEvent:AddCreditCardEvent').click()

    def add_address(self):
        """Fill in the address form from the module-level ``paras`` mapping."""
        self.browser.fill('ppw-fullName', paras['fullname'])
        self.browser.fill('ppw-line1', paras['line'])
        self.browser.fill('ppw-city', paras['city'])
        self.browser.fill('ppw-stateOrRegion', paras['stateOrRegion'])
        self.browser.fill('ppw-postalCode', paras['postalCode'])
        self.browser.fill('ppw-phoneNumber', paras['phoneNumber'])
        self.browser.find_by_name('ppw-widgetEvent:AddAddressEvent').click()

    def buy_goods(self):
        """Put the most recently viewed item into the cart and place the order.

        Steps whose page elements are optional are best-effort: a failure
        there is logged and the flow continues.
        """
        self.browser.find_by_xpath('//*[@id="nav-recently-viewed"]').click()
        self.browser.find_by_xpath('//*[@id="asin_list"]/div[1]/div/a/div[1]/span/div').click()
        try:
            self.browser.find_by_xpath('//*[@id="add-to-cart-button"]').click()  # add cart
            self.browser.find_by_xpath('//*[@id="smartShelfAddToCartNative"]').click()
        except Exception as e:
            logging.error(e)
        try:
            time.sleep(5)
            self.browser.is_element_present_by_xpath('//i[@class="a-icon a-icon-close"]', wait_time=30)
            self.browser.find_by_xpath('//i[@class="a-icon a-icon-close"]').click()
        except Exception as e:
            logging.debug('close icon not clicked: %s', e)
        try:
            self.browser.find_by_xpath('//a[@id="hlb-view-cart-announce"]').click()  # enter cart
        except Exception as e:
            logging.debug('view-cart link not clicked: %s', e)
        # Reveal the quantity <select>, choose its "10" entry and type the
        # quantity into the text box that is filled next.
        self.browser.execute_script('document.getElementsByName("quantity")[0].style.display="block"')
        self.browser.find_by_value('10').click()
        self.browser.find_by_name('quantityBox').clear()
        self.browser.fill('quantityBox', 999)
        self.browser.find_by_xpath('//span[@id="a-autoid-1"]').click()
        self.browser.find_by_xpath('//div[@class="sc-proceed-to-checkout"]').click()
        self.browser.find_by_xpath('//*[@id="address-book-entry-0"]/div[2]/span/a').click()
        self.browser.find_by_xpath('//*[@id="shippingOptionFormId"]/div[3]/div/div/span[1]').click()
        self.browser.find_by_xpath('//*[@id="order-summary-container"]/div/div/div').click()
        try:
            self.browser.find_by_xpath('//a[contains(@class, "prime-nothanks-button")]').click()
        except Exception as e:
            logging.debug('prime offer not dismissed: %s', e)
        self.browser.find_by_xpath('//span[contains(@class, "place-order-button-link")]').click()

    def add_list(self):
        """Add the most recently viewed item to a list, then close the browser."""
        self.browser.find_by_xpath('//*[@id="nav-recently-viewed"]').click()
        self.browser.find_by_xpath('//*[@id="asin_list"]/div[1]/div/a/div[1]/span/div').click()
        self.browser.find_by_xpath('//input[@title="Add to List"]').click()
        if self.browser.is_element_present_by_text('Add to your list', wait_time=10):
            self.browser.find_by_xpath('//*[@id="WLHUC_result"]/form/div[2]/span[3]/span').click()
            self.browser.find_by_xpath('//i[@class="a-icon a-icon-close"]').click()
        time.sleep(2)
        self.browser.quit()
示例#36
0
}
browser = Browser('chrome', **executable_path)

# --- Login -----------------------------------------------------------------
browser.visit('https://kyfw.12306.cn/otn/login/init')

# The submit button is looked up before the form is filled and clicked after.
loginBtn = browser.find_by_id('loginSub')
for field_name, field_value in (
    ('loginUserDTO.user_name', 'username'),
    ('userDTO.password', 'password'),
):
    browser.fill(field_name, field_value)
loginBtn.click()

# --- Ticket search -----------------------------------------------------------
time.sleep(4)
browser.find_by_xpath('//li[@id="selectYuding"]/a').click()

# Change the station inputs' "type" attribute and make the date field
# writable so that all three can be filled directly.
for script in (
    'document.getElementById("fromStation").setAttribute("type","visiable")',
    'document.getElementById("toStation").setAttribute("type","visiable")',
    'document.getElementById("train_date").readOnly=false',
):
    browser.evaluate_script(script)

for field_name, field_value in (
    ('leftTicketDTO.from_station', 'TJP'),
    ('leftTicketDTO.to_station', 'MHL'),
    ('leftTicketDTO.train_date', '2017-11-30'),
):
    browser.fill(field_name, field_value)

# Hide the first "cal-wrap" element before clicking the query button.
browser.evaluate_script(
    'document.getElementsByClassName("cal-wrap")[0].style.display="none"')
time.sleep(2)
browser.find_by_text('查询').click()
def _ta_warn(message):
    """Print *message* framed by two rows of exclamation marks."""
    print("!" * 80)
    print(message)
    print("!" * 80)


def _ta_select_dates(br):
    """Pick check-in and check-out dates so that the page shows prices."""
    # The date bar holds the check-in and check-out fields.
    date_bar = br.find_by_xpath('//*[contains(@class, "meta_date_wrapper")]')

    # Check-in: open the calendar and click row 3 / column 6 of the second
    # month shown (intended as the third Friday of next month).
    date_bar.find_by_xpath('span[contains(@class, "meta_date_field   check_in")]/span')[0].click()
    rightcal = br.find_by_xpath('//div[contains(@class, "month")]')[1]
    rightcal.find_by_xpath("table/tbody/tr[3]/td[6]/div").click()

    # Check-out: row 3 / column 7 of the first month shown (intended as the
    # following Saturday).
    date_bar.find_by_xpath('span[contains(@class, "meta_date_field   check_out")]/span')[0].click()
    leftcal = br.find_by_xpath('//div[contains(@class, "month")]')[0]
    leftcal.find_by_xpath("table/tbody/tr[3]/td[7]/div").click()
    print("Dates selected.")


def _ta_wait_for_listings(br):
    """Poll every five seconds until the listing page is populated.

    Returns the listing elements once at least one listing exists and the
    photo, property-details and price elements are all present.
    """
    price_xpath = (
        '//*[contains(@class, "prw_rup")]/div/div/div/div[@class="headerContents"]/div[contains(@class, "price")]'
    )
    while True:
        listing_div = br.find_by_xpath('//*[contains(@class, "hotels_lf_condensed")]')
        xsts1 = br.is_element_present_by_xpath('//*[contains(@class, "photo_booking")]', wait_time=1)
        xsts2 = br.is_element_present_by_xpath('//*[contains(@class, "property_details")]', wait_time=1)
        xsts3 = br.is_element_present_by_xpath(price_xpath, wait_time=1)
        if len(listing_div) >= 1 and xsts1 and xsts2 and xsts3:
            return listing_div
        print("now waiting for DOIs to return")
        print("# of listings: {}".format(len(listing_div)))
        print("photo_booking exists: {}".format(xsts1))
        print("property_details exists: {}".format(xsts2))
        print("prw_up exists: {}".format(xsts3))
        time.sleep(5)


def _ta_scrape_listing(listing):
    """Extract one hotel listing into a dict; missing fields stay ``None``."""
    row = dict.fromkeys(("business_id", "hotel_name", "hotel_url", "hotel_img_url", "hotel_price"))

    try:
        biz_id = re.findall(r"hotel_(\d+)", listing["id"])
        row["business_id"] = biz_id[0] if biz_id else None
        print("business_id: {}".format(row["business_id"]))
    except Exception:
        _ta_warn("biz_id DOES NOT EXIST!")

    # prop and title start as None for every listing so that a failed lookup
    # cannot silently reuse the element of the previous listing.
    prop = None
    try:
        prop = listing.find_by_xpath('div/div/div/div[contains(@class, "property_details")]')
    except Exception:
        _ta_warn("prop DIV DOES NOT EXIST!")

    title = None
    try:
        title = prop.find_by_xpath('div/div[@class="listing_title"]')
        print(title.text)
        row["hotel_name"] = title.text
    except Exception:
        _ta_warn("TITLE DIV DOES NOT EXIST!")

    try:
        hotel_link = title.find_by_xpath("a")["href"]
        print(hotel_link)
        row["hotel_url"] = hotel_link
    except Exception:
        _ta_warn("hotel_link DOES NOT EXIST!")

    try:
        hotel_img = prop.find_by_xpath('div[@class="photo_booking"]/div/div/a/img')["src"]
        print("Hotel img URL: {}".format(hotel_img))
        row["hotel_img_url"] = hotel_img
    except Exception:
        _ta_warn("hotel_img DIV DOES NOT EXIST!")

    try:
        price_text = prop.find_by_xpath(
            'div[contains(@class, "prw_rup")]/div/div/div/div[@class="headerContents"]/div[contains(@class, "price")]'
        ).text
        price = re.findall(r"(\d+)", price_text)[0]
        print("Price: ${}".format(price))
        row["hotel_price"] = price
    except Exception:
        _ta_warn("price DIV DOES NOT EXIST!")

    return row


def splinter_scrape_ta_hotels(city_url="", city="new_haven", state="ct", write_to_db=False, max_pages=20):
    """Scrape the TripAdvisor hotel result pages of one city.

    Args:
        city_url: Hotel result URL to start from.  When empty, the URL is
            looked up with ``get_city("<city>_<state>")`` (both lower-cased).
        city: City name, used only for the URL lookup; it is replaced by
            the city read from the page's breadcrumbs.
        state: State abbreviation, used only for the URL lookup.
        write_to_db: When true, ad hotels and duplicates are removed and the
            result is appended to the ``ta_hotels`` table.
        max_pages: Maximum number of result pages to scrape.

    Returns:
        A ``pandas.DataFrame`` with one row per scraped listing.
    """
    columns = [
        "hotel_id",
        "hotel_url",
        "hotel_img_url",
        "hotel_name",
        "hotel_address",
        "hotel_city",
        "hotel_state",
        "hotel_rating",
        "hotel_latitude",
        "hotel_longitude",
        "hotel_price",
        "business_id",
        "review_count",
        "dog_review_count",
    ]
    # One frame per page.  Each page is built from its own rows only, so a
    # page is never added to the result twice.
    page_frames = []

    br = Browser()
    try:
        if city_url == "":
            city_url = get_city(city.lower() + "_" + state.lower())

        print("using the following url:")
        print("{}".format(city_url))

        br.visit(city_url)
        _ta_select_dates(br)

        # Wait a few seconds for prices to be retrieved.
        time.sleep(5)

        # City and state come from the breadcrumb trail.
        loclist = br.find_by_xpath('//*[contains(@id, "BREADCRUMBS")]')
        locstring = loclist.text.split(u"\u203a")
        hotel_city = city = locstring[2].lower()
        hotel_state = re.findall(r"\w+ \(([A-Z][A-Z])\)", locstring[1])[0].lower()

        more_pages = True
        page = 1
        print("starting scraper loop.")
        while more_pages and page <= max_pages:
            print("*" * 75)
            print("Now scraping page {} of {} of the hotel results".format(page, max_pages))
            print("*" * 75)
            print("waiting a few seconds before scraping...")
            time.sleep(np.random.uniform(8, 20))
            listing_div = _ta_wait_for_listings(br)
            print("Number of hotel listings on this page: {}".format(len(listing_div)))

            rows = []
            for listing in listing_div:
                rows.append(_ta_scrape_listing(listing))
                print("*" * 50)

            df = pd.DataFrame(rows, columns=columns)
            df["hotel_city"] = hotel_city
            df["hotel_state"] = hotel_state
            print("rows scraped on this page: {}".format(len(df)))
            page_frames.append(df)

            page += 1

            # Follow the "next" link while more pages are wanted; stop when
            # there is none.
            if page <= max_pages:
                nxt_btn = br.find_by_xpath(
                    '//div[contains(@class, "deckTools")]/div[contains(@class, "unified")]/a[contains(@class, "next")]'
                )
                if len(nxt_btn) > 0:
                    nxt_btn.click()
                else:
                    more_pages = False
    finally:
        # Always release the browser, also when scraping fails half-way.
        br.quit()

    # DataFrame.append was removed in pandas 2.0; concat works on all versions.
    if page_frames:
        bigdf = pd.concat(page_frames, ignore_index=True)
    else:
        bigdf = pd.DataFrame(columns=columns)

    if write_to_db:
        engine = cadb.connect_aws_db(write_unicode=True)
        bigdf = remove_ad_hotels(bigdf)
        remove_duplicate_hotels(bigdf, city, engine)
        bigdf.to_sql("ta_hotels", engine, if_exists="append", index=False)

    return bigdf
示例#38
0
            return b
        else:
            b.visit(order_url)
            b.find_by_xpath('//*[@id="Jprice"]/li[6]').click()
            b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a')
            time.sleep(5)
            loop(b)
    except Exception as e:
        b.reload()
        time.sleep(1)
        loop(b)


# Open the site, log in and go to the order page.
b = Browser(driver_name='chrome')
b.visit(url)
login(b)
b.visit(order_url)

# Choose the sixth price entry and click the link below it.
b.find_by_xpath('//*[@id="Jprice"]/li[6]').click()
b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a').click()
time.sleep(5)

# Keep retrying for as long as the browser is still on the event page; any
# other page title counts as success.
while True:
    loop(b)
    if b.title != '【五月天郑州演唱会门票】2017五月天 LIFE [ 人生无限公司 ] 巡回演唱会—郑州站-永乐票务':
        print('SUCCESS!')
        break
    b.find_by_xpath('//*[@id="Jprice"]/li[6]').click()
    # NOTE(review): unlike the first attempt above, this lookup is not
    # followed by .click() -- confirm whether that is intended.
    b.find_by_xpath('/html/body/div[11]/div[2]/div[4]/div[5]/a')
    time.sleep(2)
    loop(b)
示例#39
0
###
# Copyright (c) Rice University 2012-13
# This software is subject to
# the provisions of the GNU Affero General
# Public License version 3 (AGPLv3).
# See LICENCE.txt for details.
###


from splinter.browser import Browser

# Walk through the site's OpenID login: open the login page, submit the
# OpenID provider URL, sign in on the provider's form and approve access.
browser = Browser()
browser.visit("http://www.office.mikadosoftware.com")
browser.click_link_by_href("/login")
openidbox = browser.find_by_xpath("""id('column2')/form/p/input[1]""").first
openidbox.fill("https://www.google.com/accounts/o8/id")
browser.find_by_value("Sign in").first.click()
# The provider's sign-in form exposes the fields id="Email" and id="Passwd".
# NOTE(review): credentials are hard-coded in source; load them from the
# environment or a prompt instead of committing them.
browser.find_by_id("Email").first.fill("*****@*****.**")
browser.find_by_id("Passwd").first.fill("empathy1")
browser.find_by_value("Sign in").first.click()

browser.uncheck("remember_choices")
browser.find_by_value("Allow").first.click()
# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the bare print statements were a SyntaxError on Python 3.
print(browser.title)
print("")

for z in browser.cookies.driver.get_cookies():
    if z["domain"] == u"www.office.mikadosoftware.com" and z["name"] == u"session":
from splinter.browser import Browser
# Walk through the site's OpenID login: open the login page, submit the
# OpenID provider URL, sign in on the provider's form and approve access,
# then print the value of the site's "session" cookie.
browser = Browser()
browser.visit('http://www.office.mikadosoftware.com')
browser.click_link_by_href('/login')
openidbox = browser.find_by_xpath('''id('column2')/form/p/input[1]''').first
openidbox.fill('https://www.google.com/accounts/o8/id')
browser.find_by_value('Sign in').first.click()
# The provider's sign-in form exposes the fields id="Email" and id="Passwd".
# NOTE(review): credentials are hard-coded in source; load them from the
# environment or a prompt instead of committing them.
browser.find_by_id("Email").first.fill('*****@*****.**')
browser.find_by_id("Passwd").first.fill('empathy1')
browser.find_by_value('Sign in').first.click()

browser.uncheck('remember_choices')
browser.find_by_value('Allow').first.click()
# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the bare print statements were a SyntaxError on Python 3.
print(browser.title)
print('')

# Cookies are read from the underlying WebDriver; only the session cookie
# set for this site's domain is printed.
for z in browser.cookies.driver.get_cookies():
    if z['domain'] == u'www.office.mikadosoftware.com' and z['name'] == u'session':
        print(z['value'])
示例#41
0
class BrushTicket(object):
    """Ticket-buying helper that drives the 12306 web site through a browser.

    The instance opens a browser on construction. ``start_brush`` waits for
    the user to log in, repeatedly runs the ticket query and, once one of the
    wanted trains has the wanted seat class available, fills in the order,
    sends the e-mail/SMS notifications and exits the process.
    """

    # Seat-class label -> (index of that class's <td> in a result row,
    # value of the matching <option> in the passenger seat selector).
    _SEAT_TYPES = {
        '商务座特等座': (1, 9),
        '一等座': (2, 'M'),
        '二等座': (3, 0),
        '高级软卧': (4, 6),
        '软卧': (5, 4),
        '动卧': (6, 'F'),
        '硬卧': (7, 3),
        '软座': (8, 2),
        '硬座': (9, 1),
        '无座': (10, 1),
        '其他': (11, 1),
    }
    # Unrecognised labels use the same codes as '硬卧'.
    _DEFAULT_SEAT_TYPE = (7, 3)

    def __init__(self, passengers, from_time, from_station, to_station,
                 numbers, seat_type, receiver_mobile, receiver_email):
        """Store the booking parameters and open the browser.

        Args:
            passengers: passenger names; each is clicked by its text on the
                order page.
            from_time: travel date, written to the ``_jc_save_fromDate``
                cookie.
            from_station: value written to the ``_jc_save_fromStation``
                cookie.
            to_station: value written to the ``_jc_save_toStation`` cookie.
            numbers: wanted train numbers; each is normalised with
                ``str.capitalize()``.
            seat_type: seat-class label (a key of ``_SEAT_TYPES``); unknown
                labels fall back to ``_DEFAULT_SEAT_TYPE``.
            receiver_mobile: phone number for the SMS notification.
            receiver_email: address for the e-mail notification.
        """
        # Passenger names
        self.passengers = passengers
        # Departure and arrival stations
        self.from_station = from_station
        self.to_station = to_station
        # Train numbers
        self.numbers = [number.capitalize() for number in numbers]
        # Travel date
        self.from_time = from_time
        # Position of the seat class's <td> and its <option> value
        self.seat_type_index, self.seat_type_value = self._seat_codes(
            seat_type)
        # Notification targets
        self.receiver_mobile = receiver_mobile
        self.receiver_email = receiver_email
        # Main page URLs of the new 12306 site
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.init_my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.ticket_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        # Browser driver; download page:
        # https://sites.google.com/a/chromium.org/chromedriver/downloads
        self.driver_name = 'chrome'
        self.driver = Browser(driver_name=self.driver_name)

    @classmethod
    def _seat_codes(cls, seat_type):
        """Return ``(td_index, option_value)`` for a seat-class label."""
        return cls._SEAT_TYPES.get(seat_type, cls._DEFAULT_SEAT_TYPE)

    def do_login(self):
        """Open the login page and block until the user has logged in.

        Login itself is done by the user in the browser; this method only
        polls once per second until the browser URL equals ``init_my_url``.
        """
        self.driver.visit(self.login_url)
        sleep(1)
        # The user chooses how to log in (QR code or account).
        print('请扫码登陆或者账号登陆……')
        while self.driver.url != self.init_my_url:
            sleep(1)

    def start_brush(self):
        """Log in, then query repeatedly and book the first available train.

        Querying continues for as long as the browser stays on
        ``ticket_url``. Errors raised while handling one round of results are
        printed and the next round is tried. After a successful booking the
        notifications are sent and the process exits via ``sys.exit(0)``.
        """
        # Maximise the browser window
        self.driver.driver.maximize_window()
        # Log in
        self.do_login()
        # Go to the ticket query page
        self.driver.visit(self.ticket_url)
        sleep(1)
        try:
            print('开始刷票……')
            # Pre-load the query form through the site's cookies
            self.driver.cookies.add(
                {"_jc_save_fromStation": self.from_station})
            self.driver.cookies.add({"_jc_save_toStation": self.to_station})
            self.driver.cookies.add({"_jc_save_fromDate": self.from_time})
            self.driver.reload()
            count = 0
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text('查询').click()
                sleep(1)
                count += 1
                print('第%d次点击查询……' % count)
                try:
                    self._scan_results()
                except Exception as error_info:
                    # One bad round must not stop the polling loop.
                    print(error_info)
        except Exception as error_info:
            print(error_info)

    def _scan_results(self):
        """Check every listed train once and try to book a wanted one."""
        for start_time in self.driver.find_by_css('.start-t'):
            current_time = start_time.text
            current_tr = start_time.find_by_xpath('ancestor::tr')
            if not current_tr:
                print('当前车次异常')
                continue
            car_no = current_tr.find_by_css('.number').text
            if car_no not in self.numbers:
                continue
            # Read the seat cell once so all checks see the same value.
            availability = current_tr.find_by_tag('td')[
                self.seat_type_index].text
            if availability == '--':
                print('%s无此座位类型出售,继续……' %
                      (car_no + '(' + current_time + ')', ))
                sleep(0.2)
            elif availability == '无':
                print('%s无票,继续尝试……' %
                      (car_no + '(' + current_time + ')', ))
                sleep(0.2)
            else:
                # Tickets available: try to book
                print(car_no + '(' + current_time +
                      ')刷到票了(余票数:' + str(availability) +
                      '),开始尝试预订……')
                if self._book(current_tr):
                    self._notify_success()
                    sys.exit(0)

    def _book(self, current_tr):
        """Fill in and submit the order for one train row.

        Returns True when the order was confirmed, False when the site
        reported a submit error (the dialog is closed and the browser is
        sent back one step so scanning can continue).
        """
        current_tr.find_by_css('td.no-br>a')[0].click()
        sleep(1)
        # key_value is the 1-based position used in the seatType_<n> ids.
        for key_value, p in enumerate(self.passengers, 1):
            if '()' in p:
                p = p[:-1] + '学生' + p[-1:]
            # Select the passenger
            print('开始选择用户……')
            self.driver.find_by_text(p).last.click()
            # Select the seat class
            print('开始选择席别……')
            if self.seat_type_value != 0:
                self.driver.find_by_xpath(
                    "//select[@id='seatType_" + str(key_value) +
                    "']/option[@value='" + str(self.seat_type_value) +
                    "']").first.click()
            sleep(0.2)
            if p[-1] == ')':
                self.driver.find_by_id('dialog_xsertcj_ok').click()
        print('正在提交订单……')
        self.driver.find_by_id('submitOrder_id').click()
        sleep(2)
        # Check whether the submit result is normal
        submit_false_info = self.driver.find_by_id(
            'orderResultInfo_id')[0].text
        if submit_false_info != '':
            print(submit_false_info)
            self.driver.find_by_id('qr_closeTranforDialog_id').click()
            sleep(0.2)
            self.driver.find_by_id('preStep_id').click()
            sleep(0.3)
            return False
        print('正在确认订单……')
        self.driver.find_by_id('qr_submit_id').click()
        print('预订成功,请及时前往支付……')
        return True

    def _notify_success(self):
        """Send the e-mail and SMS notifications, each on a best-effort basis.

        A failure of one channel is printed and does not prevent the other
        channel from being tried.
        """
        try:
            self.send_mail(self.receiver_email,
                           '恭喜您,抢到票了,请及时前往12306支付订单!')
        except Exception as error_info:
            print(error_info)
        try:
            self.send_sms(self.receiver_mobile,
                          '您的验证码是:8888。请不要把验证码泄露给其他人。')
        except Exception as error_info:
            print(error_info)

    def send_sms(self, mobile, sms_info):
        """Send an SMS notification through the ihuyi test SMS service.

        Args:
            mobile: destination phone number.
            sms_info: message text.

        Returns:
            The raw response body returned by the service.
        """
        host = "106.ihuyi.com"
        sms_send_uri = "/webservice/sms.php?method=Submit"
        # NOTE(review): service credentials are hard-coded in source; move
        # them to configuration.
        account = "C59782899"
        pass_word = "19d4d9c0796532c7328e8b82e2812655"
        params = parse.urlencode({
            'account': account,
            'password': pass_word,
            'content': sms_info,
            'mobile': mobile,
            'format': 'json'
        })
        headers = {
            "Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain"
        }
        conn = httplib2.HTTPConnectionWithTimeout(host, port=80, timeout=30)
        try:
            conn.request("POST", sms_send_uri, params, headers)
            response = conn.getresponse()
            return response.read()
        finally:
            # Close the connection even when the request fails.
            conn.close()

    def send_mail(self, receiver_address, content):
        """Send the HTML e-mail notification.

        Args:
            receiver_address: recipient address.
            content: text placed inside the message body.
        """
        # Mail server connection details
        host = 'smtp.163.com'
        port = 25
        # NOTE(review): mailbox credentials are hard-coded in source; move
        # them to configuration.
        sender = '*****@*****.**'  # your sender mailbox address
        pwd = 'FatBoy666'  # the client authorization code, not the login password
        # Message
        receiver = receiver_address
        body = '<h2>温馨提醒:</h2><p>' + content + '</p>'
        msg = MIMEText(body, 'html', _charset="utf-8")
        msg['subject'] = '抢票成功通知!'
        msg['from'] = sender
        msg['to'] = receiver
        # Log in and send; the context manager ends the SMTP session
        # whether or not sending succeeds.
        with smtplib.SMTP(host, port) as s:
            s.login(sender, pwd)
            s.sendmail(sender, receiver, msg.as_string())
示例#42
0
文件: a.py 项目: ueu2715/python
def huoche():
    """Drive a Chrome session through the train-ticket query and booking flow.

    Reads its settings from module-level names defined elsewhere in the file
    (``ticket_url``, ``initmy_url``, ``starts``, ``ends``, ``dtime``,
    ``tnum``, ``seatcn``, ``seat``, ``order``, ``pa``, ``usernamecn``) and
    stores the browser in the global ``b``.

    Steps: log in, pre-load the query form through cookies, restrict the
    search to train ``tnum`` and seat ``seatcn``, poll the query until a
    booking link can be clicked, then tick the passengers listed in ``pa``.
    Any error outside the inner retry loops is reported with a traceback and
    ends the function.
    """
    global b
    b = Browser(driver_name="chrome")
    b.visit(ticket_url)

    # Keep calling login() while the page still shows the "登录" text; stop
    # early once the browser has reached initmy_url.
    while b.is_text_present(u"登录"):
        sleep(1)
        login()
        if b.url == initmy_url:
            break

    try:
        print(u"购票页面")
        # Go back to the ticket page
        b.visit(ticket_url)

        # Pre-load the query form
        b.cookies.add({"_jc_save_fromStation": starts})
        b.cookies.add({"_jc_save_toStation": ends})
        b.cookies.add({"_jc_save_fromDate": dtime})
        b.reload()

        b.find_by_text(u"更多选项").click()
        sleep(1)
        b.find_by_id("inp-train").fill(tnum)
        b.find_by_id("add-train").click()
        b.find_by_text(u"请选择")[1].click()
        b.find_by_text(seatcn)[1].click()
        b.execute_script("$.closeSelectSeat()")
        sleep(2)

        count = 0
        # Poll the query and click a booking link
        print("order is %s" % order)
        if order != 0:
            # order is the 1-based position of the "预订" link to click.
            while b.url == ticket_url:
                b.find_by_text(u"查询").click()
                count += 1
                print(u"循环点击查询... 第 %s 次" % count)
                sleep(5)
                try:
                    b.find_by_text(u"预订")[order - 1].click()
                except Exception:
                    # Exception (not a bare except) so Ctrl-C still stops
                    # the loop.
                    print(u"还没开始预订")
                    continue
        else:
            while b.url == ticket_url:
                b.find_by_text(u"查询").click()
                count += 1
                sleep(2)
                # Text of the cell whose id contains both seat and tnum.
                remaining = b.find_by_xpath(
                    "//td[contains(@id,'"+seat+"')][contains(@id,'"+tnum+"')]").text
                print(u"循环点击查询... 第 %s 次 tickets count is %s" % (count, remaining))
                try:
                    if remaining != "--" and remaining != u"无":
                        b.execute_script("$('a:contains("+tnum+")').closest('tr').children('td:last').children().click()")
                        break
                except Exception:
                    print(u"还没开始预订")
                    continue
                sleep(3)

        pat = pa.split(",")

        # Retry until every passenger has been ticked in a single pass.
        while True:
            try:
                for p in pat:
                    # The account holder's own name uses the second match,
                    # every other passenger the first.
                    match_index = 1 if p == usernamecn else 0
                    checkbox = b.find_by_text(p)[match_index]
                    if not checkbox.checked:
                        checkbox.check()
                    print(u"选择乘客:%s" % p)
                break
            except Exception:
                print(u"努力选中陛下的乘客信息中~~~")
            sleep(0.5)
        print(u"能做的都做了.....不再对浏览器进行任何操作")
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()