def get_captcha_login(self, response):
    """Solve the inverted-character captcha and submit the phone-number login form.

    The image carried in ``response.body`` is saved as ``captcha.gif`` and
    passed to zheye.  Each result is indexed as ``(y, x)`` here; the points
    are ordered by ``x``, halved and serialised into the ``captcha`` form
    field, which is posted together with ``response.meta["post_data"]``.

    Returns:
        A one-element list holding the ``scrapy.FormRequest`` that performs
        the login; its callback is ``self.check_login``.

    Raises:
        ValueError: if zheye reports no inverted character at all.
    """
    # Fall back to a dict, not "": the fields added below need item assignment.
    post_data = response.meta.get("post_data") or {}

    with open('captcha.gif', 'wb') as f:
        f.write(response.body)

    from zheye import zheye
    positions = zheye().Recognize('captcha.gif')
    print(positions)
    if len(positions) == 0:
        raise ValueError("zheye found no inverted characters in captcha.gif")

    if len(positions) == 2:
        # Stable sort on x keeps the original order when both x values tie.
        ordered = sorted(positions, key=lambda p: p[1])
    else:
        # Any other count: only the first detection is submitted.
        ordered = [positions[0]]

    # Coordinates are sent untruncated in both orderings (the original
    # applied int() to y in only one of its two branches).
    points = ", ".join("[%.2f, %.f]" % (p[1] / 2, p[0] / 2) for p in ordered)
    post_data["captcha"] = '{"img_size": [200, 44], "input_points": [%s]}' % points
    post_data['captcha_type'] = "cn"
    print(post_data)

    return [scrapy.FormRequest(
        url="https://www.zhihu.com/login/phone_num",
        formdata=post_data,
        headers=self.headers,
        callback=self.check_login,
    )]
def login(self):
    """Sign in to Zhihu through ``self.browser`` and return the session cookies.

    Every attempt reloads the sign-in page, clicks fixed screen coordinates to
    switch to account/password login, submits ``self.user_name`` /
    ``self.password`` and, if a captcha is shown, solves it (zheye for the
    inverted-character image, Chaojiying for the English one) before
    submitting again.  Attempts repeat until ``self.cheeck_login()`` is true.

    Returns:
        dict: cookie name -> cookie value as reported by the browser.
    """
    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    captcha_xpath = ('//*[@id="root"]/div/main/div/div/div[1]/div/form/'
                     'div[4]/div/div/label/input')

    def type_over(locate, selector, text):
        # Ctrl+A selects the current content so the new text replaces it.
        locate(selector).send_keys(Keys.CONTROL + "a")
        locate(selector).send_keys(text)

    def enter_credentials():
        type_over(self.browser.find_element_by_css_selector, account_css, self.user_name)
        type_over(self.browser.find_element_by_css_selector, password_css, self.password)

    def find_by_class(class_name):
        # A missing captcha element is the normal case, not an error.
        try:
            return self.browser.find_element_by_class_name(class_name)
        except Exception:
            return None

    def save_captcha(element, path):
        # The src attribute is a data URI; strip the header and encoded newlines.
        payload = element.get_attribute("src")
        payload = payload.replace("data:image/jpg;base64,", "").replace("%0A", "")
        image = base64.b64decode(payload)
        with open(path, "wb") as fh:
            fh.write(image)
        return image

    try:
        self.browser.maximize_window()
    except Exception:
        # Raises when the window is already maximised; safe to ignore.
        pass

    while not self.cheeck_login():
        self.browser.get('https://www.zhihu.com/signin')
        time.sleep(5)
        move(914, 329)  # "account/password login" tab
        click()
        time.sleep(2)
        enter_credentials()
        move(955, 566)
        click()

        if self.cheeck_login():
            cookies = self.browser.get_cookies()
            print(cookies)
            return {cookie['name']: cookie['value'] for cookie in cookies}

        english_captcha_element = find_by_class("Captcha-englishImg")
        chinese_captcha_element = find_by_class("Captcha-chineseImg")

        if chinese_captcha_element:
            time.sleep(1)
            location = chinese_captcha_element.location
            x_relative = location["x"]
            y_relative = location["y"]
            # Fixed height of the browser chrome above the page.  The original
            # also queried outerHeight - innerHeight but then overwrote it
            # with this constant, so only the constant is kept.
            browser_navigation_panel_height = 70
            time.sleep(3)
            save_captcha(chinese_captcha_element, "yzm_cn.jpeg")
            positions = zheye().Recognize('yzm_cn.jpeg')
            if len(positions) == 0:
                # Nothing recognised: start over with a fresh page and captcha.
                continue

            def screen_point(position):
                # zheye entries are used as (y, x); the saved image is twice
                # the on-page size, hence the division by two.
                return (int(position[1] / 2) + x_relative,
                        int(position[0] / 2) + y_relative + browser_navigation_panel_height)

            if len(positions) == 2:
                for position in sorted(positions, key=lambda p: p[0]):
                    target_x, target_y = screen_point(position)
                    move(target_x, target_y)
                    click()
            else:
                # Single inverted character: use the first detection as is.
                target_x, target_y = screen_point(positions[0])
                time.sleep(5)
                move(target_x, target_y)
                time.sleep(5)
                click()

            enter_credentials()
            move(954, 619)
            click()

        if english_captcha_element:
            time.sleep(1)
            image = save_captcha(english_captcha_element, "yzm_en.jpeg")
            cjy_cli = chaojiying.Chaojiying_Client(
                settings.CJY_USERNAME, settings.CJY_PASSWORD, '96001')
            json_data = cjy_cli.PostPic(image, 1902)
            if json_data["err_no"] != 0:
                print("识别失败,继续尝试!")
                # Retry the whole attempt instead of returning None to the caller.
                continue
            print("识别成功!")
            code = json_data["pic_str"]
            print(code)
            type_over(self.browser.find_element_by_xpath, captcha_xpath, code)
            move(956, 600)
            click()

    time.sleep(5)
    return {cookie['name']: cookie['value'] for cookie in self.browser.get_cookies()}
def login(username='******', password='shenzhenmao18'):
    """Log in to Zhihu with the module-level ``browser`` and answer the captcha.

    Args:
        username: account name typed into the ``username`` field.
        password: password typed into the ``password`` field.
            NOTE(review): the defaults keep the values that were hard-coded in
            the function body; credentials should not live in source control.

    Raises:
        SystemExit: with code ``"xxx"`` when neither captcha image is present
            after the first submit.
    """
    login_url = 'https://www.zhihu.com/signin'
    print('xxxxxxxx')
    browser.get(login_url)
    print('xxxxxxxx')
    time.sleep(1)
    print('open:' + login_url)

    browser.find_elements_by_class_name('SignFlow-tab')[1].click()
    browser.find_element_by_name('username').send_keys(username)
    print('username')
    browser.find_element_by_name('password').send_keys(password)
    print('password')
    browser.find_element_by_class_name('SignFlow-submitButton').click()
    time.sleep(3)

    # The Chinese captcha is looked for first, the English one as a fallback.
    img = None
    is_ch = False
    for class_name in ('Captcha-chineseImg', 'Captcha-englishImg'):
        try:
            img = browser.find_element_by_class_name(class_name)
        except Exception:
            continue
        is_ch = class_name == 'Captcha-chineseImg'
        break
    if img is None:
        print('img not found')
        # Same effect as exit("xxx") without relying on the site builtin.
        raise SystemExit("xxx")
    is_en = not is_ch

    print("size:")
    print(img.size)
    print('get img src')
    imgsrc = img.get_attribute('src')
    print(imgsrc)

    base64_img = imgsrc.split(',', 1)[1]
    # Undo URL escaping (e.g. %0A) BEFORE measuring the length; padding
    # computed on the escaped text is wrong whenever escapes are present.
    base64_img = "".join(parse.unquote(base64_img).split())
    base64_img += '=' * (-len(base64_img) % 4)
    imgdata = base64.b64decode(base64_img)

    pic_name = 'catcha_ch.jpg' if is_ch else 'catcha_en.gif'
    with open(pic_name, 'wb') as fh:
        fh.write(imgdata)

    if is_ch:
        positions = zheye().Recognize(pic_name)
        print(positions)
        mouse_action = ActionChains(browser)
        for pos in positions:
            first, second = pos
            print(first, second)
            first, second = first / 2, second / 2
            print(first, second)
            # The second component is passed as the horizontal offset.
            mouse_action.move_to_element_with_offset(img, second, first).click().perform()
            # NOTE(review): the pause and extra click are assumed to belong to
            # the loop body; the original layout was lost -- confirm.
            time.sleep(2)
            mouse_action.click().perform()
    if is_en:
        # NOTE(review): the result of ocr() is discarded and never typed into
        # the form -- confirm whether ocr() fills the field itself.
        ocr(pic_name)

    browser.find_element_by_class_name('SignFlow-submitButton').click()
    time.sleep(1)
    browser.find_element_by_class_name('SignFlow-submitButton').click()
def start_requests(self):
    """Return the first request, authenticated with Zhihu cookies.

    Cookies pickled in ``<BASE_DIR>/cookies/zhihu.cookie`` are reused when
    present.  Otherwise a Chrome instance listening on 127.0.0.1:9222 is
    driven through the sign-in form (solving inverted-character captchas with
    zheye and English ones with Yundama) and the resulting cookies are pickled
    to that same file.

    Returns:
        A one-element list with the ``scrapy.Request`` for
        ``self.start_urls[0]`` carrying the cookies and ``self.headers``.

    Raises:
        ValueError: if zheye finds no inverted character in a Chinese captcha.
    """
    cookie_path = os.path.join(BASE_DIR, "cookies", "zhihu.cookie")

    cookies = []
    if os.path.exists(cookie_path):
        # NOTE(review): pickle can execute code on load; acceptable only while
        # this file is written exclusively by this spider.
        # The original tested zhihu.cookie but opened lagou.cookie.
        with open(cookie_path, "rb") as fh:
            cookies = pickle.load(fh)

    if not cookies:
        import base64
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.keys import Keys
        from zheye import zheye

        account_css = ".SignFlow-accountInput.Input-wrapper input"
        password_css = ".SignFlow-password input"
        captcha_xpath = ('//*[@id="root"]/div/main/div/div/div[1]/div/form/'
                         'div[4]/div/div/div[1]/input')

        chrome_option = Options()
        chrome_option.add_argument('--disable-extensions')
        chrome_option.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
        browser = webdriver.Chrome(
            executable_path=r"D:\scrapytest\ArticleSpider\venv\Scripts\chromedriver.exe",
            chrome_options=chrome_option)

        def find_by_class(class_name):
            # A missing captcha element is the normal case, not an error.
            try:
                return browser.find_element_by_class_name(class_name)
            except Exception:
                return None

        def save_captcha(element, path):
            payload = element.get_attribute("src")
            payload = payload.replace("data:image/jpg;base64,", '').replace("%0A", "")
            with open(path, "wb") as fh:
                fh.write(base64.b64decode(payload))

        browser.get("https://www.zhihu.com/signin")
        browser.find_element_by_xpath(
            '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[1]/div[2]').click()
        # NOTE(review): account, password and Yundama credentials are
        # hard-coded below; they should come from settings.
        browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys("13643095504")
        time.sleep(1)
        browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
        time.sleep(1)
        browser.find_element_by_css_selector(password_css).send_keys("634498qxp@")
        time.sleep(2)
        move(678, 511)
        click()
        time.sleep(6)

        while True:
            try:
                # find_element_by_class_name rejects compound class names, so
                # the three classes are matched with a CSS selector instead.
                browser.find_element_by_css_selector(
                    ".Popover.PushNotifications.AppHeader-notifications")
            except Exception:
                pass
            else:
                break  # logged in; cookies are collected after the loop

            try:
                browser.maximize_window()
            except Exception:
                pass

            english_captcha = find_by_class('Captcha-englishImg')
            chinese_captcha = find_by_class('Captcha-chineseImg')

            if chinese_captcha:
                location = chinese_captcha.location
                x_relative = location["x"]
                y_relative = location["y"]
                browser_navigation_panel_height = browser.execute_script(
                    'return window.outerHeight - window.innerHeight;')
                save_captcha(chinese_captcha, "yzm_cn.jpeg")
                position = zheye().Recognize('yzm_cn.jpeg')
                if len(position) == 0:
                    raise ValueError("zheye found no inverted characters in yzm_cn.jpeg")
                if len(position) == 2:
                    # Entries are used as (y, x); click left-to-right.
                    targets = sorted(position, key=lambda p: p[1])
                else:
                    targets = [position[0]]
                for index, target in enumerate(targets):
                    if index:
                        time.sleep(2)
                    # The saved image is twice the on-page size.
                    move(x_relative + int(target[1] / 2),
                         y_relative + browser_navigation_panel_height + int(target[0] / 2))
                    click()
                time.sleep(1)
                move(663, 569)
                click()

            if english_captcha:
                save_captcha(english_captcha, "yzm_en.jpeg")
                from tools.yundama_requests import YDMHttp
                yundama = YDMHttp("zzzzqxp", "634498qxp", 8954,
                                  "fd03eddd0dc7ebe6eb4ce5c00012bb31")
                code = yundama.decode("yzm_en.jpeg", 5000, 60)
                while code == "":
                    code = yundama.decode("yzm_en.jpeg", 5000, 60)
                browser.find_element_by_xpath(captcha_xpath).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_xpath(captcha_xpath).send_keys(code)
                time.sleep(1)
                move(663, 544)
                click()

            time.sleep(2)

        time.sleep(1)
        browser.get("https://www.zhihu.com/")
        cookies = browser.get_cookies()
        # Persist next to the path that is read on start-up so later runs can
        # skip selenium altogether.
        with open(cookie_path, 'wb') as fh:
            pickle.dump(cookies, fh)

    cookie_dict = {cookie["name"]: cookie["value"] for cookie in cookies}
    return [scrapy.Request(url=self.start_urls[0], dont_filter=True,
                           cookies=cookie_dict, headers=self.headers)]
def start_requests(self):
    """Log in to Zhihu with selenium and return the first authenticated request.

    A Chrome instance listening on 127.0.0.1:9222 is pointed at the sign-in
    page.  The credentials are submitted, and while the notification bell is
    not visible any captcha is solved (zheye plus pyautogui clicks for the
    inverted-character image, Yundama for the English one) and the
    credentials are submitted again.  Any failure in that sequence is treated
    as "already logged in" and reported on stdout.

    Returns:
        A one-element list with the ``scrapy.Request`` for
        ``self.start_urls[0]`` carrying the browser's cookies.
    """
    account_css = '.SignFlow-accountInput > input:nth-child(1)'
    password_css = ('div.SignFlowInput:nth-child(1) > div:nth-child(1) > '
                    'input:nth-child(1)')
    captcha_xpath = ('//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/'
                     'div[3]/div/div/div[1]/input')

    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-extensions')
    chrome_options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
    browser = webdriver.Chrome(
        executable_path='/usr/local/chromedriver/chromedriver',
        chrome_options=chrome_options)

    def submit_credentials(password):
        # clear() does not work on these inputs, so Ctrl+A selects the
        # pre-filled value; the pause lets the browser fill it in first.
        time.sleep(3)
        browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys("15986744115")
        browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(password_css).send_keys(password)
        browser.find_element_by_css_selector('button.Button:nth-child(5)').click()
        time.sleep(5)

    def find_by_class(class_name):
        # A missing captcha element is the normal case, not an error.
        try:
            return browser.find_element_by_class_name(class_name)
        except Exception:
            return None

    try:
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    try:
        # NOTE(review): the first attempt uses a different password literal
        # from the retries; kept exactly as written -- confirm it is intended.
        submit_credentials('**********')

        while True:
            try:
                browser.find_element_by_css_selector('.Zi--Bell > path:nth-child(1)')
            except Exception:
                pass
            else:
                break  # the notification bell is only there when logged in

            english_captcha_element = find_by_class('Captcha-englishImg')
            chinese_cptcha_element = find_by_class('Captcha-chineseImg')

            if chinese_cptcha_element:
                element_position = chinese_cptcha_element.location
                x_relative = element_position['x']
                y_relative = element_position['y']
                # Fixed height of the browser chrome above the page.
                browser_navigtion_panel_height = 103

                base64_text = chinese_cptcha_element.get_attribute('src')
                code = base64_text.replace('data:image/jpg;base64,', '').replace('%0A', '')
                with open('yzm_cn.jpeg', 'wb') as fh:
                    fh.write(base64.b64decode(code))

                from zheye import zheye
                z = zheye()
                try:
                    positions = z.Recognize('yzm_cn.jpeg')
                except Exception:
                    positions = []
                if len(positions) == 0:
                    # Recognition failed or found nothing: resubmit to get a
                    # new captcha and try again.
                    submit_credentials('29q82q8t7q')
                    continue

                if len(positions) == 2:
                    # Entries are used as (y, x); click left-to-right.
                    targets = sorted(positions, key=lambda p: p[1])
                else:
                    targets = [positions[0]]
                for target in targets:
                    time.sleep(1)
                    # The saved image is twice the on-page size.
                    pyautogui.moveTo(
                        x_relative + int(target[1] / 2),
                        y_relative + browser_navigtion_panel_height + int(target[0] / 2),
                        duration=0.5)
                    pyautogui.click()

            if english_captcha_element:
                base64_text = english_captcha_element.get_attribute('src')
                code = base64_text.replace('data:image/jpg;base64,', '').replace(
                    '%0A', '').replace(r'\n', '')
                with open('yzm_en.jpeg', 'wb') as fh:
                    fh.write(base64.b64decode(code))

                from tool.yundama_requests import YDMHttp
                # NOTE(review): service credentials are hard-coded here.
                yundama = YDMHttp('915263031', 'slzkszkbjz', 7346,
                                  '8da3657e176de5e60bce4bf449eae130')
                code = yundama.decode('yzm_en.jpeg', 5000, 60)
                while code == '':
                    code = yundama.decode('yzm_en.jpeg', 5000, 60)
                browser.find_element_by_xpath(captcha_xpath).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_xpath(captcha_xpath).send_keys(code)

            if english_captcha_element or chinese_cptcha_element:
                # The form is reset after a captcha; enter everything again.
                submit_credentials('29q82q8t7q')
    except Exception as exc:
        # Reached when the sign-in form is absent (already logged in), but
        # also on genuine failures, so at least say what happened.
        print("zhihu login automation stopped: %r" % (exc,))

    cookie_dict = {cookie['name']: cookie['value'] for cookie in browser.get_cookies()}
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def login(self):
    """Sign in to Zhihu through ``self.browser``, close it and return the cookies.

    Every attempt reloads the sign-in page, selects the first
    ``.SignFlow-tab``, submits ``self.user_name`` / ``self.pass_word`` and, if
    a captcha is shown, solves it (zheye plus mouse clicks for the
    inverted-character image, Chaojiying for the English one) before
    submitting again.  Attempts repeat until ``self.check_login()`` is true.

    Returns:
        dict: cookie name -> cookie value as reported by the browser.
    """
    import base64
    import time
    from mouse import move, click

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    submit_css = ".Button.SignFlow-submitButton"
    captcha_xpath = ('//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/'
                     'div[3]/div/div/div[1]/input')

    def enter_credentials():
        # Ctrl+A selects the current content so the new text replaces it.
        self.browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
        self.browser.find_element_by_css_selector(account_css).send_keys(self.user_name)
        self.browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
        self.browser.find_element_by_css_selector(password_css).send_keys(self.pass_word)

    def press_submit():
        self.browser.find_element_by_css_selector(submit_css).click()

    def find_by_class(class_name):
        # A missing captcha element is the normal case, not an error.
        try:
            return self.browser.find_element_by_class_name(class_name)
        except Exception:
            return None

    def save_captcha(element, path):
        # The src attribute is a data URI; strip the header and encoded newlines.
        payload = element.get_attribute("src")
        payload = payload.replace('data:image/jpg;base64,', '').replace("%0A", "")
        image = base64.b64decode(payload)
        with open(path, "wb") as fh:
            fh.write(image)
        return image

    try:
        # Maximise so that element positions map to stable screen coordinates.
        self.browser.maximize_window()
    except Exception:
        pass

    while not self.check_login():
        self.browser.get("https://www.zhihu.com/signin")
        time.sleep(5)
        self.browser.find_element_by_css_selector(".SignFlow-tabs div.SignFlow-tab").click()
        browser_navigation_panel_height = self.browser.execute_script(
            'return window.outerHeight - window.innerHeight;')
        time.sleep(5)
        enter_credentials()
        press_submit()
        time.sleep(15)

        print("判断登录是否成功")
        if self.check_login():
            break

        english_captcha_element = find_by_class("Captcha-englishImg")
        chinese_captcha_element = find_by_class("Captcha-chineseImg")

        if chinese_captcha_element:
            y_absolute_coord = (chinese_captcha_element.location['y']
                                + browser_navigation_panel_height)
            x_absolute_coord = chinese_captcha_element.location['x']

            save_captcha(chinese_captcha_element, "yzm_cn.jpeg")
            from zheye import zheye
            positions = zheye().Recognize("yzm_cn.jpeg")
            if len(positions) == 0:
                # Nothing recognised: start over with a fresh page and captcha.
                continue
            if len(positions) == 2:
                # Entries are used as (y, x); click left-to-right.
                targets = sorted(positions, key=lambda p: p[1])
            else:
                targets = [positions[0]]
            for target in targets:
                # Saved-image coordinates are halved before clicking.
                move(x_absolute_coord + int(target[1] / 2),
                     y_absolute_coord + int(target[0] / 2))
                click()

            enter_credentials()
            press_submit()
            press_submit()

        if english_captcha_element:
            image = save_captcha(english_captcha_element, "yzm_en.jpeg")
            cjy_cli = chaojiying.Chaojiying_Client(
                settings.CJY_USERNAME, settings.CJY_PASSWORD, '96001')
            json_data = cjy_cli.PostPic(image, 1902)
            if json_data["err_no"] != 0:
                print("识别失败,继续尝试!")
                # Retry the whole attempt instead of returning None to the caller.
                continue
            print("识别成功!")
            code = json_data["pic_str"]
            print("英文验证码: {code}".format(code=code))

            # Select-all goes to the captcha field itself (the original sent
            # it to the password field, leaving stale captcha text in place).
            self.browser.find_element_by_xpath(captcha_xpath).send_keys(Keys.CONTROL + "a")
            self.browser.find_element_by_xpath(captcha_xpath).send_keys(code)

            enter_credentials()
            press_submit()

    # Give the profile page time to load after a successful login.
    time.sleep(10)
    cookies = self.browser.get_cookies()
    print(cookies)
    cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}
    self.browser.close()
    return cookie_dict
def start_requests(self):
    """Log in to Zhihu with selenium and return the first authenticated request.

    A manually started Chrome listening on 127.0.0.1:9222 is pointed at the
    sign-in page and the credentials are submitted.  While the notification
    element is not visible, an inverted-character captcha is solved with
    zheye and pyautogui clicks and the credentials are submitted again.

    NOTE(review): an English captcha is not handled by this variant.

    Returns:
        A one-element list with the ``scrapy.Request`` for
        ``self.start_urls[0]`` carrying the browser's cookies.  (The original
        fell off the end and returned ``None``, which Scrapy cannot iterate.)

    Raises:
        ValueError: if zheye finds no inverted character in the captcha.
    """
    import base64
    from selenium.webdriver.chrome.options import Options

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    submit_css = ".Button.SignFlow-submitButton"

    chrome_options = Options()
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path="/usr/local/bin/chromedriver",
        chrome_options=chrome_options)
    try:
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    # Height of the browser chrome above the page, needed to turn page
    # coordinates into screen coordinates for pyautogui.
    browser_navigation_panel_height = browser.execute_script(
        'return window.outerHeight - window.innerHeight;')

    browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL, "a")
    browser.find_element_by_css_selector(account_css).send_keys("18612345678")
    browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL, "a")
    browser.find_element_by_css_selector(password_css).send_keys("123456")
    browser.find_element_by_css_selector(submit_css).click()
    time.sleep(5)

    while True:
        try:
            # find_element_by_class_name rejects compound class names, so the
            # three classes are matched with a CSS selector instead.
            browser.find_element_by_css_selector(
                ".Popover.PushNotifications.AppHeader-notifications")
        except Exception:
            pass
        else:
            break

        try:
            chinese_captcha_element = browser.find_element_by_class_name(
                "Captcha-chineseImg")
        except Exception:
            chinese_captcha_element = None

        if chinese_captcha_element:
            y_absolute_coord = (chinese_captcha_element.location['y']
                                + browser_navigation_panel_height)
            x_absolute_coord = chinese_captcha_element.location['x']

            base64_text = chinese_captcha_element.get_attribute("src")
            code = base64_text.replace('data:image/jpg;base64,', '').replace("%0A", "")
            with open("yzm_cn.jpeg", "wb") as fh:
                fh.write(base64.b64decode(code))

            from zheye import zheye
            positions = zheye().Recognize("yzm_cn.jpeg")
            if len(positions) == 0:
                raise ValueError("zheye found no inverted characters in yzm_cn.jpeg")
            if len(positions) == 2:
                # Entries are used as (y, x); click left-to-right.
                targets = sorted(positions, key=lambda p: p[1])
            else:
                targets = [positions[0]]
            # Every detected character is clicked (the original clicked only
            # the first of two).  The saved image is twice the on-page size.
            for target in targets:
                pyautogui.moveTo(x_absolute_coord + int(target[1] / 2),
                                 y_absolute_coord + int(target[0] / 2))
                pyautogui.click()

            # Submit again now that the captcha has been answered.
            # NOTE(review): "xxx" are placeholders left as written -- replace
            # with the real account and password.
            browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
            browser.find_element_by_css_selector(account_css).send_keys("xxx")
            browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
            browser.find_element_by_css_selector(password_css).send_keys("xxx")
            browser.find_element_by_css_selector(submit_css).click()
            browser.find_element_by_css_selector(submit_css).click()

    cookie_dict = {cookie['name']: cookie['value'] for cookie in browser.get_cookies()}
    return [scrapy.Request(url=self.start_urls[0], dont_filter=True, cookies=cookie_dict)]
def start_requests(self):
    """Log in to Zhihu with selenium and return the first authenticated request.

    A Chrome instance listening on 127.0.0.1:9222 is pointed at the sign-in
    page.  Until ``AppHeader-profile`` is found, the second sign-in tab is
    selected and either the credentials are submitted or a captcha is solved
    first (zheye plus mouse clicks for the inverted-character image,
    Chaojiying for the English one).  After login the cookies are pickled on
    a best-effort basis and attached to the request.

    Returns:
        A one-element list with the ``scrapy.Request`` for
        ``self.start_urls[0]`` carrying the browser's cookies.

    Raises:
        ValueError: if zheye finds no inverted character in a Chinese captcha.
    """
    import pickle

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    captcha_xpath = ('//*[@id="root"]/div/main/div/div/div/div[1]/div/form/'
                     'div[4]/div/div/label/input')
    cookie_file = 'C:\\LTG\\code\\ArticleSpider\\ArticleSpider\\cookies\\zhihu.cookie'

    chrome_options = Options()
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(chrome_options=chrome_options)

    def submit_credentials():
        # NOTE(review): account and password are hard-coded here.
        browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys("17628040175")
        browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(password_css).send_keys("2014@ltg")
        browser.find_element_by_css_selector(".Button.SignFlow-submitButton").click()

    def find_captcha(class_name):
        # Return (element, base64 payload), or (None, None) when the image is
        # absent or its src carries the literal payload 'null'.
        try:
            element = browser.find_element_by_class_name(class_name)
            payload = element.get_attribute("src")
            payload = payload.replace('data:image/jpg;base64,', '').replace("%0A", "")
        except Exception:
            return None, None
        if payload == 'null':
            return None, None
        return element, payload

    try:
        # Maximise so that element positions map to stable screen coordinates.
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    browser_navigation_panel_height = browser.execute_script(
        'return window.outerHeight - window.innerHeight;')
    time.sleep(5)
    browser.implicitly_wait(5)

    while True:
        try:
            browser.find_element_by_class_name("AppHeader-profile")
        except Exception:
            pass
        else:
            break  # logged in; cookies are collected after the loop

        try:
            browser.find_elements_by_css_selector(".SignFlow-tabs div")[1].click()
        except Exception:
            pass

        # Each captcha keeps its own payload; the original shared a single
        # variable, so the English image could be decoded from the wrong data.
        english_captcha_element, english_code = find_captcha("Captcha-englishImg")
        chinese_captcha_element, chinese_code = find_captcha("Captcha-chineseImg")

        if not english_captcha_element and not chinese_captcha_element:
            submit_credentials()
            browser.implicitly_wait(5)

        if chinese_captcha_element:
            y_absolute_coord = (chinese_captcha_element.location['y']
                                + browser_navigation_panel_height)
            x_absolute_coord = chinese_captcha_element.location['x']

            with open("yzm_cn.jpeg", "wb") as fh:
                fh.write(base64.b64decode(chinese_code))

            from zheye import zheye
            positions = zheye().Recognize("yzm_cn.jpeg")
            if len(positions) == 0:
                raise ValueError("zheye found no inverted characters in yzm_cn.jpeg")
            if len(positions) == 2:
                # Entries are used as (y, x); click left-to-right.
                targets = sorted(positions, key=lambda p: p[1])
            else:
                targets = [positions[0]]
            for target in targets:
                # Saved-image coordinates are halved before clicking.
                move(x_absolute_coord + int(target[1] / 2),
                     y_absolute_coord + int(target[0] / 2))
                click()

            submit_credentials()

        if english_captcha_element:
            im = base64.b64decode(english_code)
            with open("yzm_en.jpeg", "wb") as fh:
                fh.write(im)

            # NOTE(review): service credentials are hard-coded here.
            chaojiying = Chaojiying_Client('yiqieanran01', '1qazxsw23edc', '904611')
            text = chaojiying.PostPic(im, 1902)['pic_str']
            while text == '':
                text = chaojiying.PostPic(im, 1902)['pic_str']

            browser.find_element_by_xpath(captcha_xpath).send_keys(text)
            submit_credentials()

        browser.implicitly_wait(5)

    Cookies = browser.get_cookies()
    print(Cookies)
    # Saving is best-effort: a missing directory must not discard a
    # successful login (the original swallowed the error and returned None).
    try:
        with open(cookie_file, 'wb') as f:
            pickle.dump(Cookies, f)
    except (IOError, OSError) as exc:
        print("could not save cookies to %s: %s" % (cookie_file, exc))

    cookie_dict = {cookie['name']: cookie['value'] for cookie in Cookies}
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def get_pwd_login(self):
    """Log in to Zhihu through the password tab and return the cookies.

    Fills the account/password form, submits it, then handles whichever
    captcha is shown (an English image sent to ``identify``, or the
    inverted-Chinese-character image solved with zheye and clicked with
    the ``mouse`` package), submits again and collects the cookies.

    Returns:
        dict: cookie name -> cookie value taken from
        ``self.driver.get_cookies()``.
    """
    t = random.uniform(0.5, 1)  # random delay reused between form actions
    pwd_login = self.driver.find_element_by_xpath(
        '//div[@class="SignFlow-tab"]')
    pwd_login.click()
    time.sleep(1)
    username = self.driver.find_element_by_xpath(
        '//div[@class="SignFlow-account"]/div/label/input')
    username.send_keys('账号')
    time.sleep(t)
    password = self.driver.find_element_by_xpath(
        '//div[@class="SignFlow-password"]/div/label/input')
    password.send_keys('密码')
    time.sleep(t)
    login_button = self.driver.find_element_by_xpath(
        '//button[@class="Button SignFlow-submitButton Button--primary Button--blue"]'
    )
    login_button.click()
    time.sleep(t)

    # --- English captcha -------------------------------------------------
    try:
        image_code = self.driver.find_element_by_xpath(
            '//div[@class="Captcha-englishContainer"]/img').get_attribute(
                'src')
        print(image_code)
        if 'null' not in str(image_code):
            print('******************')
            print('^^^^^^^^^^^^^^^^^')
            # Save the captcha image so the recognition helper can read it.
            request.urlretrieve(image_code, './Images/zh_code.png')
            time.sleep(1)
            # TODO: the recognition service can fail; detect that and retry
            # a bounded number of times.
            image_code_cont = identify(r'./Images/zh_code.png')
            print(image_code_cont)
            # Type the recognised text into the captcha input.
            image_code_input = self.driver.find_element_by_xpath(
                '//div[@class="Captcha SignFlow-captchaContainer"]/div/div/label/input'
            )
            image_code_input.send_keys(image_code_cont)
    except Exception as e:
        # A missing captcha container just means no English captcha was
        # shown; anything else is reported.
        if '//div[@class="Captcha-englishContainer"]' not in str(e):
            print(e)

    # --- Chinese (inverted character) captcha ----------------------------
    try:
        zh_code_cn = self.driver.find_element_by_xpath(
            '//div[@class="Captcha-chineseContainer"]/img').get_attribute(
                'src')
        if 'null' not in str(zh_code_cn):
            chinese_captcha_element = self.driver.find_element_by_class_name(
                "Captcha-chineseImg")
            ele_postion = chinese_captcha_element.location
            x_relative = ele_postion["x"]
            y_relative = ele_postion["y"]
            # Hard-coded height of the browser chrome above the page.
            browser_navigation_panel_height = 70
            base64_text = chinese_captcha_element.get_attribute("src")
            code = base64_text.replace("data:image/jpg;base64,",
                                       "").replace("%0A", "")
            with open("./Images/zh_code_cn.png", "wb") as fh:
                fh.write(base64.b64decode(code))

            from zheye import zheye
            from mouse import move, click
            z = zheye()
            positions = z.Recognize('./Images/zh_code_cn.png')
            print(positions)

            # The code treats each recognised position as (y, x); store
            # them as [x, y], left-most character first.
            if len(positions) == 2:
                first, second = positions[0], positions[1]
                if first[1] > second[1]:
                    first, second = second, first
                last_position = [[first[1], first[0]],
                                 [second[1], second[0]]]
            else:
                last_position = [[positions[0][1], positions[0][0]]]

            for x, y in last_position:
                # Coordinates are halved (presumably the image is shown at
                # half its pixel size -- TODO confirm); +15 shifts right on
                # the x axis, +30 shifts down on the y axis.
                move(
                    x_relative + int(x / 2) + 15,
                    y_relative + browser_navigation_panel_height +
                    int(y / 2) + 30)
                click()
            print(last_position)
    except Exception as e:
        if '//div[@class="Captcha-chineseContainer"]' not in str(e):
            print(e)

    time.sleep(3)
    login_button = self.driver.find_element_by_xpath(
        '//button[@class="Button SignFlow-submitButton Button--primary Button--blue"]'
    )
    ActionChains(
        self.driver).move_to_element(login_button).click().perform()
    time.sleep(2)
    print('------------')

    # Collect the cookies (this comment used to be a bare identifier, which
    # raised NameError before the cookies could be read).
    cookie = {}
    for item in self.driver.get_cookies():
        cookie[item['name']] = item['value']
    return cookie
def start_requests(self):
    """Log in to Zhihu with a Selenium-driven Chrome, then start crawling.

    Submits the sign-in form and, until the logged-in header is visible,
    keeps solving whichever captcha is displayed (inverted Chinese
    characters via zheye plus ``mouse`` clicks, or an English image via the
    ``YDMHttp`` client) and re-submitting.  Once logged in, each cookie is
    pickled to ``./ArticleSpider/cookies/zhihu/<name>.zhihu`` and the
    browser is closed.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        login cookies.
    """
    import base64
    import pickle
    import time

    from selenium import webdriver

    account_selector = ".SignFlow-accountInput.Input-wrapper input"
    password_selector = ".SignFlow-password input"
    submit_selector = ".Button.SignFlow-submitButton"
    # find_element_by_class_name accepts a single class only; the original
    # passed three space-separated classes, which can never match, so the
    # element is located with a CSS selector instead.
    logged_in_selector = ".Popover.PushNotifications.AppHeader-notifications"

    browser = webdriver.Chrome(
        executable_path="E:/chromedriver/chromedriver_win32/chromedriver.exe")

    def replace_text(selector, text):
        """Select the field's current content and overwrite it with text."""
        browser.find_element_by_css_selector(selector).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(selector).send_keys(text)

    def fill_credentials():
        """Type the account and password into the sign-in form."""
        replace_text(account_selector, "xxx")
        replace_text(password_selector, "xxx")

    try:
        # Maximise the window to avoid positioning errors.
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    # Raises if the sign-in header is not present.
    browser.find_element_by_class_name("SignFlowHeader")
    # Height of the browser chrome above the page.  The original measured
    # it with window.outerHeight - window.innerHeight and then overrode the
    # result with this constant.
    browser_navigation_panel_height = 71
    time.sleep(5)
    fill_credentials()
    browser.find_element_by_css_selector(submit_selector).click()
    time.sleep(15)

    from mouse import move, click

    login_success = False
    while not login_success:
        # Already logged in?
        try:
            browser.find_element_by_css_selector(logged_in_selector)
            login_success = True
        except Exception:
            pass

        # Is an English captcha shown?
        try:
            english_captcha_element = browser.find_element_by_class_name(
                "Captcha-englishImg")
        except Exception:
            english_captcha_element = None
        # Is a Chinese captcha shown?
        try:
            chinese_captcha_element = browser.find_element_by_class_name(
                "Captcha-chineseImg")
        except Exception:
            chinese_captcha_element = None

        if chinese_captcha_element:
            location = chinese_captcha_element.location
            x_absolute_coord = location['x']
            y_absolute_coord = location['y'] + browser_navigation_panel_height

            # Save the image from its base64-encoded src attribute.
            base64_text = chinese_captcha_element.get_attribute("src")
            code = base64_text.replace('data:image/jpg;base64,',
                                       '').replace("%0A", "")
            with open("yzm_cn.jpeg", "wb") as fh:
                fh.write(base64.b64decode(code))

            from zheye import zheye
            z = zheye()
            positions = z.Recognize("yzm_cn.jpeg")

            # Each position is treated as (y, x).  With two characters the
            # left-most one is clicked first; otherwise only the first hit
            # is used.  An empty result clicks nothing and the form is
            # simply re-submitted.
            if len(positions) == 2:
                targets = [positions[0], positions[1]]
                if targets[0][1] > targets[1][1]:
                    targets.reverse()
            else:
                targets = list(positions[:1])
            for pos in targets:
                # Halved -- presumably the image is displayed at half its
                # pixel size (TODO confirm).
                move(x_absolute_coord + int(pos[1] / 2),
                     y_absolute_coord + int(pos[0] / 2))
                click()

            fill_credentials()
            # NOTE(review): the original clicks submit twice here; kept.
            browser.find_element_by_css_selector(submit_selector).click()
            browser.find_element_by_css_selector(submit_selector).click()

        if english_captcha_element:
            base64_text = english_captcha_element.get_attribute("src")
            code = base64_text.replace('data:image/jpg;base64,',
                                       '').replace("%0A", "")
            with open("yzm_en.jpeg", "wb") as fh:
                fh.write(base64.b64decode(code))

            from tools.yundama_requests import YDMHttp
            yundama = YDMHttp("da_ge_da1", "dageda", 3129,
                              "40d5ad41c047179fc797631e3b9c3025")
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
            # Ask again until the service returns a non-empty answer.
            while code == "":
                code = yundama.decode("yzm_en.jpeg", 5000, 60)

            browser.find_element_by_xpath(
                '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
            ).send_keys(code)
            fill_credentials()
            browser.find_element_by_css_selector(submit_selector).click()

        time.sleep(10)
        try:
            browser.find_element_by_css_selector(logged_in_selector)
        except Exception:
            continue

        # Logged in.  Cookie persistence happens outside the try so that a
        # failure here is reported instead of silently returning None.
        cookies = browser.get_cookies()
        print(cookies)
        cookie_dict = {}
        for cookie in cookies:
            # Adjust this path to wherever the cookie files should live.
            cookie_path = ('./ArticleSpider/cookies/zhihu/' +
                           cookie['name'] + '.zhihu')
            with open(cookie_path, 'wb') as f:
                pickle.dump(cookie, f)
            cookie_dict[cookie['name']] = cookie['value']
        browser.close()
        return [
            scrapy.Request(url=self.start_urls[0],
                           dont_filter=True,
                           cookies=cookie_dict)
        ]
    print("yes")
def start_requests(self):
    """Log in to Zhihu via an already-running Chrome and start crawling.

    Attaches to the Chrome instance listening on ``127.0.0.1:9222``, fills
    the password sign-in form and loops until the logged-in marker element
    is found, solving the Chinese (zheye + mouse clicks) or English
    (``YDMHttp``) captcha whenever one is displayed.  The cookies are then
    pickled to disk and attached to the first request.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        login cookies.
    """
    account_selector = ".SignFlow-accountInput.Input-wrapper input"
    password_selector = ".SignFlow-password .SignFlowInput .Input-wrapper input"
    captcha_selector = ".Captcha.SignFlow-captchaContainer .SignFlowInput .Input-wrapper input"
    submit_selector = ".Button.SignFlow-submitButton"
    account = "13247598671"
    # NOTE(review): the first submission and the captcha retries use
    # different password literals (trailing '.').  Both are kept exactly as
    # in the original -- confirm which one is intended.
    initial_password = "156416421727av."
    retry_password = "156416421727av"

    chromeOption = Options()
    chromeOption.add_argument("--disable-extensions")
    chromeOption.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path="I:/chromedriver/chromedriver.exe",
        chrome_options=chromeOption)

    def replace_text(selector, text, pause=0):
        """Select the field's content, optionally wait, then type text."""
        browser.find_element_by_css_selector(selector).send_keys(
            Keys.CONTROL + "a")
        if pause:
            time.sleep(pause)
        browser.find_element_by_css_selector(selector).send_keys(text)

    try:
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    browser.find_element_by_css_selector(
        ".SignFlow-tabs div:nth-child(2)").click()
    time.sleep(2)
    replace_text(account_selector, account, pause=2)
    replace_text(password_selector, initial_password, pause=2)
    browser.find_element_by_css_selector(submit_selector).click()
    time.sleep(10)

    logon_success = False  # login state
    while not logon_success:
        # Logged in once the marker element exists.
        try:
            browser.find_element_by_id("Popover17-toggle")
            logon_success = True
        except Exception:
            pass

        # Login failed with a Chinese captcha?
        try:
            chinese_captcha_ele = browser.find_element_by_class_name(
                "Captcha-chineseImg")
        except Exception:
            chinese_captcha_ele = None
        # Login failed with an English captcha?
        try:
            english_captcha_ele = browser.find_element_by_class_name(
                "Captcha-englishImg")
        except Exception:
            english_captcha_ele = None

        # Handle the Chinese captcha.
        if chinese_captcha_ele:
            location = chinese_captcha_ele.location
            x_relative = location["x"]
            y_relative = location["y"]
            browser_navigation_panel_height = browser.execute_script(
                "return (window.outerHeight - window.innerHeight);")

            # Decode the base64 image and write it to a binary file.
            base64_text = chinese_captcha_ele.get_attribute("src")
            code = base64_text.replace("data:image/jpg;base64,",
                                       "").replace("%0A", "")
            with open("captcha_cn.jpeg", "wb") as f:
                f.write(base64.b64decode(code))

            # Recognise the inverted characters automatically.
            z = zheye()
            positions = z.Recognize('captcha_cn.jpeg')

            # Each position is treated as (y, x).  Two characters are
            # clicked left-most first; otherwise only the first hit is
            # used.  An empty result clicks nothing.
            if len(positions) == 2:
                targets = [positions[0], positions[1]]
                if targets[0][1] > targets[1][1]:
                    targets.reverse()
            else:
                targets = list(positions[:1])
            for index, pos in enumerate(targets):
                if index:
                    time.sleep(3)
                # Halved -- presumably the image is displayed at half its
                # pixel size (TODO confirm).
                move(
                    x_relative + int(pos[1]) / 2,
                    y_relative + int(pos[0]) / 2 +
                    browser_navigation_panel_height)
                click()

            replace_text(account_selector, account)
            replace_text(password_selector, retry_password)
            browser.find_element_by_css_selector(submit_selector).click()

        # Handle the English captcha.
        if english_captcha_ele:
            base64_text = english_captcha_ele.get_attribute("src")
            code = base64_text.replace("data:image/jpg;base64,",
                                       "").replace("%0A", "")
            with open("captcha_en.jpeg", "wb") as f:
                f.write(base64.b64decode(code))

            # Use the online captcha-recognition service.
            yundama = YDMHttp('swartz2324', '156416421727av.', 9561,
                              'd9acf25c4e7f52926f4008a55b4080a3')
            captcha_result = yundama.decode('captcha_en.jpeg', 5000, 60)
            while True:
                time.sleep(1)
                if captcha_result != '':
                    break
                captcha_result = yundama.decode('captcha_en.jpeg', 5000, 60)

            replace_text(captcha_selector, captcha_result)
            replace_text(account_selector, account)
            replace_text(password_selector, retry_password)
            browser.find_element_by_css_selector(submit_selector).click()

        # Give the page time to react before polling again.
        time.sleep(3)

    # Logged in: store the cookies locally.
    cookies = browser.get_cookies()
    with open("C:/Users/Administrator/ZhihuSpider/cookies/zhihu.cookie",
              "wb") as cookie_file:
        pickle.dump(cookies, cookie_file)
    cookie_dict = {cookie["name"]: cookie["value"] for cookie in cookies}
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def judge_login(self, response):
    """Make sure the shared browser is logged in to Zhihu, then start crawling.

    If the session is already logged in, the first request is returned
    immediately.  Otherwise the password form is submitted with a password
    that has ``'1'`` appended, and the method then loops, solving the
    Chinese captcha (zheye + Selenium action chain) or the English captcha
    (``FateadmApi``) and re-entering the login details, until
    ``__is_login`` reports success.  The cookies are pickled to
    ``./cookies/zhihu.cookie``.

    Args:
        response: not used by this method.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]``.
    """
    import base64

    self.browser.get('https://www.zhihu.com/signin')
    if self.__is_login():
        return [scrapy.Request(url=self.start_urls[0], dont_filter=True)]

    self.browser.find_element_by_css_selector(
        '.SignFlow-tabs .SignFlow-tab:nth-child(2)').click()
    self.__inputLoginInfo(password=ZHIHU_PASSWORD + '1')
    time.sleep(3)

    login_success = False
    while not login_success:
        try:
            if self.__is_login():
                login_success = True
        except Exception:
            pass

        try:
            english_captcha_element = self.browser.find_element_by_class_name(
                'Captcha-englishImg')
        except Exception:
            english_captcha_element = None
        try:
            chinese_captcha_element = self.browser.find_element_by_class_name(
                'Captcha-chineseImg')
        except Exception:
            chinese_captcha_element = None

        if chinese_captcha_element:
            # A fresh chain per attempt, so clicks queued for a previous
            # captcha are never replayed.  The original reset the private
            # ``_actions`` list of one shared chain instead.
            action = ActionChains(self.browser)
            # Click every existing captcha point marker via JavaScript.
            self.browser.execute_script(
                'document.querySelectorAll(".Button.ChineseCaptchaPoint").forEach(function(el){el.click()});')

            base64_text = chinese_captcha_element.get_attribute('src')
            code = base64_text.replace(
                'data:image/jpg;base64,', '').replace('%0A', '')
            with open('yzm_cn.jpeg', 'wb') as fh:
                fh.write(base64.b64decode(code))

            from zheye import zheye
            z = zheye()
            positions = z.Recognize('yzm_cn.jpeg')

            # Normalise the inverted-character coordinates: zheye returns
            # them as (y, x); store [x, y], left-most first.  An empty
            # result yields no clicks.
            if len(positions) == 2:
                targets = [positions[0], positions[1]]
                if targets[0][1] > targets[1][1]:
                    targets.reverse()
            else:
                targets = list(positions[:1])
            last_position = [[pos[1], pos[0]] for pos in targets]

            for x, y in last_position:
                # Halved -- presumably the image is displayed at half its
                # pixel size (TODO confirm).
                action.move_to_element_with_offset(
                    chinese_captcha_element, int(x / 2), int(y / 2)).click()
            if last_position:
                action.perform()
            print(last_position)
            self.__inputLoginInfo()
            time.sleep(3)

        if english_captcha_element:
            base64_text = english_captcha_element.get_attribute('src')
            code = base64_text.replace(
                'data:image/jpg;base64,', '').replace('%0A', '')
            with open('yzm_en.jpeg', 'wb') as fh:
                fh.write(base64.b64decode(code))

            from tools.fateadm_api import FateadmApi
            from ScrapyDemo.settings import (
                ff_app_id, ff_app_key, ff_pd_id, ff_pd_key)
            api = FateadmApi(ff_app_id, ff_app_key, ff_pd_id, ff_pd_key)
            pred_type = "30400"
            # Request the detailed recognition result; repeat until the
            # response carries a usable value.
            rsp = api.PredictFromFile(pred_type, 'yzm_en.jpeg')
            while (not rsp or rsp.ret_code != 0 or not rsp.pred_rsp
                   or not rsp.pred_rsp.value):
                rsp = api.PredictFromFile(pred_type, 'yzm_en.jpeg')

            self.__clear_input(
                '.Captcha.SignFlow-captchaContainer input[name="captcha"]')
            self.browser.find_element_by_css_selector(
                '.Captcha.SignFlow-captchaContainer input[name="captcha"]'
            ).send_keys(rsp.pred_rsp.value)
            self.__inputLoginInfo()
            time.sleep(3)

    cookies = self.browser.get_cookies()
    with open('./cookies/zhihu.cookie', 'wb') as cookie_file:
        pickle.dump(cookies, cookie_file)
    cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}
    return [scrapy.Request(url=self.start_urls[0], dont_filter=True,
                           cookies=cookie_dict)]
def start_requests(self):
    """Log in to Zhihu via an already-running Chrome and start crawling.

    Attaches to the Chrome instance at ``127.0.0.1:9222``, submits the
    sign-in form and loops until the logged-in header is found, solving the
    Chinese (zheye + ``move``/``click``) or English (``YDMHttp``) captcha
    whenever one is displayed.  On success each cookie is pickled to
    ``./ArticleSpider/cookies/zhihu/<name>.zhihu`` and the browser is
    closed.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        login cookies.
    """
    import base64
    import pickle

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.keys import Keys

    account_selector = ".SignFlow-accountInput.Input-wrapper input"
    password_selector = ".SignFlow-password input"
    captcha_xpath = '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
    # find_element_by_class_name accepts a single class only; the original
    # passed three space-separated classes, which can never match, so the
    # element is located with a CSS selector instead.
    logged_in_selector = ".Popover.PushNotifications.AppHeader-notifications"
    # One set of credentials for every submission.  The original typed
    # different literals in the Chinese-captcha retry than everywhere else.
    account = "xxx"
    password = "xxx"

    chrome_option = Options()
    chrome_option.add_argument("--disable-extensions")
    chrome_option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path="E:/chromedriver/chromedriver_win32/chromedriver.exe",
        chrome_options=chrome_option)

    def replace_text(selector, text):
        """Select the field's current content and overwrite it with text."""
        browser.find_element_by_css_selector(selector).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(selector).send_keys(text)

    def fill_credentials():
        """Type the account and password into the sign-in form."""
        replace_text(account_selector, account)
        replace_text(password_selector, password)

    try:
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    fill_credentials()
    browser.find_element_by_css_selector(
        ".Button.SignFlow-submitButton").click()
    time.sleep(10)

    # (An ``if login_success:`` block that directly followed
    # ``login_success = False`` could never run and has been removed.)
    while True:
        try:
            browser.find_element_by_css_selector(logged_in_selector)
        except Exception:
            pass
        else:
            # Logged in.  Done outside the try so that a failure while
            # saving cookies is reported instead of being swallowed.
            cookies = browser.get_cookies()
            print(cookies)
            cookie_dict = {}
            for cookie in cookies:
                # Adjust this path to wherever the cookie files should live.
                cookie_path = ('./ArticleSpider/cookies/zhihu/' +
                               cookie['name'] + '.zhihu')
                with open(cookie_path, 'wb') as f:
                    pickle.dump(cookie, f)
                cookie_dict[cookie['name']] = cookie['value']
            browser.close()
            return [
                scrapy.Request(url=self.start_urls[0],
                               dont_filter=True,
                               cookies=cookie_dict)
            ]

        try:
            english_captcha_element = browser.find_element_by_class_name(
                "Captcha-englishImg")
        except Exception:
            english_captcha_element = None
        try:
            chinese_captcha_element = browser.find_element_by_class_name(
                "Captcha-chineseImg")
        except Exception:
            chinese_captcha_element = None

        if chinese_captcha_element:
            location = chinese_captcha_element.location
            x_relative = location["x"]
            y_relative = location["y"]
            browser_navigation_panel_height = browser.execute_script(
                'return window.outerHeight - window.innerHeight;')

            base64_text = chinese_captcha_element.get_attribute("src")
            code = base64_text.replace("data:image/jpg;base64,",
                                       "").replace("%0A", "")
            with open("yzm_cn.jpeg", "wb") as fh:
                fh.write(base64.b64decode(code))

            from zheye import zheye
            z = zheye()
            positions = z.Recognize('yzm_cn.jpeg')

            # Each position is treated as (y, x).  Two characters are
            # clicked left-most first; otherwise only the first hit is
            # used.  An empty result clicks nothing.
            if len(positions) == 2:
                targets = [positions[0], positions[1]]
                if targets[0][1] > targets[1][1]:
                    targets.reverse()
            else:
                targets = list(positions[:1])
            for index, pos in enumerate(targets):
                if index:
                    time.sleep(3)
                # Halved -- presumably the image is displayed at half its
                # pixel size (TODO confirm).
                move(
                    x_relative + int(pos[1] / 2),
                    y_relative + browser_navigation_panel_height +
                    int(pos[0] / 2))
                click()

            fill_credentials()
            # Submit by clicking at a fixed screen position.
            move(911, 643)
            click()

        if english_captcha_element:
            base64_text = english_captcha_element.get_attribute("src")
            code = base64_text.replace('data:image/jpg;base64,',
                                       '').replace("%0A", "")
            with open("yzm_en.jpeg", "wb") as fh:
                fh.write(base64.b64decode(code))

            from tools.yundama_requests import YDMHttp
            yundama = YDMHttp("xxx", "xxx", 3129, "xxx")
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
            # Ask again until the service returns a non-empty answer.
            while code == "":
                code = yundama.decode("yzm_en.jpeg", 5000, 60)

            browser.find_element_by_xpath(captcha_xpath).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_xpath(captcha_xpath).send_keys(code)
            fill_credentials()
            # Submit by clicking at a fixed screen position.
            move(895, 603)
            click()
def selenuim_parse(self, url):
    """Return a request for ``url`` that carries Zhihu login cookies.

    When ``self.statu_code`` is 401 or ``self.account_item`` has fewer than
    five entries, logs in with Selenium (attaching to the Chrome instance at
    ``127.0.0.1:9222``), solving English captchas with ``FateadmApi`` and
    inverted-Chinese-character captchas with zheye, stores the fresh
    cookies through ``self.redis.update_cookies`` and uses them.  Otherwise
    the cookies already held in ``self.account_item[4]`` are used.

    Args:
        url: address to request once cookies are available.

    Returns:
        scrapy.Request: request for ``url`` with ``self.parse`` as callback.
    """
    if not (self.statu_code == 401 or len(self.account_item) < 5):
        return scrapy.Request(url=url,
                              callback=self.parse,
                              headers=self.headers,
                              cookies=self.account_item[4])

    options = Options()
    options.add_argument('--disable-extensions')
    # Attach to the Chrome debugging port 9222.
    options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
    browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH,
                               chrome_options=options)
    try:
        browser.maximize_window()
    except Exception:
        pass

    account_xpath = '//input[contains(@placeholder,"手机号或邮箱")]'
    password_xpath = '//input[contains(@placeholder,"密码")]'
    submit_xpath = '//button[@type="submit"]'

    browser.get("https://www.zhihu.com/signin?next=%2F")
    browser.find_element_by_xpath(
        '//div[not(contains(text(),"免")) and contains(text(),"密码")]'
    ).click()
    browser.find_element_by_xpath(account_xpath).send_keys(
        Keys.CONTROL + "a")
    browser.find_element_by_xpath(account_xpath).send_keys(
        self.account_item[1])
    browser.find_element_by_xpath(password_xpath).send_keys(
        Keys.CONTROL + "a")
    browser.find_element_by_xpath(password_xpath).send_keys(
        self.account_item[2])
    browser.find_element_by_xpath(submit_xpath).click()
    time.sleep(3)

    while True:
        # The "提问" button is used as the logged-in marker.
        try:
            browser.find_element_by_xpath(
                '//button[contains(text(),"提问")]')
        except Exception:
            pass
        else:
            # Only the lookup above is guarded: a failure while storing the
            # cookies must surface instead of silently looping forever.
            cookies = browser.get_cookies()
            cookies_file = {item['name']: item['value'] for item in cookies}
            self.redis.update_cookies(self.account_item[0],
                                      self.account_item[1],
                                      self.account_item[2],
                                      self.account_item[3], cookies_file)
            browser.close()
            return scrapy.Request(url=url,
                                  callback=self.parse,
                                  headers=self.headers,
                                  cookies=cookies_file)

        try:
            eng_yzm = browser.find_element_by_xpath(
                '//img[@class="Captcha-englishImg"]')
        except Exception:
            eng_yzm = None
        try:
            chn_yzm = browser.find_element_by_xpath(
                '//img[@class="Captcha-chineseImg"]')
        except Exception:
            chn_yzm = None

        if bool(eng_yzm):
            # Solve the alphanumeric captcha with the FateadmApi service.
            img_yzm_str = eng_yzm.get_attribute('src')
            img_yzm = img_yzm_str.replace('data:image/jpg;base64,',
                                          '').replace("%0A", "")
            with open('eng_yzm.jpeg', 'wb') as img_file:
                img_file.write(base64.b64decode(img_yzm))
            feifei = FateadmApi(FEIFEI_USERNAME, FEIFEI_PASSWORD,
                                FEIFEI_PD_USER, FEIFEI_PD_PASSWORD)
            code = feifei.PredictFromFileExtend("30400", "eng_yzm.jpeg")
            browser.find_element_by_xpath(
                '//input[@placeholder="验证码"]').send_keys(code)
            time.sleep(1)
            browser.find_element_by_xpath(submit_xpath).click()
            time.sleep(3)

        if bool(chn_yzm):
            # Solve the inverted-character captcha with zheye.
            img_yzm_str = chn_yzm.get_attribute('src')
            img_yzm = img_yzm_str.replace('data:image/jpg;base64,',
                                          '').replace("%0A", "")
            with open('chn_yzm.jpeg', 'wb') as img_file:
                img_file.write(base64.b64decode(img_yzm))
            img_location = chn_yzm.location
            # Height of the browser toolbar area.
            browser_height = browser.execute_script(
                'return window.outerHeight - window.innerHeight;')
            z = zheye()
            # zheye returns a list of 2-tuples shaped like [(y, x)].
            hz = z.Recognize('chn_yzm.jpeg')
            # Two hits: click both in the order returned; otherwise only
            # the first.  An empty result clicks nothing.
            targets = list(hz[:2]) if len(hz) == 2 else list(hz[:1])
            for index, pos in enumerate(targets):
                if index:
                    time.sleep(3)
                # Halved -- presumably the image is displayed at half its
                # pixel size (TODO confirm).
                x = int(pos[1]) // 2 + img_location['x']
                y = int(pos[0]) // 2 + img_location['y'] + browser_height
                move(x, y)
                click()
            time.sleep(1)
            browser.find_element_by_xpath(submit_xpath).click()
            time.sleep(3)
def login(self):
    """Log in to Zhihu with Selenium and return the session cookies.

    Starts Chrome with ``self.chrome_options``, opens the sign-in page and
    repeats the login attempt until ``self.check_login()`` is truthy.  On
    each attempt the credentials ``self.uname`` / ``self.passwd`` are typed,
    a Chinese (inverted character) captcha is clicked through zheye or an
    English captcha is typed from the Chaojiying result, and the submit
    button is pressed.

    Returns:
        dict: cookie name -> cookie value as reported by the browser.
    """

    def read_captcha(class_name):
        """Return ``(element, payload)`` for the captcha image with ``class_name``.

        ``payload`` is the ``src`` attribute with the data-URI prefix and
        ``%0A`` removed; it is ``"null"`` when the lookup fails.
        """
        try:
            element = self.brower.find_element_by_class_name(class_name)
            data_uri = element.get_attribute("src")
            payload = data_uri.replace("data:image/jpg;base64,",
                                       "").replace("%0A", "")
        except Exception:
            return None, "null"
        return element, payload

    def type_into(xpath, text):
        """Replace the content of the input at ``xpath`` with ``text``."""
        self.brower.find_element_by_xpath(xpath).send_keys(Keys.CONTROL + "a")
        self.brower.find_element_by_xpath(xpath).send_keys(text)

    self.brower = webdriver.Chrome(
        executable_path="webdriver/chromedriver.exe",
        chrome_options=self.chrome_options)
    self.brower.get("https://www.zhihu.com/signin")

    while not self.check_login():
        login_elemet = self.brower.find_element_by_css_selector(
            "#root > div > main > div > div > div > div.SignContainer-content > div > form > div.SignFlow-tabs > div:nth-child(2)"
        )
        login_elemet.click()
        type_into("//input[@name='username']", self.uname)
        type_into("//input[@name='password']", self.passwd)
        botton = self.brower.find_element_by_css_selector(
            ".Button.SignFlow-submitButton.Button--primary.Button--blue")

        chinese_captcha_element, chimgcode = read_captcha(
            "Captcha-chineseImg")
        _, enimgcode = read_captcha("Captcha-englishImg")

        # A payload equal to "null" is treated as "no captcha of this kind".
        if chimgcode != "null":
            with open("captcha/chincaptcha.jpeg", "wb") as f:
                f.write(base64.b64decode(chimgcode))
            self.brower.maximize_window()
            ele_postion = chinese_captcha_element.location
            x_relative = ele_postion["x"]
            y_relative = ele_postion["y"]
            # Height of the browser area above the page content.
            brower_navigation_panel_height = self.brower.execute_script(
                "return window.outerHeight - window.innerHeight;")
            positions = zheye().Recognize('captcha/chincaptcha.jpeg')
            # Only one or two hits are acted on.  Index 1 of a hit is used as
            # x and index 0 as y; two hits are clicked left-to-right.
            if len(positions) in (1, 2):
                for point in sorted(positions, key=lambda pos: pos[1]):
                    # Coordinates are halved before being added to the
                    # on-page position of the image.
                    move(
                        x_relative + int(point[1] / 2),
                        y_relative + brower_navigation_panel_height +
                        int(point[0] / 2))
                    click()
        elif enimgcode != "null":
            image_bytes = base64.b64decode(enimgcode)
            with open("captcha/engcaptcha.jpeg", "wb") as f:
                f.write(image_bytes)
            # NOTE(review): the password setting is passed before the user
            # name here; confirm this matches Chaojiying_Client's signature.
            chaojiyingapi = Chaojiying_Client(self.settings.CJY_PASSWD,
                                              self.settings.CJY_UNAME,
                                              "906234")
            # The decoded bytes are sent directly; re-opening the file just
            # written left an unclosed handle behind.
            json_result = chaojiyingapi.PostPic(image_bytes, 1902)
            if json_result["err_no"] == 0:
                pic_str = json_result["pic_str"]
                print("识别码:{pic_str}".format(pic_str=pic_str))
            else:
                print("识别失败")
                # Kept from the original: a failed recognition still types
                # the literal text "null" and submits.
                pic_str = "null"
            type_into(
                '//*[@id="root"]/div/main/div/div/div/div[1]/div/form/div[4]/div/div/label/input',
                pic_str)
        botton.click()

    cookie_dict = {co['name']: co['value']
                   for co in self.brower.get_cookies()}
    self.brower.close()
    return cookie_dict
def start_requests(self):
    """Log in to Zhihu through an attached Chrome and start crawling with its cookies.

    Chrome is attached via the debugger address ``127.0.0.1:9222``.  Unless
    the notification element is already on the page, the password form is
    filled and submitted with absolute mouse coordinates.  Captchas are then
    answered (zheye for inverted Chinese characters, YDMHttp for English
    ones) until the notification element is found.  Every cookie is pickled
    to ``./ArticleSpider/cookies/zhihu/<name>.zhihu``.

    Returns:
        list: a single ``scrapy.Request`` for ``self.start_urls[0]`` that
        carries the browser cookies.
    """
    import pickle

    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to project settings.
    account = "15292060685"
    password = "qq1362441"
    notification_xpath = (
        '//div[@class="Popover PushNotifications AppHeader-notifications"]')
    # The original measured window.outerHeight - window.innerHeight and then
    # overwrote the result with 70, so only this constant ever took effect.
    browser_navigation_panel_height = 70

    chrome_options = Options()
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_experimental_option("debuggerAddress",
                                           "127.0.0.1:9222")
    browser = webdriver.Chrome(
        'C:/Users/孙佩豪/AppData/Local/Google/Chrome/Application/chromedriver.exe',
        chrome_options=chrome_options)
    try:
        browser.maximize_window()
    except Exception:
        # Maximising an already maximised window may raise; ignore it.
        pass

    def find_or_none(finder, selector):
        """Return ``finder(selector)`` or None when the lookup fails."""
        try:
            return finder(selector)
        except Exception:
            return None

    def fill_credentials():
        """Select the existing text in both inputs and type the credentials."""
        account_css = ".SignFlow-accountInput.Input-wrapper input"
        password_css = ".SignFlow-password input"
        browser.find_element_by_css_selector(account_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys(account)
        browser.find_element_by_css_selector(password_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(password_css).send_keys(password)

    def save_captcha(element, path):
        """Decode the base64 data URI in the element's ``src`` into ``path``."""
        base64_text = element.get_attribute("src")
        # Strip the data-URI prefix and the URL-encoded newlines.
        code = base64_text.replace("data:image/jpg;base64,",
                                   "").replace("%0A", "")
        with open(path, "wb") as fh:
            fh.write(base64.b64decode(code))

    browser.get('https://www.zhihu.com/signin')
    time.sleep(2)

    if find_or_none(browser.find_element_by_xpath,
                    notification_xpath) is None:
        move(914, 329)  # screen position of the "password login" tab
        click()
        time.sleep(2)
        fill_credentials()
        move(955, 566)  # screen position of the submit button
        click()
        click()

    while True:
        time.sleep(1)
        if find_or_none(browser.find_element_by_xpath,
                        notification_xpath) is not None:
            break

        english_captcha_element = find_or_none(
            browser.find_element_by_class_name, "Captcha-englishImg")
        chinese_captcha_element = find_or_none(
            browser.find_element_by_class_name, "Captcha-chineseImg")

        if chinese_captcha_element is not None:
            time.sleep(1)
            ele_position = chinese_captcha_element.location
            x_relative = ele_position["x"]
            y_relative = ele_position["y"]
            time.sleep(3)
            save_captcha(chinese_captcha_element, "yzm_cn.jpeg")
            positions = zheye().Recognize('yzm_cn.jpeg')
            if len(positions) == 2:
                # Index 1 is used as x and index 0 as y.  Click left-to-right
                # (the original compared index 0 and duplicated a coordinate
                # when building the second point).
                for point in sorted(positions, key=lambda pos: pos[1]):
                    # Coordinates are halved before use on the page.
                    move(
                        int(point[1] / 2) + x_relative,
                        int(point[0] / 2) + y_relative +
                        browser_navigation_panel_height)
                    click()
            elif positions:
                # Single inverted character.
                point = positions[0]
                time.sleep(5)
                move(
                    int(point[1] / 2) + x_relative,
                    int(point[0] / 2) + browser_navigation_panel_height +
                    y_relative)
                time.sleep(5)
                click()
            fill_credentials()
            move(954, 619)
            click()

        if english_captcha_element is not None:
            time.sleep(1)
            save_captcha(english_captcha_element, "yzm_en.jpeg")
            Yundama = YDMHttp("sph116", "qq1362441", 8730,
                              "9f94b142759f9fd86bd0e7a912bbc889")
            code = Yundama.decode("yzm_en.jpeg", 5000, 60)
            # Retry until a non-empty answer arrives; the original loop
            # slept without ever asking again and so never terminated.
            while code == "":
                time.sleep(0.5)
                code = Yundama.decode("yzm_en.jpeg", 5000, 60)
            captcha_xpath = (
                '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[4]/div/div/label/input'
            )
            browser.find_element_by_xpath(captcha_xpath).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_xpath(captcha_xpath).send_keys(code)
            move(956, 600)
            click()

        time.sleep(5)

    Cookies = browser.get_cookies()
    print(Cookies)
    cookie_dict = {}
    for cookie in Cookies:
        # Adjust this directory to wherever the cookies should be stored.
        cookie_path = ('./ArticleSpider/cookies/zhihu/' + cookie['name'] +
                       '.zhihu')
        with open(cookie_path, 'wb') as f:
            pickle.dump(cookie, f)
        cookie_dict[cookie['name']] = cookie['value']
    # The browser is intentionally left open.
    print("======知乎登录成功=========")
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def get_captcha_position():
    """Run zheye on ``captcha.gif``, print the result and return it."""
    from zheye import zheye

    found = zheye().Recognize('captcha.gif')
    print(found)
    return found
def start_requests(self):
    """Log in to Zhihu through an attached Chrome and start crawling with its cookies.

    Chrome is attached via the debugger address ``127.0.0.1:9222``, the
    password form is filled and submitted with absolute mouse coordinates,
    and captchas are answered (zheye for inverted Chinese characters,
    YDMHttp for English ones) until the notification element is found.
    Every cookie is pickled to ``./ArticleSpider/cookies/<name>.zhihu``.

    Returns:
        list: a single ``scrapy.Request`` for ``self.start_urls[0]`` that
        carries the browser cookies.
    """
    # NOTE(review): credentials are hard-coded, and the first attempt types
    # 'Admin@2019' while every retry types 'Admin@2009'.  Both literals are
    # kept as found; confirm which one is correct.
    account = '18930059946'

    def find_or_none(finder, selector):
        """Return ``finder(selector)`` or None when the lookup fails."""
        try:
            return finder(selector)
        except Exception:
            return None

    def fill_credentials(password):
        """Select the existing text in both inputs and type the credentials."""
        browser.find_element_by_css_selector(
            '.SignFlow-account input').send_keys(Keys.CONTROL + 'a')
        browser.find_element_by_css_selector(
            '.SignFlow-account input').send_keys(account)
        browser.find_element_by_css_selector(
            '.SignFlow-password input').send_keys(Keys.CONTROL + 'a')
        browser.find_element_by_css_selector(
            '.SignFlow-password input').send_keys(password)

    def save_captcha(element, path):
        """Decode the base64 data URI in the element's ``src`` into ``path``."""
        base64_text = element.get_attribute("src")
        code = base64_text.replace('data:image/jpg;base64,',
                                   '').replace('%0A', '')
        with open(path, "wb") as fh:
            fh.write(base64.b64decode(code))

    chrome_option = Options()
    chrome_option.add_argument('--disable-extensions')
    # debuggerAddress takes "host:port"; the original appended "/json",
    # which is the path of the DevTools status page, not part of the address.
    chrome_option.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
    browser = webdriver.Chrome(
        executable_path='E:/git/chromedriver_win32 (3)/chromedriver.exe',
        chrome_options=chrome_option)
    try:
        # Maximise so the absolute mouse coordinates below line up.
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    fill_credentials('Admin@2019')
    time.sleep(3)
    move(902, 599)  # screen position of the submit button
    click()
    time.sleep(10)

    while True:
        # find_element_by_class_name rejects compound class names, so the
        # original lookup always failed; a CSS selector matches all three.
        if find_or_none(
                browser.find_element_by_css_selector,
                '.Popover.PushNotifications.AppHeader-notifications'
        ) is not None:
            break

        english_captcha_element = find_or_none(
            browser.find_element_by_class_name, 'Captcha-englishImg')
        chinese_captcha_element = find_or_none(
            browser.find_element_by_class_name, 'Captcha-chineseImg')

        if chinese_captcha_element is not None:
            ele_postion = chinese_captcha_element.location
            x_relative = ele_postion['x']
            y_relative = ele_postion['y']
            # Height of the browser area above the page content.
            browser_navigation_panel_height = browser.execute_script(
                'return window.outerHeight - window.innerHeight;')
            # The image is saved from its base64 ``src`` attribute.
            save_captcha(chinese_captcha_element, "yzm_cn.jpeg")
            positions = zheye().Recognize("yzm_cn.jpeg")
            # Index 1 of a hit is used as x, index 0 as y.  Two hits are
            # clicked left-to-right; any other count clicks the first hit
            # only, and an empty result clicks nothing.
            if len(positions) == 2:
                targets = sorted(positions, key=lambda pos: pos[1])
            else:
                targets = positions[:1]
            for point in targets:
                # Coordinates are halved before use on the page.
                move(
                    x_relative + int(point[1] / 2),
                    y_relative + browser_navigation_panel_height +
                    int(point[0] / 2))
                click()
            fill_credentials('Admin@2009')
            time.sleep(3)
            move(836, 634)
            click()

        if english_captcha_element is not None:
            save_captcha(english_captcha_element, "yzm_en.jpeg")
            yundama = YDMHttp("Patrick_Wang", "A100s200", 7107,
                              "f8ecb1a73b115d41f7a2526fd1d8d5a8")
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
            # Ask again until the service returns a non-empty answer.
            while code == "":
                code = yundama.decode("yzm_en.jpeg", 5000, 60)
            browser.find_element_by_xpath(
                '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
            ).send_keys(code)
            fill_credentials('Admin@2009')
            move(836, 634)
            click()

        time.sleep(10)

    Cookies = browser.get_cookies()
    print(Cookies)
    cookie_dict = {}
    for cookie in Cookies:
        # Adjust this directory to wherever the cookies should be stored.
        cookie_path = './ArticleSpider/cookies/' + cookie['name'] + '.zhihu'
        with open(cookie_path, 'wb') as f:
            pickle.dump(cookie, f)
        cookie_dict[cookie['name']] = cookie['value']
    browser.close()
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def login(self, response):
    """Log in to Zhihu with Selenium and yield the first authenticated request.

    A Chrome driver is started, the password form is filled and submitted
    until a captcha image appears, and captchas are then answered (zheye for
    inverted Chinese characters, Chaojiying for English ones) until a
    logged-in marker is found.  Every cookie is pickled to
    ``./ArticleSpider/cookies/zhihu/<name>.zhihu``.

    Args:
        response: The response this callback was invoked with; it is not
            read by this method.

    Yields:
        scrapy.Request: request for ``self.start_urls[0]`` with the browser
        cookies attached.
    """
    import base64
    import pickle

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.chrome.options import Options
    from mouse import move, click

    # NOTE(review): credentials are hard-coded in source.
    account = "13232732408"
    password = "abc713912"
    form = "//*[@id='root']/div/main/div/div/div/div[1]/div/form"
    tab_xpath = form + "/div[1]/div[2]"
    account_xpath = form + "/div[2]/div/label/input"
    password_xpath = form + "/div[3]/div/label/input"
    submit_xpath = form + "/button"
    chinese_img_xpath = form + "/div[4]/div/div[2]/img"
    english_img_xpath = form + "/div[4]/div/span/div/img"
    # NOTE(review): this targets the <label>, not an <input> below it;
    # kept as found, confirm against the page.
    captcha_field_xpath = form + "/div[4]/div/div/label"

    # 1. Start Chrome (make sure every Chrome instance is closed first).
    chrome_options = Options()
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_experimental_option("excludeSwitches",
                                           ['enable-automation'])
    chrome_options.add_experimental_option("debuggerAddress",
                                           "127.0.0.1:9222")
    # NOTE(review): ``chrome_options`` is built but not handed to the driver,
    # exactly as in the original; left unchanged because it is unclear
    # whether that omission is intentional.
    browser = webdriver.Chrome(
        executable_path="D:/解压文件/chromedriver_win32/chromedriver.exe")
    try:
        browser.maximize_window()
    except Exception:
        pass

    def find_or_none(finder, selector):
        """Return ``finder(selector)`` or None when the lookup fails."""
        try:
            return finder(selector)
        except Exception:
            return None

    def type_into(xpath, text):
        """Replace the content of the input at ``xpath`` with ``text``."""
        browser.find_element_by_xpath(xpath).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(xpath).send_keys(text)

    def captcha_bytes(element):
        """Return the image bytes decoded from the element's base64 ``src``."""
        base64_text = element.get_attribute("src")
        # Besides the data-URI prefix the value contains "%0A" sequences
        # that have to be removed before decoding.
        code = base64_text.replace("data:image/jpg;base64,",
                                   '').replace("%0A", "")
        return code, base64.b64decode(code)

    browser.get("https://www.zhihu.com/signin?next=%2F")
    browser.find_element_by_xpath(tab_xpath).click()
    type_into(account_xpath, account)
    type_into(password_xpath, password)
    browser.find_element_by_xpath(submit_xpath).click()
    time.sleep(3)

    # Keep submitting until one of the captcha images shows up.
    while True:
        submit_button = find_or_none(browser.find_element_by_xpath,
                                     submit_xpath)
        if submit_button is None:
            # The form is gone, so there is nothing left to submit.
            break
        submit_button.click()
        has_cn = find_or_none(browser.find_element_by_xpath,
                              chinese_img_xpath) is not None
        has_en = find_or_none(browser.find_element_by_xpath,
                              english_img_xpath) is not None
        if has_cn or has_en:
            break
        time.sleep(3)

    while True:
        # find_element_by_class_name rejects compound class names, so a CSS
        # selector is used for the two-class marker.
        if find_or_none(browser.find_element_by_css_selector,
                        ".Popover.AppHeader-menu") is not None:
            break

        english_captcha_element = find_or_none(
            browser.find_element_by_class_name, "Captcha-englishImg")
        chinese_captcha_element = find_or_none(
            browser.find_element_by_class_name, "Captcha-chineseImg")

        if chinese_captcha_element is not None:
            ele_postion = chinese_captcha_element.location
            x_relative = ele_postion["x"]
            y_relative = ele_postion["y"]
            # Height of the browser area above the page content (the address
            # bar, per the original author's note).
            browser_navigation_panel_height = browser.execute_script(
                'return window.outerHeight - window.innerHeight;')
            _, image = captcha_bytes(chinese_captcha_element)
            with open("yzm_cn.jpeg", "wb") as fh:
                fh.write(image)
            from zheye import zheye
            positions = zheye().Recognize('yzm_cn.jpeg')
            # Index 1 of a hit is used as x, index 0 as y.  Two hits are
            # clicked left-to-right; any other count clicks only the first.
            if len(positions) == 2:
                targets = sorted(positions, key=lambda pos: pos[1])
            else:
                targets = positions[:1]
            for point in targets:
                # Coordinates are halved before use on the page.  Each click
                # uses its own y offset (the original reused the second
                # point's y, which was undefined for a single character).
                move((x_relative + int(point[1] / 2)),
                     y_relative + browser_navigation_panel_height +
                     int(point[0] / 2))
                click()
            # Submit the login form again.
            browser.find_element_by_xpath(tab_xpath).click()
            type_into(account_xpath, account)
            type_into(password_xpath, password)
            browser.find_element_by_xpath(submit_xpath).click()

        if english_captcha_element is not None:
            code, image = captcha_bytes(english_captcha_element)
            with open("yzm_en.jpeg", "wb") as fh:
                fh.write(image)
            print(code)
            from ArticleSpider.tools.chaojiying import Chaojiying_Client
            chaojiying = Chaojiying_Client("1171242903", "130796abc",
                                           "905526")
            # The decoded bytes are posted directly instead of re-reading
            # the image from a separate absolute path.
            result = chaojiying.PostPic(image, 1902)
            print("英文验证码:")
            print(result)
            # Retry until the service returns a non-empty answer.
            while not result["pic_str"]:
                result = chaojiying.PostPic(image, 1902)
            browser.find_element_by_xpath(tab_xpath).click()
            browser.find_element_by_xpath(account_xpath).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_xpath(captcha_field_xpath).send_keys(
                result["pic_str"])
            type_into(account_xpath, account)
            type_into(password_xpath, password)
            browser.find_element_by_xpath(submit_xpath).click()

        time.sleep(10)
        if find_or_none(browser.find_element_by_xpath,
                        '//*[@id="Popover17-toggle"]/img') is not None:
            break

    Cookies = browser.get_cookies()
    print(Cookies)
    cookie_dict = {}
    for cookie in Cookies:
        # Adjust this directory to wherever the cookies should be stored.
        cookie_path = ('./ArticleSpider/cookies/zhihu/' + cookie['name'] +
                       '.zhihu')
        with open(cookie_path, 'wb') as f:
            pickle.dump(cookie, f)
        cookie_dict[cookie['name']] = cookie['value']
    browser.close()
    # Yield the request itself rather than a list wrapping it.
    # NOTE(review): ``check_login`` is referenced as a bare name, as in the
    # original; confirm it exists at module level.
    yield scrapy.Request(url=self.start_urls[0],
                         dont_filter=True,
                         cookies=cookie_dict,
                         callback=check_login)
def start_requests(self):
    """Log in to Zhihu through an attached Chrome and start crawling with its cookies.

    Chrome is attached via the debugger address ``127.0.0.1:9222``.  The
    password form is submitted and captchas are answered (zheye for inverted
    Chinese characters, Chaojiying for English ones) until the header logo
    is found.  The browser cookies are then pickled to a fixed path and
    attached to the first request.  If any step of the login flow raises,
    the cookies currently held by the browser are used instead.

    Returns:
        list: a single ``scrapy.Request`` for ``self.start_urls[0]`` that
        carries the browser cookies.
    """
    # NOTE(review): credentials are hard-coded, and the first attempt types
    # "ZHUHAIOO" while every retry types "1qaz@4321".  Both literals are
    # kept as found; confirm which one is correct.
    account = "16601052213"
    form = "//*[@id='root']/div/main/div/div/div/div[1]/div/form"
    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_xpath = form + "/div[3]/div/label/input"
    captcha_input_xpath = form + "/div[4]/div/div/label/input"

    def find_or_none(xpath):
        """Return the first element matching ``xpath`` or None when the lookup fails."""
        try:
            return browser.find_element_by_xpath(xpath)
        except Exception:
            return None

    def submit_credentials(password):
        """Type the account and ``password`` into the form and press submit."""
        browser.find_element_by_css_selector(account_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys(account)
        browser.find_element_by_xpath(password_xpath).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_xpath(password_xpath).send_keys(password)
        browser.find_element_by_css_selector(
            ".Button.SignFlow-submitButton").click()

    def captcha_bytes(element):
        """Return the image bytes decoded from the element's base64 ``src``."""
        base64_text = element.get_attribute("src")
        code = base64_text.replace("data:image/jpg;base64,",
                                   "").replace("%0A", "")
        return base64.b64decode(code)

    # Attach to the locally started Chrome.
    chrome_option = Options()
    chrome_option.add_argument("--disable-extensions")
    chrome_option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path=
        "C:/Users/Administrator/PycharmProjects/Envs/Scripts/chromedriver.exe",
        chrome_options=chrome_option)
    try:
        browser.maximize_window()
    except Exception:
        pass

    try:
        browser.get("https://www.zhihu.com/signin")
        browser.find_element_by_xpath(form + "/div[1]/div[2]").click()
        submit_credentials("ZHUHAIOO")
        time.sleep(1)

        while True:
            # SVG nodes are namespaced, so a plain ``svg`` step never matches
            # them in an HTML page; match on the element name instead.
            if find_or_none(
                    "//*[@id='root']/div/div[2]/header/div[1]/a/*[name()='svg']"
            ) is not None:
                break

            english_captcha_element = find_or_none(
                form + "/div[4]/div/span/div/img")
            chinese_captcha_element = find_or_none(
                form + "/div[4]/div/div[2]/img")
            if (english_captcha_element is None
                    and chinese_captcha_element is None):
                # Nothing to act on yet; wait instead of spinning.
                time.sleep(1)
                continue

            if chinese_captcha_element is not None:
                # On-page position of the captcha image.
                ele_position = chinese_captcha_element.location
                x_relative = ele_position["x"]
                y_relative = ele_position["y"]
                # Fixed offset added to every y coordinate; the original
                # comment describes it as the browser's top area.
                browser_navigation_panel_height = 70
                with open("yzm_cn.jpeg", "wb") as fh:
                    fh.write(captcha_bytes(chinese_captcha_element))
                positions = zheye().Recognize('yzm_cn.jpeg')
                # Index 1 of a hit is used as x, index 0 as y.  Two hits
                # are clicked left-to-right; any other count clicks only
                # the first, and an empty result clicks nothing.
                if len(positions) == 2:
                    targets = sorted(positions, key=lambda pos: pos[1])
                else:
                    targets = positions[:1]
                for index, point in enumerate(targets):
                    if index:
                        time.sleep(3)
                    # Coordinates are halved and truncated to integers
                    # (the original left the x offset as a float).
                    mouse.move(
                        x_relative + int(point[1] / 2),
                        y_relative + browser_navigation_panel_height +
                        int(point[0] / 2))
                    mouse.click()
                submit_credentials("1qaz@4321")

            if english_captcha_element is not None:
                im = captcha_bytes(english_captcha_element)
                with open("yzm_en.jpeg", "wb") as fh:
                    fh.write(im)
                chaojiying = Chaojiying_Client('16601052213', 'ZHUHAIOO00',
                                               '905609')
                json_data = chaojiying.PostPic(im, 1902)
                code = json_data["pic_str"]
                # Retry while the service reports an error and no text.
                while json_data["err_no"] != 0 and code == "":
                    json_data = chaojiying.PostPic(im, 1902)
                    code = json_data["pic_str"]
                browser.find_element_by_xpath(captcha_input_xpath).send_keys(
                    Keys.CONTROL + "a")
                browser.find_element_by_xpath(captcha_input_xpath).send_keys(
                    code)
                submit_credentials("1qaz@4321")
    except Exception:
        # Same fallback as the original: when the login flow fails, the
        # cookies the browser already holds are used.
        pass

    # Collected on every path; the original only reached this code from its
    # ``except`` clause and returned nothing after a clean login.
    cookies = browser.get_cookies()
    with open(
            "C:/Users/Administrator/PycharmProjects/Envs/Scripts/ArticleSpider/cookies/zhihu.cookie",
            "wb") as cookie_file:
        pickle.dump(cookies, cookie_file)
    cookie_dict = {cookie["name"]: cookie["value"] for cookie in cookies}
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
# -*- coding: utf-8 -*- __author__ = 'dongnanzhy' # 参考:https://github.com/muchrooms/zheye import sys sys.path.insert(0, "../") from zheye import zheye z = zheye() positions = z.Recognize('captcha_cn.gif') print(positions)
def start_requests(self):
    """Log in to Zhihu through an attached Chrome and start crawling with its cookies.

    Chrome has to be started by hand to avoid anti-crawler detection: run
    ``chrome.exe --remote-debugging-port=9222`` from the Chrome install
    directory and check http://127.0.0.1:9222/json to verify it is up.  The
    password form is submitted and captchas are answered (zheye for inverted
    Chinese characters, YDMHttp for English ones) until the notification
    element is found.  Every cookie is pickled to
    ``./Article_Spider/cookies/zhihu/<name>.zhihu``.

    Returns:
        list: a single ``scrapy.Request`` for ``self.start_urls[0]`` that
        carries the browser cookies.
    """
    import pickle

    def find_or_none(finder, selector):
        """Return ``finder(selector)`` or None when the lookup fails."""
        try:
            return finder(selector)
        except Exception:
            return None

    def fill_credentials(account, password, pause=0):
        """Type the credentials, waiting ``pause`` seconds between the fields."""
        account_css = ".SignFlow-accountInput.Input-wrapper input"
        password_css = ".SignFlow-password input"
        browser.find_element_by_css_selector(account_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys(account)
        if pause:
            time.sleep(pause)
        browser.find_element_by_css_selector(password_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(password_css).send_keys(password)

    def save_captcha(element, path):
        """Decode the base64 data URI in the element's ``src`` into ``path``."""
        base64_text = element.get_attribute("src")
        code = base64_text.replace("data:image/jpg;base64,",
                                   "").replace("%0A", "")
        with open(path, "wb") as fh:
            fh.write(base64.b64decode(code))

    chrome_option = Options()
    chrome_option.add_argument("--disable-extensions")
    chrome_option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(chrome_options=chrome_option)
    try:
        browser.maximize_window()
    except Exception:
        pass

    browser.get("https://www.zhihu.com/signin")
    # NOTE(review): the first attempt types the placeholder "xxx" while the
    # retries below use concrete credentials; kept as found.
    fill_credentials("xxx", "xxx", pause=2)
    browser.find_element_by_css_selector(
        ".Button.SignFlow-submitButton").click()
    time.sleep(3)

    while True:
        # find_element_by_class_name rejects compound class names, so the
        # original lookup always failed; a CSS selector matches all three.
        if find_or_none(
                browser.find_element_by_css_selector,
                ".Popover.PushNotifications.AppHeader-notifications"
        ) is not None:
            cookies_list = browser.get_cookies()
            print(cookies_list)
            cookie_dict = {}
            for cookie in cookies_list:
                # Persist each cookie to its own file.
                cookie_path = ('./Article_Spider/cookies/zhihu/' +
                               cookie['name'] + '.zhihu')
                with open(cookie_path, 'wb') as f:
                    pickle.dump(cookie, f)
                cookie_dict[cookie['name']] = cookie['value']
            browser.close()
            return [
                scrapy.Request(url=self.start_urls[0],
                               dont_filter=True,
                               cookies=cookie_dict)
            ]

        english_captcha_element = find_or_none(
            browser.find_element_by_class_name, "Captcha-englishImg")
        chinese_captcha_element = find_or_none(
            browser.find_element_by_class_name, "Captcha-chineseImg")
        if (english_captcha_element is None
                and chinese_captcha_element is None):
            # Nothing to act on yet; wait instead of spinning on the driver.
            time.sleep(1)
            continue

        # Inverted Chinese characters.
        if chinese_captcha_element is not None:
            ele_postion = chinese_captcha_element.location
            x_relative = ele_postion["x"]
            y_relative = ele_postion["y"]
            # Height of the browser area above the page content.
            browser_navigation_panel_height = browser.execute_script(
                'return window.outerHeight - window.innerHeight;')
            save_captcha(chinese_captcha_element, "verify_code.jpeg")
            positions = zheye().Recognize('verify_code.jpeg')
            # Index 1 of a hit is used as x, index 0 as y.  Two hits are
            # clicked left-to-right; any other count clicks only the first,
            # and an empty result clicks nothing.
            if len(positions) == 2:
                targets = sorted(positions, key=lambda pos: pos[1])
            else:
                targets = positions[:1]
            for index, point in enumerate(targets):
                if index:
                    time.sleep(3)
                # Coordinates are halved before use on the page.
                move(
                    x_relative + int(point[1] / 2),
                    y_relative + browser_navigation_panel_height +
                    int(point[0] / 2))
                click()
            fill_credentials("13662241324", "root0503", pause=2)
            move(674, 527)  # screen position of the submit button
            click()

        # English letters.
        if english_captcha_element is not None:
            save_captcha(english_captcha_element, "yzm_en.jpeg")
            from tools.yundama_requests import YDMHttp
            yundama = YDMHttp("xxx", "xxx", 3129, "xxx")
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
            # Ask again until the service returns a non-empty answer.
            while code == "":
                code = yundama.decode("yzm_en.jpeg", 5000, 60)
            captcha_xpath = (
                '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
            )
            browser.find_element_by_xpath(captcha_xpath).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_xpath(captcha_xpath).send_keys(code)
            fill_credentials("13662241324", "root0503")
            move(674, 527)
            click()