def start_requests(self):
    """Build the first Zhihu request, authenticated with session cookies.

    Cookies cached in ``<BASE_DIR>/cookies/zhihu.cookie`` are reused when that
    file exists and holds a non-empty list.  Otherwise the method attaches to
    the Chrome instance listening on 127.0.0.1:9222, submits the sign-in form,
    answers Chinese (inverted character) and English captchas until the
    logged-in header element appears, and caches the browser's cookies.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        session cookies and ``self.headers``.
    """
    import base64

    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException, WebDriverException
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.keys import Keys

    # One canonical cache location.  The original tested for zhihu.cookie but
    # loaded lagou.cookie, and wrote to a hard-coded D:\ path.
    cookie_path = os.path.join(BASE_DIR, "cookies", "zhihu.cookie")
    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    captcha_input_xpath = '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[4]/div/div/div[1]/input'

    def find_by_class(browser, class_name):
        """Return the element with the given single class name, or None."""
        try:
            return browser.find_element_by_class_name(class_name)
        except NoSuchElementException:
            return None

    def is_logged_in(browser):
        """Report whether the notification element of the logged-in header exists."""
        try:
            # A CSS selector is required here: class-name lookup rejects a
            # value containing several space-separated classes.
            browser.find_element_by_css_selector(
                ".Popover.PushNotifications.AppHeader-notifications")
        except NoSuchElementException:
            return False
        return True

    def save_captcha_image(captcha, path):
        """Decode the captcha's base64 ``src`` attribute and write it to *path*."""
        data_uri = captcha.get_attribute("src")
        encoded = data_uri.replace("data:image/jpg;base64,", "").replace("%0A", "")
        with open(path, "wb") as image_file:
            image_file.write(base64.b64decode(encoded))

    def solve_chinese_captcha(browser, captcha):
        """Click the inverted characters found by zheye, then the login button."""
        origin = captcha.location
        # The mouse helpers take screen coordinates, so add the height of the
        # browser chrome above the viewport.
        panel_height = browser.execute_script(
            'return window.outerHeight - window.innerHeight;')
        save_captcha_image(captcha, "yzm_cn.jpeg")
        from zheye import zheye
        positions = zheye().Recognize('yzm_cn.jpeg')
        # Each position is used as (y, x).  Two hits are clicked left to
        # right; otherwise only the first hit (if any) is clicked.
        if len(positions) == 2:
            targets = sorted(positions, key=lambda pos: pos[1])
        else:
            targets = positions[:1]
        for index, pos in enumerate(targets):
            if index:
                time.sleep(2)
            # Coordinates are halved before use (presumably the page shows
            # the image at half its native size — TODO confirm).
            move(origin["x"] + int(pos[1] / 2),
                 origin["y"] + panel_height + int(pos[0] / 2))
            click()
        time.sleep(1)
        move(663, 569)  # hard-coded screen position, presumably the login button
        click()

    def solve_english_captcha(browser, captcha):
        """Have Yundama read the text captcha, type the answer and submit."""
        save_captcha_image(captcha, "yzm_en.jpeg")
        from tools.yundama_requests import YDMHttp
        # NOTE(review): service credentials are hard-coded; move to settings.
        yundama = YDMHttp("zzzzqxp", "634498qxp", 8954, "fd03eddd0dc7ebe6eb4ce5c00012bb31")
        code = yundama.decode("yzm_en.jpeg", 5000, 60)
        while code == "":  # an empty string means recognition failed; retry
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)
        time.sleep(1)
        move(663, 544)  # hard-coded screen position, presumably the login button
        click()
        time.sleep(2)

    def login_with_browser():
        """Sign in through the attached Chrome and return its cookies."""
        chrome_option = Options()
        chrome_option.add_argument('--disable-extensions')
        chrome_option.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
        browser = webdriver.Chrome(
            executable_path=r"D:\scrapytest\ArticleSpider\venv\Scripts\chromedriver.exe",
            chrome_options=chrome_option)

        browser.get("https://www.zhihu.com/signin")
        # Presumably switches the form to the account/password tab.
        browser.find_element_by_xpath(
            '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[1]/div[2]').click()

        # Ctrl+A selects whatever is in the field so typing replaces it.
        # NOTE(review): account and password are hard-coded; move to settings.
        browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys("13643095504")
        time.sleep(1)
        browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
        time.sleep(1)
        browser.find_element_by_css_selector(password_css).send_keys("634498qxp@")
        time.sleep(2)
        move(678, 511)  # the login button is clicked with the real mouse
        click()
        time.sleep(6)

        try:
            browser.maximize_window()
        except WebDriverException:
            pass  # best effort; the captcha offsets assume a maximized window

        while not is_logged_in(browser):
            english_captcha = find_by_class(browser, 'Captcha-englishImg')
            chinese_captcha = find_by_class(browser, 'Captcha-chineseImg')
            if chinese_captcha is not None:
                solve_chinese_captcha(browser, chinese_captcha)
            if english_captcha is not None:
                solve_english_captcha(browser, english_captcha)
            time.sleep(1)  # give the page time to react before polling again

        time.sleep(1)
        browser.get("https://www.zhihu.com/")
        return browser.get_cookies()

    cookies = []
    if os.path.exists(cookie_path):
        # NOTE(review): unpickling executes code from the file; this is only
        # acceptable because the cache is written by this method itself.
        with open(cookie_path, "rb") as cookie_file:
            cookies = pickle.load(cookie_file)

    if not cookies:
        cookies = login_with_browser()
        # Once saved, later runs can skip Selenium entirely.
        os.makedirs(os.path.dirname(cookie_path), exist_ok=True)
        with open(cookie_path, "wb") as cookie_file:
            pickle.dump(cookies, cookie_file)

    cookie_dict = {cookie["name"]: cookie["value"] for cookie in cookies}
    return [scrapy.Request(url=self.start_urls[0], dont_filter=True,
                           cookies=cookie_dict, headers=self.headers)]
def start_requests(self):
    """Sign in to Zhihu through an attached Chrome and return the first request.

    Attaches to the Chrome instance listening on 127.0.0.1:9222 and opens the
    sign-in page.  Unless the logged-in header element is already present, it
    submits the account form and answers Chinese (inverted character) and
    English captchas until that element appears.  Every browser cookie is then
    pickled to ``./ArticleSpider/cookies/zhihu/<name>.zhihu``.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        session cookies.
    """
    import os
    import pickle

    from selenium.common.exceptions import NoSuchElementException, WebDriverException

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    notify_xpath = '//div[@class="Popover PushNotifications AppHeader-notifications"]'
    captcha_input_xpath = '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[4]/div/div/label/input'
    # Adjust this directory to your own project layout.
    cookie_dir = './ArticleSpider/cookies/zhihu/'
    # Height of the browser chrome above the viewport.  The original measured
    # it with JavaScript and then overwrote the result with this constant, so
    # only the constant is kept.
    panel_height = 70

    chrome_options = Options()
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        'C:/Users/孙佩豪/AppData/Local/Google/Chrome/Application/chromedriver.exe',
        chrome_options=chrome_options)

    def is_logged_in():
        """Report whether the notification element of the logged-in header exists."""
        try:
            browser.find_element_by_xpath(notify_xpath)
        except NoSuchElementException:
            return False
        return True

    def find_by_class(class_name):
        """Return the element with the given class name, or None."""
        try:
            return browser.find_element_by_class_name(class_name)
        except NoSuchElementException:
            return None

    def enter_credentials():
        """Select the current content of both fields and type over it."""
        # NOTE(review): account and password are hard-coded; move to settings.
        for css, text in ((account_css, "15292060685"), (password_css, "qq1362441")):
            browser.find_element_by_css_selector(css).send_keys(Keys.CONTROL + "a")
            browser.find_element_by_css_selector(css).send_keys(text)

    def save_captcha_image(captcha, path):
        """Decode the captcha's base64 ``src`` attribute and write it to *path*."""
        data_uri = captcha.get_attribute("src")
        # Strip the data-URI prefix and encoded newlines from the payload.
        encoded = data_uri.replace("data:image/jpg;base64,", "").replace("%0A", "")
        with open(path, "wb") as image_file:
            image_file.write(base64.b64decode(encoded))

    def solve_chinese_captcha(captcha):
        """Click the inverted characters found by zheye, then resubmit the form."""
        time.sleep(1)
        origin = captcha.location
        time.sleep(3)
        save_captcha_image(captcha, "yzm_cn.jpeg")
        positions = zheye().Recognize('yzm_cn.jpeg')

        def to_screen(pos):
            # Each position is used as (y, x).  The page shows the image at
            # half its native size, so coordinates are halved, then offset by
            # the element origin and the browser chrome height.
            return (int(pos[1] / 2) + origin["x"],
                    int(pos[0] / 2) + origin["y"] + panel_height)

        if len(positions) == 2:
            # Click left to right, as the sibling implementations do.  The
            # original compared y values and duplicated an index, which lost
            # one coordinate of the second character.
            for pos in sorted(positions, key=lambda item: item[1]):
                move(*to_screen(pos))
                click()
        elif len(positions) > 0:
            # Single inverted character (the original stored x twice here).
            time.sleep(5)
            move(*to_screen(positions[0]))
            time.sleep(5)
            click()

        enter_credentials()
        move(954, 619)  # hard-coded screen position, presumably the login button
        click()

    def solve_english_captcha(captcha):
        """Have Yundama read the text captcha, type the answer and submit."""
        time.sleep(1)
        save_captcha_image(captcha, "yzm_en.jpeg")
        # NOTE(review): service credentials are hard-coded; move to settings.
        yundama = YDMHttp("sph116", "qq1362441", 8730, "9f94b142759f9fd86bd0e7a912bbc889")
        code = yundama.decode("yzm_en.jpeg", 5000, 60)
        # An empty string means recognition failed.  The original slept
        # without decoding again, so one failure looped forever.
        while code == "":
            time.sleep(0.5)
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)
        move(956, 600)  # hard-coded screen position, presumably the login button
        click()

    try:
        browser.maximize_window()
    except WebDriverException:
        pass  # raises when the window is already maximized

    browser.get('https://www.zhihu.com/signin')
    time.sleep(2)

    if not is_logged_in():
        move(914, 329)  # screen position of the "account/password login" tab
        click()
        time.sleep(2)
        enter_credentials()
        move(955, 566)  # the login button is clicked with the real mouse
        click()
        click()

        while True:
            time.sleep(1)
            if is_logged_in():
                break
            english_captcha_element = find_by_class("Captcha-englishImg")
            chinese_captcha_element = find_by_class("Captcha-chineseImg")
            if chinese_captcha_element is not None:
                solve_chinese_captcha(chinese_captcha_element)
            if english_captcha_element is not None:
                solve_english_captcha(english_captcha_element)
            time.sleep(5)  # let the page settle before checking again

    cookies = browser.get_cookies()
    print(cookies)
    os.makedirs(cookie_dir, exist_ok=True)
    cookie_dict = {}
    for cookie in cookies:
        with open(cookie_dir + cookie['name'] + '.zhihu', 'wb') as cookie_file:
            pickle.dump(cookie, cookie_file)
        cookie_dict[cookie['name']] = cookie['value']
    # The browser is deliberately left open.
    print("======知乎登录成功=========")
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def start_requests(self):
    """Sign in to Zhihu through an attached Chrome and return the first request.

    Attaches to the Chrome instance listening on 127.0.0.1:9222, submits the
    sign-in form, and answers Chinese (inverted character) and English
    captchas until the logged-in header element appears.  Every browser cookie
    is then pickled to ``./ArticleSpider/cookies/zhihu/<name>.zhihu`` and the
    browser window is closed.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        session cookies.
    """
    import base64
    import os
    import pickle

    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException, WebDriverException
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.keys import Keys

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    captcha_input_xpath = '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
    # Adjust this directory to your own project layout.
    cookie_dir = './ArticleSpider/cookies/zhihu/'

    chrome_option = Options()
    chrome_option.add_argument("--disable-extensions")
    chrome_option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path="E:/chromedriver/chromedriver_win32/chromedriver.exe",
        chrome_options=chrome_option)

    def is_logged_in():
        """Report whether the notification element of the logged-in header exists."""
        try:
            # A CSS selector is required here: class-name lookup rejects a
            # value containing several space-separated classes, which made the
            # original check fail on every poll.
            browser.find_element_by_css_selector(
                ".Popover.PushNotifications.AppHeader-notifications")
        except NoSuchElementException:
            return False
        return True

    def find_by_class(class_name):
        """Return the element with the given single class name, or None."""
        try:
            return browser.find_element_by_class_name(class_name)
        except NoSuchElementException:
            return None

    def enter_credentials(account, password):
        """Select the current content of both fields and type over it."""
        for css, text in ((account_css, account), (password_css, password)):
            browser.find_element_by_css_selector(css).send_keys(Keys.CONTROL + "a")
            browser.find_element_by_css_selector(css).send_keys(text)

    def save_captcha_image(captcha, path):
        """Decode the captcha's base64 ``src`` attribute and write it to *path*."""
        data_uri = captcha.get_attribute("src")
        encoded = data_uri.replace("data:image/jpg;base64,", "").replace("%0A", "")
        with open(path, "wb") as image_file:
            image_file.write(base64.b64decode(encoded))

    def click_inverted_characters(captcha):
        """Click the inverted characters zheye finds in the Chinese captcha."""
        origin = captcha.location
        # The mouse helpers take screen coordinates, so add the height of the
        # browser chrome above the viewport.
        panel_height = browser.execute_script(
            'return window.outerHeight - window.innerHeight;')
        save_captcha_image(captcha, "yzm_cn.jpeg")
        from zheye import zheye
        positions = zheye().Recognize('yzm_cn.jpeg')
        # Each position is used as (y, x).  Two hits are clicked left to
        # right; otherwise only the first hit (if any) is clicked.
        if len(positions) == 2:
            targets = sorted(positions, key=lambda pos: pos[1])
        else:
            targets = positions[:1]
        for index, pos in enumerate(targets):
            if index:
                time.sleep(3)
            # Coordinates are halved before use (presumably the page shows
            # the image at half its native size — TODO confirm).
            move(origin["x"] + int(pos[1] / 2),
                 origin["y"] + panel_height + int(pos[0] / 2))
            click()

    def type_english_captcha(captcha):
        """Have Yundama read the text captcha and type the answer."""
        save_captcha_image(captcha, "yzm_en.jpeg")
        from tools.yundama_requests import YDMHttp
        yundama = YDMHttp("xxx", "xxx", 3129, "xxx")  # placeholder service credentials
        code = yundama.decode("yzm_en.jpeg", 5000, 60)
        while code == "":  # an empty string means recognition failed; retry
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)

    try:
        browser.maximize_window()
    except WebDriverException:
        pass  # best effort; the captcha offsets assume a maximized window

    browser.get("https://www.zhihu.com/signin")
    enter_credentials("xxx", "xxx")  # placeholders: supply a real account
    browser.find_element_by_css_selector(".Button.SignFlow-submitButton").click()
    time.sleep(10)

    while not is_logged_in():
        english_captcha_element = find_by_class("Captcha-englishImg")
        chinese_captcha_element = find_by_class("Captcha-chineseImg")

        if chinese_captcha_element is not None:
            click_inverted_characters(chinese_captcha_element)
            # NOTE(review): this retry uses a real, hard-coded account while
            # the other submissions use the "xxx" placeholder; unify them via
            # settings and rotate the exposed password.
            enter_credentials("18782902568", "admin1234")
            move(911, 643)  # hard-coded screen position, presumably the login button
            click()

        if english_captcha_element is not None:
            type_english_captcha(english_captcha_element)
            enter_credentials("xxx", "xxx")
            move(895, 603)  # hard-coded screen position, presumably the login button
            click()

        time.sleep(1)  # give the page time to react before polling again

    cookies = browser.get_cookies()
    print(cookies)
    os.makedirs(cookie_dir, exist_ok=True)
    cookie_dict = {}
    for cookie in cookies:
        with open(cookie_dir + cookie['name'] + '.zhihu', 'wb') as cookie_file:
            pickle.dump(cookie, cookie_file)
        cookie_dict[cookie['name']] = cookie['value']
    browser.close()
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def start_requests(self):
    """Sign in to Zhihu through a fresh Chrome and return the first request.

    Launches Chrome via chromedriver, submits the sign-in form, and answers
    Chinese (inverted character) and English captchas until the logged-in
    header element appears.  Every browser cookie is then pickled to
    ``./ArticleSpider/cookies/zhihu/<name>.zhihu`` and the browser window is
    closed.

    Two ways to avoid chromedriver being detected by the site, per the
    original author's notes: start Chrome yourself and attach through
    ``debuggerAddress`` 127.0.0.1:9222 (recommended, since chromedriver
    exposes JavaScript variables a server can recognise), or use Chrome 60.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        session cookies.
    """
    import base64
    import os
    import pickle
    import time

    from mouse import move, click
    from selenium import webdriver
    from selenium.common.exceptions import NoSuchElementException, WebDriverException
    # Keys was used by the original body without being imported in it.
    from selenium.webdriver.common.keys import Keys

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    submit_css = ".Button.SignFlow-submitButton"
    captcha_input_xpath = '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
    # Adjust this directory to your own project layout.
    cookie_dir = './ArticleSpider/cookies/zhihu/'
    # Height of the browser chrome above the viewport.  The original measured
    # it with JavaScript and then overwrote the result with this constant, so
    # only the constant is kept.
    browser_navigation_panel_height = 71

    browser = webdriver.Chrome(
        executable_path="E:/chromedriver/chromedriver_win32/chromedriver.exe")

    def is_logged_in():
        """Report whether the notification element of the logged-in header exists."""
        try:
            # A CSS selector is required here: class-name lookup rejects a
            # value containing several space-separated classes, which made the
            # original check fail on every poll.
            browser.find_element_by_css_selector(
                ".Popover.PushNotifications.AppHeader-notifications")
        except NoSuchElementException:
            return False
        return True

    def find_by_class(class_name):
        """Return the element with the given single class name, or None."""
        try:
            return browser.find_element_by_class_name(class_name)
        except NoSuchElementException:
            return None

    def enter_credentials():
        """Select the current content of both fields and type over it."""
        # "xxx" values are placeholders: supply a real account.
        for css, text in ((account_css, "xxx"), (password_css, "xxx")):
            browser.find_element_by_css_selector(css).send_keys(Keys.CONTROL + "a")
            browser.find_element_by_css_selector(css).send_keys(text)

    def click_submit():
        """Click the form's submit button through Selenium."""
        browser.find_element_by_css_selector(submit_css).click()

    def save_captcha_image(captcha, path):
        """Decode the captcha's base64 ``src`` attribute and write it to *path*."""
        data_uri = captcha.get_attribute("src")
        encoded = data_uri.replace('data:image/jpg;base64,', '').replace("%0A", "")
        with open(path, "wb") as image_file:
            image_file.write(base64.b64decode(encoded))

    def click_inverted_characters(captcha):
        """Click the inverted characters zheye finds in the Chinese captcha."""
        x_absolute_coord = captcha.location['x']
        y_absolute_coord = captcha.location['y'] + browser_navigation_panel_height
        save_captcha_image(captcha, "yzm_cn.jpeg")
        from zheye import zheye
        positions = zheye().Recognize("yzm_cn.jpeg")
        # Each position is used as (y, x).  Two hits are clicked left to
        # right; otherwise only the first hit (if any) is clicked.
        if len(positions) == 2:
            targets = sorted(positions, key=lambda pos: pos[1])
        else:
            targets = positions[:1]
        for pos in targets:
            # Coordinates are halved before use (presumably the page shows
            # the image at half its native size — TODO confirm).
            move(x_absolute_coord + int(pos[1] / 2),
                 y_absolute_coord + int(pos[0] / 2))
            click()

    def type_english_captcha(captcha):
        """Have Yundama read the text captcha and type the answer."""
        save_captcha_image(captcha, "yzm_en.jpeg")
        from tools.yundama_requests import YDMHttp
        # NOTE(review): service credentials are hard-coded; move to settings.
        yundama = YDMHttp("da_ge_da1", "dageda", 3129, "40d5ad41c047179fc797631e3b9c3025")
        code = yundama.decode("yzm_en.jpeg", 5000, 60)
        while code == "":  # an empty string means recognition failed; retry
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
        # Clear any answer left from a previous attempt before typing; the
        # original appended each new code to the old one.
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)

    try:
        browser.maximize_window()  # maximize so on-screen positions are stable
    except WebDriverException:
        pass

    browser.get("https://www.zhihu.com/signin")
    time.sleep(5)
    enter_credentials()
    click_submit()
    time.sleep(15)

    while not is_logged_in():
        english_captcha_element = find_by_class("Captcha-englishImg")
        chinese_captcha_element = find_by_class("Captcha-chineseImg")

        if chinese_captcha_element is not None:
            click_inverted_characters(chinese_captcha_element)
            enter_credentials()
            click_submit()
            click_submit()

        if english_captcha_element is not None:
            type_english_captcha(english_captcha_element)
            enter_credentials()
            click_submit()

        time.sleep(10)  # wait for the sign-in result before checking again

    cookies = browser.get_cookies()
    print(cookies)
    os.makedirs(cookie_dir, exist_ok=True)
    cookie_dict = {}
    for cookie in cookies:
        with open(cookie_dir + cookie['name'] + '.zhihu', 'wb') as cookie_file:
            pickle.dump(cookie, cookie_file)
        cookie_dict[cookie['name']] = cookie['value']
    browser.close()
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]
def start_requests(self):
    """Sign in to Zhihu through an attached Chrome and return the first request.

    Chrome must be started manually so the site's anti-crawler checks do not
    recognise it: run ``chrome.exe --remote-debugging-port=9222`` from the
    Chrome install directory and confirm it is up at
    http://127.0.0.1:9222/json.

    The method attaches to that instance, submits the sign-in form, and
    answers Chinese (inverted character) and English captchas until the
    logged-in header element appears.  Every browser cookie is then pickled to
    ``./Article_Spider/cookies/zhihu/<name>.zhihu`` and the browser window is
    closed.

    Returns:
        list: one ``scrapy.Request`` for ``self.start_urls[0]`` carrying the
        session cookies.
    """
    import os
    import pickle

    from selenium.common.exceptions import NoSuchElementException, WebDriverException

    account_css = ".SignFlow-accountInput.Input-wrapper input"
    password_css = ".SignFlow-password input"
    captcha_input_xpath = '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
    cookie_dir = './Article_Spider/cookies/zhihu/'

    chrome_option = Options()
    chrome_option.add_argument("--disable-extensions")
    chrome_option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    browser = webdriver.Chrome(chrome_options=chrome_option)

    def is_logged_in():
        """Report whether the notification element of the logged-in header exists."""
        try:
            # A CSS selector is required here: class-name lookup rejects a
            # value containing several space-separated classes, which made the
            # original check fail on every poll.
            browser.find_element_by_css_selector(
                ".Popover.PushNotifications.AppHeader-notifications")
        except NoSuchElementException:
            return False
        return True

    def find_by_class(class_name):
        """Return the element with the given single class name, or None."""
        try:
            return browser.find_element_by_class_name(class_name)
        except NoSuchElementException:
            return None

    def enter_credentials(account, password, pause=0):
        """Type over both fields, sleeping *pause* seconds between them."""
        browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys(account)
        if pause:
            time.sleep(pause)
        browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(password_css).send_keys(password)

    def save_captcha_image(captcha, path):
        """Decode the captcha's base64 ``src`` attribute and write it to *path*."""
        data_uri = captcha.get_attribute("src")
        encoded = data_uri.replace("data:image/jpg;base64,", "").replace("%0A", "")
        with open(path, "wb") as image_file:
            image_file.write(base64.b64decode(encoded))

    def click_inverted_characters(captcha):
        """Click the inverted characters zheye finds in the Chinese captcha."""
        origin = captcha.location
        # The mouse helpers take screen coordinates, so add the height of the
        # browser chrome above the viewport.
        panel_height = browser.execute_script(
            'return window.outerHeight - window.innerHeight;')
        save_captcha_image(captcha, "verify_code.jpeg")
        positions = zheye().Recognize('verify_code.jpeg')
        # Each position is used as (y, x).  Two hits are clicked left to
        # right; otherwise only the first hit (if any) is clicked.
        if len(positions) == 2:
            targets = sorted(positions, key=lambda pos: pos[1])
        else:
            targets = positions[:1]
        for index, pos in enumerate(targets):
            if index:
                time.sleep(3)
            # Coordinates are halved before use (presumably the page shows
            # the image at half its native size — TODO confirm).
            move(origin["x"] + int(pos[1] / 2),
                 origin["y"] + panel_height + int(pos[0] / 2))
            click()

    def type_english_captcha(captcha):
        """Have Yundama read the text captcha and type the answer."""
        save_captcha_image(captcha, "yzm_en.jpeg")
        from tools.yundama_requests import YDMHttp
        yundama = YDMHttp("xxx", "xxx", 3129, "xxx")  # placeholder service credentials
        code = yundama.decode("yzm_en.jpeg", 5000, 60)
        while code == "":  # an empty string means recognition failed; retry
            code = yundama.decode("yzm_en.jpeg", 5000, 60)
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)

    try:
        browser.maximize_window()
    except WebDriverException:
        pass  # best effort; the captcha offsets assume a maximized window

    browser.get("https://www.zhihu.com/signin")
    enter_credentials("xxx", "xxx", pause=2)  # placeholders: supply a real account
    browser.find_element_by_css_selector(".Button.SignFlow-submitButton").click()
    time.sleep(3)

    while not is_logged_in():
        english_captcha_element = find_by_class("Captcha-englishImg")
        chinese_captcha_element = find_by_class("Captcha-chineseImg")

        # Chinese captcha: click the inverted characters.
        if chinese_captcha_element is not None:
            click_inverted_characters(chinese_captcha_element)
            # NOTE(review): the captcha retries use a real, hard-coded account
            # while the first submission uses the "xxx" placeholder; unify
            # them via settings and rotate the exposed password.
            enter_credentials("13662241324", "root0503", pause=2)
            move(674, 527)  # hard-coded screen position, presumably the login button
            click()

        # English captcha: type the recognised letters.
        if english_captcha_element is not None:
            type_english_captcha(english_captcha_element)
            enter_credentials("13662241324", "root0503")
            move(674, 527)  # hard-coded screen position, presumably the login button
            click()

        time.sleep(1)  # give the page time to react before polling again

    cookies_list = browser.get_cookies()
    print(cookies_list)
    os.makedirs(cookie_dir, exist_ok=True)
    cookie_dict = {}
    for cookie in cookies_list:
        with open(cookie_dir + cookie['name'] + '.zhihu', 'wb') as cookie_file:
            pickle.dump(cookie, cookie_file)
        cookie_dict[cookie['name']] = cookie['value']
    browser.close()
    return [
        scrapy.Request(url=self.start_urls[0],
                       dont_filter=True,
                       cookies=cookie_dict)
    ]