def set_browser(self):
    """Build the HTTP session and store a ``RoboBrowser`` on ``self.browser``.

    The session sends a deviantart Referer header and, when ``self.mature``
    is truthy, carries the ``agegate_state`` cookie. The user agent is taken
    from a fixed pool through ``choice``; the HTML parser is ``lxml`` when
    that package can be imported and ``html.parser`` otherwise.
    """
    agent_pool = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1'
        ' (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50'
        ' (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
        'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US)'
        ' AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2'
        ' (KHTML, like Gecko) Chrome/6.0',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1)'
        ' Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)',
    )

    http = req_session()
    http.headers.update({'Referer': 'https://www.deviantart.com/'})
    if self.mature:
        http.cookies.update({'agegate_state': '1'})

    # Prefer the lxml parser when it is installed, see
    # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser
    try:
        __import__("lxml")
    except ImportError:
        parser = "html.parser"
    else:
        parser = "lxml"

    self.browser = RoboBrowser(
        history=False,
        session=http,
        tries=3,
        user_agent=choice(agent_pool),
        parser=parser,
    )
def ask_qrcode_status(request):
    """
    Poll the QR-code login status for the posted ``lg_token``.

    :param request: request whose POST data carries ``username``,
        ``account_type`` and, optionally, ``lg_token``
    :return: ``JsonResponse`` filled by the ajax ok/error helpers; error
        payloads additionally carry ``need_refresh``
    """
    failure_text = "通过扫描二维码登录失败"
    ret_data = {}
    succ = False
    need_refresh = False
    try:
        form = request.POST
        username = form["username"]
        account_type = form["account_type"]
        lg_token = form.get("lg_token", "")

        check_url = (
            "https://qrlogin.taobao.com/qrcodelogin/qrcodeLoginCheck.do?"
            "lgToken={lgToken}&defaulturl=https%3A%2F%2Fwww.taobao.com%2F"
        ).format(lgToken=lg_token)
        status_reply = get_response_by_requests(check_url, headers=DEFAULT_HEADERS)
        res_json = status_reply.json()

        session = req_session()
        msg = failure_text
        code = res_json.get("code")
        if code == "10000":
            # Not scanned yet.
            msg = "请先扫描二维码"
        elif code == "10001":
            # Scanned, waiting for the user to confirm.
            msg = "扫描成功后,请确认登录"
            succ = True
        elif code == "10004":
            # QR code expired; the client has to fetch a new one.
            msg = "二维码已失效,请重试"
            need_refresh = True
        elif code == "10006":
            # Confirmed: follow the redirect so the session collects cookies.
            redirect_url = res_json.get("url")
            resp = session.get(redirect_url, headers=DEFAULT_HEADERS, verify=False)
            if resp.status_code == 200:
                msg = "登录成功"
                cookies_str = json_dumps(session.cookies.get_dict(domain='.taobao.com'))
                # Store the logged-in cookies in ssdb for the crawler side.
                key = username + ACCOUNT_CRAWLING_QRCODE_COOKIES_SSDB_SUFFIX + account_type
                get_ssdb_conn().setx(key, cookies_str, DATA_EXPIRE_TIME)
                succ = True
        else:
            msg = res_json.get("msg", failure_text)
    except Exception:
        msg = "获取扫描二维码状态出错"

    if not succ:
        ret_data["need_refresh"] = need_refresh
        add_ajax_error_json(ret_data, msg)
    else:
        add_ajax_ok_json(ret_data)
    return JsonResponse(ret_data)
def set_browser(self):
    """Create a session with a deviantart Referer and attach a ``RoboBrowser``.

    The browser is stored on ``self.browser`` and uses a user agent picked
    by ``random.choice`` from a fixed pool.
    """
    user_agents = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1'
        ' (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50'
        ' (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
        'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US)'
        ' AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2'
        ' (KHTML, like Gecko) Chrome/6.0',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1)'
        ' Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)',
    )

    http = req_session()
    http.headers['Referer'] = 'http://www.deviantart.com/'
    self.browser = RoboBrowser(
        history=False,
        session=http,
        tries=3,
        user_agent=random.choice(user_agents),
    )
def _deco(*args, **kwargs):
    """Make sure the caller's session is bootstrapped, then invoke ``func``.

    The first positional argument must be the request. When the session has
    no ``req_cookie`` (or ``call_time`` forces a reset), the registration
    page is fetched and its cookies, struts token and a captcha code are
    stored in the session before ``func`` runs.
    """
    user_session = args[0].session
    if call_time:
        # Forget the cached cookies so the bootstrap below runs again.
        user_session["req_cookie"] = None

    if user_session.get("req_cookie"):
        return func(*args, **kwargs)

    upstream = req_session()
    reg_page = upstream.get(
        "https://ipcrs.pbccrc.org.cn/userReg.do?method=initReg",
        headers=HEADERS,
        verify=False,
    )
    token_values = etree.HTML(reg_page.content).xpath(
        "//input[@name='org.apache.struts.taglib.html.TOKEN']/@value"
    )
    token = token_values[0]

    user_session.set_expiry(0)
    user_session["req_cookie"] = reg_page.cookies.get_dict()
    user_session["token"] = token
    # Fetch and parse the captcha with the same upstream session.
    user_session["captcha_code"] = get_captcha_code(upstream)
    return func(*args, **kwargs)
def get_zhengxin_reg_captcha():
    """
    Fetch the image captcha shown on the registration page.

    :return: tuple ``(captcha_body, token, cookies)`` -- the raw captcha
        response content, the token extracted from the registration page
        with ``zhengxin_token_pattern``, and the session's cookie jar
    """
    base = "https://ipcrs.pbccrc.org.cn/"
    _session = req_session()
    _session.headers.update({
        'User-Agent': USER_AGENT,
        'Referer': base + "page/login/loginreg.jsp",
    })

    reg_page = _session.post(
        base + "userReg.do",
        {"method": "initReg"},
        timeout=6,
        verify=False,
    )
    token = zhengxin_token_pattern.search(reg_page.text).group(1)

    # NOTE(review): unlike the POST above, this request passes neither a
    # timeout nor verify=False -- confirm that is intended.
    captcha_resp = _session.get(base + "imgrc.do?" + get_js_time())
    return captcha_resp.content, token, _session.cookies
def back_chuck_passwd(request):
    """
    Handle step one of the password-recovery flow.

    Reads ``loginName``, ``name``, ``certNo`` and ``certType`` from the POST
    data, fetches the struts token from the reset-password page and submits
    the identity check through ``find_password``, retrying while the
    upstream site reports a wrong captcha.

    The templates are rendered with ``locals()``, so the local variable
    names in this function are part of the template contract and must not
    be renamed.

    :param request: request object exposing ``POST`` and ``session``
    :return: the rendered step-two template when a phone value came back
        (the upstream cookies are then saved in
        ``request.session['passwd_cookies']``), otherwise the step-one
        template rendered again with ``msg`` describing the failure
    """
    args = request.POST
    loginName = args['loginName']
    name = args['name']
    certNo = args['certNo']
    certType = args['certType']
    headers = HEADERS.copy()
    headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
    url = 'https://ipcrs.pbccrc.org.cn/resetPassword.do?method=init'
    req_ses = req_session()
    response = req_ses.get(url=url, headers=headers, verify=False)
    token = etree.HTML(response.text).xpath("//input[@name='org.apache.struts.taglib.html.TOKEN']/@value")[0]
    datas = {
        'org.apache.struts.taglib.html.TOKEN': token,
        'method': 'checkLoginName',
        'loginname': loginName,
        'name': name.encode('gb2312', 'replace'),
        'certType': certType,
        'certNo': certNo,
        '_@IMGRC@_': '',
    }
    phone, error_msg1, error_msg2 = find_password(req_ses, datas)
    msg = ''
    # NOTE(review): captcha retries are unbounded; consider a retry cap.
    while error_msg1:
        error_msg = error_msg1.group(1)
        if '验证码输入错误,请重新输入' not in error_msg:
            # Any other upstream error will not go away by retrying. The
            # previous code never left this loop in that case; stop and show
            # the message to the user instead.
            msg = error_msg
            break
        phone, error_msg1, error_msg2 = find_password(req_ses, datas)
    if error_msg2:
        msg = error_msg2.group(1)
    if phone:
        request.session['passwd_cookies'] = req_ses.cookies.get_dict()
        return render(request, 'public/zhengxin/show_zhengxin_back_passwd2_from.html', locals())
    return render(request, 'public/zhengxin/show_zhengxin_back_passwd_from.html', locals())
def back_chuck_username(request):
    """
    Submit the details needed to recover a login name.

    :param request: request whose POST data carries ``name``, ``certNo``
        and ``certType``
    :return: ``JsonResponse`` with either the upstream error text or the
        "login name sent by SMS" notice
    """
    form = request.POST
    real_name = form['name']
    cert_no = form['certNo']
    cert_type = form['certType']

    request_headers = HEADERS.copy()
    request_headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
    upstream = req_session()
    init_page = upstream.get(
        url="https://ipcrs.pbccrc.org.cn/findLoginName.do?method=init",
        headers=request_headers,
        verify=False,
    )
    token_values = etree.HTML(init_page.text).xpath(
        "//input[@name='org.apache.struts.taglib.html.TOKEN']/@value"
    )
    token = token_values[0]

    # Messages the upstream site is known to answer with:
    #   - "You cannot use this feature to recover your login name, possibly
    #     because your security level is low, you are not registered, or the
    #     account was cancelled; please register again."
    #   - "Your login name has been sent by SMS to the phone number on file.
    #     If it does not arrive within 5 minutes or your number has changed,
    #     cancel the account and register again."
    payload = {
        'org.apache.struts.taglib.html.TOKEN': token,
        'method': 'findLoginName',
        'name': real_name.encode('gb2312', 'replace'),
        'certType': cert_type,
        'certNo': cert_no,
        '_@IMGRC@_': '',
    }

    retry_error, final_error = find_username(upstream, payload)
    result = {}
    # Resubmit for as long as the first error slot is set.
    while retry_error:
        retry_error, final_error = find_username(upstream, payload)

    if final_error:
        add_ajax_error_json(result, final_error.group(1))
    else:
        result['msg'] = (
            '您的登录名已短信发送至平台预留的手机号码,请查收。'
            '<br/>若您在5分钟内未收到短信或您的手机号码已修改,'
            '请使用“用户销户”功能先销户后再重新注册。'
        )
        add_ajax_ok_json(result)
    return JsonResponse(result)
def set_browser(self):
    """Prepare the session and store a ``StatefulBrowser`` on ``self.browser``.

    The session sends a deviantart Referer, carries the ``agegate_state``
    cookie when ``self.mature`` is truthy, and mounts an ``HTTPAdapter``
    with ``max_retries=3`` for ``https://`` URLs.
    """
    user_agents = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1'
        ' (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50'
        ' (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
        'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US)'
        ' AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2'
        ' (KHTML, like Gecko) Chrome/6.0',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1)'
        ' Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)',
    )

    http = req_session()
    http.headers['Referer'] = 'https://www.deviantart.com/'
    if self.mature:
        http.cookies['agegate_state'] = '1'

    retrying_adapter = req_adapters.HTTPAdapter(max_retries=3)
    http.mount('https://', retrying_adapter)

    self.browser = StatefulBrowser(
        session=http,
        user_agent=choice(user_agents),
    )
def set_browser(self):
    """Configure ``self.browser`` as a ``StatefulBrowser`` over a fresh session.

    Session setup: deviantart Referer header, optional ``agegate_state``
    cookie (only when ``self.mature`` is truthy) and an https adapter created
    with ``max_retries=3``. The user agent comes from ``choice`` over the
    pool below.
    """
    referer_header = {'Referer': 'https://www.deviantart.com/'}
    mature_cookie = {'agegate_state': '1'}
    agent_pool = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1'
        ' (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50'
        ' (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
        'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US)'
        ' AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2'
        ' (KHTML, like Gecko) Chrome/6.0',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1)'
        ' Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)',
    )

    session = req_session()
    session.headers.update(referer_header)
    if self.mature:
        session.cookies.update(mature_cookie)
    session.mount('https://', req_adapters.HTTPAdapter(max_retries=3))

    picked_agent = choice(agent_pool)
    self.browser = StatefulBrowser(session=session, user_agent=picked_agent)
def automatic_login(request, username, password, customer_id):
    """
    Log in to the credit-report site; shared by automatic and normal login.

    Steps performed:

    1. Load the login page and read the hidden ``date`` field.
    2. Submit the credentials through ``chuck_login``, retrying while the
       site answers with the "wrong captcha" message.
    3. Store the upstream cookies and the credentials in ``request.session``.
    4. When ``customer_id`` is truthy, create or update the matching
       ``ZhengXinUserDB`` row.
    5. Read the report-application page to see whether identity
       verification has already been handled.

    :param request: request object exposing ``session``
    :param username: login name for the upstream site
    :param password: password for the upstream site
    :param customer_id: id used to persist the credentials; falsy to skip
    :return: dict filled by the ajax ok/error helpers. After a successful
        login it always contains ``succ`` (``False`` unless the status text
        says the verification succeeded or is being processed) and, when
        ``succ`` is true, ``succ_msg``.
    """
    req_ses = req_session()
    login_url = 'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp'
    response = req_ses.get(url=login_url, headers=HEADERS, verify=False)
    response = etree.HTML(response.text)
    date = response.xpath("//input[@name='date']/@value")[0]
    datas = {
        "method": "login",
        "date": date,
        "loginname": username,
        "password": password,
        "_@IMGRC@_": ''
    }

    error_yanz = chuck_login(login_url, req_ses, datas)
    result = {}
    # NOTE(review): captcha retries are unbounded; consider a retry cap.
    while error_yanz:
        # A match means the login failed; only a captcha error is retried.
        error_msg = error_yanz.group(1)
        if '验证码输入错误,请重新输入' in error_msg:
            error_yanz = chuck_login(login_url, req_ses, datas)
        else:
            # Fall back to the raw text when the detail pattern does not
            # match, instead of failing on ``None.group``.
            detail = yanz_pattern.search(error_msg)
            add_ajax_error_json(result, detail.group(1) if detail else error_msg)
            return result

    session = request.session
    session['question_cookies'] = req_ses.cookies.get_dict()
    session['zhengxin_username'] = username
    session['zhengxin_password'] = password

    if customer_id:
        try:
            zh = ZhengXinUserDB.objects.get(customerId=customer_id)
        except ObjectDoesNotExist:
            # No row for this customer yet: insert a new one.
            zhs = ZhengXinUserDB(customerId=customer_id, username=username, password=password)
            zhs.save()
        else:
            zh.customerId = customer_id
            zh.username = username
            zh.password = password
            zh.save()

    # Check whether an identity verification already exists for the user:
    # if so the caller moves on to the next page, otherwise it goes straight
    # to the question-answering step.
    htmls = req_ses.get('https://ipcrs.pbccrc.org.cn/reportAction.do?method=applicationReport', verify=False)
    status = etree.HTML(htmls.text).xpath('//font[@class="span-red span-12"]/text()')

    # Default first so every post-login result has the same shape, whether
    # the status text is missing or simply not recognised.
    result['succ'] = False
    if status:
        status_str = status[0]
        if '成功' in status_str or '已通过' in status_str:
            result['succ'] = True
            result['succ_msg'] = ''
        elif '处理' in status_str:
            result['succ'] = True
            result['succ_msg'] = '账号正在处理中,请耐心等待!'

    add_ajax_ok_json(result)
    return result
PERSONAL_KEY = 'some random character'


def sendContent(browser, code, body):
    """POST ``code`` and ``body`` to ``HOSTING`` and print the outcome.

    ``browser`` is accepted but not used; the upload goes through
    ``requests.post`` rather than a browser GET with query parameters.
    """
    response = requests.post(HOSTING, data={'code': code, 'body': body})
    reply = response.text
    if reply == "done":
        print('Posted online')
    elif reply == "403":
        print('Unautorised 403')


# A single fixed user agent is used for the login session.
USERAGENTS = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:37.0) Gecko/20100101 Firefox/37.0'

session = req_session()
session.headers.update({'Referer': 'https://www.deviantart.com'})
browser = RoboBrowser(
    history=False,
    session=session,
    tries=2,
    user_agent=USERAGENTS,
)

print("Attempting to log in to deviantArt...")
browser.open('https://www.deviantart.com/users/login?ref=https%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')

# The second form on the page is filled in as the login form.
form = browser.get_forms()[1]
form['username'] = USERNAME
form['password'] = PSWD

if browser.find(text=re.compile("Login")):
    print('Compiled login fields form...')
    browser.submit_form(form)
def parse(self, response):
    """Log in to Sohu mail for the account in ``response.meta['item']``.

    Generator callback. It walks the passport login sequence with a fresh
    ``req_session()`` while mirroring every cookie it sees into
    ``cookies_dict``:

    1. Load the start page and the common-cookie endpoint.
    2. Load the ``jf/code`` endpoint, run its text through ``self.js_driver``
       and turn the resulting ``name=value;...`` string into a cookie.
    3. POST the user id and the MD5 hex digest of the password.
    4. Status 404 -> report wrong credentials and stop.
       Status 465 -> ask for an image captcha via
       ``self.ask_image_captcha``, then log in again with it.
    5. On a truthy login response, send the mail callback request, build the
       ``ppmdig_cookies`` and yield a ``Request`` for the search URL.

    Any exception raised along the way is turned into
    ``self.except_handle`` output.
    """
    meta = response.meta
    item = meta['item']
    username = item["username"]
    try:
        cookies_dict = dict()
        req_ss = req_session()
        index_res = req_ss.get(self._start_url_, headers=self.headers, verify=False)
        cookies_dict.update(index_res.cookies.get_dict())
        # Fetch the cookies
        index_cookie_url = 'https://v4.passport.sohu.com/i/cookie/common' \
                           '?callback=passport403_cb%s&_=%s' % (get_js_time(), get_js_time())
        index_cookie_res = req_ss.get(index_cookie_url, verify=False)
        cookies_dict.update(index_cookie_res.cookies.get_dict())
        code_callback_url = 'https://v4.passport.sohu.com/i/jf/code?callback=passport403_cb%s' \
                            '&type=0&_=%s' % (get_js_time(), get_js_time())
        code_callback_res = req_ss.get(code_callback_url, verify=False)
        cookies_dict.update(code_callback_res.cookies.get_dict())
        # Parse: js_driver output is split as "name=value;..." into a cookie
        jv_val = self.js_driver(code_callback_res.text).split('=')
        jv_dict = {jv_val[0]: jv_val[1].split(';', 1)[0]}
        req_ss.cookies.update(jv_dict)
        cookies_dict.update(jv_dict)
        data = {
            'userid': username,
            'password': md5(item["password"].encode('utf-8')).hexdigest(),
            'appid': '101305',
            'callback': 'passport403_cb' + get_js_time()
        }
        login_url = 'https://v4.passport.sohu.com/i/login/101305'
        res = req_ss.post(login_url, data=data)
        result = json_loads(self.result_pattern2.search(res.text).group(1))
        status = result['status']
        if status == 404:
            yield from self.error_handle(username, "搜狐邮箱---账号密码错误", tell_msg="请刷新页面重试")
            return
        elif status == 465:
            # Captcha required: refresh the js-derived cookie, then have the
            # captcha image solved and retry the login with it.
            callback_url = 'https://v4.passport.sohu.com/i/jf/code?callback=passport403_cb%s' \
                           '&type=0&_=%s' % (get_js_time(), get_js_time())
            jv_content = req_ss.get(callback_url)
            cookies_dict.update(jv_content.cookies.get_dict())
            # NOTE(review): this re-requests the earlier code_callback_url,
            # not the callback_url built just above -- confirm intended.
            code_callback_res = req_ss.get(code_callback_url, verify=False)
            jv = self.js_driver(code_callback_res.text).split('=')
            cookies_dict.update({jv[0]: jv[1].split(';', 1)[0]})
            pagetoken = get_js_time()
            captcha_url = 'https://v4.passport.sohu.com/i/captcha/picture?pagetoken=%s' \
                          '&random=passport403_sdk%s' % (pagetoken, get_js_time())

            self.set_image_captcha_headers_to_ssdb(cookies_dict, username)
            self.set_email_img_url_to_ssdb(captcha_url, username)
            captcha_body = req_get(captcha_url, cookies=cookies_dict).content
            captcha_code = self.ask_image_captcha(captcha_body, username, file_type=".png")
            data.update({
                'captcha': captcha_code,
                'pagetoken': str(pagetoken)
            })
            # The retry goes through req_post with the collected cookies
            # instead of through req_ss.
            res = req_post(login_url, data=data, cookies=cookies_dict, verify=False)
            result_two = json_loads(
                self.result_pattern2.search(res.text).group(1))
            status_two = result_two['status']
            if status_two == 420:
                yield from self.error_handle(username, "搜狐邮箱---输入验证错误!", tell_msg="输入验证错误!")
                return
            elif status_two != 200:
                yield from self.error_handle(username, "搜狐邮箱---未知错误!", tell_msg="抓取失败!")
                return
        # NOTE(review): `res` is a response object, so this tests the
        # response's truthiness rather than the parsed `status` -- confirm.
        if res:
            # After a successful login a callback request has to be sent.
            cookies_dict.update(res.cookies.get_dict())
            callback_url = 'https://mail.sohu.com/fe/login/callback'
            call_back = req_ss.post(callback_url, verify=False)
            # Merge in the cookies set by the callback
            cookies_dict.update(call_back.cookies.get_dict())
            # Build the cookies used for logging out
            logout_con = req_ss.get(
                'https://v4.passport.sohu.com/i/jf/code?callback=passport403_cb%s'
                '&type=0&_=%s' % (get_js_time(), get_js_time()),
                cookies=cookies_dict, verify=False)
            ppmdig_cookies = logout_con.cookies.get_dict()
            logout_val = self.js_driver(logout_con.text).split('=')
            ppmdig_cookies.update(
                {logout_val[0]: logout_val[1].split(';', 1)[0]})
            search_url = self.search_url % (0, get_js_time(), self.keyword)
            meta["cookies_dict"] = cookies_dict
            meta["ppmdig_cookies"] = ppmdig_cookies
            yield Request(url=search_url, cookies=cookies_dict, callback=self.parse_search,
                          meta=meta, dont_filter=True, errback=self.err_callback)
            # Once logged in, the JSON page is requested directly and the
            # matching data is extracted and filtered.
            # Users may have moved bills into custom folders, so
            # user-defined content has to be filtered as well; custom
            # folder ids start at 17.
            # First check whether any custom label exists.
        else:
            yield from self.error_handle(username, "搜狐邮箱---账号密码错误!",
                                         tell_msg="账号或密码错误,请刷新页面重试")
    except Exception:
        yield from self.except_handle(username, "登录入口异常",
                                      tell_msg="邮箱登录失败,请刷新页面重试")