def reply_sina_club(post_url, src):
    """
    Reply to a thread on a Sina forum.

    The bullet lines below look machine-readable and are kept verbatim --
    TODO confirm whether anything parses them.

    - Name: 新浪论坛18646492184
    - Feature: (forum|club).*.sina.com.cn
    - Captcha: YES
    - Login: YES

    @param post_url: URL of the thread to reply to
    @type post_url: str

    @param src: credentials and reply data; this function reads
        src['content'] and src['TTL'] and hands the whole dict to
        utils.RAPSession and login_sina
    @type src: dict

    @return: (ok, log) -- ok is True when the reply was accepted,
        log is str(logger)
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_sina(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: reply
    resp = sess.get(post_url)
    host = utils.get_host(post_url)
    # The action attribute of the reply form is the URL to post to.
    reply_url = re.findall(r'id=\"postform\" action=\"(.*?)\"',
                           resp.content)[0]
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'postform'})
    # Start from the fields already present in the form, then add the body.
    payload = utils.get_datadic(form)
    payload['message'] = src['content'].decode('utf8').encode(CHARSET)
    # reply_url was cut out of raw HTML, where an ampersand inside an
    # attribute value is written as its HTML entity; turn it back into a
    # plain ampersand.  The entity is spelled as a concatenation so that the
    # literal survives tooling that decodes HTML entities in source text
    # (the previous replace call had degraded into a no-op).
    reply_url = reply_url.replace('&' + 'amp;', '&')

    def submit_reply():
        # One reply POST; called again after each captcha attempt.
        return sess.post(reply_url, data=payload,
                         headers={
                             'Origin': host,
                             'Referer': post_url
                         })

    resp = submit_reply()
    post_times = 0
    # The code treats a response containing 'postform' as success.  While it
    # is missing, assume a captcha is required and retry, at most
    # src['TTL'] times.
    while 'postform' not in resp.content and post_times < src['TTL']:
        post_times += 1
        logger.info(' reply need captcha')
        # Fetch the captcha image
        captcha = sess.get(host + 'seccode.php',
                           headers={
                               'Accept': config.accept_image,
                               'Referer': reply_url
                           })
        # Turn the image into its text
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' seccode:' + seccode)
        # Add the captcha answer to the reply fields and resend
        payload['seccodeverify'] = seccode.decode(CHARSET)
        resp = submit_reply()

    if 'postform' not in resp.content:
        logger.error(' Reply Error')
        return (False, str(logger))
    logger.info(' Reply OK')
    return (True, str(logger))
def login_sina(sess, src):
    """
    Log in to Sina with a pre-login request followed by a login POST.

    @param sess: session object used for every request (its get and post
        methods are called)
    @type sess: Session

    @param src: credentials; src['username'] and src['password'] are read
    @type src: dict

    @return: True when the login response carries retcode=0, False
        otherwise (including when no retcode can be found in the response)
    @rtype: bool
    """
    url_prelogin = '******'
    url_login = '******'

    # Pre-login: the response wraps a JSON object in parentheses; it supplies
    # servertime, nonce, pubkey, rsakv and pcid.
    resp = sess.get(url_prelogin)
    json_data = re.findall(r'\((.*?)\)', resp.content)[0]
    data = json.loads(json_data)
    servertime = data['servertime']
    nonce = data['nonce']
    pubkey = data['pubkey']
    rsakv = data['rsakv']
    pcid = data['pcid']

    # User name: URL-quoted, then base64-encoded
    su = base64.b64encode(urllib.quote(src['username']))

    # Password: RSA-encrypted together with servertime and nonce, hex-encoded
    rsa_public_key = int(pubkey, 16)
    key = rsa.PublicKey(rsa_public_key, 65537)
    message = (str(servertime) + '\t' + str(nonce) + '\n' +
               str(src['password']))
    sp = binascii.b2a_hex(rsa.encrypt(message, key))

    # Fetch the captcha image for this pcid and turn it into text
    captcha_resp = sess.get(
        'http://login.sina.com.cn/cgi/pin.php?r=39011430&s=0&p=' + pcid,
        headers={'Accept': config.accept_image})
    captcha = utils.crack_captcha(captcha_resp.content)
    # Single-argument print(...) behaves the same as the print statement.
    print('captcha:' + captcha)

    payload = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'userticket': '1',
        'pagerefer': 'http://www.baidu.com/',
        'ssosimplelogin': '******',
        'vsnf': '1',
        'pcid': pcid,
        'door': captcha,
        'su': su,
        'service': 'miniblog',
        'servertime': servertime,
        'nonce': nonce,
        'pwencode': 'rsa2',
        'rsakv': rsakv,
        'sp': sp,
        'sr': '1366*768',
        'prelt': '168',
        'encoding': 'UTF-8',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META'
    }
    resp = sess.post(
        url_login,
        data=payload,
        headers={
            'Referer': 'http://weibo.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36'
        })

    # A response without any retcode is treated as a failed login rather
    # than raising IndexError, so callers that retry on False keep working.
    retcodes = re.findall(r'retcode=(\d+)', resp.content)
    if not retcodes:
        print('retcode not found in login response')
        return False
    retcode = retcodes[0]
    print('retcode: ' + retcode)
    if retcode == '0':
        return True

    # The failure reason is optional diagnostics; its absence must not turn
    # the False result into an exception.
    reasons = re.findall(r'reason=(.+)&', resp.content)
    if reasons:
        print('reason: ' + urllib.unquote(reasons[0]).decode('GBK'))
    return False
def validateuser(self, uid):
    """
    Submit the Renren validateuser.do captcha form for one user id.

    Fetches the validation page, collects its form fields, fetches and
    cracks the captcha image, then posts the form.  The payload and the
    final response URL are printed; nothing is returned.

    NOTE(review): the cookies, requestToken and _rtk below are hard-coded;
    presumably they were captured from a browser session -- confirm they are
    not live credentials and whether they should come from configuration.

    @param self: not used by the body
    @param uid: user id appended to the validateuser.do URL
    @type uid: int or str
    """
    session = requests.session()
    uid_text = str(uid)
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36'
    # The literal used to list 'jebe_key' twice; in a dict display the last
    # value given for a key wins, so only that effective value is kept.
    cookies = {
        'anonymid': 'i8pg5nhd-shfdcd',
        '_r01_': '1',
        'JSESSIONID': 'abc6gyMzwvHPk4Az8nN0u',
        'wp': '0',
        '_urm_378384894': '9999',
        'depovince': 'BJ',
        'jebecookies': 'e1c7f34b-675d-48d9-873c-a6327590c04e|||||',
        'ick_login': '******',
        '_de': 'E6CA621BCBC30E9A85E798EF221DF2AF',
        'p': '920950803139a28ca9a4c028f1c2846e4',
        'first_login_flag': '1',
        'ln_uact': '18345174475',
        'ln_hurl': 'http://hdn.xnimg.cn/photos/hdn421/20140206/2325/h_main_TZb3_97690001dbe1111a.jpg',
        't': '2582a0a96c3887f092040edd842ea33b4',
        'societyguester': '2582a0a96c3887f092040edd842ea33b4',
        'id': '378384894',
        'xnsid': 'd7c2bea1',
        '__utma': '10481322.1851133621.1430901819.1430901819.1430901819.1',
        '__utmc': '10481322',
        '__utmz': '10481322.1430901819.1.1.utmcsr=share.renren.com|utmccn=(referral)|utmcmd=referral|utmcct=/share/v7/161125632',
        'alxn': 'a5d9878e44be799cc3e99efcab2d3dcd',
        'mt': 'Jvfhn7u-wG7PdteezqsqEK',
        'cp_config': '2',
        'ver': '7.0',
        'loginfrom': 'null',
        'jebe_key': '35c0ba4a-c1fa-454c-bf08-3cd0517227a4%7Ce65290594db334b483ac1f24d6999fef%7C1430910523708%7C1',
        'wp_fold': '0',
        'l4pager': '0'
    }

    # Load the validation page and collect the fields of its form.
    resp = session.get(
        'http://www.renren.com/validateuser.do?id=' + uid_text,
        cookies=cookies,
        headers={
            'Pragma': 'no-cache',
            'Referer': 'http://www.renren.com/343633795/profile',
            'User-Agent': user_agent
        })
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'name': 'valiateUserForm'})
    payload = utils.get_datadic(form)

    # Fetch the captcha image; rnd is the current time in milliseconds.
    resp = session.get(
        'http://icode.renren.com/getcode.do?t=ninki&rnd=' +
        str(int(time.time() * 1000)),
        cookies=cookies,
        headers={
            'Accept': 'image/webp,*/*;q=0.8',
            'Referer': 'http://www.renren.com/validateuser.do'
        })
    seccode = utils.crack_captcha(resp.content)
    payload['icode'] = seccode
    payload['requestToken'] = '-295732589'
    payload['_rtk'] = 'ba2078ed'
    # Single-argument print(...) behaves the same as the print statement.
    print(payload)

    # Submit the form together with the captcha answer.
    resp = session.post(
        'http://www.renren.com/validateuser.do',
        data=payload,
        cookies=cookies,
        headers={
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Host': 'www.renren.com',
            'Origin': 'http://www.renren.com',
            'Pragma': 'no-cache',
            'Referer': 'http://www.renren.com/validateuser.do?id=' + uid_text,
            'User-Agent': user_agent
        })
    print(resp.url)
def post_sina_blog(post_url, src):
    """
    Publish a new article on Sina Blog.

    @param post_url: board address (blog.sina.com.cn); in this function it
        is only passed to utils.RAPLogger, the requests go to the fixed
        article_add.php page and to the action URL of its form
    @type post_url: str

    @param src: credentials and article data; src['subject'],
        src['content'] and src['TTL'] are read.  src is not modified by
        this function.
    @type src: dict

    @return: (url, log) -- url is the address of the new article on
        success; on failure the first element is False (login failed) or ''
        (posting failed).  log is str(logger).
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in, trying at most src['TTL'] times
    is_login = False
    login_tries = 0
    while not is_login and login_tries < src['TTL']:
        login_tries += 1
        is_login = login_sina(sess, src)
    if not is_login:
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the editor page and start from the fields of its form
    resp = sess.get(
        'http://control.blog.sina.com.cn/admin/article/article_add.php')
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'editorForm'})
    payload = utils.get_datadic(form)
    payload['blog_title'] = src['subject']
    payload['blog_body'] = src['content']
    # Fixed values; their meaning is not visible from this function.
    payload['conlen'] = 9
    payload['x_cms_flag'] = 0
    payload['x_rank'] = ''

    # Step 3: submit.  Code B06001 is handled as success; code B06013 is
    # answered by fetching a captcha and resubmitting.  Retries are counted
    # locally: the old loop decremented src['TTL'] in the caller's dict and
    # could not terminate on its own for a negative TTL.
    captcha_tries = 0
    while True:
        resp = sess.post(form['action'], data=payload)
        json_data = json.loads(resp.content)
        logger.info(resp.content)
        if json_data['code'] == u'B06001':
            url = ('http://blog.sina.com.cn/s/blog_' + json_data['data'] +
                   '.html')
            logger.info(' Post OK')
            return (url, str(logger))
        if json_data['code'] != u'B06013' or captcha_tries >= src['TTL']:
            break
        captcha_tries += 1
        captcha = sess.get(
            'http://interface.blog.sina.com.cn/riaapi/checkwd_image.php?r=0.8578676988836378',
            headers={'Accept': config.accept_image})
        # Turn the captcha image into text and attach it to the form
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' captcha:' + seccode)
        payload['checkword'] = seccode

    logger.info(' Post Error')
    return ('', str(logger))
def login_qq(sess, src):
    """
    Log in to QQ through the ptlogin2 endpoints.

    @param sess: session wrapper used for the requests; sess.get is called
        and sess.s.cookies is read
    @type sess: Session

    @param src: credentials; src['username'] and src['password'] are read
    @type src: dict

    @return: True when the login response contains the success text,
        False otherwise
    @rtype: bool
    """
    redirect_target = 'http://qzs.qq.com/qzone/v5/loginsucc.html?para=izone'

    # The landing page embeds the login form in a frame; the frame page
    # carries the login_sig value.
    landing = sess.get('http://i.qq.com')
    frame_src = BeautifulSoup(landing.content).select('#login_frame')[0]['src']
    frame_page = sess.get(frame_src)
    login_sig = re.findall('login_sig:"(.*?)"', frame_page.content)[0]

    # Query the check endpoint; its reply holds five quoted fields, of which
    # the second and third are used below.
    check_params = {
        'appid': '549000912',
        'js_type': 1,
        'js_ver': 10116,
        'login_sig': login_sig,
        'r': 0.8861454421075537,
        'regmaster': "",
        'pt_tea': "1",
        'u1': redirect_target,
        'uin': src['username']
    }
    check_resp = sess.get('http://check.ptlogin2.qq.com/check',
                          params=check_params)
    # Single-argument print(...) behaves the same as the print statement.
    print(check_resp.content)
    quoted_fields = re.findall(r'\'(.*?)\'', check_resp.content)
    _, vcode, uin, _, _ = quoted_fields

    # Fetch the captcha image and turn it into text
    captcha_url = ('http://captcha.qq.com/getimage?uin=' + src['username'] +
                   '&aid=549000912&cap_cd=' + vcode + '&0.5720198110211641')
    captcha_resp = sess.get(captcha_url,
                            headers={
                                'Accept': config.accept_image,
                            })
    seccode = utils.crack_captcha(captcha_resp.content)

    # Build the login query
    login_params = {
        'u': src['username'],
        'verifycode': seccode,
        'pt_vcode_v1': 0,
        'pt_verifysession_v1': sess.s.cookies['verifysession'],
        'p': get_pwd(src['password'], uin, seccode),
        'pt_randsalt': 0,
        'u1': redirect_target,
        'ptredirect': 0,
        'h': 1,
        't': 1,
        'g': 1,
        'from_ui': 1,
        'ptlang': 2052,
        'action': '5-4-1427091605160',
        'js_ver': 10116,
        'js_type': 1,
        'login_sig': login_sig,
        'pt_uistyle': 32,
        'aid': 549000912,
        'daid': 5,
        'pt_qzone_sig': 1,
        '': ''
    }
    login_resp = sess.get('http://ptlogin2.qq.com/login', params=login_params)
    print(login_resp.content)
    return '登录成功' in login_resp.content
def reply_163_blog(post_url, src):
    """
    Post a comment on a NetEase (163) blog article.

    The bullet lines below look machine-readable and are kept verbatim --
    TODO confirm whether anything parses them.

    - Name: 网易博客
    - Feature: blog.163.com
    - Captcha: YES
    - Login: YES

    @param post_url: URL of the blog article
    @type post_url: str

    @param src: credentials and comment data; src['content'] and
        src['username'] are read here, the whole dict goes to
        utils.RAPSession and login_163
    @type src: dict

    @return: (ok, log) -- ok is True when the comment was accepted,
        log is str(logger)
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_163(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: captcha -- the image is requested for the userId found on the
    # article page
    first_page = sess.get(post_url)
    captcha_owner = re.findall(r'userId:(.*?),', first_page.content)[0]
    captcha_resp = sess.get(
        'http://api.blog.163.com/cap/captcha.jpgx?parentId=' + captcha_owner,
        headers={
            'Accept': config.accept_image,
            'Referer': post_url,
        })
    seccode = utils.crack_captcha(captcha_resp.content)
    logger.info(' seccode:' + seccode)

    # Step 3: load the page again and pull the comment parameters from it
    page = sess.get(post_url)
    soup = BeautifulSoup(page.content)
    meta = soup.find('meta', attrs={'name': 'author'})
    page_id = re.findall('id:\'(.*?)\'', page.content)[0]
    parent_id = re.findall('parentId=(.*?)&', page.content)[0]
    # The author meta tag holds two comma-separated values.
    author_fields = meta.attrs['content'].split(',')
    author_0 = urllib.quote(author_fields[0].encode(CHARSET))
    author_1 = urllib.quote(author_fields[1].encode(CHARSET))
    content = urllib.quote(src['content'] + '<wbr>')

    # Request body for the DWR call: one key=value entry per line, no
    # trailing newline.
    param0 = ('c0-param0=Object_Object:'
              '{blogId:reference:c0-e1,'
              'blogUserId:reference:c0-e2,'
              'blogTitle:reference:c0-e3,'
              'content:reference:c0-e4,'
              'publisherNickname:reference:c0-e5,'
              'publisherEmail:reference:c0-e6,'
              'mainComId:reference:c0-e7,'
              'replyComId:reference:c0-e8,'
              'replyToUserId:reference:c0-e9,'
              'replyToUserName:reference:c0-e10,'
              'replyToUserNick:reference:c0-e11,'
              'synchMiniBlog:reference:c0-e12}')
    body_lines = [
        'callCount=1',
        'scriptSessionId=${scriptSessionId}187',
        'c0-scriptName=BlogBeanNew',
        'c0-methodName=addBlogComment',
        'c0-id=0',
        'c0-e1=string:' + page_id,
        'c0-e2=number:' + parent_id,
        'c0-e3=string:',
        'c0-e4=string:' + content,
        'c0-e5=string:' + src['username'],
        'c0-e6=string:',
        'c0-e7=number:-1',
        'c0-e8=number:-1',
        'c0-e9=number:' + parent_id,
        'c0-e10=string:' + author_0,
        'c0-e11=string:' + author_1,
        'c0-e12=bool:true',
        param0,
        'c0-param1=string:' + seccode,
        'c0-param2=bool:false',
        'batchId=118652',
    ]
    payload = '\n'.join(body_lines)

    headers = {
        'Content-Type': 'text/plain',
        'Origin': 'http://api.blog.163.com',
        'Referer': 'http://api.blog.163.com/crossdomain.html?t=20100205',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6'
    }
    resp = sess.post(
        'http://api.blog.163.com/xinluduwu/dwr/call/plaincall/BlogBeanNew.addBlogComment.dwr',
        data=payload,
        headers=headers)

    # The code treats the presence of the callback name in the response as
    # success.
    if '_remoteHandleCallback' in resp.content:
        logger.info(' Reply OK')
        return (True, str(logger))
    logger.info(resp.content)
    logger.error(' Reply Error')
    return (False, str(logger))
def reply_163_bbs(post_url, src):
    """
    Reply to a thread on a NetEase (163) forum.

    The bullet lines below look machine-readable and are kept verbatim --
    TODO confirm whether anything parses them.

    - Name: 网易论坛
    - Feature: bbs.*.163.com/bbs/
    - Captcha: YES
    - Login: YES

    @param post_url: URL of the thread
    @type post_url: str

    @param src: credentials and reply data; src['content'] and src['TTL']
        are read here, the whole dict goes to utils.RAPSession and login_163
    @type src: dict

    @return: (ok, log) -- ok is True when the reply was accepted,
        log is str(logger)
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_163(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: captcha
    host = utils.get_host(post_url)
    logger.info(host)
    page = sess.get(post_url)
    # Board and thread ids are embedded in the page
    board_id = re.findall('boardId = \"(.*?)\"', page.content)[0]
    thread_id = re.findall('threadId = \"(.*?)\"', page.content)[0]
    # Current time, reused for every request in this call
    timestamp = str(time.time())

    # Ask the server whether a captcha is needed for this reply
    check_url = (host + 'v2/post/replyCheck/' + board_id + '/' + thread_id +
                 '/?timestamp=' + timestamp)
    check_resp = sess.post(check_url,
                           headers={
                               'X-Requested-With': 'XMLHttpRequest',
                               'Origin': host,
                               'Referer': post_url,
                               'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6'
                           })
    check_code = re.findall('\"checkCode\":\"(.*?)\",', check_resp.content)[0]
    logger.info(check_code)

    seccode = ''
    if check_code == '1':
        # Keep fetching and validating captchas until one is accepted or
        # src['TTL'] attempts have been used.
        captcha_ok = False
        tries = 0
        while not (captcha_ok or tries >= src['TTL']):
            tries += 1
            image = sess.get(host + 'v2/checkcode/codeimg?timestamp=' +
                             timestamp,
                             headers={
                                 'Accept': config.accept_image,
                                 'Referer': post_url,
                             })
            seccode = utils.crack_captcha(image.content)
            logger.info(' seccode:' + seccode)
            # Submit the answer and see whether the server accepts it
            verdict = sess.post(host + 'v2/checkcode/validate',
                                data={'code': seccode.encode('utf-8')},
                                headers={
                                    'Origin': host,
                                    'Referer': post_url,
                                })
            logger.info(verdict.content)
            captcha_ok = '"code":1' in verdict.content

    # Step 3: reply (sent even when no captcha answer was accepted)
    reply_fields = {
        'checkcode': seccode,
        'content': src['content'].decode('utf8').encode(CHARSET),
        'title': re.findall('<title>(.*?)</title>', page.content)[0],
        'boardId': board_id,
        'threadId': thread_id
    }
    reply_resp = sess.post(host + 'v2/post/doReply',
                           data=reply_fields,
                           headers={
                               'Referer': post_url,
                           })

    # The code treats a "message" cell in the response as an error report.
    if '\"message\">' not in reply_resp.content:
        logger.info(' Reply OK')
        return (True, str(logger))
    error_text = re.findall('\"message\">(.*?)</td>',
                            reply_resp.content.decode(CHARSET))[0]
    logger.error(' Reply Error ' + error_text)
    return (False, str(logger))
def reply_163_news(post_url, src):
    """
    Post a comment on a NetEase (163) news article.

    The bullet lines below look machine-readable and are kept verbatim --
    TODO confirm whether anything parses them.

    - Name: 网易新闻
    - Feature: news.163.com/[0-9]
    - Captcha: YES
    - Login: YES

    @param post_url: URL of the news article
    @type post_url: str

    @param src: credentials and comment data; src['username'],
        src['content'] and src['TTL'] are read here, the whole dict goes to
        utils.RAPSession and login_163
    @type src: dict

    @return: (ok, log) -- ok is True when the comment was accepted,
        log is str(logger)
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_163(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: captcha
    article = sess.get(post_url)
    # Board and thread ids are embedded in the article page
    board_id = re.findall('boardId = \"(.*?)\"', article.content)[0]
    thread_id = re.findall('threadId = \"(.*?)\"', article.content)[0]
    comments_url = ("http://comment.news.163.com/" + board_id + "/" +
                    thread_id + ".html")
    # Headers shared by the need-validate query and the final post
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Origin': 'http://comment.news.163.com',
        'Referer': comments_url,
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6'
    }

    # Ask the server whether a captcha is needed
    need_resp = sess.get(
        'http://comment.news.163.com/reply/needvalidate.jsp?time=1413303328581',
        headers=headers)
    need_validate = re.findall('needValidate:([0-9])', need_resp.content)[0]

    seccode = ''
    if need_validate == '1':
        # Keep fetching and validating captchas until one is accepted or
        # src['TTL'] attempts have been used.
        captcha_ok = False
        tries = 0
        while not (captcha_ok or tries >= src['TTL']):
            tries += 1
            image = sess.get(
                'http://comment.news.163.com/reply/auth/validatecode.jsp?rnd=',
                headers={
                    'Accept': config.accept_image,
                    'Referer': comments_url,
                })
            seccode = utils.crack_captcha(image.content)
            logger.info(' seccode:' + seccode)
            # Ask whether this answer is valid
            verdict = sess.get(
                'http://comment.news.163.com/reply/isValidateCodeValid.jsp?validateCode=' +
                seccode,
                headers={
                    'Referer': comments_url,
                })
            logger.info(verdict.content)
            captcha_ok = 'true' in verdict.content

    # Step 3: post the comment (sent even when no captcha answer was
    # accepted)
    comment_fields = {
        'board': board_id,
        'quote': '',
        'threadid': thread_id,
        'hidename': False,
        'username': src['username'],
        'body': src['content'],
        'isTinyBlogSyn': 1,
        'flag': '',
        'validateCode': seccode,
    }
    post_resp = sess.post('http://comment.news.163.com/reply/dopost.jsp',
                          data=comment_fields,
                          headers=headers)

    # The code treats the presence of this page text as success.
    if '网易首页' in post_resp.content:
        logger.info(' Reply OK')
        return (True, str(logger))
    logger.error(' Reply Error')
    return (False, str(logger))
def post_163_blog(post_url, src):
    """
    Publish a new article on a NetEase (163) blog.

    @param post_url: board address (blog.163.com); in this function it is
        only passed to utils.RAPLogger, the request URLs are built from the
        user name
    @type post_url: str

    @param src: credentials and article data; src['username'],
        src['subject'], src['content'] and src['TTL'] are read.  src is not
        modified by this function.
    @type src: dict

    @return: (url, log) -- url is the address of the new article on
        success; on failure the first element is False (login failed) or ''
        (posting failed).  log is str(logger).
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_163(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the editor page; the part of the user name before '@' is
    # used as the blog sub-domain.
    username = src['username'].split('@')[0]
    resp = sess.get('http://' + username +
                    '.blog.163.com/blog/getBlog.do?fromString=bloglist')
    parent_id = re.findall(r'parentId=(\d+)', resp.content)[0]
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'target': 'blog-163-com-post'})
    payload = utils.get_datadic(form)
    payload['title'] = src['subject']
    payload['HEContent'] = src['content'] + '<wbr>'
    payload['allowview'] = '-100'

    url = ('http://api.blog.163.com/' + username +
           '/editBlogNew.do?p=1&n=1&from=bloglist')

    # Step 3: submit.  An sfx value other than '/' is handled as success and
    # forms the path of the new article; '/' is answered by fetching a
    # captcha and resubmitting.  Retries are counted locally: the old loop
    # decremented src['TTL'] in the caller's dict and could not terminate on
    # its own for a negative TTL.
    captcha_tries = 0
    while True:
        resp = sess.post(url, data=payload)
        logger.info(resp.content)
        sfx = re.findall(r'sfx:\'(.*?)\'', resp.content)[0]
        if sfx != '/':
            blog_url = 'http://' + username + '.blog.163.com/' + sfx
            logger.info(' Post OK')
            return (blog_url, str(logger))
        if captcha_tries >= src['TTL']:
            break
        captcha_tries += 1
        captcha = sess.get(
            'http://api.blog.163.com/cap/captcha.jpgx?parentId=' + parent_id +
            '&r=308985',
            headers={'Accept': config.accept_image})
        # Turn the captcha image into text and attach it to the form
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' captcha:' + seccode)
        payload['valcodeKey'] = seccode

    logger.info(' Post Error')
    return ('', str(logger))