def login_163(sess, src):
    """Log in to NetEase (163.com) through the reg.163.com login form.

    @param sess: session used for every request (described by the original
        author as a requests.Session()).
    @type sess: Session
    @param src: account data; 'username' and 'password' are read here.
    @type src: dict
    @return: True when the page reached after login contains the expected
        marker text; False otherwise, including when the login response
        carries no redirect target.
    @rtype: bool
    """
    login_page = 'http://reg.163.com/'

    # Load the login page and locate the login form.
    resp = sess.get(login_page)
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'fLogin'})

    # utils.get_datadic presumably collects the form's existing fields;
    # the credentials are then filled in on top of them.
    payload = utils.get_datadic(form)
    payload['username'] = src['username']
    payload['password'] = src['password']

    resp = sess.post(login_page + 'logins.jsp', data=payload)

    # The login response redirects via a JavaScript location.replace("...").
    redirects = re.findall(r'location.replace\(\"(.*?)\"\)', resp.content)
    if not redirects:
        # No redirect found: report a failed login instead of raising
        # IndexError on redirects[0].
        return False

    # Follow the redirect and look for the marker text on the landing page.
    resp = sess.get(redirects[0])
    return '上次登录情况' in resp.content
def reply_eulam_forum(post_url, src):
    """Log in via login_eulam_forum and post a reply to the given thread.

    @param post_url: address of the thread to reply to.
    @type post_url: str
    @param src: account and reply data; 'content' (UTF-8 encoded) is read
        here, the rest is consumed by the session/login helpers.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    host = utils.get_host(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    logged_in = login_eulam_forum(sess, src)
    if not logged_in:
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread page and pick up the reply form.
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'name': 'form'})

    # Step 3: fill in the reply, re-encoded from UTF-8 to the site charset.
    payload = utils.get_datadic(form, CHARSET)
    payload['Body'] = src['content'].decode('utf8').encode(CHARSET)
    payload['BBSXPCodeForm'] = ''

    result = sess.post(host + 'ReTopic.asp',
                       data=payload,
                       headers={'Referer': post_url})

    # The response contains this marker text when the reply was accepted.
    success_marker = u'操作成功'.encode(CHARSET)
    if success_marker in result.content:
        logger.info('Reply OK')
        return (True, str(logger))

    logger.error('Reply Error')
    return (False, str(logger))
def login_enewstree(post_url, sess, src):
    """Log in to enewstree.com using the login form found on post_url.

    @param post_url: page that carries the 'lsform' login form.
    @type post_url: str
    @param sess: session used for the requests.
    @type sess: Session
    @param src: account data; 'username' and 'password' (UTF-8 encoded)
        are read here.
    @type src: dict
    @return: True when the user name appears in the login response.
    @rtype: bool
    """
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'lsform'})

    # Credentials are re-encoded from UTF-8 to the site charset; characters
    # that cannot be represented are dropped.
    payload = utils.get_datadic(form)
    for field in ('username', 'password'):
        payload[field] = src[field].decode('utf8').encode(CHARSET, 'ignore')

    login_url = 'http://enewstree.com/discuz/' + form['action']
    result = sess.post(login_url, data=payload, headers={'Referer': post_url})

    # Login is considered successful when the user name shows up in the
    # decoded response body.
    return src['username'].decode('utf8') in result.content.decode(CHARSET, 'ignore')
def login_1dpw(post_url, sess, src):
    """Log in to bbs.1dpw.com using the login form found on post_url.

    @param post_url: page that carries the 'lsform' login form.
    @type post_url: str
    @param sess: session used for the requests.
    @type sess: Session
    @param src: account data; 'username' and 'password' are read here.
    @type src: dict
    @return: True when the user name appears in the login response.
    @rtype: bool
    """
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'lsform'})

    # Fill the credentials into the fields collected from the form.
    payload = utils.get_datadic(form)
    payload['username'] = src['username']
    payload['password'] = src['password']

    result = sess.post('http://bbs.1dpw.com/' + form['action'], data=payload)

    # Login is considered successful when the user name shows up in the
    # response body.
    return src['username'] in result.content
def login_backchina(sess, src):
    """Log in to www.backchina.com.

    @param sess: session used for the requests.
    @type sess: Session
    @param src: account data; 'username' and 'password' are read here.
    @type src: dict
    @return: True when the login response contains the auto-redirect
        marker text, False otherwise.
    @rtype: bool
    """
    login_page = 'http://www.backchina.com/'

    # login_page already ends with '/', so the path must not start with one
    # (the original produced 'http://www.backchina.com//member.php...').
    resp = sess.get(login_page + 'member.php?mod=logging&action=login&referer=')
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'name': 'login'})

    # Start from the fields collected from the form; the password is sent
    # as its MD5 hex digest.
    payload = utils.get_datadic(form)
    payload['username'] = src['username']
    payload['password'] = md5(src['password']).hexdigest()

    resp = sess.post(login_page + form['action'], data=payload)

    # Compare as UTF-8 bytes: equivalent to decoding the body and searching
    # the text, but cannot raise UnicodeDecodeError on a malformed response.
    return u'自动跳转'.encode('utf8') in resp.content
def login_inmediahk(sess, src):
    """Log in to Facebook for commenting on inmediahk.net.

    @param sess: session used for the requests.
    @type sess: Session
    @param src: account data; 'username', 'password', 'external_page_url'
        and 'iframe_src' are read here.
    @type src: dict
    @return: False when the login response contains 'Redirecting',
        True otherwise.
    @rtype: bool
    """
    # Query parameters for the Facebook login popup.
    login_params = {
        'external_page_url': src['external_page_url'],
        'iframe_src': src['iframe_src'],
        'locale': 'zh_HK',
        'display': 'popup',
        'social_plugin': 'multi_login',
        'cancel_url': 'https://www.facebook.com/connect/window_comm.php?_id=fcecdd1f4&_relation=opener',
        'next': 'https://www.facebook.com/plugins/multi_login_popup_loggedin.php',
        'provider': 'facebook'
    }
    login_page = sess.get('https://www.facebook.com/login.php', params=login_params)

    # Locate the login form and fill in the credentials.
    form = BeautifulSoup(login_page.content).find('form', attrs={'id': 'login_form'})
    payload = utils.get_datadic(form)
    payload['email'] = src['username']
    payload['pass'] = src['password']

    request_headers = {
        'Origin': 'https://www.facebook.com',
        'Referer': login_page.url,
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/37.0.2062.124 Safari/537.36',
        'X-DevTools-Emulate-Network-Conditions-Client-Id': '97765AD6-3270-4C0F-A8B0-6267258612E0'
    }
    result = sess.post(HOST + form['action'], data=payload, headers=request_headers)

    # The raw response is dumped to stdout.
    print(result.content)

    # NOTE(review): the original comment said the marker text indicates a
    # successful login, yet the code treats 'Redirecting' as failure --
    # confirm which is intended.
    return 'Redirecting' not in result.content
def reply_backchina_forum(post_url, src):
    """Log in to backchina.com and reply to a forum thread.

    @param post_url: address of the thread to reply to.
    @type post_url: str
    @param src: account and reply data; 'content' is required and
        'subject' is optional.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    # Logger whose text is handed back to the caller.
    logger = utils.RAPLogger(post_url)
    host = utils.get_host(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_backchina(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread page and find the quick-reply form.
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'fastpostform'})

    # Step 3: fill in and submit the reply.
    payload = utils.get_datadic(form)
    if 'subject' in src:
        payload['subject'] = src['subject']
    payload['message'] = src['content']
    result = sess.post(host + form['action'], data=payload)

    # The reply counts as posted when its text is echoed in the response.
    if src['content'] not in result.content:
        logger.error('Reply Error: Reply Error, please try again !')
        return (False, str(logger))

    logger.info('Reply OK')
    return (True, str(logger))
def post_wailaike_forum(post_url, src):
    """Log in to wailaike.net and create a new thread in a group.

    @param post_url: group address, e.g.
        http://www.wailaike.net/group_post?gid=1
    @type post_url: str
    @param src: account and post data; 'subject' and 'content' (UTF-8
        encoded) are read here.
    @type src: dict
    @return: (URL of the new thread, or '' when login fails; log text).
        If the new thread's link cannot be found in the response, the
        [0] lookup raises IndexError instead of returning.
    @rtype: (str, str)
    """
    # Logger whose text is handed back to the caller.
    logger = utils.RAPLogger(post_url)
    host = utils.get_host(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_wailaike(sess, src):
        logger.error(' Login Error')
        return ('', str(logger))
    logger.info(' Login OK')

    # Step 2: open the new-post page of the group named by gid.
    gid = re.findall(r'gid=(\d*)', post_url)[0]
    newpost_url = 'http://www.wailaike.net/newpost?gid=' + gid
    page = sess.get(newpost_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'editor'})

    # Step 3: fill in title and body, re-encoded to the site charset.
    payload = utils.get_datadic(form)
    payload['title'] = src['subject'].decode('utf8').encode(CHARSET)
    payload['rstbody'] = src['content'].decode('utf8').encode(CHARSET)

    # The form also carries a 'time' value fetched from time.php.
    time_resp = sess.get('http://www.wailaike.net/time.php')
    payload['time'] = re.findall('"time":"(.*?)"', time_resp.content)[0]

    result = sess.post(newpost_url, data=payload)

    # Characters such as '(' and ')' in the title must be escaped before
    # the title is embedded in the pattern.
    escaped_subject = re.escape(src['subject'])
    link_pattern = (r'<h3 class="titles-txt"><a href=\"(.*?)\" target="_blank">'
                    + escaped_subject + '</a></h3>')
    # An unmatched link raises here on purpose (treated as a post error).
    url = "http://www.wailaike.net" + re.findall(link_pattern, result.content)[0]

    logger.info(' Post OK')
    return (url, str(logger))
def reply_canyu(post_url, src):
    """Post a reply on canyu without logging in.

    @param post_url: address of the article to comment on.
    @type post_url: str
    @param src: reply data; 'content' (UTF-8 encoded) is required and
        'nickname' is optional.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = RAPLogger(post_url)
    host = get_host(post_url)
    sess = RAPSession(src)

    # Step 1: load the article and locate the comment form.
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'remarkForm'})

    # Step 2: build the comment, re-encoded to the site charset.
    payload = get_datadic(form)
    payload['face'] = '1'
    payload['body'] = src['content'].decode('utf8').encode(CHARSET)
    # Fall back to the anonymous name when no nickname is supplied.
    if 'nickname' not in src:
        payload['username'] = u'匿名'.encode(CHARSET)
    else:
        payload['username'] = src['nickname'].decode('utf8').encode(CHARSET)

    sess.post(host + form['action'], data=payload)

    # Reload the article and check whether the comment text is now on it.
    refreshed = sess.get(post_url)
    encoded_content = src['content'].decode('utf8').encode(CHARSET)
    if encoded_content not in refreshed.content:
        logger.error('Reply Error, please try again !')
        return (False, str(logger))

    logger.info('Reply OK')
    return (True, str(logger))
def post_kdnet(post_url, src):
    """Log in to club.kdnet.net and create a new thread on a board.

    @param post_url: board address, e.g.
        http://club.kdnet.net/list.asp?boardid=2
    @type post_url: str
    @param src: account and post data; 'subject' and 'content' (UTF-8
        encoded) are read here.
    @type src: dict
    @return: (URL of the new thread, or '' on failure; log text)
    @rtype: (str, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_kdnet(sess, src):
        logger.error(' Login Error')
        return ('', str(logger))
    logger.info(' Login OK')

    # The board id from the URL is reused as a request parameter.
    boardid = re.findall(r'boardid=(\d*)', post_url)[0]

    # Step 2: open the editor page and locate the post form.
    editor_url = 'http://upfile1.kdnet.net/textareaeditor/post_ubb.asp?action=new&boardid=' + boardid
    page = sess.get(editor_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'Dvform'})

    # Step 3: fill in the post, re-encoded to the site charset.
    payload = utils.get_datadic(form)
    payload['topic'] = src['subject'].decode('utf8').encode(CHARSET, 'ignore')
    payload['body'] = src['content'].decode('utf8').encode(CHARSET, 'ignore')
    payload['font1'] = u'[原创]'.encode(CHARSET, 'ignore')

    save_url = 'http://upfile1.kdnet.net/SavePost_ubb.asp?Action=snew&boardid=' + boardid
    result = sess.post(save_url, data=payload)

    # The response contains this marker text when the post was accepted.
    posted = u'发帖成功'.encode(CHARSET, 'ignore') in result.content
    if not posted:
        logger.error(' Post Error')
        return ('', str(logger))
    logger.info(' Post OK')

    # The new thread's address is embedded in the response as a JS variable.
    url = re.findall(r'var url="(.*?)"', result.content)[0]
    print(url)
    return (url, str(logger))
def reply_kdnet(post_url, src):
    """Reply to a club.kdnet.net thread; credentials go in the reply form.

    @param post_url: address of the thread to reply to.
    @type post_url: str
    @param src: account and reply data; 'username', 'password' and
        'content' (all UTF-8 encoded) are read here.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Use the final URL of the thread page as post_url from here on.
    page = sess.get(post_url)
    post_url = page.url

    # The reply form lives inside the first iframe of the thread page.
    iframe_src = re.findall('<iframe src=\"(.*?)\"', page.content)[0]
    frame = sess.get(iframe_src.decode(CHARSET))
    form = BeautifulSoup(frame.content).find('form', attrs={'id': 't_form'})

    # NOTE(review): the greedy '.*\d' captures everything from the board id
    # up to the last digit of the URL, so any following query parameters are
    # carried into the reply URL -- presumably intentional; confirm.
    boardid = re.findall(r'boardid=(.*\d)', post_url)[0]

    # Credentials and reply text are re-encoded to the site charset.
    payload = utils.get_datadic(form)
    for form_key, src_key in (('UserName', 'username'),
                              ('password', 'password'),
                              ('body', 'content')):
        payload[form_key] = src[src_key].decode('utf8').encode(CHARSET, 'ignore')

    reply_url = ('http://upfile1.kdnet.net/do_lu_shuiyin.asp?'
                 'action=sre&method=fastreply&BoardID=')
    result = sess.post(reply_url + boardid, data=payload)
    print(result.content.decode(CHARSET))

    # The response contains this marker text when the reply was accepted.
    if u'成功回复'.encode(CHARSET, 'ignore') in result.content:
        logger.info(' Reply OK')
        return (True, str(logger))

    logger.error(' Reply Error')
    return (False, str(logger))
def post_backchina_forum(post_url, src):
    """Log in to backchina.com and create a new thread in a forum.

    @param post_url: forum address, e.g.
        http://www.backchina.com/forum/37/index-1.html
    @type post_url: str
    @param src: account and post data; 'subject' and 'content' are
        read here.
    @type src: dict
    @return: (URL of the response after posting, or '' on failure;
        log text)
    @rtype: (str, str)
    """
    # Logger whose text is handed back to the caller.
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_backchina(sess, src):
        logger.error(' Login Error')
        return ('', str(logger))
    logger.info(' Login OK')

    # Step 2: open the new-thread page of the forum id taken from the URL.
    fid = re.findall(r'forum/(\d+)/', post_url)[0]
    newthread_url = 'http://www.backchina.com/forum.php?mod=post&action=newthread&fid=' + fid
    page = sess.get(newthread_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'postform'})

    # Step 3: fill in and submit the thread.
    payload = utils.get_datadic(form)
    payload['subject'] = src['subject']
    payload['message'] = src['content']
    result = sess.post(newthread_url + '&extra=&topicsubmit=yes', data=payload)

    # The post counts as successful when the subject is echoed back.
    if src['subject'] in result.content:
        logger.info(' Post OK')
        return (result.url, str(logger))

    logger.error(' Post Error')
    return ('', str(logger))
def reply_enewstree_forum(post_url, src):
    """Log in to enewstree.com and reply to a forum thread.

    @param post_url: address of the thread to reply to.
    @type post_url: str
    @param src: account and reply data; 'content' (UTF-8 encoded) is read
        here, the credentials are consumed by login_enewstree.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_enewstree(post_url, sess, src):
        logger.error(' Login Error')
        # Return False (not '') so every exit path yields a bool flag.
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread page and find the quick-reply form.
    resp = sess.get(post_url)
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'fastpostform'})

    # Step 3: fill in the reply, re-encoded to the site charset.
    # (The original had a stray trailing comma here, which turned the
    # message into a 1-tuple.)
    payload = utils.get_datadic(form)
    payload['message'] = src['content'].decode('utf8').encode(CHARSET, 'ignore')

    resp = sess.post('http://enewstree.com/discuz/' + form['action'],
                     data=payload,
                     headers={'Referer': post_url})

    # The reply counts as posted when its text is echoed in the response.
    if src['content'].decode('utf8') not in resp.content.decode(CHARSET, 'ignore'):
        logger.error(' Reply Error')
        return (False, str(logger))

    logger.info(' Reply OK')
    return (True, str(logger))
def login_wailaike(sess, src):
    """Log in to www.wailaike.net.

    @param sess: session used for the requests.
    @type sess: Session
    @param src: account data; 'username' (without the '@163.com' suffix,
        which is appended here) and 'password' are read.
    @type src: dict
    @return: True when the login response does not contain the
        'register' link text; False otherwise or when the login form
        cannot be found.
    @rtype: bool
    """
    login_page = 'http://www.wailaike.net/'

    # Only the form's action attribute is needed from the login page.
    resp = sess.get(login_page + 'login_g')
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'logForm'})
    if form is None:
        # No login form on the page: report failure rather than crashing
        # on form['action'] below.
        return False

    # The 'password' field carries the "time" value served by pass.php;
    # the real password travels in 'passwordFake'.
    resp = sess.get('http://www.wailaike.net/pass.php')
    payload = {
        'email': src['username'] + '@163.com',
        'password': re.findall('"time":"(.*?)"', resp.content)[0],
        'passwordFake': src['password'],
        'redirect_to': 'http://www.wailaike.net/'
    }

    resp = sess.post(login_page + form['action'], data=payload)

    # A page that still offers the 'register' link means login failed.
    return '注册' not in resp.content
def post_1dpw_forum(post_url, src):
    """Log in to bbs.1dpw.com and create a new thread in a forum.

    @param post_url: forum address, e.g.
        http://bbs.1dpw.com/forum-71-1.html
    @type post_url: str
    @param src: account and post data; 'subject' and 'content' are
        read here.
    @type src: dict
    @return: (URL of the new thread, or '' on failure; log text)
    @rtype: (str, str)
    """
    host = utils.get_host(post_url)
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_1dpw(post_url, sess, src):
        logger.error(' Login Error')
        return ('', str(logger))
    logger.info(' Login OK')

    # Step 2: load the forum page and find the quick-post form.
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'fastpostform'})

    # Step 3: fill in and submit the thread as an ajax request.
    payload = utils.get_datadic(form)
    payload['subject'] = src['subject']
    payload['message'] = src['content']
    payload['posttime'] = int(time.time())
    result = sess.post(host + form['action'] + '&inajax=1', data=payload)

    # The response contains this marker text when the thread was published.
    if '主题已发布' not in result.content:
        logger.error(' Post Error')
        return ('', str(logger))
    logger.info(' Post OK')

    # The new thread's relative address is passed to a JS success handler.
    thread_path = re.findall(r'succeedhandle_fastnewpost\(\'(.*?)\'', result.content)[0]
    url = host + thread_path
    print(url)
    return (url, str(logger))
def reply_penchinese_blog(post_url, src):
    """Post a comment on an Independent Chinese PEN blog entry.

    @param post_url: address of the blog entry.
    @type post_url: str
    @param src: comment data; 'username', 'password' and 'content'
        are read here.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Load the entry and locate the comment form.
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'commentform'})

    payload = utils.get_datadic(form)
    payload['author'] = src['username']
    # NOTE(review): the e-mail field is filled from src['password'] --
    # presumably the caller stores the e-mail address there; confirm.
    payload['email'] = src['password']
    payload['comment'] = src['content']

    submit_url = form['action']
    result = sess.post(submit_url, data=payload)
    print(result.url)

    # Ending up on the form's own action URL (no redirect away from it)
    # is treated as a failed submission.
    if result.url != submit_url:
        logger.info(' Reply OK')
        return (True, str(logger))

    logger.error(' Reply Error')
    return (False, str(logger))
def reply_unitedtimes(post_url, src):
    """Post a comment on a unitedtimes.com.au article without logging in.

    @param post_url: address of the article.
    @type post_url: str
    @param src: comment data; 'content' is read here.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # The comment form lives inside the first iframe of the article page.
    page = sess.get(post_url)
    iframe_src = re.findall('<iframe src=\"(.*?)\"', page.content)[0]
    frame = sess.get(iframe_src.decode(CHARSET))
    form = BeautifulSoup(frame.content).find('form', attrs={'method': 'post'})

    # Fill in and submit the comment.
    payload = utils.get_datadic(form)
    payload['content'] = src['content']
    result = sess.post(form['action'], data=payload)

    # The response contains this marker text when the comment was accepted.
    if '操作成功' in result.content:
        logger.info(' Reply OK')
        return (True, str(logger))

    logger.error(' Reply Error')
    return (False, str(logger))
def reply_1dpw_forum(post_url, src):
    """Log in to bbs.1dpw.com and reply to a thread.

    The original author noted "10 characters" next to this function,
    presumably a minimum reply length -- confirm.

    @param post_url: address of the thread to reply to.
    @type post_url: str
    @param src: account and reply data; 'content' is read here.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    host = utils.get_host(post_url)
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_1dpw(post_url, sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread page and find the quick-reply form.
    page = sess.get(post_url)
    form = BeautifulSoup(page.content).find('form', attrs={'id': 'fastpostform'})

    # Step 3: fill in and submit the reply as an ajax request.
    payload = utils.get_datadic(form)
    payload['message'] = src['content']
    payload['posttime'] = int(time.time())
    result = sess.post(host + form['action'] + '&inajax=1', data=payload)

    # NOTE(review): success is signalled by 'Database' appearing in the
    # response, which looks unusual for a success marker -- confirm
    # against the site's actual reply response.
    if 'Database' in result.content:
        logger.info(' Reply OK')
        return (True, str(logger))

    logger.error(' Reply Error')
    return (False, str(logger))
def post_163_blog(post_url, src):
    """Log in to NetEase and publish a blog post on blog.163.com.

    The post is submitted once without a captcha; while the server answers
    with sfx '/', the submission is retried with a freshly cracked captcha,
    at most src['TTL'] times. The caller's src dict is not modified (the
    original decremented src['TTL'] in place).

    @param post_url: blog address (only passed to the logger).
    @type post_url: str
    @param src: account and post data; 'username', 'subject', 'content'
        and 'TTL' (maximum number of captcha retries) are read here.
    @type src: dict
    @return: (URL of the new post, log text) on success;
        (False, log text) when login fails; ('', log text) when the post
        is still rejected after all retries.
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_163(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # The blog sub-domain is the part of the user name before '@'.
    username = src['username'].split('@')[0]
    resp = sess.get('http://' + username + '.blog.163.com/blog/getBlog.do?fromString=bloglist')
    # parentId is needed later to request a captcha image.
    parentId = re.findall(r'parentId=(\d+)', resp.content)[0]
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'target': 'blog-163-com-post'})

    payload = utils.get_datadic(form)
    payload['title'] = src['subject']
    payload['HEContent'] = src['content'] + '<wbr>'
    payload['allowview'] = '-100'

    url = 'http://api.blog.163.com/' + username + '/editBlogNew.do?p=1&n=1&from=bloglist'

    # Local retry budget, so the caller's src['TTL'] stays untouched.
    retries_left = src['TTL']
    while True:
        resp = sess.post(url, data=payload)
        logger.info(resp.content)
        sfx = re.findall(r'sfx:\'(.*?)\'', resp.content)[0]
        if sfx != '/':
            # Any suffix other than '/' is the path of the published post.
            blog_url = 'http://' + username + '.blog.163.com/' + sfx
            logger.info(' Post OK')
            return (blog_url, str(logger))

        if not retries_left or retries_left < 0:
            break
        retries_left -= 1

        # Fetch and crack a captcha, then resubmit with it.
        captcha = sess.get(
            'http://api.blog.163.com/cap/captcha.jpgx?parentId=' + parentId + '&r=308985',
            headers={'Accept': config.accept_image})
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' captcha:' + seccode)
        payload['valcodeKey'] = seccode

    logger.info(' Post Error')
    return ('', str(logger))
def reply_sina_club(post_url, src):
    """Log in to Sina and reply to a Sina forum thread.

    The reply is submitted once without a captcha; while the response does
    not contain 'postform', it is resubmitted with a freshly cracked
    captcha, at most src['TTL'] times.

    @param post_url: address of the thread to reply to.
    @type post_url: str
    @param src: account and reply data; 'content' (UTF-8 encoded) and
        'TTL' (maximum number of captcha retries) are read here.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in.
    if not login_sina(sess, src):
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: load the thread page.
    resp = sess.get(post_url)
    host = utils.get_host(post_url)

    # The reply address is scraped from the raw HTML of the form tag.
    reply_url = re.findall(r'id=\"postform\" action=\"(.*?)\"', resp.content)[0]
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'postform'})

    payload = utils.get_datadic(form)
    payload['message'] = src['content'].decode('utf8').encode(CHARSET)

    # In raw HTML the attribute has '&' written as '&amp;'; undo that so
    # the query string is valid. (The original replaced '&' with '&',
    # which did nothing.)
    reply_url = reply_url.replace('&amp;', '&')

    post_headers = {
        'Origin': host,
        'Referer': post_url
    }
    resp = sess.post(reply_url, data=payload, headers=post_headers)

    # Retry with a captcha while the reply is rejected, up to TTL times.
    post_times = 0
    while 'postform' not in resp.content and post_times < src['TTL']:
        post_times = post_times + 1
        logger.info(' reply need captcha')

        # Fetch the captcha image and crack it.
        captcha = sess.get(host + 'seccode.php',
                           headers={
                               'Accept': config.accept_image,
                               'Referer': reply_url
                           })
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' seccode:' + seccode)

        # Resubmit with the captcha added to the form data.
        payload['seccodeverify'] = seccode.decode(CHARSET)
        resp = sess.post(reply_url, data=payload, headers=post_headers)

    # 'postform' in the response is the success marker.
    if 'postform' not in resp.content:
        logger.error(' Reply Error')
        return (False, str(logger))

    logger.info(' Reply OK')
    return (True, str(logger))
def post_sina_blog(post_url, src):
    """Log in to Sina and publish a blog post on blog.sina.com.cn.

    Login is attempted up to src['TTL'] times. The post is then submitted
    once without a captcha; while the server answers with code 'B06013',
    it is resubmitted with a freshly cracked captcha, at most src['TTL']
    times. The caller's src dict is not modified (the original decremented
    src['TTL'] in place).

    @param post_url: blog address (only passed to the logger).
    @type post_url: str
    @param src: account and post data; 'subject', 'content' and 'TTL'
        (retry budget) are read here.
    @type src: dict
    @return: (URL of the new post, log text) on success;
        (False, log text) when login fails; ('', log text) when the post
        is rejected.
    @rtype: tuple
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in, retrying up to TTL times.
    is_login = False
    attempts = 0
    while not is_login and attempts < src['TTL']:
        attempts += 1
        is_login = login_sina(sess, src)
    if not is_login:
        logger.error(' Login Error')
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: open the article editor and pick up its form.
    resp = sess.get(
        'http://control.blog.sina.com.cn/admin/article/article_add.php')
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'id': 'editorForm'})

    payload = utils.get_datadic(form)
    payload['blog_title'] = src['subject']
    payload['blog_body'] = src['content']
    payload['conlen'] = 9
    payload['x_cms_flag'] = 0
    payload['x_rank'] = ''

    # Step 3: submit; local retry budget keeps src['TTL'] untouched.
    retries_left = src['TTL']
    while True:
        resp = sess.post(form['action'], data=payload)
        jsonData = json.loads(resp.content)
        logger.info(resp.content)

        if jsonData['code'] == u'B06001':
            # This code is the success path; 'data' is the id of the post.
            url = 'http://blog.sina.com.cn/s/blog_' + jsonData['data'] + '.html'
            logger.info(' Post OK')
            return (url, str(logger))

        # Only code 'B06013' is retried, each time with a new captcha.
        if jsonData['code'] != u'B06013' or not retries_left or retries_left < 0:
            break
        retries_left -= 1

        captcha = sess.get(
            'http://interface.blog.sina.com.cn/riaapi/checkwd_image.php?r=0.8578676988836378',
            headers={'Accept': config.accept_image})
        seccode = utils.crack_captcha(captcha.content)
        logger.info(' captcha:' + seccode)
        payload['checkword'] = seccode

    logger.info(' Post Error')
    return ('', str(logger))
def validateuser(self, uid):
    """Submit the renren.com user-validation form with a cracked captcha.

    Loads the validation page for uid, fetches and cracks the captcha
    image, posts the form and prints the final URL. Nothing is returned.

    @param uid: renren user id; converted with str() before use.
    @return: None
    """
    s = requests.session()

    # NOTE(review): session cookies and request tokens are hard-coded
    # below; they belong to one captured browser session and should
    # presumably come from configuration or a real login -- confirm.
    # The original literal listed 'jebe_key' twice; only the later value
    # took effect, so only that one is kept.
    cookies = {
        'anonymid': 'i8pg5nhd-shfdcd',
        '_r01_': '1',
        'JSESSIONID': 'abc6gyMzwvHPk4Az8nN0u',
        'wp': '0',
        '_urm_378384894': '9999',
        'depovince': 'BJ',
        'jebecookies': 'e1c7f34b-675d-48d9-873c-a6327590c04e|||||',
        'ick_login': '******',
        '_de': 'E6CA621BCBC30E9A85E798EF221DF2AF',
        'p': '920950803139a28ca9a4c028f1c2846e4',
        'first_login_flag': '1',
        'ln_uact': '18345174475',
        'ln_hurl': 'http://hdn.xnimg.cn/photos/hdn421/20140206/2325/h_main_TZb3_97690001dbe1111a.jpg',
        't': '2582a0a96c3887f092040edd842ea33b4',
        'societyguester': '2582a0a96c3887f092040edd842ea33b4',
        'id': '378384894',
        'xnsid': 'd7c2bea1',
        '__utma': '10481322.1851133621.1430901819.1430901819.1430901819.1',
        '__utmc': '10481322',
        '__utmz': '10481322.1430901819.1.1.utmcsr=share.renren.com|utmccn=(referral)|utmcmd=referral|utmcct=/share/v7/161125632',
        'alxn': 'a5d9878e44be799cc3e99efcab2d3dcd',
        'mt': 'Jvfhn7u-wG7PdteezqsqEK',
        'cp_config': '2',
        'ver': '7.0',
        'loginfrom': 'null',
        'jebe_key': '35c0ba4a-c1fa-454c-bf08-3cd0517227a4%7Ce65290594db334b483ac1f24d6999fef%7C1430910523708%7C1',
        'wp_fold': '0',
        'l4pager': '0'
    }
    user_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/40.0.2214.94 Safari/537.36')
    validate_url = 'http://www.renren.com/validateuser.do?id=' + str(uid)

    # Load the validation page and collect the form's fields.
    r = s.get(
        validate_url,
        cookies=cookies,
        headers={
            'Pragma': 'no-cache',
            'Referer': 'http://www.renren.com/343633795/profile',
            'User-Agent': user_agent
        })
    soup = BeautifulSoup(r.content)
    form = soup.find('form', attrs={'name': 'valiateUserForm'})
    payload = utils.get_datadic(form)

    # Fetch the captcha image (millisecond timestamp as the rnd value)
    # and crack it.
    r = s.get('http://icode.renren.com/getcode.do?t=ninki&rnd=' + str(int(time.time() * 1000)),
              cookies=cookies,
              headers={
                  'Accept': 'image/webp,*/*;q=0.8',
                  'Referer': 'http://www.renren.com/validateuser.do'
              })
    seccode = utils.crack_captcha(r.content)

    payload['icode'] = seccode
    payload['requestToken'] = '-295732589'
    payload['_rtk'] = 'ba2078ed'
    print(payload)

    # Submit the validation form.
    r = s.post(
        'http://www.renren.com/validateuser.do',
        data=payload,
        cookies=cookies,
        headers={
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Host': 'www.renren.com',
            'Origin': 'http://www.renren.com',
            'Pragma': 'no-cache',
            'Referer': validate_url,
            'User-Agent': user_agent
        })
    print(r.url)
def reply_inmediahk(post_url, src):
    """Comment on an inmediahk.net article via the Facebook comment plugin.

    @param post_url: address of the article to comment on.
    @type post_url: str
    @param src: account and reply data; 'content' is read here, the
        credentials are consumed by login_inmediahk. The dict is not
        modified.
    @type src: dict
    @return: (success flag, log text)
    @rtype: (bool, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Query parameters for the Facebook comment plugin of this article.
    plugin_params = {
        'api_key': '408250695928069',
        'channel_url': 'http://static.ak.facebook.com/connect/xd_arbiter/QjK2hWv6uak.js?version=41#cb=f552c5c88&domain='
                       'www.inmediahk.net&origin=http%3A%2F%2Fwww.inmediahk.net%2Ffee0db6f8&relation=parent.parent',
        'colorscheme': 'light',
        'href': post_url,
        'locale': 'zh_HK',
        'numposts': 50,
        'sdk': 'joey',
        'skin': 'light',
        'width': 589
    }
    resp = sess.get(HOST + '/plugins/feedback.php', params=plugin_params)

    # Step 1: log in. The login helper needs the article URL and the plugin
    # iframe URL; pass them in a copy so the caller's dict stays untouched.
    login_src = dict(src)
    login_src['external_page_url'] = post_url
    login_src['iframe_src'] = resp.url
    if not login_inmediahk(sess, login_src):
        logger.error(' Login Error')
        # Return False like every other failure path (the original
        # returned a non-empty dict here, which is truthy).
        return (False, str(logger))
    logger.info(' Login OK')

    # Step 2: reload the plugin as a logged-in user and find the form.
    resp = sess.get(HOST + '/plugins/feedback.php', params=plugin_params)
    soup = BeautifulSoup(resp.content)
    form = soup.find('form', attrs={'rel': 'async'})

    # Step 3: build the comment request.
    payload = utils.get_datadic(form)
    payload['text_text'] = src['content']
    payload['text'] = src['content']
    payload['__user'] = payload['commentas']
    # The revision number is scraped from the plugin page.
    payload['__rev'] = re.findall(r'{\"revision\":(.*?),', resp.content)[0]
    payload['__a'] = '1'
    payload['__req'] = '1'
    payload['__dyn'] = '7wci2e4oK4pomXWo2vwAxu6E'
    payload['ttstamp'] = '265816910453508648110120112113'
    print(payload)

    resp = sess.post(
        HOST + form['action'],
        data=payload,
        headers={
            'host': 'www.facebook.com',
            'method': 'POST',
            'path': '/ajax/connect/feedback.php',
            'scheme': 'https',
            'version': 'HTTP/1.1',
            'accept': '*/*',
            'origin': HOST,
            'referer': resp.url,
            'user-agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/37.0.2062.124 Safari/537.36',
        })
    print(resp.content)

    # 'payload' in the response is the success marker.
    if 'payload' not in resp.content:
        logger.error(' Reply Error')
        return (False, str(logger))

    logger.info(' Reply OK')
    return (True, str(logger))