def login(user, pswd, code):
    TouTiao.logger.info("")
    session = requests.session()
    try:
        # Reload the cookies cached by fetch_code() (keyed by the mobile number).
        rs = RedisApi()
        tem_cookie = json.loads(rs.get_value(user))
        session.cookies = requests.cookies.cookiejar_from_dict(tem_cookie)
        fp = tem_cookie.get('s_v_web_id', '')
        if not fp:
            return {}, ''
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
        })
        # Encrypt the mobile number and SMS code the way the web client does.
        e = {'fp': fp, 'code': code, 'mobile': user}
        n = ["mobile", "code"]
        ss = TouTiao._get_encry().call('encryptParams', e, n)
        data = {
            "aid": "24",
            "service": "https://www.toutiao.com/",
            "account_sdk_source": "web",
        }
        data.update(ss)
        url = 'https://sso.toutiao.com/quick_login/v2/'
        resp = session.get(url, params=data)
        resp_json = resp.json()
        redirect_url = resp_json.get('redirect_url', '')
        if not redirect_url:
            TouTiao.logger.error('登录失败')  # login failed
            return {}, ''
        # Follow the SSO redirect chain, then read the account profile.
        session.get(redirect_url)
        session.get('https://www.toutiao.com/')
        home = session.get('https://mp.toutiao.com/get_media_info/').json()
        user_name = home['data']['user']['screen_name']
        cookies = session.cookies.get_dict()
        return cookies, user_name
    except Exception as e:
        TouTiao.logger.error(e)
        return {}, ''
    finally:
        session.close()
def fetch_code(user, pswd, loop=3):
    session = requests.session()
    cookies = TouTiao.init_cookie()
    fp = cookies.get('s_v_web_id', '')
    if not fp:
        TouTiao.logger.error('获取初始化cookies失败')  # failed to obtain the bootstrap cookies
        return False
    session.cookies = requests.cookies.cookiejar_from_dict(cookies)
    session.headers.update({"User-Agent": Base.UA})
    resp = TouTiao.get_mobile_code(session, user, fp)  # initial request for the SMS code
    if resp.json()['error_code'] == 1105:
        # error_code 1105: a slider captcha must be solved before the SMS code is sent.
        while loop:
            TouTiao.logger.error(f'break_code:{loop}次')
            code_resp = TouTiao.break_code(session, fp, user)  # solve the slider captcha
            if code_resp['ret'] == 200:
                resp = TouTiao.get_mobile_code(session, user, fp)
                resp_json = resp.json()
                TouTiao.logger.error(resp_json)
                if resp_json['error_code'] == 0:
                    TouTiao.logger.info(f'{user}获取mobile——code成功')
                    # Cache the session cookies for the follow-up login() call.
                    rs = RedisApi()
                    cookies = session.cookies.get_dict()
                    rs.set_value(user, json.dumps(cookies), ex=300)
                    return True
                elif resp_json['error_code'] == 1206:
                    # error_code 1206: verification requested too frequently
                    TouTiao.logger.error(resp_json['description'])
                    raise CustomException(resp_json['description'])
                else:
                    time.sleep(1)
                    TouTiao.logger.error(f'重试:{loop}')
                    TouTiao.logger.error('刷新滑块')
                    loop -= 1
            else:
                time.sleep(1)
                TouTiao.logger.error(f'滑动失败,重试{loop}')
                loop -= 1
        return False
    else:
        rs = RedisApi()
        cookies = session.cookies.get_dict()
        rs.set_value(user, json.dumps(cookies), ex=300)
        return True
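
# The two TouTiao helpers above are meant to be chained: fetch_code() gets past the
# slider captcha, triggers the SMS code and caches the session cookies in Redis
# under the mobile number for 300 seconds; login() then reloads those cookies and
# exchanges the SMS code for a logged-in session. The wrapper below is only an
# illustrative sketch of that hand-off -- `toutiao_sms_login` and the interactive
# input() prompt are assumptions, not part of the original module.
def toutiao_sms_login(user, pswd):
    if not TouTiao.fetch_code(user, pswd):
        raise CustomException('failed to request the SMS code')
    # The code must be entered before the 300-second Redis TTL set by
    # fetch_code() expires, otherwise login() finds no cached cookies.
    sms_code = input('SMS code sent to {}: '.format(user))
    cookies, user_name = TouTiao.login(user, pswd, sms_code)
    if not cookies:
        raise CustomException('TouTiao login failed')
    return cookies, user_name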
class DaFeng(Base):
    rs = RedisApi()
    mp_id = 2
    zh_name = "大风号"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    }
    # Category name -> ifeng category id, as used by the publish endpoint.
    categorylist = {
        "百科": "5854cc129cd31025a647acb1",
        "争鸣": "5854cc129cd31025a647accc",
        "时政": "5858d867afbea52c129b4773",
        "人文": "5854cc129cd31025a647acc3",
        "财经": "5854cc129cd31025a647acb9",
        "军事": "5854cc129cd31025a647acaf",
        "社会": "5854cc129cd31025a647acb4",
        "时尚": "5854cc129cd31025a647acbf",
        "科技": "5854cc129cd31025a647acb2",
        "体育": "5854cc129cd31025a647acac",
        "历史": "5854cc129cd31025a647acb0",
        "视频": "5854cc129cd31025a647accd",
        "视觉": "5854cc129cd31025a647acc5",
        "汽车": "5854cc129cd31025a647acbd",
        "娱乐": "5854cc129cd31025a647acc2",
        "旅游": "5854cc129cd31025a647acb3",
        "佛教": "5858d89483d5222bee290986",
        "明星": "5858d88383d5222bee29097d",
        "房产": "5854cc129cd31025a647acb7",
        "电影": "5854cc129cd31025a647acc6",
        "情感": "5854cc129cd31025a647acc0",
        "公益": "5854cc129cd31025a647acb5",
        "母婴": "5854cc129cd31025a647acc1",
        "健康": "5854cc129cd31025a647acbe",
        "美食": "5854cc129cd31025a647acad",
        "星座": "5854cc129cd31025a647acae",
        "乐活": "5854cc129cd31025a647acc8",
        "教育": "5854cc129cd31025a647acbc",
        "媒体": "5854cc129cd31025a647acba",
        "漫画": "5854cc129cd31025a647acbb",
        "家居": "5854cc129cd31025a647acb8",
        "杂志": "5854cc129cd31025a647acca",
        "地方": "5854cc129cd31025a647accb",
    }

    @staticmethod
    def login(user, pswd, **kw):
        # Standard login: either directly on id.ifeng.com or, for accounts in
        # weibolists, via the Weibo OAuth flow.
        DaFeng.logger.info(user)
        session = requests.session()
        try:
            if user not in weibolists:
                # Direct login: download and solve the picture captcha first.
                response = session.get(
                    'https://id.ifeng.com/public/authcode?_=%s' %
                    str(int(time.time() * 1000)))
                with open('captcah.jpeg', 'wb') as f:
                    f.write(response.content)
                cid, result = verify_captcha('captcah.jpeg', 1004)
                if os.path.exists("captcah.jpeg"):
                    os.remove("captcah.jpeg")
                respon = session.post(
                    "https://id.ifeng.com/api/sitelogin",
                    data={
                        "u": user,
                        "k": pswd,
                        "auth": result,
                        "auto": "on",
                        "comfrom": "",
                        "type": "3"
                    },
                    headers={
                        "Host": "id.ifeng.com",
                        "Connection": "keep-alive",
                        "Cache-Control": "max-age=0",
                        "Origin": "http://id.ifeng.com",
                        "Upgrade-Insecure-Requests": "1",
                        "Content-Type": "application/x-www-form-urlencoded",
                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
                        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                        "Referer": "http://id.ifeng.com/allsite/login",
                        "Accept-Language": "zh-CN,zh;q=0.9",
                    })
                cookies = session.cookies.get_dict()
                if 'sid' not in cookies:
                    raise CustomException('登录失败')
                res = session.get(
                    "http://fhh.ifeng.com/hapi/account/query").json()
                user_name = res['data']['weMediaName']
                return cookies, user_name
            else:
                # Weibo OAuth login: fetch the RSA parameters, encrypt the
                # password and complete the Sina SSO handshake.
                key_data = session.get(
                    'https://login.sina.com.cn/sso/prelogin.php?entry=sso&callback=sinaSSOController.preloginCallBack&su=MTU2MDA2Mzg2MDM%3D&rsakt=mod&client=ssologin.js(v1.4.15)&_=1531898376129'
                ).text
                pat = re.compile(
                    r'"servertime":(.*?),.*"nonce":"(.*?)","pubkey":"(.*?)"')
                res = pat.findall(key_data)
                if not res:
                    DaFeng.logger.error("Failed to login in sina")
                    raise CustomException('sina 登录规则变')
                servertime, nonce, pubkey = res[0]
                name = base64.b64encode(user.encode()).decode()
                key = rsa.PublicKey(int(pubkey, 16), 65537)
                message = ('%s\t%s\n%s' % (servertime, nonce, pswd)).encode()
                passwd = rsa.encrypt(message, key)
                passwd = binascii.b2a_hex(passwd).decode()
                data = {
                    "entry": "openapi",
                    "gateway": "1",
                    "from": "",
                    "savestate": "0",
                    "useticket": "1",
                    "pagerefer": "http%3A%2F%2Fid.ifeng.com%2Fallsite%2Flogin",
                    "ct": "1800",
                    "s": "1",
                    "vsnf": "1",
                    "vsnval": "",
                    "door": "",
                    "appkey": "1Jd1G6",
                    "su": name,
                    "service": "miniblog",
                    "servertime": servertime,
                    "nonce": nonce,
                    "pwencode": "rsa2",
                    "rsakv": "1330428213",
                    "sp": passwd,
                    "sr": "1920*1080",
                    "encoding": "UTF-8",
                    "cdult": "2",
                    "domain": "weibo.com",
                    "prelt": "499",
                    "returntype": "TEXT"
                }
                response = session.post(
                    "https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=1528351537613&openapilogin=qrcode",
                    data=data,
                ).json()
                if response['retcode'] == "4049":
                    # retcode 4049: a picture captcha ("door") is required;
                    # refresh the RSA parameters and retry with the solved code.
                    key_data = session.get(
                        'https://login.sina.com.cn/sso/prelogin.php?entry=sso&callback=sinaSSOController.preloginCallBack&su=MTU2MDA2Mzg2MDM%3D&rsakt=mod&client=ssologin.js(v1.4.15)&_=1531898376129'
                    ).text
                    pat = re.compile(
                        r'"servertime":(.*?),.*"nonce":"(.*?)","pubkey":"(.*?)"')
                    res = pat.findall(key_data)
                    servertime, nonce, pubkey = res[0]
                    img_code = session.get(
                        'https://login.sina.com.cn/cgi/pin.php?r=57148031&s=0')
                    with open('df_code.png', 'wb') as f:
                        f.write(img_code.content)
                    try:
                        data['su'] = name
                        data['servertime'] = servertime
                        data['nonce'] = nonce
                        cid, result = verify_captcha('df_code.png', 1005)
                        if result == '看不清':  # solver could not read the captcha; try once more
                            cid, result = verify_captcha('df_code.png', 1005)
                        data['door'] = result
                        response = session.post(
                            'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)&_=1531898376166',
                            data=data).json()
                        if not response['uid']:
                            DaFeng.logger.info("Failed to get uid")
                            raise CustomException('登录失败')
                    finally:
                        os.remove(os.path.abspath('df_code.png'))
                # Authorize the ifeng OAuth client with the login ticket.
                session.post(
                    'https://api.weibo.com/oauth2/authorize',
                    headers={
                        "Host": "api.weibo.com",
                        "Connection": "keep-alive",
                        "Cache-Control": "max-age=0",
                        "Origin": "https://api.weibo.com",
                        "Upgrade-Insecure-Requests": "1",
                        "Content-Type": "application/x-www-form-urlencoded",
                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
                        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                        "Referer": "https://api.weibo.com/oauth2/authorize?client_id=1073104718&redirect_uri=http%3A%2F%2Fid.ifeng.com%2Fcallback%2Fsina&response_type=code",
                        "Accept-Language": "zh-CN,zh;q=0.9"
                    },
                    data={
                        "action": "login",
                        "display": "default",
                        "withOfficalFlag": "0",
                        "quick_auth": "false",
                        "withOfficalAccount": "",
                        "scope": "",
                        "ticket": response['ticket'],
                        "isLoginSina": "",
                        "response_type": "code",
                        "regCallback": "https%3A%2F%2Fapi.weibo.com%2F2%2Foauth2%2Fauthorize%3Fclient_id%3D1073104718%26response_type%3Dcode%26display%3Ddefault%26redirect_uri%3Dhttp%253A%252F%252Fid.ifeng.com%252Fcallback%252Fsina%26from%3D%26with_cookie%3D",
                        "redirect_uri": "http://id.ifeng.com/callback/sina",
                        "client_id": "1073104718",
                        "appkey62": "1Jd1G6",
                        "state": "",
                        "verifyToken": "null",
                        "from": "",
                        "switchLogin": "******",
                        "userId": "",
                        "passwd": "",
                    })
                session.get(
                    "https://api.weibo.com/oauth2/authorize?client_id=1073104718&redirect_uri=http%3A%2F%2Fid.ifeng.com%2Fcallback%2Fsina&response_type=code"
                )
                cookies = session.cookies.get_dict()
                res = session.get(
                    "http://fhh.ifeng.com/hapi/account/query").json()
                user_name = res['data']['weMediaName']
                return cookies, user_name
        finally:
            session.close()

    def publish(self, title, content, category, flag=1):
        """Publish an article.

        :param title:
        :param content:
        :param category:
        :return: status, article_id, cause
        """
        self.session.get('http://fhh.ifeng.com/publish/article')
        status = 3
        article_id = None
        cause = ''
        self.logger.info("")
        # The first <img> in the content becomes the cover; a cover is required.
        result = re.compile(r'<img.*?src="(.*?)".*?>', re.S).findall(content)
        if not result:
            cause = "请上传文章封面"
            self.logger.error(cause)
            return status, article_id, cause
        coverurl = result[0]
        udata = {"coverurl": coverurl}
        # URL-encode the cover URL and strip the leading "coverurl=" (9 characters).
        url_values = parse.urlencode(udata)
        url_values = url_values[9::]
        print(url_values)
        data = {
            "dataSource": "1",
            "title": title,
            "coverURL": url_values,
            "tags": "",
            "categoryId": self.categorylist[category],
            "isPublish": "true",
            "videoUrls": "",
            "audioUrls": "",
            "content": content,
            "articleURL": "",
            "coverPattern": "0",
            "type": "article",
            "isCreation": "0",
            "isMultiCoverTitle": "1",
            "title2": "",
            "coverURL2": "",
        }
        curl = ''
        if flag == 3:
            # Multi-cover mode: use every image found in the content as a cover.
            for item in result:
                udata = {"coverurl": item}
                url_values = parse.urlencode(udata)
                url_values = url_values[9::]
                curl += url_values + ','
            data['coverURL'] = curl[:-1]
        print(data)
        url = "http://fhh.ifeng.com/api/article/insert"
        response = self.session.post(url=url, data=data,
                                     headers=self.headers).json()
        logging.error(response)
        if response['success']:
            self.logger.info("success get articleid")
            status = 2
            return status, cause
        elif '过于频繁' in str(response):  # response says: posting too frequently
            self.logger.error('发文频繁,等待下一次')
            return 1, cause
        else:
            return 3, str(response)

    def read_count(self, stime=None, etime=None):
        # Default window: [7 days ago, yesterday]. Computed at call time rather
        # than in the signature, where it would be frozen at import.
        if stime is None:
            stime = datetime.datetime.now() - datetime.timedelta(days=7)
        if etime is None:
            etime = datetime.datetime.now() - datetime.timedelta(days=1)
        self.logger.info("")
        res = self.session.get(
            "http://fhh.ifeng.com/hapi/account/query").json()
        self.logger.info(res)
        user_name = res['data']['weMediaName']
        res = self.session.get(
            "http://fhh.ifeng.com/api/statistics/findstatlist",
            params="stime=%s&etime=%s&type=article&channel=0" %
            (stime.strftime("%Y-%m-%d"), etime.strftime("%Y-%m-%d"))).json()
        if not res['success']:
            raise CustomException('获取阅读数失败')
        read_list = []
        for data in res['data']['rows']:
            readcount = {
                "day_time": data["date"],
                "user_name": user_name,
                "recomment_num": data["ev"],
                "read_num": data["pv"],
                "comment_num": data["commentNum"],
                "share_num": data["shareNum"],
                "collect_num": data["collectNum"]
            }
            # logging.error(data)
            read_list.append(readcount)
        return read_list

    def fetch_article_status(self, title):
        resp = self.session.get(
            'http://fhh.ifeng.com/api/article/list?isOriginal=1&operationStatus=0&pageSize=30&pageNumber=1&_=1532677834082'
        ).json()['data']['rows']
        url = ''
        res = [2, '没查询到该文章', url]  # default: article not found
        for art in resp:
            if title != art['title']:
                continue
            if art['operationStatus'] == 4:
                # Published: build the public article URL.
                eArticleId = art['eArticleId']
                url = 'http://wemedia.ifeng.com/{}/wemedia.shtml'.format(
                    eArticleId)
                res = 4, '', url
            elif art['operationStatus'] == 10:
                # Rejected: link to the preview page and return the audit reason.
                art_id = art['_id']
                url = 'http://fhh.ifeng.com/manage/articlePreview?id={}'.format(
                    art_id)
                res = 5, art['auditReason'], url
            elif art['operationStatus'] == 16 or art['operationStatus'] == 6:
                cause = art['auditReason']
                if cause == 'null':
                    cause = '违反协议,已经下线'
                res = 5, cause, url
        return res

    def upload_image(self, image_name, image_data):
        self.logger.info('')
        return

    def check_user_cookies(self):
        self.logger.info('')
        resp = self.session.get(
            "http://fhh.ifeng.com/hapi/account/query").json()
        if resp['success']:
            return True
        else:
            return False

    def query_article_data(self, title):
        """Fetch read statistics for a single published article."""
        resp = self.session.get(
            'http://fhh.ifeng.com/api/article/list?isOriginal=1&operationStatus=4&pageSize=30&pageNumber=1&_=1541678014839'
        ).json()
        if resp['success']:
            for art in resp['data']['rows']:
                if title != art['title']:
                    continue
                else:
                    data = dict(read_num=art['readNum'],
                                recomment_num=art['recommendNum'],
                                comment_num=art['commentNum'],
                                share_num=art['shareNum'],
                                collect_num=art['collectNum'],
                                publish_time=art['updateTime'],
                                like_num=-1,
                                follow_num=-1)
                    return data
        raise CustomException('失败')

    def query_account_message(self, auto):
        return []
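
# A usage sketch for DaFeng, assuming the Base.init() flow defined below and an
# `account` object carrying uid / mp_id / id / account / pswd as Base.init()
# expects. The sample title, HTML and URL are illustrative only. publish() needs
# at least one <img> tag in the content (the first match becomes the cover) and
# `category` must be a key of DaFeng.categorylist.
def demo_dafeng_publish(account):
    dafeng = DaFeng()
    if not dafeng.init(account):            # cached cookies or fresh login
        raise CustomException('DaFeng login failed')
    try:
        html = '<p>body text</p><img src="http://example.com/cover.jpg">'
        result = dafeng.publish('demo title', html, '科技')   # '科技' = the Tech category
        if result[0] == 2:                  # status 2 means the article was accepted
            print(dafeng.fetch_article_status('demo title'))  # -> (status, cause, url)
    finally:
        dafeng.close()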
class Base(object):
    mp_id = 0
    # Chinese display name of the platform
    zh_name = ''
    UA = get_user_agent()
    logger = get_logger()

    def __init__(self):
        self.account = None
        self.session = None
        self.rs = RedisApi()

    def init(self, account, loop=2):
        """Attach an account: reuse cookies cached in Redis when they are still
        valid, otherwise log in again (up to `loop` attempts)."""
        self.account = account
        self.session = requests.session()
        self.session.headers = {'User-Agent': Base.UA}
        self.logger.info("mp:%s, account:%s" % (account.mp_id, account.account))
        rs = RedisApi()
        key = "{}:{}:{}".format(account.uid, account.mp_id, account.id)
        r_cookies = rs.get_value(key)
        if r_cookies:
            cookies = json.loads(r_cookies.decode('utf8'))
            self.session.cookies = requests.cookies.cookiejar_from_dict(
                cookies)
            try:
                if not self.check_user_cookies():
                    cookies = self._login(account, loop=loop)
            except:
                cookies = self._login(account, loop=loop)
        else:
            cookies = self._login(account, loop=loop)
        if cookies:
            self.session.cookies = requests.cookies.cookiejar_from_dict(
                cookies)
            return True
        else:
            return False

    def _login(self, account, loop=1):
        while loop:
            key = "{}:{}:{}".format(account.uid, account.mp_id, account.id)
            try:
                cookies, nick_name = self.login(account.account, account.pswd)
            except Exception as e:
                self.logger.error(e)
                nick_name = ''
                cookies = {}
            if cookies:
                # Cache the cookies and mark the account as logged in.
                udata = dict(status=2)
                self.rs.set_value(key, json.dumps(cookies))
                db_account_api = DBAccountApi()
                if not account.nick_name:
                    udata['nick_name'] = nick_name
                db_account_api.update(
                    udata, dict(uid=account.uid, mp_account_id=account.id))
                return cookies
            else:
                loop -= 1
        return None

    @staticmethod
    def login(user, pswd):
        pass

    def publish(self, title, content, category_id):
        pass

    def close(self):
        self.session.close()
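
# A minimal sketch of the contract Base expects from a platform subclass; the
# class name, the probe URL and the mp_id value are placeholders, not a real
# integration. init() calls check_user_cookies() to validate cookies cached in
# Redis and falls back to the static login(), which must return a
# (cookies_dict, nick_name) pair (or an empty dict on failure).
class ExamplePlatform(Base):
    mp_id = 99          # hypothetical platform id
    zh_name = '示例'     # hypothetical Chinese display name ("example")

    @staticmethod
    def login(user, pswd):
        session = requests.session()
        try:
            # Platform-specific authentication would go here.
            cookies = session.cookies.get_dict()
            return cookies, user
        finally:
            session.close()

    def check_user_cookies(self):
        # Probe a lightweight authenticated endpoint; a failed probe (or an
        # exception) makes init() fall back to _login().
        resp = self.session.get('https://example.com/api/account', timeout=10)
        return resp.status_code == 200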
class Twitter(Base):
    logger = get_logger()
    rs = RedisApi()
    login_url = 'https://twitter.com/login'
    post_url = 'https://twitter.com/sessions'
    home_url = 'https://twitter.com/home'
    user_url = 'https://twitter.com/i/search/typeahead.json?count=1200&media_tagging_in_prefetch=true&prefetch=true&result_type=users&users_cache_age=-1'
    create_url = 'https://twitter.com/i/tweet/create'
    mp_id = 10
    zh_name = 'Twitter'

    @staticmethod
    def login(user, pswd, **kw):
        _session = requests.session()
        token, rf, cookies = Twitter._get_values()
        _session.cookies = requests.cookies.cookiejar_from_dict(cookies)
        _session.headers.update({
            'User-Agent': get_one_ua(0),
            'Referer': 'https://twitter.com/'
        })
        post_data = {
            'session[username_or_email]': user,
            'session[password]': pswd,
            'authenticity_token': token,
            'ui_metrics': rf,
            'scribe_log': '',
            'redirect_after_login': '',
            'remember_me': 1,
        }
        print(post_data)
        _session.post(Twitter.post_url,
                      data=post_data,
                      allow_redirects=False,
                      timeout=30)
        cookies = _session.cookies.get_dict()
        _session.get(Twitter.home_url, allow_redirects=False)
        try:
            resp = _session.get(Twitter.user_url)
            if resp.status_code == 200:
                name = resp.json()['users'][0]['name']
                Twitter.logger.info(name)
                return cookies, name
        except:
            raise Exception('登录失败')

    @staticmethod
    def _get_values():
        # Load the login page in headless Chrome to harvest the CSRF token,
        # the ui_metrics payload and the bootstrap cookies.
        opt = webdriver.ChromeOptions()
        opt.add_argument("--no-sandbox")
        opt.add_argument('user-agent={}'.format(get_one_ua(0)))
        opt.set_headless()
        chrome = webdriver.Chrome(options=opt)
        chrome.get(Twitter.login_url)
        resp = chrome.page_source
        token = re.compile(
            r'<input type="hidden" value="(.*?)" name="authenticity_token"',
            re.S).findall(resp)
        rf = re.compile(
            r'<input type="hidden" name="ui_metrics" autocomplete="off" value="(.*?)" />',
            re.S).findall(resp)
        if token:
            token = token[0]
        else:
            chrome.quit()
            raise Exception('token 获取失败')
        if rf:
            rf = rf[0]
            rf = rf.replace('&quot;', '"')  # decode the HTML-escaped quotes
            # print(json.loads(rf))
        else:
            chrome.quit()
            raise Exception('rf获取错')
        cc = chrome.get_cookies()
        cookies = {}
        for s in cc:
            cookies[s['name']] = s['value']
        cookies['app_shell_visited'] = '1'
        cookies['path'] = '/'
        cookies['max-age'] = '5'
        chrome.quit()
        return token, rf, cookies

    def publish(self, content):
        self.session.headers['Referer'] = 'https://twitter.com/'
        # Find <img> tags in the content; Twitter allows at most 4 images.
        result = re.compile(r'<img.*?src="(.*?)".*?>', re.S).findall(content)
        media_id_list = []
        if len(result) > 4:
            raise Exception('图片不能超过4张')
        for item in result:
            res = requests.get(item)
            img_data = res.content
            s = self._upload_img(img_data)
            media_id_list.append(str(s))
        media_id = ','.join(media_id_list)
        self.logger.info(media_id)
        # Strip HTML tags; only plain text is tweeted.
        content = remove_tags(content)
        # if len(content.encode(encoding='gbk')) > 280:
        #     raise Exception('文章内容已超过最大长度')
        data = {
            "authenticity_token": "285baf78b4de28459a94ec0c0a3c5fa3f62fdfd5",  # hard-coded CSRF token
            "batch_mode": "off",
            "is_permalink_page": "false",
            "media_ids": media_id,
            "place_id": "",
            "status": content,
            "tagged_users": "",
        }
        if not media_id_list:
            del data['tagged_users']
            del data['media_ids']
        resp = self.session.post(self.create_url, data=data, timeout=30)
        resp_json = resp.json()
        try:
            tweet_id = resp_json['tweet_id']
            return 2, '', tweet_id
        except Exception as e:
            cause = resp_json['message']
            self.logger.error(e)
            return 3, cause, ''

    def _upload_img(self, img_data):
        b_img_l = len(img_data)
        # Step 1: INIT -- declare the upload and get a media_id.
        url = 'https://upload.twitter.com/i/media/upload.json?command=INIT&total_bytes={}&media_type=image%2Fjpeg&media_' \
              'category=tweet_image'.format(b_img_l)
        data = {
            "command": "INIT",
            "total_bytes": b_img_l,
            "media_type": "image/jpeg",
            "media_category": "tweet_image",
        }
        resp = self.session.post(url, data=data, timeout=30).json()
        media_id = resp['media_id']
        # Step 2: APPEND -- upload the image bytes as segment 0.
        up_url = 'https://upload.twitter.com/i/media/upload.json?command=APPEND&media_id={}&segment_index=0'.format(
            media_id)
        file = {
            'media': ('blob', img_data, 'image/jpeg', {
                'Content-Type': 'application/octet-stream'
            })
        }
        self.session.post(up_url, files=file)
        # Step 3: FINALIZE -- complete the upload.
        self.session.post(
            'https://upload.twitter.com/i/media/upload.json?command=FINALIZE&media_id={}'
            .format(media_id),
            data={
                'command': 'FINALIZE',
                'media_id': media_id
            },
            timeout=30)
        return media_id

    def upload_image(self, image_name, image_data):
        return ''

    def fetch_article_status(self, mp_article_id):
        resp = self.session.get(self.user_url, timeout=30)
        resp_json = resp.json()
        screen_name = resp_json['users'][0]['screen_name']
        url = 'https://twitter.com/{}/status/{}?conversation_id={}'.format(
            screen_name, mp_article_id, mp_article_id)
        return 4, '', url

    def query_article_data(self, mp_article_id):
        resp = self.session.get(self.user_url, timeout=30)
        resp_json = resp.json()
        screen_name = resp_json['users'][0]['screen_name']
        url = 'https://twitter.com/{}/status/{}?conversation_id={}'.format(
            screen_name, mp_article_id, mp_article_id)
        self.session.headers[
            'Accept'] = 'application/json, text/javascript, */*; q=0.01'
        self.session.headers['Referer'] = 'https://twitter.com/'
        resp = self.session.get(url, timeout=30)
        html = etree.HTML(resp.text)
        datas = html.xpath(
            '//span[@class="ProfileTweet-actionCountForAria"]/text()')
        if not datas:
            return ''
        # Note: only the first character of each aria label is parsed, so
        # counts of 10 or more are truncated here.
        reads = dict(comment_num=int(datas[0][0]),
                     follow_num=int(datas[1][0]),
                     like_num=int(datas[2][0]))
        return reads

    def check_user_cookies(self):
        try:
            resp = self.session.get(self.user_url, timeout=30)
            resp_json = resp.json()
            screen_name = resp_json['users'][0]['screen_name']
            if screen_name:
                return True
            else:
                self.logger.error('Twitter 规则改变')
                return False
        except:
            return False
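
# An end-to-end sketch for Twitter, assuming the same Base.init() flow as the
# other platforms; the function name and the sample content are illustrative.
# publish() returns (status, cause, tweet_id) -- 2 on success, 3 on failure --
# and the tweet_id can then be fed to fetch_article_status() for the permalink
# or to query_article_data() for the scraped engagement counts.
def demo_twitter_publish(account):
    tw = Twitter()
    if not tw.init(account):
        raise Exception('Twitter login failed')
    try:
        status, cause, tweet_id = tw.publish(
            'hello from the publisher <img src="http://example.com/pic.jpg">')
        if status == 2:
            print(tw.fetch_article_status(tweet_id))   # -> (4, '', permalink)
            print(tw.query_article_data(tweet_id))     # -> dict of counts or ''
    finally:
        tw.close()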