def get_ynote_web_header(self, mode: int = 0):
    headers = {
        "Content-Type": get_content_type(),
        "Cookie": self.cookie,
        "Host": self.Y_URL.split("/")[2],
        "Origin": self.Y_URL,
        "Referer": self.WEB_URL,
    }
    if mode:
        headers["Accept"] = get_accept("xhr")
    else:
        headers["Accept"] = get_accept("html")
    return headers
def get_login_headers(self, mode: int = 0, cookie: dict = None):
    # avoid a mutable dict as the default argument
    if cookie is None:
        cookie = {}
    headers = {
        'Referer': self.LOGIN_URL,
    }
    if mode != 3:
        headers['Accept'] = get_accept('*') if mode == 2 else get_accept('xhr')
    if mode == 1:
        headers['Content-Type'] = get_content_type('')
    elif mode == 2:
        headers['X-Requested-With'] = 'XMLHttpRequest'
    if len(cookie):
        headers['Cookie'] = encoder_cookie(cookie)
    return headers
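# Hypothetical usage sketch for get_login_headers(); the `spider` instance and
# cookie values below are illustrative, not from the source:
# ajax_headers = spider.get_login_headers(mode=2, cookie={'JSESSIONID': 'abc123'})
# form_headers = spider.get_login_headers(mode=1)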
def __init__(self):
    self.default_hotel_id = 4889292
    self.header = {
        'Cookie': '',
        'Accept': get_accept('html'),
        'Content-Type': get_content_type(),
    }
def load_gather(self):
    """ load gather proxy pool text
        If it fails, you should reactivate the cookie.
    """
    headers = {
        "Host": "www.gatherproxy.com",
        "Origin": "http://www.gatherproxy.com",
        "Referer": "http://www.gatherproxy.com/proxylist/anonymity/?t=Transparent",
        "Cookie": "_lang=en-US; _ga=GA1.2.1084455496.1548351129; _gid=GA1.2.1515017701.1552361687; ASP.NET_SessionId=ckin3pzyqyoyt3zg54zrtrct; _gat=1; arp_scroll_position=57",
        "Content-Type": get_content_type(),
        "Accept": get_accept("html"),
    }
    url = "http://www.gatherproxy.com/subscribe/infos"
    try:
        sid_url_req = requests.get(url, headers=headers, verify=False, timeout=10)
    except requests.RequestException:
        return
    sid_url_html = BeautifulSoup(sid_url_req.text, "html.parser")
    sid_url = sid_url_html.find_all("div", class_="wrapper")[1].find_all("a")[0]["href"]
    if len(sid_url.split("sid=")) < 2:
        echo("0|warning", "cookie error")
        self.get_cookie()
        self.load_gather()
        return
    sid = sid_url.split("sid=")[1]
    sid_url = "http://www.gatherproxy.com" + sid_url
    data = {"ID": sid, "C": "", "P": "", "T": "", "U": "0"}
    gatherproxy = requests.post(sid_url, headers=headers, data=data, verify=False)
    with codecs.open(data_dir + "gatherproxy", "w", encoding="utf-8") as f:
        f.write(gatherproxy.text)
def get_api_headers(self, bv_id: str, types: int = 0) -> dict:
    if isinstance(bv_id, int):
        bv_id = "av{}".format(bv_id)
    if types == 0:
        return {"Accept": "*/*", "Referer": self.BASIC_BV_URL % bv_id}
    if types == 1:
        return {"Accept": get_accept("html"), "Host": self.BILIBILI_URL}
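# Hypothetical usage sketch: av ids given as ints are normalized to the "av{id}"
# string before interpolation; bv ids pass through unchanged.
# api_headers = spider.get_api_headers("BV1xx411c7mD")      # types 0: API request
# page_headers = spider.get_api_headers(170001, types=1)    # types 1: page fetch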
def search_goods_once(self, goods_name, index):
    if not os.path.exists('%scookie_alimama' % data_dir):
        print('alimama cookie not exist!!!')
        return
    with codecs.open('%scookie_alimama' % data_dir, 'r', encoding='utf-8') as f:
        cookie = f.readlines()
    url_list = [
        'https://pub.alimama.com/items/search.json?auctionTag=&perPageSize=50&shopTag=&_tb_token_=',
        cookie[1][:-1],
        '&pvid=', cookie[2][:-1],
        '&t=', str(int(round(time.time() * 1000))),
        '&_t=', str(int(round(time.time() * 1000))),
        '&q=', goods_name,
    ]
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Cookie': cookie[0][:-1],
        'Content-Type': get_content_type(),
        'Accept': get_accept('xhr'),
    }
    ca = basic_req(''.join(url_list), 2, header=headers)
    if ca is None or ca.status_code != 200 or 'data' not in ca.json():
        if can_retry(''.join(url_list)):
            self.search_goods_once(goods_name, index)
        return
    page_list = ca.json()['data']['pageList']
    if not page_list:
        return
    goods = page_list[0]
    title = '||'.join([str(goods['auctionId']), goods_name, str(goods['zkPrice'])])
    self.goods_name[index] = title
    print(title)
def have_places_once(self):
    """ have places """
    url = 'http://elective.pku.edu.cn/elective2008/edu/pku/stu/elective/controller/supplement/refreshLimit.do'
    if not os.path.exists('%scookie' % data_path):
        print('Brush Cookie not exist!!!')
        return
    with open('%scookie' % data_path, 'r') as f:
        cookie = f.readlines()
    headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Cookie': cookie[0][:-1],
        'Content-Type': get_content_type(),
        'Accept': get_accept('xhr'),
        'Origin': 'http://elective.pku.edu.cn',
        'Referer': 'http://elective.pku.edu.cn/elective2008/edu/pku/stu/elective/controller/supplement/SupplyCancel.do',
    }
    data = {
        'index': '10',
        'seq': 'yjkc20141100016542',
    }
    ca = proxy_req(url, 11, data, header=headers)
    if not ca:
        if round(time.time()) - self.laster_timestamp > 60:
            send_email('Cookie failure', 'Cookie failure')
        return False
    print(ca['electedNum'])
    self.laster_timestamp = round(time.time())
    return int(ca['electedNum']) < 120
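# A minimal polling-loop sketch built on have_places_once(); `brush` is a
# hypothetical instance of the class above, and the 30s interval is an assumption.
def wait_for_places(brush, interval: int = 30):
    while True:
        if brush.have_places_once():  # True once electedNum drops below 120
            return True
        time.sleep(interval)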
def getJianshuViews(self):
    ''' get jianshu views '''
    header = {'accept': get_accept('html')}
    for rounds in range(1, 4):
        url = self.JIANSHU_URL
        if rounds > 1:
            url += '?order_by=shared_at&page={}'.format(rounds)
        echo('1|debug', 'jianshu req url:', url)
        html = self.get_request(
            url, 0,
            lambda i: not i or not len(i.find_all('div', class_='content')),
            header)
        if html is None:
            echo(0, 'None')
            return
        for index in html.find_all('li', class_=["", 'have-img']):
            if len(index.find_all('i')) < 3:
                continue
            title = index.find_all('a', class_='title')[0].text.replace('`', '')
            jianshu_id = int(index['data-note-id'])
            jianshu_count = int(index.find_all('a')[-2].text)
            if title in self.title2slug:
                temp_slug = self.title2slug[title]
                self.jianshu_id[temp_slug] = jianshu_id
                self.jianshu_views[temp_slug] = jianshu_count
            elif jianshu_id in self.jianshu_id_map:
                temp_slug = self.jianshu_id_map[jianshu_id]
                self.jianshu_id[temp_slug] = jianshu_id
                self.jianshu_views[temp_slug] = jianshu_count
            else:
                echo(1, title)
def get_tb_headers(self, url: str = "", refer_url: str = "") -> dict:
    headers = {"Accept": get_accept("html"), "User-Agent": get_use_agent()}
    if url != "":
        headers["Host"] = url.split("/")[2]
    if refer_url != "":
        headers["Referer"] = refer_url
    return headers
def test_change_youdaoyun(self, article_id, body, article_name):
    """ change youdaoyun article demo
        @param article_id: index of the article to change
        @param body: new article body
        @param article_name: new article name
        PS: needs the youdaoyun web cookie in 'buildmd/data/cookie'
    """
    url = 'https://note.youdao.com/yws/api/personal/sync?method=push&keyfrom=web&cstk=E3CF_lx8'
    headers = {
        'Cookie': '',
        'Content-Type': get_content_type(),
        'Accept': get_accept('xhr'),
        'Origin': 'https://note.youdao.com',
        'Referer': 'https://note.youdao.com/web',
    }
    if not os.path.exists('%scookie' % data_dir):
        print('Youdao Note cookie not exist!!!')
        return
    with codecs.open('%scookie' % data_dir, 'r', encoding='utf-8') as f:
        cookie = f.readline()
    headers['Cookie'] = cookie[:-1]
    headers['Host'] = url.split('/')[2]
    file_list_url = 'https://note.youdao.com/yws/api/personal/file?method=listRecent&offset=0&limit=30&keyfrom=web&cstk=E3CF_lx8'
    file_data = {'cstk': 'E3CF_lx8'}
    ca = basic_req(file_list_url, 11, data=file_data, header=headers)
    if ca is None or not len(ca):
        print('List Error')
        return
    change_data_origin = ca[article_id]['fileEntry']
    body_string = [
        '<?xml version="1.0"?><note xmlns="http://note.youdao.com" schema-version="1.0.3" file-version="0"><head/><body><para><coId>12-1550424181958</coId><text>',
        body,
        '</text><inline-styles/><styles/></para></body></note>',
    ]
    change_data = {
        'name': article_name,
        'fileId': change_data_origin['id'],
        'parentId': change_data_origin['parentId'],
        'domain': change_data_origin['domain'],
        'rootVersion': -1,
        'sessionId': '',
        'modifyTime': int(round(time.time())),
        'bodyString': ''.join(body_string),
        'transactionId': change_data_origin['id'],
        'transactionTime': int(round(time.time())),
        'orgEditorType': change_data_origin['orgEditorType'],
        'tags': change_data_origin['tags'],
        'cstk': 'E3CF_lx8',
    }
    print(change_data)
    cb = basic_req(url, 12, data=change_data, header=headers)
    return cb
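# Hypothetical usage sketch: rewrite the body of the most recent note returned
# by listRecent (argument values below are illustrative).
# note.test_change_youdaoyun(0, 'new body text', 'demo-note')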
def get_item_basic(self, item_id: int, url: str = ""):
    url = self.ITEM_URL % item_id if url == "" else url
    headers = {"Accept": get_accept("html")}
    req = proxy_req(url, 2, header=headers, config={"allow_redirects": False})
    if req is None:
        if can_retry(url):
            return self.get_item_basic(item_id, url)
        return
    if req.status_code != 200:
        return self.get_item_basic(item_id, req.headers["Location"])
    return req
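# Hypothetical usage sketch: the manual 302 handling above follows the Location
# header recursively, so redirected item pages still resolve to a 200 response.
# req = crawler.get_item_basic(521504526)  # item id is illustrative
# if req is not None:
#     print(req.status_code, len(req.text))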
def get_m_html(self, bv_id: str) -> str:
    url = self.M_BILIBILI_URL % bv_id
    headers = {
        "Accept": get_accept("html"),
        "Host": url.split("/")[2],
        "User-Agent": get_use_agent("mobile"),
    }
    m_html = proxy_req(url, 3, header=headers)
    # proxy_req can come back None or truncated on failure, so guard before len()
    if m_html is None or len(m_html) < 1000:
        if can_retry(url):
            return self.get_m_html(bv_id)
        return ""
    return m_html
def get_score(cookie: str):
    SCORE_URL = 'https://portal.w.pku.edu.cn/portal2017/bizcenter/score/retrScores.do'
    headers = {
        'Accept': get_accept('xhr'),
        'Host': 'portal.w.pku.edu.cn',
        'Origin': 'https://portal.w.pku.edu.cn',
        'Referer': 'https://portal.w.pku.edu.cn/portal2017/',
        'Cookie': cookie,
    }
    req = basic_req(SCORE_URL, 11, header=headers)
    if req is None or list(req.keys()) != ['success', 'xslb', 'xh', 'xm', 'scoreLists']:
        if can_retry(SCORE_URL):
            return get_score(cookie)
        else:
            return
    return req
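# Usage sketch, assuming the portal cookie is stored on the first line of a
# local 'cookie' file (a hypothetical path):
# with open('cookie', 'r') as f:
#     score_data = get_score(f.readline().strip())
# if score_data is not None:
#     print(score_data['scoreLists'])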
def get_cookie(self):
    """ make cookie login
        PS: Although the cookie's expiry time is more than one year,
        it becomes invalid once the connection closes,
        so you need to reactivate the cookie with this function.
    """
    headers = {
        "Cookie": "_lang=en-US; _ga=GA1.2.1084455496.1548351129; _gid=GA1.2.1515017701.1552361687; ASP.NET_SessionId=ckin3pzyqyoyt3zg54zrtrct; _gat=1; arp_scroll_position=57",
        "Accept": get_accept("html") + ";q=0.9",
    }
    login_url = "http://www.gatherproxy.com/subscribe/login"
    cookie_html = basic_req(login_url, 3, header=headers)
    try:
        verify_text = re.findall('<span class="blue">(.*?)</span>', cookie_html)[0]
    except (IndexError, TypeError):
        return
    verify_list = verify_text.replace("= ", "").strip().split()
    num_map = {
        "Zero": 0, "One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5,
        "Six": 6, "Seven": 7, "Eight": 8, "Nine": 9, "Ten": 10,
    }
    verify_num = [verify_list[0], verify_list[2]]
    for index, num in enumerate(verify_num):
        if num.isdigit():
            verify_num[index] = int(num)
        elif num in num_map:
            verify_num[index] = num_map[num]
        else:
            echo("0|error", "Error", num)
            # return False
    verify_code = 0
    error = True
    operation = verify_list[1]
    # NB: "multiplied" is handled as addition here, faithful to the original logic
    if operation in ("+", "plus", "add", "multiplied"):
        verify_code = verify_num[0] + verify_num[1]
        error = False
    if operation in ("-", "minus"):
        verify_code = verify_num[0] - verify_num[1]
        error = False
    if operation in ("X", "multiplication"):
        verify_code = verify_num[0] * verify_num[1]
        error = False
    if error:
        echo("0|error", "Error", operation)
    if not os.path.exists("%spassage" % data_dir):
        echo("0|warning", "gather passage not exist!!!")
        return
    with codecs.open("%spassage" % data_dir, "r", encoding="utf-8") as f:
        passage = [index[:-1] for index in f.readlines()]
    data = {
        "Username": passage[0],
        "Password": passage[1],
        "Captcha": str(verify_code),
    }
    time.sleep(2.163)
    r = requests.session()
    r.cookies = cj.LWPCookieJar()
    login_req = r.post(login_url, headers=headers, data=data, verify=False)
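# A self-contained sketch of the verbal-arithmetic captcha parsing used in
# get_cookie(), assuming the page renders expressions like "Five + Three = ?".
def solve_gather_captcha(text: str) -> int:
    num_map = {"Zero": 0, "One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5,
               "Six": 6, "Seven": 7, "Eight": 8, "Nine": 9, "Ten": 10}
    tokens = text.replace("= ", "").strip().split()
    left, op, right = tokens[0], tokens[1], tokens[2]
    nums = [int(t) if t.isdigit() else num_map[t] for t in (left, right)]
    if op in ("+", "plus", "add"):
        return nums[0] + nums[1]
    if op in ("-", "minus"):
        return nums[0] - nums[1]
    return nums[0] * nums[1]  # "X" / "multiplication"

assert solve_gather_captcha("Five + Three = ?") == 8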
def match_goods(self):
    self.headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Cookie': '',
        'Content-Type': get_content_type(),
        'Accept': get_accept('xhr'),
    }
    version = begin_time()
    changeHtmlTimeout(30)
    block_size = 10
    if not os.path.exists('%sgoods' % data_dir):
        print('goods file not exist!!!')
        return
    with codecs.open('%sgoods' % data_dir, 'r', encoding='utf-8') as f:
        wait_goods = f.readlines()
    goods_url = [
        re.findall('http.* ', index)[0].strip().replace('https', 'http')
        if 'http' in index and '【' not in index else False
        for index in wait_goods
    ]
    if not os.path.exists('%scollect_wyy' % data_dir):
        print('collect file not exist!!!')
        return
    with codecs.open('%scollect_wyy' % data_dir, 'r', encoding='utf-8') as f:
        collect = f.readlines()
    self.title2map = {
        index.split("||")[1]: index.split("||")[0] for index in collect
    }
    threadings = []
    for index, url in enumerate(goods_url):
        if url is False:
            continue
        work = threading.Thread(target=self.get_goods_id_first, args=(url, index,))
        threadings.append(work)
    url_len = len(threadings)
    for index in range((url_len - 1) // block_size + 1):
        begin_id = index * block_size
        end_id = min(url_len, (index + 1) * block_size)
        threadings_block = threadings[begin_id:end_id]
        for work in threadings_block:
            work.start()
        for work in threadings_block:
            work.join()
        time.sleep(random.randint(0, 9))
    write_body = [
        ' '.join([self.goods_map[index], body]) if index in self.goods_map
        else (' '.join([self.url2goods[goods_url[index]], body])
              if goods_url[index] in self.url2goods else body)
        for index, body in enumerate(wait_goods)
    ]
    with codecs.open('%sgoods_one' % data_dir, 'w', encoding='utf-8') as f:
        f.write(''.join(write_body))
    end_time(version)
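# A self-contained sketch of the batched-threading pattern used above: start
# workers in blocks of block_size, join each block, then sleep a random jitter
# between blocks (the job list here is illustrative).
def run_in_blocks(jobs, block_size=10):
    workers = [threading.Thread(target=job) for job in jobs]
    for i in range((len(workers) - 1) // block_size + 1):
        block = workers[i * block_size:(i + 1) * block_size]
        for work in block:
            work.start()
        for work in block:
            work.join()
        time.sleep(random.randint(0, 9))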
def bulk_import_alimama(self):
    """ bulk import alimama """
    version = begin_time()
    if not os.path.exists('%scollect_wyy' % data_dir):
        print('Collect File not exist!!!')
        return
    with codecs.open('%scollect_wyy' % data_dir, 'r', encoding='utf-8') as f:
        goods = f.readlines()
    self.goods_candidate = [index.split('||')[0] for index in goods]
    goods_len = len(self.goods_candidate)
    self.headers = {
        'X-Requested-With': 'XMLHttpRequest',
        'Cookie': '',
        'Content-Type': get_content_type(),
        'Accept': get_accept('xhr'),
        'Origin': 'http://pub.alimama.com',
        'Referer': 'http://pub.alimama.com/promo/search/index.htm?q=%E7%AC%AC%E5%9B%9B%E5%8D%81%E4%B9%9D%E5%A4%A9%2019%E6%98%A5%E5%AD%A3&_t=1550891362391',
    }
    if not os.path.exists('%scookie_alimama' % data_dir):
        print('alimama cookie not exist!!!')
        return
    with codecs.open('%scookie_alimama' % data_dir, 'r', encoding='utf-8') as f:
        cookie = f.readlines()
    url_list = [
        'https://pub.alimama.com/favorites/group/newList.json?toPage=1&perPageSize=40&keyword=&t=',
        str(int(round(time.time() * 1000))),
        '&_tb_token_=', cookie[1][:-1],
        '&pvid=', cookie[2][:-1],
    ]
    url = ''.join(url_list)
    self.headers['Cookie'] = cookie[0][:-1]
    self.headers['Host'] = url.split('/')[2]
    group_list = basic_req(url, 2, header=self.headers)
    if group_list is None or group_list.status_code != 200 \
            or group_list.json().get('info', {}).get('message') == 'nologin':
        print('group_list error')
        return
    group_list = group_list.json()['data']['result']
    group_list = [index['id'] for index in group_list]
    print(group_list)
    assert len(group_list) > (goods_len - 1) // 200
    threadings = []
    for index in range((goods_len - 1) // 200 + 1):
        work = threading.Thread(target=self.bulk_import_alimama_once,
                                args=(index, group_list[index],))
        threadings.append(work)
    for work in threadings:
        work.start()
    for work in threadings:
        work.join()
    end_time(version)