def _set_headers(self):
    """Store the default request headers on ``self.headers``.

    The ``User-Agent`` entry is whatever ``get_random_phone_ua()`` returns; it
    is called once per invocation. Every other entry is a fixed string.
    """
    user_agent = get_random_phone_ua()
    # Built from ordered pairs so the header order stays exactly as declared.
    self.headers = dict((
        ('Connection', 'keep-alive'),
        ('Upgrade-Insecure-Requests', '1'),
        ('User-Agent', user_agent),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip, deflate'),
        ('Accept-Language', 'zh-CN,zh;q=0.9'),
    ))
def _set_headers(self):
    """Assign the XMLHttpRequest-style request headers to ``self.headers``.

    ``user-agent`` is filled from a single ``get_random_phone_ua()`` call; the
    remaining values are constants.
    """
    headers = {
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
    }
    headers['user-agent'] = get_random_phone_ua()
    headers['content-type'] = 'application/x-www-form-urlencoded'
    headers['accept'] = '*/*'
    # Disabled entry carried over unchanged:
    # 'authority': 'm-goods.kaola.com',
    headers['x-requested-with'] = 'XMLHttpRequest'
    self.headers = headers
def _get_phone_headers():
    """Return a newly built dict of request headers.

    A new dict is created on every call, and ``user-agent`` is taken from
    ``get_random_phone_ua()`` at call time.
    """
    phone_headers = {}
    phone_headers['cache-control'] = 'max-age=0'
    phone_headers['upgrade-insecure-requests'] = '1'
    phone_headers['user-agent'] = get_random_phone_ua()
    phone_headers['accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    phone_headers['accept-encoding'] = 'gzip, deflate, br'
    phone_headers['accept-language'] = 'zh-CN,zh;q=0.9,en;q=0.8'
    return phone_headers
def _set_headers(self):
    """Store the request headers for ``app.quanmama.com`` on ``self.headers``.

    ``User-Agent`` comes from one ``get_random_phone_ua()`` call; all other
    values, including ``Host`` and ``Content-Length``, are fixed strings.
    """
    user_agent = get_random_phone_ua()
    header_pairs = [
        ('Accept', '*/*'),
        ('Connection', 'keep-alive'),
        ('Content-Type', 'application/x-www-form-urlencoded'),
        ('Accept-Encoding', 'br, gzip, deflate'),
        ('Host', 'app.quanmama.com'),
        ('User-Agent', user_agent),
        # NOTE(review): fixed length; presumably matches one specific request
        # body -- confirm it is still correct for every request that is sent.
        ('Content-Length', '885'),
        ('Accept-Language', 'zh-Hans-CN;q=1, en-CN;q=0.9'),
    ]
    self.headers = dict(header_pairs)
def _get_headers():
    """Build and return the request header dict.

    Each call yields a new dict whose ``User-Agent`` is the result of
    ``get_random_phone_ua()``; the other entries never change.
    """
    user_agent = get_random_phone_ua()
    return dict([
        ('Connection', 'keep-alive'),
        ('Cache-Control', 'max-age=0'),
        ('Upgrade-Insecure-Requests', '1'),
        ('User-Agent', user_agent),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Accept-Language', 'zh-CN,zh;q=0.9'),
    ])
def _set_headers(self):
    """Assign the request headers for ``home.mi.com`` to ``self.headers``.

    ``User-Agent`` is produced by ``get_random_phone_ua()``; ``DToken`` is sent
    as an empty string. No ``Referer`` header is set.
    """
    headers = {'Origin': 'https://home.mi.com'}
    headers['Accept-Encoding'] = 'gzip, deflate, br'
    headers['Accept-Language'] = 'zh-CN,zh;q=0.9'
    headers['User-Agent'] = get_random_phone_ua()
    headers['Content-Type'] = 'application/x-www-form-urlencoded'
    headers['Accept'] = '*/*'
    # Disabled entry carried over unchanged:
    # 'Referer': 'https://home.mi.com/detail?gid=101421',
    headers['DToken'] = ''
    headers['Connection'] = 'keep-alive'
    self.headers = headers
def _set_headers(self):
    """Assign the XMLHttpRequest-style headers for ``m-goods.kaola.com``.

    The result is stored on ``self.headers``. ``user-agent`` is taken from one
    ``get_random_phone_ua()`` call; all other values are constants.
    """
    user_agent = get_random_phone_ua()
    # A captured 'cookie' header and a product-page 'referer' header existed
    # here only as commented-out entries; neither is sent.
    self.headers = dict((
        ('accept-encoding', 'gzip, deflate, br'),
        ('accept-language', 'zh-CN,zh;q=0.9'),
        ('user-agent', user_agent),
        ('content-type', 'application/x-www-form-urlencoded'),
        ('accept', '*/*'),
        ('authority', 'm-goods.kaola.com'),
        ('x-requested-with', 'XMLHttpRequest'),
    ))
def __init__(self):
    """Initialise the instance with request headers for ``www.douyin.com``.

    Only ``self.headers`` is set. Its ``user-agent`` value is the result of a
    single ``get_random_phone_ua()`` call made during construction.
    """
    headers = {}
    headers['accept-encoding'] = 'gzip, deflate, br'
    headers['accept-language'] = 'zh-CN,zh;q=0.9'
    headers['upgrade-insecure-requests'] = '1'
    headers['user-agent'] = get_random_phone_ua()
    headers['accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    headers['cache-control'] = 'max-age=0'
    headers['authority'] = 'www.douyin.com'
    # A captured 'cookie' header existed here only as a commented-out entry;
    # it is not sent.
    self.headers = headers
async def _search(self, search_key) -> list:
    """Search m.tianyancha.com for a keyword and parse the first result.

    :param search_key: keyword to search for; it is passed through ``str()``
        and sent as the ``key`` query parameter.
    :return: ``[]`` when the response body is an empty string or when no
        company name / company url can be extracted; otherwise a one-element
        list holding a dict with the keys ``company_name``, ``company_url``,
        ``legal_person`` and ``legal_person_url``.
    """
    headers = {
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': get_random_phone_ua(),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': 'https://m.tianyancha.com/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    params = (('key', str(search_key)), )
    url = 'https://m.tianyancha.com/search'
    # NOTE(review): this looks like a synchronous call inside a coroutine --
    # confirm whether blocking the event loop here is acceptable.
    body = Requests.get_url_body(url=url, headers=headers, params=params, cookies=None)
    if body == '':
        return []

    # NOTE(review): '::text' selects text nodes, so ``search_res`` is plain
    # text rather than markup and the element selectors below may never match.
    # Left unchanged; confirm against a live response before altering it.
    search_res = Selector(text=body).css(
        'div.search_result_container ::text').extract_first() or ''
    result_sel = Selector(text=search_res)

    # Explicit checks replace ``assert`` so validation still runs under
    # ``python -O``; the printed messages and the ``[]`` result are unchanged.
    company_name = result_sel.css(
        'div.new-border-bottom a span text ::text').extract_first() or ''
    if company_name == '':
        print('company_name为空值!')
        return []

    company_url = result_sel.css(
        'div.new-border-bottom a ::attr("href")').extract_first() or ''
    if company_url == '':
        print('url为空值!')
        return []

    legal_person = result_sel.css(
        'a.legalPersonName ::text').extract_first() or ''
    legal_person_url = result_sel.css(
        'a.legalPersonName ::attr("href")').extract_first() or ''
    if legal_person_url != '':
        # The extracted href is appended to the site origin.
        legal_person_url = 'https://m.tianyancha.com' + legal_person_url

    # The parsed fields used to be discarded and the function fell off the end
    # (returning None) despite the ``-> list`` annotation; return them instead.
    search_list = [{
        'company_name': company_name,
        'company_url': company_url,
        'legal_person': legal_person,
        'legal_person_url': legal_person_url,
    }]
    return search_list
def _get_one_page_comment_info(self, goods_id, page_num) -> list:
    """Fetch one page of comment records for a JD goods id.

    :param goods_id: goods id; it is formatted into the ``Referer`` url and
        sent (via ``str()``) as the ``sku`` query parameter.
    :param page_num: page number, sent (via ``str()``) as ``page``.
    :return: the value found under ``result -> comments`` in the payload that
        is wrapped in parentheses inside the response body; ``[]`` when the
        body has no parenthesised payload or those keys are missing.
    :raises AssertionError: when the response body is an empty string.
    """
    headers = {
        'Referer': 'https://item.m.jd.com/product/{}.html'.format(goods_id),
        'User-Agent': get_random_phone_ua(),
    }
    params = (
        # ('callback', 'skuJDEvalA'),
        ('sorttype', '5'),
        ('pagesize', '10'),
        ('sceneval', '2'),
        ('score', '3'),     # fetch the positive reviews
        ('sku', str(goods_id)),
        ('page', str(page_num)),
        # ('t', '0.7175421988280679'),
    )
    url = 'https://wq.jd.com/commodity/comment/getcommentlist'
    body = Requests.get_url_body(
        url=url,
        headers=headers,
        params=params,
        ip_pool_type=self.ip_pool_type,
    )
    if body == '':
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; exception type and message are kept
        # so existing ``except AssertionError`` handlers still work.
        raise AssertionError('body不为空值!')

    data = []
    # Raw-string pattern avoids the invalid '\(' escape sequences; the greedy
    # group captures everything between the first '(' and the last ')' on a
    # line, exactly as before.
    wrapped = re.search(r'\((.*)\)', body)
    if wrapped is not None:
        # presumably json_2_dict returns ``default_res`` when parsing fails --
        # verify against fzutils.
        data = json_2_dict(
            json_str=wrapped.group(1),
            default_res={}).get('result', {}).get('comments', [])

    # '+' marks a page that yielded comments, '-' an empty one.
    self.lg.info('[{}] page_num: {}'.format(
        '+' if data != [] else '-',
        page_num,
    ))

    return data
@author = super_fazai @File : gd_map_spider.py @connect : [email protected] ''' from pprint import pprint from fzutils.internet_utils import get_random_phone_ua from fzutils.spider.fz_requests import Requests from fzutils.ip_pools import tri_ip_pool from fzutils.common_utils import json_2_dict # 高德map 单页shop搜索 headers = { 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8', 'user-agent': get_random_phone_ua(), 'accept': 'application/json', 'referer': 'https://m.amap.com/search/view/keywords=%E8%A1%A3%E6%9C%8D', 'authority': 'm.amap.com', 'x-requested-with': 'XMLHttpRequest', } params = ( ('pagenum', '2'), ('user_loc', '120.153576,30.287459'), ('geoobj', '120.089203|30.177242|120.217949|30.397676'), ('city', '杭州'), ('keywords', '衣服'), ('cluster_state', '5'), ('client_network_class', '4'), # ('uuid', '2a21e0af-009d-4a1a-a63e-ee5c6dec2488'),