def get_id_info(category, page):
    """Return the goods ids found on one listing page of a category.

    Two requests are made against api.yangkeduo.com:

    1. ``/operations`` is fetched and flattened into a
       ``{category name: category id}`` lookup. A child's name is its
       ``opt_desc`` when that key is present, otherwise its ``opt_name``.
    2. ``/v4/operation/<category id>/groups`` is fetched with the query
       parameters produced by ``get_params1(page)``.

    Args:
        category: Category name; it must match a name found in the
            operations response.
        page: Passed straight through to ``get_params1``.

    Returns:
        list: The ``goods_id`` of every entry in the response's
        ``goods_list``, in response order.

    Raises:
        KeyError: If ``category`` is not among the fetched category names,
            or the responses lack the expected keys.
    """
    # Certificate verification is switched off for these calls, so silence
    # the warning urllib3 would otherwise emit for each request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    operations_url = "http://api.yangkeduo.com/operations?pdduid=1393851438"
    operations = json.loads(
        requests.get(operations_url, headers=get_headers2(),
                     verify=False).text)

    cate_dict = {}
    for top_level in operations:
        for child in top_level['children']:
            cate_id = child['id']
            if 'opt_desc' in child:
                cate_name = child['opt_desc']
            else:
                cate_name = child['opt_name']
            cate_dict['%s' % cate_name] = cate_id

    groups_url = "http://api.yangkeduo.com/v4/operation/{}/groups".format(
        cate_dict['%s' % category])
    listing = json.loads(
        requests.get(groups_url, params=get_params1(page),
                     headers=get_headers2(), verify=False).text)

    # The whole response used to be pretty-printed here on every call; that
    # debug dump is gone because callers only need the returned ids.
    return [goods['goods_id'] for goods in listing['goods_list']]
def get_detail(category, page):
    """Collect detail records for every product on one category page.

    Steps, all against api.yangkeduo.com:

    1. Fetch ``/operations`` and build a ``{category name: id}`` lookup
       (``opt_desc`` is used as the name when present, else ``opt_name``).
    2. Fetch the category's ``/groups`` listing using ``get_params1(page)``.
    3. For each listed product, fetch its goods detail and its mall info.

    Args:
        category: Category name to look up in the operations response.
        page: Passed straight through to ``get_params1``.

    Returns:
        list[dict]: One dict per product with the keys ``title``,
        ``goods_id``, ``price``, ``imgsSrc``, ``source`` and ``shop_name``,
        plus ``videoUrl`` only when a gallery url contains ``.mp4``.

    Raises:
        KeyError: If ``category`` is unknown or a response lacks an
            expected key.
    """
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # 1. Category name -> id lookup.
    operations_url = "http://api.yangkeduo.com/operations?pdduid=1393851438"
    operations_resp = requests.get(operations_url, headers=get_headers2(),
                                   verify=False)
    operations = json.loads(operations_resp.content.decode('utf-8'))

    category_ids = {}
    for group in operations:
        for entry in group['children']:
            entry_id = entry['id']
            label = (entry['opt_desc'] if 'opt_desc' in entry
                     else entry['opt_name'])
            category_ids['%s' % label] = entry_id

    # 2. Product listing for the requested category/page.
    listing_url = "http://api.yangkeduo.com/v4/operation/{}/groups".format(
        category_ids['%s' % category])
    listing_resp = requests.get(listing_url, params=get_params1(page),
                                headers=get_headers2(), verify=False)
    listing = json.loads(listing_resp.content.decode('utf-8'))

    # 3. One detail request and one mall request per listed product.
    products = []
    for item in listing['goods_list']:
        goods_id = item['goods_id']
        mall_id = item['mall_id']

        detail_url = "http://api.yangkeduo.com/api/oakstc/v14/goods/{}?goods_id={}&from=0&pdduid=1393851438".format(
            goods_id, goods_id)
        detail_resp = requests.get(detail_url, headers=get_headers2(),
                                   verify=False)
        detail = json.loads(detail_resp.content.decode('utf-8'))

        record = {
            'title': detail['goods_name'],
            'goods_id': goods_id,
            # NOTE(review): the divisor presumably converts the API's
            # integer price into currency units - confirm 1000 is right.
            'price': detail['market_price'] / 1000,
        }

        # Gallery entries are either a video (url contains ".mp4") or an
        # image; if several videos exist the last one wins.
        images = []
        for media in detail['gallery']:
            media_url = media['url']
            if '.mp4' not in media_url:
                images.append(media_url)
                continue
            record['videoUrl'] = media_url
        record['imgsSrc'] = images
        record['source'] = "https://mobile.yangkeduo.com/goods2.html?goods_id={}".format(
            goods_id)

        mall_url = "http://api.yangkeduo.com/mall/{}/info?check_merchant_coupon=no&pdduid=1393851438".format(
            mall_id)
        # NOTE(review): unlike the other calls, this request does not pass
        # verify=False - confirm whether that difference is intentional.
        mall_resp = requests.get(mall_url, headers=get_headers2())
        mall = json.loads(mall_resp.text)
        record['shop_name'] = mall['mall_name']

        products.append(record)

    return products
def one_page(cate, page):
    """Fetch one search-result page and request each product's page.

    The search endpoint's response text is not parsed as-is: the text
    between the first ``{`` and the ``jlf_fold_onoff`` marker is cut out,
    trailing ``"`` and ``,`` characters are stripped, and the result is
    re-wrapped in braces before being handed to ``json.loads``.

    The rebuilt JSON text is pretty-printed, then the mobile product page
    of every entry in ``goods`` is requested. Those responses are currently
    discarded and the function returns ``None``.

    Args:
        cate: Passed to ``get_params1`` together with ``page``.
        page: Passed to ``get_params1`` together with ``cate``.

    Raises:
        AttributeError: If the response text does not contain the
            ``{ ... jlf_fold_onoff`` span (``re.search`` returns ``None``).
        ValueError: If the extracted text is not valid JSON (raised by
            ``json.loads``).
        KeyError: If the parsed object has no ``goods`` key or an entry has
            no ``partnumber``.
    """
    search_url = "http://search.suning.com/emall/mobile/wap/clientSearch.jsonp"
    raw_text = requests.get(search_url, params=get_params1(cate, page),
                            headers=get_headers2()).text

    # Raw string: "\{" in a plain literal is an invalid escape sequence and
    # triggers a DeprecationWarning/SyntaxWarning on current Python. The
    # compiled pattern is unchanged. re.S lets "." span newlines.
    match = re.search(r'\{(.*?)jlf_fold_onoff', raw_text, re.S)

    # The capture stops right before the marker key, so drop the dangling
    # quote and comma, then restore the surrounding braces.
    json_text = '{' + match.group(1).rstrip('"').rstrip(',') + '}'
    pprint.pprint(json_text)
    result = json.loads(json_text)

    for goods in result['goods']:
        product_url = "https://m.suning.com/product/0000000000/{}.html".format(
            goods['partnumber'])
        # The response is not used yet; the request is kept so the
        # function's network behavior stays the same.
        requests.get(product_url, params=get_params2(),
                     headers=get_headers3())
def one_page(cate, page):
    """Return the part numbers listed on one search-result page.

    The search endpoint's response text is not parsed as-is: the text
    between the first ``{`` and the ``jlf_fold_onoff`` marker is cut out,
    trailing ``"`` and ``,`` characters are stripped, and the result is
    re-wrapped in braces before being handed to ``json.loads``.

    Args:
        cate: Passed to ``get_params1`` together with ``page``.
        page: Passed to ``get_params1`` together with ``cate``.

    Returns:
        list: The ``partnumber`` of every entry in the parsed ``goods``
        list, in response order.

    Raises:
        AttributeError: If the response text does not contain the
            ``{ ... jlf_fold_onoff`` span (``re.search`` returns ``None``).
        ValueError: If the extracted text is not valid JSON (raised by
            ``json.loads``).
        KeyError: If the parsed object has no ``goods`` key or an entry has
            no ``partnumber``.
    """
    search_url = "http://search.suning.com/emall/mobile/wap/clientSearch.jsonp"
    raw_text = requests.get(search_url, params=get_params1(cate, page),
                            headers=get_headers2()).text

    # Raw string: "\{" in a plain literal is an invalid escape sequence and
    # triggers a DeprecationWarning/SyntaxWarning on current Python. The
    # compiled pattern is unchanged. re.S lets "." span newlines.
    match = re.search(r'\{(.*?)jlf_fold_onoff', raw_text, re.S)

    # The capture stops right before the marker key, so drop the dangling
    # quote and comma, then restore the surrounding braces.
    json_text = '{' + match.group(1).rstrip('"').rstrip(',') + '}'
    result = json.loads(json_text)

    return [goods['partnumber'] for goods in result['goods']]
def get_cate():
    """Fetch the operations tree and return a category name -> id mapping.

    Every child of every top-level entry in the ``/operations`` response
    contributes one item: the key is the child's ``opt_desc`` when that key
    is present (otherwise its ``opt_name``), the value is the child's
    ``id``. A name that occurs more than once keeps the last id seen.

    Returns:
        dict: Category names mapped to category ids.
    """
    # Certificate verification is switched off below, so silence the
    # warning urllib3 would otherwise emit for the request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    endpoint = "http://api.yangkeduo.com/operations?pdduid=1393851438"
    response = requests.get(endpoint, headers=get_headers2(), verify=False)
    operations = json.loads(response.text)

    name_to_id = {}
    for group in operations:
        for entry in group['children']:
            entry_id = entry['id']
            label = (entry['opt_desc'] if 'opt_desc' in entry
                     else entry['opt_name'])
            name_to_id['%s' % label] = entry_id
    return name_to_id
# coding: utf-8
"""Fetch one search.jd.com result page and print the SKU ids it lists."""
import pprint

from lxml import etree
import requests
import urllib3

from headers_list import get_headers2
from params_list import get_params1

url = "https://search.jd.com/s_new.php"
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Download the result page as text.
res = requests.get(url, headers=get_headers2(), params=get_params1()).text

# Each result is an <li class="gl-item">; its data-sku attribute is the id.
html_tree = etree.HTML(res)
id_list = html_tree.xpath("//li[@class='gl-item']/@data-sku")

pprint.pprint(id_list)