def _get_true_sku_info(self, goods_id, tmp_data):
    '''
    Build the per-spec (SKU) records: spec name, prices, image and stock.

    :param goods_id: sequence whose element [1] is the product id of the page
        being parsed (it is compared with each colour's ``product_id``)
    :param tmp_data: decoded batch response; element [5] is read as the
        multi-colour result and element [6] as the size/SKU result
    :return: list of dicts with the keys ``spec_value``, ``detail_price``,
        ``normal_price``, ``img_url`` and ``rest_number``; ``[]`` when the
        colour/SKU data is missing, nothing is in stock, or fetching another
        colour's page fails
    '''

    def _in_stock_skus(sku_items):
        # Turn raw productSku items into SKU records, skipping sold-out ones.
        # Per the original author's note, ``type == 1`` means stock is 0.
        records = []
        for sku in sku_items:
            if sku.get('type', 0) == 1:
                continue
            # Always take the most favourable price; fall back to
            # vipshop_price when no promotion price is given.
            detail_price = sku.get('promotion_price', '')
            if detail_price == '':
                detail_price = sku.get('vipshop_price', '')
            normal_price = sku.get('market_price', '')
            if normal_price == '':
                normal_price = detail_price
            records.append({
                'spec_value': sku.get('sku_name', ''),
                'detail_price': detail_price,
                'normal_price': normal_price,
                'img_url': '',  # default; filled in when combined with a colour
                'rest_number': sku.get('leavings', 0),  # remaining stock of this spec
            })
        return records

    multi_color = tmp_data[5].get('result', {})
    product_sku = tmp_data[6].get('result', {}).get('productSku', {})
    if not multi_color or not product_sku:
        return []

    raw_colors = multi_color.get('items')
    if raw_colors is None:
        colors = None  # the product has no colour dimension at all
    else:
        # Keep only the colours that still have stock (type != 1).
        colors = [
            {
                'goods_id': color.get('product_id', ''),
                'name': color.get('name', ''),
                'img_url': 'https:' + color.get('icon', {}).get('imageUrl', ''),
            }
            for color in raw_colors
            if color.get('type', 0) != 1
        ]
        if not colors:
            # No spec, or sold out / taken off the shelf.
            print('获取到的color_为空[], 请检查!')
            return []

    raw_skus = product_sku.get('items')
    if raw_skus is None:
        print('获取到的others_items为None')
        return []
    own_skus = _in_stock_skus(raw_skus)

    # Without a colour dimension, or with a single (default-selected) colour,
    # the colour name is not merged into spec_value.
    if colors is None or len(colors) == 1:
        return own_skus

    true_sku_info = []
    for color in colors:
        color_name = color.get('name', '')
        if color.get('goods_id') == goods_id[1]:
            # This colour is the page we already have.
            if color_name == '无':  # placeholder meaning "no colour attribute"
                continue
            color_skus = own_skus
        else:
            # Another colour lives under its own product id: fetch THAT
            # product's page (not the current one) to read its SKUs.
            url = 'https://m.vip.com/server.html'
            params = self._set_params()
            page = 'product-0-' + str(color.get('goods_id')) + '.html'
            post_data = self._set_post_data(page=page)
            body = MyRequests.post_url_body(
                url=url, headers=self.headers, params=params, data=post_data)
            if body == '':
                print('获取其他颜色规格的url的body时为空值')
                return []
            try:
                color_data = json.loads(body)
            except (ValueError, TypeError):
                print('json.loads转换tmp_data_2时出错, 请检查!')
                return []
            color_skus = _in_stock_skus(
                color_data[6].get('result', {}).get('productSku', {}).get('items', []))

        for sku in color_skus:
            # Copy instead of mutating, so records shared between colours are
            # never overwritten by a later iteration.
            true_sku_info.append(dict(
                sku,
                spec_value=color_name + '|' + sku.get('spec_value', ''),
                img_url=color.get('img_url', ''),
            ))

    return true_sku_info
def get_goods_data(self, goods_id):
    '''
    Request the product detail batch for ``goods_id`` and parse it.

    The parsed dict is also stored on ``self.result_data``; on every failure
    path ``self.result_data`` is reset to ``{}`` so stale data from a
    previous crawl is never stored by mistake.

    :param goods_id: list; element [1] is the product id used to build the
        request page. An empty (or otherwise falsy) value yields ``{}``
    :return: dict with the keys title, sub_title, shop_name, all_img_url,
        p_info, div_desc, sell_time, detail_name_list and price_info_list,
        or ``{}`` when the request or the parsing fails
    '''
    if not goods_id:
        self.result_data = {}
        return {}

    # Endpoint captured from the vip.com WeChat mini program.
    url = 'https://m.vip.com/server.html'
    params = self._set_params()
    page = 'product-0-' + str(goods_id[1]) + '.html'
    post_data = self._set_post_data(page=page)
    body = MyRequests.post_url_body(
        url=url, headers=self.headers, params=params, data=post_data)
    if body == '':
        self.result_data = {}
        return {}

    try:
        tmp_data = json.loads(body)
    except (ValueError, TypeError):
        print('json.loads转换body时出错, 请检查!')
        tmp_data = {}
    if not tmp_data:
        self.result_data = {}
        return {}

    data = {}
    try:
        # Element [2] of the batch is read as the product detail result.
        detail = tmp_data[2].get('result', {})

        # Explicit raises instead of ``assert``: asserts are stripped when
        # Python runs with -O, which would let empty fields through.
        data['title'] = detail.get('product_name', '')
        if data['title'] == '':
            raise ValueError('获取到的title为空值, 请检查!')
        data['sub_title'] = ''

        data['shop_name'] = detail.get('brand_info', {}).get('brand_name', '')

        # All sample images.
        img_items = detail.get('img_pre', [])
        if img_items == []:
            raise ValueError('获取到的all_img_url为空[], 请检查!')
        data['all_img_url'] = [
            {'img_url': 'https:' + img.get('b_img', '')} for img in img_items
        ]

        p_info = self._get_p_info(tmp_data=tmp_data)
        if p_info == []:
            raise ValueError('p_info为空list, 请检查!')
        data['p_info'] = p_info

        div_desc = self.get_goods_div_desc(
            tmp_data=detail.get('detailImages', []))
        if div_desc == '':
            raise ValueError('获取到的div_desc为空值! 请检查')
        data['div_desc'] = div_desc

        # On-shelf / off-shelf time. A missing value defaults to {} and makes
        # the int() below raise, which the handler turns into an empty result.
        data['sell_time'] = {
            'begin_time': detail.get('sell_time_from', {}),
            'end_time': detail.get('sell_time_to', {}),
        }
        if int(data['sell_time'].get('begin_time')) > int(time.time()):
            # Selling has not started yet: treat it as a pre-sale product and
            # switch goods_id to the pre-sale format before reading the SKUs.
            goods_id = [1, goods_id[1]]

        data['detail_name_list'] = self._get_detail_name_list(tmp_data=tmp_data)

        # Price, spec and stock of every spec.
        true_sku_info = self._get_true_sku_info(
            goods_id=goods_id, tmp_data=tmp_data)
        if true_sku_info == []:
            # May simply mean the product is sold out or off the shelf.
            print('获取到的sku_info为空值, 请检查!')
            print('*** 注意可能是卖完了,库存为0 导致!! ***')
        data['price_info_list'] = true_sku_info
    except Exception as e:
        # Boundary handler: any malformed response yields an empty result.
        print('遇到错误如下: ', e)
        self.result_data = {}
        return {}

    self.result_data = data
    return data
def test():
    '''
    Manual smoke test: post the full JSON-RPC batch for one hard-coded
    product to the vip.com WeChat mini-program endpoint and pretty-print the
    decoded response.

    NOTE(review): this file defines ``test`` more than once; the later
    definition replaces this one when the module is loaded.
    '''
    # Endpoint captured from the vip.com WeChat mini program.
    url = 'https://m.vip.com/server.html'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-cn',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'm.vip.com',
        'Referer': 'https://servicewechat.com/wxe9714e742209d35f/284/page-frame.html',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Mobile/15A5341f MicroMessenger/6.6.5 NetType/WIFI Language/zh_CN',
    }

    # _xcxid: current unix time in seconds followed by three random digits.
    xcx_id = str(round(time.time())) + str(randint(100, 999))
    params = {
        'serv': 'getGoodsActiveMsg',
        '_xcxid': xcx_id,
    }

    goods_id = '460143743'
    page = 'product-0-' + str(goods_id) + '.html'

    # One JSON-RPC call per method, numbered 1..9 in this order.
    methods = (
        'getGoodsActiveMsg',
        'getCoupon',
        'getProductDetail',
        'getProductMeta',
        'getProductSlide',
        'getProductMultiColor',
        'getProductSize',
        'getProductCountdown',
        'ProductRpc.getProductLicense',
    )
    data = dumps([
        {
            "method": method,
            "params": {
                "page": page,
                "query": "",
            },
            'id': call_id,
            "jsonrpc": "2.0",
        }
        for call_id, method in enumerate(methods, start=1)
    ])

    body = MyRequests.post_url_body(url=url, headers=headers, params=params, data=data)

    try:
        decoded = json.loads(body)
    except (ValueError, TypeError):
        # Report the failure instead of silently swallowing every exception.
        print('json.loads转换body时出错, 请检查!')
    else:
        pprint(decoded)
def test():
    '''
    Manual smoke test: replay a captured JSON-RPC batch (original page name
    and request ids) against the vip.com WeChat mini-program endpoint and
    pretty-print the decoded response.

    NOTE(review): an earlier ``test`` definition exists in this file; this
    later one replaces it when the module is loaded.
    '''
    # Endpoint captured from the vip.com WeChat mini program.
    url = 'https://m.vip.com/server.html'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip',
        'Accept-Language': 'zh-cn',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'm.vip.com',
        'Referer': 'https://servicewechat.com/wxe9714e742209d35f/284/page-frame.html',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Mobile/15A5341f MicroMessenger/6.6.5 NetType/WIFI Language/zh_CN',
    }
    # NOTE(review): these captured cookies are not passed to the request
    # below; kept for reference only.
    cookies = {
        'mars_cid': '1522488378117_dc1dd95b12eabf2810ceccbe1d7b5f05',
        'userId': '246736848',
        'warehouse': 'VIP_SH',
        'vip_wh': 'VIP_SH',
        'WAP[p_wh]': 'VIP_SH',
        'saturn': 'v494a41983b12ac4be82124030c99f71f',
        'wap_consumer': 'C1-2',
        'client_from': 'wxsmall',
        'm_vip_province': '103103',
        'WAP[p_area]': '%E6%B5%99%E6%B1%9F',
    }

    # _xcxid: current unix time in seconds followed by a fixed '001' suffix.
    params = {
        'serv': 'getGoodsActiveMsg',
        '_xcxid': str(int(time.time())) + '001',
    }

    page = "product-2558393-460143743.html"
    # The captured request ids are consecutive, starting at this value.
    first_id = 4884390025335
    methods = (
        "getGoodsActiveMsg",
        "getCoupon",
        "getProductDetail",
        "getProductMeta",
        "getProductSlide",
        "getProductMultiColor",
        "getProductSize",
        "getProductCountdown",
        "ProductRpc.getProductLicense",
    )
    data = dumps([
        {
            "method": method,
            "params": {
                "page": page,
                "query": "",
            },
            "id": call_id,
            "jsonrpc": "2.0",
        }
        for call_id, method in enumerate(methods, start=first_id)
    ])

    body = MyRequests.post_url_body(url=url, headers=headers, params=params, data=data)

    try:
        decoded = json.loads(body)
    except (ValueError, TypeError):
        # Report the failure instead of silently swallowing every exception.
        print('json.loads转换body时出错, 请检查!')
    else:
        pprint(decoded)