def parse(self, response):
    """Walk one page of an activity listing.

    While the page still carries goods, accumulate ranked entries in the
    meta-carried list and queue the next page; once a page comes back
    empty, flush everything gathered so far as one item.
    """
    meta = response.meta
    subject_info = meta['subject_info']
    page = int(meta['page'])
    goods_list = meta['goods_list']
    page_size = int(meta['page_size'])
    content = json.loads(response.body.decode('utf-8'))

    # Rank of the first goods on this page.
    current_rank = (page - 1) * page_size + 1

    if content.get('goods_list'):
        for entry in content['goods_list']:
            info = self.build_goods_rank_info(
                entry['goods_id'],
                subject_info['subject_id'],
                subject_info['type'],
                current_rank,
                float(entry['group']['price'] / 100),
            )
            goods_list.append(info)
            current_rank += 1

        next_meta = {
            'subject_info': subject_info,
            'page': page + 1,
            'page_size': page_size,
            'goods_list': goods_list,
        }
        yield scrapy.Request(
            self.get_activity_url(subject_info, page + 1, page_size),
            meta=next_meta,
            callback=self.parse,
        )
    elif goods_list:
        # Listing exhausted: emit the accumulated ranking.
        item = CategoryGoodsItem()
        item['goods_lists'] = goods_list
        yield item
    def parse(self, response):
        """Collect one page of category goods, emit them, then request the
        next offset while full pages keep arriving (capped at offset 900)."""
        meta = response.meta
        offset = meta['offset']
        cat_id = meta['cat_id']
        opt_type = meta['opt_type']
        raw_text = response.body.decode('utf-8')
        payload = json.loads(raw_text)
        flip = payload['flip']

        if payload['goods_list']:
            collected = []
            for entry in payload['goods_list']:
                _ = entry['goods_id']  # original code read this key; keep the lookup
                collected.append(entry)

            count = len(collected)
            offset += count
            item = CategoryGoodsItem()
            item['goods_lists'] = collected
            self.save_goods_log(cat_id, offset, raw_text)
            yield item

            # A full page suggests more data; stop at the depth cap.
            if count >= self.pagesise and offset < 900:
                meta['offset'] = offset
                yield scrapy.Request(self.build_url(opt_type, cat_id, offset, flip),
                                     meta=meta,
                                     callback=self.parse,
                                     headers=self.make_headers(),
                                     dont_filter=True,
                                     errback=self.errback_httpbin)
 def parse(self, response):
     """Decode one goods JSON payload, persist the raw text, and yield it."""
     raw_text = response.body.decode('utf-8')  # bytes -> str
     payload = json.loads(raw_text)
     self.save_goods_log(payload['goods_id'], raw_text)
     item = CategoryGoodsItem()
     item['goods_lists'] = payload
     yield item
    def parse(self, response):
        """Parse one gateway page of category goods and schedule the next offset.

        Expects meta keys ``offset``, ``cat_id``, ``opt_type``.  Yields a
        ``CategoryGoodsItem`` with the rank-annotated goods, then — while a
        full page keeps coming back and a depth cap is not reached — a
        ``FormRequest`` through the API gateway for the next offset.
        """
        # print(response.meta)
        meta = response.meta
        offset = response.meta['offset']
        cat_id = response.meta['cat_id']
        opt_type = response.meta['opt_type']
        receive_info = response.body.decode('utf-8')
        # self.save_goods_log(cat_id, offset, receive_info)
        data = json.loads(receive_info)
        logging.debug(
            json.dumps({
                'cat_id': cat_id,
                'offset': offset,
                'receive_info': data
            }))
        goods_lists = []
        # print('parse_before', flip, offset,len(data['goods_list']),cat_id)
        if 'goods_list' not in data.keys():
            # Malformed/blocked response: hand it to the error bookkeeping.
            self.err_after(meta, True)
            # NOTE(review): this is a generator, so False only becomes the
            # StopIteration value — scrapy never sees it.
            return False
        # flip = data['flip']
        if len(data['goods_list']) > 0:
            i = 0
            for goods_data in data['goods_list']:
                i += 1
                rank = offset + i  # absolute rank = page offset + in-page position
                goods_data['rank'] = rank
                goods_data['subject_id'] = cat_id
                goods_data['type'] = 2
                goods_lists.append(goods_data)

            offset += i
            item = CategoryGoodsItem()
            item['goods_lists'] = goods_lists
            # print('parse_middle',flip,offset,len(data['goods_list']),cat_id)
            yield item
            # print('parse_after',flip,offset,len(data['goods_list']),cat_id)
            # Full page received and under the depth cap -> fetch next slice.
            if i >= self.realPagesise and offset < 1000 - self.realPagesise:
                uri = self.build_uri(opt_type, cat_id, offset, '')
                meta['offset'] = offset
                headers = self.make_headers()
                # The real upstream call is described as form fields and
                # tunnelled through a gateway endpoint.
                form_data = {
                    'name': 'getOperationGroups',
                    'method': 'GET',
                    'domain': 'http://apiv4.yangkeduo.com',
                    'uri': uri,
                    'headers': json.dumps(headers),
                }

                logging.debug(json.dumps(form_data))

                yield scrapy.FormRequest(url=self.gateway_url,
                                         formdata=form_data,
                                         meta=meta,
                                         headers={},
                                         callback=self.parse,
                                         dont_filter=True,
                                         errback=self.errback_httpbin)
# ---- Exemplo n.º 5 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Route an activity response by api_type and emit one item with the goods.

        ``rank`` is the base rank for this page (500 goods per page assumed).
        NOTE(review): rank is never incremented inside the loops, so every
        helper call receives the same base rank — confirm the get_*_info
        helpers derive per-item ranks themselves.
        """
        result = json.loads(response.body.decode('utf-8'))
        logging.debug(json.dumps({"result": result, "meta": response.meta}))
        subject_info = response.meta['subject_info']
        page = int(response.meta['page'])
        rank = (page - 1) * 500 + 1
        api_type = subject_info["api_type"]
        goods_list = []
        if api_type in [11]:  # "items" payload: limited-time flash sale
            # self.save_log(json.dumps({'goods_api_kill': api_type, 'goods_api_items_kill': result["items"]}))
            if "items" in result.keys() and len(result["items"]) > 0:
                for i in result["items"]:
                    try:
                        goods_id = i["data"]["goods_id"]
                    except Exception:
                        # Entry without goods data (e.g. a banner slot) — skip.
                        goods_id = None
                    if goods_id:
                        goods_info = self.get_kill_goods_info(subject_info, rank, i["data"])
                        goods_list.append(goods_info)
                        self.save_log(json.dumps({'goods_info_kill_' + str(api_type): goods_info}))
            else:
                return None
        if api_type in [14]:  # "result" payload: brand flash-sale sub-activity
            # self.save_log(json.dumps({'goods_api_slow': api_type, 'goods_api_items_slow': result["result"]}))
            if "result" in result.keys() and len(result["result"]) > 0:
                for i in result["result"]:
                    goods_info = self.get_goods_info_slow(subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_slow' + str(api_type): goods_info}))
            else:
                return None

        if api_type in [21, 72]:  # "list" payload: clearance / homepage sub-activity
            if "list" in result.keys() and len(result["list"]) > 0:
                for i in result["list"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_short_' + str(api_type): goods_info}))
            else:
                return None
        if api_type in [31, 41]:  # "goods_list" payload: brand hall / 9.9-yuan sale
           #  self.save_log(json.dumps({'goods_api_brand': api_type, 'goods_api_items_brand': result["goods_list"]}))
            if "goods_list" in result.keys() and len(result["goods_list"]) > 0:
                for i in result["goods_list"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_brand_' + str(api_type): goods_info}))
            else:
                return None

        item = CategoryGoodsItem()
        logging.debug(json.dumps({'goods_list': goods_list, "goods_len": len(goods_list)}))
        self.save_log(json.dumps({'goods_list_all': goods_list, "goods_len": len(goods_list)}))
        item['goods_lists'] = goods_list
        yield item
# ---- Exemplo n.º 6 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Parse a page of category goods; retry the same URL if the payload
        lacks ``goods_list`` entirely."""
        # print(response.meta)
        meta = response.meta
        offset = response.meta['offset']
        cat_id = response.meta['cat_id']
        opt_type = response.meta['opt_type']
        receive_info = response.body.decode('utf-8')
        # self.save_goods_log(cat_id, offset, receive_info)
        data = json.loads(receive_info)
        logging.debug(
            json.dumps({
                'cat_id': cat_id,
                'offset': offset,
                'receive_info': data
            }))
        goods_lists = []
        # print('parse_before', flip, offset,len(data['goods_list']),cat_id)
        if 'goods_list' in data.keys():
            flip = data['flip']  # server-side pagination cursor for the next request
            if len(data['goods_list']) > 0:
                i = 0
                for goods_data in data['goods_list']:
                    i += 1
                    rank = offset + i  # absolute rank = offset + in-page position
                    goods_data['rank'] = rank
                    goods_data['subject_id'] = cat_id
                    goods_data['type'] = 2
                    goods_lists.append(goods_data)

                offset += i
                item = CategoryGoodsItem()
                item['goods_lists'] = goods_lists
                # print('parse_middle',flip,offset,len(data['goods_list']),cat_id)
                yield item
                # print('parse_after',flip,offset,len(data['goods_list']),cat_id)
                # Full page and under the depth cap -> queue the next offset.
                if i >= self.pagesise and offset < 900:
                    url = self.build_url(opt_type, cat_id, offset, flip)
                    meta['offset'] = offset
                    headers = self.make_headers()
                    yield scrapy.Request(url,
                                         meta=meta,
                                         callback=self.parse,
                                         headers=headers,
                                         dont_filter=True,
                                         errback=self.errback_httpbin)
        else:
            # No goods_list key (blocked / malformed response): re-request the
            # same URL with the same meta.  NOTE(review): this can loop
            # indefinitely if the server keeps answering without goods_list.
            yield scrapy.Request(response.url,
                                 meta=meta,
                                 callback=self.parse,
                                 headers=response.headers,
                                 dont_filter=True,
                                 errback=self.errback_httpbin)
# ---- Exemplo n.º 7 (scraped example separator; votes: 0) ----
 def parse(self, response):
     """Decode a goods-detail payload, bump the success counter, and emit
     the goods dict augmented with its price and sku blocks."""
     payload = json.loads(response.body.decode('utf-8'))  # bytes -> str -> dict
     self.success_count += 1
     item = CategoryGoodsItem()
     item['goods_lists'] = payload['goods']
     item['goods_lists']['price'] = payload['price']
     item['goods_lists']['sku'] = payload['sku']
     yield item
# ---- Exemplo n.º 8 (scraped example separator; votes: 0) ----
 def parse(self, response):
     """Decode a goods JSON payload and emit a compact summary item.

     Fix: removed a stray leading ``pass`` statement that was dead code
     (it silently suggested the body was disabled, but everything after
     it still ran).
     """
     receive_info = response.body.decode('utf-8')  # bytes -> str
     goods_info = json.loads(receive_info)
     goods_id = goods_info['goods_id']
     #self.save_goods_log(goods_id, receive_info)
     item = CategoryGoodsItem()
     item['goods_lists'] = {
         'goods_id': goods_id,
         'goods_sales': goods_info['sales'],
         'mall_id': goods_info['mall_id'],
         'goods_price': goods_info['min_on_sale_group_price']
     }
     yield item
# ---- Exemplo n.º 9 (scraped example separator; votes: 0) ----
    def parse_goods_info(self, response):
        """Scrape ``window.rawData`` from a goods HTML page and emit the
        meta-carried goods_info with its price filled in.

        Fix: the regex is now a raw string.  In a normal string ``\;`` and
        ``\<`` are invalid escape sequences (DeprecationWarning today,
        SyntaxWarning/error in newer Pythons); the matched pattern itself is
        unchanged.
        """
        goods_info = response.meta['goods_info']
        content = response.body.decode('utf-8')
        match = re.search(r'window\.rawData= (.*)\;\s*\<\/script\>', content)
        if match:
            content = json.loads(match.group(1))
            if 'goods' not in content:
                # NOTE(review): generator 'return False' only sets the
                # StopIteration value — callers never see it.
                return False
            goods = content['goods']
            goods_info['price'] = goods['minOnSaleGroupPrice']

        item = CategoryGoodsItem()
        item['goods_lists'] = [goods_info]
        yield item
	def parse(self, response):
		"""Decode a goods payload, record a success log entry (including the
		proxy used), and emit the goods dict with price and sku attached."""
		payload = json.loads(response.body.decode('utf-8'))  # bytes -> str -> dict
		self.success_count += 1
		self.save_count_log(json.dumps({
			'type': 'success',
			'proxy': response.meta['proxy'],
			'success_count': self.success_count,
			'error_count': self.error_count,
		}))
		item = CategoryGoodsItem()
		item['goods_lists'] = payload['goods']
		item['goods_lists']['price'] = payload['price']
		item['goods_lists']['sku'] = payload['sku']
		yield item
    def parse(self, response):
        """Parse a page of category goods from the ranking API and paginate
        with the server-provided flip cursor and a growing offset."""
        logging.debug(response.meta)
        meta = response.meta
        offset = response.meta['offset']
        cat_id = response.meta['cat_id']
        opt_type = response.meta['opt_type']
        receive_info = response.body
        data = json.loads(receive_info.decode('utf-8'))
        self.save_goods_log(
            cat_id,
            json.dumps({
                'cat_id': cat_id,
                'offset': offset,
                'receive_info': data
            }))
        goods_lists = []
        if 'goods_list' not in data.keys():
            # Unexpected payload: run the error path (retry bookkeeping
            # presumably lives in err_after — confirm).
            self.err_after(meta, True)
            # NOTE(review): generator 'return False' only sets the
            # StopIteration value.
            return False
        flip = data['flip']  # pagination cursor echoed into the next request
        if len(data['goods_list']) > 0:
            i = 0
            for goods_data in data['goods_list']:
                i += 1
                rank = offset + i  # absolute rank = offset + in-page position
                goods_data['rank'] = rank
                goods_data['subject_id'] = cat_id
                goods_data['type'] = 2
                goods_lists.append(goods_data)

            offset += i
            item = CategoryGoodsItem()
            item['goods_lists'] = goods_lists
            yield item
            # Full page received and depth cap not reached -> next slice.
            if i >= self.realPagesise and offset < 1000 - self.realPagesise:
                url = self.build_url(opt_type, cat_id, offset, flip)
                meta['offset'] = offset
                meta['flip'] = flip
                headers = self.make_headers()
                yield scrapy.Request(url,
                                     meta=meta,
                                     callback=self.parse,
                                     headers=headers,
                                     dont_filter=True,
                                     errback=self.errback_httpbin)
# ---- Exemplo n.º 12 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Accumulate ranked goods across activity pages in the meta-carried
        list; emit one item once a page comes back empty."""
        meta_in = response.meta
        page = int(meta_in['page'])
        subject_id = meta_in['subject_id']
        goods_lists = meta_in['goods_lists']
        activity_type = meta_in['activity_type']
        payload = json.loads(response.body.decode('utf-8'))

        if not payload['goods_list']:
            # No more pages: flush everything gathered so far.
            item = CategoryGoodsItem()
            item['goods_lists'] = goods_lists
            yield item
            return

        base = (page - 1) * 500  # rank offset for this page (500 per page)
        for idx, entry in enumerate(payload['goods_list'], start=1):
            goods_lists.append({
                'goods_id': entry['goods_id'],
                'rank': base + idx,
                'subject_id': subject_id,
                'type': 1,
                'price': float(entry['group']['price'] / 100),
            })

        yield scrapy.Request(
            self.get_activity_url(subject_id, activity_type, page + 1),
            meta={
                'subject_id': subject_id,
                'page': page + 1,
                'goods_lists': goods_lists,
                'activity_type': activity_type,
            },
            callback=self.parse,
            headers=self.make_headers(),
        )
# ---- Exemplo n.º 13 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Scan activity pages for goods carrying one specific promo icon and
        record them under the fixed subject id -618 (type 1).

        The rank for each kept goods comes from its own ``cnt`` field, not
        from the page position.
        """
        subject_info = response.meta['subject_info']
        page = int(response.meta['page'])
        goods_list = response.meta['goods_list']
        page_size = int(response.meta['page_size'])
        rank = (page - 1) * page_size + 1  # base rank; overwritten per goods below

        content = json.loads(response.body.decode('utf-8'))

        if 'goods_list' in content.keys() and len(content['goods_list']) > 0:
            for goods in content['goods_list']:
                if 'icon' in goods.keys():
                    # Hard-coded badge image identifies the promotion of
                    # interest (presumably a 6.18 event, given the -618
                    # subject id — TODO confirm).
                    if goods['icon'][
                            'url'] == 'http://t00img.yangkeduo.com/t02img/images/2018-05-31/b0a6bc8699c92bf40d9d6d46b63dd49f.png':
                        price = float(goods['group']['price'] / 100)
                        goods_id = goods['goods_id']
                        rank = int(goods['cnt'])  # rank taken from the goods' own counter
                        goods_info = self.build_goods_rank_info(
                            goods_id, -618, 1, rank, price)
                        #rank += 1
                        goods_list.append(goods_info)

            page += 1
            url = self.get_activity_url(subject_info, page, page_size)
            meta = {
                'subject_info': subject_info,
                'page': page,
                'page_size': page_size,
                'goods_list': goods_list
            }
            headers = self.make_headers()
            yield scrapy.Request(url,
                                 meta=meta,
                                 callback=self.parse,
                                 headers=headers)

        else:
            # Listing exhausted: emit everything collected across pages.
            if goods_list:
                item = CategoryGoodsItem()
                item['goods_lists'] = goods_list
                #print(goods_list)
                yield item
# ---- Exemplo n.º 14 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Page through a category ranking: emit each batch of ranked goods,
        then request the next offset while full pages keep arriving."""
        meta = response.meta
        offset = meta['offset']
        cat_id = meta['cat_id']
        opt_type = meta['opt_type']
        payload = json.loads(response.body.decode('utf-8'))
        flip = payload['flip']

        if payload['goods_list']:
            ranked = [
                {
                    'goods_id': entry['goods_id'],
                    'rank': offset + pos,
                    'subject_id': cat_id,
                    'type': 2,
                    'price': float(entry['group']['price'] / 100),
                }
                for pos, entry in enumerate(payload['goods_list'], start=1)
            ]

            offset += len(ranked)
            item = CategoryGoodsItem()
            item['goods_lists'] = ranked
            yield item

            # Full page and under the depth cap -> queue the next offset.
            if len(ranked) >= self.pagesise and offset < 900:
                meta['offset'] = offset
                yield scrapy.Request(self.build_url(opt_type, cat_id, offset, flip),
                                     meta=meta,
                                     callback=self.parse,
                                     headers=self.make_headers(),
                                     dont_filter=True,
                                     errback=self.errback_httpbin)
    def parse(self, response):
        """Drain goods entries from an SSDB hash and yield them as items.

        NOTE(review): hscan is always called with an empty start key, so each
        pass re-reads the same first 10 entries; if those entries are valid
        dicts (never hdel'ed) this loop never terminates — confirm intended.
        NOTE(review): a single CategoryGoodsItem instance is reused and
        mutated across yields; downstream consumers that keep references
        will only see the last value.
        """
        hash_name = 'pdd_category_goods_sales_hash'
        is_end = False
        start_key = ''  # NOTE(review): never advanced nor passed to hscan
        item = CategoryGoodsItem()

        while not is_end:
            goods_list = self.ssdb.hscan(hash_name, '', '', 10)
            if not goods_list:
                is_end = True
                continue

            for i in goods_list:
                i = json.loads(i.decode('utf-8'))

                if type(i) != dict:
                    # Malformed entry: drop it from the hash and move on.
                    self.ssdb.hdel(hash_name, i)
                    continue

                item['goods_lists'] = i
                yield item
# ---- Exemplo n.º 16 (scraped example separator; votes: 0) ----
	def parse(self, response):
		"""Split a goods-detail payload into detail/skus/galleries and emit
		them together as one item (logged for debugging)."""
		goods_id = response.meta['goods_id']  # goods/shop id carried via meta
		payload = json.loads(response.body.decode('utf-8'))  # bytes -> str -> dict

		if 'goods_id' in payload["goods"]:
			detail, skus, galleries = self.make_goods_quantity_data(payload, int(time.time()))
			logging.debug(json.dumps({
				'detail': detail,
				'skus': skus,
				'galleries': galleries
			}))
			item = CategoryGoodsItem()
			item['goods_lists'] = {
				'detail': detail,
				'skus': skus,
				'galleries': galleries
			}
			yield item
# ---- Exemplo n.º 17 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Dispatch on the subject's api_type and emit one item with the goods
        collected by the matching helper.

        Fixes: removed the dead ``null = None`` / ``false = None`` local
        assignments (never read — apparent leftovers from eval-style JSON
        handling, and ``false = None`` was wrong anyway) and switched
        ``in x.keys()`` membership tests to the direct ``in x`` form.

        NOTE(review): rank is never incremented in the loops below; every
        helper call for this page receives the same base rank — confirm the
        get_*_info helpers derive per-item ranks themselves.
        """
        result = json.loads(response.body.decode('utf-8'))
        subject_info = response.meta['subject_info']
        page = int(response.meta['page'])
        rank = (page - 1) * 500 + 1  # base rank for this page (500 per page)
        api_type = subject_info["api_type"]
        goods_list = []

        if api_type in [72]:  # "list" payload: homepage sub-activity
            if "list" in result and len(result["list"]) > 0:
                for i in result["list"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
            else:
                return None
        if api_type in [71]:  # "goods_list" payload: homepage goods
            if "goods_list" in result and len(result["goods_list"]) > 0:
                for i in result["goods_list"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
            else:
                return None
        if api_type in [31]:  # "goods_list" payload: brand hall
            if "goods_list" in result and len(result["goods_list"]) > 0:
                for i in result["goods_list"]:
                    goods_info = self.get_goods_info_brand(
                        subject_info, rank, i)
                    goods_list.append(goods_info)
            else:
                return None

        if api_type in [62, 63, 64]:  # result.goods_list payload: homepage carousel
            if "result" in result and len(
                    result["result"]["goods_list"]) > 0:
                for i in result["result"]["goods_list"]:
                    goods_info = self.get_goods_info_lunbo_1(
                        subject_info, rank, i)
                    goods_list.append(goods_info)
            else:
                return None
        if api_type in [51]:  # result.goods_list payload: "shopping street" feed
            if "result" in result and len(
                    result["result"]["goods_list"]) > 0:
                for i in result["result"]["goods_list"]:
                    goods_info = self.get_shopping_goods_info(
                        subject_info, rank, i)
                    goods_list.append(goods_info)
            else:
                return None

        if api_type in [61]:  # "goods_list" payload: homepage carousel variant
            if "goods_list" in result and len(result["goods_list"]) > 0:
                for i in result["goods_list"]:
                    goods_info = self.get_goods_info_lunbo_2(
                        subject_info, rank, i)
                    goods_list.append(goods_info)
            else:
                return None

        item = CategoryGoodsItem()
        item['goods_lists'] = goods_list
        yield item
# ---- Exemplo n.º 18 (scraped example separator; votes: 0) ----
    def parse(self, response):
        """Dispatch on api_type, collect goods via the matching helper, log
        them, and emit a single item for the page.

        Fix: the flash-sale branch tested ``"item" in result`` but then read
        ``result["items"]`` — a response carrying only ``items`` was silently
        skipped, and one carrying only ``item`` raised KeyError.  The guard
        now checks the key it actually reads.

        NOTE(review): rank is never incremented inside the loops; confirm the
        get_*_info helpers derive per-item ranks themselves.
        """
        result = json.loads(response.body.decode('utf-8'))
        logging.debug(json.dumps({"result": result, "meta": response.meta}))
        subject_info = response.meta['subject_info']
        page = int(response.meta['page'])
        rank = (page - 1) * 500 + 1  # base rank for this page (500 per page)
        api_type = subject_info["api_type"]
        goods_list = []
        if api_type in [11, 14, 15]:  # "items" payload: limited-time flash sale
            if "items" in result and len(result["items"]) > 0:
                for i in result["items"]:
                    goods_info = self.get_kill_goods_info(
                        subject_info, rank, i["data"])
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_kill': goods_info}))
            else:
                return None
        if api_type in [16]:  # "result" payload
            if "result" in result and len(result["result"]) > 0:
                for i in result["result"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_others':
                                              goods_info}))
            else:
                return None
        if api_type in [21]:  # "list" payload: clearance
            if "list" in result and len(result["list"]) > 0:
                for i in result["list"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_short': goods_info}))
            else:
                return None
        if api_type in [31, 41, 61]:  # "goods_list" payload: brand hall / 9.9-yuan sale
            if "goods_list" in result and len(result["goods_list"]) > 0:
                for i in result["goods_list"]:
                    goods_info = self.get_goods_info(subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(json.dumps({'goods_info_brand': goods_info}))
            else:
                return None
        if api_type in [51]:  # result.goods_list payload: "shopping street" feed
            if "result" in result and len(
                    result["result"]["goods_list"]) > 0:
                for i in result["result"]["goods_list"]:
                    goods_info = self.get_shopping_goods_info(
                        subject_info, rank, i)
                    goods_list.append(goods_info)
                    self.save_log(
                        json.dumps({'goods_info_shopping': goods_info}))
            else:
                return None

        item = CategoryGoodsItem()
        logging.debug(
            json.dumps({
                'goods_list': goods_list,
                "goods_len": len(goods_list)
            }))
        self.save_log(
            json.dumps({
                'goods_list_all': goods_list,
                "goods_len": len(goods_list)
            }))
        item['goods_lists'] = goods_list
        yield item