示例#1
0
    def parse(self, response):
        """Store a product's sold count and yield one price item per size.

        Reads ``title``, ``productId``, ``brand`` and ``serie`` from
        ``response.meta`` and the ``data`` object from the JSON response body.
        The sold count is written to ``Product[pid]`` and committed before any
        item is yielded.

        Yields:
            PriceItem: one for every ``sizeList`` entry whose ``item`` is
            non-empty.
        """
        meta = response.meta
        title = meta.get('title')
        pid = meta.get('productId')
        brand = meta.get('brand')
        serie = meta.get('serie')

        # ``response.text`` replaces the deprecated ``body_as_unicode()``.
        data = json.loads(response.text)['data']
        soldNum = data['detail']['soldNum']
        Product[pid].soldNum = soldNum
        commit()

        sizeList = data['sizeList']
        sizeItem = data['item']
        # Per-size entries may carry an empty ``item`` (see the loop below), so
        # guard the headline one the same way instead of failing before any
        # price item is produced.
        # NOTE(review): prices are presumably in hundredths of the currency
        # unit, hence the division by 100 -- confirm against the API.
        price = sizeItem['price'] / 100 if sizeItem else None
        log.success(f'商品:{title} 编号:{pid} 价格: {price} 交易数量: {soldNum}')

        for s in sizeList:
            item = s['item']
            if not item:
                # Nothing to price for this size.
                continue
            yield PriceItem(
                id=pid,
                brand=brand,
                serie=serie,
                title=title,
                size=s['size'],
                formatSize=s['formatSize'],
                price=item['price'] / 100,
                soldNum=soldNum,
            )
示例#2
0
 def process_item(self, item, spider):
     """Create or update the ``Product`` for a scraped item and pass the item on.

     The product is looked up by the item's ``id``; when none exists a new one
     is created with that id. Every detail field is then overwritten with the
     value found in the item (``None`` when the key is absent).

     Returns:
         The unmodified ``item``.
     """
     pid = item.get('id')
     title = item.get('title')
     soldNum = item.get('soldNum')
     sellDate = item.get('sellDate')

     # Reuse the stored product when there is one, otherwise start a new one.
     p = Product[pid] if Product.exists(id=pid) else Product(id=pid)

     p.url = item.get('url')
     p.title = title
     p.soldNum = soldNum
     p.logo = item.get('logo')
     p.categoryId = item.get('categoryId')
     p.images = item.get('images')
     p.sellDate = sellDate
     p.articleNumber = item.get('articleNumber')
     p.authPrice = item.get('authPrice')
     p.goodsId = item.get('goodsId')
     p.sizeList = item.get('sizeList')
     p.imageAndText = item.get('imageAndText')
     # The item key is 'detailJson' but the product attribute is 'json'.
     p.json = item.get('detailJson')

     log.success(f'商品:{title} 编号:{pid} 发售日期:{sellDate} 售出量: {soldNum} ')
     return item
示例#3
0
 def check_db():
     """Ping InfluxDB and terminate the process with status 1 if that fails.

     On success a confirmation is logged and the function returns ``None``.

     Raises:
         SystemExit: with code 1 when ``influxdb.ping()`` raises.
     """
     from DuTracker.tsdb import influxdb
     try:
         influxdb.ping()
     except Exception as e:
         # Any failure here is fatal; report the cause so the operator can
         # tell what went wrong rather than only seeing a generic message.
         log.error(f'InfluxDB 连接错误: {e}')
         sys.exit(1)
     log.success('InfluxDB 连接成功')
示例#4
0
	def parse_brandInfo(self, response):
		"""Schedule one product-list request per result page of a brand.

		Reads ``unionId`` and ``name`` from ``response.meta`` and the total
		product count from ``data['total']`` in the JSON response body.

		Yields:
			Request: one per page, numbered from 1, handled by
			``self.parse_productId``.
		"""
		# ``response.text`` replaces the deprecated ``body_as_unicode()``.
		data = json.loads(response.text)['data']
		unionId = response.meta.get('unionId')
		name = response.meta.get('name')

		num = data['total']
		# NOTE(review): 20 is presumably the API's page size -- confirm.
		pages = math.ceil(num / 20)
		log.success(f'品牌:{name} 编号:{unionId} 商品总数:{num} 页面数:{pages}')

		# A separate loop variable keeps the page count from being shadowed.
		for page in range(1, pages + 1):
			yield Request(page_url(unionId, page), callback=self.parse_productId, meta={
				'unionId': unionId,
				'name': self.brandIds[unionId]
			}, headers=headers())
示例#5
0
 def process_item(self, item, spider):
     """Create or update a ``Product`` with its title and brand/serie name.

     The product is looked up by the item's ``id`` and created when missing.
     The item's ``name`` is stored in ``brand`` when the spider is named
     ``'brand'`` and in ``serie`` when it is named ``'serie'``; for any other
     spider only the title is written.

     Returns:
         The unmodified ``item``.
     """
     pid = item.get('id')
     title = item.get('title')
     name = item.get('name')

     p = Product[pid] if Product.exists(id=pid) else Product(id=pid)
     p.title = title

     # For these two spiders the target attribute has the spider's own name.
     if spider.name in ('brand', 'serie'):
         setattr(p, spider.name, name)

     log.success(f'商品:{title} 编号:{pid}')
     return item
示例#6
0
文件: brand.py 项目: yftx/DuTracker
    def parse_brandList(self, response):
        """Register all listed brands, then request the selected ones.

        Every brand in ``data['list']`` of the JSON response body is stored in
        ``self.brandIds`` (id -> name) and logged. The ids to crawl come from
        ``self.Ids`` when ``self.auto`` is truthy, otherwise from an
        interactive prompt (whitespace-separated). Nothing is yielded when no
        id is selected.

        Yields:
            Request: one per selected brand id, handled by
            ``self.parse_brandInfo``.
        """
        # ``response.text`` replaces the deprecated ``body_as_unicode()``.
        brandList = json.loads(response.text)['data']['list']
        for brand in brandList:
            info = brand['brand']
            unionId = info['goodsBrandId']
            name = info['brandName']
            self.brandIds[unionId] = name
            log.success(f'品牌:{name} 编号:{unionId}')

        if self.auto:
            ids = self.Ids
        else:
            # split() with no argument drops empty tokens, so repeated spaces
            # or blank input cannot produce '' ids.
            ids = prompt('输入需要爬取的品牌编号', default='').split()

        if not ids:
            # This method is a generator: a returned value would be silently
            # discarded, so simply stop without yielding.
            return

        log.info(f'获取 {ids} 品牌包含商品')
        for unionId in ids:
            # NOTE(review): prompted ids are strings; verify they match the
            # key type stored in self.brandIds above.
            yield Request(page_url(unionId),
                          callback=self.parse_brandInfo,
                          meta={
                              'unionId': unionId,
                              'name': self.brandIds[unionId]
                          })
示例#7
0
文件: serie.py 项目: yftx/DuTracker
    def parse_serieList(self, response):
        """Register all listed series, then request the selected ones.

        Every entry of each ``seriesList`` inside ``data['list']`` of the JSON
        response body is stored in ``self.serieIds`` (id -> name) and logged.
        The ids to crawl come from ``self.Ids`` when ``self.auto`` is truthy,
        otherwise from an interactive prompt (whitespace-separated). Nothing
        is yielded when no id is selected.

        Yields:
            Request: one per selected series id, handled by
            ``self.parse_serieInfo``.
        """
        # ``response.text`` replaces the deprecated ``body_as_unicode()``.
        serieList = json.loads(response.text)['data']['list']
        for data in serieList:
            for serie in data['seriesList']:
                unionId = serie['productSeriesId']
                name = serie['name']
                self.serieIds[unionId] = name
                log.success(f'系列:{name} 编号:{unionId}')

        if self.auto:
            ids = self.Ids
        else:
            # split() with no argument drops empty tokens, so repeated spaces
            # or blank input cannot produce '' ids.
            ids = prompt('输入需要爬取的系列编号', default='').split()

        if not ids:
            # This method is a generator: a returned value would be silently
            # discarded, so simply stop without yielding.
            return

        log.info(f'获取 {ids} 系列包含商品')
        for unionId in ids:
            # NOTE(review): prompted ids are strings; verify they match the
            # key type stored in self.serieIds above.
            yield Request(page_url(unionId),
                          callback=self.parse_serieInfo,
                          meta={
                              'unionId': unionId,
                              'name': self.serieIds[unionId]
                          })