Пример #1
0
 def deal_with_data(self, item, response):
     """Copy the fields of one game listing entry into ``item``.

     Args:
         item: Mapping-like object that receives the scraped fields
             through item assignment.
         response: Object exposing ``xpath()``; each expression below is
             evaluated against it and only the first match is kept.

     Returns:
         The same ``item`` after it has been filled in, or ``None`` when
         any step raises. The exception is printed, not re-raised, so a
         single malformed entry does not stop the caller.
     """
     def first(path):
         # First match for ``path`` on the entry node.
         return response.xpath(path).extract_first()

     try:
         item['name'] = first('div[2]/div[1]/div/a/text()')
         item['publish_time'] = first('div[2]/div[3]/p[2]/i/text()')
         item['developer'] = first('div[2]/div[3]/p[1]/i/text()')
         item['state'] = first('div[2]/div[1]/p[2]/i/text()')
         # The concatenation fails if no category text was extracted; that
         # error is handled below and the entry is dropped.
         item['category'] = '网游 ' + first('div[2]/div[1]/p[1]/i/a/text()')
         item['publisher'] = first('div[2]/div[2]/p[3]/i/text()')
         # NOTE(review): an earlier revision also stored
         # 'div[2]/div[2]/p[1]/i/text()' under 'popularity' and then
         # overwrote it with the value below. That first read was a dead
         # store and has been dropped; it presumably belonged to another
         # field - confirm against the page layout.
         item['popularity'] = first('div[2]/div[2]/p[2]/i/a/text()')
         item['score'] = first('div[2]/div[3]/div[2]/font/text()')
         img_url = first('div[1]/a/img/@src')
         filename = random_filename(img_url)
         item['img_url'] = img_url
         item['img_path'] = '/media/' + filename
         return item
     except Exception as e:
         # Best-effort parsing: report the problem and skip this entry.
         print(e)
         return None
 def deal_with_data(self, item, response):
     """Normalize one game listing entry into ``item``.

     Args:
         item: Mapping-like object that receives the scraped fields.
         response: Object exposing ``xpath()`` for the entry being parsed.

     Returns:
         The filled ``item``, or ``None`` if any extraction step raises
         (the exception is printed and swallowed).
     """
     def text_of(path):
         # First match for ``path`` on the entry node.
         return response.xpath(path).extract_first()

     def value_of(path):
         # Keep only what follows the last ':' of the extracted text.
         return text_of(path).split(':')[-1]

     # Keyed by (has class 'a1', has class 'a2'); 'a1' is Android and
     # 'a2' is Apple.
     platform_labels = {
         (True, True): '安卓 苹果',
         (True, False): '安卓',
         (False, True): '苹果',
         (False, False): '未知',
     }

     try:
         item['name'] = text_of('div[2]/a/text()')
         item['category'] = value_of('div[2]/p[1]/span[1]/text()')
         item['language'] = value_of('div[2]/p[1]/span[2]/text()')
         item['volume'] = value_of('div[2]/p[1]/span[3]/text()')

         link_classes = response.xpath('div[2]/p[2]/span[1]/a/@class').extract()
         item['platform'] = platform_labels[
             ('a1' in link_classes, 'a2' in link_classes)
         ]

         # An empty publisher string is replaced by the "unknown" label.
         publisher_name = value_of('div[2]/p[2]/span[2]/text()') or '未知'
         # NOTE(review): the '手游 ' prefix is attached to the publisher
         # here rather than to the category - confirm this is intended.
         item['publisher'] = '手游 ' + publisher_name

         item['publish_time'] = value_of('div[2]/p[2]/span[3]/text()')
         summary = text_of('div[2]/p[3]/text()')
         item['description'] = summary.replace('\n', '').strip()
         item['score'] = text_of('div[2]/div/div[2]/text()')

         # The image src is appended to the spider's base URL.
         picture_url = self.base_url + text_of('div[1]/a/img/@src')
         stored_name = random_filename(picture_url)
         item['img_url'] = picture_url
         item['img_path'] = '/media/' + stored_name
     except Exception as err:
         print(err)
         return None
     return item
Пример #3
0
 def deal_with_data(self, item, response):
     """Normalize one listing entry into ``item`` and return it.

     Args:
         item: Mapping-like object that receives the scraped fields.
         response: Object exposing ``xpath()`` for the entry being parsed.

     Returns:
         The same ``item`` with its fields filled in. Nothing is caught
         here, so any error raised while extracting propagates to the
         caller.
     """
     def first(path):
         # First match for ``path``.
         return response.xpath(path).extract_first()

     def after_colon(path):
         # Text following the last ':' with surrounding whitespace removed.
         return first(path).split(':')[-1].strip()

     # Main category: text of the navigation link whose class contains "cur".
     main_category = first(
         '//div[@class="nav"]/a[contains(@class, "cur")]/text()')

     item['name'] = first('div[1]/div[2]/a/text()')
     item['publish_time'] = after_colon('div[1]/div[3]/text()')

     sub_category = first('div[1]/div[4]/a/text()')
     item['category'] = ' '.join((main_category, sub_category))

     item['publisher'] = after_colon('div[1]/div[5]/text()')

     # Strip CRLF line breaks and ideographic spaces from the description.
     summary = first('div[1]/div[6]/p/text()')
     for unwanted in ('\r\n', '\u3000'):
         summary = summary.replace(unwanted, '')
     item['description'] = summary.strip()

     picture_url = first('div[1]/div[1]//img/@src')
     stored_name = random_filename(picture_url)
     item['img_url'] = picture_url
     item['img_path'] = '/media/' + stored_name
     return item
Пример #4
0
    def deal_with_data(self, item, response):
        """Normalize one game listing entry into ``item``.

        Args:
            item: Mapping-like object that receives the scraped fields.
            response: Object exposing ``xpath()`` for the entry being parsed.

        Returns:
            The filled ``item``, or ``None`` if any extraction step raises
            (the exception is printed and swallowed).
        """
        def first(path):
            # First match for ``path`` on the entry node.
            return response.xpath(path).extract_first()

        def tail(path):
            # Keep only what follows the last ':' of the extracted text.
            return first(path).split(':')[-1]

        try:
            # Name is "<title>-<subtitle>", both taken from the "bt" link.
            title = first('a[@class="bt"]/text()').strip()
            subtitle = first('a[@class="bt"]/span/text()')
            item['name'] = '-'.join((title, subtitle))

            item['publish_time'] = tail('ul[@class="info"]/li[1]/text()')
            item['publisher'] = tail('ul[@class="info"]/li[2]/text()')
            item['developer'] = tail('ul[@class="info"]/li[3]/text()')
            item['platform'] = tail('ul[@class="info"]/li[4]/text()')
            item['category'] = '单机 ' + tail('ul[@class="info"]/li[5]/text()')
            item['language'] = tail('ul[@class="info"]/li[6]/text()')

            summary = first('div[@class="miaoshu"]/text()')
            item['description'] = summary.replace('\n', '').strip()
            item['score'] = first('div[@class="pfbox"]//font/text()')

            cover_url = first('a[@class="img"]/img/@src')
            stored_name = random_filename(cover_url)
            item['img_url'] = cover_url
            item['img_path'] = '/media/' + stored_name
        except Exception as err:
            print(err)
            return None
        return item