def deal_with_data(self, item, response):
    """Normalize one online-game ('网游') entry into ``item``.

    Args:
        item: Object supporting ``item[key] = value``; filled in place.
        response: Selector-like object exposing ``xpath()``; every XPath
            used here is relative to it.

    Returns:
        ``item`` on success, or ``None`` when any step raises; in that case
        the exception is printed and the entry is skipped.
    """
    def first(path):
        # First extracted result of a relative XPath query.
        return response.xpath(path).extract_first()

    try:
        item['name'] = first('div[2]/div[1]/div/a/text()')
        item['publish_time'] = first('div[2]/div[3]/p[2]/i/text()')
        item['developer'] = first('div[2]/div[3]/p[1]/i/text()')
        item['state'] = first('div[2]/div[1]/p[2]/i/text()')
        # The genre is tagged with the game type. If the genre node is
        # missing the concatenation raises and the entry is dropped below.
        item['category'] = '网游 ' + first('div[2]/div[1]/p[1]/i/a/text()')
        item['publisher'] = first('div[2]/div[2]/p[3]/i/text()')
        # NOTE(review): 'div[2]/div[2]/p[1]/i/text()' is not mapped to any
        # field. A duplicate assignment to 'popularity' once read it but was
        # always overwritten by the line below; confirm whether that node
        # belongs to a different field.
        item['popularity'] = first('div[2]/div[2]/p[2]/i/a/text()')
        item['score'] = first('div[2]/div[3]/div[2]/font/text()')

        # Only the image URL and its target path are recorded here; the
        # image itself is not downloaded by this method.
        img_url = first('div[1]/a/img/@src')
        filename = random_filename(img_url)
        item['img_url'] = img_url
        item['img_path'] = '/media/' + filename
        return item
    except Exception as e:
        # Best effort: a malformed entry is reported and skipped rather
        # than aborting the whole crawl.
        print(e)
        return None
def deal_with_data(self, item, response):
    """Normalize one mobile-game ('手游') entry into ``item``.

    Args:
        item: Object supporting ``item[key] = value``; filled in place.
        response: Selector-like object exposing ``xpath()``; every XPath
            used here is relative to it.

    Returns:
        ``item`` on success, or ``None`` when any step raises; in that case
        the exception is printed and the entry is skipped.
    """
    def first(path):
        # First extracted result of a relative XPath query.
        return response.xpath(path).extract_first()

    def after_colon(path):
        # Keep only the text that follows the last ':' in the node's text.
        return first(path).split(':')[-1]

    try:
        item['name'] = first('div[2]/a/text()')
        # The game-type tag goes on the category, not on the publisher, so
        # that the category reads "<type> <genre>".
        item['category'] = '手游 ' + after_colon('div[2]/p[1]/span[1]/text()')
        item['language'] = after_colon('div[2]/p[1]/span[2]/text()')
        item['volume'] = after_colon('div[2]/p[1]/span[3]/text()')

        # Anchor classes encode the platform: a1 = Android, a2 = Apple.
        classes = response.xpath('div[2]/p[2]/span[1]/a/@class').extract()
        has_android = 'a1' in classes
        has_apple = 'a2' in classes
        if has_android and has_apple:
            item['platform'] = '安卓 苹果'
        elif has_android:
            item['platform'] = '安卓'
        elif has_apple:
            item['platform'] = '苹果'
        else:
            item['platform'] = '未知'

        # An empty publisher is stored as '未知' (unknown).
        publisher = after_colon('div[2]/p[2]/span[2]/text()')
        item['publisher'] = publisher or '未知'

        item['publish_time'] = after_colon('div[2]/p[2]/span[3]/text()')
        item['description'] = first('div[2]/p[3]/text()').replace('\n', '').strip()
        item['score'] = first('div[2]/div/div[2]/text()')

        # The image src is joined onto the spider's base URL.
        img_url = self.base_url + first('div[1]/a/img/@src')
        filename = random_filename(img_url)
        item['img_url'] = img_url
        item['img_path'] = '/media/' + filename
        return item
    except Exception as e:
        # Best effort: a malformed entry is reported and skipped rather
        # than aborting the whole crawl.
        print(e)
        return None
def deal_with_data(self, item, response):
    """Normalize one entry into ``item`` and return it.

    The top-level category is taken from the navigation link whose class
    contains "cur" and is joined with the entry's own category. Nothing is
    caught here: any exception raised while parsing propagates to the
    caller.

    Args:
        item: Object supporting ``item[key] = value``; filled in place.
        response: Selector-like object exposing ``xpath()``.

    Returns:
        The same ``item`` that was passed in.
    """
    def first(path):
        # First extracted result of an XPath query.
        return response.xpath(path).extract_first()

    def value_after_colon(path):
        # Text following the last ':' with surrounding whitespace removed.
        return first(path).split(':')[-1].strip()

    # Top-level category, read from the navigation bar.
    top_category = first('//div[@class="nav"]/a[contains(@class, "cur")]/text()')

    item['name'] = first('div[1]/div[2]/a/text()')
    item['publish_time'] = value_after_colon('div[1]/div[3]/text()')

    sub_category = first('div[1]/div[4]/a/text()')
    item['category'] = ' '.join((top_category, sub_category))

    item['publisher'] = value_after_colon('div[1]/div[5]/text()')

    # Strip line breaks and ideographic spaces from the description text.
    raw_description = first('div[1]/div[6]/p/text()')
    item['description'] = (
        raw_description.replace('\r\n', '').replace('\u3000', '').strip()
    )

    # Only the image URL and its target path are recorded here; the image
    # itself is not downloaded by this method.
    cover_url = first('div[1]/div[1]//img/@src')
    cover_name = random_filename(cover_url)
    item['img_url'] = cover_url
    item['img_path'] = '/media/' + cover_name
    return item
def deal_with_data(self, item, response):
    """Normalize one single-player ('单机') game entry into ``item``.

    Args:
        item: Object supporting ``item[key] = value``; filled in place.
        response: Selector-like object exposing ``xpath()``; every XPath
            used here is relative to it.

    Returns:
        ``item`` on success, or ``None`` when any step raises; in that case
        the exception is printed and the entry is skipped.
    """
    def first(path):
        # First extracted result of a relative XPath query.
        return response.xpath(path).extract_first()

    def after_colon(path):
        # Keep only the text that follows the last ':' in the node's text.
        return first(path).split(':')[-1]

    try:
        # Name is "<title>-<subtitle>", both taken from the "bt" anchor.
        title = first('a[@class="bt"]/text()').strip()
        subtitle = first('a[@class="bt"]/span/text()')
        item['name'] = '-'.join((title, subtitle))

        # The info list holds one "label:value" style entry per <li>.
        item['publish_time'] = after_colon('ul[@class="info"]/li[1]/text()')
        item['publisher'] = after_colon('ul[@class="info"]/li[2]/text()')
        item['developer'] = after_colon('ul[@class="info"]/li[3]/text()')
        item['platform'] = after_colon('ul[@class="info"]/li[4]/text()')
        # The genre is tagged with the game type.
        item['category'] = '单机 ' + after_colon('ul[@class="info"]/li[5]/text()')
        item['language'] = after_colon('ul[@class="info"]/li[6]/text()')

        summary = first('div[@class="miaoshu"]/text()')
        item['description'] = summary.replace('\n', '').strip()
        item['score'] = first('div[@class="pfbox"]//font/text()')

        cover_url = first('a[@class="img"]/img/@src')
        cover_name = random_filename(cover_url)
        item['img_url'] = cover_url
        item['img_path'] = '/media/' + cover_name
        return item
    except Exception as e:
        # Best effort: a malformed entry is reported and skipped.
        print(e)
        return None