def parse(self, response):
    """Build a ``MallItem`` from a JSON response body and yield it.

    The body is decoded as UTF-8 and parsed as JSON; the resulting mapping
    is expected to contain every key listed in ``copied_fields`` (a missing
    key raises ``KeyError`` when the generator is consumed).

    NOTE: the original carried a commented-out branch that re-requested the
    URL through a new proxy on HTTP 429, plus a commented-out SSDB queue
    push.  Both were disabled and remain so here.
    """
    # Fields that share the same name in the payload and on the item.
    copied_fields = (
        'mall_id',
        'mall_name',
        'goods_num',
        'score_avg',
        'mall_sales',
        'is_open',
        'status',
        'logo',
    )
    # The refund-address lookup (self.get_address_info) is disabled in this
    # variant, so the location fields are emitted as empty strings.
    blank_fields = ('province', 'city', 'area', 'street')

    payload = json.loads(response.body.decode('utf-8'))  # bytes -> str -> dict

    item = MallItem()
    for field in copied_fields:
        item[field] = payload[field]
    for field in blank_fields:
        item[field] = ''
    yield item
def parse(self, response):
    """Extract mall data from the page's embedded ``window.rawData`` JSON.

    The HTML body is searched for a ``window.rawData= ...;</script>``
    assignment.  Its JSON payload must be an object containing a
    ``mallInfo`` object, which in turn must contain ``mallID``.  When all of
    that holds, a populated ``MallItem`` is logged and yielded.

    Every failure path reports through ``self.err_after(meta)`` and ends the
    generator without yielding anything:

    * the ``window.rawData`` assignment is not found,
    * the payload is not valid JSON, is empty, or is not a JSON object,
    * ``mallInfo`` is missing or is not an object,
    * ``mallInfo['error'] == 'needLogin'`` (reported as
      ``self.err_after(meta, True)``),
    * ``mallID`` is missing.

    NOTE: the original carried a commented-out branch that re-requested the
    URL through a new proxy on HTTP 429, plus a commented-out SSDB queue
    push.  Both were disabled and remain so here.

    Args:
        response: the downloaded page; ``response.body`` is decoded as UTF-8
            and ``response.meta`` is handed to ``err_after`` on failure.

    Yields:
        MallItem: one item per successfully parsed page.
    """
    meta = response.meta
    html = response.body.decode('utf-8')  # bytes -> str

    # Raw string: the pattern text is unchanged, but it no longer relies on
    # Python passing invalid string escapes (``\.``, ``\s``, ...) through.
    found = re.search(r'window\.rawData= (.*)\;\s*\<\/script\>', html)
    if not found:
        self.err_after(meta)
        return

    try:
        raw_data = json.loads(found.group(1))
    except ValueError:
        # json.JSONDecodeError is a ValueError; a broken payload is handled
        # like any other malformed page instead of crashing the callback.
        logging.warning('window.rawData payload is not valid JSON')
        self.err_after(meta)
        return
    logging.info(raw_data)

    if not raw_data or not isinstance(raw_data, dict) or 'mallInfo' not in raw_data:
        self.err_after(meta)
        return

    mall_info = raw_data['mallInfo']
    if not isinstance(mall_info, dict):
        self.err_after(meta)
        return
    if mall_info.get('error') == 'needLogin':
        # The second argument flags the login-required case for err_after.
        self.err_after(meta, True)
        return
    if 'mallID' not in mall_info:
        self.err_after(meta)
        return

    # (item field, key in mallInfo) pairs, copied in this order.
    field_map = (
        ('mall_id', 'mallID'),
        ('mall_name', 'mallName'),
        ('goods_num', 'goodsNum'),
        ('score_avg', 'scoreAvg'),
        ('mall_sales', 'mallSales'),
        ('is_open', 'isOpen'),
        ('status', 'status'),
        ('logo', 'logo'),
    )
    mall_data = MallItem()
    for item_field, source_key in field_map:
        mall_data[item_field] = mall_info[source_key]

    # The refund-address lookup (self.get_address_info) is disabled in this
    # variant, so the location fields are emitted as empty strings.
    for item_field in ('province', 'city', 'area', 'street'):
        mall_data[item_field] = ''

    logging.info(mall_data)
    yield mall_data
def parse(self, response):
    """Build a ``MallItem`` from a JSON response body and log it.

    The body is decoded as UTF-8 and parsed as JSON; the resulting mapping
    is expected to contain every key listed in ``copied_fields`` (a missing
    key raises ``KeyError``).

    NOTE(review): the item is only written to the log -- it is never
    yielded or returned, so this callback produces no output for the
    pipeline.  Confirm that this is intentional.
    """
    # Fields that share the same name in the payload and on the item.
    copied_fields = (
        'mall_id',
        'mall_name',
        'goods_num',
        'score_avg',
        'mall_sales',
        'is_open',
        'status',
        'logo',
    )
    # The refund-address lookup (self.get_address_info) is disabled in this
    # variant, so the location fields are set to empty strings.
    blank_fields = ('province', 'city', 'area', 'street')

    payload = json.loads(response.body.decode('utf-8'))  # bytes -> str -> dict

    item = MallItem()
    for field in copied_fields:
        item[field] = payload[field]
    for field in blank_fields:
        item[field] = ''
    logging.info(item)
def parse(self, response):
    """Build a ``MallItem`` (including address parts) and yield it.

    The body is decoded as UTF-8 and parsed as JSON.  The keys listed in
    ``copied_fields`` are copied straight onto the item; the payload's
    ``refund_address`` is then passed to ``self.get_address_info`` and the
    ``province``/``city``/``area``/``street`` entries of its result are
    copied onto the item as well.  A missing key raises ``KeyError`` when
    the generator is consumed.
    """
    # Fields that share the same name in the payload and on the item.
    copied_fields = (
        'mall_id',
        'mall_name',
        'goods_num',
        'score_avg',
        'mall_sales',
        'is_open',
        'status',
        'logo',
    )
    address_fields = ('province', 'city', 'area', 'street')

    payload = json.loads(response.body.decode('utf-8'))

    item = MallItem()
    for field in copied_fields:
        item[field] = payload[field]

    # Resolved only after the plain fields, matching the original order.
    address = self.get_address_info(payload['refund_address'])
    for field in address_fields:
        item[field] = address[field]

    yield item