Exemplo n.º 1
0
    def parse(self, response):
        """Turn a JSON mall response into a ``MallItem`` and yield it.

        The response body is decoded as UTF-8 and parsed as JSON.  The mall
        fields are copied onto the item under the same keys they have in the
        payload; a key missing from the payload raises ``KeyError``.  The
        four address fields are always emitted as empty strings, since no
        address lookup is performed here.

        Yields:
            MallItem: the populated item, exactly once.
        """
        payload = json.loads(response.body.decode('utf-8'))

        item = MallItem()
        # Fields taken verbatim from the payload, in the original order.
        for field in ('mall_id', 'mall_name', 'goods_num', 'score_avg',
                      'mall_sales', 'is_open', 'status', 'logo'):
            item[field] = payload[field]
        # Address details are not resolved by this callback.
        for field in ('province', 'city', 'area', 'street'):
            item[field] = ''
        yield item
Exemplo n.º 2
0
    def parse(self, response):
        """Extract the mall record embedded in a page's ``window.rawData``.

        The body is decoded as UTF-8 and searched for a
        ``window.rawData= <json>;</script>`` assignment.  The JSON value must
        be an object holding a ``mallInfo`` object that carries ``mallID``;
        its camelCase fields are copied onto a ``MallItem`` under snake_case
        keys, and the four address fields are set to empty strings.

        Whenever the payload is unusable (no match, invalid JSON, a value of
        the wrong shape, missing ``mallInfo`` or ``mallID``),
        ``self.err_after(meta)`` is called and nothing is yielded.  A
        ``mallInfo`` whose ``error`` equals ``'needLogin'`` calls
        ``self.err_after(meta, True)`` instead.

        Yields:
            MallItem: the populated item, at most once.

        Raises:
            KeyError: if ``mallInfo`` has ``mallID`` but lacks one of the
                other copied fields.
        """
        meta = response.meta
        html = response.body.decode('utf-8')

        # Raw string: the pattern is unchanged, but the backslashes no longer
        # rely on Python passing invalid string escapes through.
        regex_content = re.search(
            r'window\.rawData= (.*)\;\s*\<\/script\>', html)
        if not regex_content:
            self.err_after(meta)
            # Bare return: a generator's return value is discarded, and
            # Scrapy warns about generator callbacks that return a value.
            return

        try:
            raw_data = json.loads(regex_content.group(1))
        except ValueError:
            # json.JSONDecodeError subclasses ValueError; treat a malformed
            # payload like every other unusable-page case.
            self.err_after(meta)
            return
        logging.info(raw_data)

        # Also covers empty/None payloads and non-object JSON values.
        if not isinstance(raw_data, dict) or 'mallInfo' not in raw_data:
            self.err_after(meta)
            return

        mall_info = raw_data['mallInfo']
        print(mall_info)
        if not isinstance(mall_info, dict):
            self.err_after(meta)
            return
        if mall_info.get('error') == 'needLogin':
            self.err_after(meta, True)
            return
        if 'mallID' not in mall_info:
            self.err_after(meta)
            return

        mall_data = MallItem()
        # (item key, key in mallInfo) pairs, in the order they are set.
        field_map = (
            ('mall_id', 'mallID'),
            ('mall_name', 'mallName'),
            ('goods_num', 'goodsNum'),
            ('score_avg', 'scoreAvg'),
            ('mall_sales', 'mallSales'),
            ('is_open', 'isOpen'),
            ('status', 'status'),
            ('logo', 'logo'),
        )
        for item_key, source_key in field_map:
            mall_data[item_key] = mall_info[source_key]
        # Address details are not resolved by this callback.
        for item_key in ('province', 'city', 'area', 'street'):
            mall_data[item_key] = ''

        logging.info(mall_data)
        yield mall_data
Exemplo n.º 3
0
    def parse(self, response):
        """Build a ``MallItem`` from a JSON mall response and log it.

        The response body is decoded as UTF-8 and parsed as JSON.  The mall
        fields are copied onto the item under the same keys they have in the
        payload (a missing key raises ``KeyError``); the four address fields
        are set to empty strings.  The finished item is passed to
        ``logging.info``.

        NOTE(review): the item is only logged -- it is neither yielded nor
        returned, so this callback produces no output. Confirm that is
        intended.
        """
        payload = json.loads(response.body.decode('utf-8'))

        item = MallItem()
        copied_fields = ('mall_id', 'mall_name', 'goods_num', 'score_avg',
                         'mall_sales', 'is_open', 'status', 'logo')
        for field in copied_fields:
            item[field] = payload[field]
        # Address details are not resolved by this callback.
        blank_fields = ('province', 'city', 'area', 'street')
        for field in blank_fields:
            item[field] = ''
        logging.info(item)
Exemplo n.º 4
0
    def parse(self, response):
        """Turn a JSON mall response into a ``MallItem`` and yield it.

        The response body is decoded as UTF-8 and parsed as JSON.  The mall
        fields are copied onto the item under the same keys they have in the
        payload.  The payload's ``refund_address`` is then passed to
        ``self.get_address_info`` and the ``province``, ``city``, ``area``
        and ``street`` entries of its result are copied onto the item.
        A key missing from either mapping raises ``KeyError``.

        Yields:
            MallItem: the populated item, exactly once.
        """
        payload = json.loads(response.body.decode('utf-8'))

        item = MallItem()
        # Fields taken verbatim from the payload, in the original order.
        for field in ('mall_id', 'mall_name', 'goods_num', 'score_avg',
                      'mall_sales', 'is_open', 'status', 'logo'):
            item[field] = payload[field]

        # Address parts come from the helper's result for refund_address.
        address = self.get_address_info(payload['refund_address'])
        for field in ('province', 'city', 'area', 'street'):
            item[field] = address[field]

        yield item