def process_detailpage(self):
    """Save one item per entry of ``response.meta['gname_dict']``.

    The response body is decoded as JSON (using the response encoding) and
    looked up by each key of ``gname_dict``. For every key present in the
    JSON, an item is saved with:

    * url   -- ``DETAIL_BASE_URL`` + key + ``".html"``
    * name  -- the value stored under that key in ``gname_dict``
    * an empty list as the third ``save`` argument
    * price -- ``canonicalize_price`` applied to the JSON value

    Keys missing from the JSON are logged at error level and skipped.

    Returns ``len(gname_dict)``; note this count includes skipped keys.
    """
    prices_by_key = json.loads(self.response.body, self.response.encoding)
    names_by_key = self.response.meta['gname_dict']

    for key in names_by_key:
        raw_price = prices_by_key.get(key)
        # Trace the raw value and its type before deciding whether to use it.
        self.log("%s, type:%s" % (raw_price, type(raw_price)))

        if key in prices_by_key:
            price = canonicalize_price(raw_price)
            self.save(
                "%s%s.html" % (self.DETAIL_BASE_URL, key),
                names_by_key[key],
                [],
                price
            )
        else:
            self.log("fail get price for %s" % key, level=log.ERROR)

    return len(names_by_key)
def process(self):
    """Save every product found on the listing page, then follow paging.

    Each ``<li>`` directly under ``div#prodlist`` yields one ``save`` call
    with the link (prefixed by ``BASE_URL``), the product name, an empty
    tuple, and the canonicalized price text. ``next_page`` is invoked once
    after all entries have been handled.

    Returns the number of entries that were saved.
    """
    link_xpath = 'a/@href'
    price_xpath = (
        'p[@class="pimg"]/span[@class="pinfo"]/i[@class="ltprice"]/text()'
    )
    name_xpath = 'p[@class="pimg"]/span[@class="pname"]/a/text()'

    selector = HtmlXPathSelector(self.response)
    entries = selector.select('//div[@id="prodlist"]/li')

    # Stays 0 when the page has no entries; otherwise ends as the entry count.
    item_num = 0
    for item_num, entry in enumerate(entries, 1):
        href = extract_value(entry.select(link_xpath))
        url = "%s%s" % (self.BASE_URL, href)
        price = canonicalize_price(extract_value(entry.select(price_xpath)))
        name = extract_value(entry.select(name_xpath))
        self.save(url, name, (), price)

    self.next_page(selector)
    return item_num