Exemplo n.º 1
0
    def process_detailpage(self):
        """Save one item per ``gname_dict`` entry, priced from the JSON response.

        The response body is decoded as JSON and looked up by the keys of
        ``self.response.meta['gname_dict']`` (a key -> name mapping). For each
        key the looked-up value and its type are logged. Keys missing from the
        JSON payload are reported at ERROR level and skipped; the others are
        saved with a URL built from ``self.DETAIL_BASE_URL``, the key and an
        ``.html`` suffix.

        Returns the size of ``gname_dict``; entries skipped because of a
        missing price are still included in that count.
        """
        prices = json.loads(self.response.body, self.response.encoding)
        names_by_key = self.response.meta['gname_dict']
        for key, name in names_by_key.items():
            raw_price = prices.get(key)
            # Logged for every key, including ones absent from the payload
            # (those show up as None).
            self.log("%s, type:%s" % (raw_price, type(raw_price)))
            if key in prices:
                price = canonicalize_price(raw_price)
                detail_url = "%s%s.html" % (self.DETAIL_BASE_URL, key)
                self.save(detail_url, name, [], price)
            else:
                self.log("fail get price for %s" % key, level=log.ERROR)
        return len(names_by_key)
Exemplo n.º 2
0
    def process(self):
        """Save every product entry found on the current listing page.

        Selects the ``li`` nodes under ``div#prodlist`` and, for each one,
        extracts the link (prefixed with ``self.BASE_URL``), the price text
        (passed through ``canonicalize_price``) and the product name, then
        hands them to ``self.save`` with an empty tuple as the third argument.
        Afterwards ``self.next_page`` is called with the page selector.

        Returns the number of entries that were saved.
        """
        selector = HtmlXPathSelector(self.response)
        saved = 0  # stays 0 when the page has no product entries
        for saved, entry in enumerate(selector.select('//div[@id="prodlist"]/li'), 1):
            link = "%s%s" % (self.BASE_URL, extract_value(entry.select('a/@href')))
            price_node = entry.select(
                'p[@class="pimg"]/span[@class="pinfo"]/i[@class="ltprice"]/text()')
            price = canonicalize_price(extract_value(price_node))
            name_node = entry.select('p[@class="pimg"]/span[@class="pname"]/a/text()')
            title = extract_value(name_node)
            self.save(link, title, (), price)

        self.next_page(selector)
        return saved