def find_entry(self, path, match):
    """Look for an entry in the etree whose text equals ``match``.

    Evaluates the expression stored in ``self.xpath`` against ``path``
    and scans the results in order.

    Returns the text of the first result whose ``text`` equals
    ``match``, or None when no result matches.
    """
    # next() over a generator stops at the first hit and works on
    # Python 3, where filter() returns a lazy iterator that supports
    # neither len() nor indexing.
    return next(
        (node.text for node in path.xpath(self.xpath) if node.text == match),
        None,
    )
    def get_all_entry(self, path):
        """Convert every entry found in the etree via ``self._get_one``.

        Evaluates the expression stored in ``self.xpath`` against ``path``.

        Returns a list holding one ``self._get_one`` result per entry when
        the XPath result is a list; otherwise returns the single
        ``self._get_one`` result for the whole XPath result. Per the
        original docstring each converted entry is presumably a
        dictionary -- confirm against ``_get_one``.
        """
        entries = path.xpath(self.xpath)

        # An XPath evaluation may yield a non-list value; hand that
        # straight to the converter instead of iterating over it.
        # isinstance() also accepts list subclasses, which an exact
        # type() comparison would wrongly treat as a single value.
        if not isinstance(entries, list):
            return self._get_one(entries)

        return [self._get_one(entry) for entry in entries]
Пример #3
0
 def book_pare(self, response):
     """Extract the book details from a product page.

     Args:
         response: object exposing an ``xpath()`` selector API
             (presumably a Scrapy response -- confirm against the caller).

     Yields:
         One populated ``BookSiteMainItem`` per node matched by the
         product container expression.

     Raises:
         IndexError: if the product table has fewer than seven ``td``
             cells.
     """
     container_xpath = '//*[@id="content_inner"]/article/div[1]/div[2]'
     cells_xpath = (
         '//*[@id="content_inner"]/article'
         '/table[@class="table table-striped"]//tr/td'
     )

     for path in response.xpath(container_xpath):
         # Build a fresh item per match so that yielded items never share
         # state with each other.
         item = BookSiteMainItem()

         # Every expression below starts with `//`, so it is resolved from
         # the document root even though it is evaluated on `path`.
         item['full_bookname'] = path.xpath(
             container_xpath + '/h1/text()'
         ).get()
         item['priceinpounds'] = path.xpath(
             container_xpath + '/p[1]/text()'
         ).get()
         item['rating'] = path.xpath(
             container_xpath + '/p[3]/@class'
         ).get()

         # Query the product table once and index into the result instead
         # of re-running the same expression for every field.
         # NOTE(review): the expression selects <td> elements rather than
         # their text(), so the values presumably still contain the tag
         # markup -- confirm whether `td/text()` was intended.
         cells = path.xpath(cells_xpath)
         item['upc'] = cells[0].get().split()[0]
         item['producttype'] = cells[1].get().split()[0]
         item['priceexcludetax'] = cells[2].get().split()[0]
         item['priceincludetax'] = cells[3].get().split()[0]
         item['tax'] = cells[4].get().split()[0]

         # The sixth cell carries both fields: its first two
         # whitespace-separated tokens are stored as the availability,
         # the remaining tokens as the number available.
         stock_tokens = cells[5].get().split()
         item['availability'] = stock_tokens[0:2]
         item['numberavailable'] = stock_tokens[2:]

         item['numberofreviews'] = cells[6].get().split()[0]
         yield item