def parse(self, response):
    """Yield one LivingSocialDeal per node matched by ``self.deals_list_xpath``.

    For every matched node a fresh item loader is created, each
    ``field -> xpath`` pair in ``self.item_fields`` is registered on it,
    and the loaded item is yielded.

    Note: this is Python 2 code (``unicode.strip``, ``dict.iteritems``).
    """
    deal_nodes = Selector(response).select(self.deals_list_xpath)

    for deal_node in deal_nodes:
        item_loader = XPathItemLoader(LivingSocialDeal(), selector=deal_node)

        # Strip surrounding whitespace from every extracted string on input,
        # and collapse the collected values into a single string on output.
        item_loader.default_input_processor = MapCompose(unicode.strip)
        item_loader.default_output_processor = Join()

        for field_name, field_xpath in self.item_fields.iteritems():
            item_loader.add_xpath(field_name, field_xpath)

        yield item_loader.load_item()
Пример #2
0
    def parse(self, response):
        """
        Default callback used by Scrapy to process downloaded responses

        Builds Grailed items by applying every ``field -> xpath`` pair in
        ``self.item_fields`` to ``response`` and yields the loaded items.
        Python 2 code (``unicode.strip``, ``dict.iteritems``).

        Testing contracts:
        @url http://www.livingsocial.com/cities/15-san-francisco
        @returns items 1
        @scrapes title link

        """

        # NOTE(review): the loop variable is unused and nothing differs
        # between iterations, so this yields 900 items loaded from the same
        # response. Kept as-is to preserve the number of yielded items --
        # confirm whether a per-page/per-node iteration was intended.
        for _ in range(100, 1000):
            # The loader must be bound to the downloaded page. The previous
            # code passed the Grailed item *class* as ``selector``, which has
            # no XPath support, so add_xpath() could not extract anything.
            # Passing ``response`` lets the loader build its own selector.
            loader = XPathItemLoader(Grailed(), response=response)

            # define processors: strip whitespace on input, join on output
            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()

            # iterate over fields and add xpaths to the loader
            for field, xpath in self.item_fields.iteritems():
                loader.add_xpath(field, xpath)
            yield loader.load_item()
Пример #3
0
    def parseItemWithLoader(self, response):
        """Yield one FlowersItem per node matched by the 'xpath_item' query.

        XPath expressions are read from ``self._x_query``. For each matched
        node the 'flowerName', 'imageUrl' and 'desc' fields are filled from
        their XPath queries, 'url' is set to the response URL, and only the
        first collected value of each field is kept in the loaded item.
        """
        # (item field, key into self._x_query) pairs, in registration order.
        xpath_fields = (
            ('flowerName', 'xpath_item_name'),
            ('imageUrl', 'xpath_item_image'),
            ('desc', 'xpath_item_desc'),
        )

        for node in HtmlXPathSelector(response).select(self._x_query['xpath_item']):
            loader = XPathItemLoader(FlowersItem(), selector=node)
            loader.default_output_processor = TakeFirst()

            for field_name, query_key in xpath_fields:
                loader.add_xpath(field_name, self._x_query[query_key])

            # Every item from this page records the page it was scraped from.
            loader.add_value('url', str(response.url))

            yield loader.load_item()