Python parse_recipes примеры использования

Язык программирования: Python

Пространство имен/Пакет: openrecipes.schema_org_parser

Метод/Функция: parse_recipes

Примеров на hotexamples.com: 7

Найдено 7 примеров использования parse_recipes на Python. Это лучшие примеры кода на Python для openrecipes.schema_org_parser.parse_recipes, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Пример #1

Показать файл

    def parse_item(self, response):
        """Turn a response into recipe items, preferring schema.org markup.

        The page is first handed to ``parse_recipes``. When that yields
        something truthy, every parsed recipe has its ``image`` overwritten
        with the ``data-lazy-src`` value(s) extracted from the page's
        ``wp-image`` ``<img>`` and is converted via ``RecipeItem.from_dict``;
        the resulting list is returned.

        Otherwise a single item is assembled by hand with
        ``RecipeItemLoader`` (source, url, image, title and whatever
        ingredient text can be found) and ``load_item()`` is returned.
        """
        selector = HtmlXPathSelector(response)
        image_path = selector.select(
            "descendant-or-self::img[@class and contains(@class, 'wp-image')][1]/@data-lazy-src"
        ).extract()

        raw_recipes = parse_recipes(
            selector,
            {'source': self.source, 'url': response.url},
        )

        if raw_recipes:
            # Page carries schema.org data: only the image needs replacing.
            for parsed in raw_recipes:
                parsed['image'] = image_path
            return [RecipeItem.from_dict(parsed) for parsed in raw_recipes]

        # No schema.org data: scrape the individual fields manually.
        loader = RecipeItemLoader(item=RecipeItem())
        loader.add_value('source', self.source)
        loader.add_value('url', response.url)
        loader.add_value('image', image_path)
        loader.add_value(
            'name',
            selector.select('//*[@class="post-title"]/h1/text()').extract(),
        )

        # Ingredients may sit in paragraphs or in list items; each candidate
        # node is kept only if is_ingredient_container() accepts it.
        candidate_xpaths = (
            '//div[@id="recipe" or @class="span9"]/p',
            '//*[@class="span9"]//ul/li',
        )
        for candidate_xpath in candidate_xpaths:
            for node in selector.select(candidate_xpath):
                if is_ingredient_container(node):
                    loader.add_value(
                        'ingredients', node.select('text()').extract())

        # Explicitly tagged ingredient list items are taken unconditionally.
        for text_node in selector.select('//li[@class="ingredient"]/text()'):
            loader.add_value('ingredients', text_node.extract())

        return loader.load_item()

Пример #2

Показать файл

Файл: marthastewart_sitemapspider.py Проект: rkroll/openrecipes

    def parse_item(self, response):
        """Convert every recipe ``parse_recipes`` finds on the page.

        ``parse_recipes`` receives an ``HtmlXPathSelector`` for the response
        together with this spider's ``source`` and the response URL; each
        recipe it returns is wrapped with ``RecipeItem.from_dict``.
        """
        selector = HtmlXPathSelector(response)
        defaults = {'source': self.source, 'url': response.url}

        items = []
        for parsed in parse_recipes(selector, defaults):
            items.append(RecipeItem.from_dict(parsed))
        return items

Пример #3

Показать файл

Файл: food_spider.py Проект: eleclerc/openrecipes

    def parse_item(self, response):
        """Parse recipes from a page, ignoring review pages.

        Returns an empty list when the URL ends with ``/review``. Otherwise
        the recipes produced by ``parse_recipes`` have their ``photo`` and
        ``image`` entries (when present) passed through ``flatten`` and are
        then converted with ``RecipeItem.from_dict``.
        """
        # Review pages are hard to tell apart from recipe pages in the link
        # extractor regex, so they are filtered out here instead.
        is_review_page = response.url.endswith('/review')
        if is_review_page:
            return []

        selector = HtmlXPathSelector(response)
        raw_recipes = parse_recipes(selector, {'source': self.source})

        for parsed in raw_recipes:
            for field in ('photo', 'image'):
                if field in parsed:
                    parsed[field] = flatten(parsed[field])

        items = []
        for parsed in raw_recipes:
            items.append(RecipeItem.from_dict(parsed))
        return items

Пример #4

Показать файл

Файл: food_spider.py Проект: jterskine/my_stuff

    def parse_item(self, response):
        """Return ``RecipeItem`` objects for a page, or ``[]`` for reviews.

        A URL ending in ``/review`` short-circuits to an empty list. For any
        other page, ``parse_recipes`` is run with this spider's ``source``;
        ``photo`` and ``image`` values, where a recipe has them, are replaced
        by ``flatten(...)`` of themselves before conversion with
        ``RecipeItem.from_dict``.
        """
        # The link extractor regex cannot reliably exclude review pages,
        # so drop them at parse time.
        if response.url.endswith('/review'):
            return []

        raw_recipes = parse_recipes(
            HtmlXPathSelector(response),
            {'source': self.source},
        )

        media_fields = ('photo', 'image')
        for entry in raw_recipes:
            for key in media_fields:
                if key not in entry:
                    continue
                entry[key] = flatten(entry[key])

        return [RecipeItem.from_dict(entry) for entry in raw_recipes]

Пример #5

Показать файл

Файл: foodnetwork_spider.py Проект: eleclerc/openrecipes

    def parse_item(self, response):
        """Parse recipes from a page, ignoring URLs containing ``/reviews/``.

        Returns an empty list for review URLs. Otherwise each recipe from
        ``parse_recipes`` has its ``photo`` and ``image`` entries (when
        present) passed through ``flatten`` and then has ``'_med.'`` replaced
        by ``'_lg.'`` in the result, before conversion with
        ``RecipeItem.from_dict``.
        """
        # Review pages are hard to tell apart from recipe pages in the link
        # extractor regex, so they are filtered out here instead.
        is_review_page = '/reviews/' in response.url
        if is_review_page:
            return []

        selector = HtmlXPathSelector(response)
        raw_recipes = parse_recipes(
            selector, {'source': self.source, 'url': response.url})

        for parsed in raw_recipes:
            for field in ('photo', 'image'):
                if field not in parsed:
                    continue
                parsed[field] = flatten(parsed[field])
                # NOTE(review): presumably swaps a medium-size image URL for
                # the large variant; confirm against the site's URL scheme.
                parsed[field] = parsed[field].replace('_med.', '_lg.')

        items = []
        for parsed in raw_recipes:
            items.append(RecipeItem.from_dict(parsed))
        return items

Пример #6

Показать файл

    def parse_item(self, response):
        """Return ``RecipeItem`` objects for a page, or ``[]`` for reviews.

        Any URL containing ``/reviews/`` short-circuits to an empty list.
        For other pages, ``parse_recipes`` is run with this spider's
        ``source`` and the response URL. Where a recipe has ``photo`` or
        ``image``, the value is flattened and every ``'_med.'`` in it is
        rewritten to ``'_lg.'`` before conversion with
        ``RecipeItem.from_dict``.
        """
        # The link extractor regex cannot reliably exclude review pages,
        # so drop them at parse time.
        if '/reviews/' in response.url:
            return []

        defaults = {'source': self.source, 'url': response.url}
        raw_recipes = parse_recipes(HtmlXPathSelector(response), defaults)

        media_fields = ('photo', 'image')
        for entry in raw_recipes:
            for key in media_fields:
                if key in entry:
                    entry[key] = flatten(entry[key])
                    entry[key] = entry[key].replace('_med.', '_lg.')

        return [RecipeItem.from_dict(entry) for entry in raw_recipes]

Пример #7

Показать файл

Файл: marthastewart_spider.py Проект: rkroll/openrecipes

    def parse_item(self, response):
        """Wrap each recipe parsed from the response in a ``RecipeItem``.

        The response is loaded into an ``HtmlXPathSelector`` and passed to
        ``parse_recipes`` along with this spider's ``source`` and the
        response URL; the returned recipes are converted one by one with
        ``RecipeItem.from_dict``.
        """
        selector = HtmlXPathSelector(response)
        defaults = {'source': self.source, 'url': response.url}
        raw_recipes = parse_recipes(selector, defaults)

        items = []
        for parsed in raw_recipes:
            items.append(RecipeItem.from_dict(parsed))
        return items