def parse_recipe(self, response, title, picture):
    """Yield a single ``CocktailItem`` for a Serious Eats recipe page.

    ``title`` and ``picture`` are passed through to the item unchanged;
    only the ingredient data is read from ``response``.
    """
    def strong_children(node):
        # Handed to extract_extra_ingredients as its second argument;
        # presumably used to spot sub-headings inside the ingredient
        # list -- confirm against that helper.
        return node.select('strong')

    selector = HtmlXPathSelector(response)
    ingredient_nodes = selector.select(xp_ingredients)
    main_ingredients, other_ingredients = extract_extra_ingredients(
        ingredient_nodes,
        strong_children,
    )

    item = CocktailItem(
        title=title,
        picture=picture,
        url=response.url,
        source='Serious Eats',
        ingredients=main_ingredients,
        extra_ingredients=other_ingredients,
    )
    yield item
def parse_recipe(self, response):
    """Parse a Saveur recipe page into at most one ``CocktailItem``.

    Returns an empty list when the page has no ``<h1>`` or when no
    ingredients could be extracted; otherwise a one-element list.

    The title is the first ``<h1>`` (converted with ``html_to_text`` and
    stripped).  The picture is the ``src`` of the first
    ``<img itemprop='photo'>`` resolved against ``response.url``, or
    ``None`` when there is no such image.
    """
    hxs = HtmlXPathSelector(response)

    # for/else idiom: take the first <h1>, bail out if there is none.
    for title in hxs.select('//h1').extract():
        break
    else:
        return []

    # Same idiom for the picture, except a missing image is not fatal.
    for picture in hxs.select("//img[@itemprop='photo']/@src").extract():
        picture = urljoin(response.url, picture)
        break
    else:
        picture = None

    # Every node located after an <h4> starting with "Ingredients" and
    # before an <h4> starting with "Instructions" or "Directions".
    # NOTE: adjacent string literals are concatenated verbatim, so every
    # `or` must carry its own surrounding spaces; a missing trailing
    # space would produce `orstarts-with(...)`, which is not a valid
    # XPath 1.0 operator token.
    xp_between_headings = (
        "//node()"
        "[preceding::h4["
        "starts-with(text(),'INGREDIENTS') or "
        "starts-with(text(),'Ingredients') or "
        "starts-with(text(),'ingredients')"
        "]]"
        "[following::h4["
        "starts-with(text(),'INSTRUCTIONS') or "
        "starts-with(text(),'Instructions') or "
        "starts-with(text(),'instructions') or "
        "starts-with(text(),'DIRECTIONS') or "
        "starts-with(text(),'Directions') or "
        "starts-with(text(),'directions')"
        "]]"
    )

    # Page layouts are tried in order; `or` short-circuits so later
    # selectors only run when the earlier ones produced nothing truthy.
    candidates = (
        split_at_br(hxs.select(xp_between_headings))
        or hxs.select('//div[count(*)=1]/b').extract()
        or split_at_br(hxs.select('//b//node()'))
        or hxs.select("//span[@style='font-weight: bold;']").extract()
    )

    # The predicate flags all-uppercase entries; presumably these are
    # section headings that start the "extra" ingredients -- confirm
    # against extract_extra_ingredients.
    ingredients, extra_ingredients = extract_extra_ingredients(
        candidates,
        lambda s: s.isupper()
    )

    if not ingredients:
        return []

    return [CocktailItem(
        title=html_to_text(title).strip(),
        picture=picture,
        url=response.url,
        source='Saveur',
        ingredients=ingredients,
        extra_ingredients=extra_ingredients
    )]
def parse_recipe(self, response, title, picture):
    """Build and yield the ``CocktailItem`` for one Serious Eats recipe.

    The caller supplies ``title`` and ``picture``; this method only
    extracts the ingredient lists from ``response`` and assembles the
    item.
    """
    page = HtmlXPathSelector(response)
    nodes = page.select(xp_ingredients)

    # The callable selects the <strong> children of a node; how
    # extract_extra_ingredients interprets the result is defined by that
    # helper (presumably as a heading marker -- confirm).
    find_strong = lambda node: node.select('strong')
    ingredients, extra_ingredients = extract_extra_ingredients(
        nodes, find_strong)

    fields = dict(
        title=title,
        picture=picture,
        url=response.url,
        source='Serious Eats',
        ingredients=ingredients,
        extra_ingredients=extra_ingredients,
    )
    yield CocktailItem(**fields)
def parse_recipe(self, response):
    """Extract a Saveur cocktail recipe from ``response``.

    Returns ``[]`` when the page has no ``<h1>`` title or no ingredients
    were found, otherwise a list holding exactly one ``CocktailItem``
    whose ``source`` is ``'Saveur'``.

    ``picture`` is the first ``<img itemprop='photo'>`` ``src`` made
    absolute against ``response.url``; it is ``None`` if the page has no
    such image.
    """
    hxs = HtmlXPathSelector(response)

    # First <h1> wins; without a title the page is skipped entirely.
    for title in hxs.select('//h1').extract():
        break
    else:
        return []

    # First photo wins; a page without a photo is still accepted.
    for picture in hxs.select("//img[@itemprop='photo']/@src").extract():
        picture = urljoin(response.url, picture)
        break
    else:
        picture = None

    # <h4> predicates delimiting the ingredient section.  Each `or` is
    # written with a space on both sides: the pieces are joined by plain
    # string-literal concatenation, and dropping a space would fuse the
    # operator with the next function name (`orstarts-with`), which is
    # not a valid XPath 1.0 expression.
    ingredients_heading = (
        "starts-with(text(),'INGREDIENTS') or "
        "starts-with(text(),'Ingredients') or "
        "starts-with(text(),'ingredients')"
    )
    method_heading = (
        "starts-with(text(),'INSTRUCTIONS') or "
        "starts-with(text(),'Instructions') or "
        "starts-with(text(),'instructions') or "
        "starts-with(text(),'DIRECTIONS') or "
        "starts-with(text(),'Directions') or "
        "starts-with(text(),'directions')"
    )
    xp_section = (
        "//node()"
        "[preceding::h4[" + ingredients_heading + "]]"
        "[following::h4[" + method_heading + "]]"
    )

    # Fallback chain, evaluated lazily from most to least specific:
    #   1. nodes between the ingredients and instructions/directions <h4>
    #   2. a <b> that is the child of a <div> with exactly one child element
    #   3. any node below a <b>
    #   4. <span> elements with an inline bold style
    raw_ingredients = (
        split_at_br(hxs.select(xp_section))
        or hxs.select('//div[count(*)=1]/b').extract()
        or split_at_br(hxs.select('//b//node()'))
        or hxs.select("//span[@style='font-weight: bold;']").extract()
    )

    # All-uppercase entries are flagged by the predicate; presumably
    # they act as headings for the extra ingredients -- confirm against
    # extract_extra_ingredients.
    ingredients, extra_ingredients = extract_extra_ingredients(
        raw_ingredients, lambda s: s.isupper())

    if not ingredients:
        return []

    return [
        CocktailItem(
            title=html_to_text(title).strip(),
            picture=picture,
            url=response.url,
            source='Saveur',
            ingredients=ingredients,
            extra_ingredients=extra_ingredients,
        )
    ]