def parseDrink(self, response):
    """
    Scrape one drink page and build a Drink item from it.

    The name comes from the page-title heading, the tags from the
    post-tag links, and every ingredient list entry is passed through
    Unit_Analyzer.get_triple. Directions are stored as the list of text
    fragments matched by the directions XPath. Rating and review count
    are not scraped by this parser and are stored as None.

    Returns the populated Drink item.
    """
    selector = HtmlXPathSelector(response)
    item = Drink()

    title_nodes = selector.select("//h2[@class='pagetitle']/text()")
    item['name'] = title_nodes.extract()[0]

    # Rating lookups are disabled for this parser.
    item['rating'] = None
    item['num_reviews'] = None

    tag_links = selector.select("//div[@class='posttags']/a[@rel='tag']")
    item['tags'] = [link.select("text()").extract()[0] for link in tag_links]

    analyzer = Unit_Analyzer()
    ingredient_nodes = selector.select("//ul[@class='ingredients']/li")
    item['ingredients'] = [
        analyzer.get_triple(node.select('text()').extract()[0])
        for node in ingredient_nodes
    ]

    direction_nodes = selector.select("//div[@class='entry']/div[3]/p/text()")
    item['directions'] = direction_nodes.extract()

    log.msg('Drink retrieved: %s' % item, level=log.INFO)
    return item
def parseDrink(self, response):
    """
    Scrape one drink recipe page and build a Drink item from it.

    Fields populated:
      name        -- first text node of the recipe title heading.
      rating      -- list of text fragments from the ratings box, or None
                     when the page carries no usable rating.
      num_reviews -- list of text fragments from the review counter, or
                     None when the page carries no usable rating.
      tags        -- always None (tags are not scraped by this parser).
      ingredients -- one Unit_Analyzer.get_triple result per ingredient,
                     built from the "<amount> <name>" string.
      directions  -- list of text fragments from the directions block.

    Returns the populated Drink item.
    Raises IndexError if the title, or an ingredient's amount or name,
    is missing from the page.
    """
    hxs = HtmlXPathSelector(response)
    drink = Drink()

    # Name and ratings
    drink['name'] = hxs.select("//h1[@class='fn recipe_title']/text()").extract()[0]
    rating = hxs.select("//div[@class='ratingsBox rating']/div[1]/div[1]/text()").extract()
    num_reviews = hxs.select("//div[@class='ratingsBox rating']//span[@class='count']/text()").extract()

    # No rating: either the ratings box is absent (empty list) or its first
    # text fragment is longer than 4 characters -- presumably placeholder
    # text rather than a numeric score (TODO confirm against the site).
    # Checking for the empty list first avoids an IndexError on such pages.
    if not rating or len(rating[0]) > 4:
        rating = None
        num_reviews = None
    drink['rating'] = rating
    drink['num_reviews'] = num_reviews

    drink['tags'] = None

    # Ingredients: join amount and name into one string that
    # unit_analyzer can parse.
    drink['ingredients'] = []
    unit_analyzer = Unit_Analyzer()
    ingredient_strings = hxs.select("//div[@class='ingredients']//span[@class='ingredient']")
    for ingredient_string in ingredient_strings:
        amount = ingredient_string.select(".//span[@class='amount']/text()").extract()[0]
        name = ingredient_string.select(".//span[@class='name']//a/text()").extract()[0]
        final_triple = unit_analyzer.get_triple(amount + " " + name)
        drink['ingredients'].append(final_triple)

    # Directions
    drink['directions'] = hxs.select("//div[@class='RecipeDirections instructions']/text()").extract()

    return drink
def parseDrink(self, response):
    """
    Scrape one drink page (the 'drinkRecipe' layout) into a Drink item.

    Name and directions are taken from the first matching text node of
    their XPaths. Each ingredient list entry is formed by concatenating
    the entry's own first text node with its first link text, then
    passed through Unit_Analyzer.get_triple. Rating, review count and
    tags are not scraped by this parser and are stored as None.

    Returns the populated Drink item.
    """
    selector = HtmlXPathSelector(response)
    item = Drink()

    heading = selector.select("//div[@id='drinkRecipe']/h2/text()")
    item['name'] = heading.extract()[0]

    # Fields this parser does not scrape.
    item['rating'] = None
    item['num_reviews'] = None
    item['tags'] = None

    directions = selector.select("//div[@id='drinkRecipe']/p[position()=2]/text()")
    item['directions'] = directions.extract()[0]

    analyzer = Unit_Analyzer()
    entries = selector.select("//div[@id='drinkRecipe']/ul/li")
    item['ingredients'] = [
        analyzer.get_triple(
            entry.select('text()').extract()[0]
            + entry.select('a/text()').extract()[0]
        )
        for entry in entries
    ]

    return item