Exemplo n.º 1
0
def parse_schema_ingredient(ing):
    """Parse one ingredient and return the corrected string
    suitable for ConvenientImporter.add_ing_from_text()

    If the first whitespace-separated token can be read as a number
    (written with a german decimal comma or with a dot), it is rewritten
    as a fraction string, e.g. "1,5 l Milch" becomes "3/2 l Milch".
    In every other case the ingredient is returned unchanged; an
    unparsable first token additionally triggers a warning.

    @param ing: the ingredient
    @ptype ing: string
    @return: the adjusted ingredient string suitable for the default importer
    @rtype: string
    """
    parts = ing.split(None, 1)
    if len(parts) < 2:
        # a single word (or an empty string) has no separate amount
        return ing
    raw_amount, rest = parts
    try:
        # replace german comma with dot, then convert to a fraction
        amount = Fraction(raw_amount.replace(',', '.'))
    except (ValueError, ZeroDivisionError):
        # ValueError: the token is not a number at all.
        # ZeroDivisionError: Fraction() rejects a zero denominator
        # such as "1/0"; treat it like any other unparsable amount.
        warn("could not parse amount %r" % raw_amount)
        return ing
    # formatting the fraction as a string lets gourmet parse
    # non-integer amounts
    return "%s %s" % (amount, rest)
Exemplo n.º 2
0
def getdatafromurl(url, content_type_check=None):
    """Download data from URL.

    The connection obtained from get_url_socket() is always closed
    again, whether or not data was read from it.  A
    urllib.error.URLError is not propagated; it is reported with a
    warning and None is returned as data.

    @param url: the URL to download
    @ptype url: string
    @param content_type_check: if non-empty, only return data if the
      Content-Type header starts with the given string
    @ptype content_type_check: string
    @return: URL data or None, and the url (which may be redirected
      to a new URL)
    @rtype: tuple (string, string) or (None, string)
    """
    data = None
    try:
        sock = get_url_socket(url)
        try:
            # update URL in case of redirects
            url = sock.geturl()
            if content_type_check:
                # a missing header is treated as generic binary data
                content_type = sock.info().get('content-type',
                                               'application/octet-stream')
                if content_type.lower().startswith(content_type_check):
                    data = sock.read()
            else:
                data = sock.read()
        finally:
            # release the connection on every path (content type
            # mismatch, read error, success).
            # NOTE(review): assumes get_url_socket() returns a urllib
            # response object, which provides close() - confirm.
            sock.close()
    except urllib.error.URLError as msg:
        warn("could not get data from URL %r: %s" % (url, msg))
    return data, url
Exemplo n.º 3
0
 def do_run(self):
     """Build the recipe and its ingredients from the parsed JSON data.

     When no JSON recipe data is found in self.data, only a warning is
     issued.  In both cases the parent class do_run() is called last and
     its result is returned.
     """
     recipe_json = parse_json_from_data(self.data, self.url)
     if not recipe_json:
         warn(
             "could not find recipe data in chefkoch.de URL %s - please submit a bug report"
             % self.url)
     else:
         # start_rec() initializes self.rec to an empty recipe dict
         self.start_rec()
         self.rec['source'] = 'Web'
         self.rec['link'] = self.url
         parse_schema_recipe(recipe_json, self.rec)
         ingredient_texts = parse_schema_ingredients(recipe_json)
         for ingredient_text in ingredient_texts:
             # add_ing_from_text() uses self.db.parse_ingredient()
             self.add_ing_from_text(ingredient_text)
         self.commit_rec()
     parent = super(ChefkochDeParser, self)
     return parent.do_run()
Exemplo n.º 4
0
def parse_schema_recipe(json_data, recipe):
    """Fill given recipe dict with data from JSON.

    Only the 'name' key is required.  All other keys are optional and
    the matching recipe entry is simply left out when a key is absent.
    A duration that cannot be parsed is reported with a warning and
    skipped.

    @param json_data: the parsed JSON recipe schema data
    @ptype json_data: dict (with various keys and values)
    @param recipe: the gourmet recipe to fill
    @ptype recipe: dict
    @return: nothing, the recipe will be modified instead
    @rtype: None
    """
    recipe['title'] = json_data['name']

    categories = json_data.get('recipeCategory')
    if isinstance(categories, str):
        # a single category may be given as plain text instead of a list;
        # indexing the string would yield only its first character
        categories = [categories]
    if categories:
        # gourmet only has one category per recipe, so get the first one
        recipe['category'] = categories[0]
        if len(categories) > 1:
            # If there are several categories add them to the modifications.
            recipe['modifications'] = "Kategorien: %s" % (
                ", ".join(categories))

    # both durations are handled the same way: parse, warn on failure
    duration_keys = (('cookTime', 'cooktime'), ('prepTime', 'preptime'))
    for json_key, recipe_key in duration_keys:
        if json_key not in json_data:
            continue
        raw_duration = json_data[json_key]
        try:
            duration = parse_iso8601_duration(raw_duration)
        except ValueError:
            warn("could not parse %s %r" % (json_key, raw_duration))
        else:
            recipe[recipe_key] = duration

    # values copied as they are
    copied_keys = (('recipeInstructions', 'instructions'),
                   ('recipeYield', 'yields'))
    for json_key, recipe_key in copied_keys:
        if json_key in json_data:
            recipe[recipe_key] = json_data[json_key]

    if 'aggregateRating' in json_data:
        rating = float(json_data['aggregateRating']['ratingValue'])
        # adjust "1 to 5" rating of chefkoch to "1 to 10" of gourmet
        recipe['rating'] = int(rating * 2)

    image = json_data.get('image')
    if image:
        if isinstance(image, list):
            # several images given: use the first one
            image = image[0]
        # the (possibly redirected) URL returned as second item is not needed
        recipe['image'], _ = getdatafromurl(image, content_type_check="image/")
Exemplo n.º 5
0
def parse_schema_ingredient(ing):
    """Parse one chefkoch.de ingredient and return the corrected string
    suitable for ConvenientImporter.add_ing_from_text()
    Note that this parser has specific features only suitable for
    german recipes from chefkoch.de.

    An ingredient starting with a space has no amount; it is stripped
    and a leading "evtl." is replaced by the english "optional: " tag.
    Otherwise, if the first whitespace-separated token can be read as a
    number (with a german decimal comma or a dot), it is rewritten as a
    fraction string, e.g. "1,5 l Milch" becomes "3/2 l Milch".  An
    unparsable first token triggers a warning and the ingredient is
    returned unchanged.

    @param ing: the ingredient
    @ptype ing: string
    @return: the adjusted ingredient string suitable for the default importer
    @rtype: string
    """
    if ing.startswith(" "):
        # amount is missing when ingredient starts with a space
        # this is likely specific for chefkoch.de
        ing = ing.strip()
        if ing.lower().startswith("evtl."):
            # add english optional tag; drop the blank that followed
            # "evtl." so the result has no double space
            ing = "optional: " + ing[5:].lstrip()
        return ing

    parts = ing.split(None, 1)
    if len(parts) < 2:
        # a single word (or an empty string) has no separate amount
        return ing
    raw_amount, rest = parts
    try:
        # replace german comma with dot, then convert to a fraction
        amount = Fraction(raw_amount.replace(',', '.'))
    except (ValueError, ZeroDivisionError):
        # ValueError: the token is not a number at all.
        # ZeroDivisionError: Fraction() rejects a zero denominator
        # such as "1/0"; treat it like any other unparsable amount.
        warn("could not parse amount %r" % raw_amount)
        return ing
    # formatting the fraction as a string lets gourmet parse
    # non-integer amounts
    return "%s %s" % (amount, rest)
Exemplo n.º 6
0
def parse_schema_recipe(soup, recipe):
    """Fill given recipe dict with data from HTML.

    Each recipe entry is only set when the matching element is found in
    the page.  Durations that cannot be parsed are reported with a
    warning and skipped.

    @param soup: the parsed HTML data from BeautifulSoup
    @ptype soup: BeautifulSoup.Tag
    @param recipe: the gourmet recipe to fill
    @ptype recipe: dict
    @return: nothing, the recipe will be modified instead
    @rtype: None
    """
    # used to require that an attribute is present and not empty
    nonempty = re.compile(r".+")
    tag = soup.find(itemprop="name", content=nonempty)
    if tag:
        recipe['title'] = tag["content"]
    tag = soup.find(itemprop="recipeCategory")
    if tag:
        recipe['category'] = tag.text
    tag = soup.find(itemprop="ratingValue", content=nonempty)
    if tag:
        rating = float(tag["content"])
        # adjust "1 to 5" rating to "1 to 10" of gourmet
        recipe['rating'] = int(rating * 2)
    tag = soup.find(itemprop="image", src=nonempty)
    if tag:
        image = tag["src"]
        # the (possibly redirected) URL returned as second item is not needed
        recipe['image'], unused = getdatafromurl(image,
                                                 content_type_check="image/")
    # stays 0 when no preparation time is found or it cannot be parsed,
    # so the cooking time below then equals the total time
    preptime = 0
    tag = soup.find(itemprop="performTime", content=nonempty)
    if tag:
        try:
            preptime = parse_iso8601_duration(tag['content'])
        except ValueError:
            warn("could not parse prepTime %r" % tag['content'])
        else:
            recipe['preptime'] = preptime
    tag = soup.find(itemprop="totalTime", content=nonempty)
    if tag:
        try:
            totaltime = parse_iso8601_duration(tag['content'])
        except ValueError:
            # name the field that actually failed (was "prepTime")
            warn("could not parse totalTime %r" % tag['content'])
        else:
            # the cooking time is the difference between total and prep time
            recipe['cooktime'] = totaltime - preptime
    tag = soup.find(itemprop="recipeYield")
    if tag and tag.text:
        recipe['yields'] = tag.text.strip()
    tag = soup.find(itemprop="description")
    if tag:
        # replace images in the text with alternative text since they
        # are sometimes used in place
        for img in tag.find_all('img'):
            img.string = translate_image_text("[" + get_alt_text(img) + "]")
        recipe['instructions'] = tag.get_text()
    tag = soup.find("div", attrs={"class": "tips"})
    if tag:
        p = tag.find("p")
        if p:
            recipe['modifications'] = p.text.strip()
    categories = soup.find_all(id="recipesCatFilterLink")
    if categories:
        cattext = ", ".join(cat.text.strip() for cat in categories)
        text = "%s %s" % (_("Category:"), cattext)
        # append the category list to the tips if there are any
        if "modifications" in recipe:
            recipe["modifications"] += "\n\n" + text
        else:
            recipe["modifications"] = text