def _process_scaling(r, args): if args.required_yield is not None: required_yield = RecipeParser.parse_amount(args.required_yield) if required_yield is None or required_yield.factor is None: print(f'Given yield is not valid', file=sys.stderr) exit(1) try: r = get_recipe_with_yield(r, required_yield) except StopIteration: print(f'Recipe "{r.title}" does not specify a yield in the unit "{required_yield.unit}". The ' f'following units can be used: ' + ", ".join(f'"{y.unit}"' for y in r.yields), file=sys.stderr) exit(1) elif args.multiply is not None: multiply = RecipeParser.parse_amount(args.multiply) if multiply is None or multiply.factor is None: print(f'Given multiplier is not valid', file=sys.stderr) exit(1) if multiply.unit is not None: print(f'A recipe can only be multiplied with a unitless amount', file=sys.stderr) exit(1) r = multiply_recipe(r, multiply.factor) return r
def extract(url, _):
    """Build a ``Recipe`` from the first schema.org recipe scraped from ``url``.

    Args:
        url: Address of the page to scrape.
        _: Unused; present so the signature matches the other extractors.

    Returns:
        The extracted ``Recipe``, or ``None`` if scraping fails or yields no recipe.

    Raises:
        KeyError: If the scraped recipe lacks ``name``, ``recipeIngredient`` or
            ``recipeInstructions``.
    """
    try:
        json_recipes = scrape_schema_recipe.scrape_url(url, python_objects=True)
    except Exception:
        # Best effort: a scraping failure means this extractor cannot handle the
        # page. `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
        return None

    if not json_recipes:
        return None

    json_recipe = json_recipes[0]

    tags = []
    if "cookingMethod" in json_recipe:
        tags.append(json_recipe["cookingMethod"])
    if "recipeCategory" in json_recipe:
        append_or_extend(tags, json_recipe["recipeCategory"])
    if "recipeCuisine" in json_recipe:
        tags.append(json_recipe["recipeCuisine"])
    if "keywords" in json_recipe:
        kw = json_recipe["keywords"]
        if isinstance(kw, str):
            # "a, b" must become ["a", "b"], not ["a", " b"]; drop empty entries.
            kw = [keyword.strip() for keyword in kw.split(',') if keyword.strip()]
        append_or_extend(tags, kw)

    description_parts = []
    if "description" in json_recipe:
        description_parts.append(json_recipe["description"])
    if "image" in json_recipe:
        image = json_recipe["image"]
        if isinstance(image, list):
            # Use the first image; an empty list means there is nothing to show.
            image = image[0] if image else None
        if image:
            description_parts.append(f'![]({image})')

    yields = []
    if "recipeYield" in json_recipe:
        recipe_yield = RecipeParser.parse_amount(json_recipe["recipeYield"])
        # parse_amount returns None for unparsable input; don't store that as a yield.
        if recipe_yield is not None:
            yields.append(recipe_yield)

    # Fall back to the requested address when the recipe does not state its own URL.
    source_url = json_recipe.get("url", url)

    recipe = Recipe(
        title=json_recipe["name"],
        description="\n\n".join(description_parts),
        tags=tags,
        yields=yields,
        ingredients=[Ingredient(name=ingred) for ingred in json_recipe["recipeIngredient"]],
        instructions=f'{create_instructions(json_recipe["recipeInstructions"])}\n\n{source_url}',
    )

    return recipe
def _yield_completer(prefix, action, parser, parsed_args): try: src = parsed_args.file.read() r = RecipeParser().parse(src) parsed_yield = RecipeParser.parse_amount(prefix) if parsed_yield is None or parsed_yield.factor is None: return [RecipeSerializer._serialize_amount(a) for a in r.yields] return [RecipeSerializer._serialize_amount(Amount(parsed_yield.factor, a.unit)) for a in r.yields if parsed_yield.unit is None or (a.unit is not None and a.unit.startswith(parsed_yield.unit))] except Exception as e: print(e) return []
def _process_scaling(r, args): """Returns recipes scaled according to --multiply or --yield""" if args.required_yield is not None: required_yield = RecipeParser.parse_amount(args.required_yield) if required_yield is None or required_yield.factor is None: print(f'Given yield is not valid', file=sys.stderr) exit(1) try: r = get_recipe_with_yield(r, required_yield) except StopIteration: print(_make_missing_yield_warning(r, required_yield), file=sys.stderr) exit(1) elif args.multiply is not None: multiply = RecipeParser.parse_amount(args.multiply) if multiply is None or multiply.factor is None: print(f'Given multiplier is not valid', file=sys.stderr) exit(1) if multiply.unit is not None: print(f'A recipe can only be multiplied with a unitless amount', file=sys.stderr) exit(1) r = multiply_recipe(r, multiply.factor) return r
def extract(url, soup):
    """Extract a ``Recipe`` from a parsed chefkoch.de recipe page.

    Args:
        url: Address the page was loaded from; URLs that do not contain
            ``chefkoch.de`` are ignored.
        soup: Parsed HTML document, queried through ``find``/``find_all``.

    Returns:
        The extracted ``Recipe``, or ``None`` if ``url`` is not a chefkoch.de URL.

    Raises:
        ValueError: If the page has no title heading or is one of the
            chefkoch.de "not found" error pages.
    """
    if 'chefkoch.de' not in url:
        return None

    # title
    title_tag = soup.find('h1', attrs={'class': 'page-title'})
    title = title_tag.text if title_tag is not None else None
    if title is None or title in ('Fehler: Seite nicht gefunden', 'Fehler: Rezept nicht gefunden'):
        raise ValueError('No recipe found, check URL')

    # summary
    summary_tag = soup.find('div', attrs={'class': 'summary'})
    summary = summary_tag.text if summary_tag else None

    # servings
    yields = []
    servings_input = soup.find('input', attrs={'id': 'divisor'})
    if servings_input is not None:
        # Compare as Decimal so fractional servings such as "1.5" don't break int().
        servings = Decimal(servings_input.attrs['value'])
        yields.append(Amount(servings, 'Portionen' if servings > 1 else 'Portion'))

    # tags
    tagcloud = soup.find('ul', attrs={'class': 'tagcloud'})
    tags = [tag.text for tag in tagcloud.find_all('a')] if tagcloud is not None else []

    # ingredients ('incredients' is the class name queried on the page, kept verbatim)
    ingreds = []
    table = soup.find('table', attrs={'class': 'incredients'})
    rows = table.find_all('tr') if table is not None else []
    for row in rows:
        cols = [cell.text.strip() for cell in row.find_all('td')]
        if len(cols) < 2:
            # Not an "amount | name" row (e.g. a header or spacer row).
            continue
        amount = RecipeParser.parse_amount(cols[0])
        ingreds.append(Ingredient(name=cols[1], amount=amount))

    # instructions: text only, without leading and trailing whitespace
    instruct = soup.find('div', attrs={'id': 'rezept-zubereitung'}).text.strip()

    return Recipe(title=title, ingredients=ingreds, instructions=instruct, description=summary, tags=tags,
                  yields=yields)
def extract(url, _):
    """Build a ``Recipe`` for ``url`` using the ``scrape_me`` scraper.

    Args:
        url: Address of the page to scrape.
        _: Unused; present so the signature matches the other extractors.

    Returns:
        The extracted ``Recipe``, or ``None`` if ``scrape_me`` raises
        ``WebsiteNotImplementedError`` for ``url``.
    """
    try:
        scraper = scrape_me(url)
    except WebsiteNotImplementedError:
        return None

    try:
        description = f'![]({scraper.image()})'
    except NotImplementedError:
        # The scraper offers no image; leave the description empty.
        description = ''

    title = scraper.title()

    # parse_amount returns None for text it cannot parse; don't store None as a yield.
    recipe_yield = RecipeParser.parse_amount(scraper.yields())
    yields = [recipe_yield] if recipe_yield is not None else []

    recipe = Recipe(
        title=title,
        description=description,
        yields=yields,
        ingredients=[Ingredient(name=ingred) for ingred in scraper.ingredients()],
        instructions=scraper.instructions(),
    )

    return recipe
def main():
    """Command line entry point: read a recipemd file, optionally scale it, print it.

    Options:
        -t/--title        print only the recipe title
        -i/--ingredients  print only the leaf ingredients
        -m/--multiply N   multiply the recipe by the unitless amount N
        -y/--yield Y      scale the recipe so that it yields Y

    ``--title``/``--ingredients`` and ``--multiply``/``--yield`` are mutually
    exclusive pairs. Invalid scaling input prints a message to stderr and
    terminates the process with exit status 1.
    """
    parser = argparse.ArgumentParser(description='Read and process recipemd recipes')

    parser.add_argument('file', type=open, help='A recipemd file')

    display_parser = parser.add_mutually_exclusive_group()
    display_parser.add_argument('-t', '--title', action='store_true', help='Display recipe title')
    display_parser.add_argument('-i', '--ingredients', action='store_true', help='Display recipe ingredients')

    scale_parser = parser.add_mutually_exclusive_group()
    scale_parser.add_argument('-m', '--multiply', type=str, help='Multiply recipe by N', metavar='N')
    scale_parser.add_argument('-y', '--yield', type=str, help='Scale the recipe for yield Y', metavar='Y',
                              dest='required_yield')

    args = parser.parse_args()

    # `type=open` hands over an already opened file; close it as soon as it is read.
    with args.file as recipe_file:
        src = recipe_file.read()

    rp = RecipeParser()
    r = rp.parse(src)

    if args.required_yield is not None:
        required_yield = RecipeParser.parse_amount(args.required_yield)
        if required_yield is None or required_yield.factor is None:
            print('Given yield is not valid', file=sys.stderr)
            # sys.exit instead of the builtin exit(): the latter is injected by the
            # `site` module and is missing under `python -S` or in frozen builds.
            sys.exit(1)
        matching_recipe_yield = next((y for y in r.yields if y.unit == required_yield.unit), None)
        if matching_recipe_yield is None:
            if required_yield.unit is None:
                # No unitless yield in the recipe: treat the recipe as yielding 1.
                matching_recipe_yield = Amount(Decimal(1))
            else:
                print(
                    f'Recipe "{r.title}" does not specify a yield in the unit "{required_yield.unit}". The '
                    'following units can be used: ' + ", ".join(f'"{y.unit}"' for y in r.yields),
                    file=sys.stderr,
                )
                sys.exit(1)
        r = multiply_recipe(r, required_yield.factor / matching_recipe_yield.factor)
    elif args.multiply is not None:
        multiply = RecipeParser.parse_amount(args.multiply)
        if multiply is None or multiply.factor is None:
            print('Given multiplier is not valid', file=sys.stderr)
            sys.exit(1)
        if multiply.unit is not None:
            print('A recipe can only be multiplied with a unitless amount', file=sys.stderr)
            sys.exit(1)
        r = multiply_recipe(r, multiply.factor)

    if args.title:
        print(r.title)
    elif args.ingredients:
        for ingr in r.leaf_ingredients:
            print(_ingredient_to_string(ingr))
    else:
        rs = RecipeSerializer()
        print(rs.serialize(r))
def _wprm_apply_unit(amount, unit, name):
    """Attach ``unit`` to ``amount``; without an amount, keep the unit as a prefix of ``name``.

    Returns the resulting ``(amount, name)`` pair.
    """
    if not unit:
        return amount, name
    if amount is None:
        # replace(None, ...) would raise; keep the unit text instead of losing it.
        return None, '{} {}'.format(unit, name).strip()
    return replace(amount, unit=unit), name


def _wprm_recipe_from_json(data):
    """Build a ``Recipe`` from the ``data['recipe']`` mapping returned by ``getJson``."""
    recipe_data = data['recipe']

    # title and summary
    title = getText(recipe_data['name'])
    summary = getText(recipe_data['summary'])

    # servings
    yields = []
    servings_amount = RecipeParser.parse_amount(recipe_data['servings'])
    servings_unit = recipe_data['servings_unit']
    if servings_amount is not None:
        if servings_unit != "":
            servings_amount = replace(servings_amount, unit=servings_unit)
        yields.append(servings_amount)

    # tags
    tags = []
    for tag_group in recipe_data['tags'].values():
        tags.extend(tag['name'] for tag in tag_group)

    # ingredients
    ingredients = []
    for ingred_group in recipe_data['ingredients']:
        # Kept separate from `title`, which must remain the recipe title.
        group_title = getText(ingred_group['name']) if 'name' in ingred_group else None
        children = []
        for ingred in ingred_group['ingredients']:
            amount = RecipeParser.parse_amount(ingred['amount'])
            unit = ingred['unit'].strip()
            name = getText('{} {}'.format(ingred['name'], ingred['notes']))
            amount, name = _wprm_apply_unit(amount, unit, name)
            children.append(Ingredient(name, amount))
        ingredients.append(IngredientGroup(title=group_title, ingredients=children))

    # instructions
    parts = []
    for instr_group in recipe_data['instructions']:
        if 'name' in instr_group:
            parts.append('## ' + getText(instr_group['name']) + '\n')
        for index, instr in enumerate(instr_group['instructions'], start=1):
            parts.append('{}. {}\n'.format(index, getText(instr['text'])))
    if 'notes' in recipe_data:
        parts.append('\n## Recipe Notes\n\n' + getText(recipe_data['notes']))

    return Recipe(title=title, ingredients=ingredients, instructions=''.join(parts), description=summary,
                  tags=tags, yields=yields)


def _wprm_recipe_from_html(soup):
    """Build a ``Recipe`` from the ``wprm-recipe-*`` elements of the parsed page."""
    # title
    title = soup.find(attrs={'class': 'wprm-recipe-name'}).text.strip()

    # summary
    summary_tag = soup.find('div', attrs={'class': 'wprm-recipe-summary'})
    summary = summary_tag.text.strip() if summary_tag is not None else None

    # yields
    yields = []
    servings = soup.find('span', attrs={'class': 'wprm-recipe-details wprm-recipe-servings'})
    if servings:
        servings_amount = RecipeParser.parse_amount(servings.text.strip())
        servings_unit_tag = soup.find(
            'span', attrs={'class': 'wprm-recipe-details-unit wprm-recipe-servings-unit'})
        servings_unit = servings_unit_tag.text.strip() if servings_unit_tag is not None else ""
        if servings_amount is not None:
            if servings_unit != "":
                servings_amount = replace(servings_amount, unit=servings_unit)
            yields.append(servings_amount)

    # tags: course, cuisine and keywords, each a comma separated list
    tags = []
    for css_class in ('wprm-recipe-course', 'wprm-recipe-cuisine', 'wprm-recipe-keyword'):
        tag_span = soup.find('span', attrs={'class': css_class})
        if tag_span:
            tags.extend(tag.strip() for tag in tag_span.text.split(','))

    # ingredients
    ingreds = []
    for ingred_group in soup.find_all('div', attrs={'class': 'wprm-recipe-ingredient-group'}):
        group_name = ingred_group.find(
            'h4', attrs={'class': 'wprm-recipe-group-name wprm-recipe-ingredient-group-name'})
        # Kept separate from `title`, which must remain the recipe title.
        group_title = group_name.text.strip() if group_name else None

        children = []
        for ingred in ingred_group.find_all('li', attrs={'class': 'wprm-recipe-ingredient'}):
            amount_tag = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-amount'})
            amount = RecipeParser.parse_amount(amount_tag.text) if amount_tag else None

            unit_tag = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-unit'})
            unit = unit_tag.text.strip() if unit_tag else ''

            name_tag = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-name'})
            name = name_tag.text.strip() if name_tag else ''

            notes_tag = ingred.find('span', attrs={'class': 'wprm-recipe-ingredient-notes'})
            notes = notes_tag.text.strip() if notes_tag else ''

            full_name = '{} {}'.format(name, notes).strip()
            amount, full_name = _wprm_apply_unit(amount, unit, full_name)
            children.append(Ingredient(full_name, amount=amount))
        ingreds.append(IngredientGroup(title=group_title, ingredients=children))

    # instructions
    parts = []
    for instr_group in soup.find_all('div', attrs={'class': 'wprm-recipe-instruction-group'}):
        group_name = instr_group.find(
            'h4', attrs={'class': 'wprm-recipe-group-name wprm-recipe-instruction-group-name'})
        if group_name:
            parts.append('## ' + group_name.text.strip() + '\n')
        group_instructs = instr_group.find_all('li', attrs={'class': 'wprm-recipe-instruction'})
        for index, inst in enumerate(group_instructs, start=1):
            parts.append(str(index) + '. ' + inst.text.strip() + '\n')

    # notes
    notes_container = soup.find('div', attrs={'class': 'wprm-recipe-notes-container'})
    if notes_container:
        notes_header = notes_container.find(attrs={'class': 'wprm-recipe-header'})
        # Without a header element, use the same heading as the JSON path.
        notes_title = notes_header.text.strip() if notes_header is not None else 'Recipe Notes'
        parts.append('\n## ' + notes_title)
        for paragraph in notes_container.find_all('p'):
            parts.append('\n\n' + paragraph.text.strip())

    return Recipe(title=title, ingredients=ingreds, instructions=''.join(parts), description=summary,
                  tags=tags, yields=yields)


def extract(url, soup):
    """Extract a ``Recipe`` from a page containing a ``wprm-recipe-container`` element.

    The recipe is first built from the JSON data that ``getJson`` returns for
    the container's ``data-recipe-id``. If that fails for any reason, the
    error is printed and the recipe is built from the page's HTML instead.

    Args:
        url: Address the page was loaded from (passed on to ``getJson``).
        soup: Parsed HTML document, queried through ``find``/``find_all``.

    Returns:
        The extracted ``Recipe``, or ``None`` if the page has no recipe container.
    """
    recipe_id_element = soup.find(attrs={'data-recipe-id': True, 'class': 'wprm-recipe-container'})
    if not recipe_id_element:
        return None

    recipe_id = recipe_id_element.attrs['data-recipe-id']
    data = getJson(url, recipe_id)

    try:
        return _wprm_recipe_from_json(data)
    except Exception as e:
        # Deliberate best effort: any problem with the JSON data falls through to
        # extracting the same information from the website's HTML.
        print('failed to extract json:', e)

    return _wprm_recipe_from_html(soup)