def recipe_from_url(request):
    url = request.POST['url']
    try:
        scrape = scrape_me(url)
    except WebsiteNotImplementedError:
        try:
            scrape = scrape_me(url, wild_mode=True)
        except NoSchemaFoundInWildMode:
            return JsonResponse(
                {
                    'error': True,
                    'msg': _('The requested site provided malformed data and cannot be read.')  # noqa: E501
                },
                status=400)
    except ConnectionError:
        return JsonResponse(
            {
                'error': True,
                'msg': _('The requested page could not be found.')
            },
            status=400)
    return JsonResponse(get_from_scraper(scrape, request.space))
def AcceptImage():
    try:
        if request.method == "POST":
            # Save the uploaded image and predict what food it contains
            f = request.files['fileupload']
            filename = f.filename
            f.save(os.path.join(Save_Folder, secure_filename(f.filename)))
            CheckVal = request.form.get("Check")
            ActualObject = MakePrediction(filename)
            ListOfLink = FindOnGoogle(ActualObject)
            ValidSite = []
            counter = 0
            max_len = 0
            for Link in ListOfLink:
                try:
                    if counter == 3:
                        break
                    # Scrape each link once and reuse the scraper
                    scraper = scrape_me(Link)
                    ListofItem = scraper.ingredients()
                    Time = scraper.total_time()
                    Title = scraper.title()
                    if len(ListofItem) != 0:
                        Ingredient, Price = WegmanAPI.FormatData(ListofItem)
                        ValidSite.append([Link, Ingredient, round(float(Price), 2), Time, Title])
                        counter += 1
                except Exception:
                    print("Could not scrape link: " + Link)
            ValidSite = Sorting.SortProperly(ValidSite, CheckVal)
            # Pad every ingredient list to the length of the longest one
            for i in ValidSite:
                if len(i[1]) > max_len:
                    max_len = len(i[1])
            for i in ValidSite:
                while len(i[1]) < max_len:
                    i[1].append(' ')
            return render_template("Recipes.html", ActualObject=ActualObject,
                                   ListOfLink=ValidSite, CheckV=CheckVal)
        else:
            return "Only POST requests are supported"
    except Exception:
        return render_template("ErrorPage.html")
def write_recipe_to_file(link):
    from recipe_scrapers import scrape_me

    scraper = scrape_me(link)
    ingredients = scraper.ingredients()
    # Write the scraped recipe as a dictionary, keyed by field name
    write_json_to_file(
        "recipes/" + scraper.title(),
        {
            "title": scraper.title(),
            "ingredients": ingredients,
            "just_ingredients": unique(join_lists([extract_ingredient(i) for i in ingredients])),
            "image": scraper.image(),
            "instructions": scraper.instructions(),
            "total time": scraper.total_time(),
            "yields": scraper.yields()
        })
def scrapeRecipeUrl(url):
    scraper = scrape_me(url)
    rawIngredients = scraper.ingredients()
    ingredients = parseIngredients(rawIngredients['parsed_ingredients'])
    tags = scraper.tags()
    title = scraper.title()
    image = scraper.images()
    recipe_summary = scraper.recipe_summary()
    isRecipe = scraper.isRecipe()
    if not isRecipe:
        return

    data = {}
    data['title'] = title
    data['img_src'] = image
    data['url'] = url
    data['tags'] = tags
    data['raiting'] = scraper.raiting()
    data['instructions'] = scraper.instructions()
    data['full_nutrition_data'] = scraper.full_nutrition_data()
    data['parsed_ingredients'] = ingredients
    data['ingredients'] = rawIngredients['ingredients']
    data['recipe_summary'] = recipe_summary
    data['searchable_keys'] = createSearchableKeys(ingredients, title, tags)

    if not image or not recipe_summary:
        return "no image, not saved"
    if scraper.raiting() == 0:
        return 'no raiting, not saved'

    savedPath = '../recipes/' + title + '.json'
    with open(savedPath, 'w') as outfile:
        json.dump(data, outfile)
    return title
def scrape_recipe(url):
    recipe = {}
    try:
        scraper = scrape_me(url)
        recipe = {
            'name': scraper.title(),
            'ingredients': scraper.ingredients(),
            'instructions': scraper.instructions().split("\n"),
            'image': scraper.image(),
            'url': url,
        }
    except WebsiteNotImplementedError:
        pass

    if not recipe:
        # Fall back to a site-specific parser when recipe_scrapers has no support
        parsed_uri = urllib.parse.urlparse(url)
        domain = parsed_uri.netloc.lower()
        parser = parsers.getParser(domain)
        if parser is None:
            return None
        recipe = parser.Parse(url)

    return recipe
def main(food):
    from googlesearch import search
    from recipe_scrapers import scrape_me

    # google a recipe from allrecipes.com
    query = food + " allrecipes.com"

    # find the desired url
    url = ""
    for i in search(query, tld="com", num=1, stop=1, pause=2):
        url = i
    print("url: ", url)

    # assign scraper to this url
    scraper = scrape_me(url)

    # find the ingredients for the recipe and convert that into a string
    ingredients = str(scraper.ingredients())

    # remove brackets and add line breaks in the string to make it more readable
    ingredients = ingredients.replace("',", "\n")  # replace commas with line breaks
    ingredients = ingredients.replace("®", "")     # remove registered mark for speech purposes
    ingredients = ingredients.replace("[", "")     # remove left bracket
    ingredients = ingredients.replace("]", "")     # remove right bracket
    ingredients = ingredients.replace("'", "")     # remove single quotes

    # if nothing is returned
    if ingredients == "":
        return "Not specific enough"
    return ingredients
def scrape_recipe(url):
    recipe = {}
    parsed_uri = urllib.parse.urlparse(url)
    domain = parsed_uri.netloc.lower()
    domain = domain.replace('www.', '', 1) if domain.startswith('www.') else domain
    parser = parsers.getParser(domain)
    if parser is not None:
        recipe = parser.Parse(url)

    if not recipe:
        try:
            scraper = scrape_me(url)
            instructions = [
                i.strip() for i in scraper.instructions().split("\n") if i.strip()
            ]
            recipe = {
                'name': scraper.title(),
                'ingredients': scraper.ingredients(),
                'instructions': instructions,
                'image': scraper.image(),
                'url': url,
            }
        except WebsiteNotImplementedError:
            pass

    return recipe
def recipe_scraper2json(args, url):
    from recipe_scrapers import scrape_me

    print_debug("Using recipe-scraper module...")
    recipe_json = {}
    recipe_json['url'] = url
    try:
        scraper = scrape_me(url)
        recipe_json['title'] = scraper.title()
        recipe_json['description'] = ''
        recipe_json['yield'] = scraper.yields()
        recipe_json['preptime'] = ''
        recipe_json['cooktime'] = ''
        recipe_json['totaltime'] = minutes2time(scraper.total_time())
        recipe_json['ingredient_groups'] = []
        recipe_json['ingredient_groups'].append(json.loads('{"title":"","ingredients":[]}'))
        recipe_json['ingredient_groups'][0]['ingredients'] = scraper.ingredients()
        recipe_json['direction_groups'] = []
        recipe_json['direction_groups'].append(json.loads('{"group":"","directions":[]}'))
        instructions = scraper.instructions().split('\n')
        recipe_json['direction_groups'][0]['directions'] = instructions
    except Exception:
        raise UrlError(url, 'URL not supported.')
    return recipe_json
def scrape_url(url):
    """Scrape recipe data from a URL and print it."""
    try:
        scraper = scrape_me(url, wild_mode=True)
        print(f'{scraper.title()}')
        print(f'cooking time: {scraper.total_time()}')
        print(f'number of servings {scraper.yields()[:2]}')
        print('\nRECIPE\n')
        for i in scraper.ingredients():
            print(i)
        print('\nINSTRUCTIONS\n')
        print(scraper.instructions())
        print(f'\nimage: {scraper.image()}')
        print(f'\nsource: {scraper.host()}')
        print('\nNUTRIENTS\n')
        for k, v in scraper.nutrients().items():
            print(f'{k}:{v}')
        # fields below are only available on some scrapers
        print(f'\nauthor: {scraper.author()}')
        print(f'\ncanonical_url: {scraper.canonical_url()}')
        print(f'\nlanguage: {scraper.language()}')
        print(f'\nreviews: {scraper.reviews()}')
        print(f'\nsite_name: {scraper.site_name()}')
    except Exception:
        print(f'no information retrieved from {url}')
        return 0
def get_recipe(url):
    """
    Scrape a recipe from the given url and return its content,
    in an ordered way, as a JSON string.
    """
    try:
        # list to store all parts of the recipe
        complete_recipe = []
        # query the given url
        scraper = scrape_me(url)
        # append the title of the recipe
        complete_recipe.append({"title": scraper.title()})
        # parse ingredients
        ingredients_dict = parse_ingredients_mod(scraper)
        complete_recipe.append(ingredients_dict)
        # parse instructions
        instructions_list = scraper.instructions().splitlines()
        complete_recipe.append({"instructions": instructions_list})
        return json.dumps({"recipe": complete_recipe})
    except Exception as e:
        return json.dumps({"error": f"{e}"})
def get_recipe(URL):
    # Returns a scraper exposing .title(), .ingredients(),
    # .total_time(), .instructions() and .links()
    return scrape_me(URL)
def add_new_recipe():
    # this function adds new recipes via a URL and displays the recipe
    print(
        "Try this URL if you need inspiration: https://www.allrecipes.com/recipe/8499/basic-chicken-salad\n"
    )
    URL = input('What is the recipe URL? ')
    try:
        scraper = scrape_me(URL)
        display_title_bar()
        ingredients = []
        print('\n**' + scraper.title() + '**\n')
        print("Yields: {}\n".format(scraper.yields()))
        print('INGREDIENTS')
        for ingredient in scraper.ingredients():
            ingredients.append(ingredient)
            print(ingredient)
        print('\nINSTRUCTIONS')
        print(scraper.instructions())
        recipe = {
            'title': scraper.title(),
            'ingredients': ingredients,
            'instructions': scraper.instructions()
        }
        if debug:
            print(recipe['title'])
        choice = input('\nWould you like to save this recipe? (y/n) ')
        if choice == 'y':
            save_recipe(recipe)
        return True
    except Exception:
        print("That website is not supported, please try again.")
        return True
def scrape_minamlist_baker(self):
    source = 'minamalist_baker'
    recipe_list = [
        x["_id"] for x in list(self.urls.find({"source": source}, {"_id": 1}))
    ]
    for i in range(1, 63):
        page = scrape_me(
            "https://minimalistbaker.com/recipe-index/?fwp_paged=" + str(i)).links()
        wait()
        for link in page:
            if 'tabindex' in link.keys():
                recipe = link['href']
                if recipe not in recipe_list:
                    self.urls.insert_one({
                        "_id": recipe,
                        "name": recipe.split(".com/")[1][:-1],
                        'read': False,
                        'type': [],
                        'source': source,
                    })
                    recipe_list.append(recipe)
    print(f"Recipes Scraped: {len(recipe_list)}")
def scrape_inspiralized(self):
    source = 'inspiralized'
    recipe_list = [
        x["_id"] for x in list(self.urls.find({"source": source}, {"_id": 1}))
    ]
    for i in range(1, 60):
        page = scrape_me("https://inspiralized.com/recipe-index/page/" + str(i)).links()
        wait()
        for link in page:
            if "data-id" in link.keys():
                recipe = link['href']
                if recipe not in recipe_list:
                    self.urls.insert_one({
                        "_id": recipe,
                        "name": recipe.split(".com/")[1][:-1],
                        'read': False,
                        'type': [],
                        'source': source,
                        "website_id": link["data-id"]
                    })
                    recipe_list.append(recipe)
    print(f"Recipes Scraped: {len(recipe_list)}")
def scrape_kreme_de_la_krum(self):
    source = 'creme_de_la_crum'
    recipe_list = [
        x["_id"] for x in list(self.urls.find({"source": source}, {"_id": 1}))
    ]
    for i in range(1, 53):
        page = scrape_me(
            "https://www.lecremedelacrumb.com/recipe-index//page/" + str(i)).links()
        wait()
        for link in page:
            if 'rel' in link.keys():
                if "bookmark" in link["rel"]:
                    recipe = link['href']
                    if recipe not in recipe_list:
                        self.urls.insert_one({
                            "_id": recipe,
                            "name": recipe.split(".com/")[1][:-1],
                            'read': False,
                            'type': [],
                            'source': source,
                        })
                        recipe_list.append(recipe)
    print(f"Recipes Scraped: {len(recipe_list)}")
def find_recipe(keywords: str):
    """
    Tries to find a recipe given the keywords (a space-separated string of words).
    Only URLs supported by the `recipe-scrapers` library are considered.

    :param keywords: Keywords describing the dish to search
    :return: a :py:class:Recipe object describing a found recipe or `None` if nothing was found
    """
    for url in search(keywords + " recipe"):
        if any(urlparse(url).netloc.endswith(k) for k in SCRAPERS.keys()):
            scraper = scrape_me(url)
            nutrients = {}
            for nutr, value in scraper.nutrients().items():
                name = ' '.join(camel_case_regex.findall(nutr))
                nutrients[name[0].upper() + name[1:].lower()] = value
            return Recipe(
                url=url,
                title=scraper.title(),
                total_time=scraper.total_time(),
                yields=scraper.yields(),
                ingredients=scraper.ingredients(),
                instructions=scraper.instructions(),
                nutrients=nutrients,
                image=scraper.image()
            )
    return None
def get_recipe(self, recipe_url):
    scraper = scrape_me(recipe_url)
    myIngredients = scraper.ingredients()
    myIngredientsString = ', '.join(myIngredients[1:])

    image = Image.open(r'test.jpg')
    draw = ImageDraw.Draw(image)

    # font size for the recipe text
    fontsize = 20
    font = ImageFont.truetype('gillsans.ttf', fontsize)
    text1 = myIngredientsString
    text_color = (0, 0, 0)
    text_start_height = 5

    # draw the title, ingredients and instructions onto the image
    self.draw_multiple_line_text(image, scraper.title(), font, 'red', text_start_height)
    self.draw_multiple_line_text(image, "Ingredients", font, 'red', 50)
    self.draw_multiple_line_text(image, text1, font, text_color, 75)
    self.draw_multiple_line_text(image, "Instructions", font, 'red', 575)
    self.draw_multiple_line_text(image, scraper.instructions(), font, text_color, 600)

    image.save('pil_text.png')
def get_recipe(url):
    try:
        scrap = scrape_me(url)
    except Exception:
        print('Could not scrape URL {}'.format(url))
        return {}

    try:
        title = scrap.title()
    except AttributeError:
        title = None
    try:
        ingredients = scrap.ingredients()
    except AttributeError:
        ingredients = None
    try:
        instructions = scrap.instructions()
    except AttributeError:
        instructions = None
    try:
        picture_link = scrap.picture()
    except AttributeError:
        picture_link = None

    return {
        'title': title,
        'ingredients': ingredients,
        'instructions': instructions,
        'picture_link': picture_link,
    }
def ingredients_from_url():
    response = request.get_json()
    scraper = scrape_me(response["url"])
    return jsonify({
        "ingredients": "\n".join(scraper.ingredients()),
        "instructions": scraper.instructions().replace("\n", "\n\n")
    })
def setUp(self):
    with open("tests/test_data/wild_mode.testhtml", encoding="utf-8") as testfile:
        options = {
            "wild_mode": True,
            "exception_handling": False,
            "test": True
        }
        self.wild_mode_scraper = scrape_me(testfile, **options)
def scrape(request):
    try:
        scraper = scrape_me(request.POST["url"])
        form = form_from_scrape(scraper, request.POST["url"])
        context = {'form': form}
        return render(request, "recipes/scrape.html", context)
    except WebsiteNotImplementedError:
        msg = "the url " + request.POST["url"] + " is not supported by recipe_scraper"
        return write(request, error_message_link=msg)
def returnScraped(url):
    scraper = scrape_me(url)
    # The original passed a set literal, which fails because ingredients()
    # returns a list (unhashable); a dict with descriptive keys is assumed here.
    return format_response({
        "title": scraper.title(),
        "total_time": scraper.total_time(),
        "ingredients": scraper.ingredients(),
        "instructions": scraper.instructions()
    })
def test(url):
    scraper = scrape_me(url)
    rawIngredients = scraper.ingredients()
    ingredients = parseIngredients(rawIngredients['parsed_ingredients'])
    tags = scraper.tags()
    print(tags)
    return
def scrape(self):
    recipes = []
    for url in self._urls:
        try:
            if MongoHelper.getRecipeByUrl(url).count() > 0:
                print('Recipe is already in DB for URL:{}'.format(url))
                continue
            scraper = scrape_me(url)
            if not self._isRecipe(scraper):
                continue
            name = scraper.title()
            ingredients = scraper.ingredients()
            directions = scraper.instructions()
            servingCount = scraper.yields()
            totalTime = scraper.total_time()
            image = scraper.image()
            ratings = scraper.ratings()
            recipe = {
                'name': name,
                'url': url,
                'ingredients': ingredients,
                'directions': directions,
                'servingCount': servingCount,
                'image': image,
                'totalTime': totalTime,
                'sourceName': self.website.name,
                'ratings': ratings,
                'scrapeTime': datetime.datetime.now(),
                'language': self.website.language
            }
            recipes.append(recipe)
            print('Scraped Recipe: {}, from URL: {}, RecipeBatch#: {}'.format(
                name, url, len(recipes)))
            if len(recipes) >= self._recipeBuffer:
                recipeIds = MongoHelper.insertRecipes(recipes)
                recipes = []
                print('{} Recipes have been successfully written: {}'.format(
                    Crawler._recipeBuffer, recipeIds))
            time.sleep(self._sleepTime)  # Sleeping between requests to avoid limit
        except Exception:
            print('Could not parse url: ', url)
            continue
def get_recipe(dishname):
    instructions = []
    link = get_link(dishname)
    if link is not None:
        try:
            scraper = scrape_me(link)
            instructions = scraper.instructions()
        except Exception:
            instructions = []
    return instructions
def answer(dish):
    dish = dish.replace(" ", "-")
    dish += "-"
    query = f"https://www.foodnetwork.com/search/{dish}"
    # The original called scrape_me() with no URL and left the selector
    # ".m-MediaBlock__a-Headline a" in a comment; extracting the first search
    # result with that selector is assumed here (requests and BeautifulSoup
    # are assumed to be imported).
    search_page = requests.get(query)
    first_result = BeautifulSoup(search_page.text, "html.parser").select_one(
        ".m-MediaBlock__a-Headline a")
    recipe = scrape_me(first_result["href"])
    text = "Recipe for %s. Total time: %s. Here is the list of ingredients. %s" % (
        recipe.title(), recipe.total_time(), recipe.ingredients())
    return statement(text)
def scrape_host_the_toast(self):
    source = 'host_the_toast'
    recipe_list = [
        x["_id"] for x in list(self.urls.find({"source": source}, {"_id": 1}))
    ]
    page = scrape_me("https://hostthetoast.com/recipes/").links()
    categories = []
    # Some sites require category exploration to loop through recipes
    for link in page:
        if "category" in link["href"]:
            categories.append(link['href'])
    for c in categories:
        c_name = c.split("/category/")[1][:-1]
        for i in range(1, 40):
            page = scrape_me(c + "page/" + str(i)).links()
            if len(page) < 35:
                break
            wait()
            for link in page:
                if 'rel' in link.keys():
                    if "bookmark" in link["rel"]:
                        recipe = link['href']
                        if recipe not in recipe_list:
                            self.urls.insert_one({
                                "_id": recipe,
                                "name": recipe.split("hostthetoast.com/")[1][:-1],
                                'read': False,
                                'type': [c_name],
                                'source': source
                            })
                            recipe_list.append(recipe)
    print(f"Recipes Scraped: {len(recipe_list)}")
def getDataAndWrite(url, img):
    try:
        data = scrape_me(url)
        # skip recipes with missing fields
        if data.title() == "" or data.ingredients() == "" or data.instructions() == "":
            return
        writeDataToFile(data.title(), data.total_time(), data.ingredients(),
                        data.instructions(), img)
    except Exception:
        print("error for url: " + url)
        return
def main():
    recipes = get_recipes()
    for recipe in recipes:
        scrape = scrape_me(recipe)
        print("scraping: {0}".format(scrape.title()))
        recipe_dict = recipe_to_dict(scrape)
        write_to_file(recipe_dict)
        image_url = find_image(recipe_dict.get("source"), scrape.host(), recipe_dict.get("title"))
        filename = "{0}.{1}".format(get_file_name(recipe_dict), get_file_type(image_url))
        download_image(filename, image_url)
def web_scraper(web_page_url):
    """Web scraper for recipes from multiple websites"""
    page = scrape_me(web_page_url)
    return {
        "recipe_title": page.title(),
        "recipe_ingred": page.ingredients(),
        "recipe_direct": page.instructions(),
        "recipe_url": web_page_url,
        "image_url": page.image()
    }