def test_keeps_non_tag_text_after_dash(self):
    # Each case pairs an entry-title fixture with the title expected back.
    # The first keeps its text after the en dash; the second is expected to
    # lose the trailing "Low Carb, Gluten Free" portion.
    cases = [
        (""" <h1 class="entry-title">Pumpkin Seed Bark – Dark Chocolate and Sea Salt</h1>""",
         u'Pumpkin Seed Bark \u2013 Dark Chocolate and Sea Salt'),
        (""" <h1 class="entry-title">Keto Flatbread Recipe – Low Carb, Gluten Free</h1>""",
         u'Keto Flatbread Recipe'),
    ]
    for html, expected_title in cases:
        response = http.TextResponse(url='', body=html)
        self.assertEqual(ketogasm.scrape_title(response), expected_title)
def test_strips_tags_after_dash(self):
    # Everything from the en dash onward is expected to be dropped.
    html = """ <h1 class="entry-title">Spicy Chicken Sausage – Low Carb, Gluten-Free</h1>"""
    response = http.TextResponse(url='', body=html)
    scraped_title = ketogasm.scrape_title(response)
    self.assertEqual(scraped_title, u'Spicy Chicken Sausage')
def test_strips_bracketed_text(self):
    # The bracketed suffix after the en dash must not appear in the title.
    html = """ <h1 class="entry-title">Low Carb Moscow Mule – [Keto, Alcohol, Sugar Free]</h1>"""
    response = http.TextResponse(url='', body=html)
    scraped_title = ketogasm.scrape_title(response)
    self.assertEqual(scraped_title, 'Low Carb Moscow Mule')
def test_scrapes_title_with_no_flavor_text(self):
    # A bare <h1> with nothing to strip should come back unchanged.
    html = """ <h1>Keto Beef Wellington</h1> """
    response = http.TextResponse(url='', body=html)
    scraped_title = ruled_me.scrape_title(response)
    self.assertEqual(scraped_title, 'Keto Beef Wellington')
def test_strips_tags_after_pipe(self):
    # The "| Low Carb" suffix is expected to be removed from the title.
    html = """ <h1 class="entry-title">Spanish Cauliflower Rice | Low Carb</h1>"""
    response = http.TextResponse(url='', body=html)
    scraped_title = ketogasm.scrape_title(response)
    self.assertEqual(scraped_title, 'Spanish Cauliflower Rice')
def test_strips_tags_after_pipe(self):
    # The title is read from the og:title meta tag; the " - Paleo Low Carb"
    # suffix is expected to be removed.
    # NOTE(review): the fixture separates the tags with a hyphen, not a pipe,
    # so the test name looks stale - confirm before renaming.
    html = """ <meta property="og:title" content="Almond Flour Biscuits - Paleo Low Carb" />"""
    response = http.TextResponse(url='', body=html)
    scraped_title = low_carb_yum.scrape_title(response)
    self.assertEqual(scraped_title, 'Almond Flour Biscuits')
def test_when_meta_section_does_not_specify_category_raises_exception(self):
    # The article:section value "Keto Brands We Love" is expected to make
    # scrape_category raise NoRecipeFoundError instead of returning a value.
    page_url = 'https://ketosizeme.com/keto-bulletproof-coffee/'
    html = """ <meta property="article:section" content="Keto Brands We Love" /> """
    with self.assertRaises(errors.NoRecipeFoundError):
        response = http.TextResponse(url=page_url, body=html)
        keto_size_me.scrape_category(response)
def test_strips_trailing_page_title(self):
    # The " - Hey Keto Mama" suffix in og:title must not end up in the
    # recipe title.
    html = """ <meta property="og:title" content="Cream Cheese & Salami Keto Pinwheels - Hey Keto Mama" />"""
    response = http.TextResponse(url='', body=html)
    scraped_title = hey_keto_mama.scrape_title(response)
    self.assertEqual(scraped_title, u'Cream Cheese & Salami Keto Pinwheels')
def test_scrapes_image(self):
    # The image URL is expected to be the content of the og:image meta tag.
    html = """ <meta property="og:image" content="https://ruled.me/recipe-image.jpg" />"""
    response = http.TextResponse(url='', body=html)
    image_url = ruled_me.scrape_image(response)
    self.assertEqual(image_url, 'https://ruled.me/recipe-image.jpg')
def test_scrapes_title_and_removes_flavor_text(self):
    # The title sits inside a link within the entry-title heading; the
    # "| Bacon and Cheddar!" part is expected to be dropped.
    html = """ <h1 class="entry-title"> <a href="https://www.ketoconnect.net/recipe/cauliflower-waffles/">Cauliflower Waffles | Bacon and Cheddar!</a> </h1>"""
    response = http.TextResponse(url='', body=html)
    scraped_title = ketoconnect.scrape_title(response)
    self.assertEqual(scraped_title, 'Cauliflower Waffles')
def test_scrapes_title_with_no_flavor_text(self):
    # With no separator in the link text, the title should come back as-is.
    html = """ <h1 class="entry-title"> <a href="https://www.ketoconnect.net/recipe/keto-butter-chicken/">Keto Butter Chicken</a> </h1>"""
    response = http.TextResponse(url='', body=html)
    scraped_title = ketoconnect.scrape_title(response)
    self.assertEqual(scraped_title, 'Keto Butter Chicken')
def test_scrapes_hierarchical_category(self):
    # Breadcrumb of the form "Keto Recipes > Dinner"; the expected category
    # for this fixture is 'entree'.
    html = """ <div class="postCategories"> Keto Recipes > <a rel="nofollow" href="https://www.ruled.me/keto-recipes/" title="Dinner">Dinner</a> </div>"""
    response = http.TextResponse(url='', body=html)
    category = ruled_me.scrape_category(response)
    self.assertEqual(category, 'entree')
def test_scrapes_title_with_multiple_h1(self):
    # Two <h1> elements are present; only the entry-title one should supply
    # the recipe title.
    html = """ <h1 class="entry-title"> <a href="https://www.ketoconnect.net/recipe/cooked-oven-meat/">Cooked Oven Meat</a> </h1> <h1>Non-title text</h1>"""
    response = http.TextResponse(url='', body=html)
    scraped_title = ketoconnect.scrape_title(response)
    self.assertEqual(scraped_title, 'Cooked Oven Meat')
def test_scrapes_simple_category(self):
    # Breadcrumb with nothing before the ">" separator; the "Dinner" link is
    # still expected to yield 'entree'.
    html = """ <div class="postCategories"> > <a rel="nofollow" href="" title="Dinner">Dinner</a> </div>"""
    response = http.TextResponse(url='', body=html)
    category = ruled_me.scrape_category(response)
    self.assertEqual(category, 'entree')
def test_scrapes_opengraph_image(self):
    # The image URL is expected to come from the og:image meta tag.
    html = """ <meta property="og:image" content="https://www.ketoconnect.net/recipe-image.jpg" /> """
    response = http.TextResponse(url='', body=html)
    image_url = ketoconnect.scrape_image(response)
    self.assertEqual(image_url, 'https://www.ketoconnect.net/recipe-image.jpg')
def test_strips_tags_after_colon(self):
    # Each case pairs an entry-title fixture with the title expected once the
    # trailing tag text is removed. The first two separate the tags with a
    # colon; the third uses an en dash.
    cases = [
        (""" <h1 class="entry-title">Vodka Mojito: Low Carb and Sugar-Free</h1>""",
         'Vodka Mojito'),
        (""" <h1 class="entry-title">Hot Buttered Rum Recipe: Low Carb, Sugar Free</h1>""",
         'Hot Buttered Rum Recipe'),
        (""" <h1 class="entry-title">Gin Fizz Cocktail Recipe – Low Carb & Sugar Free!</h1> """,
         'Gin Fizz Cocktail Recipe'),
    ]
    for html, expected_title in cases:
        response = http.TextResponse(url='', body=html)
        self.assertEqual(ketogasm.scrape_title(response), expected_title)
def test_scrapes_non_opengraph_image(self):
    # No og:image meta tag here; the URL is expected to come from the src of
    # the <img> nested inside the tve_editor div.
    html = """ <div id="tve_editor"> <span class="junk"> <img class="tve_image" alt="" style="width: 400px;" src="https://www.ketoconnect.net/recipe-image.jpg" width="400" height="600" data-attachment-id="9282"> </span> </div>"""
    response = http.TextResponse(url='', body=html)
    image_url = ketoconnect.scrape_image(response)
    self.assertEqual(image_url, 'https://www.ketoconnect.net/recipe-image.jpg')
def test_reads_none_category_when_category_not_defined(self):
    # The JSON-LD block declares only @context, @type and name, so there is
    # no category to read and the scraper is expected to return None.
    #
    # The fixture is a raw string: it contains "\/" sequences, which are not
    # valid Python escapes and trigger a warning in a normal string literal.
    # The runtime value is unchanged.
    html = r""" <script type="application/ld+json"> { "@context":"http:\/\/schema.org\/", "@type":"Recipe", "name":"Roasted Pumpkin Seeds Recipe" }"""
    response = http.TextResponse(url='', body=html)
    # assertIsNone checks identity, so a value that merely compares equal to
    # None cannot pass.
    self.assertIsNone(ketogasm.scrape_category(response))
def test_scrapes_reverse_hierarchical_category(self):
    # Here the specific category ("Side Items") is the plain text and the
    # generic "Keto Recipes" is the link; the expected result is 'side'.
    html = """ <html> <h1>Cauliflower Mac & Cheese</h1> <div class="postCategories"> Side Items > <a rel="nofollow" href="https://www.ruled.me/keto-recipes/side-items/" title="Keto Recipes">Keto Recipes</a> </div> </html>"""
    response = http.TextResponse(url='', body=html)
    category = ruled_me.scrape_category(response)
    self.assertEqual(category, 'side')
def parse(metadata, html):
    """Scrape the recipe fields out of a page's HTML.

    Args:
        metadata: Mapping with at least a 'url' key. The URL selects the
            scraper, and the whole mapping is passed to every scraper call.
        html: Page body used to rebuild the scrapy response.

    Returns:
        A dict with the keys 'url', 'title', 'category', 'mainImage',
        'ingredients' and 'publishedTime'.
    """
    url = metadata['url']

    # Reconstruct the scrapy response from HTML.
    response = http.TextResponse(url=url, body=html)
    scraper = _find_scraper(url)

    raw_title = scraper.scrape_title(response, metadata)
    title = titles.canonicalize(raw_title)

    raw_ingredients = scraper.scrape_ingredients(response, metadata)
    ingredients = _parse_ingredients(raw_ingredients)

    category = scraper.scrape_category(response, metadata)
    main_image = scraper.scrape_image(response, metadata)
    published_time = scraper.scrape_published_time(response, metadata)

    return {
        'url': url,
        'title': title,
        'category': category,
        'mainImage': main_image,
        'ingredients': ingredients,
        'publishedTime': published_time,
    }
def test_scrapes_non_opengraph_image(self):
    # The page body is empty, so the only input is the referer in the
    # metadata; the expected category for it is 'entree'.
    # NOTE(review): the name mentions an image, but the body exercises
    # scrape_category - confirm whether the test should be renamed.
    response = http.TextResponse(url='', body='')
    metadata = {
        'referer': 'https://www.ketoconnect.net/main-dishes/',
    }
    category = ketoconnect.scrape_category(response, metadata)
    self.assertEqual(category, 'entree')