def test_mocked_source_ok(self):
    """Check that lyrics are scraped correctly from the mocked page.

    No assertion is made when ``url_to_filename`` maps the source URL to
    a path that is not an existing file.
    """
    page_url = self.source['url'] + self.source['path']
    if not os.path.isfile(url_to_filename(page_url)):
        # Nothing to check without the local file for this URL.
        return

    html = lyrics.fetch_url(page_url)
    scraped = lyrics.scrape_lyrics_from_html(html)
    self.assertTrue(lyrics.is_lyrics(scraped), page_url)
    content_ok = is_lyrics_content_ok(self.source['title'], scraped)
    self.assertTrue(content_ok, page_url)
def test_is_page_candidate_exact_match(self):
    """Check ``lyrics.is_page_candidate`` against every ``sourcesOk`` entry.

    For each entry the page is fetched, its ``<title>`` text is extracted
    and the candidate check is expected to equal ``True``.
    """
    from bs4 import SoupStrainer, BeautifulSoup

    for source in self.sourcesOk:
        page_url = unicode(source["url"] + source["path"])
        markup = lyrics.fetch_url(page_url)
        # Restrict parsing to the <title> element; nothing else is read.
        title_only = SoupStrainer("title")
        parsed = BeautifulSoup(markup, "html.parser", parse_only=title_only)
        is_candidate = lyrics.is_page_candidate(
            page_url,
            parsed.title.string,
            source["title"],
            source["artist"],
        )
        self.assertEqual(is_candidate, True, page_url)
def test_is_page_candidate(self):
    """Check ``lyrics.is_page_candidate`` for every ``sourcesOk`` entry.

    Entries whose fetched page yields a falsy ``soup.title`` are skipped
    without any assertion.
    """
    for entry in self.sourcesOk:
        page_url = unicode(entry['url'] + entry['path'])
        document = BeautifulSoup(lyrics.fetch_url(page_url))
        if document.title:
            verdict = lyrics.is_page_candidate(
                page_url,
                document.title.string,
                entry['title'],
                entry['artist'],
            )
            self.assertEqual(verdict, True, page_url)
def test_is_page_candidate(self):
    """Assert that each ``sourcesOk`` page is accepted as a candidate.

    The page title passed to ``lyrics.is_page_candidate`` comes from the
    parsed page; pages with a falsy ``soup.title`` are passed over.
    """
    for src in self.sourcesOk:
        address = unicode(src['url'] + src['path'])
        raw_page = lyrics.fetch_url(address)
        tree = BeautifulSoup(raw_page)
        if not tree.title:
            # No usable title on this page: nothing to match against.
            continue
        outcome = lyrics.is_page_candidate(
            address, tree.title.string, src['title'], src['artist'])
        self.assertEqual(outcome, True, address)
def test_google_sources_ok(self):
    """Check lyrics scraping for the sites listed in ``GOOGLE_SOURCES``.

    These are the websites registered in the beets Google custom search
    engine. The test is skipped when ``check_lyrics_fetched()`` is falsy,
    and any source whose local file is missing is passed over.
    """
    if not check_lyrics_fetched():
        self.skipTest("Run lyrics_download_samples.py script first.")

    for source in GOOGLE_SOURCES:
        page_url = source['url'] + source['path']
        if not os.path.isfile(url_to_filename(page_url)):
            # No local file for this URL: nothing to verify.
            continue
        html = lyrics.fetch_url(page_url)
        scraped = lyrics.scrape_lyrics_from_html(html)
        self.assertTrue(lyrics.is_lyrics(scraped), page_url)
        content_ok = is_lyrics_content_ok(source['title'], scraped)
        self.assertTrue(content_ok, page_url)
def test_is_page_candidate(self):
    """Check that every ``sourcesOk`` page is recognised as a candidate.

    Only the ``<title>`` element of each fetched page is parsed; its text
    is handed to ``lyrics.is_page_candidate`` together with the entry's
    title and artist, and the result must equal ``True``.
    """
    from bs4 import SoupStrainer, BeautifulSoup

    for item in self.sourcesOk:
        link = unicode(item['url'] + item['path'])
        page = lyrics.fetch_url(link)
        strainer = SoupStrainer('title')
        title_soup = BeautifulSoup(page, "html.parser", parse_only=strainer)
        matched = lyrics.is_page_candidate(
            link, title_soup.title.string, item['title'], item['artist'])
        self.assertEqual(matched, True, link)
def test_is_page_candidate_exact_match(self):
    """Match an HTML page title against song info present in that title.

    Uses the single ``self.source`` entry: fetches its page, parses only
    the ``<title>`` element and expects ``lyrics.is_page_candidate`` to
    equal ``True``.
    """
    from bs4 import SoupStrainer, BeautifulSoup

    source = self.source
    page_url = unicode(source['url'] + source['path'])
    markup = lyrics.fetch_url(page_url)
    title_filter = SoupStrainer('title')
    parsed = BeautifulSoup(markup, "html.parser", parse_only=title_filter)
    is_candidate = lyrics.is_page_candidate(
        page_url,
        parsed.title.string,
        source['title'],
        source['artist'],
    )
    self.assertEqual(is_candidate, True, page_url)
def test_sources_ok(self):
    """Check that lyrics are scraped correctly for every ``sourcesOk`` entry.

    Each page is fetched and scraped; the result must pass
    ``lyrics.is_lyrics`` and ``is_lyrics_content_ok`` for the entry's title.
    """
    for source in self.sourcesOk:
        page_url = source['url'] + source['path']
        html = lyrics.fetch_url(page_url)
        scraped = lyrics.scrape_lyrics_from_html(html)
        self.assertTrue(lyrics.is_lyrics(scraped), page_url)
        content_ok = is_lyrics_content_ok(source['title'], scraped)
        self.assertTrue(content_ok, page_url)