Exemplo n.º 1
0
 def test_is_page_candidate_fuzzy_match(self):
     """Check how ``lyrics.is_page_candidate`` judges page titles that
     differ from the expected song title and artist."""
     url = u"http://www.example.com/lazy_madonna_beatles"
     title = u"Lady Madonna"
     artist = u"The Beatles"
     # (page title, expected verdict) pairs, evaluated in this order.
     expectations = [
         # a tiny difference such as a typo is tolerated
         (u"example.com | lazy madonna lyrics by the beatles", True),
         # a different song title is rejected
         (u"example.com | busy madonna lyrics by the beatles", False),
         # (title, artist) swapped with (artist, title) is not a match
         (u"example.com | the beatles lyrics by Lazy Madonna", False),
     ]
     for page_title, expected in expectations:
         verdict = lyrics.is_page_candidate(url, page_title, title, artist)
         self.assertEqual(verdict, expected, url)
Exemplo n.º 2
0
    def test_is_page_candidate_fuzzy_match(self):
        """Match an HTML page title against the song info of ``self.source``
        when that info does not appear verbatim in the title.
        """
        song = self.source
        page_url = song['url'] + song['path']

        # Near miss (typo, eg 'beats' vs 'beets') with same artist: accepted.
        near_miss = lyrics.is_page_candidate(
            page_url, u'example.com | Beats song by John doe',
            song['title'], song['artist'])
        self.assertEqual(near_miss, True, page_url)

        # A different title must be rejected.
        other_title = lyrics.is_page_candidate(
            page_url, u'example.com | seets bong lyrics by John doe',
            song['title'], song['artist'])
        self.assertEqual(other_title, False, page_url)
Exemplo n.º 3
0
    def test_is_page_candidate_fuzzy_match(self):
        """Exercise page-title matching for ``self.source`` in the case where
        the song info is not literally present in the page title."""
        info = self.source
        url = info['url'] + info['path']
        # (page title, expected verdict) pairs, evaluated in this order.
        checks = (
            # typo-level difference (eg 'beats' vs 'beets'), same artist
            (u'example.com | Beats song by John doe', True),
            # different title
            (u'example.com | seets bong lyrics by John doe', False),
        )
        for url_title, expected in checks:
            outcome = lyrics.is_page_candidate(url, url_title, info['title'],
                                               info['artist'])
            self.assertEqual(outcome, expected, url)
Exemplo n.º 4
0
 def test_is_page_candidate_fuzzy_match(self):
     """Check the verdict of ``lyrics.is_page_candidate`` for page titles
     that are close to, or different from, the expected song info."""
     url = u'http://www.example.com/lazy_madonna_beatles'
     title = u'Lady Madonna'
     artist = u'The Beatles'

     def verdict_for(page_title):
         # Only the page title varies between the checks below.
         return lyrics.is_page_candidate(url, page_title, title, artist)

     # A very small difference (typo) is still accepted.
     close_enough = verdict_for(
         u'example.com | lazy madonna lyrics by the beatles')
     self.assertEqual(close_enough, True, url)

     # A different title is rejected.
     wrong_title = verdict_for(
         u'example.com | busy madonna lyrics by the beatles')
     self.assertEqual(wrong_title, False, url)

     # (title, artist) is not interchangeable with (artist, title).
     swapped = verdict_for(
         u'example.com | the beatles lyrics by Lazy Madonna')
     self.assertEqual(swapped, False, url)
Exemplo n.º 5
0
    def test_is_page_candidate_exact_match(self):
        """For each entry of ``self.sourcesOk``, fetch its page and expect the
        page's <title> text to be a candidate for the entry's title/artist."""
        from bs4 import SoupStrainer, BeautifulSoup

        for source in self.sourcesOk:
            page_url = unicode(source["url"] + source["path"])
            markup = lyrics.fetch_url(page_url)
            # Restrict parsing to the <title> element.
            document = BeautifulSoup(
                markup, "html.parser", parse_only=SoupStrainer("title")
            )
            is_candidate = lyrics.is_page_candidate(
                page_url, document.title.string,
                source["title"], source["artist"]
            )
            self.assertEqual(is_candidate, True, page_url)
Exemplo n.º 6
0
 def test_is_page_candidate(self):
     """Fetch each page of ``self.sourcesOk`` and, when the parsed page has
     a title, expect it to be a candidate for the entry's title/artist."""
     for source in self.sourcesOk:
         page_url = unicode(source['url'] + source['path'])
         markup = lyrics.fetch_url(page_url)
         document = BeautifulSoup(markup)
         # Pages whose parse yields no title are not checked.
         if document.title:
             is_candidate = lyrics.is_page_candidate(
                 page_url, document.title.string,
                 source['title'], source['artist'])
             self.assertEqual(is_candidate, True, page_url)
Exemplo n.º 7
0
 def test_is_page_candidate(self):
     """Check every ``self.sourcesOk`` entry whose fetched page has a
     title: that title must be accepted for the entry's title/artist."""
     for entry in self.sourcesOk:
         link = unicode(entry['url'] + entry['path'])
         page = BeautifulSoup(lyrics.fetch_url(link))
         title_tag = page.title
         if not title_tag:
             # No title to compare against; move on to the next entry.
             continue
         accepted = lyrics.is_page_candidate(
             link, title_tag.string, entry['title'], entry['artist'])
         self.assertEqual(accepted, True, link)
Exemplo n.º 8
0
    def test_is_page_candidate(self):
        """Fetch the page of each ``self.sourcesOk`` entry, parse only its
        <title>, and expect that title to match the entry's title/artist."""
        from bs4 import SoupStrainer, BeautifulSoup

        for entry in self.sourcesOk:
            link = unicode(entry['url'] + entry['path'])
            body = lyrics.fetch_url(link)
            # parse_only keeps just the <title> element of the page.
            parsed = BeautifulSoup(body, "html.parser",
                                   parse_only=SoupStrainer('title'))
            page_title = parsed.title.string
            matched = lyrics.is_page_candidate(link, page_title,
                                               entry['title'],
                                               entry['artist'])
            self.assertEqual(matched, True, link)
Exemplo n.º 9
0
 def test_is_page_candidate_exact_match(self):
     """Match the fetched page's HTML title against the song info of
     ``self.source`` -- the case where the info is present in the title.
     """
     from bs4 import SoupStrainer, BeautifulSoup
     song = self.source
     page_url = unicode(song['url'] + song['path'])
     markup = lyrics.fetch_url(page_url)
     # Only the <title> element is parsed out of the fetched page.
     document = BeautifulSoup(markup, "html.parser",
                              parse_only=SoupStrainer('title'))
     page_title = document.title.string
     is_candidate = lyrics.is_page_candidate(page_url, page_title,
                                             song['title'], song['artist'])
     self.assertEqual(is_candidate, True, page_url)
Exemplo n.º 10
0
 def test_is_page_candidate_exact_match(self):
     """Exercise page-title matching for ``self.source`` when the song
     info is present in the title of the fetched page."""
     from bs4 import SoupStrainer, BeautifulSoup
     info = self.source
     link = unicode(info['url'] + info['path'])
     fetched = lyrics.fetch_url(link)
     # Limit the parse to the page's <title> element.
     title_only = SoupStrainer('title')
     parsed = BeautifulSoup(fetched, "html.parser", parse_only=title_only)
     self.assertEqual(
         lyrics.is_page_candidate(
             link, parsed.title.string, info['title'], info['artist']),
         True,
         msg=link)