def got_results (self, result, callback, *data):
    """Scan the search-results page and request the first acceptable hit.

    result   -- raw page body (bytes or text), or None when the fetch failed.
    callback -- called as callback(None, *data) when nothing usable is found;
                otherwise it is passed along to self.parse_lyrics through
                rb.Loader.get_url.
    data     -- extra positional arguments forwarded untouched.

    Rows that do not contain the expected markup are skipped instead of
    raising, so the callback is always reached.
    """
    if result is None:
        callback (None, *data)
        return

    # no indication of anything else..
    if isinstance(result, bytes):
        result = result.decode('iso-8859-1')
    # Drop line breaks so the '.*' in the listing pattern can span the page.
    results = result.replace('\r', '').replace('\n', '')

    listing = re.search('(<tr><td bgcolor="#BBBBBB".*)(More Songs >)', results)
    if listing is not None:
        # The first split element is the text before the first row marker.
        entries = re.split('<tr><td bgcolor="#BBBBBB"', listing.group(1))[1:]
        print("found %d entries; looking for [%s,%s]" % (len(entries), self.title, self.artist))
        for entry in entries:
            url_hit = re.search(r'(/display[^"]*)', entry)
            artist_hit = re.search(r'(Artist:.*html">)([^<]*)', entry)
            title_hit = re.search(r'(/display[^>]*)([^<]*)', entry)
            if url_hit is None or artist_hit is None or title_hit is None:
                # Malformed row: ignore it rather than abort the whole search.
                continue

            url = url_hit.group(1)
            artist = artist_hit.group(2).strip()
            # Group 2 starts with the '>' that closes the anchor tag.
            title = title_hit.group(2)[1:].strip()

            if self.artist != "":
                artist_str = rb.string_match(self.artist, artist)
            else:
                # Unknown artist: force the artist test below to pass.
                artist_str = artist_match + 0.1
            title_str = rb.string_match(self.title, title)

            print("checking [%s,%s]: match strengths [%f,%f]" % (title, artist, title_str, artist_str))
            if title_str > title_match and artist_str > artist_match:
                loader = rb.Loader()
                loader.get_url ('http://display.lyrics.astraweb.com' + url, self.parse_lyrics, callback, *data)
                return

    callback (None, *data)
def got_results (self, result, callback, *data):
    """Scan the search-results page and request the first acceptable hit.

    result   -- page body (bytes or text), or None when the fetch failed.
    callback -- called as callback(None, *data) when nothing usable is found;
                otherwise it is passed along to self.parse_lyrics through
                rb.Loader.get_url.
    data     -- extra positional arguments forwarded untouched.

    Written so the same source runs on Python 2 and Python 3: print is used
    in its single-argument call form and byte payloads are decoded first.
    Rows lacking the expected markup are skipped instead of raising.
    """
    if result is None:
        callback (None, *data)
        return

    if isinstance(result, bytes):
        # The page declares no charset; iso-8859-1 decodes any byte sequence.
        result = result.decode('iso-8859-1')
    # Drop line breaks so the '.*' in the listing pattern can span the page.
    results = re.sub('[\r\n]', '', result)

    found = re.search('(<tr><td bgcolor="#BBBBBB".*)(More Songs >)', results)
    if found is None:
        callback (None, *data)
        return

    entries = re.split('<tr><td bgcolor="#BBBBBB"', found.group(1))
    entries.pop(0)  # text before the first row marker
    print("found %d entries; looking for [%s,%s]" % (len(entries), self.title, self.artist))

    for entry in entries:
        link = re.search(r'(/display[^"]*)', entry)
        by_line = re.search(r'(Artist:.*html">)([^<]*)', entry)
        caption = re.search(r'(/display[^>]*)([^<]*)', entry)
        if link is None or by_line is None or caption is None:
            # Malformed row: ignore it rather than abort the whole search.
            continue

        url = link.group(1)
        artist = by_line.group(2).strip()
        # Group 2 starts with the '>' that closes the anchor tag.
        title = caption.group(2)[1:].strip()

        if self.artist != "":
            artist_str = rb.string_match(self.artist, artist)
        else:
            # Unknown artist: force the artist test below to pass.
            artist_str = artist_match + 0.1
        title_str = rb.string_match(self.title, title)

        print("checking [%s,%s]: match strengths [%f,%f]" % (title, artist, title_str, artist_str))
        if title_str > title_match and artist_str > artist_match:
            loader = rb.Loader()
            loader.get_url ('http://display.lyrics.astraweb.com' + url, self.parse_lyrics, callback, *data)
            return

    callback (None, *data)
def search_artist(self, artist_page, callback, *data):
    """Search artist_page for the link to the artist's own page.

    artist_page -- text of the artist index page, or None when the fetch
                   failed.
    callback    -- called as callback(None, *data) when no link qualifies;
                   otherwise passed on to self.search_song through
                   rb.Loader.get_url.
    data        -- extra positional arguments forwarded untouched.

    Among the links whose rb.string_match score against self.artist_ascii
    exceeds min_artist_match, the highest-scoring one is followed (the first
    one wins a tie). self.artist is replaced by the matched, lower-cased
    name.
    """
    if artist_page is None:
        callback(None, *data)
        return

    sections = re.split(
        '<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>',
        artist_page, 1)
    if len(sections) < 2:
        # Marker not present: the page is not in the expected layout.
        callback(None, *data)
        return

    pattern_link = '<a href="'
    pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
    # The first split element is the text preceding the first anchor.
    links = re.split(pattern_link, sections[1].lower())[1:]

    best_match = None
    best_score = None
    for line in links:
        found = re.findall(pattern_artist, line)
        if not found:
            continue
        artist_link, artist_name = found[0]
        if artist_link.startswith('http:'):
            # Absolute link: the lyrics are located on an external site.
            continue
        artist_name = artist_name.strip()
        smvalue = rb.string_match(artist_name, self.artist_ascii)
        if smvalue > min_artist_match and (
                best_score is None or smvalue > best_score):
            best_score = smvalue
            best_match = (
                'http://www.darklyrics.com/%s' % (artist_link),
                artist_name)

    if best_match is None:
        callback(None, *data)
        return

    artist_url, self.artist = best_match
    loader = rb.Loader()
    loader.get_url(artist_url, self.search_song, callback, *data)
def search_song(self, songlist, callback, *data):
    """Look for the song on the artist's page and fetch its lyrics page.

    The artist page contains a list of all the albums with links to the
    songs' lyrics.

    songlist -- text of the artist page, or None when the fetch failed.
    callback -- called as callback(None, *data) when no song qualifies;
                otherwise passed on to self.parse_lyrics through
                rb.Loader.get_url.
    data     -- extra positional arguments forwarded untouched.

    Titles are compared with spaces removed and in lower case. Among the
    links whose rb.string_match score exceeds min_song_match, the
    highest-scoring one is used (the first one wins a tie). self.title and
    self.titlenumber are updated from it.
    """
    if songlist is None:
        callback(None, *data)
        return

    sections = re.split('LYRICS<BR></FONT>', songlist)
    if len(sections) < 2:
        # Marker not present: the page is not in the expected layout.
        callback(None, *data)
        return
    link_section = sections[1].lower()

    pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
    matches = re.findall(pattern_song.lower(), link_section)
    if not matches:
        callback(None, *data)
        return

    wanted = self.title.lower().replace(' ', '')
    best_match = None
    best_score = None
    for artist, album, number, title in matches:
        # title is already lower case because link_section was lowered.
        smvalue = rb.string_match(title.replace(' ', ''), wanted)
        if smvalue > min_song_match and (
                best_score is None or smvalue > best_score):
            best_score = smvalue
            best_match = self.SongFound(smvalue, title, number, album, artist)

    if best_match is None:
        callback(None, *data)
        return

    url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (
        best_match.artist, best_match.album)
    self.title = best_match.title
    self.titlenumber = best_match.number
    loader = rb.Loader()
    loader.get_url(url, self.parse_lyrics, callback, *data)
def search_artist(self, artist_page, callback, *data):
    """Search for the link to the page of the artist in artist_page.

    artist_page -- text of the artist index page, or None when the fetch
                   failed.
    callback    -- called as callback (None, *data) when no link qualifies;
                   otherwise passed on to self.search_song through
                   rb.Loader.get_url.
    data        -- extra positional arguments forwarded untouched.

    Every relative link is scored with rb.string_match against
    self.artist_ascii; of those above min_artist_match the one with the
    highest score is followed (earliest wins a tie), and self.artist is set
    to its lower-cased name.
    """
    if artist_page is None:
        callback (None, *data)
        return

    parts = re.split ('<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>', artist_page, 1)
    if len (parts) < 2:
        # Without the marker there is no link section to inspect.
        callback (None, *data)
        return
    link_section = parts[1]

    pattern_link = '<a href="'
    pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
    links = re.split (pattern_link, link_section.lower())
    links.pop (0)  # text preceding the first anchor

    chosen = None
    chosen_score = None
    for line in links:
        hits = re.findall (pattern_artist, line)
        if not hits:
            continue
        artist_link, artist_name = hits[0]
        if artist_link.startswith ('http:'):
            # Lyrics are located in external site
            continue
        artist_name = artist_name.strip ()
        smvalue = rb.string_match (artist_name, self.artist_ascii)
        if smvalue <= min_artist_match:
            continue
        if chosen_score is None or smvalue > chosen_score:
            chosen_score = smvalue
            chosen = ('http://www.darklyrics.com/%s' % (artist_link), artist_name)

    if chosen is None:
        callback (None, *data)
        return

    loader = rb.Loader ()
    self.artist = chosen[1]
    loader.get_url (chosen[0], self.search_song, callback, *data)
def got_lyrics (self, lyrics, callback, *data):
    """Parse the XML search response and request the first acceptable hit.

    lyrics   -- XML response body, or None when the fetch failed.
    callback -- called as callback(None, *data) when the response is
                unusable or no result qualifies; otherwise passed on to
                self.parse_lyrics through rb.Loader.get_url.
    data     -- extra positional arguments forwarded untouched.

    A result qualifies when its rb.string_match scores exceed artist_match
    and title_match; an empty self.artist accepts any artist. Unparseable
    XML, a missing <response> element or a code other than '0' are all
    reported as a failed lookup rather than raising.
    """
    # Function-scope imports keep the block usable on Python 2 and Python 3.
    from xml.etree import ElementTree
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    if lyrics is None:
        callback (None, *data)
        return

    try:
        element = ElementTree.fromstring(lyrics)
    except ElementTree.ParseError:
        print("unable to parse response:%s" % (lyrics,))
        callback (None, *data)
        return

    response = element.find("response")
    # Compare by value: identity of equal strings is an implementation detail.
    if response is None or response.attrib.get('code') != '0':
        print("got failed response:%s" % (lyrics,))
        callback (None, *data)
        return

    results = element.find("searchResults")
    matches = results.findall("result") if results is not None else []
    print("got %d result(s)" % (len(matches)))

    match = None
    for candidate in matches:
        matchtitle = candidate.findtext("title")
        matchartist = candidate.findtext("artist/name")

        # if we don't know the artist, then anyone will do
        if self.artist != "":
            artist_str = rb.string_match(self.artist, matchartist)
        else:
            artist_str = artist_match + 0.1
        title_str = rb.string_match(self.title, matchtitle)

        if artist_str > artist_match and title_str > title_match:
            print("found acceptable match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))
            match = candidate
            break
        print("skipping match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))

    if match is None:
        print("no acceptable match found")
        callback (None, *data)
        return

    hid = match.attrib['hid'].encode('utf-8')
    lurl = "http://api.leoslyrics.com/api_lyrics.php?auth=Rhythmbox&hid=%s" % (quote(hid))
    loader = rb.Loader()
    loader.get_url (lurl, self.parse_lyrics, callback, *data)
def search_song (self, songlist, callback, *data):
    """If artist's page is found, search_song looks for the song.

    The artist page contains a list of all the albums and links to the
    songs lyrics from this.

    songlist -- text of the artist page, or None when the fetch failed.
    callback -- called as callback (None, *data) when no song qualifies;
                otherwise passed on to self.parse_lyrics through
                rb.Loader.get_url.
    data     -- extra positional arguments forwarded untouched.

    Each song link is scored with rb.string_match on the space-stripped,
    lower-cased titles; of those above min_song_match the highest score is
    kept (earliest wins a tie). self.title and self.titlenumber are then
    taken from that link.
    """
    if songlist is None:
        callback (None, *data)
        return

    pieces = re.split ('LYRICS<BR></FONT>', songlist)
    if len (pieces) < 2:
        # Without the marker there is no song list to inspect.
        callback (None, *data)
        return
    link_section = pieces[1].lower ()

    pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
    matches = re.findall (pattern_song.lower(), link_section)

    chosen = None
    chosen_score = None
    if matches:
        # Normalise the wanted title once instead of once per link.
        target = self.title.lower ().replace (' ', '')
        for artist, album, number, title in matches:
            smvalue = rb.string_match (title.lower ().replace (' ', ''), target)
            if smvalue <= min_song_match:
                continue
            if chosen_score is None or smvalue > chosen_score:
                chosen_score = smvalue
                chosen = self.SongFound (smvalue, title, number, album, artist)

    if chosen is None:
        callback (None, *data)
        return

    loader = rb.Loader ()
    url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (chosen.artist, chosen.album)
    self.title = chosen.title
    self.titlenumber = chosen.number
    loader.get_url (url, self.parse_lyrics, callback, *data)