def got_results (self, result, callback, *data):
    """Handle a search results page and fetch the first acceptable hit.

    result   -- text of the search results page, or None if the fetch failed
    callback -- called as callback(None, *data) when nothing usable is found;
                otherwise it is handed on to self.parse_lyrics via rb.Loader
    data     -- extra arguments passed through unchanged

    Entries are checked in page order and the first one whose title and
    artist scores (from string_match, defined elsewhere) exceed title_match
    and artist_match is requested from display.lyrics.astraweb.com.
    """
    if result is None:
        callback (None, *data)
        return

    # Flatten the page so the patterns below can span the original line breaks.
    results = re.sub('\n', '', re.sub('\r', '', result))

    # Group 1 is everything from the first result row up to the pager text.
    table = re.search('(<tr><td bgcolor="#BBBBBB".*)(More Songs >)', results)
    if table is not None:
        entries = re.split('<tr><td bgcolor="#BBBBBB"', table.group(1))
        entries.pop(0)  # text before the first row marker is not an entry
        print ("found %d entries; looking for [%s,%s]" % (len(entries), self.title, self.artist))
        for entry in entries:
            url_found = re.search(r'(/display[^"]*)', entry)
            artist_found = re.search(r'(Artist:.*html">)([^<]*)', entry)
            title_found = re.search(r'(/display[^>]*)([^<]*)', entry)
            if url_found is None or artist_found is None or title_found is None:
                # A row without the expected link/artist markup cannot be
                # scored; skip it instead of aborting the whole search.
                continue

            url = url_found.group(1)
            artist = artist_found.group(2).strip()
            # Group 2 starts with the '>' that closes the anchor tag; drop it.
            title = title_found.group(2)[1:].strip()

            # if we don't know the artist, then anyone will do
            if self.artist != "":
                artist_str = string_match(self.artist, artist)
            else:
                artist_str = artist_match + 0.1

            title_str = string_match(self.title, title)

            print ("checking [%s,%s]: match strengths [%f,%f]" % (title.strip(), artist.strip(), title_str, artist_str))
            if title_str > title_match and artist_str > artist_match:
                loader = rb.Loader()
                loader.get_url ('http://display.lyrics.astraweb.com' + url, self.parse_lyrics, callback, *data)
                return

    # No results table, or no entry was good enough.
    callback (None, *data)
def search_artist(self, artist_page, callback, *data):
    """Find the link to the artist's page in artist_page and fetch it.

    artist_page -- text of the artist index page, or None if the fetch failed
    callback    -- called as callback(None, *data) when no artist is found;
                   otherwise it is handed on to self.search_song via rb.Loader
    data        -- extra arguments passed through unchanged

    The page is lower-cased before matching, so on success self.artist is
    replaced by the lower-cased artist name taken from the page.
    """
    if artist_page is None:
        callback (None, *data)
        return

    sections = re.split ('<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>', artist_page, maxsplit=1)
    if len(sections) < 2:
        # The marker that precedes the link list is missing; nothing to parse.
        callback (None, *data)
        return
    link_section = sections[1]

    pattern_link = '<a href="'
    pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
    links = re.split (pattern_link, link_section.lower())
    links.pop(0)  # text before the first anchor

    best_match = ()
    for line in links:
        found = re.search (pattern_artist, line)
        if found is None:
            continue
        artist_link, artist_name = found.groups()
        if artist_link[:5] == 'http:':
            # Lyrics are located in external site
            continue
        artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
        artist_name = artist_name.strip()
        smvalue = string_match (artist_name, self.artist_ascii)
        # Keep the strongest candidate above the threshold; on a tie the
        # earlier link is kept.
        if smvalue > min_artist_match and (not best_match or smvalue > best_match[0]):
            best_match = (smvalue, artist_url, artist_name)

    if not best_match:
        callback (None, *data)
        return

    loader = rb.Loader ()
    self.artist = best_match[2]
    loader.get_url (best_match[1], self.search_song, callback, *data)
def got_lyrics(self, lyrics, callback, *data):
    """Handle the XML search response and fetch the first acceptable hit.

    lyrics   -- XML text of the search response, or None if the fetch failed
    callback -- called as callback(None, *data) when the response reports a
                failure or holds no acceptable result; otherwise it is handed
                on to self.parse_lyrics via rb.Loader
    data     -- extra arguments passed through unchanged

    Results are checked in document order; the first one whose artist and
    title scores (from string_match, defined elsewhere) exceed artist_match
    and title_match is requested from api.leoslyrics.com by its 'hid'.
    """
    if lyrics is None:
        callback(None, *data)
        return

    element = cElementTree.fromstring(lyrics)

    # Compare by value, not identity, and use 'is None' because an element
    # without children is falsy. A missing <response> counts as a failure.
    response = element.find("response")
    if response is None or response.attrib.get('code') != '0':
        print ("got failed response:" + lyrics)
        callback(None, *data)
        return

    results = element.find("searchResults")
    matches = results.findall("result") if results is not None else []
    print ("got %d result(s)" % (len(matches)))

    match = None
    for m in matches:
        matchtitle = m.findtext("title")
        matchartist = m.findtext("artist/name")

        # if we don't know the artist, then anyone will do
        if self.artist != "":
            artist_str = string_match(self.artist, matchartist)
        else:
            artist_str = artist_match + 0.1

        title_str = string_match(self.title, matchtitle)
        if artist_str > artist_match and title_str > title_match:
            print ("found acceptable match, artist: %s (%f), title: %s (%f)" % (
                matchartist, artist_str, matchtitle, title_str))
            match = m
            break
        else:
            print ("skipping match, artist: %s (%f), title: %s (%f)" % (
                matchartist, artist_str, matchtitle, title_str))

    if match is None:
        print ("no acceptable match found")
        callback(None, *data)
        return

    # Read the id from the accepted result rather than from the loop variable.
    hid = match.attrib['hid'].encode('utf-8')
    lurl = "http://api.leoslyrics.com/api_lyrics.php?auth=Rhythmbox&hid=%s" % (
        urllib.quote(hid))
    loader = rb.Loader()
    loader.get_url(lurl, self.parse_lyrics, callback, *data)
def got_lyrics (self, lyrics, callback, *data):
    """Handle the XML search response and fetch the first acceptable hit.

    lyrics   -- XML text of the search response, or None if the fetch failed
    callback -- called as callback (None, *data) when the response reports a
                failure or holds no acceptable result; otherwise it is handed
                on to self.parse_lyrics via rb.Loader
    data     -- extra arguments passed through unchanged

    Results are checked in document order; the first one whose artist and
    title scores (from string_match, defined elsewhere) exceed artist_match
    and title_match is requested from api.leoslyrics.com by its 'hid'.
    """
    if lyrics is None:
        callback (None, *data)
        return

    element = cElementTree.fromstring(lyrics)

    # Value comparison instead of 'is not': string identity is an
    # implementation detail. 'is None' is required because an element
    # without children is falsy. A missing <response> counts as a failure.
    response = element.find("response")
    if response is None or response.attrib.get('code') != '0':
        print ("got failed response:" + lyrics)
        callback (None, *data)
        return

    results = element.find("searchResults")
    if results is None:
        matches = []
    else:
        matches = results.findall("result")
    print ("got %d result(s)" % (len(matches)))

    match = None
    for m in matches:
        matchtitle = m.findtext("title")
        matchartist = m.findtext("artist/name")

        # if we don't know the artist, then anyone will do
        if self.artist != "":
            artist_str = string_match(self.artist, matchartist)
        else:
            artist_str = artist_match + 0.1

        title_str = string_match(self.title, matchtitle)
        if artist_str > artist_match and title_str > title_match:
            print ("found acceptable match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))
            match = m
            break
        else:
            print ("skipping match, artist: %s (%f), title: %s (%f)" % (matchartist, artist_str, matchtitle, title_str))

    if match is None:
        print ("no acceptable match found")
        callback (None, *data)
        return

    # Read the id from the accepted result rather than from the loop variable.
    hid = match.attrib['hid'].encode('utf-8')
    lurl = "http://api.leoslyrics.com/api_lyrics.php?auth=Rhythmbox&hid=%s" % (urllib.quote(hid))
    loader = rb.Loader()
    loader.get_url (lurl, self.parse_lyrics, callback, *data)
def search_song (self, songlist, callback, *data):
    """Look for the wanted song on the artist's page and fetch its lyrics.

    The artist page contains a list of all the albums and links to the
    songs' lyrics.

    songlist -- text of the artist page, or None if the fetch failed
    callback -- called as callback (None, *data) when no song is found;
                otherwise it is handed on to self.parse_lyrics via rb.Loader
    data     -- extra arguments passed through unchanged

    On success self.title and self.titlenumber are set from the chosen link
    (the page is lower-cased before matching, so the title is lower case).
    """
    if songlist is None:
        callback (None, *data)
        return

    sections = re.split('LYRICS<BR></FONT>', songlist)
    if len(sections) < 2:
        # The marker that precedes the song list is missing; nothing to parse.
        callback (None, *data)
        return
    link_section = sections[1].lower()

    # Each song link carries the artist, album and track anchor in its href
    # and the song title as its text.
    pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
    matches = re.findall (pattern_song.lower(), link_section)

    best_match = None
    best_value = None
    for artist, album, number, title in matches:
        # Titles are compared with spaces removed.
        smvalue = string_match (title.lower().replace(' ', '' ), self.title.lower().replace(' ', ''))
        # Keep the strongest candidate above the threshold; on a tie the
        # earlier link is kept.
        if smvalue > min_song_match and (best_match is None or smvalue > best_value):
            best_value = smvalue
            best_match = self.SongFound(smvalue, title, number, album, artist)

    if best_match is None:
        callback (None, *data)
        return

    loader = rb.Loader ()
    url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (best_match.artist, best_match.album)
    self.title = best_match.title
    self.titlenumber = best_match.number
    loader.get_url (url, self.parse_lyrics, callback, *data)
def get_best_match_urls (self, search_results):
    """Return the image URLs of the best-matching item in search_results.

    search_results -- sequence of result sets; a set without an "Item"
                      attribute is treated as an unsuccessful search and
                      skipped

    Every item accepted by self.__valid_match is scored against
    self.search_album and self.search_artist with string_match (defined
    elsewhere). Returns the "LargeImage" and "MediumImage" URLs (in that
    order, whichever exist) of the strongest item when its strength exceeds
    MINIMUM_MATCH, and an empty list otherwise.
    """
    best = None
    best_match = 0.0

    print ("attempting to find best search result from %d sets" % len(search_results))
    for result in search_results:
        if not hasattr (result, "Item"):
            # Search was unsuccessful, try next batch job
            continue

        # Build a real list so len() does not depend on what filter() returns.
        valid = [item for item in result.Item if self.__valid_match (item)]
        print ("%d valid results in this set" % len(valid))
        for item in valid:
            album_match = DEFAULT_MATCH
            item_album = str(getattr(item.ItemAttributes, "Title", ""))
            if item_album != "":
                album_match = string_match (self.search_album, item_album)

            # match against all returned artist names, taking the best result
            item_artists = getattr(item.ItemAttributes, "Artist", [])
            if len(item_artists) > 0:
                artist_match = 0.0
                for artist in item_artists:
                    # special case 'various artists' for great justice
                    if artist == "Various Artists":
                        m = DEFAULT_MATCH
                    else:
                        m = string_match (self.search_artist, artist)
                    if m > artist_match:
                        artist_match = m
            else:
                artist_match = DEFAULT_MATCH

            # figure out the match strength
            if self.search_album == "":
                this_match = artist_match
            elif self.search_artist == "":
                this_match = album_match
            else:
                # this probably isn't the best way to combine match strengths.
                # extremely low values for one match should disqualify a result completely,
                # so we don't end up with the wrong album for the right artist.
                this_match = (artist_match + album_match) / 2

            # reject results with a match strength below a given floor
            # (the status gets its own name so the outer loop variable
            # 'result' is not overwritten)
            if album_match < REJECT_MATCH or artist_match < REJECT_MATCH:
                verdict = "rejected"
            elif this_match > best_match:
                best = item
                best_match = this_match
                verdict = "best"
            else:
                verdict = "discarded"
            print ("search result: \"%s\" (%f) by \"%s\" (%f): %f, %s" % (item_album, album_match, str(item_artists), artist_match, this_match, verdict))

    print ("best result has match strength %f" % best_match)
    # Without a candidate there is nothing to read image URLs from.
    if best is not None and best_match > MINIMUM_MATCH:
        urls = [getattr (best, size).URL for size in ("LargeImage", "MediumImage") if hasattr (best, size)]
        if urls:
            print ("got urls: %s" % urls)
            return urls

    # No search was successful
    print ("no search results to return")
    return []
def get_best_match_urls(self, search_results):
    """Return the image URLs of the best-matching item in search_results.

    search_results -- sequence of result sets; a set without an "Item"
                      attribute is treated as an unsuccessful search and
                      skipped

    Every item accepted by self.__valid_match is scored against
    self.search_album and self.search_artist with string_match (defined
    elsewhere). Returns the "LargeImage" and "MediumImage" URLs (in that
    order, whichever exist) of the strongest item when its strength exceeds
    MINIMUM_MATCH, and an empty list otherwise.
    """
    best = None
    best_match = 0.0

    print ("attempting to find best search result from %d sets" % len(
        search_results))
    for result in search_results:
        if not hasattr(result, "Item"):
            # Search was unsuccessful, try next batch job
            continue

        # Build a real list so len() does not depend on what filter() returns.
        valid = [item for item in result.Item if self.__valid_match(item)]
        print ("%d valid results in this set" % len(valid))
        for item in valid:
            album_match = DEFAULT_MATCH
            item_album = str(getattr(item.ItemAttributes, "Title", ""))
            if item_album != "":
                album_match = string_match(self.search_album, item_album)

            # match against all returned artist names, taking the best result
            item_artists = getattr(item.ItemAttributes, "Artist", [])
            if len(item_artists) > 0:
                artist_match = 0.0
                for artist in item_artists:
                    # special case 'various artists' for great justice
                    if artist == "Various Artists":
                        m = DEFAULT_MATCH
                    else:
                        m = string_match(self.search_artist, artist)
                    if m > artist_match:
                        artist_match = m
            else:
                artist_match = DEFAULT_MATCH

            # figure out the match strength
            if self.search_album == "":
                this_match = artist_match
            elif self.search_artist == "":
                this_match = album_match
            else:
                # this probably isn't the best way to combine match strengths.
                # extremely low values for one match should disqualify a result completely,
                # so we don't end up with the wrong album for the right artist.
                this_match = (artist_match + album_match) / 2

            # reject results with a match strength below a given floor
            # (the status gets its own name so the outer loop variable
            # 'result' is not overwritten)
            if album_match < REJECT_MATCH or artist_match < REJECT_MATCH:
                status = "rejected"
            elif this_match > best_match:
                best = item
                best_match = this_match
                status = "best"
            else:
                status = "discarded"
            print ("search result: \"%s\" (%f) by \"%s\" (%f): %f, %s" % (
                item_album, album_match, str(item_artists), artist_match,
                this_match, status))

    print ("best result has match strength %f" % best_match)
    # Without a candidate there is nothing to read image URLs from.
    if best is not None and best_match > MINIMUM_MATCH:
        urls = [
            getattr(best, size).URL
            for size in ("LargeImage", "MediumImage")
            if hasattr(best, size)
        ]
        if urls:
            print ("got urls: %s" % urls)
            return urls

    # No search was successful
    print ("no search results to return")
    return []