def parse_songlyrics_songs_by_lyrics(searchString):
    '''
    Uses songlyrics.com for searching a song by a line of its lyrics.

    Every search result (except covers) is verified by fetching its lyrics
    through LyricsGrabber and comparing them to the searched line; the first
    result whose lyrics contain a long enough part of the line wins.

    searchString: the lyrics line to look for (must support .encode("utf8")).

    Returns a string of the form "artist - title" for the first verified
    result, or an empty string if no result could be verified.
    '''
    remove_strings = [
        '(remix)', '(Remix)', '(live)', '(Live)', '(single)', '(Single)'
    ]
    chars_to_trim = ['\n', '\r', ' ', ',', "'", '.', '?', '!']

    # Normalize the searched line exactly like the lyrics are normalized
    # below, so that punctuation in the query cannot break the match.
    needle = searchString.lower()
    for c in chars_to_trim:
        needle = needle.replace(c, '')
    if not needle:
        # Nothing to match against; also avoids a division by zero below.
        return ""

    log.debug(
        "Searching for a song that contains the line '%s' on songlyrics.com..."
        % searchString)
    url = "http://www.songlyrics.com/index.php?section=search&searchW=%s&submit=Search&searchIn4=lyrics" % urllib2.quote(
        searchString.encode("utf8"))
    log.debug('Fetching %s...' % url)

    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    try:
        response = obj.read()
    finally:
        # Release the connection even if reading the body fails.
        obj.close()

    track = ""
    soup = BeautifulSoup(response)
    for div in soup.find_all('div', class_='serpresult'):
        result_text = div.text.lower()
        if 'cover)' in result_text or 'cover]' in result_text:
            continue

        link = div.a
        if link is None or link.img is None:
            continue
        artist = link.img.get('alt')
        title = link.get('title')
        if artist is None or title is None:
            continue

        # str.replace returns a new string; the result must be re-assigned.
        for s in remove_strings:
            title = title.replace(s, '')
        title = title.strip()

        gen = LyricsGrabber.parse(title, artist)
        if not gen:
            continue
        # next() with a default does not raise when no lyrics are produced.
        lyrics = next(gen, None)
        if not lyrics:
            continue

        haystack = lyrics.lyrics.lower()
        for c in chars_to_trim:
            haystack = haystack.replace(c, '')

        # get_matching_blocks() always ends with a zero-size sentinel block,
        # so max() never receives an empty sequence.
        matching_blocks = SequenceMatcher(
            None, haystack, needle).get_matching_blocks()
        biggest_match_block_size = max(
            block.size for block in matching_blocks)

        # Compare against the normalized line's length: that is the string
        # which was actually matched, so a perfect match scores 1.0.
        if 1.0 * biggest_match_block_size / len(needle) > 0.70:
            track = "%s - %s" % (artist, title)
            break

    return track
def parse_shironet_songs_by_lyrics(searchString):
    '''
    Uses shironet.co.il for searching a song by a line of its lyrics.

    Every search result is verified by fetching its lyrics through
    LyricsGrabber and comparing them to the searched line; the first result
    whose lyrics contain a long enough part of the line wins.

    searchString: the lyrics line to look for (must support .encode("utf8")).

    Returns a string of the form "artist - title" for the first verified
    result, or an empty string if no result could be verified.
    '''
    chars_to_trim = ['\n', '\r', ' ', ',', "'", '.', '?', '!']

    # Normalize the searched line exactly like the lyrics are normalized
    # below, so that punctuation in the query cannot break the match.
    needle = searchString.lower()
    for c in chars_to_trim:
        needle = needle.replace(c, '')
    if not needle:
        # Nothing to match against; also avoids a division by zero below.
        return ""

    log.debug(
        "Searching for a song that contains the line '%s' in shironet.co.il..."
        % searchString)
    url = "http://shironet.mako.co.il/searchSongs?q=%s&type=lyrics" % urllib2.quote(
        searchString.encode("utf8"))
    log.debug('Fetching %s...' % url)

    obj = urllib2.urlopen(url, timeout=config.metadata_timeout)
    try:
        response = obj.read()
    finally:
        # Release the connection even if reading the body fails.
        obj.close()

    track = ""
    soup = BeautifulSoup(response)
    for link in soup.find_all('a', class_="search_link_name_big",
                              href=re.compile('wrkid=')):
        title = link.text.strip()

        # The artist is taken from the next <a> sibling of the title link;
        # skip results where no such sibling exists.
        artist_link = link.find_next_sibling('a')
        if artist_link is None:
            continue
        artist = artist_link.text.strip()

        gen = LyricsGrabber.parse(title, artist)
        if not gen:
            continue
        # next() with a default does not raise when no lyrics are produced.
        lyrics = next(gen, None)
        if not lyrics:
            continue

        haystack = lyrics.lyrics.lower()
        for c in chars_to_trim:
            haystack = haystack.replace(c, '')

        # get_matching_blocks() always ends with a zero-size sentinel block,
        # so max() never receives an empty sequence.
        matching_blocks = SequenceMatcher(
            None, haystack, needle).get_matching_blocks()
        biggest_match_block_size = max(
            block.size for block in matching_blocks)

        # Compare against the normalized line's length: that is the string
        # which was actually matched, so a perfect match scores 1.0.
        if 1.0 * biggest_match_block_size / len(needle) > 0.70:
            track = "%s - %s" % (artist, title)
            break

    return track