def find_song_info(self, url):
    """Download the song page at *url* and populate the song metadata.

    The raw response is decoded as Shift_JIS ('sjis'), silently dropping
    bytes that cannot be decoded. The following attributes are then set,
    in this order: ``self.title``, ``self.artist``, ``self.lyricist`` and
    ``self.composer``.

    Always returns True; no failure of the ``common`` helpers is detected
    here.
    """
    page = common.get_url_content(url).decode('sjis', 'ignore')

    # Outer info table (cellspacing=5) that wraps title, artist and credits.
    # NOTE(review): presumably the helper returns the text between/around
    # the two markers -- confirm against common.get_string_by_start_end_string.
    info_block = common.get_string_by_start_end_string(
        '<table border=0 cellpadding=0 cellspacing=5>',
        '</td></table>',
        page,
    )

    title_cell = common.get_string_by_start_end_string(
        '<td>', '</td>', info_block)
    self.title = common.strip_tags(title_cell)

    artist_cell = common.get_string_by_start_end_string(
        '<td><a href=', '</a></td>', info_block)
    self.artist = common.strip_tags(artist_cell)

    # Nested table (cellspacing=0) holding the lyricist / composer credits.
    credits = common.get_string_by_start_end_string(
        '<table border=0 cellpadding=0 cellspacing=0>',
        '</td></table>',
        info_block,
    )
    self.lyricist = common.get_first_group_by_pattern(
        credits, u'作詞 : (.*)<br>')
    self.composer = common.get_first_group_by_pattern(
        credits, u'作曲 : (.*)</td>')

    return True
def find_lyric(self, html):
    """Extract the lyric from *html* and store it in ``self.lyric``.

    The lyric is looked up via the ``<div id="lyrics-body-text">`` ...
    ``</div>`` markers. ``<br />`` becomes a newline and each
    ``<p class='verse'>`` opener becomes a blank line before the remaining
    tags are stripped and surrounding whitespace is trimmed.

    Always returns True.
    """
    body = common.get_string_by_start_end_string(
        '<div id="lyrics-body-text">', '</div>', html)

    # Convert line and verse breaks to newlines *before* stripping tags,
    # otherwise the layout information would be lost with the markup.
    body = body.replace('<br />', '\n').replace("<p class='verse'>", '\n\n')

    self.lyric = common.strip_tags(body).strip()
    return True
def find_lyric(self, html):
    """Extract the lyric from *html* and store it in ``self.lyric``.

    The lyric is looked up via the ``<div id="lyrics">`` ... ``</div>``
    markers. ``<br/>`` is turned into a newline, the text is passed through
    ``common.unicode2string``, tags are stripped and surrounding whitespace
    is trimmed.

    Always returns True.
    """
    fragment = common.get_string_by_start_end_string(
        '<div id="lyrics">', '</div>', html)

    # Line breaks must become newlines before the tags are removed.
    with_newlines = fragment.replace('<br/>', '\n')

    # NOTE(review): unicode2string presumably decodes escaped characters
    # in the page text -- confirm against the common module.
    decoded = common.unicode2string(with_newlines)

    self.lyric = common.strip_tags(decoded).strip()
    return True
def get_lyric_1st_part(self, html):
    """Return the lyric text found in the ``<canvas id="lyrics" ...>`` element.

    The canvas fragment is located in *html*, its tags are stripped and the
    result is passed through ``common.unicode2string``.

    Returns the converted string, or None (after logging an info message)
    when the canvas fragment is missing or empty.
    """
    canvas = common.get_string_by_start_end_string(
        '<canvas id="lyrics" ', '</canvas>', html)

    if canvas:
        return common.unicode2string(common.strip_tags(canvas))

    logging.info('Failed to get lyric string')
    return None
def parse_lyric(self, url, html):
    """Extract the lyric from *html* and store it in ``self.lyric``.

    The lyric is looked up via the ``<div id="lyric-trunk">`` ... ``</div>``
    markers, stripped of tags, passed through ``common.unicode2string`` and
    trimmed of surrounding whitespace.

    *url* is accepted but not used by this method.

    Returns True on success; logs an error and returns False when the
    lyric fragment is missing or empty (``self.lyric`` is left untouched).
    """
    trunk = common.get_string_by_start_end_string(
        '<div id="lyric-trunk">', '</div>', html)

    if trunk:
        text = common.unicode2string(common.strip_tags(trunk))
        self.lyric = text.strip()
        return True

    logging.error('Failed to parse lyric')
    return False
def parse_artist_title(self, html):
    """Set ``self.title`` and ``self.artist`` from the page description.

    The description is the text delimited by ``"description" content="``
    and the trailing Japanese phrase; it is expected to look like
    ``<title> / <artist>``. The attributes are only assigned when splitting
    on ``' / '`` yields exactly two parts.

    Always returns None.
    """
    head = '"description" content="'
    tail = u'の歌詞ページです'

    description = common.get_string_by_start_end_string(head, tail, html)
    if not description:
        return None

    # Remove the delimiters themselves in case the helper's result still
    # contains them, then trim surrounding whitespace.
    cleaned = description.replace(head, '').replace(tail, '').strip()

    parts = cleaned.split(' / ')
    if len(parts) != 2:
        # Unexpected layout: leave title and artist untouched.
        return None

    self.title = common.unicode2string(parts[0])
    self.artist = common.unicode2string(parts[1])