示例#1
0
    def find_song_info(self, content):
        """Fill in title, artist, lyricist and composer from a lyric page.

        Title and artist are read from the ``og:title`` and
        ``og:description`` meta tags of ``content``. Lyricist and composer
        are looked up with ``common.find_string_by_prefix_suffix`` using
        their ``</dt>`` label as prefix and ``</dd>`` as suffix, and are only
        assigned when that lookup yields something.

        Always returns True.
        """
        title_pattern = u'<meta property="og:title" content="(.*?) 歌詞 \|'
        self.title = common.htmlspecialchars_decode(
            common.get_first_group_by_pattern(content, title_pattern)
        )

        artist_pattern = u'<meta property="og:description" content="(.*?)が歌う'
        self.artist = common.htmlspecialchars_decode(
            common.get_first_group_by_pattern(content, artist_pattern)
        )

        closing_tag = '</dd>'
        labels = {
            'lyricist': u'作詞</dt>',
            'composer': u'作曲</dt>',
        }
        for attr_name, label in labels.items():
            raw = common.find_string_by_prefix_suffix(content, label, closing_tag, False)
            if not raw:
                continue
            # Drop markup and surrounding whitespace before decoding entities.
            cleaned = common.strip_tags(raw).strip()
            setattr(self, attr_name, common.htmlspecialchars_decode(cleaned))

        return True
示例#2
0
文件: kget.py 项目: omusico/lyric-get
    def parse_song_info(self, url, html):
        """Parse title, artist, lyricist and composer out of a song page.

        Args:
            url: Page address; only used in the warning logged when the
                title cannot be found.
            html: Markup of the song page.

        Returns:
            True when the title and all three table fields were found,
            False if any of them was missing.
        """
        ret = True

        pattern = '<h1.*?>(.*)</h1>'
        title = common.get_first_group_by_pattern(html, pattern)
        if title:
            title = common.htmlspecialchars_decode(common.strip_tags(title)).strip()
            self.title = title
        else:
            logging.warning('Failed to parse title of url [%s]', url)
            ret = False

        patterns = {
            'artist': '">([^<]*)</a></span></td></tr>',
            'lyricist': 'td>([^<]*)<br></td></tr><tr>',
            'composer': 'td>([^<]*)<br></td></tr></table>',
        }

        prefix = '<table class="lyric-data">'
        suffix = '</table>'
        info_table = common.find_string_by_prefix_suffix(html, prefix, suffix)
        # A page without the data table must not raise on .replace(); treat
        # an empty lookup result as an empty table so that every field below
        # is logged as missing and False is returned.
        info_table = (info_table or '').replace('\n', '')

        for key, pattern in patterns.items():
            value = common.get_first_group_by_pattern(info_table, pattern)
            if value:
                value = common.htmlspecialchars_decode(common.strip_tags(value)).strip()
                setattr(self, key, value)
            else:
                logging.warning('Failed to get %s', key)
                ret = False

        return ret
示例#3
0
    def find_song_info(self, url):
        """Download a song page and fill in title, artist, lyricist, composer.

        Args:
            url: Address of the song page. The response body is decoded as
                Shift_JIS ('sjis') with undecodable bytes ignored.

        Returns:
            True once the four attributes have been assigned, False when
            the page could not be downloaded.
        """
        # Function-scope import: this method did not use logging before.
        import logging

        resp = common.get_url_content(url)
        if not resp:
            # Without this guard a failed download raised AttributeError on
            # .decode() instead of reporting the failure to the caller.
            logging.info('Failed to get content of url [%s]', url)
            return False

        encoding = 'sjis'
        html = resp.decode(encoding, 'ignore')

        prefix = '<table border=0 cellpadding=0 cellspacing=5>'
        suffix = '</td></table>'
        infoString = common.get_string_by_start_end_string(prefix, suffix, html)

        self.title = common.strip_tags(
            common.get_string_by_start_end_string('<td>', '</td>', infoString)
        )

        self.artist = common.strip_tags(
            common.get_string_by_start_end_string('<td><a href=', '</a></td>', infoString)
        )

        # Lyricist and composer are searched for inside a nested table.
        prefix = '<table border=0 cellpadding=0 cellspacing=0>'
        suffix = '</td></table>'
        lyricAndMusic = common.get_string_by_start_end_string(prefix, suffix, infoString)

        pattern = u'作詞 : (.*)<br>'
        self.lyricist = common.get_first_group_by_pattern(lyricAndMusic, pattern)

        pattern = u'作曲 : (.*)</td>'
        self.composer = common.get_first_group_by_pattern(lyricAndMusic, pattern)

        return True
示例#4
0
    def get_song_id(self, url):
        """Return the numeric song id found in ``url``.

        The ``/lyrics/<digits>`` form is tried first, then
        ``/kashi/<digits>``. Returns None for an empty url; otherwise the
        result of the last lookup that was attempted.
        """
        if not url:
            return None

        song_id = None
        for pattern in ('/lyrics/([0-9]+)', '/kashi/([0-9]+)'):
            song_id = common.get_first_group_by_pattern(url, pattern)
            if song_id:
                break

        return song_id
示例#5
0
    def get_csrf_token(self, html):
        """Return the CSRF token referenced by the page's pl-lib.js script.

        The script path is searched for in ``html``, resolved against
        ``site_url``, fetched through ``self.s`` and scanned for the
        ``'X-CSRF-Token', '<value>'`` pair. Returns None when the script
        path is not present in ``html``.
        """
        script_path = common.get_first_group_by_pattern(html, '(/lib/pl-lib.js[^"]+)')
        if script_path:
            script_url = urlparse.urljoin(site_url, script_path)
            response = self.s.get(script_url)
            return common.get_first_group_by_pattern(
                response.text, "'X-CSRF-Token', '(.*?)'"
            )

        return None
示例#6
0
    def find_lyric(self, url):
        """Fetch the lyric for a kasi-time item url and store it in self.lyric.

        The item number is taken from the ``item-<digits>.html`` part of
        ``url`` and used to request the ``item_js.php`` script, whose
        ``document.write`` wrapper and HTML line breaks are then removed.

        Returns True on success, False when the id or the script content
        could not be obtained.
        """
        song_id = common.get_first_group_by_pattern(url, 'item-([0-9]+)\.html')
        if not song_id:
            logging.info('Failed to get id of url [%s]', url)
            return False

        song_url = 'http://www.kasi-time.com/item_js.php?no=' + song_id
        data = common.get_url_content(song_url)
        if not data:
            logging.info('Failed to get content of url [%s]', song_url)
            return False

        text = data.decode('utf-8', 'ignore')
        # Applied in this order: strip the JavaScript wrapper, then turn
        # HTML line breaks and non-breaking spaces into plain characters.
        substitutions = (
            ("document.write('", ""),
            ("');", ""),
            ("<br>", "\n"),
            ("&nbsp;", " "),
        )
        for old, new in substitutions:
            text = text.replace(old, new)

        text = common.htmlspecialchars_decode(text)
        text = common.unicode2string(text)
        text = common.strip_slash(text).strip()

        # test for half to full
        self.lyric = common.half2full(text)

        return True
示例#7
0
    def find_lyric(self, url):
        """Fetch the lyric for an animap url and store it in self.lyric.

        The id is the value of the ``surl`` parameter in ``url``. The lyric
        is everything after the ``test2=`` marker in the flashphp response,
        decoded as 'sjis'.

        Returns True on success, False when the id, the response or the
        marker is missing.
        """
        song_id = common.get_first_group_by_pattern(url, 'surl=([^&=]+)')
        if not song_id:
            logging.error('Failed to get id of url [%s]', url)
            return False

        song_url = 'http://www.animap.jp/kasi/phpflash/flashphp.php?unum=' + song_id
        data = common.get_url_content(song_url)
        if not data:
            logging.error('Failed to get content of url [%s]', song_url)
            return False

        marker = 'test2='
        marker_at = data.find(marker)
        if marker_at == -1:
            logging.error('Failed to find lyric position of url [%s]', url)
            return False

        payload = data[marker_at + len(marker):]
        decoded = payload.decode('sjis').strip()

        # test for half to full
        self.lyric = common.half2full(decoded)

        return True
示例#8
0
    def find_song_info(self, url):
        """Download a song page and fill in title, artist, lyricist, composer.

        Args:
            url: Address of the song page. The response body is decoded as
                'sjis' with undecodable bytes ignored.

        Returns:
            True when all four fields were found, False when the page could
            not be downloaded or any field was missing.
        """
        html = common.get_url_content(url)
        if not html:
            # Without this guard a failed download raised AttributeError on
            # .decode() instead of reporting the failure to the caller.
            logging.info('Failed to get content of url [%s]', url)
            return False

        ret = True
        encoding = 'sjis'
        html = html.decode(encoding, 'ignore')

        patterns = {
            'title': u'<h2[^>]*>([^<]+)</h2>',
            'artist': u'歌手:<h3.*?><a href="/artist/[0-9]+/".*?>(.+?)</a></h3>',
            'lyricist': u'作詞:<h4.*?>([^<]+)</h4>',
            'composer': u'作曲:<h4.*?>([^<]+)</h4>'
        }

        for key, pattern in patterns.items():
            value = common.get_first_group_by_pattern(html, pattern)

            if not value:
                logging.info('Failed to get %s of url [%s]', key, url)
                ret = False
            else:
                value = common.unicode2string(common.strip_tags(value))
                setattr(self, key, value)

        return ret
示例#9
0
    def find_song_info(self, content):
        """Fill in title, artist, lyricist and composer from a lyric page.

        All lookups are made inside the block that
        ``common.find_string_by_prefix_suffix`` returns for the
        ``<div id='lyricBlock'>`` prefix and ``</table>`` suffix. Fields
        whose pattern does not match are only reported at debug level.

        Always returns True.
        """
        info_block = common.find_string_by_prefix_suffix(
            content, "<div id='lyricBlock'>", '</table>', False
        )

        raw_title = common.find_string_by_prefix_suffix(info_block, '<h2>', '</h2>', False)
        self.title = common.htmlspecialchars_decode(common.unicode2string(raw_title))

        field_patterns = {
            'artist': u'>歌:(.*?)</td>',
            'lyricist': u'>作詞:(.*?)</td>',
            'composer': u'>作曲:(.*?)</td>'
        }
        for field, pattern in field_patterns.items():
            found = common.get_first_group_by_pattern(info_block, pattern)
            if not found:
                logging.debug('Failed to get %s, pattern: %s' % (field, pattern, ))
                continue

            decoded = common.htmlspecialchars_decode(found)
            setattr(self, field, common.strip_tags(decoded).strip())

        return True
示例#10
0
    def find_song_info(self, url):
        """Download a song page and read its fields from hidden form inputs.

        Each field is taken from the ``value`` of an
        ``<INPUT type="hidden" name=...>`` tag; the input names are
        ``title``, ``artist``, ``sakusi`` (lyricist) and ``sakyoku``
        (composer).

        Args:
            url: Address of the song page. The response body is decoded as
                'euc_jp' with undecodable bytes ignored.

        Returns:
            True when all four fields were found, False when the page could
            not be downloaded or any field was missing or empty.
        """
        html = common.get_url_content(url)
        if not html:
            # Without this guard a failed download raised AttributeError on
            # .decode() instead of reporting the failure to the caller.
            logging.info('Failed to get content of url [%s]', url)
            return False

        ret = True
        encoding = 'euc_jp'
        html = html.decode(encoding, 'ignore')

        # attribute name on self -> name of the hidden input on the page
        patterns = {
            'title': 'title',
            'artist': 'artist',
            'lyricist': 'sakusi',
            'composer': 'sakyoku',
        }

        for key, key_for_pattern in patterns.items():
            pattern = u'<INPUT type="hidden" name=%s value="([^"]*)">' % (key_for_pattern, )
            value = common.get_first_group_by_pattern(html, pattern)

            if not value:
                logging.info('Failed to get %s of url [%s]', key, url)
                ret = False
            else:
                value = common.htmlspecialchars_decode(value).strip()
                setattr(self, key, value)

        return ret
示例#11
0
    def get_xml_parameters(self, url):
        """Download ``url`` and return the quoted value of its ``query :`` entry.

        The result is whatever ``common.get_first_group_by_pattern`` yields
        for the pattern below.
        """
        page = common.get_url_content(url)
        return common.get_first_group_by_pattern(page, "query +: +'([^']+)'")
示例#12
0
    def find_song_info(self, html):
        """Fill in title, artist, lyricist and composer from a lyric page.

        The title is looked up with a bold white ``<font>`` tag as prefix;
        the other fields are looked up in the small white ``<font>`` block,
        after runs of five or more spaces and all line breaks are removed.

        Returns True when artist, lyricist and composer were all found,
        False as soon as one of them is missing.
        """
        raw_title = common.find_string_by_prefix_suffix(
            html, '<font color="#FFFFFF"><b>', "</b></font>"
        )
        title = common.strip_tags(raw_title).strip()
        if title:
            self.title = title

        info = common.find_string_by_prefix_suffix(
            html, '<font size="-1" color="#FFFFFF">', "</font>"
        )
        # Collapse the block to one line so the patterns below can match.
        info = re.sub("     +", "", info).replace("\r", "").replace("\n", "")

        field_patterns = {"artist": u"歌:(.*?)/", "lyricist": u"詞:(.*?)/", "composer": u"曲:(.*?)<"}
        for field, pattern in field_patterns.items():
            found = common.get_first_group_by_pattern(info, pattern)
            if not found:
                logging.info("Failed to get %s from info %s", field, info)
                return False
            setattr(self, field, found)

        return True
示例#13
0
    def get_lyric_id(self, url):
        """Return the digits following ``lyrics/`` in ``url``.

        Logs an error and returns False when no id is found.
        """
        found = common.get_first_group_by_pattern(url, 'lyrics/([0-9]+)')
        if found:
            return found

        logging.error('Failed to parse id of lyric from url')
        return False
示例#14
0
    def find_song_info(self, html):
        """Set artist and title from the page's ``og:title`` meta tag.

        The tag content is expected to look like ``artist:title``.

        Args:
            html: Markup of the song page.

        Returns:
            True when both values were assigned, False when the tag is
            missing or contains no colon.
        """
        pattern = '<meta property="og:title" content="([^"]+)" />'
        og_title = common.get_first_group_by_pattern(html, pattern)

        # A missing tag used to raise AttributeError, and a value without
        # exactly one colon raised ValueError on unpacking.
        if not og_title or ':' not in og_title:
            return False

        # Split on the first colon only so extra colons do not break the
        # unpacking. NOTE(review): assumes any extra colons belong to the
        # title rather than the artist - confirm against real pages.
        artist, title = og_title.split(':', 1)

        self.title = title
        self.artist = artist

        return True
示例#15
0
    def find_lyric(self, url):
        """Fetch the lyric for an uta-net song url and store it in self.lyric.

        The song id is read from a ``/<letters>/<digits>/`` path segment or,
        failing that, from an ``ID=<digits>`` parameter (old-style urls such
        as http://www.uta-net.com/user/phplib/view_0.php?ID=17248).

        Returns True on success, False when the id, the download or the
        lyric payload is missing.
        """
        song_id = (
            common.get_first_group_by_pattern(url, '/[a-z]+/([0-9]+)/')
            or common.get_first_group_by_pattern(url, 'ID=([0-9]+)')
        )
        if not song_id:
            logging.info('Failed to get id of url [%s]', url)
            return False

        song_url = (
            'http://www.uta-net.com/user/phplib/swf/showkasi.php?ID=%s&WIDTH=530&HEIGHT=810'
            % (song_id, )
        )
        data = common.get_url_content(song_url)
        if not data:
            logging.info('Failed to get content of url [%s]', song_url)
            return False

        # Prefix is '<' followed by two NUL bytes; suffix is a single NUL.
        payload = common.find_string_by_prefix_suffix(data, '<\0\0', '\0', False)
        if not payload:
            logging.error('Failed to get lyric of url [%s]', url)
            return False

        text = unicode(payload, 'utf8').strip()

        # test for half to full
        self.lyric = common.half2full(text)

        return True
示例#16
0
    def parse_song_info(self, html):
        """Set title, artist, lyricist and composer from ``html``.

        The title comes from the ``<title>`` tag; the other three come from
        the three groups matched in the ``artists`` div.

        Returns False (with only the title assigned) when the artists div
        does not match, True otherwise.
        """
        self.title = common.get_first_group_by_pattern(
            html, u'<title>(.+?) 歌詞 / .*</title>'
        )

        credits = common.get_matches_by_pattern(
            html, u"<div class='artists'>歌:(.*) 作詞:(.*) 作曲:(.*)</div>"
        )
        if credits:
            # Group numbers follow the order of the labels in the pattern.
            for index, field in enumerate(('artist', 'lyricist', 'composer'), 1):
                setattr(self, field, credits.group(index))
            return True

        return False
示例#17
0
    def find_song_info(self, content):
        """Fill in artist, title, lyricist and composer from a lyric page.

        Artist and title are taken from the ``og:description`` meta tag
        when it consists of the artist followed by the title in corner
        brackets. Lyricist and composer are taken from the
        ``lyrics_info_text`` block. Anything that cannot be found is only
        logged.

        Args:
            content: Markup of the lyric page.

        Returns:
            Always True; fields that could not be found are left unset.
        """
        pattern = 'og:description" content="(.*)"'
        og_desc = common.get_first_group_by_pattern(content, pattern)
        if og_desc:
            pattern = u'(.*?)「(.*?)」'
            matches = common.get_matches_by_pattern(og_desc, pattern)
            if matches:
                artist = matches.group(1)
                # Remove the leading search-label text from the artist.
                artist = artist.replace(u'歌詞サーチ ', '')
                self.artist = artist
                self.title = matches.group(2)
            else:
                logging.debug('og desc: %s' % (og_desc))

        prefix = '="lyrics_info_text"'
        suffix = '</div>'
        info_text = common.find_string_by_prefix_suffix(content, prefix, suffix, False)
        if not info_text:
            logging.info('Failed to find lyrics info text')
            # Previously execution fell through and crashed on
            # info_text.replace(); there is nothing more to parse here.
            return True
        one_line = info_text.replace('\n', '')

        patterns = {
            'lyricist': u'>作詞</p><p class="info_detail">(.*?)</p>',
            'composer': u'>作曲</p><p class="info_detail">(.*?)</p>',
        }

        for key, pattern in patterns.items():
            value = common.get_first_group_by_pattern(one_line, pattern)
            if value:
                value = common.strip_tags(common.htmlspecialchars_decode(value)).strip()
                setattr(self, key, value)
            else:
                logging.debug('Failed to get %s, pattern: %s' % (key, pattern, ))

        return True
示例#18
0
    def find_song_info(self, html):
        """Set title and artist from the JSON-style keys embedded in ``html``.

        ``musicSongTitle`` supplies the title and ``musicArtistName`` the
        artist; a key that is not found leaves its attribute untouched.

        Always returns True.
        """
        field_patterns = {
            'title': '"musicSongTitle":"(.*?)"',
            'artist': '"musicArtistName":"(.*?)"'
        }
        for field, pattern in field_patterns.items():
            found = common.get_first_group_by_pattern(html, pattern)
            if not found:
                continue
            setattr(self, field, found)

        return True
示例#19
0
    def parse_lyric(self, html):
        """Split the page's ``<PRE>`` block into title, credits and lyric.

        The block is read as three parts separated by blank lines: the
        title, an info paragraph holding the credits, and the lyric itself.

        Args:
            html: Markup of the lyric page.

        Returns:
            True when title and lyric were extracted (individual credits
            may still be missing), False when the ``<PRE>`` block or either
            blank-line separator is absent.
        """
        prefix = '<PRE>'
        suffix = '</PRE>'
        raw_lyric = common.find_string_by_prefix_suffix(html, prefix, suffix, False)
        if not raw_lyric:
            logging.info('Failed to parse lyric from html [%s]', html)
            return False

        raw_lyric = raw_lyric.strip()

        # First blank line: end of the title.
        pos = raw_lyric.find('\n\n')
        if pos < 0:
            logging.info('Failed to find two consecutive newlines')
            return False

        self.title = raw_lyric[0:pos]

        raw_lyric = raw_lyric[pos+2:]

        # Second blank line: end of the credits, start of the lyric.
        pos = raw_lyric.find('\n\n')
        if pos < 0:
            logging.info('Failed to find two consecutive newlines')
            return False

        info = raw_lyric[0:pos]
        self.lyric = raw_lyric[pos+2:]

        patterns = {
            'artist': u'歌:(.+)',
            'lyricist': u'作詞:(.+?)/',
            'composer': u'作曲:(.+?)/',
            'arranger': u'編曲:(.+?)/',
        }

        for key, pattern in patterns.items():
            value = common.get_first_group_by_pattern(info, pattern)

            if not value:
                # The old message referenced ``url``, which is not defined
                # in this method, so a missing field raised NameError.
                logging.info('Failed to get %s', key)
            else:
                value = common.htmlspecialchars_decode(value).strip()
                setattr(self, key, value)

        return True
示例#20
0
    def find_song_info(self, url):
        """Download a song page and fill in title, artist and credits.

        The title comes from the ``<h1>`` tag. Artist, lyricist, composer
        and arranger are looked up by their ``<th>`` label inside the
        ``person_list`` div and are optional.

        Args:
            url: Address of the song page. The response body is decoded as
                UTF-8 with undecodable bytes ignored.

        Returns:
            True when the title was found, False when the page could not
            be downloaded or has no title.
        """
        html = common.get_url_content(url)
        if not html:
            # Without this guard a failed download raised AttributeError on
            # .decode() instead of reporting the failure to the caller.
            logging.error('Failed to get content of url [%s]', url)
            return False

        ret = True
        encoding = 'utf-8'
        html = html.decode(encoding, 'ignore')

        pattern = '<h1>(.*)</h1>'
        value = common.get_first_group_by_pattern(html, pattern)
        # Strip only when something matched; calling .strip() on a failed
        # lookup crashed before the error branch below could run.
        if value:
            value = value.strip()
        if value:
            self.title = value
        else:
            logging.error('Failed to find title of url [%s]', url)
            ret = False

        prefix = '<div class="person_list">'
        suffix = '</div>'
        info_table = common.find_string_by_prefix_suffix(html, prefix, suffix)

        patterns = {
            'artist': u'歌手',
            'lyricist': u'作詞者',
            'composer': u'作曲者',
            'arranger': u'編曲者',
        }

        for key, pattern in patterns.items():
            prefix = u'<th>%s</th>' % (pattern)
            suffix = '</td>'

            value = common.find_string_by_prefix_suffix(info_table, prefix, suffix, False)
            if not value:
                continue
            value = common.strip_tags(value).strip()
            if value:
                setattr(self, key, value)

        return ret
示例#21
0
    def find_lyric(self, url):
        pattern = '\?([0-9]+)'

        song_id = common.get_first_group_by_pattern(url, pattern)
        if not song_id:
            logging.error('Failed to get id of url [%s]', url)
            return False

        params = self.get_hidden_params()

        query = '%s%s&time=%s' % (params['query_prefix'], song_id, time.localtime(), )
        logging.debug('query:%s' % (query, ))

        post_url = 'http://www.kashinavi.com/cgi-bin/kashi.cgi'
        resp = common.get_url_content(post_url, query)
        if not resp:
            logging.error('Failed to get content of url [%s], query [%s]', post_url, query)
            return False

        raw_lyric = resp.decode('utf-8', 'ignore')

        # if parsing rule changed, return debug info
        if raw_lyric.find(u'歌詞ナビTOPページより') > 0:
            self.lyric = '''
Site rule changed!
Please contact franklai
LoadVars::%s::myLoadVars
''' % (params['middle_value'])
            return True
        
        # else remove the useless part and return lyric
        front_str = 'kashi='
        start = raw_lyric.find(front_str) + len(front_str)
        lyric = raw_lyric[start:]
        lyric = lyric.strip()

        self.lyric = lyric

        return True
示例#22
0
    def get_song_id(self, url):
        """Return the digits following ``/song/`` in ``url``.

        The result is whatever ``common.get_first_group_by_pattern`` yields
        for the pattern below.
        """
        return common.get_first_group_by_pattern(url, '/song/([0-9]+)')