def tokenize_lyrics(self, songname, process=True):
    """Tokenize every lyric line of a song and return the tokens per line.

    The lrc text is read through ``self.env.read_lrc_file``, parsed with
    pylrc, reduced to lyric lines by ``self.lrc_to_lyric_list`` and each
    line is then handed to ``self.tokenize_phrase``. The intermediate
    results are kept on the instance as ``lrc_str``, ``lrc_obj``,
    ``lyric_list`` and ``tokens_list``.

    Args:
        songname (str): The name of the song to tokenize.
        process (bool, optional): Whether to process the tokens or simply
            use them as is (as space-separated values in a sentence).
            Forwarded to ``tokenize_phrase``. Defaults to True.

    Returns:
        list: One ``tokenize_phrase`` result per lyric line.
    """
    raw_lrc = self.env.read_lrc_file(songname)
    self.lrc_str = raw_lrc

    parsed_lrc = pylrc.parse(raw_lrc)
    self.lrc_obj = parsed_lrc

    self.lyric_list = self.lrc_to_lyric_list(parsed_lrc)

    logger.info(f'Tokenizing lyrics for song {songname}')
    per_line_tokens = []
    for lyric_line in self.lyric_list:
        per_line_tokens.append(self.tokenize_phrase(lyric_line, process=process))
    logger.debug('Generated token list from lines of lrc file.')

    self.tokens_list = per_line_tokens
    return per_line_tokens
def test_offset(self):
    """Check the parsed timestamps of selected lines of a fixture lrc file.

    Parses 'P!nk - Bridge of Light.lrc' from ``self.static_path`` and
    asserts the hours, minutes, seconds and milliseconds of entries
    0, 4, 24 and 32.
    """
    song_path = os.path.join(self.static_path, 'P!nk - Bridge of Light.lrc')
    with open(song_path, "r", encoding="UTF-8") as song_file:
        parsed = pylrc.parse(song_file.read())

    # (entry index, (hours, minutes, seconds, milliseconds))
    expected_times = (
        (0, (0, 0, -13, -270)),
        (4, (0, 0, 1, 890)),
        (24, (0, 1, 29, 0)),
        (32, (0, 1, 56, 400)),
    )
    for index, (hours, minutes, seconds, milliseconds) in expected_times:
        entry = parsed[index]
        self.assertEqual(entry.hours, hours)
        self.assertEqual(entry.minutes, minutes)
        self.assertEqual(entry.seconds, seconds)
        self.assertEqual(entry.milliseconds, milliseconds)
def lrc_to_lyric_list(self, lrc_str):
    """Return a list with the text of each lyric line, with no timestamps.

    Arguments:
        lrc_str {str} -- A string representation of a valid lrc file format.

    Returns:
        list -- The ``text`` attribute of every entry pylrc parses out of
        ``lrc_str``, in parse order.
    """
    logger.debug('Converting lrc file to list of lyric strings.')
    lyric_texts = []
    for entry in pylrc.parse(lrc_str):
        lyric_texts.append(entry.text)
    return lyric_texts
def tokenize_lyrics(self, songname, process=True):
    """Tokenize the lyrics of a song and return the tokens line by line.

    The lrc text is read through ``self.env.read_lrc_file``, parsed with
    pylrc and converted to lyric lines by ``self.lrc_to_lyric_list``; each
    line is tokenized by ``self.tokenize_phrase``. The intermediate results
    are stored on the instance as ``lrc_str``, ``lrc_obj``, ``lyric_list``
    and ``tokens_list``.

    Args:
        songname (str): The name of the song to tokenize.
        process (bool, optional): Passed on to ``tokenize_phrase`` for every
            line. Defaults to True.

    Returns:
        list: One ``tokenize_phrase`` result per lyric line.
    """
    self.lrc_str = self.env.read_lrc_file(songname)
    self.lrc_obj = pylrc.parse(self.lrc_str)
    self.lyric_list = self.lrc_to_lyric_list(self.lrc_obj)
    logger.info(f'Tokenizing lyrics for song {songname}')
    # Forward ``process`` so that callers passing process=False are honoured;
    # previously the argument was accepted but never used.
    tokens_list = [
        self.tokenize_phrase(lyric_line, process=process)
        for lyric_line in self.lyric_list
    ]
    logger.debug('Generated token list from lines of lrc file.')
    self.tokens_list = tokens_list
    return tokens_list
def test_lrc_to_srt(self):
    """Check lrc-to-SRT conversion against the reference .srt fixtures.

    For every ``.lrc`` file in ``self.static_path`` the file is parsed with
    pylrc and the result of ``toSRT()`` is compared with the content of the
    ``.srt`` file that has the same base name.
    """
    for file_name in os.listdir(self.static_path):
        if not file_name.endswith('.lrc'):
            continue

        lrc_path = os.path.join(self.static_path, file_name)
        # Context managers guarantee the handles are closed even when
        # reading fails part-way through.
        with open(lrc_path) as lrc_file:
            lrc_text = lrc_file.read()
        sub = pylrc.parse(lrc_text)

        srt_path = os.path.splitext(lrc_path)[0] + '.srt'
        with open(srt_path) as srt_file:
            srt_text = srt_file.read()

        self.assertEqual(sub.toSRT(), srt_text)
def zingen(self):
    """Play ``self.liedje`` through VLC and print the lyrics in sync.

    The lrc file at ``self.tekst`` is parsed with pylrc. While the player
    is running, the current playback time is compared with the timestamp
    of the next lyric line; the current line is printed once and the
    cursor advances as soon as the next line's time has been reached.
    The method returns when the player reports that playback has ended,
    was stopped, or failed.
    """
    import time  # local import: only needed for the polling delay below

    with open(self.tekst, encoding="utf8", errors='ignore') as lrc_file:
        lrc_string = lrc_file.read()
    subs = pylrc.parse(lrc_string)

    instance = vlc.Instance()
    player = instance.media_player_new()
    media = instance.media_new_path(self.liedje)
    player.set_media(media)
    events = player.event_manager()
    events.event_attach(vlc.EventType.MediaPlayerEndReached, SongFinished)

    print("Playing {0} from {1}".format(subs.title, subs.artist))
    player.play()

    # The loop used to test a local flag that nothing in this method ever
    # updated, so it could never terminate. The player state is used
    # instead; the SongFinished callback stays attached as before.
    finished_states = (vlc.State.Ended, vlc.State.Stopped, vlc.State.Error)

    line = 0
    num_lines = len(subs)
    line_printed = False
    while player.get_state() not in finished_states:
        sec = player.get_time() / 1000  # get_time() is divided down to seconds
        if line + 1 >= num_lines or sec < subs[line + 1].time:
            # Still inside the current line (or on the last one).
            if num_lines and not line_printed:
                # end/flush belong to print(), not to str.format().
                print("\r {0}".format(subs[line].text.rstrip()), end='', flush=True)
                line_printed = True
            # Yield the CPU instead of spinning while waiting for the next line.
            time.sleep(0.01)
        else:
            line += 1
            line_printed = False
import pylrc  # https://github.com/doakey3/pylrc
import glob

# Generate a .cue sheet for every .lrc file in the current directory.
for file in glob.glob("*.lrc"):
    filename = file[0:7]  # assume fnt-xxx.lrc file format

    with open(file, encoding="utf-8") as lrc_file:
        lrc_lines = lrc_file.readlines()

    # Parse before creating the output so a malformed lrc file does not
    # leave a half-written .cue behind.
    lrc = pylrc.parse(''.join(lrc_lines))
    print(lrc.artist, '-', lrc.title)

    with open(filename + '.cue', 'w', encoding="utf-8") as cue:
        cue.write('\ufeff')  # byte-order mark at the start of the cue sheet
        cue.write('REM GENRE Podcast\n')
        cue.write(
            'REM COMMENT "This cue file has been generated by Manual, @CatVsHumanity on Twitter. No rights reserved."\n'
        )
        cue.write('TITLE "' + lrc.title + '"\n')
        cue.write('PERFORMER "' + lrc.artist + '"\n')
        cue.write('FILE "' + filename + '.mp3' + '" MP3\n')

        trackno = 1
        # The first three lines are skipped — presumably the lrc header
        # tags; confirm against the input files.
        for line in lrc_lines[3:]:
            if ']' not in line:
                # Blank or untagged lines carry no track; splitting them
                # on ']' would raise IndexError.
                continue
            # NOTE(review): the timestamp is extracted but nothing visible
            # here writes it to the cue sheet — presumably an INDEX line is
            # meant to follow the TITLE; confirm.
            tracktime = line[line.find("[") + 1:line.find("]")].replace('.', ':')
            cue.write(' TRACK ' + str(trackno).zfill(2) + ' AUDIO\n')
            cue.write(' TITLE "' + line.split(']', 1)[1].rstrip('\n').replace('"', '\u201d') + '"\n')
            # Advance the counter so tracks are numbered 01, 02, ...
            trackno += 1
def parse_lrc(self, lines):
    """Join ``lines`` with newline characters and parse the text with pylrc.

    Args:
        lines: An iterable of strings, one per lrc line.

    Returns:
        The object returned by ``pylrc.parse`` for the joined text.
    """
    return pylrc.parse("\n".join(lines))