def write_file(self, fname):
    """Serialize the resolved subtitles and write them to ``fname`` or stdout.

    The output format is the extension of ``fname``; when ``fname`` is
    ``None`` the input format (``self._sub_format``) is reused.

    :param fname: destination path. A falsy value (``None`` or ``''``)
        sends the encoded bytes to standard output instead.
    :raises NotImplementedError: if the input/output format pair is not
        one of the supported conversions.
    """
    # TODO: converter to go between self.subs_format and out_format
    if fname is None:
        out_format = self._sub_format
    else:
        out_format = os.path.splitext(fname)[-1][1:]
    subs = list(self.gen_raw_resolved_subs())
    if self._sub_format in ('ssa', 'ass'):
        ssaf = pysubs2.SSAFile()
        ssaf.events = subs
        ssaf.styles = self.styles
        if self.info is not None:
            ssaf.info = self.info
        to_write = ssaf.to_string(out_format)
    elif self._sub_format == 'srt' and out_format in ('ssa', 'ass'):
        to_write = pysubs2.SSAFile.from_string(
            srt.compose(subs)).to_string(out_format)
    elif out_format == 'srt':
        to_write = srt.compose(subs)
    else:
        raise NotImplementedError('unsupported output format: %s' % out_format)

    to_write = to_write.encode(self.encoding)
    if fname:
        with open(fname, 'wb') as f:
            f.write(to_write)
    elif six.PY3:
        # closefd=False: flush on exit but keep the process-wide stdout
        # descriptor open, so later prints do not fail.
        with open(sys.stdout.fileno(), 'wb', closefd=False) as f:
            f.write(to_write)
    else:
        # Python 2: write directly; wrapping sys.stdout in ``with`` would
        # close it for the rest of the process.
        sys.stdout.write(to_write)
        sys.stdout.flush()
def write_srt_to_file(fname, subs):
    """Compose ``subs`` as SRT and write it, UTF-8 encoded, to ``fname``.

    :param fname: destination path; a falsy value writes to standard output.
    :param subs: iterable of subtitles accepted by ``srt.compose``.
    :return: whatever the underlying ``write`` call returns.
    """
    if sys.version_info[0] > 2:
        if fname:
            with open(fname, 'w', encoding='utf-8') as f:
                return f.write(srt.compose(subs))
        # closefd=False keeps the stdout descriptor usable after this call;
        # the wrapper is still flushed when the block exits.
        with open(sys.stdout.fileno(), 'w', encoding='utf-8',
                  closefd=False) as f:
            return f.write(srt.compose(subs))

    if fname:
        with open(fname, 'w') as f:
            return f.write(srt.compose(subs).encode('utf-8'))
    # Python 2: do not use ``with sys.stdout`` -- it would close stdout.
    return sys.stdout.write(srt.compose(subs).encode('utf-8'))
def write_file(self, fname: str) -> None:
    """Serialize the resolved subtitles and write them to ``fname`` or stdout.

    The output format is the extension of ``fname``; when ``fname`` is
    ``None`` the input format (``self._sub_format``) is reused.  For SSA/ASS
    input, the stored styles, info and opaque fonts are copied to the output
    when they are not ``None``.

    :param fname: destination path. A falsy value (``None`` or ``''``)
        sends the encoded bytes to standard output instead.
    :raises NotImplementedError: if the input/output format pair is not
        one of the supported conversions.
    """
    # TODO: converter to go between self.subs_format and out_format
    if fname is None:
        out_format = self._sub_format
    else:
        out_format = os.path.splitext(fname)[-1][1:]
    subs = list(self.gen_raw_resolved_subs())
    if self._sub_format in ("ssa", "ass"):
        ssaf = pysubs2.SSAFile()
        ssaf.events = subs
        if self._styles is not None:
            ssaf.styles = self._styles
        if self._info is not None:
            ssaf.info = self._info
        if self._fonts_opaque is not None:
            ssaf.fonts_opaque = self._fonts_opaque
        to_write = ssaf.to_string(out_format)
    elif self._sub_format == "srt" and out_format in ("ssa", "ass"):
        to_write = pysubs2.SSAFile.from_string(
            srt.compose(subs)).to_string(out_format)
    elif out_format == "srt":
        to_write = srt.compose(subs)
    else:
        raise NotImplementedError("unsupported output format: %s" % out_format)

    to_write = to_write.encode(self._encoding)
    if fname:
        with open(fname, "wb") as f:
            f.write(to_write)
    elif six.PY3:
        # closefd=False: flush on exit but keep the process-wide stdout
        # descriptor open, so later prints do not fail.
        with open(sys.stdout.fileno(), "wb", closefd=False) as f:
            f.write(to_write)
    else:
        # Python 2: write directly; wrapping sys.stdout in ``with`` would
        # close it for the rest of the process.
        sys.stdout.write(to_write)
        sys.stdout.flush()
def write_file(self, fname):
    """Compose this object's subtitles as SRT and write them out.

    ``self`` is passed straight to ``srt.compose`` and the text is written
    using ``self.encoding``.

    :param fname: destination path; a falsy value writes to standard output.
    :return: whatever the underlying ``write`` call returns.
    """
    if sys.version_info[0] > 2:
        if fname:
            with open(fname, 'w', encoding=self.encoding) as f:
                return f.write(srt.compose(self))
        # closefd=False keeps the stdout descriptor usable after this call;
        # the wrapper is still flushed when the block exits.
        with open(sys.stdout.fileno(), 'w', encoding=self.encoding,
                  closefd=False) as f:
            return f.write(srt.compose(self))

    if fname:
        with open(fname, 'w') as f:
            return f.write(srt.compose(self).encode(self.encoding))
    # Python 2: do not use ``with sys.stdout`` -- it would close stdout.
    return sys.stdout.write(srt.compose(self).encode(self.encoding))
def print_for_align(self, count=4):
    """Print the first and last ``count`` cues of each track side by side.

    Each track in ``self.subs`` is composed to SRT text, wrapped to
    ``COLUMN_WIDTH`` characters and printed as a column; a dashed rule
    separates the leading cues from the trailing ones.  Only the first two
    columns are printed, so at least two tracks are expected.

    :param count: number of cues taken from each end of every track.
    """
    def show_columns(texts):
        # One wrapped column per composed text; rows are padded with "".
        columns = []
        for text in texts:
            wrapped = []
            for line in text.splitlines():
                wrapped.extend(textwrap.wrap(line, COLUMN_WIDTH))
            columns.append(wrapped)
        for row in itertools.zip_longest(*columns, fillvalue=""):
            left = row[0] + (COLUMN_WIDTH - len(row[0])) * " "
            print("{} | {}".format(left, row[1]))

    # The leading slice is composed with the default re-indexing.
    show_columns(srt.compose(track.sub[:count]) for track in self.subs)
    print("----------------------------------------------------")
    # The trailing slice keeps whatever indices the cues already carry.
    show_columns(
        srt.compose(track.sub[-count:], reindex=False) for track in self.subs
    )
def write_srt_to_file(fname, subs):
    """Compose ``subs`` as SRT and write it to ``fname`` (UTF-8) or stdout.

    :param fname: destination path, or ``None`` to write to ``sys.stdout``.
    :param subs: iterable of subtitles accepted by ``srt.compose``.
    :return: whatever the underlying ``write`` call returns.
    """
    if fname is not None:
        if sys.version_info[0] > 2:
            with open(fname, 'w', encoding='utf-8') as out:
                return out.write(srt.compose(subs))
        # Python 2 ``open`` has no encoding argument: encode by hand.
        with open(fname, 'w') as out:
            return out.write(srt.compose(subs).encode('utf-8'))
    return sys.stdout.write(srt.compose(subs))
def generate_srt(ctx, result, **kwargs):
    """Accumulate per-frame subtitles and write the SRT files near the end.

    Reads ``output_fps``, ``frame_num`` and ``duration`` from
    ``kwargs['metadata']``.  The detected class counts (and, when
    ``ctx.build_caption`` is set, the first caption) are added to the
    module-level ``object_srt`` / ``caption_srt`` lists: a cue is extended
    when its text equals the previous cue's, otherwise a new cue is
    appended.  Once the current time is within two seconds of the duration,
    both lists are written to the paths configured in ``PARAMS``.
    """
    # Generate srt file for video.
    global object_srt
    global caption_srt

    metadata = kwargs['metadata']
    fps = metadata['output_fps']
    current_time = float(metadata['frame_num']) / fps
    step = datetime.timedelta(milliseconds=1. / fps * 1000)
    duration = kwargs['metadata']['duration']

    def record(track, text):
        # The very first cue starts at zero; later ones one frame back.
        if track:
            begin = datetime.timedelta(seconds=current_time) - step
        else:
            begin = datetime.timedelta(milliseconds=0)
        finish = begin + step
        cue = srt.Subtitle(index=len(track) + 1, start=begin, end=finish,
                           content=text)
        if track and track[-1].content == text:
            # Same text as the previous cue: stretch it instead.
            track[-1].end = finish
        else:
            track.append(cue)

    object_classes = collections.Counter(result['detection_classes'])
    classes_string = ', '.join(
        [f'{name}: {count}' for name, count in object_classes.items()])
    record(object_srt, classes_string)

    if ctx.build_caption:
        captions = result['captions']
        if len(captions) > 0:
            record(caption_srt, captions[0])

    if current_time + 2 >= duration:
        with open(PARAMS['objects_srt_file'], 'w') as sw:
            sw.write(srt.compose(object_srt))
        with open(PARAMS['captions_srt_file'], 'w') as sw:
            sw.write(srt.compose(caption_srt))
def cleanLine(text_in):
    """Trim cue text, drop empty cues and renumber the remaining ones.

    :param text_in: SRT text; parse errors are ignored.
    :return: ``(0, srt_text)`` when nothing had to be changed, otherwise
        ``(number_of_remaining_cues, srt_text)``.  ``None`` is returned when
        no cues are found or when an unexpected error is logged.
    """
    try:
        subs = list(srt.parse(text_in, ignore_errors=True))
        if not subs:
            logger.debug('No subtitles found.')
            return None

        # Trim white space, remembering the SRT index of each changed cue.
        text_stripped = []
        for cue in subs:
            trimmed = cue.content.strip()
            if cue.content != trimmed:
                text_stripped.append(cue.index)
            cue.content = trimmed

        # List positions (0-based, not SRT indices) of cues left empty,
        # highest first so they can be deleted without shifting the rest.
        to_delete = [pos for pos in reversed(range(len(subs)))
                     if not subs[pos].content]
        for pos in to_delete:
            del subs[pos]

        # Renumber from 1.
        for number, cue in enumerate(subs, 1):
            cue.index = number

        if not text_stripped and not to_delete:
            logger.debug("CleanLine, Subtitle clean. No changes made.")
            return 0, srt.compose(subs)

        logger.debug("Index of subtitles deleted: {0}".format(
            [pos + 1 for pos in to_delete]))
        logger.debug(
            "Index of subtitles trimmed: {0}".format(text_stripped))
        logger.debug('{0} deleted, {1} trimmed'.format(
            len(to_delete), len(text_stripped)))
        return len(subs), srt.compose(subs)
    except Exception as e:
        logger.debug(f"CleanSubtitle_CL, unexpected error: {e}")
def write_srt(self, srt_filename):
    """Write ``self.subtitles`` to ``srt_filename`` as SRT text.

    If the file cannot be opened because permission is denied, a Windows
    message box is shown and the method returns without writing.  On
    platforms without ``ctypes.windll`` the ``PermissionError`` propagates.

    :param srt_filename: path of the file to create or overwrite.
    """
    try:
        srt_file = open(srt_filename, 'w')
    except PermissionError:
        # open() raises rather than returning None, so the failure has to
        # be caught here for the message box to ever be shown.
        windll = getattr(ctypes, "windll", None)
        if windll is None:
            raise
        windll.user32.MessageBoxW(0, "cannot write %s permission denied" % srt_filename, "Text 2 subtitles", 1 + 0x30)
        return
    with srt_file:
        print(srt.compose(self.subtitles), file=srt_file)
def make_subs(filename,sub_data): filename = filename.replace('.mov','.srt') print filename, sub_data subtitles = [] for item in sorted(sub_data,key=int): print item, sub_data[item] seconds = int(item) index = 1 start = timedelta(0,seconds,0) end = timedelta(0,seconds+10,0) content = sub_data[item] s = Subtitle(index, start, end, content, proprietary='') subtitles.append(s) # print srt.compose(subtitles) print len(subtitles) file = open(path + filename, "wb") file.write(srt.compose(subtitles).encode('utf-8')) file.close() print "output to:",path+filename
def test_parsing_no_content(subs):
    """Cues with empty content survive a non-strict compose/parse round trip."""
    for entry in subs:
        entry.content = ""

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
def cleanUp(text_in):
    """Normalize SRT text and strip bracketed notes and stray punctuation.

    The input is parsed (ignoring errors) and re-composed, then a fixed
    sequence of regular-expression substitutions is applied to the text.

    :param text_in: SRT text.
    :return: the cleaned text, or ``None`` if a substitution step raised
        (the error is logged at debug level).
    """
    # Pattern notes:
    #   round brackets                    '(\([^\)]*\))'
    #   square brackets                   '(\[[^]]*\])'
    #   curly braces                      '(\{[^}]*\})'
    #   dashes at the start of an empty line  '^\s*?\-+\s*?(?<=$)'
    #   period at the end, empty line     '(^\s*?\.+)$'
    #   comma at the end, empty line      '(^\s*?,+)$'
    #   semicolon at the end, empty line  '(^\s*?;+)$'
    #   space at the end of a line        '(\s*?)$'
    #   exclamation marks                 '(^\s*?!+\s*?)$'
    #   question mark                     '(^\s*?\?+\s*?)$'
    #   first empty line                  '(?<=,\d\d\d)\n\n(?=\w)'
    reg_4 = re.compile(
        r"^\s*\-\.+\s+|(([A-Z ]*){1,3}(?=\:)(?<![0-9a-z])\:\s)|^[ \t]*", re.M)
    reg_P6 = re.compile(
        r"(\([^\)]*\))|(\[[^]]*\])|(\{[^}]*\})|(<i>\s*<\/i>)|^\s*?\-+\s*?(?<=$)",
        re.M)
    reg_P8 = re.compile(
        r"(\s*?)$|(^\s*?\.+)$|(^\s*?,+)$|(^\s*?;+)$|(^\s*?!+\s*?)$|(^\s*?\?+\s*?)$",
        re.M,
    )
    # Raw string: the regex engine reads \n as a newline itself, and \d / \w
    # are no longer invalid escapes in a plain string literal.
    reg_S9 = re.compile(
        r"(?<=,\d\d\d)\n\n(?=\w)|(?<=,\d\d\d)\n\n(?=\s*\S*?)", re.M)
    # Space at the start of a line, and a dash at the start of an empty line.
    reg8a = re.compile(r'^\s*(?<=.)|^-(?<=$)', re.M)
    # Colon right after a leading dash.
    regN = re.compile(r'(?<=^-)\:\s*', re.M)
    regColon = re.compile(r"^\s*: *", re.M)
    # An index + timestamp header that is directly followed by another index
    # line, i.e. a cue header with no text after it.
    RL = re.compile(
        r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}(?=\n\s*\d+\n*?)"
    )

    textis = srt.parse(text_in, ignore_errors=True)
    text_subs = srt.compose(textis)
    try:
        fp3 = reg_4.sub("", text_subs)
        fp5 = reg_P6.sub("", fp3)
        rf1 = regColon.sub("", fp5)
        fp11 = reg_P8.sub("", rf1)
        fp13 = reg_S9.sub("\n", fp11)
        fp13 = RL.sub("\n", fp13)
        fp14 = regN.sub('', fp13)
        fp15 = reg8a.sub('', fp14)
        return fp15
    except Exception as e:
        logger.debug(f"CleanSubtitle proc, unexpected error: {e}")
def saveSubtitle(videoFile, duration, thresh, language):
    """Ask for a destination and save the generated subtitles as SRT.

    The arguments are forwarded unchanged to ``Subtitle(...)``, whose result
    is composed with ``srt.compose``.  Nothing is written when the dialog is
    cancelled.
    """
    files = [('Subtitle Files', '*.srt'), ('Text Document', '*.txt')]
    # defaultextension must be a single extension string, not the
    # filetypes list.
    out_file = asksaveasfile(filetypes=files, defaultextension='.srt')
    if not out_file:
        return
    # asksaveasfile already returns the file opened for writing: use that
    # handle (and close it) instead of leaking it and reopening the path.
    with out_file:
        out_file.write(srt.compose(Subtitle(videoFile, duration, thresh, language)))
def buildVTT(bookInfoFileName, outputDir):
    """Write one WebVTT subtitle file per chapter described in a JSON file.

    The JSON document provides ``chapter`` entries (``start``/``end`` indices
    into ``split``, plus ``index`` and ``title``) and ``split`` entries with
    ``start``/``end`` times in milliseconds and the cue text under ``texc``.
    Cue times are made relative to the start of the chapter's first split.

    :param bookInfoFileName: path of the JSON file with the segment info.
    :param outputDir: directory that receives the ``.vtt`` files.
    """
    # Read the segment information from the JSON file.
    with open(bookInfoFileName, 'r', encoding='UTF-8') as f:
        info = json.load(f)
    print(info['chapter'])

    for chapter in info['chapter']:
        first, last = chapter['start'], chapter['end']
        offset = info['split'][first]['start']
        subs = []
        for index, position in enumerate(range(first, last + 1)):
            split = info['split'][position]
            start = timedelta(milliseconds=(split['start'] - offset))
            end = timedelta(milliseconds=(split['end'] - offset))
            subs.append(srt.Subtitle(index, start, end, split['texc']))

        # Save the VTT subtitle file.
        title = validateTitle('%02d %s' % (chapter['index'] + 1, chapter['title']))
        vttfilename = '%s/%s.vtt' % (outputDir, title)
        with open(vttfilename, 'w', encoding='UTF-8') as f:
            # Convert the SRT text to WebVTT.
            strVTT = srt2vtt(srt.compose(subs))
            print(strVTT)
            f.write(strVTT)
        print('%s done.' % vttfilename)
    return
def shiftSubsNegative(self):
    """Shift every cue in the selected files earlier by ``self.timeShift`` ms.

    Each file in ``self.filenames[0]`` is read as UTF-8 SRT, re-timed and
    written back in place; a confirmation dialog is shown afterwards.
    """
    for filename in self.filenames[0]:
        with open(filename, "r", encoding="utf8") as source:
            raw_text = source.read()

        retimed = []
        for cue in list(srt.parse(raw_text)):
            shift = datetime.timedelta(milliseconds=self.timeShift)
            cue.start = cue.start - shift
            cue.end = cue.end - shift
            retimed.append(cue)

        final = srt.compose(retimed)
        with open(filename, "w", encoding="utf8") as target:
            target.write(final)

    dlg = QDialog(self)
    dlg.setWindowTitle("Retiming done!")
    layout = QVBoxLayout()
    dlg.setLayout(layout)
    layout.addWidget(QLabel("Retiming successful!"))
    dlg.exec_()
def cleanSubs(self):
    """Remove parenthesized text from the selected SRT files, in place.

    Each file in ``self.filenames[0]`` is read as UTF-8, parenthesized spans
    are stripped line by line, the result is re-parsed and re-composed with
    ``srt`` and written back.  A confirmation dialog is shown afterwards.
    """
    # The previous unescaped pattern "(.*)" is a capture group around ".*"
    # and erased every line.  Match literal parentheses instead; full-width
    # parentheses are accepted as well.
    parenthesized = re.compile(r"\(.*\)|（.*）")

    for filename in self.filenames[0]:
        with open(filename, "r", encoding="utf8") as file:
            lines = file.readlines()

        validLines = [parenthesized.sub("", line) for line in lines]
        subtitles = list(srt.parse(''.join(validLines)))
        clean = [sub for sub in subtitles if sub.content is not None]
        final = srt.compose(clean)

        with open(filename, "w", encoding="utf8") as file:
            file.write(final)

    dlg = QDialog(self)
    dlg.setWindowTitle("Done!")
    layout = QVBoxLayout()
    dlg.setLayout(layout)
    dlg.layout().addWidget(QLabel("Success!"))
    dlg.exec_()
def merge_srt(zh_file_path, en_file_path, new_file_path):
    """Merge two SRT files cue by cue into a bilingual SRT file.

    Each output cue keeps the timing of the cue from ``zh_file_path``; its
    text is that cue's text, a newline, then the text of the cue at the same
    position in ``en_file_path``, passed through ``srt.make_legal_content``.

    :param zh_file_path: path of the first (UTF-8) SRT file.
    :param en_file_path: path of the second (UTF-8) SRT file.
    :param new_file_path: path of the merged file to write.
    :return: ``False`` (nothing written) when the files hold a different
        number of cues, otherwise ``True``.
    """
    # Context managers close the handles even if reading or writing fails.
    with open(zh_file_path, mode='r', encoding='utf-8') as zh_file:
        zh_srt_s = zh_file.read()
    with open(en_file_path, mode='r', encoding='utf-8') as en_file:
        en_srt_s = en_file.read()

    zh_subs = list(srt.parse(zh_srt_s))
    en_subs = list(srt.parse(en_srt_s))
    if len(zh_subs) != len(en_subs):
        return False

    for zh_sub, en_sub in zip(zh_subs, en_subs):
        zh_sub.content = srt.make_legal_content(
            zh_sub.content + '\n' + en_sub.content)

    srt_s = srt.compose(zh_subs)
    # Write srt_s to the new file.
    with open(new_file_path, 'w+', encoding='utf-8') as new_file:
        new_file.write(srt_s)
    return True
def run(self) -> None:
    """
    Create the translated srt file.

    Every cue of the source file is translated from ``lang_src`` to
    ``lang_tgt``, echoed to stdout, and the result is written under the
    destination directory using the source file's name.
    :return: None
    """
    with self.__src_file.open('rt') as fp:
        file_contents = fp.read()

    filename = self.__src_file.name
    google_trans = google_translator()

    subtitles = []
    for sub in srt.parse(file_contents):
        tmp_sub = srt.Subtitle(
            index=sub.index,
            start=sub.start,
            end=sub.end,
            content=google_trans.translate(
                sub.content,
                lang_src=self.__lang_src,
                lang_tgt=self.__lang_tgt),
            proprietary=sub.proprietary)
        sys.stdout.write('[{0}][{1}]: {2}\n'.format(
            filename, tmp_sub.index, tmp_sub.content))
        subtitles.append(tmp_sub)

    # Write the translated srt file.
    dst_file = self.__dst_dirpath / self.__src_file.name
    with dst_file.open('wt') as fp:
        fp.write(srt.compose(subtitles))
async def run_test(uri):
    """Stream the WAV file named by ``sys.argv[1]`` and print SRT subtitles.

    The audio is sent to the websocket server at ``uri`` in 0.2 second
    chunks, one reply is collected per chunk plus one after the end-of-file
    marker.  Replies carrying a ``result`` word list are cut into cues of
    ``WORDS_PER_LINE`` words, timed from the first word's ``start`` to the
    last word's ``end``.
    """
    async with websockets.connect(uri) as websocket:
        results = []
        # The context manager closes the wave file once streaming is done.
        with wave.open(sys.argv[1], "rb") as wf:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (wf.getframerate()))
            buffer_size = int(wf.getframerate() * 0.2)  # 0.2 seconds of audio
            while True:
                data = wf.readframes(buffer_size)
                if len(data) == 0:
                    break
                await websocket.send(data)
                results.append(await websocket.recv())

        await websocket.send('{"eof" : 1}')
        results.append(await websocket.recv())

        subs = []
        for res in results:
            jres = json.loads(res)
            if 'result' not in jres:
                continue
            words = jres['result']
            for j in range(0, len(words), WORDS_PER_LINE):
                line = words[j : j + WORDS_PER_LINE]
                s = srt.Subtitle(index=len(subs),
                                 content=" ".join([l['word'] for l in line]),
                                 start=datetime.timedelta(seconds=line[0]['start']),
                                 end=datetime.timedelta(seconds=line[-1]['end']))
                subs.append(s)

        print(srt.compose(subs))
def write_srt(args, subs):
    """Write ``subs`` as SRT to ``<args.out_file>.srt``.

    :param args: object providing ``out_file`` (path without extension) and
        ``language_code`` (used only in the progress message).
    :param subs: iterable of subtitles accepted by ``srt.compose``.
    """
    srt_file = args.out_file + ".srt"
    print("Writing {} subtitles to: {}".format(args.language_code, srt_file))
    # compose() returns one string: write it in a single call, and let the
    # context manager close the file even if writing fails.
    with open(srt_file, 'w') as f:
        f.write(srt.compose(subs))
    return
def zameniImena(text_in):
    """Replace whole-word names and terms using the module's dictionaries.

    The text is normalized through ``srt`` when it contains subtitles, then
    the keys of ``dictionary_1``, ``dictionary_2``, ``dictionary_0``,
    ``dict1_n``, ``dict2_n`` and ``dict0_n`` are replaced, in that order, by
    their values.  Empty dictionaries are skipped.

    :param text_in: SRT text.
    :return: ``(number_of_replacements, resulting_text)``.
    :raises Exception: any error raised while substituting is logged at
        debug level and re-raised.
    """
    if len(list(srt.parse(text_in))) == 0:
        logger.debug(f"Transkrib, No subtitles found.")
    else:
        text_in = srt.compose(srt.parse(text_in, ignore_errors=True))

    def whole_word_pattern(mapping):
        # Alternation of the escaped keys, anchored on word boundaries.
        return re.compile(
            r'\b(' + '|'.join(map(re.escape, mapping.keys())) + r')\b')

    def doRepl(inobj, indict, text):
        try:
            out = inobj.subn(lambda x: indict[x.group(0)], text)
            return out[1]
        except IOError as e:
            logger.debug(f"Replace keys, I/O error: {e}")
        except Exception as e:
            logger.debug(f"Replace keys, unexpected error: {e}")

    much = 0
    text_out = text_in
    try:
        for mapping in (dictionary_1, dictionary_2, dictionary_0,
                        dict1_n, dict2_n, dict0_n):
            if not mapping:
                # An empty dictionary would compile to r'\b()\b', which
                # matches the empty string and fails the lookup.
                continue
            text_out, replaced = whole_word_pattern(mapping).subn(
                lambda x, mapping=mapping: mapping[x.group(0)], text_out)
            much += replaced
    except Exception as e:
        logger.debug(F"Transkripcija, error: {e}")
        # Re-raise the real error; continuing would only fail later on the
        # values that were never computed.
        raise

    # NOTE(review): the results of these calls are discarded, so the *_n2
    # dictionaries never affect the returned text -- confirm whether that
    # is intended.
    for mapping in (dict1_n2, dict2_n2, dict0_n2):
        if len(mapping) != 0:
            doRepl(whole_word_pattern(mapping), mapping, text_out)

    logger.debug(
        'Transkripcija u toku.\n--------------------------------------')
    logger.debug(f'Zamenjeno ukupno {much} imena i pojmova')
    return much, text_out
def main(args = argv[1:]):
    """Command-line entry point: group timed words into lines.

    Reads timed items either from an SRT file or, when the positional
    argument is ``lrc``, via ``readLines("lrc")``; consecutive items closer
    than ``-dist`` seconds are grouped.  The grouped result is printed in LRC
    form and written as SRT to the ``-o`` path.

    :param args: argument list to parse (defaults to the process arguments
        captured when the module was loaded).
    """
    from argparse import ArgumentParser
    app = ArgumentParser("lrc_merge",
        description="merge simple timeline LRC into line-splited LRC",
        epilog="if the result is truncated, try to split your input in lines")
    app.add_argument("-dist", type=float, default=0.8, help="max distance for words in same sentence")
    app.add_argument("-min-len", type=float, default=0.0, help="min duration for last word in sentence (LRC only)")
    app.add_argument("-o", type=str, default="a.srt", help="ouput SRT file")
    app.add_argument("-sep", type=str, default=None, help="word seprator (or decided automatically from sentence)")
    app.add_argument("file", type=str, help="input SRT file (or 'lrc' and input from stdin)")

    cfg = app.parse_args(args)
    use_lrc = cfg.file == "lrc"

    def inSameLine(a, b):
        # LRC items are compared start-to-start, SRT items end-to-start.
        edge = a.start if use_lrc else a.end
        return abs(edge - b.start).total_seconds() < cfg.dist

    # regex findall has input size limitations...
    if use_lrc:
        data = list(flatMap(lambda t: fromLrc(t, cfg.min_len), readLines("lrc")))
    else:
        # Close the input file instead of leaving it to the garbage collector.
        with open(cfg.file) as source:
            data = list(fromSrt(source.read()))

    print(" ".join([f"{item.start.total_seconds()};{item.content}" for item in data]))
    print("== lyrics")
    result = list(zipTakeWhile(inSameLine, data))
    print(intoLrc(result, cfg.sep))
    with open(cfg.o, "w+") as srtf:
        srtf.write(compose(intoSrt(result, cfg.sep)))
def write_srt(lang, lang_subs):
    """Write ``lang_subs`` as strict SRT to ``<lang>.srt``.

    :param lang: language tag used as the file's base name.
    :param lang_subs: iterable of subtitles accepted by ``srt.compose``.
    """
    filename = lang + ".srt"
    # The context manager closes the file even if composing fails.
    with open(filename, "w") as f:
        f.write(srt.compose(lang_subs, strict=True))
    print("Wrote SRT file {}".format(filename))
    return
def test_parsing_content_with_blank_lines(subs):
    """Content containing a blank line survives a non-strict round trip."""
    for entry in subs:
        # A blank line in the middle of the content triggers the "special"
        # content parsing for erroneous SRT files that have blank lines.
        entry.content = "\n\n".join([entry.content, entry.content])

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
def test_parsing_no_content(subs):
    """Cues with empty content survive a non-strict compose/parse round trip."""
    for entry in subs:
        entry.content = ''

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
def test_parser_noncontiguous_ignore_errors(subs, fake_idx, garbage, fake_timedelta):
    """Garbage injected between cues must not raise with ``ignore_errors``."""
    clean_text = srt.compose(subs)
    timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    injected = "\n\n%d\n%s %s" % (fake_idx, timestamp, garbage)
    corrupted = clean_text.replace("\n\n", injected)

    # Should not raise, we have ignore_errors
    list(srt.parse(corrupted, ignore_errors=True))
def compose_suggest_on_fail(subs, strict=True):
    """Compose ``subs``; on a parse error log an encoding hint and re-raise.

    :param subs: iterable of subtitles accepted by ``srt.compose``.
    :param strict: forwarded to ``srt.compose``.
    :return: the composed SRT text.
    :raises srt.SRTParseError: re-raised after the hint has been logged.
    """
    try:
        composed = srt.compose(subs, strict=strict)
    except srt.SRTParseError:
        log.fatal(
            'Parsing failed, maybe you need to pass a different '
            'encoding with --encoding?')
        raise
    return composed
def write_to_file(self, file=None):
    """Write ``self.srt_list`` as UTF-8 SRT text.

    Nothing is written (a notice is printed instead) when ``self.srt_list``
    is ``None``.

    :param file: destination path; defaults to ``self.file``.
    """
    if self.srt_list is None:
        print('Skipping {} because of bad srt format.'.format(self.file))
        return

    target = self.file if file is None else file
    with open(target, "w", encoding="utf-8") as out:
        out.write(srt.compose(self.srt_list))
def translate_and_compose(input_file, output_file, src_lang: str, target_lang: str, encoding='UTF-8', mode='split', both=True, space=False):
    """
    Translate the srt file.

    Some of the supported language codes::

        Afrikaans af            Albanian sq             Amharic am
        Arabic ar               Armenian hy             Azerbaijani az
        Basque eu               Belarusian be           Bengali bn
        Bosnian bs              Bulgarian bg            Catalan ca
        Cebuano ceb             Chinese (Simplified) zh-CN
        Chinese (Traditional) zh-TW                     Corsican co
        Croatian hr             Czech cs                Danish da
        Dutch nl                English en              Esperanto eo
        Estonian et             Finnish fi              French fr
        Frisian fy              Galician gl             Georgian ka
        German de               Greek el                Gujarati gu
        Haitian Creole ht       Hausa ha                Hawaiian haw
        Hebrew he               Hindi hi                Hmong hmn
        Hungarian hu            Icelandic is            Igbo ig
        Indonesian id           Irish ga                Italian it
        Japanese ja             Javanese jw             ...

    For more Google Translate supported languages see
    https://cloud.google.com/translate/docs/languages

    English, French, German, ... separate the words of a sentence with
    spaces; Chinese and Japanese do not.

    Subtitles with blank content are dropped before translating.  If the
    number of translations differs from the number of subtitles, a
    diagnostic dump is printed and the untranslated subtitles are written.

    :param input_file: input file path, only srt file supported currently
    :param output_file: output file path (always written as UTF-8)
    :param src_lang: source language. the ISO-639-1 language code of the input text
    :param target_lang: target language. the ISO-639-1 language code of the output text
    :param encoding: encoding of the input file
    :param mode: 'naive' or 'split'
    :param both: save both src_lang and target_lang or target_lang only
    :param space: is the vocabulary of target language split by space
    :return: None
    """
    # Close the input file once it has been read.
    with open(input_file, encoding=encoding) as srt_file:
        subtitle = list(srt.parse(srt_file.read()))

    # filter out empty subs
    subtitle = [sub for sub in subtitle if sub.content.strip()]

    if mode == 'naive':
        translated_list = simple_translate_srt(subtitle, src_lang, target_lang)
    else:
        translated_list = translate_srt(subtitle, src_lang, target_lang, space=space)

    if len(subtitle) == len(translated_list):
        if both:
            # Original text flattened to one line, translation below it.
            for sub, translated in zip(subtitle, translated_list):
                sub.content = sub.content.replace('\n', ' ') + '\n' + translated
        else:
            for sub, translated in zip(subtitle, translated_list):
                sub.content = translated
    else:
        from pprint import pprint
        print('Error')
        pprint("Subtitles")
        pprint(subtitle)
        pprint("translated_list")
        pprint(translated_list)
        print('Error')

    with open(output_file, 'w', encoding='UTF-8') as f:
        f.write(srt.compose(subtitle))
def test_can_compose_without_ending_blank_line(input_subs):
    '''
    Input whose final blank line is missing must still parse: many subtitle
    editors omit it, and many players accept such files.
    '''
    full_text = srt.compose(input_subs, reindex=False)
    without_last_char = full_text[:-1]
    subs_eq(srt.parse(without_last_char), input_subs)
def compose_suggest_on_fail(subs, strict=True):
    """Compose ``subs``; on a parse error log an encoding hint and re-raise.

    :param subs: iterable of subtitles accepted by ``srt.compose``.
    :param strict: forwarded to ``srt.compose``.
    :return: the composed SRT text.
    :raises srt.SRTParseError: re-raised after the hint has been logged.
    """
    try:
        composed = srt.compose(subs, strict=strict)
    except srt.SRTParseError:
        log.fatal(
            'Parsing failed, maybe you need to pass a different '
            'encoding with --encoding?'
        )
        raise
    return composed
def test_compose_and_parse_strict_crlf(input_subs):
    """Text composed with LF and rewritten to CRLF must parse to equal cues."""
    crlf_text = srt.compose(input_subs, reindex=False).replace('\n', '\r\n')
    roundtripped = list(srt.parse(crlf_text))

    # Undo the line-ending rewrite inside the content before comparing.
    for entry in roundtripped:
        entry.content = entry.content.replace('\r\n', '\n')

    subs_eq(roundtripped, input_subs)
def test_compose_and_parse_strict_crlf(input_subs):
    """Text composed with LF and rewritten to CRLF must parse to equal cues."""
    crlf_text = srt.compose(input_subs, reindex=False).replace("\n", "\r\n")
    roundtripped = list(srt.parse(crlf_text))

    # Undo the line-ending rewrite inside the content before comparing.
    for entry in roundtripped:
        entry.content = entry.content.replace("\r\n", "\n")

    subs_eq(roundtripped, input_subs)
def test_can_compose_without_ending_blank_line(input_subs):
    """
    Input whose final blank line is missing must still parse: many subtitle
    editors omit it, and many players accept such files.
    """
    full_text = srt.compose(input_subs, reindex=False)
    without_last_char = full_text[:-1]
    subs_eq(srt.parse(without_last_char), input_subs)
def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
    """Garbage injected between cues must make strict parsing fail."""
    clean_text = srt.compose(subs)

    # The injected text has to look a little like an SRT block (index and
    # timestamp), because the parser tries to detect blank lines that do
    # not really delimit subtitles.
    timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    injected = "\n\n%d\n%s %s" % (fake_idx, timestamp, garbage)
    corrupted = clean_text.replace("\n\n", injected)

    with assert_raises(srt.SRTParseError):
        list(srt.parse(corrupted))
def test_compose_and_parse_from_file(input_subs):
    """Parsing from a file-like object yields the cues that were composed."""
    composed = srt.compose(input_subs, reindex=False)
    file_like = StringIO(composed)
    subs_eq(srt.parse(file_like), input_subs)
def test_can_compose_without_eol_at_all(input_subs):
    """Input with every trailing line ending removed must still parse."""
    full_text = srt.compose(input_subs, reindex=False)
    without_trailing_eol = full_text.rstrip('\r\n')
    subs_eq(srt.parse(without_trailing_eol), input_subs)
def test_compose_and_parse_strict_custom_eol(input_subs, eol):
    """Text composed with a custom ``eol`` must parse back to equal cues."""
    text_with_eol = srt.compose(input_subs, reindex=False, eol=eol)
    subs_eq(srt.parse(text_with_eol), input_subs)
def test_compose_and_parse_strict(input_subs):
    """Composed text must parse back to cues equal to the input."""
    text = srt.compose(input_subs, reindex=False)
    subs_eq(srt.parse(text), input_subs)
def test_parsing_spaced_arrow(subs):
    """A timestamp arrow written as ``- >`` must still be parsed."""
    composed = srt.compose(subs, reindex=False, strict=False)
    spaced_block = composed.replace("-->", "- >")
    subs_eq(srt.parse(spaced_block), subs)