def test_empty_subtitles():
    """Cues without any text are parsed as empty events (regression test for issue #11)."""
    text = dedent("""
    392
    00:29:27,46 --> 00:29:29,83
    I'm Liza Minnelli..

    393
    00:00:00,00 --> 00:00:00,00

    394
    00:00:00,00 --> 00:00:00,00
    """)

    expected_events = [
        SSAEvent(start=make_time(m=29, s=27, ms=460),
                 end=make_time(m=29, s=29, ms=830),
                 text="I'm Liza Minnelli.."),
        SSAEvent(start=0, end=0, text=""),
        SSAEvent(start=0, end=0, text=""),
    ]
    ref = SSAFile()
    for event in expected_events:
        ref.append(event)

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def create_ass_file(subtitle_blocks: List[SubtitleBlock], ass_file, styles: Dict[str, StyleConfig]):
    """Write ``subtitle_blocks`` to ``ass_file`` with one style per speaker name.

    :param subtitle_blocks: blocks providing ``names``, ``name_texts``,
        ``start`` and ``end``; the names of the first block define the styles.
    :param ass_file: path handed to ``SSAFile.save``.
    :param styles: per-name configuration; only ``fontsize`` is read here.

    An empty ``subtitle_blocks`` produces a file without events instead of
    raising ``IndexError``, and the colour palette is cycled when there are
    more names than colours.
    """
    subs = SSAFile()
    colors = [Color(255, 255, 255), Color(100, 100, 255), Color(255, 100, 100)]

    # Style names come from the first block only; guard against an empty list.
    names = subtitle_blocks[0].names if subtitle_blocks else []
    for k, name in enumerate(names):
        my_style = subs.styles["Default"].copy()
        # Wrap around so a fourth (fifth, ...) name reuses the palette.
        my_style.primarycolor = colors[k % len(colors)]
        my_style.fontsize = styles[name].fontsize
        my_style.shadow = 0
        subs.styles[name] = my_style

    for sb in subtitle_blocks:
        for name, text in sb.name_texts:
            if not text:
                print(f"WARNING: got empty block! {name} ")
                continue
            # Every text of a block shares the block's own time span.
            sub_line = SSAEvent(
                start=sb.start,
                end=sb.end,
                text=text.replace("_", " "),
            )
            sub_line.style = name
            subs.append(sub_line)

    subs.save(ass_file)
def test_write_drawing():
    """Drawing events are left out of SRT output.

    Test for 7bde9a6c3a250cf0880a8a9fe31d1b6a69ff21a0.
    """
    drawing = SSAEvent(start=0, end=60000,
                       text=r"{\p1}m 0 0 l 100 0 100 100 0 100{\p0}test")
    regular = SSAEvent(start=60000, end=120000, text="Subtitle number\\Ntwo.")

    subs = SSAFile()
    for event in (drawing, regular):
        subs.append(event)

    ref = dedent("""\
    1
    00:01:00,000 --> 00:02:00,000
    Subtitle number
    two.
    """)

    text = subs.to_string("srt")
    assert text.strip() == ref.strip()
def build(self, **extra):
    """Serialize the entries in ``self.l`` as an ASS subtitle document.

    Each ``(t, text, length)`` entry becomes one event starting at
    ``t - self.start`` and lasting ``length`` seconds.  Every keyword
    argument is set as an attribute on the ``Default`` style, overriding
    the values chosen here.

    Returns the result of ``SSAFile.to_string('ass')``.
    """
    from pysubs2 import SSAFile, SSAEvent, Color  # type: ignore[import]

    one_millisecond = timedelta(milliseconds=1)

    def to_millis(delta):
        return delta / one_millisecond

    sf = SSAFile()

    default_style = sf.styles['Default'].copy()
    default_style.fontsize = 16  # default is 20, bit too much??
    default_style.outlinecolor = Color(0, 0, 0, 50)  # semitransparent
    default_style.shadow = 0.0
    default_style.outline = 0.1
    # no idea why 3, but it makes the background appear in conjunction with outline
    default_style.borderstyle = 3
    for attribute, value in extra.items():
        setattr(default_style, attribute, value)
    sf.styles['Default'] = default_style

    for t, text, length in self.l:
        offset = t - self.start
        sf.append(SSAEvent(
            start=to_millis(offset),
            end=to_millis(offset + timedelta(seconds=length)),
            text=text.replace('\n', r'\N'),  # \N necessary for SSA files
        ))

    return sf.to_string('ass')
def append_subs(combined_subs, new_subs, style=None, formatter=None, exclude=None):
    """
    Merge ``new_subs`` into ``combined_subs`` and return the sorted result.

    Each new sub optionally gets ``style`` assigned and its text passed
    through ``formatter`` before ``_adjust_for_clashing_subs`` decides which
    (up to two) events are added for it.
    """
    excluded = [] if exclude is None else exclude

    result = SSAFile()
    # Start from the subs we already have
    if combined_subs:
        result.extend(combined_subs)

    for candidate in new_subs:
        if style:
            candidate.style = style
        if formatter:
            candidate.text = formatter(candidate.text)

        # Clash handling may drop the sub or split it in two
        adjusted = _adjust_for_clashing_subs(combined_subs, candidate, excluded)
        first_part, second_part = adjusted
        for part in (first_part, second_part):
            if part:
                result.append(part)

    result.sort()
    return result
def test_simple_write():
    """Regular events are written in TMP format; comment events are skipped."""
    first = SSAEvent(start=0, end=60000, text="ten--chars")
    second = SSAEvent(start=60000, end=120000, text="ten--chars-ten-chars")
    hidden = SSAEvent(start=60000, end=120000, text="Invisible subtitle.")
    hidden.is_comment = True

    subs = SSAFile()
    for event in (first, second, hidden):
        subs.append(event)

    ref = dedent("""\
    00:00:00:ten--chars
    00:01:00:ten--chars-ten-chars
    """)

    text = subs.to_string("tmp")
    assert text.strip() == ref.strip()
def test_read_position_styling():
    """Position data is dropped, italics become override tags, colour is dropped."""
    text = dedent("""\
    1
    00:00:10,500 --> 00:00:13,000 X1:63 X2:223 Y1:43 Y2:58
    <i>Elephant's Dream</i>

    2
    00:00:15,000 --> 00:00:18,000 X1:53 X2:303 Y1:438 Y2:453
    <font color="cyan">At the left we can see...</font>
    """)

    expected_events = (
        SSAEvent(start=make_time(s=10.5), end=make_time(s=13),
                 text="{\\i1}Elephant's Dream{\\i0}"),
        SSAEvent(start=make_time(s=15), end=make_time(s=18),
                 text="At the left we can see..."),
    )
    ref = SSAFile()
    for event in expected_events:
        ref.append(event)

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def save_to_srt(results, file_name):
    """Write model output to ``file_name`` as subtitles, one event per result.

    Each result supplies ``'start'`` and ``'end'`` (passed to ``make_time``
    as seconds) and the event text under ``'word'``.
    """
    subtitle_file = SSAFile()
    for entry in results:
        begin = make_time(s=entry['start'])
        finish = make_time(s=entry['end'])
        subtitle_file.append(SSAEvent(start=begin, end=finish, text=entry['word']))
    subtitle_file.save(file_name)
def test_writer_strips_tags():
    """Brace-enclosed tags are removed from MicroDVD output."""
    event = SSAEvent(start=0, end=10, text="Let me tell you{a secret}.")
    subs = SSAFile()
    subs.append(event)

    expected = dedent("""\
    {0}{0}1000
    {0}{10}Let me tell you.
    """)

    written = subs.to_string("microdvd", fps=1000)
    assert written == expected
def test_insertion_of_wrong_type():
    """SSAFile rejects non-event items on append, insert and item assignment."""
    subs = SSAFile()
    subs.append(SSAEvent())

    with assert_raises(TypeError):
        subs.append(42)

    with assert_raises(TypeError):
        # insert() takes (index, value).  Calling it with a single argument
        # raises TypeError for the missing parameter, so the element type
        # check would never be exercised.
        subs.insert(0, 42)

    with assert_raises(TypeError):
        subs[0] = 42
def test_writer_uses_original_fps():
    """Without an explicit fps argument the writer uses the file's ``fps`` attribute."""
    subs = SSAFile()
    subs.append(SSAEvent(start=0, end=10, text="Hello!"))
    subs.fps = 1000

    expected = dedent("""\
    {0}{0}1000
    {0}{10}Hello!
    """)

    written = subs.to_string("microdvd")
    assert written == expected
def test_writer_handles_whitespace():
    r"""``\h`` is written as a space and ``\N`` / ``\n`` as ``|`` in MicroDVD."""
    event = SSAEvent(start=0, end=10,
                     text=r"Hello,\hworld!\NSo many\N\nNewlines.")
    subs = SSAFile()
    subs.append(event)

    expected = dedent("""\
    {0}{0}1000
    {0}{10}Hello, world!|So many||Newlines.
    """)

    written = subs.to_string("microdvd", fps=1000)
    assert written == expected
def test_writer_skips_comment_lines():
    """Events flagged as comments do not appear in MicroDVD output."""
    subs = SSAFile()
    for line in ("Hello!", "World!"):
        subs.append(SSAEvent(start=0, end=10, text=line))
    subs[0].is_comment = True

    expected = dedent("""\
    {0}{0}1000
    {0}{10}World!
    """)

    written = subs.to_string("microdvd", fps=1000)
    assert written == expected
def test_writer_handles_whitespace():
    r"""Hard spaces (``\h``) and line breaks (``\N``, ``\n``) map to `` `` and ``|``."""
    source_text = r"Hello,\hworld!\NSo many\N\nNewlines."

    subs = SSAFile()
    subs.append(SSAEvent(start=0, end=10, text=source_text))

    expected_output = dedent("""\
    {0}{0}1000
    {0}{10}Hello, world!|So many||Newlines.
    """)

    assert subs.to_string("microdvd", fps=1000) == expected_output
def test_simple_read():
    """TMP lines are parsed into events with computed end times."""
    text = dedent("""\
    00:00:00:ten--chars
    00:01:00:ten--chars-ten-chars
    """)

    # calculate end time from start time + 500 milliseconds
    # + 67 milliseconds per each character (15 chars per second)
    expected_events = [
        SSAEvent(start=0, end=make_time(ms=1840), text="ten--chars"),
        SSAEvent(start=make_time(m=1), end=make_time(ms=62510),
                 text="ten--chars-ten-chars"),
    ]
    ref = SSAFile()
    for event in expected_events:
        ref.append(event)

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def filter_subs(combined_subs):
    """Remove overlaps between 'top' and 'bottom' styled subs and drop empty ones.

    Events are processed in the order ``combined_subs`` yields them and are
    modified in place (``start``/``end`` may be adjusted).

    :param combined_subs: subtitle file whose events carry a ``style``
    :return: new ``SSAFile`` sharing the styles of ``combined_subs`` and
        containing only events with ``end > start``
    """
    filtered_subs = SSAFile()
    # Most recent 'top' or 'bottom' sub seen so far
    last_sub = None
    # Most recent 'top' sub seen so far
    last_top_sub = None
    for sub in combined_subs:
        if sub.style == 'bottom':
            # Bottom subs are always kept; an overlapping earlier top sub is cut short
            filtered_subs.append(sub)
            if last_top_sub and last_top_sub.end > sub.start:
                last_top_sub.end = sub.start
            last_sub = sub
        elif sub.style == 'top':
            # Top subs start no earlier than the end of the previously tracked sub
            if last_sub and last_sub.end > sub.start:
                sub.start = last_sub.end
            # Only keep the top sub if it still has a positive duration
            if sub.start < sub.end:
                filtered_subs.append(sub)
            last_top_sub = sub
            last_sub = sub
        else:
            # Any other style passes through untouched
            filtered_subs.append(sub)
    # Truncating a top sub above can leave it empty, so filter once more
    filtered_removed_empty_subs = SSAFile()
    filtered_removed_empty_subs.styles = combined_subs.styles
    for sub in filtered_subs:
        if sub.end > sub.start:
            filtered_removed_empty_subs.append(sub)
    logger.info(
        f'Filtered subtitles from {len(combined_subs)} to {len(filtered_removed_empty_subs)} subtitles'
    )
    return filtered_removed_empty_subs
def test_keep_unknown_html_tags():
    """Unknown HTML tags are stripped by default and kept on request (see issue #26)."""
    text = dedent("""\
    1
    00:00:10,500 --> 00:00:13,000
    <i>Elephant's <sub>Little</sub> Dream</i>

    2
    00:00:15,000 --> 00:00:18,000
    <font color="cyan">At the left we can see...</font>
    """)

    def make_reference(first_text, second_text):
        reference = SSAFile()
        reference.append(SSAEvent(start=make_time(s=10.5), end=make_time(s=13),
                                  text=first_text))
        reference.append(SSAEvent(start=make_time(s=15), end=make_time(s=18),
                                  text=second_text))
        return reference

    ref_default = make_reference(
        "{\\i1}Elephant's Little Dream{\\i0}",
        "At the left we can see...",
    )
    ref_keep = make_reference(
        "{\\i1}Elephant's <sub>Little</sub> Dream{\\i0}",
        '<font color="cyan">At the left we can see...</font>',
    )

    subs_default = SSAFile.from_string(text)
    subs_keep = SSAFile.from_string(text, keep_unknown_html_tags=True)

    assert subs_default.equals(ref_default)
    assert subs_keep.equals(ref_keep)
    assert subs_keep.to_string("srt") == ref_keep.to_string("srt")
def test_write_drawing():
    """Drawing events are omitted from MicroDVD output."""
    events = [
        SSAEvent(start=0, end=10,
                 text=r"{\p1}m 0 0 l 100 0 100 100 0 100{\p0}test"),
        SSAEvent(start=10, end=20, text="Let me tell you."),
    ]
    subs = SSAFile()
    for event in events:
        subs.append(event)

    expected = dedent("""\
    {0}{0}1000
    {10}{20}Let me tell you.
    """)

    written = subs.to_string("microdvd", fps=1000)
    assert written == expected
def test_write_read():
    """A file survives a JSON serialize/parse round trip unchanged."""
    original = SSAFile()
    plain_event = SSAEvent(text="Hello, world!")
    styled_event = SSAEvent(text="The other subtitle.\\NWith two lines.",
                            style="custom style")
    custom_style = SSAStyle(italic=True,
                            primarycolor=Color(r=255, g=0, b=0, a=0))

    for event in (plain_event, styled_event):
        original.append(event)
    original.styles["custom style"] = custom_style

    serialized = original.to_string("json")
    restored = SSAFile.from_string(serialized, "json")

    assert restored.equals(original)
def test_read_bad_tags():
    """Missing opening/closing tags, bad nesting and extra whitespace are tolerated."""
    text = dedent("""\
    1
    00:00:10,500 --> 00:00:13,000
    < u><i><font color="red" >Elephant's < s>Dream< / i > Is Long</s> And Badly Nested</xyz>
    """)

    expected_text = ("{\\u1}{\\i1}Elephant's {\\s1}Dream{\\i0} Is Long{\\s0}"
                     " And Badly Nested")
    ref = SSAFile()
    ref.append(SSAEvent(start=make_time(s=10.5), end=make_time(s=13),
                        text=expected_text))

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def test_transform_framerate():
    """Non-positive frame rates are rejected; valid ones rescale all timestamps."""
    subs = SSAFile()
    for start, end in ((0, 10), (1000, 1010)):
        subs.append(SSAEvent(start=start, end=end))

    invalid_pairs = [(1, 0), (1, -1), (0, 1), (-1, 1)]
    for in_fps, out_fps in invalid_pairs:
        with assert_raises(ValueError):
            subs.transform_framerate(in_fps, out_fps)

    subs.transform_framerate(10, 20)

    assert subs[0] == SSAEvent(start=0, end=5)
    assert subs[1] == SSAEvent(start=500, end=505)
def merge_subs(starting_subs, tolerance_millis=1000, style=None):
    """
    Close small gaps between consecutive subs.

    Whenever the previously kept sub ends within ``tolerance_millis``
    (default 1 second) of the next sub's start, its end is moved to that
    start.  With ``style`` given, only subs of that style are processed this
    way; all others are carried over untouched.  The result is sorted.
    """
    merged_subs = SSAFile()

    for current in starting_subs:
        if style and current.style != style:
            continue
        if merged_subs:
            previous = merged_subs[-1]
            if previous.end + tolerance_millis >= current.start:
                previous.end = current.start
        merged_subs.append(current)

    if style:
        # Bring back the subs that were skipped because of their style
        for other in starting_subs:
            if other.style == style:
                continue
            merged_subs.append(other)

    merged_subs.sort()
    return merged_subs
def test_read_malformed():
    """No line number, no empty line, leading whitespace, bad timestamp format."""
    text = dedent("""\
    00:00:00.000 ->00:01:00.000
    An example subtitle.
    0:01:00,00 --> 0:02:00,00
    Subtitle number
    two.
    """)

    expected_events = [
        SSAEvent(start=0, end=make_time(m=1), text="An example subtitle."),
        SSAEvent(start=make_time(m=1), end=make_time(m=2),
                 text="Subtitle number\\Ntwo."),
    ]
    ref = SSAFile()
    for event in expected_events:
        ref.append(event)

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def add_effects(subtitles):
    """Split every subtitle into per-word events that reveal the text word by word.

    The duration of a subtitle is divided by the estimated syllable count of
    its whole text; each word then lasts proportionally to its own estimate.
    In every generated event the not-yet-reached part of the text is hidden
    behind an ``\\alpha&HFF`` override tag.
    """
    effected_subs = SSAFile()
    for sub in subtitles:
        content = sub.plaintext.strip().replace('\n', ' ')
        duration = sub.end - sub.start
        time_per_syllable = duration / syllables.estimate(content)

        word_start = sub.start
        revealed_chars = 0
        for word in content.split(' '):
            word_end = word_start + time_per_syllable * syllables.estimate(word)

            # Every word after the first also reveals the space in front of it
            if revealed_chars != 0:
                revealed_chars += 1
            revealed_chars += len(word)

            visible = content[:revealed_chars]
            hidden = content[revealed_chars:]
            text = visible + '{\\alpha&HFF}' + hidden  # adds transparency

            effected_subs.append(
                SSAEvent(start=word_start, end=word_end, text=text))
            word_start = word_end
    return effected_subs
def save_to_subtitles(results, formatter):
    """
    Build a subtitle file from a sequence of results

    :param results: items providing 'start' and 'end' (passed to make_time as
        seconds) and optionally a truthy 'highlight' entry
    :param formatter: callable producing the event text from a result
    :return: New subtitle file
    """
    subs = SSAFile()
    for result in results:
        begin = make_time(s=result['start'])
        finish = make_time(s=result['end'])
        event = SSAEvent(start=begin, end=finish, text=formatter(result))

        highlighted = 'highlight' in result and result['highlight']
        if highlighted:
            event.style = 'red'

        subs.append(event)

    logger.info(f'Processed {len(results)} results to subtitle events')
    return subs
def test_simple_read():
    """Two well-formed SRT cues are parsed, with line breaks turned into ``\\N``."""
    text = dedent("""\
    1
    00:00:00,000 --> 00:01:00,000
    An example subtitle.

    2
    00:01:00,000 --> 00:02:00,000
    Subtitle number
    two.
    """)

    one_minute = make_time(m=1)
    two_minutes = make_time(m=2)

    ref = SSAFile()
    ref.append(SSAEvent(start=0, end=one_minute, text="An example subtitle."))
    ref.append(SSAEvent(start=one_minute, end=two_minutes,
                        text="Subtitle number\\Ntwo."))

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def compress_subs(subs, max_chars=30, max_stretch_millis=3000, max_oldest_millis=10000, style=None):
    """
    Mostly for the use of speech subtitles this will take individual words
    and create a running subtitle

    :param subs: subtitles to compress (events are modified in place)
    :param max_chars: character budget of the running text before it restarts
    :param max_stretch_millis: gap tolerance handed to ``merge_subs``
    :param max_oldest_millis: longest time the oldest word may stay on screen
    :param style: if given, only subs of this style are compressed; all
        other subs are carried over unchanged
    :return: new, sorted ``SSAFile``
    """
    # Phase 1 based on character count so that we dont overflow the screen
    # Phase 2 is to make sure that the oldest word on the screen has not been there for too long

    # First remove gaps where they exist
    merged_subs = merge_subs(subs, max_stretch_millis, style)

    char_count = 0
    oldest_start_time = 0
    compressed_subs = SSAFile()
    for sub in merged_subs:
        # Compare style names by value: identity (`is not`) only works when
        # both strings happen to be the same object.
        if style and sub.style != style:
            continue

        char_count += len(sub.text)

        restart = (
            # Too many characters on screen
            char_count > max_chars
            # Oldest word has been on screen for too long
            or sub.start - oldest_start_time > max_oldest_millis
            # There is a gap in time between this sub and the previous one
            or (len(compressed_subs) > 0 and sub.start != compressed_subs[-1].end)
        )
        if restart:
            char_count = len(sub.text)
            oldest_start_time = sub.start
        elif len(compressed_subs) > 0:
            # Continue the running text; the extra character is the space
            sub.text = compressed_subs[-1].text + ' ' + sub.text
            char_count += 1

        compressed_subs.append(sub)

    # Append all the other subs
    if style:
        for sub in merged_subs:
            if sub.style != style:
                compressed_subs.append(sub)

    compressed_subs.sort()
    return compressed_subs
def test_read_position_styling():
    """Cue position settings and font colour are discarded; ``<i>`` becomes ``\\i`` tags."""
    text = dedent("""\
    1
    00:00:10,500 --> 00:00:13,000 X1:63 X2:223 Y1:43 Y2:58
    <i>Elephant's Dream</i>

    2
    00:00:15,000 --> 00:00:18,000 X1:53 X2:303 Y1:438 Y2:453
    <font color="cyan">At the left we can see...</font>
    """)

    italic_event = SSAEvent(start=make_time(s=10.5), end=make_time(s=13),
                            text="{\\i1}Elephant's Dream{\\i0}")
    plain_event = SSAEvent(start=make_time(s=15), end=make_time(s=18),
                           text="At the left we can see...")

    ref = SSAFile()
    ref.append(italic_event)
    ref.append(plain_event)

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def search_subtitles(self, subtitles, search_str):
    """Search subtitle texts and return the hits grouped into consecutive runs.

    All subtitle texts are indexed under ``tmp_dir``, ``search_str`` is run
    against the ``content`` field, and hits whose subtitle positions are
    adjacent are combined into one ``ExtractedSubtitles`` holding the run,
    its highest score, the end of the preceding subtitle and the start of the
    following subtitle (``None`` at either edge of ``subtitles``).
    """
    index = create_in(tmp_dir, self.schema)
    with index.writer() as writer:
        for position, subtitle in enumerate(subtitles):
            writer.add_document(index=position, content=subtitle.text)

    with index.searcher() as searcher:
        parsed_query = QueryParser('content', index.schema).parse(search_str)
        hits = searcher.search(parsed_query)
        ordered_hits = sorted(hits, key=itemgetter('index'))
    index.close()

    last_position = len(subtitles) - 1
    hit_count = len(ordered_hits)
    extracted = []
    cursor = 0
    while cursor < hit_count:
        first_position = ordered_hits[cursor]['index']
        best_score = 0
        run = SSAFile()

        # Extend the run while hits refer to directly consecutive subtitles
        while cursor + len(run) < hit_count:
            hit = ordered_hits[cursor + len(run)]
            if hit['index'] != first_position + len(run):
                break
            best_score = max(best_score, hit.score)
            run.append(subtitles[hit['index']])

        final_position = ordered_hits[cursor + len(run) - 1]['index']

        if first_position - 1 < 0:
            previous_end = None
        else:
            previous_end = subtitles[first_position - 1].end

        if final_position + 1 > last_position:
            next_start = None
        else:
            next_start = subtitles[final_position + 1].start

        extracted.append(
            ExtractedSubtitles(run, best_score, previous_end, next_start))
        cursor += len(run)

    return extracted
def test_read_complex():
    """WebVTT with a header line and cue settings is parsed (regression test for #30)."""
    text = dedent("""\
    WEBVTT
    X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:0

    00:50.099 --> 00:53.299 line:85% align:middle
    Cuidem do seu grupo.
    Cuidem de suas fileiras.

    01:54.255 --> 01:55.455 line:85% align:middle
    Parem!

    01:58.155 --> 01:59.555 line:85% align:middle
    E, parem!
    """)

    expected_events = [
        SSAEvent(start=make_time(s=50, ms=99),
                 end=make_time(s=53, ms=299),
                 text=r"Cuidem do seu grupo.\NCuidem de suas fileiras."),
        SSAEvent(start=make_time(m=1, s=54, ms=255),
                 end=make_time(m=1, s=55, ms=455),
                 text="Parem!"),
        SSAEvent(start=make_time(m=1, s=58, ms=155),
                 end=make_time(m=1, s=59, ms=555),
                 text="E, parem!"),
    ]
    ref = SSAFile()
    for event in expected_events:
        ref.append(event)

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def test_empty_subtitles():
    """Text-less cues yield events with empty text (regression test for issue #11)."""
    text = dedent("""
    392
    00:29:27,46 --> 00:29:29,83
    I'm Liza Minnelli..

    393
    00:00:00,00 --> 00:00:00,00

    394
    00:00:00,00 --> 00:00:00,00
    """)

    spoken_start = make_time(m=29, s=27, ms=460)
    spoken_end = make_time(m=29, s=29, ms=830)

    ref = SSAFile()
    ref.append(SSAEvent(start=spoken_start, end=spoken_end,
                        text="I'm Liza Minnelli.."))
    for _ in range(2):
        ref.append(SSAEvent(start=0, end=0, text=""))

    subs = SSAFile.from_string(text)
    assert subs.equals(ref)
def test_simple_writing():
    """MPL2 output keeps plain text, maps ``\\N`` to ``|`` and drops tags and comments."""
    subs = SSAFile()
    subs.append(SSAEvent(start=0, end=1000, text="Hello!"))
    subs.append(SSAEvent(start=1000, end=2000,
                         text="Hello World!\\NTwo-line subtitle!"))
    # Raw string: "\i" is not a valid escape sequence in a normal literal.
    subs.append(SSAEvent(start=2000, end=3000,
                         text=r"{\i1}This is an italic line {with some hidden content}"))
    subs.append(SSAEvent(start=3000, end=4000, text="This is a comment line"))
    subs[-1].is_comment = True

    reference_output = dedent("""\
    [0][10] Hello!
    [10][20] Hello World!|Two-line subtitle!
    [20][30] This is an italic line
    """)

    assert subs.to_string("mpl2").strip() == reference_output.strip()
def test_simple_write():
    """Regular events are written as numbered WebVTT cues; comment events are skipped."""
    visible_events = [
        SSAEvent(start=0, end=60000, text="An example subtitle."),
        SSAEvent(start=60000, end=120000, text="Subtitle number\\Ntwo."),
    ]
    comment_event = SSAEvent(start=60000, end=120000,
                             text="Invisible subtitle.")
    comment_event.is_comment = True

    subs = SSAFile()
    for event in visible_events + [comment_event]:
        subs.append(event)

    ref = dedent("""\
    WEBVTT

    1
    00:00:00,000 --> 00:01:00,000
    An example subtitle.

    2
    00:01:00,000 --> 00:02:00,000
    Subtitle number
    two.
    """)

    text = subs.to_string("vtt")
    assert text.strip() == ref.strip()
def compress(subs, max_chars=30, max_stretch_time=3, max_oldest_time=10): new_subs = SSAFile() # Phase 1 based on character count so that we dont overflow the screen # Phase 2 if the end of the last subtitle is close to the start of the next we want to stretch out the end # Phase 3 is to make sure that the oldest word on the screen has not been there for too long char_count = 0 current_text = '' current_event = None oldest_start_time = 0 for sub in subs: last_event = current_event current_event = SSAEvent() current_event.start = sub.start current_event.end = sub.end char_count += len(sub.text) # Check the character count and reset if needed if char_count > max_chars: current_text = sub.text char_count = len(sub.text) else: current_text = current_text + ' ' + sub.text # Check the stretch of subtitles make last one longer if last_event and current_event.start - last_event.end < max_stretch_time * 1000: last_event.end = current_event.start else: current_text = sub.text char_count = len(sub.text) # Make sure that the oldest subtitle on the screen is not too old if current_event.start - oldest_start_time > max_oldest_time * 1000: current_text = sub.text char_count = len(sub.text) oldest_start_time = sub.start current_event.text = current_text new_subs.append(current_event) logger.info( f'Compressed {len(subs)} subtitles into {len(new_subs)} subtitles') return new_subs
def test_repr_simple():
    """``repr`` reports the event count, the style count and the last timestamp."""
    subs = SSAFile()
    for start_minute in (5, 125, 15):
        subs.append(SSAEvent(start=make_time(m=start_minute),
                             end=make_time(m=start_minute + 1)))
    for style_name in ("style1", "style2"):
        subs.styles[style_name] = SSAStyle()

    expected = "<SSAFile with 3 events and 3 styles, last timestamp 2:06:00>"
    assert repr(subs) == expected
def build_ref():
    """Return a reference file with custom info, two extra styles and three events.

    The second event is of type ``Comment``.
    """
    one_minute = make_time(m=1)
    two_minutes = make_time(m=2)

    subs = SSAFile()
    subs.info["My Custom Info"] = "Some: Test, String."
    subs.styles["left"] = SSAStyle(alignment=7, bold=True)
    subs.styles["topleft"] = SSAStyle(alignment=4)

    events = [
        SSAEvent(start=0, end=one_minute, text="An, example, subtitle."),
        SSAEvent(start=0, end=one_minute, type="Comment",
                 text="You can't see this one."),
        SSAEvent(start=one_minute, end=two_minutes,
                 text="Subtitle number\\Ntwo."),
    ]
    for event in events:
        subs.append(event)
    return subs
def test_simple_write():
    """Regular events are written as numbered SRT cues; comment events are skipped."""
    first = SSAEvent(start=0, end=60000, text="An example subtitle.")
    second = SSAEvent(start=60000, end=120000, text="Subtitle number\\Ntwo.")
    invisible = SSAEvent(start=60000, end=120000, text="Invisible subtitle.")
    invisible.is_comment = True

    subs = SSAFile()
    for event in (first, second, invisible):
        subs.append(event)

    ref = dedent("""\
    1
    00:00:00,000 --> 00:01:00,000
    An example subtitle.

    2
    00:01:00,000 --> 00:02:00,000
    Subtitle number
    two.
    """)

    text = subs.to_string("srt")
    assert text.strip() == ref.strip()