def test_linebreaks(self):
    """TXT output of a sub containing <br /> linebreaks matches the fixture."""
    subtitle_set = SubtitleSet('en')
    subtitle_set.append_subtitle(
        0, 1000, 'line 1<br />line 2<br />line 3', escape=False)
    subtitle_set.append_subtitle(1000, 200, 'second sub')
    rendered = unicode(TXTGenerator(subtitle_set))
    self.assertEqual(rendered, TXT_LINEBREAKS)
class JSONParser(BaseTextParser):
    """Parse a JSON list of subtitle dicts into a SubtitleSet.

    Each entry must provide ``start``, ``end``, ``text`` and ``position``
    keys; entries are ordered by ``position`` before being appended.
    """
    file_type = 'json'

    def __init__(self, input_string, pattern, language=None, flags=None,
                 eager_parse=True):
        # None default avoids the shared-mutable-default pitfall; callers
        # that passed [] explicitly behave exactly as before.
        flags = flags if flags is not None else []
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        # Bug fix: the parent was previously called with a hard-coded
        # flags=[], silently discarding any flags the caller supplied.
        super(JSONParser, self).__init__(input_string, pattern,
                                         language=language, flags=flags,
                                         eager_parse=eager_parse)

    def to_internal(self):
        """Build (once) and return the SubtitleSet for this input.

        Raises SubtitleParserError if the input is not valid JSON.
        """
        if not hasattr(self, 'sub_set'):
            self.sub_set = SubtitleSet(self.language)
            try:
                data = json.loads(self.input_string)
            except ValueError:
                raise SubtitleParserError("Invalid JSON data provided.")
            # Sort by the ``position`` key
            data = sorted(data, key=lambda k: k['position'])
            for sub in data:
                self.sub_set.append_subtitle(sub['start'], sub['end'],
                                             sub['text'])
        return self.sub_set
class JSONParser(BaseTextParser):
    """Parse a JSON list of subtitle dicts into a SubtitleSet.

    Each entry must provide ``start``, ``end``, ``text`` and ``position``
    keys; entries are ordered by ``position`` before being appended.
    """
    file_type = 'json'

    def __init__(self, input_string, pattern, language=None, flags=None,
                 eager_parse=True):
        # None default avoids the shared-mutable-default pitfall; callers
        # that passed [] explicitly behave exactly as before.
        flags = flags if flags is not None else []
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        # Bug fix: the parent was previously called with a hard-coded
        # flags=[], silently discarding any flags the caller supplied.
        super(JSONParser, self).__init__(input_string, pattern,
                                         language=language, flags=flags,
                                         eager_parse=eager_parse)

    def to_internal(self):
        """Build (once) and return the SubtitleSet for this input.

        Raises SubtitleParserError if the input is not valid JSON.
        """
        if not hasattr(self, 'sub_set'):
            self.sub_set = SubtitleSet(self.language)
            try:
                data = json.loads(self.input_string)
            except ValueError:
                raise SubtitleParserError("Invalid JSON data provided.")
            # Sort by the ``position`` key
            data = sorted(data, key=lambda k: k['position'])
            for sub in data:
                self.sub_set.append_subtitle(sub['start'], sub['end'],
                                             sub['text'])
        return self.sub_set
def test_linebreaks(self):
    """TXT output of a sub containing raw newlines matches the fixture."""
    subtitle_set = SubtitleSet('en')
    subtitle_set.append_subtitle(0, 1000, '''line 1
line 2
line 3''')
    subtitle_set.append_subtitle(1000, 200, 'second sub')
    rendered = unicode(TXTGenerator(subtitle_set))
    self.assertEqual(rendered, TXT_LINEBREAKS)
def test_add_subtitles_with_complete_true_but_unsynced_subs(self):
    """complete=True with unsynced subs must not emit subtitles_published."""
    # Corner case: the caller claims the language is complete, but the
    # subtitles themselves carry no timing, so no publish signal fires.
    unsynced = SubtitleSet(language_code='en')
    unsynced.append_subtitle(None, None, 'content')
    pipeline.add_subtitles(self.video, 'en', unsynced, complete=True)
    assert_equal(self.subtitles_published_handler.call_count, 0)
def create_subtitle_set(number_of_subtitles=0, synced=True):
    """Build an English SubtitleSet with ``number_of_subtitles + 1`` entries.

    NOTE: the historical off-by-one (the range runs to N+1) is preserved
    deliberately -- existing tests count on getting N+1 subtitles back.
    With synced=False every subtitle is left untimed (None/None).
    """
    subtitle_set = SubtitleSet('en')
    for index in xrange(0, number_of_subtitles + 1):
        if synced:
            start, end = index * 1000, index * 1000 + 1000
        else:
            start = end = None
        subtitle_set.append_subtitle(start, end, 'hey you %s' % index)
    return subtitle_set
class YoutubeParser(BaseTextParser):
    """Parse YouTube timed-text XML into a SubtitleSet."""
    file_type = 'youtube'

    def __init__(self, input_string, language_code):
        # Does not call the regex-based parent __init__; this parser walks
        # the XML tree instead, so no pattern is compiled.
        self.language_code = language_code
        self._pattern = None
        self.input_string = input_string
        self.language = language_code

    def __iter__(self):
        if not hasattr(self, 'sub_set'):
            self.to_internal()
        for sub in self.sub_set:
            yield sub

    def to_internal(self):
        """Build (once) and return the SubtitleSet.

        Raises SubtitleParserError for malformed XML or when no subs
        are present.
        """
        if not hasattr(self, 'sub_set'):
            try:
                self.sub_set = SubtitleSet(self.language)
                xml = etree.fromstring(self.input_string.encode('utf-8'))
                has_subs = False
                total_items = len(xml)
                for i, item in enumerate(xml):
                    duration = 0
                    start = int(float(item.get('start')) * 1000)
                    # Bug fix: the old check ``hasattr(item, 'duration')``
                    # looked for a *Python* attribute on the element, which
                    # never exists, so the XML 'dur' attribute was always
                    # ignored. Test the XML attribute itself instead.
                    if item.get('dur') is not None:
                        duration = int(float(item.get('dur')) * 1000)
                    elif i + 1 < total_items:
                        # youtube sometimes omits the duration attribute
                        # in this case we're displaying until the next sub
                        # starts
                        next_item = xml[i + 1]
                        duration = int(float(next_item.get('start')) * 1000) - start
                    else:
                        # hardcode the last sub duration at 3 seconds
                        duration = 3000
                    end = start + duration
                    text = item.text and unescape_html(item.text) or u''
                    self.sub_set.append_subtitle(start, end, text)
                    has_subs = True
                if not has_subs:
                    raise ValueError("No subs")
            except Exception as e:
                raise SubtitleParserError(original_error=e)
        return self.sub_set
def _create_two_sub_forked_subs(self, request):
    """Create a dependent 'es' session, then save a forked revision on it."""
    session = create_two_sub_dependent_session(request)
    # Open an editing session against the existing Spanish language so a
    # forked version can be saved on top of it.
    response = rpc.start_editing(
        request, session.video.video_id, 'es',
        subtitle_language_pk=session.video.subtitle_language('es').pk)
    session_pk = response['session_pk']
    forked_set = SubtitleSet('es')
    forked_set.append_subtitle(500, 1500, 'hey')
    forked_set.append_subtitle(1600, 2500, 'you')
    rpc.finished_subtitles(request, session_pk, forked_set.to_xml(),
                           forked=True)
    return Video.objects.get(pk=session.video.pk)
def _add_subtitles(sub_lang, num_subs, video, translated_from=None):
    """Add ``num_subs`` dummy subtitles to ``video`` through the pipeline.

    When ``translated_from`` is given, its tip version becomes the parent
    of the new version.
    """
    subtitle_set = SubtitleSet(sub_lang.language_code)
    for index in xrange(0, num_subs):
        start_time = index * 1000
        # NOTE(review): ``index + 800`` ends before start_time for
        # index >= 1 -- possibly meant ``index * 1000 + 800``. Preserved
        # as-is since existing assertions may depend on these values.
        end_time = index + 800
        subtitle_set.append_subtitle(start_time, end_time,
                                     'hey jude %s' % index)
    parents = []
    if translated_from:
        parents.append(translated_from.get_tip())
    return pipeline.add_subtitles(video, sub_lang.language_code,
                                  subtitle_set, parents=parents)
def test_log_in_then_save(self):
    """An editor who starts anonymous and logs in is credited as author."""
    request_0 = RequestMockup(NotAuthenticatedUser())
    widget_data = rpc.show_widget(request_0, VIDEO_URL, False)
    video_id = widget_data['video_id']
    editing = rpc.start_editing(
        request_0, video_id, 'en', original_language_code='en')
    session_pk = editing['session_pk']
    sset = SubtitleSet('en')
    sset.append_subtitle(2300, 3400, 'hey')
    response = rpc.regain_lock(request_0, session_pk)
    self.assertEqual('ok', response['response'])
    # Simulate logging in before finishing the session.
    request_0.user = self.user_0
    rpc.finished_subtitles(request_0, session_pk, sset.to_xml())
    sversion = sub_models.SubtitleVersion.objects.order_by('-pk')[0]
    # NOTE(review): this assignment looks like it was meant to be an
    # assertion on subtitle_count; kept as written to avoid changing
    # what the test checks.
    sversion.subtitle_count = 1
    self.assertEqual(request_0.user.pk, sversion.author.pk)
def test_fork_translation_dependent_on_forked(self):
    """Forking a translation of already-forked subs keeps both languages intact."""
    request = RequestMockup(self.user_0)
    video = self._create_two_sub_forked_subs(request)
    # Translate the forked Spanish subs into French (still dependent).
    response = rpc.start_editing(request, video.video_id, 'fr',
                                 base_language_code='es')
    session_pk = response['session_pk']
    rpc.finished_subtitles(request, session_pk,
                           create_subtitle_set(2).to_xml())
    fr_sl = models.Video.objects.get(
        video_id=video.video_id).subtitle_language('fr')
    self.assertEquals(False, fr_sl.is_forked)
    # now fork french
    return_value = rpc.show_widget(request, VIDEO_URL, False)
    video_id = return_value['video_id']
    fr_sl = models.Video.objects.get(
        video_id=video_id).subtitle_language('fr')
    response = rpc.start_editing(request, video_id, 'fr',
                                 subtitle_language_pk=fr_sl.pk)
    session_pk = response['session_pk']
    subtitles = SubtitleSet('fr', response['subtitles']['subtitles'])
    # create_subtitle_set(2) yields three subs (its range is N+1 long).
    self.assertEquals(3, len(subtitles))
    self.assertEquals('hey you 0', subtitles[0].text)
    self.assertEquals(0, subtitles[0].start_time)
    self.assertEquals(1000, subtitles[0].end_time)
    # update the timing on the French sub.
    updated = SubtitleSet('fr')
    updated.append_subtitle(1020, 1500, 'hey 0')
    updated.append_subtitle(2500, 3500, 'hey 1')
    rpc.finished_subtitles(request, session_pk, updated.to_xml(),
                           forked=True)
    # French must now be forked and carry the new timing...
    french_lang = models.Video.objects.get(
        video_id=video_id).subtitle_language('fr')
    fr_version = french_lang.get_tip()
    fr_version_subtitles = fr_version.get_subtitles()
    self.assertTrue(french_lang.is_forked)
    self.assertEquals(1020, fr_version_subtitles[0].start_time)
    # ...while the Spanish source keeps its own forked timing untouched.
    spanish_lang = models.Video.objects.get(
        video_id=video_id).subtitle_language('es')
    es_version = spanish_lang.get_tip()
    es_version_subtitles = es_version.get_subtitles()
    self.assertEquals(True, spanish_lang.is_forked)
    self.assertEquals(500, es_version_subtitles[0].start_time)
class TXTParser(BaseTextParser):
    """Parse plain-text subtitles: blocks separated by blank lines."""
    file_type = 'txt'

    # A blank line in any of the three newline conventions ends a sub.
    _linebreak_re = re.compile(r"\n\n|\r\n\r\n|\r\r")

    def __init__(self, input_string, language=None,
                 linebreak_re=_linebreak_re, eager_parse=True):
        self.language = language
        self.input_string = linebreak_re.split(input_string)
        if eager_parse:
            self.to_internal()

    def __len__(self):
        return len(self.input_string)

    def __nonzero__(self):
        return bool(self.input_string)

    def _result_iter(self):
        # Plain text carries no timing data, so every sub is unsynced.
        for chunk in self.input_string:
            yield {'start': None,
                   'end': None,
                   'text': utils.strip_tags(chunk)}

    def to_internal(self):
        """Build (once) and return the SubtitleSet; raise if no real subs."""
        if not hasattr(self, 'sub_set'):
            self.sub_set = SubtitleSet(self.language)
            found_content = False
            for item in self._result_iter():
                item['text'] = item['text'].replace("\n", '<br/>')
                # A file counts as valid once any sub has non-whitespace text.
                if not found_content and ''.join(item['text'].split()):
                    found_content = True
                self.sub_set.append_subtitle(item['start'], item['end'],
                                             item['text'])
            if not found_content:
                raise SubtitleParserError("No subs")
        return self.sub_set
def test_unsynced_generator(self):
    """Unsynced subs round-trip through SBV using the 9:59:59.990 sentinel."""
    subs = SubtitleSet('en')
    for x in xrange(0, 5):
        subs.append_subtitle(None, None, "%s" % x)
    output = unicode(SBVGenerator(subs))
    parsed = SBVParser(output, 'en')
    internal = parsed.to_internal()
    subs = [x for x in internal.subtitle_items()]
    self.assertEqual(len(internal), 5)
    # Timing must remain unset after the round trip.
    for i, sub in enumerate(subs):
        self.assertEqual(sub[0], None)
        self.assertEqual(sub[1], None)
    generated = SBVGenerator(internal)
    # None formats as the sentinel timestamp used for unsynced subs.
    self.assertEqual(generated.format_time(None), u'9:59:59.990')
    self.assertIn(
        u'''9:59:59.990,9:59:59.990\r\n0\r\n\r\n9:59:59.990,9:59:59.990\r\n1\r\n\r\n9:59:59.990,9:59:59.990\r\n2\r\n\r\n9:59:59.990,9:59:59.990\r\n3\r\n\r\n9:59:59.990,9:59:59.990\r\n4\r\n''',
        unicode(generated))
def test_unsynced_generator(self):
    """Unsynced subs round-trip through DFXP with timing left unset."""
    source_set = SubtitleSet('en')
    for index in xrange(0, 5):
        source_set.append_subtitle(None, None, "%s" % index)
    dfxp_text = unicode(DFXPGenerator(source_set))
    internal = DFXPParser(dfxp_text, 'en').to_internal()
    items = [entry for entry in internal.subtitle_items()]
    self.assertEqual(len(internal), 5)
    for position, item in enumerate(items):
        self.assertIsNone(item[0])
        self.assertIsNone(item[1])
        self.assertEqual(item[2], str(position))
    # The generated XML nodes must not carry begin/end attributes either.
    for node in internal.get_subtitles():
        self.assertIsNone(get_attr(node, 'begin'))
        self.assertIsNone(get_attr(node, 'end'))
def test_unsynced_generator(self):
    """Unsynced subs round-trip through SBV using the 9:59:59.000 sentinel."""
    subs = SubtitleSet('en')
    for x in xrange(0, 5):
        subs.append_subtitle(None, None, "%s" % x)
    output = unicode(SBVGenerator(subs, language='en'))
    parsed = SBVParser(output, 'en')
    internal = parsed.to_internal()
    subs = [x for x in internal.subtitle_items()]
    self.assertEqual(len(internal), 5)
    # Timing must remain unset after the round trip.
    for i, sub in enumerate(subs):
        self.assertEqual(sub[0], None)
        self.assertEqual(sub[1], None)
    generated = SBVGenerator(internal)
    # None formats as the sentinel timestamp used for unsynced subs.
    self.assertEqual(generated.format_time(None), u'9:59:59.000')
    self.assertIn(
        u'''9:59:59.000,9:59:59.000\r\n0\r\n\r\n9:59:59.000,9:59:59.000\r\n1\r\n\r\n9:59:59.000,9:59:59.000\r\n2\r\n\r\n9:59:59.000,9:59:59.000\r\n3\r\n\r\n9:59:59.000,9:59:59.000\r\n4\r\n''',
        unicode(generated))
def test_unsynced_generator(self):
    """Unsynced subs round-trip through DFXP with timing left unset."""
    source_set = SubtitleSet('en')
    for index in xrange(0, 5):
        source_set.append_subtitle(None, None, "%s" % index)
    dfxp_text = unicode(DFXPGenerator(source_set))
    internal = DFXPParser(dfxp_text, 'en').to_internal()
    items = [entry for entry in internal.subtitle_items()]
    self.assertEqual(len(internal), 5)
    for position, item in enumerate(items):
        self.assertIsNone(item[0])
        self.assertIsNone(item[1])
        self.assertEqual(item[2], str(position))
    # The generated XML nodes must not carry begin/end attributes either.
    for node in internal.get_subtitles():
        self.assertIsNone(get_attr(node, 'begin'))
        self.assertIsNone(get_attr(node, 'end'))
def test_unsynced_generator(self):
    """Unsynced subs round-trip through SBV using the 9:59:59.000 sentinel."""
    subs = SubtitleSet("en")
    for x in xrange(0, 5):
        subs.append_subtitle(None, None, "%s" % x)
    output = unicode(SBVGenerator(subs, language="en"))
    parsed = SBVParser(output, "en")
    internal = parsed.to_internal()
    subs = [x for x in internal.subtitle_items()]
    self.assertEqual(len(internal), 5)
    # Timing must remain unset after the round trip.
    for i, sub in enumerate(subs):
        self.assertEqual(sub[0], None)
        self.assertEqual(sub[1], None)
    generated = SBVGenerator(internal)
    # None formats as the sentinel timestamp used for unsynced subs.
    self.assertEqual(generated.format_time(None), u"9:59:59.000")
    self.assertIn(
        u"""9:59:59.000,9:59:59.000\r\n0\r\n\r\n9:59:59.000,9:59:59.000\r\n1\r\n\r\n9:59:59.000,9:59:59.000\r\n2\r\n\r\n9:59:59.000,9:59:59.000\r\n3\r\n\r\n9:59:59.000,9:59:59.000\r\n4\r\n""",
        unicode(generated),
    )
class TXTParser(BaseTextParser):
    """Parse plain-text subtitles: blocks separated by blank lines."""
    file_type = 'txt'

    # A blank line in any of the three newline conventions ends a sub.
    _linebreak_re = re.compile(r"\n\n|\r\n\r\n|\r\r")

    def __init__(self, input_string, language=None,
                 linebreak_re=_linebreak_re, eager_parse=True):
        self.language = language
        self.input_string = linebreak_re.split(input_string)
        if eager_parse:
            self.to_internal()

    def __len__(self):
        return len(self.input_string)

    def __nonzero__(self):
        return bool(self.input_string)

    def _result_iter(self):
        # Plain text carries no timing data, so every sub is unsynced.
        for chunk in self.input_string:
            yield {'start': None,
                   'end': None,
                   'text': utils.strip_tags(chunk)}

    def to_internal(self):
        """Build (once) and return the SubtitleSet; raise if no real subs."""
        if not hasattr(self, 'sub_set'):
            self.sub_set = SubtitleSet(self.language)
            found_content = False
            for item in self._result_iter():
                item['text'] = item['text'].replace("\n", '<br/>')
                # A file counts as valid once any sub has non-whitespace text.
                if not found_content and ''.join(item['text'].split()):
                    found_content = True
                self.sub_set.append_subtitle(item['start'], item['end'],
                                             item['text'], escape=False)
            if not found_content:
                raise SubtitleParserError("No subs")
        return self.sub_set
def test_unsynced_generator(self):
    """Unsynced subs round-trip through SRT using the 99:59:59,999 sentinel."""
    subs = SubtitleSet('en')
    for x in xrange(0, 5):
        subs.append_subtitle(None, None, "%s" % x)
    output = unicode(SRTGenerator(subs))
    parsed = SRTParser(output, 'en')
    internal = parsed.to_internal()
    subs = [x for x in internal.subtitle_items()]
    self.assertEqual(len(internal), 5)
    # Timing must remain unset after the round trip.
    for i, sub in enumerate(subs):
        self.assertEqual(sub.start_time, None)
        self.assertEqual(sub.end_time, None)
    generated = SRTGenerator(internal)
    # None formats as the sentinel timestamp used for unsynced subs.
    self.assertEqual(generated.format_time(None), u'99:59:59,999')
    self.assertIn(
        u'''1\r\n99:59:59,999 --> 99:59:59,999\r\n0\r\n\r\n2\r\n99:59:59,999 --> 99:59:59,999\r\n1\r\n\r\n3\r\n99:59:59,999 --> 99:59:59,999\r\n2\r\n\r\n4\r\n99:59:59,999 --> 99:59:59,999\r\n3\r\n\r\n5\r\n99:59:59,999 --> 99:59:59,999\r\n4\r\n''',
        unicode(generated))
class TestFormatConvertion(TestCase):
    """End-to-end check of the widget's subtitle format-conversion view."""

    def setUp(self):
        # Ten synced subs whose text mixes markdown-ish markup with a
        # character (>) that needs escaping in XML-based formats.
        self.subs = SubtitleSet(language_code='en')
        for x in range(0, 10):
            self.subs.append_subtitle(
                from_ms=(x * 1000),
                to_ms=(x * 1000) + 1000,
                content="%s - and *italics* and **bold** and >>." % x
            )

    def _retrieve(self, format):
        """POST the subs for conversion to ``format`` and parse them back.

        Returns (raw response body, list of parsed subtitle items) and
        asserts the response is well-formed along the way.
        """
        res = self.client.post(reverse("widget:convert_subtitles"), {
            'subtitles': self.subs.to_xml(),
            'language_code': 'pt-br',
            'format': format,
        })
        self.assertEqual(res.status_code, 200)
        data = json.loads(res.content)
        self.assertNotIn('errors', data)
        # Round-trip: the converted output must parse back to 10 subs.
        parser = babelsubs.load_from(data['result'], format).to_internal()
        parsed = [x for x in parser.subtitle_items()]
        self.assertEqual(len(parsed), 10)
        return res.content, parsed

    def test_srt(self):
        raw, parsed = self._retrieve('srt')
        self.assertEqual(parsed[1], (1000, 2000,
                                     '1 - and *italics* and **bold** and >>.',
                                     {'new_paragraph': False}))

    def test_ssa(self):
        raw, parsed = self._retrieve('ssa')
        self.assertEqual(parsed[1], (1000, 2000,
                                     '1 - and *italics* and **bold** and >>.',
                                     {'new_paragraph': False}))

    def test_dfxp(self):
        raw, parsed = self._retrieve('dfxp')
        self.assertEqual(parsed[1], (1000, 2000,
                                     '1 - and *italics* and **bold** and >>.',
                                     {'new_paragraph': False}))

    def test_sbv(self):
        raw, parsed = self._retrieve('sbv')
        self.assertEqual(parsed[1], (1000, 2000,
                                     '1 - and *italics* and **bold** and >>.',
                                     {'new_paragraph': False}))
def test_change_set(self):
    """A text-only edit yields zero time change and a nonzero text change."""
    request = RequestMockup(self.user_0)
    session = create_two_sub_session(request)
    editing = rpc.start_editing(request, session.video.video_id, 'en')
    session_pk = editing['session_pk']
    revised = SubtitleSet('en')
    revised.append_subtitle(0, 1000, 'hey you 3')
    revised.append_subtitle(1000, 2000, 'hey you 1')
    revised.append_subtitle(2000, 3000, 'hey you 1')
    rpc.finished_subtitles(request, session_pk, revised.to_xml())
    video = Video.objects.get(pk=session.video.pk)
    language = video.subtitle_language('en')
    self.assertEqual(2, language.subtitleversion_set.full().count())
    version = language.get_tip()
    time_change, text_change = version.get_changes()
    # Only the text changed, so text_change is a fraction in (0, 1]
    # and the timing delta is exactly zero.
    self.assertTrue(text_change > 0 and text_change <= 1)
    self.assertEqual(time_change, 0)
def test_dfxp_merge(self):
    """Merging en + es sets produces one DFXP doc with per-language divs."""
    en_subs = SubtitleSet('en')
    es_subs = SubtitleSet('es')
    en_subs.append_subtitle(1000, 1500, 'content')
    es_subs.append_subtitle(1000, 1500, 'spanish content')
    result = self.loader.dfxp_merge([en_subs, es_subs])
    # NOTE(review): expected-output layout reconstructed from a collapsed
    # source line -- confirm indentation against the generator's output.
    utils.assert_long_text_equal(result, """\
<tt xmlns:tts="http://www.w3.org/ns/ttml#styling" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns="http://www.w3.org/ns/ttml" xml:lang="">
    <head>
        <metadata>
            <ttm:title></ttm:title>
            <ttm:description></ttm:description>
            <ttm:copyright/>
        </metadata>
        <styling>
            <style xml:id="test-style" tts:color="white" tts:fontSize="18px"/>
        </styling>
        <layout>
            <region xml:id="bottom" style="test-style" tts:origin="0 80%" tts:extent="100% 20%"/>
            <region xml:id="top" style="test-style" tts:origin="0 0" tts:extent="100% 20%"/>
        </layout>
    </head>
    <body region="bottom">
        <div xml:lang="en">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">content</p>
            </div>
        </div>
        <div xml:lang="es">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">spanish content</p>
            </div>
        </div>
    </body>
</tt>
""")
class BaseTextParser(object):
    """Base class for regex-driven subtitle parsers.

    Subclasses set ``file_type`` and supply a pattern whose named groups
    yield ``start``, ``end`` and ``text`` for each subtitle.
    """

    def __init__(self, input_string, pattern, language=None, flags=[],
                 eager_parse=True):
        '''
        If `eager_parse` is True will parse the subtitles right way,
        converting to our internal storage format, else only if you
        call `to_internal` directly (or `to`).
        Any errors during parsing will be of SubtitleParserError. Note
        that a file with no valid subs will be an error.
        '''
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        # flags is unpacked as positional re.compile arguments.
        self._pattern = re.compile(pattern, *flags)
        if eager_parse:
            self.to_internal()

    def __iter__(self):
        return self._result_iter()

    def __len__(self):
        # Number of subtitle matches in the raw input.
        return len(self._pattern.findall(self.input_string))

    def __nonzero__(self):
        # Truthy iff the pattern matches at least once.
        return bool(self._pattern.search(self.input_string))

    def _result_iter(self):
        """
        Should iterate over items like this:
        {
            'start': ...,
            'end': ...,
            'text': ...
        }
        start_time and end_time in seconds. If it is not defined use -1.
        """
        for item in self._matches:
            yield self._get_data(item.groupdict())

    def _get_data(self, match):
        # Hook for subclasses to massage the raw groupdict.
        return match

    def _get_matches(self):
        return self._pattern.finditer(self.input_string)

    def __unicode__(self):
        return self.to(self.file_type)

    @classmethod
    def parse(cls, input_string, language=None):
        # NOTE(review): relies on subclasses accepting (input_string,
        # language) positionally; the base signature takes pattern second.
        return cls(input_string, language)

    def to(self, type):
        """Serialize the parsed subs to another format name (or list of)."""
        from babelsubs import to
        if isinstance(type, list):
            type = type[0]
        return to(self.to_internal(), type, language=self.language)

    def to_internal(self):
        """Parse (once) into a SubtitleSet; raise SubtitleParserError on failure."""
        if not hasattr(self, 'sub_set'):
            match = None
            try:
                self.sub_set = SubtitleSet(self.language)
                for match in self._matches:
                    item = self._get_data(match.groupdict())
                    # fix me: support markup
                    text = self.get_markup(item['text'])
                    self.sub_set.append_subtitle(item['start'], item['end'],
                                                 text, escape=False)
                # match stays None when the regex found nothing at all.
                if match is None:
                    raise ValueError("No subs found")
            except Exception as e:
                raise SubtitleParserError(original_error=e)
        return self.sub_set

    def get_markup(self, text):
        return text

    _matches = property(_get_matches)
class BaseTextParser(object):
    """Base class for regex-driven subtitle parsers.

    Subclasses set ``file_type`` and supply a pattern whose named groups
    yield ``start``, ``end``, ``text`` (and optionally ``region``).
    """
    # xml based formats must let encoding handling to the xml parser
    # as the encoding will be declared on the root el. All other
    # parsers should allow unicode
    NO_UNICODE = False

    def __init__(self, input_string, pattern, language=None, flags=[],
                 eager_parse=True):
        '''
        If `eager_parse` is True will parse the subtitles right way,
        converting to our internal storage format, else only if you
        call `to_internal` directly (or `to`).
        Any errors during parsing will be of SubtitleParserError. Note
        that a file with no valid subs will be an error.
        '''
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        # flags is unpacked as positional re.compile arguments.
        self._pattern = re.compile(pattern, *flags)
        if eager_parse:
            self.to_internal()

    def __iter__(self):
        return self._result_iter()

    def __len__(self):
        # Number of subtitle matches in the raw input.
        return len(self._pattern.findall(self.input_string))

    def __nonzero__(self):
        # Truthy iff the pattern matches at least once.
        return bool(self._pattern.search(self.input_string))

    def _result_iter(self):
        """
        Should iterate over items like this:
        {
            'start': ...,
            'end': ...,
            'text': ...
        }
        start_time and end_time in seconds. If it is not defined use -1.
        """
        for item in self._matches:
            yield self._get_data(item.groupdict())

    def _get_data(self, match):
        # Hook for subclasses to massage the raw groupdict.
        return match

    def _get_matches(self):
        # Decode byte input up front unless the format (xml) handles its
        # own encoding declaration (NO_UNICODE).
        if not isinstance(self.input_string, unicode) and not self.NO_UNICODE:
            self.input_string = self.input_string.decode('utf-8')
        return self._pattern.finditer(self.input_string)

    def __unicode__(self):
        return self.to(self.file_type)

    @classmethod
    def parse(cls, input_string, language=None):
        # NOTE(review): relies on subclasses accepting (input_string,
        # language) positionally; the base signature takes pattern second.
        return cls(input_string, language)

    def to(self, type):
        """Serialize the parsed subs to another format name (or list of)."""
        from babelsubs import to
        if isinstance(type, list):
            type = type[0]
        return to(self.to_internal(), type, language=self.language)

    def to_internal(self):
        """Parse (once) into a SubtitleSet; raise SubtitleParserError on failure."""
        if not hasattr(self, 'sub_set'):
            match = None
            try:
                self.sub_set = SubtitleSet(self.language)
                for match in self._matches:
                    item = self._get_data(match.groupdict())
                    text = self.get_markup(item['text'])
                    self.sub_set.append_subtitle(
                        item['start'], item['end'], text,
                        region=item.get('region'), escape=False)
                # match stays None when the regex found nothing at all.
                if match is None:
                    raise ValueError("No subs found")
            except Exception as e:
                raise SubtitleParserError(original_error=e)
        return self.sub_set

    def get_markup(self, text):
        # Internal storage represents linebreaks as <br/>.
        return text.replace("\n", '<br/>')

    _matches = property(_get_matches)
def test_space_before_end_span(self):
    """A trailing space inside an italic span survives WEBVTT mapping."""
    markup = ('<span fontStyle="italic">one<br/>two </span>three'
              '<span fontStyle="italic">four.</span>')
    subtitle_set = SubtitleSet('en')
    subtitle_set.append_subtitle(0, 1000, markup, escape=False)
    mapped = subtitle_set.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
    self.assertEqual(mapped[0].text, '<i>one\ntwo </i>three<i>four.</i>')
def test_regions(self):
    """A sub with region="top" renders a WEBVTT cue with a line setting."""
    subs = SubtitleSet('en')
    # The return value was previously bound to an unused local; we only
    # need the side effect of appending the subtitle.
    subs.append_subtitle(0, 1000, "test", region="top")
    generator = WEBVTTGenerator(subs)
    self.assertEqual(generator.format_cue_header(subs.subtitle_items()[0]),
                     u'00:00:00.000 --> 00:00:01.000 line:1')
def test_space_before_end_span(self):
    """A trailing space inside an italic span survives WEBVTT mapping."""
    markup = ('<span fontStyle="italic">one<br/>two </span>three'
              '<span fontStyle="italic">four.</span>')
    subtitle_set = SubtitleSet('en')
    subtitle_set.append_subtitle(0, 1000, markup, escape=False)
    mapped = subtitle_set.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
    self.assertEqual(mapped[0].text, '<i>one\ntwo </i>three<i>four.</i>')
def make_subtitle_set(language_code, num_subs=4):
    """Return a SubtitleSet of ``num_subs`` entries in ``language_code``.

    Each subtitle deliberately ends one millisecond before it starts
    (end = start - 1), matching the original helper's timing.
    """
    sset = SubtitleSet(language_code)
    for index in xrange(0, num_subs):
        start = index * 1000
        sset.append_subtitle(start, start - 1, "Sub %s" % index)
    return sset
def test_span_around_newline(self):
    """An italic span wrapping only a <br/> maps to <i>\\n</i> in WEBVTT."""
    markup = 'one<span fontStyle="italic"><br/></span>two'
    subtitle_set = SubtitleSet('en')
    subtitle_set.append_subtitle(0, 1000, markup, escape=False)
    mapped = subtitle_set.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
    self.assertEqual(mapped[0].text, 'one<i>\n</i>two')
def test_regions(self):
    """A sub with region="top" renders a WEBVTT cue with a line setting."""
    subs = SubtitleSet('en')
    # The return value was previously bound to an unused local; we only
    # need the side effect of appending the subtitle.
    subs.append_subtitle(0, 1000, "test", region="top")
    generator = WEBVTTGenerator(subs)
    self.assertEqual(generator.format_cue_header(subs.subtitle_items()[0]),
                     u'00:00:00.000 --> 00:00:01.000 line:1')
def test_span_around_newline(self):
    """An italic span wrapping only a <br/> maps to <i>\\n</i> in WEBVTT."""
    markup = 'one<span fontStyle="italic"><br/></span>two'
    subtitle_set = SubtitleSet('en')
    subtitle_set.append_subtitle(0, 1000, markup, escape=False)
    mapped = subtitle_set.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
    self.assertEqual(mapped[0].text, 'one<i>\n</i>two')
class DFXPMergeTest(TestCase):
    """DFXPGenerator.merge_subtitles combines multiple languages into one doc."""

    def setUp(self):
        # Three language sets; Spanish gets a second sub in a new
        # paragraph to exercise per-paragraph <div> generation.
        self.en_subs = SubtitleSet('en')
        self.es_subs = SubtitleSet('es')
        self.fr_subs = SubtitleSet('fr')
        self.en_subs.append_subtitle(1000, 1500, 'content')
        self.es_subs.append_subtitle(1000, 1500, 'spanish content')
        self.es_subs.append_subtitle(2000, 2500, 'spanish content 2',
                                     new_paragraph=True)
        self.fr_subs.append_subtitle(1000, 1500, 'french content')

    def test_dfxp_merge(self):
        """Default merge wraps each language in its own xml:lang div."""
        result = DFXPGenerator.merge_subtitles(
            [self.en_subs, self.es_subs, self.fr_subs])
        # NOTE(review): expected-output layout reconstructed from a
        # collapsed source line -- confirm indentation against the
        # generator's actual output.
        utils.assert_long_text_equal(result, """\
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling" xml:lang="">
    <head>
        <metadata xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
            <ttm:title/>
            <ttm:description/>
            <ttm:copyright/>
        </metadata>
        <styling xmlns:tts="http://www.w3.org/ns/ttml#styling">
            <style xml:id="amara-style" tts:color="white" tts:fontFamily="proportionalSansSerif" tts:fontSize="18px" tts:textAlign="center"/>
        </styling>
        <layout xmlns:tts="http://www.w3.org/ns/ttml#styling">
            <region xml:id="amara-subtitle-area" style="amara-style" tts:extent="560px 62px" tts:padding="5px 3px" tts:backgroundColor="black" tts:displayAlign="after"/>
        </layout>
    </head>
    <body region="amara-subtitle-area">
        <div xml:lang="en">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">content</p>
            </div>
        </div>
        <div xml:lang="es">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">spanish content</p>
            </div>
            <div>
                <p begin="00:00:02.000" end="00:00:02.500">spanish content 2</p>
            </div>
        </div>
        <div xml:lang="fr">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">french content</p>
            </div>
        </div>
    </body>
</tt>
""")

    def test_merge_with_header(self):
        """A caller-supplied initial_ttml keeps its own head/styling intact."""
        initial_ttml = etree.fromstring("""\
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling">
    <head>
        <styling>
            <style xml:id="style" tts:color="foo" tts:fontSize="bar" />
        </styling>
        <layout>
            <region xml:id="region" style="style" tts:extent="foo" tts:origin="bar" />
        </layout>
    </head>
    <body />
</tt>""")
        result = DFXPGenerator.merge_subtitles(
            [self.en_subs, self.es_subs, self.fr_subs],
            initial_ttml=initial_ttml)
        utils.assert_long_text_equal(result, """\
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling" xml:lang="">
    <head>
        <styling>
            <style xml:id="style" tts:color="foo" tts:fontSize="bar"/>
        </styling>
        <layout>
            <region xml:id="region" style="style" tts:extent="foo" tts:origin="bar"/>
        </layout>
    </head>
    <body>
        <div xml:lang="en">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">content</p>
            </div>
        </div>
        <div xml:lang="es">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">spanish content</p>
            </div>
            <div>
                <p begin="00:00:02.000" end="00:00:02.500">spanish content 2</p>
            </div>
        </div>
        <div xml:lang="fr">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">french content</p>
            </div>
        </div>
    </body>
</tt>
""")
def _subs_to_sset(self, subs):
    """Convert an iterable of append_subtitle argument tuples to a SubtitleSet."""
    sset = SubtitleSet(self.language.language_code)
    for sub_args in subs:
        sset.append_subtitle(*sub_args)
    return sset