def test_update_end_time(self):
    # Give every subtitle an end time of 1000 times its index, then read
    # the items back and check that each end time was stored.
    reference = utils.get_subs("pre-drm.dfxp").to_internal()
    dfxp_updated = utils.get_subs("pre-drm.dfxp").to_internal()
    # The untouched copy only decides how many updates are issued.
    for index in xrange(len(reference)):
        dfxp_updated.update(index, to_ms=1000 * index)
    for index, item in enumerate(dfxp_updated.subtitle_items()):
        self.assertEqual(index * 1000, item.end_time)
def test_update_end_time(self):
    # Two independent parses of the same fixture: one supplies the item
    # count, the other receives the end-time updates and is then verified.
    fixture = "pre-dmr.dfxp"
    untouched = utils.get_subs(fixture).to_internal()
    dfxp_updated = utils.get_subs(fixture).to_internal()
    total = len(untouched)
    for position in xrange(0, total):
        dfxp_updated.update(position, to_ms=1000 * position)
    for position, subtitle in enumerate(dfxp_updated.subtitle_items()):
        self.assertEqual(position * 1000, subtitle.end_time)
def test_ttfa(self):
    # Both fixtures must parse to the expected number of subtitles, and for
    # the first one the `None` entry of the `_ttml` element's nsmap must
    # equal TTML_NAMESPACE_URI.
    subs = utils.get_subs("pre-dmr.dfxp")
    self.assertEqual(len(subs), 419)
    # make sure the right namespace is in
    # NOTE(review): this assigns the tag rather than asserting on it;
    # confirm whether an assertion was intended here.
    subs.subtitle_set._ttml.tag = '{http://www.w3.org/ns/ttml}tt'
    self.assertEqual(subs.subtitle_set._ttml.nsmap[None], TTML_NAMESPACE_URI)

    subs = utils.get_subs("pre-dmr2.dfxp")
    self.assertEqual(len(subs), 19)
    # make sure the right namespace is in
    # NOTE(review): as above, this is an assignment, and nothing reads the
    # tag afterwards in this test.
    subs.subtitle_set._ttml.tag = '{http://www.w3.org/ns/ttml}tt'
def test_regions(self):
    # The first four items of regions.vtt must report the "top" region;
    # every later item must report no region.
    items = utils.get_subs("regions.vtt").to_internal().subtitle_items()
    for sub in items[:4]:
        self.assertEqual(sub.region, "top")
    for sub in items[4:]:
        self.assertEqual(sub.region, None)
def test_ampersand_escaping(self):
    # With the WebVTT mappings, the item at index 16 must render its
    # ampersand as a literal "&" inside italics markup.
    parsed = utils.get_subs("basic.vtt").to_internal()
    sub_data = list(parsed.subtitle_items(WEBVTTGenerator.MAPPINGS))
    self.assertEqual(
        sub_data[16].text,
        "such as MP4, theora, webM and <i>&</i> HTML 5.")
def test_internal_format(self):
    # Check start time, end time and text (with bold markup) of the first
    # item of simple.srt when rendered through the SRT mappings.
    parsed = utils.get_subs("simple.srt").to_internal()
    sub_data = list(parsed.subtitle_items(SRTGenerator.MAPPINGS))
    first = sub_data[0]
    self.assertEqual(first.start_time, 4)
    self.assertEqual(first.end_time, 2093)
    self.assertEqual(
        first.text,
        "We started <b>Universal Subtitles</b> because we believe")
def test_internal_format(self):
    # Items are indexed positionally here: [0] and [1] are compared with
    # the expected timings and [2] with the expected text.
    parsed = utils.get_subs("simple.dfxp").to_internal()
    sub_data = list(parsed.subtitle_items())
    self.assertEqual(sub_data[0][0], 1200)
    self.assertEqual(sub_data[0][1], 4467)
    self.assertEqual(sub_data[3][2], 'at least 7,000 years ago.')
def test_internal_format(self):
    # The first item of simple.sbv must carry the expected values in its
    # first three positional fields.
    parsed = utils.get_subs("simple.sbv").to_internal()
    sub_data = list(parsed.subtitle_items())
    first = sub_data[0]
    self.assertEqual(first[0], 48)
    self.assertEqual(first[1], 2932)
    self.assertEqual(
        first[2],
        'We started Universal Subtitles because we believe')
def test_basic(self):
    # The JSON generator's output must decode to as many entries as the
    # parsed SRT file has subtitles.
    subs = utils.get_subs("simple.srt")
    self.assertEqual(len(subs), 19)
    json_subs = json.loads(JSONGenerator.generate(subs.to_internal()))
    self.assertEqual(len(json_subs), 19)
def test_self_generate(self):
    # Generate DFXP from the parsed fixture, parse that output again, and
    # require both internal representations to match item by item.
    parsed_subs1 = utils.get_subs("simple.dfxp")
    parsed_subs2 = DFXPParser(
        DFXPGenerator(parsed_subs1.subtitle_set, 'en').__unicode__())
    expected = list(parsed_subs1.to_internal())
    actual = list(parsed_subs2.to_internal())
    # zip() stops at the shorter input, so a truncated (or empty)
    # regenerated set would otherwise pass unnoticed.
    self.assertEqual(len(expected), len(actual))
    for x1, x2 in zip(expected, actual):
        self.assertEqual(x1, x2)
def test_internal_format(self):
    # Check start time, end time and text of the first item of simple.srt
    # when rendered through the SRT mappings.
    parsed = utils.get_subs("simple.srt").to_internal()
    sub_data = list(parsed.subtitle_items(SRTGenerator.MAPPINGS))
    first = sub_data[0]
    self.assertEqual(first.start_time, 4)
    self.assertEqual(first.end_time, 2093)
    self.assertEqual(
        first.text,
        "We started Universal Subtitles because we believe")
def test_with_information_headers(self):
    # we ignore those headers for now, but at least we shouldn't fail on them
    parsed = utils.get_subs("with-information-header.sbv").to_internal()
    sub_data = list(parsed.subtitle_items())
    # The first item must still carry the expected values in its first
    # three positional fields.
    first = sub_data[0]
    self.assertEqual(first[0], 48)
    self.assertEqual(first[1], 2932)
    self.assertEqual(
        first[2],
        'We started Universal Subtitles because we believe')
def test_generated_formatting(self):
    # Render the formatted DFXP fixture through the WebVTT mappings and
    # compare the text of selected items against the expected markup.
    internal = utils.get_subs("with-formatting.dfxp").to_internal()
    rendered = internal.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
    expectations = (
        (2, 'It has <b>bold</b> formatting'),
        (3, 'It has <i>italics</i> too'),
        (4, 'And why not <u>underline</u>'),
        (5, 'It has a html tag <a> should be in brackets'),
        (6, 'It has speaker changes >>>'),
    )
    # Checked in index order, so the first mismatch is the one reported.
    for position, wanted in expectations:
        self.assertEqual(rendered[position].text, wanted)
def test_whitespace(self):
    # The first item, rendered with the SRT mappings, must contain exactly
    # one newline between its two text lines.
    subtitle_set = utils.get_subs("pre-dmr.dfxp").subtitle_set
    rendered = subtitle_set.subtitle_items(mappings=SRTGenerator.MAPPINGS)
    first = rendered[0]
    wanted = "Last time, we began talking about\nresonance structures. And I'd like"
    self.assertEqual(first.text, wanted)
def test_nested_with_markup(self):
    internal = utils.get_subs("simple.dfxp").to_internal()
    node = internal.get_subtitles()[38]
    # FIXME: actually this is wrong, as it's nested and we should have
    # the same text with underline and italics. At least we're not
    # losing any text, and that's good enough for now, should be
    # fixed though.
    markup = internal.get_content_with_markup(node, SRTGenerator.MAPPINGS)
    self.assertEqual(markup, 'a <u>word on </u><i>nested spans</i>')
def test_generated_formatting(self):
    # Items 2 through 6 of the formatted DFXP fixture, rendered with the
    # WebVTT mappings, must match these texts in order.
    wanted_texts = [
        'It has <b>bold</b> formatting',
        'It has <i>italics</i> too',
        'And why not <u>underline</u>',
        'It has a html tag <a> should be in brackets',
        'It has speaker changes >>>',
    ]
    source = utils.get_subs("with-formatting.dfxp").to_internal()
    items = source.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
    # enumerate() starts at 2 because the first expectation is for index 2.
    for index, wanted in enumerate(wanted_texts, 2):
        self.assertEqual(items[index].text, wanted)
def test_round_trip(self):
    # Parse SBV, generate SBV from the internal form, parse that output
    # again, and require the same count and the same items.
    subs1 = utils.get_subs("simple.sbv")
    parsed1 = subs1.to_internal()
    output = unicode(SBVGenerator(parsed1))
    subs2 = SBVParser(output, 'en')
    parsed2 = subs2.to_internal()
    # zip() below stops at the shorter input, so lengths are checked first.
    self.assertEqual(len(subs1), len(subs2))
    items1 = list(parsed1.subtitle_items())
    items2 = list(parsed2.subtitle_items())
    for x1, x2 in zip(items1, items2):
        self.assertEqual(x1, x2)
def test_nested_with_markup(self):
    # With an explicit mapping table, the nested italic span must stay
    # inside the underline span in the rendered markup.
    mappings = {
        "linebreaks": "\r\n",
        "bold": "<b>%s</b>",
        "italics": "<i>%s</i>",
        "underline": "<u>%s</u>",
    }
    internal = utils.get_subs("simple.dfxp").to_internal()
    node = internal.get_subtitles()[38]
    markup = internal.get_content_with_markup(node, mappings)
    self.assertEqual(markup, 'a <u>word on <i>nested spans</i></u>')
def test_mixed_newlines(self):
    # A valid SRT may be re-saved by an editor that writes line breaks
    # with the current platform's separator (e.g. \n on unix, \r...),
    # so we have to make sure this gets normalized.
    subs = utils.get_subs("Untimed_text.srt")
    parsed = subs.to_internal()
    self.assertEqual(len(subs), 43)
    # second sub should have a line break
    wanted_fragment = '<p begin="99:59:59.000" end="99:59:59.000">I\'m gutted. <br/>Absolutely gutted.</p>'
    self.assertIn(wanted_fragment, parsed.to_xml())
def test_round_trip(self):
    # Parse SRT, generate SRT from the internal form, parse that output
    # again, and require the same count and the same rendered items.
    subs1 = utils.get_subs("simple.srt")
    parsed1 = subs1.to_internal()
    srt_output = unicode(SRTGenerator(parsed1))
    subs2 = SRTParser(srt_output, 'en')
    parsed2 = subs2.to_internal()
    # zip() below stops at the shorter input, so lengths are checked first.
    self.assertEqual(len(subs1), len(subs2))
    items1 = list(parsed1.subtitle_items(SRTGenerator.MAPPINGS))
    items2 = list(parsed2.subtitle_items(SRTGenerator.MAPPINGS))
    for x1, x2 in zip(items1, items2):
        self.assertEqual(x1, x2)
def test_line_breaks(self):
    # With the SBV mappings, the line break inside item 13 must be
    # rendered as the "[br]" marker.
    parsed = utils.get_subs("simple.sbv").to_internal()
    # Each item is unpacked into four fields; the text is the third.
    lines = [
        text
        for _, _, text, _ in parsed.subtitle_items(SBVGenerator.MAPPINGS)
    ]
    self.assertEqual(
        lines[13],
        'We support videos on [br]YouTube, Blip.TV, Ustream, and many more.')
def test_mixed_newlines(self):
    # Some people take a valid srt and edit it in an editor that saves
    # line breaks using the current platform's separator (e.g. \n on
    # unix, \r...); make sure that gets normalized.
    source = utils.get_subs("Untimed_text.srt")
    internal = source.to_internal()
    self.assertEqual(len(source), 43)
    # second sub should have a line break
    xml = internal.to_xml()
    self.assertIn(
        '<p begin="99:59:59.000" end="99:59:59.000">I\'m gutted. <br/>Absolutely gutted.</p>',
        xml)
def test_equality_ignores_whitespace(self):
    # The subtitle sets of the plain fixture and of its "-whitespace"
    # variant must compare equal.
    plain = utils.get_subs('pre-dmr.dfxp')
    padded = utils.get_subs('pre-dmr-whitespace.dfxp')
    self.assertEqual(plain.subtitle_set, padded.subtitle_set)
def test_complex_formatting(self):
    # this is the srt used in our selenium tests
    subtitle_count = len(utils.get_subs("Timed_en.srt"))
    self.assertEqual(subtitle_count, 72)
def test_timing_parser(self):
    # The first two positional fields of the first item of simple.ssa
    # must hold the expected timings.
    internal = utils.get_subs("simple.ssa").to_internal()
    first = list(internal.subtitle_items())[0]
    self.assertEqual(first[0], 40)
    self.assertEqual(first[1], 2930)
def setUp(self):
    # Load the multiline-italics fixture and keep its internal form on
    # the test instance.
    loaded = utils.get_subs("multiline-italics.dfxp")
    self.dfxp = loaded.to_internal()
def test_unsynced_as_generated_from_frontend(self):
    # Every item of this fixture must come back with neither a start
    # time nor an end time.
    internal = utils.get_subs("dfxp-as-front-end-no-sync.dfxp").to_internal()
    for item in internal.subtitle_items():
        self.assertEqual(None, item.start_time)
        self.assertEqual(None, item.end_time)
def test_basic(self):
    # simple.srt must parse to exactly 19 subtitles.
    subs = utils.get_subs("simple.srt")
    self.assertEqual(len(subs), 19)
def setUp(self):
    # Keep both the internal form of simple.srt and its items rendered
    # through the HTML mappings on the test instance.
    self.parsed = utils.get_subs("simple.srt").to_internal()
    rendered = self.parsed.subtitle_items(HTMLGenerator.MAPPINGS)
    self.sub_data = list(rendered)
def test_xml_literals(self):
    # With no mappings passed, items 2 to 4 of this fixture must still
    # read back with literal markup tags in their text.
    internal = utils.get_subs("with-xml-literals.dfxp").to_internal()
    items = internal.subtitle_items()
    wanted = (
        (2, 'It has <b>bold</b> formatting'),
        (3, 'It has <i>italics</i> too'),
        (4, 'And why not <u>underline</u>'),
    )
    for position, text in wanted:
        self.assertEqual(items[position].text, text)
def test_whitespace(self):
    # Only the first SRT-rendered item is inspected: its two text lines
    # must be separated by a single newline.
    parsed_file = utils.get_subs("pre-dmr.dfxp")
    items = parsed_file.subtitle_set.subtitle_items(
        mappings=SRTGenerator.MAPPINGS)
    self.assertEqual(
        items[0].text,
        "Last time, we began talking about\nresonance structures. And I'd like")
def test_self_generate(self):
    # Feed the unicode form of the parsed SSA fixture back into the SSA
    # parser and require both internal representations to match.
    parsed_subs1 = utils.get_subs("simple.ssa")
    generated = SSAParser(unicode(parsed_subs1), 'en')
    expected = list(parsed_subs1.to_internal())
    actual = list(generated.to_internal())
    # zip() stops at the shorter input, so a truncated (or empty)
    # regenerated set would otherwise pass unnoticed.
    self.assertEqual(len(expected), len(actual))
    for x1, x2 in zip(expected, actual):
        self.assertEqual(x1, x2)
def test_regions(self):
    # Only the first item of regions.dfxp must report the "top" region;
    # all remaining items must report no region.
    items = utils.get_subs("regions.dfxp").to_internal().subtitle_items()
    self.assertEqual(items[0].region, "top")
    for item in items[1:]:
        self.assertEqual(item.region, None)
def test_unsynced(self):
    # The subtitle set of this fixture must not report itself as fully
    # synced.
    parsed_file = utils.get_subs('i-2376.dfxp')
    self.assertFalse(parsed_file.subtitle_set.fully_synced)
def test_curly_brackets(self):
    # Curly brackets in the subtitle text must survive parsing and SRT
    # rendering unchanged, in a file holding a single subtitle.
    parsed = utils.get_subs("curly_brackets.srt").to_internal()
    sub_data = list(parsed.subtitle_items(SRTGenerator.MAPPINGS))
    self.assertEqual(len(sub_data), 1)
    self.assertEqual(sub_data[0].text, "{ a } {{ b }} c")
def setUp(self):
    # Keep the internal form of the formatting fixture, plus its items
    # rendered with the SRT mappings, on the test instance.
    internal = utils.get_subs("with-formatting.dfxp").to_internal()
    self.dfxp = internal
    self.subs = internal.subtitle_items(mappings=SRTGenerator.MAPPINGS)
def test_basic(self):
    # simple.dfxp must parse to exactly 76 subtitles.
    subs = utils.get_subs("simple.dfxp")
    self.assertEqual(len(subs), 76)
def test_nested_tags(self):
    # storage.get_contents() must return the plain text of subtitles 37
    # and 38 without any markup.
    internal = utils.get_subs("simple.dfxp").to_internal()
    wanted_by_index = (
        (37, 'nested spans'),
        (38, 'a word on nested spans'),
    )
    for position, wanted in wanted_by_index:
        node = internal.get_subtitles()[position]
        self.assertEqual(storage.get_contents(node), wanted)
def test_pre_drm_dfxp(self):
    # tests a pretty feature rich dfxp file
    wanted_counts = (
        ("pre-drm.dfxp", 419),
        ("pre-drm2.dfxp", 19),
    )
    for fixture, count in wanted_counts:
        internal = utils.get_subs(fixture).to_internal()
        self.assertEqual(len(internal), count)
def test_self_generate(self):
    # Round trip: generate DFXP from the parsed fixture, parse the result,
    # and compare the two internal representations item by item.
    parsed_subs1 = utils.get_subs("simple.dfxp")
    generator = DFXPGenerator(parsed_subs1.subtitle_set, 'en')
    parsed_subs2 = DFXPParser(generator.__unicode__())
    expected = list(parsed_subs1.to_internal())
    actual = list(parsed_subs2.to_internal())
    # zip() stops at the shorter input, so a truncated (or empty)
    # regenerated set would otherwise pass unnoticed.
    self.assertEqual(len(expected), len(actual))
    for x1, x2 in zip(expected, actual):
        self.assertEqual(x1, x2)
def test_ampersand_escaping(self):
    # With the SRT mappings, the item at index 16 must contain a literal
    # "&" in its text.
    parsed = utils.get_subs("simple.srt").to_internal()
    sub_data = list(parsed.subtitle_items(SRTGenerator.MAPPINGS))
    self.assertEqual(
        sub_data[16].text,
        "such as MP4, theora, webM and & HTML 5.")
def test_nested_with_markup(self):
    # With the SRT mappings, the nested italic span must stay inside the
    # underline span in the rendered markup.
    internal = utils.get_subs("simple.dfxp").to_internal()
    node = internal.get_subtitles()[38]
    markup = internal.get_content_with_markup(node, SRTGenerator.MAPPINGS)
    self.assertEqual(markup, 'a <u>word on <i>nested spans</i></u>')
def test_comments(self):
    # test that the subtitle_items() method doesn't throw an exception
    # when there are comments in the DFXP. See gh-841 for details.
    internal = utils.get_subs("comments.dfxp").to_internal()
    # list() consumes each result in full; the values are not inspected.
    list(internal.subtitle_items())
    list(internal.subtitle_items(mappings=HTMLGenerator.MAPPINGS))
def test_update_language_code(self):
    # After set_language('fr'), the xml:lang attribute read from the
    # `_ttml` element must be 'fr'.
    subs = utils.get_subs("simple.dfxp").to_internal()
    subs.set_language('fr')
    lang_attr_name = '{http://www.w3.org/XML/1998/namespace}lang'
    self.assertEqual(subs._ttml.get(lang_attr_name), 'fr')
def test_unsyced_parsing(self):
    # Untimed_text.sbv must parse to exactly 43 subtitles.
    subs = utils.get_subs("Untimed_text.sbv")
    self.assertEqual(len(subs), 43)
def test_ampersand_escaping(self):
    # With the SRT mappings, the item at index 16 must render its
    # ampersand as a literal "&" inside italics markup.
    parsed = utils.get_subs("simple.srt").to_internal()
    sub_data = list(parsed.subtitle_items(SRTGenerator.MAPPINGS))
    self.assertEqual(
        sub_data[16].text,
        "such as MP4, theora, webM and <i>&</i> HTML 5.")