def test_freeze_rollup_captions_contents(self):
    # Regression freeze: the library already processed Roll-Up captions
    # but had no tests for them. Pin the text currently produced so that
    # refactoring cannot change it unnoticed.
    caption_set = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)

    expected_texts = [
        '>>> HI',
        "I'M KEVIN CUNNING AND AT",
        # Notice the missing 'N' at the end. This is because the input
        # is not OK (it should only use 4 byte "words", filling in with
        # '80' where only 2 bytes are meaningful).
        "INVESTOR'S BANK WE BELIEVE I",
        'HELPING THE LOCAL NEIGHBORHOOD',
        'AND IMPROVING THE LIVES OF ALL',
        'WE SERVE',
        # Special chars. The last one should be printed 2 times.
        # XXX this is a bug.
        '®°½',
        # Special/extended chars delete the last 0-4 chars.
        # XXX - this is a bug.
        'ABû',
        'ÁÉÓ¡',
        "WHERE YOU'RE STANDING NOW,",
        "LOOKING OUT THERE, THAT'S AL",
        'THE CROWD.',
        '>> IT WAS GOOD TO BE IN TH',
        "And restore Iowa's land, water",
        'And wildlife.',
        '>> Bike Iowa, your source for'
    ]

    # Only the first node of every caption is inspected.
    actual_texts = []
    for caption in caption_set.get_captions('en-US'):
        actual_texts.append(caption.nodes[0].content)

    self.assertEqual(expected_texts, actual_texts)
def test_last_caption_zero_end_time_is_corrected(self):
    # The final caption of this sample must come out with an end time
    # exactly 4 * 1000 * 1000 time units after its start (presumably
    # 4 seconds expressed in microseconds -- confirm against the reader).
    reader = SCCReader()
    caption_set = reader.read(SAMPLE_SCC_NO_EXPLICIT_END_TO_LAST_CAPTION)

    english_captions = caption_set.get_captions('en-US')
    final_caption = english_captions[-1]
    expected_end = final_caption.start + 4 * 1000 * 1000

    self.assertEqual(final_caption.end, expected_end)
def test_default_positioning_when_no_positioning_is_specified(self):
    # Both the captions and their nodes are expected to serialize to the
    # same layout when the SCC sample carries no positioning at all.
    caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

    # The one serialized layout every caption and every node must match.
    default_layout = (
        ((0.0, UnitEnum.PERCENT), (86.66666666666667, UnitEnum.PERCENT)),
        None,
        None,
        (HorizontalAlignmentEnum.LEFT, VerticalAlignmentEnum.TOP),
    )

    actual_caption_layouts = []
    for caption in caption_set.get_captions('en-US'):
        actual_caption_layouts.append(caption.layout_info.serialized())
    expected_caption_layouts = [default_layout, default_layout]

    # One single-key dict per caption, keyed by the caption's index.
    actual_node_layout_infos = []
    for position, caption in enumerate(caption_set.get_captions('en-US')):
        serialized_nodes = [
            node.layout_info.serialized() for node in caption.nodes
        ]
        actual_node_layout_infos.append({position: serialized_nodes})
    expected_node_layout_infos = [
        {0: [default_layout]},
        {1: [default_layout]},
    ]

    self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
    self.assertEqual(expected_caption_layouts, actual_caption_layouts)
def test_italics_are_properly_read(self):
    def switches_italics(node):
        """Tell whether the given style node switches italics on or off.

        Raises ValueError if the node is not a style node.

        Style nodes should be deprecated in favor of another model, so
        this function is expected to go away.

        :type node: captionconvert.CaptionNode
        :rtype: bool
        """
        is_style_node = node.type_ == node.STYLE
        if not is_style_node:
            raise ValueError("This should be a style node.")
        return node.start

    caption_set = SCCReader().read(SAMPLE_SCC_WITH_ITALICS)
    first_caption = caption_set.get_captions('en-US')[0]
    opening, text, closing = (
        first_caption.nodes[0],
        first_caption.nodes[1],
        first_caption.nodes[2],
    )

    # We assert that the text is specified in italics: a style node
    # switching italics on, the text itself, then a style node switching
    # italics off. If style nodes are replaced, the way these 3
    # assertions are made will most likely change.
    self.assertEqual(switches_italics(opening), True)
    self.assertEqual(switches_italics(closing), False)
    self.assertEqual(text.content, 'abababab')
def test_proper_timestamps(self):
    # The third caption of the pop-on sample must start and end within
    # TOLERANCE_MICROSECONDS of the reference timestamps below.
    captions = SCCReader().read(SAMPLE_SCC_POP_ON)
    paragraph = captions.get_captions("en-US")[2]

    delta_start = abs(paragraph.start - 17000000)
    delta_end = abs(paragraph.end - 18752000)

    # assertLess reports both operands on failure, whereas
    # assertTrue(a < b) would only say "False is not true".
    self.assertLess(delta_start, TOLERANCE_MICROSECONDS)
    self.assertLess(delta_end, TOLERANCE_MICROSECONDS)
def test_timing_is_properly_set_on_split_captions(self):
    # All three captions read from this sample must share the same
    # formatted start and end time.
    caption_set = SCCReader().read(
        SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME)

    shared_timing = ('00:01:35.666', '00:01:40.866')
    expected_timings = [shared_timing, shared_timing, shared_timing]

    actual_timings = []
    for caption in caption_set.get_captions('en-US'):
        actual_timings.append(
            (caption.format_start(), caption.format_end()))

    self.assertEqual(expected_timings, actual_timings)
def test_correct_last_bad_timing(self):
    # This fix was implemented with a hack. The commands for the Pop-on
    # captions will have to be reviewed, but until then this is good
    # enough.
    reader = SCCReader()
    caption_set = reader.read(SAMPLE_SCC_PRODUCES_BAD_LAST_END_TIME)

    expected_timings = [
        (1408266666.6666667, 1469700000.0),
        (3208266666.666667, 3269700000.0),
    ]

    actual_timings = []
    for caption in caption_set.get_captions('en-US'):
        actual_timings.append((caption.start, caption.end))

    self.assertEqual(expected_timings, actual_timings)
def test_webvtt_newlines_are_properly_rendered(self):
    # Read the SCC sample, write it back out as WebVTT and compare the
    # result with the stored reference output.
    reader = SCCReader()
    caption_set = reader.read(
        SCC_THAT_GENERATES_WEBVTT_WITH_PROPER_NEWLINES)

    writer = WebVTTWriter()
    rendered = writer.write(caption_set)

    self.assertEqual(
        rendered,
        SAMPLE_WEBVTT_FROM_SCC_PROPERLY_WRITES_NEWLINES_OUTPUT)
def test_freeze_colon_spec_time(self):
    # Coverage doesn't mean we test that functionality, so assert that
    # all the timing specs that previously had coverage will actually
    # remain unchanged.
    caption_set = SCCReader().read(SAMPLE_SCC_POP_ON)

    expected_timings = [
        (9776433.333333332, 12312300.0),
        (14781433.33333333, 16883533.333333332),
        (16950266.666666664, 18618600.000000004),
        (18685333.333333332, 20754066.666666664),
        (20820800.0, 26626600.0),
        (26693333.333333332, 32098733.333333332),
        (32165466.66666666, 36202833.33333332),
    ]

    actual_timings = []
    for caption in caption_set.get_captions('en-US'):
        actual_timings.append((caption.start, caption.end))

    self.assertEqual(expected_timings, actual_timings)
def _test_srt_to_scc_to_srt_conversion(self, srt_captions):
    # Round-trip helper: SRT -> caption set -> SCC -> caption set -> SRT
    # -> caption set. The first and the last caption sets are compared
    # with assertCaptionSetAlmostEquals, passing TOLERANCE_MICROSECONDS.
    original_set = SRTReader().read(srt_captions)

    # Forward leg: serialize to SCC and parse it back.
    scc_text = SCCWriter().write(original_set)
    set_from_scc = SCCReader().read(scc_text)

    # Return leg: serialize to SRT again and parse it back.
    srt_text = SRTWriter().write(set_from_scc)
    round_tripped_set = SRTReader().read(srt_text)

    self.assertCaptionSetAlmostEquals(
        original_set, round_tripped_set, TOLERANCE_MICROSECONDS)
def test_freeze_semicolon_spec_time(self):
    # Regression freeze of the (start, end) pairs currently produced for
    # the roll-up sample.
    caption_set = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)

    expected_timings = [
        (766666.6666666667, 2800000.0),
        (2800000.0, 4600000.0),
        (4600000.0, 6166666.666666667),
        (6166666.666666667, 9733333.333333332),
        (9733333.333333332, 11266666.666666668),
        (11266666.666666668, 12266666.666666668),
        (12266666.666666668, 13266666.666666668),
        (13266666.666666668, 14266666.666666668),
        (14266666.666666668, 17066666.666666668),
        (17066666.666666668, 18666666.666666668),
        (18666666.666666668, 20233333.333333336),
        (20233333.333333336, 21833333.333333332),
        (21833333.333333332, 34933333.33333333),
        (34933333.33333333, 36433333.33333333),
        (36433333.33333333, 44300000.0),
        (44300000.0, 44866666.666666664),
    ]

    actual_timings = []
    for caption in caption_set.get_captions('en-US'):
        actual_timings.append((caption.start, caption.end))

    self.assertEqual(expected_timings, actual_timings)
def test_scc_positioning_is_read(self):
    # Compare the serialized origin of every caption's layout with the
    # reference list below.
    scc_source = text_type(SAMPLE_SCC_MULTIPLE_POSITIONING)
    caption_set = SCCReader().read(scc_source)

    # SCC generates only origin, and we always expect it.
    expected_positioning = [
        ((0.0, UnitEnum.PERCENT), (80.0, UnitEnum.PERCENT)),
        ((37.5, UnitEnum.PERCENT), (0.0, UnitEnum.PERCENT)),
        ((75.0, UnitEnum.PERCENT), (20.0, UnitEnum.PERCENT)),
        ((12.5, UnitEnum.PERCENT), (46.666666666666664, UnitEnum.PERCENT)),
        ((12.5, UnitEnum.PERCENT), (93.33333333333333, UnitEnum.PERCENT)),
        ((37.5, UnitEnum.PERCENT), (53.333333333333336, UnitEnum.PERCENT)),
        ((75.0, UnitEnum.PERCENT), (13.333333333333334, UnitEnum.PERCENT)),
        ((12.5, UnitEnum.PERCENT), (33.333333333333336, UnitEnum.PERCENT)),
        ((12.5, UnitEnum.PERCENT), (86.66666666666667, UnitEnum.PERCENT)),
        ((75.0, UnitEnum.PERCENT), (6.666666666666667, UnitEnum.PERCENT)),
        ((37.5, UnitEnum.PERCENT), (40.0, UnitEnum.PERCENT)),
        ((12.5, UnitEnum.PERCENT), (73.33333333333333, UnitEnum.PERCENT)),
    ]

    actual_positioning = []
    for caption in caption_set.get_captions('en-US'):
        actual_positioning.append(caption.layout_info.origin.serialized())

    self.assertEqual(expected_positioning, actual_positioning)
def test_scc_reader_only_supports_unicode_input(self):
    # Handing a bytes object to the reader must raise InvalidInputError.
    bytes_input = b''
    with self.assertRaises(InvalidInputError):
        reader = SCCReader()
        reader.read(bytes_input)
def test_dfxp_is_valid_xml_when_scc_source_has_weird_italic_commands(self):
    # Convert the SCC sample to DFXP and compare the result with the
    # stored reference output.
    reader = SCCReader()
    caption_set = reader.read(
        SAMPLE_SCC_CREATED_DFXP_WITH_WRONGLY_CLOSING_SPANS)

    writer = DFXPWriter()
    rendered = writer.write(caption_set)

    self.assertEqual(
        rendered, SAMPLE_DFXP_WITH_PROPERLY_CLOSING_SPANS_OUTPUT)
def test_eoc_on_newline_rejection(self):
    # Reading the flashing-captions sample is expected to be rejected
    # with a ValueError. The return value of read() is deliberately not
    # bound to a name: only the raised exception matters here.
    with self.assertRaises(ValueError):
        SCCReader().read(SAMPLE_SCC_FLASHING_CAPTIONS)
def test_eoc_first_command(self):
    reader = SCCReader()
    caption_set = reader.read(SAMPLE_SCC_EOC_FIRST_COMMAND)

    # Just one caption: the first EOC disappears.
    english_captions = caption_set.get_captions('en-US')
    self.assertEqual(len(english_captions), 1)
def test_empty_file(self):
    # Reading the empty sample must raise CaptionReadNoCaptions. The
    # reader is built outside the guarded block so that only the read()
    # call itself is expected to raise.
    reader = SCCReader()
    with self.assertRaises(CaptionReadNoCaptions):
        reader.read(SAMPLE_SCC_EMPTY)
def test_detection(self):
    # detect() must return a truthy value for the pop-on SCC sample.
    reader = SCCReader()
    detected = reader.detect(SAMPLE_SCC_POP_ON)
    self.assertTrue(detected)
def test_caption_length(self):
    # The pop-on sample must yield exactly 7 'en-US' captions.
    caption_set = SCCReader().read(SAMPLE_SCC_POP_ON)
    english_captions = caption_set.get_captions("en-US")
    caption_count = len(english_captions)
    self.assertEqual(7, caption_count)
def test_scc_to_dfxp(self):
    # Convert the multiple-positioning SCC sample to DFXP, with both
    # relativize and fit_to_screen switched off, and compare the result
    # with the stored reference output.
    reader = SCCReader()
    caption_set = reader.read(SAMPLE_SCC_MULTIPLE_POSITIONING)

    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    rendered = writer.write(caption_set)

    self.assertEqual(SAMPLE_DFXP_FROM_SCC_OUTPUT, rendered)