Exemplo n.º 1
0
    def test_scc_to_dfxp(self, sample_dfxp_from_scc_output,
                         sample_scc_multiple_positioning):
        """Check the DFXP written from an SCC sample against the stored DFXP.

        The writer is created with ``relativize=False`` and
        ``fit_to_screen=False``.
        """
        parsed_captions = SCCReader().read(sample_scc_multiple_positioning)
        writer = DFXPWriter(relativize=False, fit_to_screen=False)
        rendered = writer.write(parsed_captions)

        assert sample_dfxp_from_scc_output == rendered
Exemplo n.º 2
0
    def test_italics_are_properly_read(self):
        """Check that italics in the SCC sample are read as style nodes.

        The first 'en-US' caption is expected to hold a style node switching
        italics on, a text node, and a style node switching italics off.
        """
        def switches_italics(node):
            """Tell whether a style node switches italics on or off.

            Style nodes should be deprecated in favor of another model, so
            this function is expected to go away.

            :type node: pycaption.CaptionNode
            :rtype: bool
            :raises ValueError: if the node is not a style node
            """
            is_style_node = node.type_ == node.STYLE
            if is_style_node:
                return node.start

            raise ValueError(u"This should be a style node.")

        parsed = SCCReader().read(SAMPLE_SCC_WITH_ITALICS)
        first_caption_nodes = parsed.get_captions(u'en-US')[0].nodes

        # The text must be wrapped by an italics-on and an italics-off node.
        # If Style nodes are replaced, the way these 3 assertions are made
        # will most likely change.
        self.assertEqual(switches_italics(first_caption_nodes[0]), True)
        self.assertEqual(switches_italics(first_caption_nodes[2]), False)
        self.assertEqual(first_caption_nodes[1].content, u'abababab')
Exemplo n.º 3
0
    def test_default_positioning_when_no_positioning_is_specified(self):
        """Check the layout assigned when the SCC sample sets no positioning.

        Both captions, and the single node of each caption, are expected to
        serialize to the same default layout.
        """
        # Serialized layout expected for every caption and every node.
        default_layout = (((0.0, u'%'), (86.66666666666667, u'%')),
                          None, None, (u'left', u'top'))
        expected_caption_layouts = [default_layout, default_layout]
        expected_node_layout_infos = [{0: [default_layout]},
                                      {1: [default_layout]}]

        caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

        actual_caption_layouts = []
        for caption in caption_set.get_captions(u'en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # One single-key dict per caption: {caption index: [node layouts]}.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            node_layouts = [
                node.layout_info.serialized() for node in caption.nodes
            ]
            actual_node_layout_infos.append({idx: node_layouts})

        self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
        self.assertEqual(expected_caption_layouts, actual_caption_layouts)
Exemplo n.º 4
0
    def test_last_caption_zero_end_time_is_corrected(self):
        """Check that the last caption ends 4 * 1000 * 1000 after its start.

        NOTE(review): the time unit is presumably microseconds - confirm.
        """
        reader = SCCReader()
        caption_set = reader.read(SAMPLE_SCC_NO_EXPLICIT_END_TO_LAST_CAPTION)

        final_caption = caption_set.get_captions('en-US')[-1]
        self.assertEqual(
            final_caption.end, final_caption.start + 4 * 1000 * 1000)
Exemplo n.º 5
0
 def test_freeze_rollup_captions_contents(self):
     """Freeze the texts currently read from the Roll-Up (RU2) SCC sample.

     There were no tests for Roll-Up captions although the library processed
     them; this pins the current output so refactoring changes nothing.
     Some expected values record known bugs (marked XXX below).
     """
     expected_texts = [
         u'>>> HI',
         u"I'M KEVIN CUNNING AND AT",
         # Notice the missing 'N' at the end. This is because the input
         # is not OK: it should only use 4 byte "words" (filling in with
         # '80' where only 2 bytes are meaningful).
         u"INVESTOR'S BANK WE BELIEVE I",
         u'HELPING THE LOCAL NEIGHBORHOOD',
         u'AND IMPROVING THE LIVES OF ALL',
         u'WE SERVE',
         # Special chars. The last one should be printed 2 times.
         # XXX this is a bug.
         u'®°½',
         # Special/extended chars delete the last 0-4 chars.
         # XXX - this is a bug.
         u'ABû',
         u'ÁÉÓ¡',
         u"WHERE YOU'RE STANDING NOW,",
         u"LOOKING OUT THERE, THAT'S AL",
         u'THE CROWD.',
         u'>> IT WAS GOOD TO BE IN TH',
         u"And restore Iowa's land, water",
         u'And wildlife.',
         u'>> Bike Iowa, your source for'
     ]

     caption_set = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)

     # Only the first node of every caption is compared.
     actual_texts = []
     for caption in caption_set.get_captions(u'en-US'):
         actual_texts.append(caption.nodes[0].content)

     self.assertEqual(expected_texts, actual_texts)
Exemplo n.º 6
0
    def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2):
        """Freeze the texts currently read from the Roll-Up SCC fixture.

        There were no tests for Roll-Up captions although the library
        processed them; this pins the current output so that refactoring
        changes nothing.
        """
        expected_texts = [
            '>>> HI.',
            "I'M KEVIN CUNNING AND AT",
            "INVESTOR'S BANK WE BELIEVE IN",
            'HELPING THE LOCAL NEIGHBORHOODS',
            'AND IMPROVING THE LIVES OF ALL',
            'WE SERVE.',
            '®°½½',
            'ABû',
            'ÁÁÉÓ¡',
            "WHERE YOU'RE STANDING NOW,",
            "LOOKING OUT THERE, THAT'S AL",
            'THE CROWD.',
            '>> IT WAS GOOD TO BE IN TH',
            "And restore Iowa's land, water",
            'And wildlife.',
            '>> Bike Iowa, your source for',
        ]

        caption_set = SCCReader().read(sample_scc_roll_up_ru2)

        # Only the first node of every caption is compared.
        actual_texts = []
        for caption in caption_set.get_captions('en-US'):
            actual_texts.append(caption.nodes[0].content)

        assert expected_texts == actual_texts
Exemplo n.º 7
0
    def test_webvtt_newlines_are_properly_rendered(self):
        """Check the WebVTT written from the SCC sample against stored output."""
        parsed = SCCReader().read(
            SCC_THAT_GENERATES_WEBVTT_WITH_PROPER_NEWLINES)
        rendered = WebVTTWriter().write(parsed)
        expected = SAMPLE_WEBVTT_FROM_SCC_PROPERLY_WRITES_NEWLINES_OUTPUT

        self.assertEqual(rendered, expected)
Exemplo n.º 8
0
    def test_scc_positioning_is_read(self):
        """Check the serialized origin of every caption in the SCC sample."""
        # Expected origins as plain number pairs; both numbers of each pair
        # are paired with UnitEnum.PERCENT below.
        # SCC generates only origin, and we always expect it.
        origin_values = [
            (0.0, 80.0),
            (37.5, 0.0),
            (75.0, 20.0),
            (12.5, 46.666666666666664),
            (12.5, 93.33333333333333),
            (37.5, 53.333333333333336),
            (75.0, 13.333333333333334),
            (12.5, 33.333333333333336),
            (12.5, 86.66666666666667),
            (75.0, 6.666666666666667),
            (37.5, 40.0),
            (12.5, 73.33333333333333),
        ]
        expected_positioning = [
            ((first, UnitEnum.PERCENT), (second, UnitEnum.PERCENT))
            for first, second in origin_values
        ]

        caption_set = SCCReader().read(
            six.text_type(SAMPLE_SCC_MULTIPLE_POSITIONING))

        actual_positioning = []
        for caption in caption_set.get_captions('en-US'):
            actual_positioning.append(caption.layout_info.origin.serialized())

        self.assertEqual(expected_positioning, actual_positioning)
Exemplo n.º 9
0
    def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2):
        """Freeze the (start, end) timings read from the Roll-Up fixture.

        In the expected data every caption ends exactly where the next one
        starts, so the pairs are built from one list of boundaries.
        """
        boundaries = [
            766666.6666666667,
            2800000.0,
            4600000.0,
            6166666.666666667,
            9733333.333333332,
            11266666.666666668,
            12266666.666666668,
            13266666.666666668,
            14266666.666666668,
            17066666.666666668,
            18666666.666666668,
            20233333.333333336,
            21833333.333333332,
            34933333.33333333,
            36433333.33333333,
            44300000.0,
            44866666.666666664,
        ]
        # Consecutive boundaries form the (start, end) pair of one caption.
        expected_timings = list(zip(boundaries, boundaries[1:]))

        caption_set = SCCReader().read(sample_scc_roll_up_ru2)

        actual_timings = []
        for caption in caption_set.get_captions('en-US'):
            actual_timings.append((caption.start, caption.end))

        assert expected_timings == actual_timings
Exemplo n.º 10
0
    def test_multiple_formats(self, sample_scc_multiple_formats):
        """Check the text lines read from a mixed pop-on / paint-on sample.

        Test for captions that contain both pop on and paint on formats to
        ensure the paint on lines are not repeated.
        """
        expected_text_lines = [
            "(Client's Voice)",
            'Remember that degree',
            'you got in taxation?',
            '(Danny)',
            "Of course you don't",
            "because you didn't!",
            "Your job isn't doing hard",
            'work...',
            "...it's making them do hard",
            'work...',
            '...and getting paid for it.',
            '(VO)',
            'Snap and sort your expenses to',
            'save over $4,600 at tax time.',
            'QUICKBOOKS. BACKING YOU.',
        ]

        caption_set = SCCReader().read(sample_scc_multiple_formats)

        # Collect the content of text nodes only, in caption order.
        text_lines = []
        for caption in caption_set.get_captions('en-US'):
            for node in caption.nodes:
                if node.type_ == CaptionNode.TEXT:
                    text_lines.append(node.content)

        assert expected_text_lines == text_lines
Exemplo n.º 11
0
    def test_default_positioning_when_no_positioning_is_specified(self):
        """Check the layout assigned when the SCC sample sets no positioning.

        Both captions, and the single node of each caption, are expected to
        serialize to the same default layout.
        """
        # Serialized layout expected for every caption and every node.
        default_layout = (
            ((0.0, UnitEnum.PERCENT), (86.66666666666667, UnitEnum.PERCENT)),
            None,
            None,
            (HorizontalAlignmentEnum.LEFT, VerticalAlignmentEnum.TOP),
        )
        expected_caption_layouts = [default_layout, default_layout]
        expected_node_layout_infos = [{0: [default_layout]},
                                      {1: [default_layout]}]

        caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

        actual_caption_layouts = []
        for caption in caption_set.get_captions('en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # One single-key dict per caption: {caption index: [node layouts]}.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            node_layouts = [
                node.layout_info.serialized() for node in caption.nodes
            ]
            actual_node_layout_infos.append({idx: node_layouts})

        self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
        self.assertEqual(expected_caption_layouts, actual_caption_layouts)
Exemplo n.º 12
0
    def test_webvtt_newlines_are_properly_rendered(
            self, sample_webvtt_from_scc_properly_writes_newlines_output,
            scc_that_generates_webvtt_with_proper_newlines):
        """Check the WebVTT written from the SCC fixture against stored output."""
        parsed = SCCReader().read(
            scc_that_generates_webvtt_with_proper_newlines)
        writer = WebVTTWriter()
        rendered = writer.write(parsed)

        assert rendered == sample_webvtt_from_scc_properly_writes_newlines_output
Exemplo n.º 13
0
    def test_dfxp_is_valid_xml_when_scc_source_has_ampersand_character(
            self, sample_dfxp_with_ampersand_character,
            sample_scc_with_ampersand_character):
        """Check the DFXP written from the SCC fixture against the stored DFXP.

        The DFXP writer is used with its default settings.
        """
        parsed = SCCReader().read(sample_scc_with_ampersand_character)
        writer = DFXPWriter()
        rendered = writer.write(parsed)

        assert rendered == sample_dfxp_with_ampersand_character
Exemplo n.º 14
0
 def _test_srt_to_scc_to_srt_conversion(self, srt_captions):
     """Round-trip SRT -> SCC -> SRT and compare the two caption sets.

     The caption set read from ``srt_captions`` is compared with the one
     read back after the round trip, using assertCaptionSetAlmostEquals
     with TOLERANCE_MICROSECONDS.
     """
     original_set = SRTReader().read(srt_captions)

     # SRT -> SCC text -> caption set -> SRT text
     scc_text = SCCWriter().write(original_set)
     from_scc_set = SCCReader().read(scc_text)
     srt_text = SRTWriter().write(from_scc_set)

     round_tripped_set = SRTReader().read(srt_text)
     self.assertCaptionSetAlmostEquals(
         original_set, round_tripped_set, TOLERANCE_MICROSECONDS)
Exemplo n.º 15
0
    def test_eoc_first_command(self, sample_scc_eoc_first_command):
        """Pin the number of 'en-US' captions read from the fixture.

        Judging by the fixture name, the sample presumably starts with an
        EOC command - confirm against the fixture.
        """
        # TODO First caption should be ignored because it doesn't start with
        #  a pop/roll/paint on command
        parsed = SCCReader().read(sample_scc_eoc_first_command)
        captions = parsed.get_captions('en-US')

        # NOTE(review): the previous comment here read "just one caption,
        # first EOC disappears", yet the assertion expects 2 captions -
        # confirm which one reflects the intended behavior.
        assert len(captions) == 2
Exemplo n.º 16
0
    def test_last_caption_zero_end_time_is_corrected(
            self, sample_scc_no_explicit_end_to_last_caption):
        """Check that the last caption ends 4 * 1000 * 1000 after its start.

        NOTE(review): the time unit is presumably microseconds - confirm.
        """
        reader = SCCReader()
        parsed = reader.read(sample_scc_no_explicit_end_to_last_caption)

        final_caption = parsed.get_captions('en-US')[-1]

        assert final_caption.end == final_caption.start + 4 * 1000 * 1000
Exemplo n.º 17
0
    def test_proper_timestamps(self, sample_scc_pop_on):
        """Check start and end of the third caption within the tolerance."""
        caption_set = SCCReader().read(sample_scc_pop_on)
        third_caption = caption_set.get_captions("en-US")[2]

        # Absolute distance from the expected start / end values.
        start_drift = abs(third_caption.start - 17000000)
        end_drift = abs(third_caption.end - 18752000)

        assert start_drift < TOLERANCE_MICROSECONDS
        assert end_drift < TOLERANCE_MICROSECONDS
Exemplo n.º 18
0
    def test_dfxp_is_valid_xml_when_scc_source_has_weird_italic_commands(
            self, sample_dfxp_with_properly_closing_spans_output,
            sample_scc_created_dfxp_with_wrongly_closing_spans):
        """Check the DFXP written from the SCC fixture against the stored DFXP.

        The DFXP writer is used with its default settings.
        """
        parsed = SCCReader().read(
            sample_scc_created_dfxp_with_wrongly_closing_spans)
        writer = DFXPWriter()
        rendered = writer.write(parsed)

        assert rendered == sample_dfxp_with_properly_closing_spans_output
Exemplo n.º 19
0
    def test_proper_timestamps(self):
        """Check start and end of the third caption within the tolerance."""
        scc_text = SAMPLE_SCC.decode(u'utf-8')
        caption_set = SCCReader().read(scc_text)
        third_caption = caption_set.get_captions(u"en-US")[2]

        # Absolute distance from the expected start / end values.
        start_drift = abs(third_caption.start - 17000000)
        end_drift = abs(third_caption.end - 18752000)

        self.assertTrue(start_drift < TOLERANCE_MICROSECONDS)
        self.assertTrue(end_drift < TOLERANCE_MICROSECONDS)
Exemplo n.º 20
0
def make_srt(url, uid):
    """Download an SCC caption file and save it as ``fox-data/<uid>.srt``.

    Every 'en-US' caption is written as ``str(caption)`` followed by a
    newline. The file is first written as ``<uid>.srt`` in the current
    directory and then renamed into ``fox-data/``.

    NOTE(review): despite the ``.srt`` extension the content is the
    ``str()`` of each caption, not the output of an SRT writer - confirm
    that this is intended.

    :param url: URL of the SCC file to download.
    :param uid: identifier used to build the output file name.
    """
    response = requests.get(url)
    caption_set = SCCReader().read(response.text)

    # Build the lines up front instead of growing one string with ``+=``.
    lines = [
        str(caption) + "\n" for caption in caption_set.get_captions('en-US')
    ]

    file_name = str(uid) + ".srt"
    # The context manager closes the file even when the write fails.
    with open(file_name, "w", encoding='utf-8') as srt_file:
        srt_file.writelines(lines)

    os.rename(file_name, "fox-data/" + file_name)
Exemplo n.º 21
0
    def test_correct_last_bad_timing(self):
        """Freeze the (start, end) timings read from the SCC sample.

        This fix was implemented with a hack. The commands for the Pop-on
        captions will have to be reviewed, but until then this is good
        enough.
        """
        expected_timings = [
            (1408266666.6666667, 1469700000.0),
            (3208266666.666667, 3269700000.0),
        ]

        caption_set = SCCReader().read(SAMPLE_SCC_PRODUCES_BAD_LAST_END_TIME)

        actual_timings = []
        for caption in caption_set.get_captions(u'en-US'):
            actual_timings.append((caption.start, caption.end))

        self.assertEqual(expected_timings, actual_timings)
Exemplo n.º 22
0
    def test_timing_is_properly_set_on_split_captions(self):
        """Check the formatted start / end of the three captions read.

        All three captions are expected to share the same start and end.
        """
        shared_timing = (u'00:01:35.666', u'00:01:40.866')
        expected_timings = [shared_timing] * 3

        caption_set = SCCReader().read(
            SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME)

        actual_timings = []
        for caption in caption_set.get_captions('en-US'):
            actual_timings.append(
                (caption.format_start(), caption.format_end()))

        self.assertEqual(expected_timings, actual_timings)
Exemplo n.º 23
0
def make_srt(url):
    """Download an SCC caption file and save its captions to ``sample.srt``.

    Every 'en-US' caption is written as ``str(caption)`` followed by a
    newline; ``Done`` is printed once the file has been written.

    NOTE(review): despite the ``.srt`` extension the content is the
    ``str()`` of each caption, not the output of an SRT writer - confirm
    that this is intended.

    :param url: URL of the SCC file to download, e.g.
        https://static-media.fox.com/dcg/cc/2-guns/XMX1593_24029253693.scc
    """
    response = requests.get(url)
    caption_set = SCCReader().read(response.text)

    # Build the lines up front instead of growing one string with ``+=``.
    lines = [
        str(caption) + "\n" for caption in caption_set.get_captions('en-US')
    ]

    # The context manager closes the file even when the write fails.
    with open('sample' + ".srt", "w", encoding='utf-8') as srt_file:
        srt_file.writelines(lines)

    print("Done")
Exemplo n.º 24
0
def parse_captions(scc_file_path, desired_ext):
    """Write the 'en-US' captions of an SCC file to a text file.

    One line is written per caption: the formatted start, the formatted
    end and the caption text, separated by single spaces. The output path
    comes from ``get_output_file_path(scc_file_path, desired_ext,
    "cc_output")``.

    :param scc_file_path: path of the SCC file to read.
    :param desired_ext: extension of the output file; must be ``".txt"``.
    :raises AssertionError: if ``desired_ext`` is not ``".txt"``.
    """
    assert (desired_ext == ".txt"), "caption file must output to txt file"

    # Close the source file as soon as its content has been read.
    with open(scc_file_path, "r") as src_file:
        pycaps = SCCReader().read(src_file.read())

    output_path = get_output_file_path(scc_file_path, desired_ext,
                                       "cc_output")
    with open(output_path, "w+") as output_file:
        # Public accessor instead of filtering the private ``_captions``
        # dict by language.
        for caption_at_timestep in pycaps.get_captions("en-US"):
            output_file.write("{} {} {}\n".format(
                caption_at_timestep.format_start(),
                caption_at_timestep.format_end(),
                caption_at_timestep.get_text()))
Exemplo n.º 25
0
    def test_correct_last_bad_timing(self,
                                     sample_scc_produces_bad_last_end_time):
        """Freeze the (start, end) timings read from the SCC fixture.

        This fix was implemented with a hack. The commands for the Pop-on
        captions will have to be reviewed, but until then this is good
        enough.
        """
        expected_timings = [(1408266666.6666667, 1469700000.0),
                            (3208266666.666667, 3269700000.0)]

        parsed = SCCReader().read(sample_scc_produces_bad_last_end_time)

        actual_timings = []
        for caption in parsed.get_captions('en-US'):
            actual_timings.append((caption.start, caption.end))

        assert expected_timings == actual_timings
Exemplo n.º 26
0
    def test_freeze_colon_spec_time(self):
        """Freeze the (start, end) timings read from the Pop-On SCC sample.

        Coverage doesn't mean we test that functionality, so assert that
        all the timing specs that previously had coverage will actually
        remain unchanged.
        """
        expected_timings = [
            (9776433.333333332, 12312300.0),
            (14781433.33333333, 16883533.333333332),
            (16950266.666666664, 18618600.000000004),
            (18685333.333333332, 20754066.666666664),
            (20820800.0, 26626600.0),
            (26693333.333333332, 32098733.333333332),
            (32165466.66666666, 36202833.33333332),
        ]

        caption_set = SCCReader().read(SAMPLE_SCC_POP_ON)

        actual_timings = []
        for caption in caption_set.get_captions(u'en-US'):
            actual_timings.append((caption.start, caption.end))

        self.assertEqual(expected_timings, actual_timings)
Exemplo n.º 27
0
    def test_ignore_repeated_tab_offset(self, sample_scc_repeated_tab_offset):
        """Check the text lines read from the repeated-tab-offset fixture."""
        expected_lines = [
            '[Radio reporter]',
            'The I-10 Santa Monica Freeway',
            'westbound is jammed,',
            'due to a three-car accident',
            'blocking lanes 1 and 2',
        ]

        parsed = SCCReader().read(sample_scc_repeated_tab_offset)

        # Collect the content of text nodes only, in caption order.
        actual_lines = []
        for caption in parsed.get_captions('en-US'):
            for node in caption.nodes:
                if node.type_ == CaptionNode.TEXT:
                    actual_lines.append(node.content)

        assert expected_lines == actual_lines
Exemplo n.º 28
0
    def test_timing_is_properly_set_on_split_captions(
            self, sample_scc_produces_captions_with_start_and_end_time_the_same
    ):
        """Check the formatted start / end of the three captions read.

        All three captions are expected to share the same start and end.
        """
        shared_timing = ('00:01:35.666', '00:01:40.866')
        expected_timings = [shared_timing] * 3

        parsed = SCCReader().read(
            sample_scc_produces_captions_with_start_and_end_time_the_same
        )

        actual_timings = []
        for caption in parsed.get_captions('en-US'):
            actual_timings.append(
                (caption.format_start(), caption.format_end()))

        assert expected_timings == actual_timings
Exemplo n.º 29
0
    def test_freeze_semicolon_spec_time(self):
        """Freeze the (start, end) timings read from the Roll-Up SCC sample."""
        expected_timings = [
            (766666.6666666667, 2800000.0),
            (2800000.0, 4600000.0),
            (4600000.0, 6166666.666666667),
            (6166666.666666667, 9733333.333333332),
            (9733333.333333332, 11266666.666666668),
            (11266666.666666668, 12266666.666666668),
            (12266666.666666668, 13266666.666666668),
            (13266666.666666668, 14266666.666666668),
            (14266666.666666668, 17066666.666666668),
            (17066666.666666668, 18666666.666666668),
            (18666666.666666668, 20233333.333333336),
            (20233333.333333336, 21833333.333333332),
            (21833333.333333332, 34933333.33333333),
            (34933333.33333333, 36433333.33333333),
            (36433333.33333333, 44300000.0),
            (44300000.0, 44866666.666666664),
        ]

        caption_set = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)

        actual_timings = []
        for caption in caption_set.get_captions(u'en-US'):
            actual_timings.append((caption.start, caption.end))

        self.assertEqual(expected_timings, actual_timings)
Exemplo n.º 30
0
    def test_default_positioning_when_no_positioning_is_specified(
            self, sample_no_positioning_at_all_scc):
        """Check the layout assigned when the SCC fixture sets no positioning.

        Both captions, and the single node of each caption, are expected to
        serialize to the same default layout.
        """
        # Serialized layout expected for every caption and every node.
        default_layout = (
            ((0.0, UnitEnum.PERCENT), (86.66666666666667, UnitEnum.PERCENT)),
            None,
            None,
            (HorizontalAlignmentEnum.LEFT, VerticalAlignmentEnum.TOP),
        )
        expected_caption_layouts = [default_layout, default_layout]
        expected_node_layout_infos = [{0: [default_layout]},
                                      {1: [default_layout]}]

        caption_set = SCCReader().read(sample_no_positioning_at_all_scc)

        actual_caption_layouts = []
        for caption in caption_set.get_captions('en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # One single-key dict per caption: {caption index: [node layouts]}.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            node_layouts = [
                node.layout_info.serialized() for node in caption.nodes
            ]
            actual_node_layout_infos.append({idx: node_layouts})

        assert expected_node_layout_infos == actual_node_layout_infos
        assert expected_caption_layouts == actual_caption_layouts