예제 #1
0
def read_file(file_name):
    """Read a VTT file and return the segments built from its captions.

    Every caption of the parsed file is handed to ``read_caption``; captions
    for which it returns ``None`` are omitted from the returned list.
    """
    # Lazily convert each caption, then keep only the usable results.
    converted = (read_caption(entry)
                 for entry in WebVTT.read(file_name).captions)
    return [segment for segment in converted if segment is not None]
예제 #2
0
class VttToSrtTranscoder:
    """Convert a WebVTT (.vtt) subtitle file into a SubRip (.srt) file.

    The output file is written next to the input file, using the same base
    name (optionally followed by ``-<suffix>``) and the ``.srt`` extension.
    """

    def __init__(self, suffix='', clear=True, vtt_reader=None):
        """Configure the transcoder.

        Args:
            suffix: Text appended to the output base name as ``-<suffix>``;
                an empty suffix leaves the base name unchanged.
            clear: When true, the input file is deleted after conversion.
            vtt_reader: Object whose ``read(path)`` result can be iterated
                to obtain captions. A new ``WebVTT()`` is used when omitted.
        """
        self.__suffix = suffix
        self.__clear = clear

        self.__out_ext = '.srt'
        self.__in_ext = '.vtt'

        self.__vtt_reader = WebVTT() if vtt_reader is None else vtt_reader

    def get_in_ext(self):
        """Return the extension of accepted input files (``'.vtt'``)."""
        return self.__in_ext

    def get_out_ext(self):
        """Return the extension of produced output files (``'.srt'``)."""
        return self.__out_ext

    def __write_subs(self, out_fd, in_path):
        """Write every caption read from ``in_path`` to ``out_fd`` as SRT items."""
        # SRT items are numbered from 1.
        captions = self.__vtt_reader.read(in_path)
        for number, caption in enumerate(captions, start=1):
            start = SubRipTime(0, 0, caption.start_in_seconds)
            end = SubRipTime(0, 0, caption.end_in_seconds)
            # The caption text may carry HTML entities; emit plain characters.
            item = SubRipItem(number, start, end,
                              html.unescape(caption.text))
            out_fd.write("%s\n" % str(item))

    def __create_out_filename(self, in_file_path):
        """Derive the output path from the input path, suffix and extension."""
        out_file_path = os.path.splitext(in_file_path)[0]
        if self.__suffix:
            out_file_path = "%s-%s" % (out_file_path, self.__suffix)

        return out_file_path + self.__out_ext

    def apply(self, in_vtt_path):
        """Transcode ``in_vtt_path`` and return the path of the written file.

        The input path is made absolute and passed to ``validate_ext``
        together with the expected ``.vtt`` extension (presumably that helper
        rejects other extensions - confirm against its definition). When the
        transcoder was created with ``clear=True`` the input file is removed
        once the output has been written.
        """
        in_vtt_path = os.path.abspath(in_vtt_path)
        validate_ext(in_vtt_path, self.__in_ext)

        out_file_path = self.__create_out_filename(in_vtt_path)

        # Write UTF-8 explicitly: the locale default encoding may be unable
        # to represent subtitle text and would fail on non-ASCII captions.
        with open(out_file_path, "w", encoding="utf-8") as out_fd:
            self.__write_subs(out_fd, in_vtt_path)

        if self.__clear:
            os.remove(in_vtt_path)

        return out_file_path
# Find VTT file
def vtt_file(out):
    """Return the subtitle file name announced in captured command output.

    Args:
        out: Raw output bytes; decoded as UTF-8 and scanned line by line.

    Returns:
        The text that follows ``"Writing video subtitles to: "`` on the first
        line containing that marker, with surrounding whitespace removed, or
        ``None`` when no line contains the marker.
    """
    marker = "Writing video subtitles to: "
    for line in out.decode("utf-8").split("\n"):
        # Split on the marker itself rather than on the first ":" so a colon
        # earlier in the line cannot leak the marker into the result.
        _, found, remainder = line.partition(marker)
        if found:
            return remainder.strip()
    return None


# Locate the subtitle file announced in the captured output; stop if absent.
filename = vtt_file(out)
if not filename:
    print(red("Video does not seem to have english subs", bold=True))
    sys.exit(1)

# Read source VTT & convert to HTML
# Keep the object returned by read() instead of discarding it: this works
# whether read() fills the instance and returns it, or builds and returns a
# new object.
vtt = WebVTT().read(filename)

# One <span> per caption, carrying its timing in data attributes so the
# timestamps can be recovered after translation.
stmp = StringIO()
print("<div>", file=stmp)
for caption in vtt:
    print('<span data-start="{}" data-end="{}">{}</span>'.format(
        caption.start, caption.end, caption.text),
          file=stmp)
print("</div>", file=stmp)

# Translate
driver = TranslationDriver(args.lang)
strans = driver.translate(stmp.getvalue())

# Convert translated HTML back to VTT
vtt = WebVTT()
예제 #4
0
class WebVTTTestCase(unittest.TestCase):
    """Tests for WebVTT reading, saving and conversion, and for Caption."""

    def setUp(self):
        # Each test works on its own fresh WebVTT object.
        self.webvtt = WebVTT()

    def tearDown(self):
        # Drop anything a test wrote below OUTPUT_DIR.
        if os.path.exists(OUTPUT_DIR):
            rmtree(OUTPUT_DIR)

    def _get_file(self, filename):
        """Return the path of a fixture file inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def _stage_fixture(self, filename):
        """Create OUTPUT_DIR, copy a fixture into it, return the copy's path."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file(filename), OUTPUT_DIR)
        return os.path.join(OUTPUT_DIR, filename)

    def _make_target_folder(self):
        """Create and return the 'test_folder' directory below OUTPUT_DIR."""
        folder = os.path.join(OUTPUT_DIR, 'test_folder')
        os.makedirs(folder)
        return folder

    @staticmethod
    def _stripped_lines(path):
        """Return the lines of a UTF-8 file with trailing whitespace removed."""
        with open(path, 'r', encoding='utf-8') as handle:
            return [row.rstrip() for row in handle]

    def test_create_caption(self):
        cue = Caption('00:00:00.500', '00:00:07.000',
                      ['Caption test line 1', 'Caption test line 2'])
        expected_text = ['Caption test line 1', 'Caption test line 2']

        self.assertEqual(cue.start, '00:00:00.500')
        self.assertEqual(cue.start_in_seconds, 0.5)
        self.assertEqual(cue.end, '00:00:07.000')
        self.assertEqual(cue.end_in_seconds, 7)
        self.assertEqual(cue.lines, expected_text)

    def test_save_captions(self):
        vtt_path = self._stage_fixture('one_caption.vtt')

        self.webvtt.read(vtt_path)
        extra = Caption(
            '00:00:07.000', '00:00:11.890',
            ['New caption text line1', 'New caption text line2'])
        self.webvtt.captions.append(extra)
        self.webvtt.save()

        written = self._stripped_lines(vtt_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ])

    def test_srt_conversion(self):
        srt_path = self._stage_fixture('one_caption.srt')

        self.webvtt.from_srt(srt_path)
        self.webvtt.save()

        vtt_path = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(vtt_path))

        written = self._stripped_lines(vtt_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
        ])

    def test_sbv_conversion(self):
        sbv_path = self._stage_fixture('two_captions.sbv')

        self.webvtt.from_sbv(sbv_path)
        self.webvtt.save()

        vtt_path = os.path.join(OUTPUT_DIR, 'two_captions.vtt')
        self.assertTrue(os.path.exists(vtt_path))

        written = self._stripped_lines(vtt_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.378 --> 00:00:11.378',
            'Caption text #1',
            '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)',
            'Caption text #2 (line 2)',
        ])

    def test_save_to_other_location(self):
        folder = self._make_target_folder()

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(folder)

        self.assertTrue(
            os.path.exists(os.path.join(folder, 'one_caption.vtt')))

    def test_save_specific_filename(self):
        destination = os.path.join(self._make_target_folder(),
                                   'custom_name.vtt')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        self.assertTrue(os.path.exists(destination))

    def test_save_specific_filename_no_extension(self):
        folder = self._make_target_folder()
        destination = os.path.join(folder, 'custom_name')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        # The .vtt extension is expected to be added on save.
        self.assertTrue(
            os.path.exists(os.path.join(folder, 'custom_name.vtt')))

    def test_caption_timestamp_update(self):
        cue = Caption('00:00:00.500', '00:00:07.000')
        cue.start = '00:00:01.750'
        cue.end = '00:00:08.250'

        self.assertEqual(cue.start, '00:00:01.750')
        self.assertEqual(cue.end, '00:00:08.250')

    def test_caption_text(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_caption_receive_text(self):
        cue = Caption(text='Caption line #1\nCaption line #2')

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_supported_formats(self):
        expected = [entry[0] for entry in SUPPORTED_FORMATS]
        self.assertListEqual(WebVTT().supported_formats(), expected)

    def test_update_text(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1 updated'
        self.assertEqual(cue.text, 'Caption line #1 updated')

    def test_update_text_multiline(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1\nCaption line #2'

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text_wrong_type(self):
        cue = Caption(text='Caption line #1')

        with self.assertRaises(AttributeError):
            cue.text = 123

    def test_manipulate_lines(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        cue.lines[0] = 'Caption line #1 updated'
        self.assertEqual(cue.lines[0], 'Caption line #1 updated')

    def test_captions(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt.captions, list)

    def test_captions_prevent_write(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        with self.assertRaises(AttributeError):
            self.webvtt.captions = []

    def test_sequence_iteration(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt[0], Caption)
        self.assertEqual(len(self.webvtt), len(self.webvtt.captions))

    def test_save_no_filename(self):
        unsaved = WebVTT()
        with self.assertRaises(MissingFilenameError):
            unsaved.save()

    def test_malformed_start_timestamp(self):
        with self.assertRaises(MalformedCaptionError):
            Caption('01:00')
예제 #5
0
 def read_caption(self, vtt_file):
     """Read ``vtt_file`` with ``WebVTT.read`` and return its result as is."""
     parsed = WebVTT.read(vtt_file)
     return parsed
예제 #6
0
class WebVTTTestCase(unittest.TestCase):
    """Tests for WebVTT read/write/save/conversion, Caption and Style."""

    def setUp(self):
        # Each test works on its own fresh WebVTT object.
        self.webvtt = WebVTT()

    def tearDown(self):
        # Drop anything a test wrote below OUTPUT_DIR.
        if os.path.exists(OUTPUT_DIR):
            rmtree(OUTPUT_DIR)

    def _get_file(self, filename):
        """Return the path of a fixture file inside SUBTITLES_DIR."""
        return os.path.join(SUBTITLES_DIR, filename)

    def _stage_fixture(self, filename):
        """Create OUTPUT_DIR, copy a fixture into it, return the copy's path."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file(filename), OUTPUT_DIR)
        return os.path.join(OUTPUT_DIR, filename)

    def _make_target_folder(self):
        """Create and return the 'test_folder' directory below OUTPUT_DIR."""
        folder = os.path.join(OUTPUT_DIR, 'test_folder')
        os.makedirs(folder)
        return folder

    @staticmethod
    def _stripped_lines(path):
        """Return the lines of a UTF-8 file with trailing whitespace removed."""
        with open(path, 'r', encoding='utf-8') as handle:
            return [row.rstrip() for row in handle]

    def test_create_caption(self):
        cue = Caption('00:00:00.500', '00:00:07.000',
                      ['Caption test line 1', 'Caption test line 2'])
        expected_text = ['Caption test line 1', 'Caption test line 2']

        self.assertEqual(cue.start, '00:00:00.500')
        self.assertEqual(cue.start_in_seconds, 0.5)
        self.assertEqual(cue.end, '00:00:07.000')
        self.assertEqual(cue.end_in_seconds, 7)
        self.assertEqual(cue.lines, expected_text)

    def test_write_captions(self):
        vtt_path = self._stage_fixture('one_caption.vtt')

        buffer = io.StringIO()
        self.webvtt.read(vtt_path)
        extra = Caption(
            '00:00:07.000', '00:00:11.890',
            ['New caption text line1', 'New caption text line2'])
        self.webvtt.captions.append(extra)
        self.webvtt.write(buffer)

        # Rewind so the text just written can be read back line by line.
        buffer.seek(0)
        written = [row.rstrip() for row in buffer]

        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ])

    def test_save_captions(self):
        vtt_path = self._stage_fixture('one_caption.vtt')

        self.webvtt.read(vtt_path)
        extra = Caption(
            '00:00:07.000', '00:00:11.890',
            ['New caption text line1', 'New caption text line2'])
        self.webvtt.captions.append(extra)
        self.webvtt.save()

        written = self._stripped_lines(vtt_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'New caption text line1',
            'New caption text line2',
        ])

    def test_srt_conversion(self):
        srt_path = self._stage_fixture('one_caption.srt')

        self.webvtt.from_srt(srt_path)
        self.webvtt.save()

        vtt_path = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(vtt_path))

        written = self._stripped_lines(vtt_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
        ])

    def test_sbv_conversion(self):
        sbv_path = self._stage_fixture('two_captions.sbv')

        self.webvtt.from_sbv(sbv_path)
        self.webvtt.save()

        vtt_path = os.path.join(OUTPUT_DIR, 'two_captions.vtt')
        self.assertTrue(os.path.exists(vtt_path))

        written = self._stripped_lines(vtt_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.378 --> 00:00:11.378',
            'Caption text #1',
            '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)',
            'Caption text #2 (line 2)',
        ])

    def test_save_to_other_location(self):
        folder = self._make_target_folder()

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(folder)

        self.assertTrue(
            os.path.exists(os.path.join(folder, 'one_caption.vtt')))

    def test_save_specific_filename(self):
        destination = os.path.join(self._make_target_folder(),
                                   'custom_name.vtt')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        self.assertTrue(os.path.exists(destination))

    def test_save_specific_filename_no_extension(self):
        folder = self._make_target_folder()
        destination = os.path.join(folder, 'custom_name')

        loaded = self.webvtt.read(self._get_file('one_caption.vtt'))
        loaded.save(destination)

        # The .vtt extension is expected to be added on save.
        self.assertTrue(
            os.path.exists(os.path.join(folder, 'custom_name.vtt')))

    def test_caption_timestamp_update(self):
        cue = Caption('00:00:00.500', '00:00:07.000')
        cue.start = '00:00:01.750'
        cue.end = '00:00:08.250'

        self.assertEqual(cue.start, '00:00:01.750')
        self.assertEqual(cue.end, '00:00:08.250')

    def test_caption_timestamp_format(self):
        # Full hh:mm:ss.mmm timestamps are expected back unchanged.
        full = Caption('01:02:03.400', '02:03:04.500')
        self.assertEqual(full.start, '01:02:03.400')
        self.assertEqual(full.end, '02:03:04.500')

        # Timestamps without an hours field are expected back with '00:'.
        short = Caption('02:03.400', '03:04.500')
        self.assertEqual(short.start, '00:02:03.400')
        self.assertEqual(short.end, '00:03:04.500')

    def test_caption_text(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_caption_receive_text(self):
        cue = Caption(text='Caption line #1\nCaption line #2')

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1 updated'
        self.assertEqual(cue.text, 'Caption line #1 updated')

    def test_update_text_multiline(self):
        cue = Caption(text='Caption line #1')
        cue.text = 'Caption line #1\nCaption line #2'

        self.assertEqual(len(cue.lines), 2)
        self.assertEqual(cue.text, 'Caption line #1\nCaption line #2')

    def test_update_text_wrong_type(self):
        cue = Caption(text='Caption line #1')

        with self.assertRaises(AttributeError):
            cue.text = 123

    def test_manipulate_lines(self):
        cue = Caption(text=['Caption line #1', 'Caption line #2'])
        cue.lines[0] = 'Caption line #1 updated'
        self.assertEqual(cue.lines[0], 'Caption line #1 updated')

    def test_captions(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt.captions, list)

    def test_captions_prevent_write(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        with self.assertRaises(AttributeError):
            self.webvtt.captions = []

    def test_sequence_iteration(self):
        self.webvtt.read(self._get_file('sample.vtt'))
        self.assertIsInstance(self.webvtt[0], Caption)
        self.assertEqual(len(self.webvtt), len(self.webvtt.captions))

    def test_save_no_filename(self):
        unsaved = WebVTT()
        with self.assertRaises(MissingFilenameError):
            unsaved.save()

    def test_malformed_start_timestamp(self):
        with self.assertRaises(MalformedCaptionError):
            Caption('01:00')

    def test_set_styles_from_text(self):
        css = Style()
        css.text = '::cue(b) {\n  color: peachpuff;\n}'

        expected = ['::cue(b) {', '  color: peachpuff;', '}']
        self.assertListEqual(css.lines, expected)

    def test_get_styles_as_text(self):
        css = Style()
        css.lines = ['::cue(b) {', '  color: peachpuff;', '}']
        self.assertEqual(css.text, '::cue(b) {color: peachpuff;}')

    def test_save_identifiers(self):
        source_path = self._stage_fixture('using_identifiers.vtt')
        saved_path = os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt')

        self.webvtt.read(source_path)
        self.webvtt.save(saved_path)

        written = self._stripped_lines(saved_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            'second caption',
            '00:00:07.000 --> 00:00:11.890',
            'Caption text #2',
            '',
            '00:00:11.890 --> 00:00:16.320',
            'Caption text #3',
            '',
            '4',
            '00:00:16.320 --> 00:00:21.580',
            'Caption text #4',
            '',
            '00:00:21.580 --> 00:00:23.880',
            'Caption text #5',
            '',
            '00:00:23.880 --> 00:00:27.280',
            'Caption text #6',
        ])

    def test_save_updated_identifiers(self):
        source_path = self._stage_fixture('using_identifiers.vtt')
        saved_path = os.path.join(OUTPUT_DIR, 'new_using_identifiers.vtt')

        self.webvtt.read(source_path)
        cues = self.webvtt.captions
        cues[0].identifier = 'first caption'
        cues[1].identifier = None
        cues[3].identifier = '44'

        closing = Caption('00:00:27.280', '00:00:29.200',
                          'Caption text #7')
        closing.identifier = 'last caption'
        cues.append(closing)
        self.webvtt.save(saved_path)

        written = self._stripped_lines(saved_path)
        self.assertListEqual(written, [
            'WEBVTT',
            '',
            'first caption',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
            '',
            '00:00:07.000 --> 00:00:11.890',
            'Caption text #2',
            '',
            '00:00:11.890 --> 00:00:16.320',
            'Caption text #3',
            '',
            '44',
            '00:00:16.320 --> 00:00:21.580',
            'Caption text #4',
            '',
            '00:00:21.580 --> 00:00:23.880',
            'Caption text #5',
            '',
            '00:00:23.880 --> 00:00:27.280',
            'Caption text #6',
            '',
            'last caption',
            '00:00:27.280 --> 00:00:29.200',
            'Caption text #7',
        ])