示例#1
0
 def test_sbv_get_caption_text_multiline(self):
     # The third caption of sample.sbv is expected to carry two lines of
     # text: verify both the joined `text` view and the split `lines` view.
     sbv_file = self._get_file('sample.sbv')
     vtt = webvtt.from_sbv(sbv_file)
     expected_text = 'Caption text #3 (line 1)\nCaption text #3 (line 2)'
     expected_lines = ['Caption text #3 (line 1)', 'Caption text #3 (line 2)']
     self.assertEqual(vtt.captions[2].text, expected_text)
     self.assertListEqual(vtt.captions[2].lines, expected_lines)
示例#2
0
def parse_sbv(sbv_path):
    """
    Parse an SBV file after stripping out groups that webvtt cannot handle.

    Works around the webvtt library not supporting empty subtitles: the
    file is split into groups of lines separated by blank lines, and any
    group with fewer than two lines is dropped. The surviving groups are
    written to ``<TMP_DIR>/<basename>.tmp`` and that copy is handed to
    ``webvtt.from_sbv``, whose result is returned.
    """
    with open(sbv_path, "r", encoding="utf-8") as src:
        raw_lines = src.readlines()

    kept = []
    group = []
    for line in raw_lines:
        if line == "\n":
            # A blank line closes the current group; keep the group (and
            # its separator) only when it holds at least two lines.
            if len(group) >= 2:
                kept += group
                kept.append(line)
            group = []
        else:
            group.append(line)

    # The file may end without a trailing blank line, leaving one last
    # group that still has to be considered.
    if len(group) >= 2:
        kept += group

    out_path = path.join(TMP_DIR, path.basename(sbv_path) + ".tmp")
    with open(out_path, "w", encoding="utf-8") as dst:
        dst.writelines(kept)

    return webvtt.from_sbv(out_path)
示例#3
0
    def test_sbv_conversion(self):
        # Copy the fixture into a fresh output directory, convert it, and
        # compare the saved .vtt file with the expected content line by line.
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file('two_captions.sbv'), OUTPUT_DIR)

        sbv_path = os.path.join(OUTPUT_DIR, 'two_captions.sbv')
        vtt_path = os.path.join(OUTPUT_DIR, 'two_captions.vtt')

        webvtt.from_sbv(sbv_path).save()

        # save() is expected to write a .vtt file next to the source file.
        self.assertTrue(os.path.exists(vtt_path))

        with open(vtt_path, 'r', encoding='utf-8') as f:
            lines = [line.rstrip() for line in f]

        self.assertListEqual(
            lines,
            [
                'WEBVTT',
                '',
                '00:00:00.378 --> 00:00:11.378',
                'Caption text #1',
                '',
                '00:00:11.378 --> 00:00:12.305',
                'Caption text #2 (line 1)',
                'Caption text #2 (line 2)',
            ],
        )
示例#4
0
def get_subtitle_file(filename: str) -> WebVTT:
    """Load a subtitle file with the webvtt reader matching its extension.

    ``.srt`` files go through ``webvtt.from_srt``, ``.sbv`` files through
    ``webvtt.from_sbv`` and ``.vtt`` files through ``webvtt.read``. The
    extension comparison is exact, so it is case-sensitive.

    Raises:
        ValueError: if the extension is none of the above; the filename is
            passed as the exception argument.
    """
    _, file_ext = os.path.splitext(filename)

    if file_ext == ".srt":
        return webvtt.from_srt(filename)
    if file_ext == ".sbv":
        return webvtt.from_sbv(filename)
    if file_ext == ".vtt":
        return webvtt.read(filename)

    raise ValueError(filename)
示例#5
0
 def test_sbv_get_caption_text_multiline(self):
     # Caption index 2 of sample.sbv is expected to span two lines; check
     # the newline-joined text as well as the list of individual lines.
     third_caption = webvtt.from_sbv(self._get_file('sample.sbv')).captions[2]
     self.assertEqual(
         third_caption.text,
         'Caption text #3 (line 1)\nCaption text #3 (line 2)',
     )
     self.assertListEqual(
         third_caption.lines,
         ['Caption text #3 (line 1)', 'Caption text #3 (line 2)'],
     )
示例#6
0
def sbv2df(sbv,textCol):
    """
    Store (start, end, and text) of each time segment in the sbv file in a row of a pandas dataframe.

    Input args
        sbv (string): the file path of an sbv file
        textCol (string): the name of the text column

    Returns
        A pandas DataFrame with the columns 'start', 'end' and `textCol`, in
        that order. 'start' and 'end' hold `datetime.time` values parsed from
        the caption timestamps with the format '%H:%M:%S.%f'. Newlines in the
        text are replaced by single spaces. A file without captions yields an
        empty DataFrame that still has the three columns.
    """
    columns = ['start', 'end', textCol]
    time_format = '%H:%M:%S.%f'

    # Keep the parsed captions in a local name. Rebinding the module-level
    # `webvtt` name (as an earlier version did through `global`) replaces the
    # imported module with the parsed result for the rest of the program.
    captions = webvtt.from_sbv(sbv)

    rows = [
        {
            'start': datetime.strptime(caption.start, time_format).time(),
            'end': datetime.strptime(caption.end, time_format).time(),
            textCol: caption.text,
        }
        for caption in captions
    ]

    # Without rows the frame would have no columns at all and the column
    # selection below would raise KeyError.
    if not rows:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows).replace('\n', ' ', regex=True)
    return df[columns]
def fix_subtitle_sequencing(filename):
    """Remove overlaps between consecutive subtitles and save the result as SRT.

    Steps, in order:

    1. If ``<filename>.bk`` already exists, print a notice and return without
       touching anything, so an existing backup is never overwritten.
    2. Load the subtitles with ``webvtt.from_srt`` (``.srt``) or
       ``webvtt.from_sbv`` (``.sbv``).
    3. If ``--fix-live`` is present in ``sys.argv``, move every subtitle's
       start 8 seconds earlier (never below zero) and set its end to 4
       seconds after the new start.
    4. For every consecutive pair, if a subtitle ends after the next one
       starts, cut its end back to the next subtitle's start.
    5. Unless ``--dry`` is present in ``sys.argv``, copy the original file to
       ``<filename>.bk`` and write the subtitles to ``<stem>.srt``.

    Args:
        filename: path of the ``.srt`` or ``.sbv`` subtitle file.

    Raises:
        ValueError: if the extension is neither ``.srt`` nor ``.sbv``.

    NOTE(review): ``parse_time_stamp`` and ``format_time_stamp`` are defined
    outside this block; they are assumed to convert between timestamp
    strings and ``timedelta`` values -- confirm against their definitions.
    """
    if os.path.isfile(filename + ".bk"):
        print("Not overwriting original backup for {}, skipping.".format(
            filename))
        return

    stem, ext = os.path.splitext(filename)
    if ext == ".srt":
        subs = webvtt.from_srt(filename)
    elif ext == ".sbv":
        subs = webvtt.from_sbv(filename)
    else:
        # Fail here with a clear message instead of carrying None forward
        # and crashing later on len(None).
        raise ValueError(
            "Unsupported subtitle format for {!r}: expected .srt or "
            ".sbv".format(filename))

    # Adjust timing and stretch subtitles for fixing the live ones which
    # get messed up by Youtube
    if "--fix-live" in sys.argv:
        zero = timedelta(0)
        shift = timedelta(seconds=8)
        duration = timedelta(seconds=4)
        for i in range(len(subs)):
            # Clamp at zero so a subtitle never starts before the video does.
            start = max(parse_time_stamp(subs[i].start) - shift, zero)
            subs[i].start = format_time_stamp(start)
            subs[i].end = format_time_stamp(start + duration)

    for i in range(len(subs) - 1):
        end = parse_time_stamp(subs[i].end)
        next_start = parse_time_stamp(subs[i + 1].start)
        if end > next_start:
            subs[i].end = subs[i + 1].start

    if "--dry" not in sys.argv:
        # Back up first: when the input is already an .srt file the output
        # path below is the input path itself.
        shutil.copy(filename, filename + ".bk")
        out_srt = stem + ".srt"
        with open(out_srt, "w", encoding="utf8") as f:
            subs.write(f, format="srt")
示例#8
0
    def test_sbv_conversion(self):
        # Expected content of the converted file, one entry per line with
        # trailing whitespace stripped.
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.378 --> 00:00:11.378',
            'Caption text #1',
            '',
            '00:00:11.378 --> 00:00:12.305',
            'Caption text #2 (line 1)',
            'Caption text #2 (line 2)',
        ]

        # Work on a copy of the fixture inside a freshly created output dir.
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file('two_captions.sbv'), OUTPUT_DIR)
        source = os.path.join(OUTPUT_DIR, 'two_captions.sbv')
        target = os.path.join(OUTPUT_DIR, 'two_captions.vtt')

        converted = webvtt.from_sbv(source)
        converted.save()

        self.assertTrue(os.path.exists(target))

        with open(target, 'r', encoding='utf-8') as f:
            lines = list(map(str.rstrip, f))

        self.assertListEqual(lines, expected_lines)
示例#9
0
 def test_sbv_timestamps_in_seconds(self):
     # The second caption of sample.sbv is expected to expose its start and
     # end timestamps as float seconds.
     second_caption = webvtt.from_sbv(self._get_file('sample.sbv')).captions[1]
     self.assertEqual(second_caption.start_in_seconds, 11.378)
     self.assertEqual(second_caption.end_in_seconds, 12.305)
示例#10
0
 def test_sbv_missing_caption_text(self):
     # Parsing this fixture (judging by its name, a file with a caption that
     # has no text) must still produce a non-empty caption list.
     sbv_file = self._get_file('missing_caption_text.sbv')
     parsed = webvtt.from_sbv(sbv_file)
     self.assertTrue(parsed.captions)
示例#11
0
 def test_sbv_get_caption_text(self):
     # The second caption of sample.sbv is expected to hold a single line.
     sbv_file = self._get_file('sample.sbv')
     second_caption = webvtt.from_sbv(sbv_file).captions[1]
     self.assertEqual(second_caption.text, 'Caption text #2')
示例#12
0
 def test_sbv_timestamps_in_seconds(self):
     # Check the numeric (seconds) view of the second caption's timestamps.
     sbv_file = self._get_file('sample.sbv')
     captions = webvtt.from_sbv(sbv_file).captions
     self.assertEqual(captions[1].start_in_seconds, 11.378)
     self.assertEqual(captions[1].end_in_seconds, 12.305)
示例#13
0
 def test_sbv_timestamps_format(self):
     # Timestamps read from sample.sbv are expected in HH:MM:SS.mmm form.
     second_caption = webvtt.from_sbv(self._get_file('sample.sbv')).captions[1]
     self.assertEqual(second_caption.start, '00:00:11.378')
     self.assertEqual(second_caption.end, '00:00:12.305')
示例#14
0
 def test_sbv_total_length(self):
     # total_length of the parsed sample.sbv is expected to be 16.
     vtt = webvtt.from_sbv(self._get_file('sample.sbv'))
     self.assertEqual(vtt.total_length, 16)
示例#15
0
 def test_sbv_get_caption_text(self):
     # Single-line caption: `text` should be exactly the caption's one line.
     captions = webvtt.from_sbv(self._get_file('sample.sbv')).captions
     self.assertEqual(captions[1].text, 'Caption text #2')
示例#16
0
 def test_sbv_timestamps_format(self):
     # Check the string form of the second caption's start and end times.
     sbv_file = self._get_file('sample.sbv')
     captions = webvtt.from_sbv(sbv_file).captions
     self.assertEqual(captions[1].start, '00:00:11.378')
     self.assertEqual(captions[1].end, '00:00:12.305')
示例#17
0
 def test_sbv_total_length(self):
     # Parse the sample file and compare its reported total length with 16.
     sbv_file = self._get_file('sample.sbv')
     total_length = webvtt.from_sbv(sbv_file).total_length
     self.assertEqual(total_length, 16)