Example #1
 def test_is_unicode(self):
     # This test makes no sense for Py3
     if is_py3:
         return
     # Non-unicode encodings, as listed in
     # https://docs.python.org/2/library/codecs.html#standard-encodings
     non_unicode_encodings = [
         'big5', 'big5hkscs', 'cp1250', 'cp1251', 'cp1252', 'cp1253',
         'cp1254', 'cp1255', 'cp1256', 'cp1257', 'cp1258',
         'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr',
         'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp',
         'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
         'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr',
         'latin_1', 'iso8859_2', 'iso8859_3', 'iso8859_4',
         'iso8859_5', 'iso8859_6', 'iso8859_7', 'iso8859_8',
         'iso8859_9', 'iso8859_10', 'iso8859_11', 'iso8859_13',
         'iso8859_14', 'iso8859_15', 'iso8859_16',
         'johab', 'koi8_r', 'koi8_u', 'mac_cyrillic', 'mac_greek',
         'mac_iceland', 'mac_latin2', 'mac_roman', 'mac_turkish',
         'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213']
     for enc in non_unicode_encodings:
         non_unicode = cencode('non_unicode\n', enc)
         self.no_unicode_item = WebVTTItem(1, text=non_unicode)
         self.assertRaises(NotImplementedError, WebVTTItem.__str__,
                           self.no_unicode_item)
Example #2
 def setUp(self):
     self.item = WebVTTItem(1, text="Hello world !")
     self.item.shift(minutes=1)
     self.item.end.shift(seconds=20)
     self.string = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
     self.bad_string = 'foobar'
     self.coordinates = ('1\n00:01:00.000 --> 00:01:20.000 X1:000 X2:000 '
                         'Y1:050 Y2:100\nHello world !\n')
     self.vtt = ('00:01:00.000 --> 00:01:20.000 D:vertical A:start '
                 'L:12%\nHello world !\n')
     self.string_index = 'foo\n00:01:00.000 --> 00:01:20.000\nHello !\n'
     self.dots = '1\n00:01:00.000 --> 00:01:20.000\nHello world !\n'
     self.no_index = '00:01:00,000 --> 00:01:20,000\nHello world !\n'
     self.junk_after_timestamp = ('1\n00:01:00,000 --> 00:01:20,000?\n'
                                  'Hello world !\n')
Example #3
def merge_subtitle(sub_a, sub_b, delta):
    out = WebVTTFile()
    intervals = [item.start.ordinal for item in sub_a]
    intervals.extend([item.end.ordinal for item in sub_a])
    intervals.extend([item.start.ordinal for item in sub_b])
    intervals.extend([item.end.ordinal for item in sub_b])
    intervals.sort()

    j = k = 0
    for i in xrange(1, len(intervals)):
        start = WebVTTTime.from_ordinal(intervals[i - 1])
        end = WebVTTTime.from_ordinal(intervals[i])

        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)

            text = join_lines(text_a, text_b)
            if len(text) > 0:
                item = WebVTTItem(0, start, end, text)
                out.append(item)

    out.clean_indexes()
    return out
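# find_subtitle and join_lines are not shown in this example. Below is a minimal
# sketch of what they might look like; the names come from the call sites in
# merge_subtitle above, but these implementations are assumptions, not the
# original helpers.
def find_subtitle(subtitle, from_t, to_t, lo=0):
    # Scan forward from index `lo` and return the text of the cue that covers
    # the [from_t, to_t] window, plus the index to resume searching from.
    i = lo
    while i < len(subtitle):
        if subtitle[i].start >= to_t:
            break
        if subtitle[i].start <= from_t and to_t <= subtitle[i].end:
            return subtitle[i].text, i
        i += 1
    return "", i


def join_lines(text_a, text_b):
    # Stack both texts when present; otherwise return whichever one is non-empty.
    if text_a and text_b:
        return text_a + '\n' + text_b
    return text_a + text_b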
Example #4
def main(options):
    # Ensure ffmpeg is around
    if not run_ffmpeg(['-version']):
        log.error(
            "ffmpeg needs to be available to strip audio from the video file.")
        exit(1)

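    # Stream the video into a temporary file, chunk by chunk, showing progress
    # when the server reports a Content-Length.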
    with NamedTemporaryFile(delete=True) as vid_file:
        log.info("Downloading %s - this might take a while." % options.vid_url)
        response = get(options.vid_url, stream=True)
        total_length = response.headers.get("content-length")
        if total_length is None:  # no content length header
            log.info("Unknown length - can't predict how long this will take.")
            vid_file.write(response.content)
            vid_file.flush()
        else:
            bar = ProgressBar(max_value=int(total_length))
            dl = 0
            for data in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                dl += len(data)
                vid_file.write(data)
                vid_file.flush()
                bar.update(dl)

        log.info("Download done. Stripping audio.")
        (wav_file, wav_file_name) = mkstemp('.wav')
        result = run_ffmpeg([
            "-y", "-i", vid_file.name, "-vn", "-acodec", "pcm_s16le", "-ar",
            "16000", "-ac", "1", wav_file_name
        ])
        if not result:
            close(wav_file)
            log.error("ffmpeg failed. Bailing.")
            exit(1)

        fs, audio = wav.read(wav_file_name)
        close(wav_file)

    log.info("Will write VTT to %s" % options.output)
    # Make sure the WAV is to code...
    log.info("Loading up WAV file...")

    if fs != 16000:
        log.error("Only 16000hz WAV files are usable.")
        exit(1)

    total_samples = len(audio)
    duration_hours, duration_minutes, duration_seconds = sample_index_to_time(
        len(audio))
    log.info("Approximate duration: %d:%02d:%02d" %
             (duration_hours, duration_minutes, duration_seconds))

    # Let's load up DeepSpeech and get it ready.
    log.info("Loading pre-trained DeepSpeech model...")
    root_model_dir = path.join(options.deepspeech_model_dir, MODEL_DIR)

    model = path.join(root_model_dir, MODEL_FILE)
    alphabet = path.join(root_model_dir, MODEL_ALPHABET)
    lang_model = path.join(root_model_dir, MODEL_LANG_MODEL)
    trie = path.join(root_model_dir, MODEL_TRIE)

    deepspeech = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    log.info("Done loading model.")

    log.info("Loading language model...")
    deepspeech.enableDecoderWithLM(alphabet, lang_model, trie, LM_WEIGHT,
                                   WORD_COUNT_WEIGHT, VALID_WORD_COUNT_WEIGHT)
    log.info("Done loading model.")

    playhead = 0

    out = WebVTTFile()

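    # Walk the audio in fixed-size windows of AUDIO_SEGMENT_SAMPLES samples,
    # run DeepSpeech on each window, and emit one cue per non-empty inference.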
    bar = ProgressBar(max_value=total_samples)
    while playhead < (total_samples - 1):
        end_point = min(playhead + AUDIO_SEGMENT_SAMPLES, (total_samples - 1))
        segment = audio[playhead:end_point]
        inference = deepspeech.stt(segment, fs)
        log.debug("Inferred: %s" % inference)

        start_hours, start_minutes, start_seconds = sample_index_to_time(
            playhead)
        playhead = end_point
        end_hours, end_minutes, end_seconds = sample_index_to_time(playhead)

        if not inference or inference == "ah":
            continue

        for search, replace in INFERENCE_REPLACEMENTS.iteritems():
            inference = sub(r"\b" + search + r"\b", replace, inference)

        inference = fill(inference, width=MAX_CAPTION_WIDTH)

        start = WebVTTTime(start_hours, start_minutes, start_seconds)
        end = WebVTTTime(end_hours, end_minutes, end_seconds)

        item = WebVTTItem(0, start, end, inference)
        out.append(item)
        bar.update(playhead)

        out.save(options.output, encoding="utf-8")

    out.clean_indexes()
    out.save(options.output, encoding="utf-8")
Example #5
 def setUp(self):
     self.item = WebVTTItem(1, text="Hello world !")
     self.item.shift(minutes=1)
     self.item.end.shift(seconds=20)
Example #6
 def setUp(self):
     self.item = WebVTTItem()
Example #7
 def test_multiple_item(self):
     vtt_file = WebVTTFile([
         WebVTTItem(1, {'seconds': 0}, {'seconds': 3}, 'Hello'),
         WebVTTItem(1, {'seconds': 1}, {'seconds': 2}, 'World !')
     ])
     self.assertEquals(vtt_file.text, 'Hello\nWorld !')
Example #8
 def test_single_item(self):
     vtt_file = WebVTTFile(
         [WebVTTItem(1, {'seconds': 1}, {'seconds': 2}, 'Hello')])
     self.assertEquals(vtt_file.text, 'Hello')
Example #9
 def test_shift(self):
     vtt_file = WebVTTFile([WebVTTItem()])
     vtt_file.shift(1, 1, 1, 1)
     self.assertEqual(vtt_file[0].end, (1, 1, 1, 1))
     vtt_file.shift(ratio=2)
     self.assertEqual(vtt_file[0].end, (2, 2, 2, 2))