def input_file(self):
    """Return the parsed subtitle file, loading it on first access.

    On the first call the raw bytes of ``self.arguments.file`` are handed to
    ``detect`` to guess the encoding, the guess is passed through
    ``self.normalize_encoding``, and the file is opened with
    ``WebVTTFile.open`` using ``WebVTTFile.ERROR_LOG`` error handling.  The
    result is cached on ``self._source_file`` and returned by later calls.
    """
    # Fast path: already loaded.
    if hasattr(self, '_source_file'):
        return self._source_file

    source_path = self.arguments.file
    with open(source_path, 'rb') as handle:
        raw_bytes = handle.read()

    guessed = detect(raw_bytes).get('encoding')
    self._source_file = WebVTTFile.open(
        source_path,
        encoding=self.normalize_encoding(guessed),
        error_handling=WebVTTFile.ERROR_LOG)
    return self._source_file
def get_webvttfile(file_obj):
    """Get a WebVTTFile instance from a file-like object.

    The object is rewound with ``seek(0)`` and read in full.  Byte content is
    decoded as UTF-8; content that is already text is passed on unchanged.

    :param file_obj: object providing ``seek`` and ``read``.
    :returns: the result of ``WebVTTFile.from_string`` on the text content.
    :raises UnicodeDecodeError: if byte content is not valid UTF-8.
    """
    file_obj.seek(0)
    contents = file_obj.read()
    # ``bytes`` is an alias of ``str`` on Python 2 and the binary type on
    # Python 3, so this check works on both without referring to the
    # Python 2-only name ``unicode``.
    if isinstance(contents, (bytes, bytearray)):
        contents = codecs.decode(contents, 'utf-8')
    return WebVTTFile.from_string(contents)
def input_file(self):
    """Lazily open ``self.arguments.file`` as a ``WebVTTFile``.

    The encoding is guessed by running ``detect`` over the file's bytes and
    then passed through ``self.normalize_encoding``.  Parsing uses
    ``WebVTTFile.ERROR_LOG`` error handling.  The parsed file is stored on
    ``self._source_file`` so the work happens only once per instance.
    """
    if not hasattr(self, '_source_file'):
        filename = self.arguments.file

        # Read in binary mode: the encoding is not known yet.
        with open(filename, 'rb') as stream:
            payload = stream.read()

        detection = detect(payload)
        charset = self.normalize_encoding(detection.get('encoding'))
        parsed = WebVTTFile.open(filename,
                                 encoding=charset,
                                 error_handling=WebVTTFile.ERROR_LOG)
        self._source_file = parsed
    return self._source_file
def merge_subtitle(sub_a, sub_b, delta):
    """Merge two subtitle sequences into a single ``WebVTTFile``.

    Every start and end time of both inputs becomes a cut point.  For each
    pair of neighbouring cut points that are more than ``delta`` apart, the
    text found in each input for that span is combined with ``join_lines``
    and, when non-empty, emitted as one item.

    :param sub_a: first iterable of items exposing ``start``/``end`` ordinals.
    :param sub_b: second iterable of such items.
    :param delta: spans not longer than this are skipped.
    :returns: a new ``WebVTTFile`` on which ``clean_indexes()`` was called.
    """
    out = WebVTTFile()

    # All boundaries of both inputs, in chronological order.
    intervals = sorted(
        ordinal
        for subs in (sub_a, sub_b)
        for item in subs
        for ordinal in (item.start.ordinal, item.end.ordinal))

    # Search cursors into sub_a / sub_b, threaded through find_subtitle.
    j = k = 0
    # zip over neighbours replaces the Python 2-only ``xrange`` index loop.
    for begin, finish in zip(intervals, intervals[1:]):
        start = WebVTTTime.from_ordinal(begin)
        end = WebVTTTime.from_ordinal(finish)
        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)
            text = join_lines(text_a, text_b)
            if text:
                out.append(WebVTTItem(0, start, end, text))

    out.clean_indexes()
    return out
def merge_subtitle(sub_a, sub_b, delta):
    """Combine two subtitle sequences into one ``WebVTTFile``.

    The start and end ordinals of every item in both inputs are collected and
    sorted.  Each pair of consecutive ordinals defines a candidate span; spans
    longer than ``delta`` are looked up in both inputs with ``find_subtitle``
    and the two texts are merged with ``join_lines``.  Spans whose merged text
    is empty are dropped.

    :param sub_a: first iterable of items exposing ``start``/``end`` ordinals.
    :param sub_b: second iterable of such items.
    :param delta: minimum span length; shorter or equal spans are ignored.
    :returns: a new ``WebVTTFile`` on which ``clean_indexes()`` was called.
    """
    out = WebVTTFile()

    boundaries = sorted(
        ordinal
        for subs in (sub_a, sub_b)
        for item in subs
        for ordinal in (item.start.ordinal, item.end.ordinal))

    # Cursors returned by find_subtitle and fed back in on the next span.
    j = k = 0
    # Pairing neighbours with zip avoids ``xrange``, which only exists on
    # Python 2.
    for lower, upper in zip(boundaries, boundaries[1:]):
        start = WebVTTTime.from_ordinal(lower)
        end = WebVTTTime.from_ordinal(upper)
        if (end - start) > delta:
            text_a, j = find_subtitle(sub_a, start, end, j)
            text_b, k = find_subtitle(sub_b, start, end, k)
            text = join_lines(text_a, text_b)
            if text:
                out.append(WebVTTItem(0, start, end, text))

    out.clean_indexes()
    return out
def _ass_timecode(moment):
    """Format the value returned by ``to_time()`` as an ASS ``H:MM:SS.cc`` stamp."""
    iso = moment.isoformat()
    if '.' in iso:
        # A fractional part is present: drop the leading hour digit and the
        # last four fractional digits.
        return iso[1:-4]
    # isoformat() omits the fraction when it is zero, so add it back.
    return iso[1:] + '.00'


def convert_subs(vtt_filename, font="", size=""):
    """Convert a WebVTT subtitle file to an Advanced SubStation Alpha file.

    The output is written next to the input, with a trailing ``.vtt``
    replaced by ``.ass`` (or ``.ass`` appended when the name has no ``.vtt``
    suffix).

    :param vtt_filename: path of the WebVTT file to convert.
    :param font: font name for all styles; ``"Arial"`` when empty.
    :param size: font size for all styles; ``"24"`` when empty.
    :returns: the ``.ass`` path, or ``vtt_filename`` unchanged when the input
        could not be opened (``InvalidFile`` / ``IOError``, both only logged).
        Nothing is written when the parsed file is falsy.
    """
    vtt_suffix = '.vtt'
    output_filename = vtt_filename
    try:
        subs = WebVTTFile.open(vtt_filename)
    except InvalidFile:
        my_log("Not a VTT file.", xbmc.LOGDEBUG)
        subs = None
    except IOError:
        my_log("File not found.", xbmc.LOGDEBUG)
        subs = None
    else:
        # str.rstrip('.vtt') removes any trailing '.', 'v' or 't' characters
        # (turning "test.vtt" into "tes"), so cut the suffix explicitly.
        if vtt_filename.endswith(vtt_suffix):
            stem = vtt_filename[:-len(vtt_suffix)]
        else:
            stem = vtt_filename
        output_filename = stem + ".ass"

    # Internal rendering resolution used for scaling. Changing this affects
    # font sizes, etc.
    def_res = (720, 480)
    # Offset used for correcting the output.
    offset = (0, -45)

    # File header
    ass_header_temp = (
        "[Script Info]\n"
        "; This is an Advanced Sub Station Alpha v4+ script.\n"
        "Title: converted from vtt\n"
        "ScriptType: v4.00+\n"
        "Collisions: Normal\n"
        "PlayDepth: 0\n"
        "PlayResX: {}\n"
        "PlayResY: {}\n\n"
        "[V4+ Styles]\n"
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
        "Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, "
        "Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n")
    ass_header = ass_header_temp.format(def_res[0], def_res[1])

    # Style line template
    line_template = (
        "Style: {Name},{Font},{Fontsize},{PrimaryColour},{SecondaryColour},{OutlineColour},{BackColour},"
        "{Bold},{Italic},{Underline},{StrikeOut},{ScaleX},{ScaleY},{Spacing},{Angle},{BorderStyle},"
        "{Outline},{Shadow},{Alignment},{MarginL},{MarginR},{MarginV},{Encoding}\n")

    # Event header template
    event_header = (
        "[Events]\n"
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n")

    # Event line template
    event_template = "Dialogue: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}\n"

    if not font:
        font = "Arial"
    if not size:
        size = "24"

    # Initial values shared by all styles
    initial_font_settings = {
        'Font': font,
        'Fontsize': size,
        'PrimaryColour': "&H00FFFFFF",  # NOTE: this is AABBGGRR hex notation
        'SecondaryColour': "&H0300FFFF",
        'OutlineColour': "&H00000000",
        'BackColour': "&H02000000",
        'Bold': "0",
        'Italic': "0",
        'Underline': "0",
        'StrikeOut': "0",
        'ScaleX': "100",
        'ScaleY': "100",
        'Spacing': "0",
        'Angle': "0",
        'BorderStyle': "1",
        'Outline': "2",
        'Shadow': "1",
        'Alignment': "2",
        'MarginL': "0",
        'MarginR': "0",
        'MarginV': "0",
        'Encoding': "1",
    }

    styles = dict()
    styles['dialogue'] = dict(initial_font_settings)
    styles['dialogue']['PrimaryColour'] = "&H0000FFFF"  # yellow
    styles['dialogue']['Name'] = 'dialogue'
    styles['song_lyrics'] = dict(initial_font_settings)
    # B=FF, G=FF, R=00 in AABBGGRR notation, i.e. cyan
    styles['song_lyrics']['PrimaryColour'] = "&H00FFFF00"
    styles['song_lyrics']['Name'] = 'song_lyrics'
    # Copy of the initial values; Name and Alignment are filled in per cue.
    styles['captions'] = dict(initial_font_settings)

    if not subs:
        return output_filename

    # NOTE(review): byte strings are written to a binary handle, which matches
    # Python 2 semantics -- confirm before running this under Python 3.
    with open(output_filename, 'wb') as ass_fh:
        # Write out the header and the two fixed styles.
        ass_fh.write(ass_header)
        ass_fh.write(line_template.format(**styles['dialogue']))
        ass_fh.write(line_template.format(**styles['song_lyrics']))

        # Cues that specify an alignment (or mention captions) get their own
        # style, because alignment cannot be set on an event.
        for item in subs.data:
            if ("align" in item.position or "Caption" in item.text
                    or "caption" in item.text):
                # "1" is bottom left, "3" is bottom right (like a numpad),
                # "2" is bottom centre.
                if "align:left" in item.position:
                    alignment = "1"
                elif "align:right" in item.position:
                    alignment = "3"
                else:
                    alignment = "2"
                styles['captions']['Name'] = item.index.replace('-', '_')
                styles['captions']['Alignment'] = alignment
                ass_fh.write(line_template.format(**styles['captions']))

        ass_fh.write("\n\n")
        ass_fh.write(event_header)

        # Write out the subtitles themselves; ASS calls these events.
        for item in subs.data:
            # Default vertical margin keeps subtitles off the absolute edge
            # of the screen.
            abs_vpos = 10
            abs_hpos = 0
            for item_pos in item.position.split():
                # VTT uses percentages, ASS uses pixels: convert.
                if 'line' in item_pos:
                    # 'line' is a percentage from the top of the screen
                    # (usually).
                    per_float = float(
                        item_pos.split(':')[1].rstrip('%')) / 100
                    abs_vpos = int(
                        def_res[1] - per_float * def_res[1] + offset[1])
                if 'position' in item_pos:
                    # 'position' is a percentage from the left of the screen
                    # (usually).
                    per_float = float(
                        item_pos.split(':')[1].rstrip('%')) / 100
                    abs_hpos = int(per_float * def_res[0] + offset[0])

            item_text = item.text_without_tags.encode('utf-8')
            start_text = _ass_timecode(item.start.to_time())
            end_text = _ass_timecode(item.end.to_time())

            # Match each event to the style emitted for it above.
            if "caption" in item.text or "Caption" in item.text:
                style_name = item.index.replace('-', '_')
            elif "song" in item.text or "Song" in item.text:
                style_name = "song_lyrics"
            else:
                style_name = "dialogue"

            event = {
                'Layer': "0",
                'Start': start_text,
                'End': end_text,
                'Style': style_name,
                'Name': item.index,
                'MarginL': abs_hpos,
                'MarginR': "0",
                'MarginV': abs_vpos,
                'Effect': "",
                'Text': item_text,
            }
            ass_fh.write(event_template.format(**event))

    return output_filename
def main(options):
    """Download a video, transcribe its audio and save the result as WebVTT.

    Steps: check that ffmpeg runs, download ``options.vid_url`` into a
    temporary file, extract a 16 kHz mono PCM WAV with ffmpeg, run the
    DeepSpeech model over fixed-size audio segments, and write one cue per
    non-empty segment to ``options.output``.

    :param options: object providing ``vid_url``, ``output`` and
        ``deepspeech_model_dir``.

    Calls ``exit(1)`` when ffmpeg is unavailable, when the audio extraction
    fails, or when the WAV sample rate is not 16000.
    """
    # Ensure ffmpeg is around
    if not run_ffmpeg(['-version']):
        log.error(
            "ffmpeg needs to be available to strip audio from the video file.")
        exit(1)

    with NamedTemporaryFile(delete=True) as vid_file:
        log.info("Downloading %s - this might take a while." % options.vid_url)
        response = get(options.vid_url, stream=True)
        total_length = response.headers.get("content-length")
        if total_length is None:  # no content length header
            log.info("Unknown length - can't predict how long this will take.")
            # The original wrote to an undefined name ``f`` here.  Write to
            # the temp file and flush so ffmpeg sees the data when it opens
            # the file by name.
            vid_file.write(response.content)
            vid_file.flush()
        else:
            bar = ProgressBar(max_value=int(total_length))
            dl = 0
            for data in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                dl += len(data)
                vid_file.write(data)
                vid_file.flush()
                bar.update(dl)

        log.info("Download done. Stripping audio.")
        # NOTE(review): the temporary WAV file is never deleted -- confirm
        # whether it should be removed once the samples are loaded.
        (wav_file, wav_file_name) = mkstemp('.wav')
        result = run_ffmpeg([
            "-y", "-i", vid_file.name, "-vn", "-acodec", "pcm_s16le", "-ar",
            "16000", "-ac", "1", wav_file_name
        ])
        if not result:
            close(wav_file)
            log.error("ffmpeg failed. Bailing.")
            exit(1)

        fs, audio = wav.read(wav_file_name)
        close(wav_file)

    log.info("Will write VTT to %s" % options.output)

    # Make sure the WAV is to code...
    log.info("Loading up WAV file...")
    if fs != 16000:
        log.error("Only 16000hz WAV files are usable.")
        exit(1)

    total_samples = len(audio)
    duration_hours, duration_minutes, duration_seconds = sample_index_to_time(
        total_samples)
    log.info("Approximate duration: %d:%02d:%02d" %
             (duration_hours, duration_minutes, duration_seconds))

    # Let's load up DeepSpeech and get it ready.
    log.info("Loading pre-trained DeepSpeech model...")
    root_model_dir = path.join(options.deepspeech_model_dir, MODEL_DIR)
    model = path.join(root_model_dir, MODEL_FILE)
    alphabet = path.join(root_model_dir, MODEL_ALPHABET)
    lang_model = path.join(root_model_dir, MODEL_LANG_MODEL)
    trie = path.join(root_model_dir, MODEL_TRIE)

    deepspeech = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    log.info("Done loading model.")

    log.info("Loading language model...")
    deepspeech.enableDecoderWithLM(alphabet, lang_model, trie, LM_WEIGHT,
                                   WORD_COUNT_WEIGHT, VALID_WORD_COUNT_WEIGHT)
    log.info("Done loading model.")

    playhead = 0
    out = WebVTTFile()
    bar = ProgressBar(max_value=total_samples)

    while playhead < (total_samples - 1):
        end_point = min(playhead + AUDIO_SEGMENT_SAMPLES, (total_samples - 1))
        segment = audio[playhead:end_point]
        inference = deepspeech.stt(segment, fs)
        log.debug("Inferred: %s" % inference)

        start_hours, start_minutes, start_seconds = sample_index_to_time(
            playhead)
        playhead = end_point
        end_hours, end_minutes, end_seconds = sample_index_to_time(playhead)

        # Skip segments with no text or only the filler "ah".
        if not inference or inference == "ah":
            continue

        # ``items()`` works on Python 2 and 3; ``iteritems()`` is 2-only.
        for search, replace in INFERENCE_REPLACEMENTS.items():
            inference = sub(r"\b" + search + r"\b", replace, inference)

        inference = fill(inference, width=MAX_CAPTION_WIDTH)

        start = WebVTTTime(start_hours, start_minutes, start_seconds)
        end = WebVTTTime(end_hours, end_minutes, end_seconds)

        item = WebVTTItem(0, start, end, inference)
        out.append(item)
        bar.update(playhead)

        # Save after every cue so partial output is already on disk.
        out.save(options.output, encoding="utf-8")

    out.clean_indexes()
    out.save(options.output, encoding="utf-8")
except getopt.GetoptError, err: print str(err) usage() sys.exit(2) #Settings default values delta = WebVTTTime(milliseconds=500) encoding = "utf_8" #- if len(args) <> 3: usage() sys.exit(2) for o, a in opts: if o in ("-d", "--delta"): delta = WebVTTTime(milliseconds=int(a)) elif o in ("-e", "--encoding"): encoding = a elif o in ("-h", "--help"): usage() sys.exit() subs_a = WebVTTFile.open(args[0], encoding=encoding) subs_b = WebVTTFile.open(args[1], encoding=encoding) out = merge_subtitle(subs_a, subs_b, delta) out.save(args[2], encoding=encoding) if __name__ == "__main__": main()
def test_shift(self):
    """Check the end time after a positional shift and after a ratio shift."""
    subtitles = WebVTTFile([WebVTTItem()])

    # Positional offsets of 1 applied to a default-constructed item.
    subtitles.shift(1, 1, 1, 1)
    after_offset = subtitles[0].end
    self.assertEqual(after_offset, (1, 1, 1, 1))

    # A ratio of 2 applied on top of the previous shift.
    subtitles.shift(ratio=2)
    after_ratio = subtitles[0].end
    self.assertEqual(after_ratio, (2, 2, 2, 2))
def test_default_value(self):
    """``eol`` equals ``os.linesep`` by default and honours the constructor argument."""
    default_eol = self.file.eol
    self.assertEqual(default_eol, os.linesep)

    crlf = '\r\n'
    crlf_file = WebVTTFile(eol=crlf)
    self.assertEqual(crlf_file.eol, crlf)
def setUp(self):
    """Create an empty ``WebVTTFile`` and store it as ``self.file``."""
    empty_file = WebVTTFile()
    self.file = empty_file
def setUp(self):
    """Create an empty ``WebVTTFile`` and store it as ``self.duck``."""
    empty_file = WebVTTFile()
    self.duck = empty_file
def test_multiple_item(self):
    """``text`` of a two-item file is the items' texts joined by a newline."""
    vtt_file = WebVTTFile([
        WebVTTItem(1, {'seconds': 0}, {'seconds': 3}, 'Hello'),
        WebVTTItem(1, {'seconds': 1}, {'seconds': 2}, 'World !'),
    ])
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(vtt_file.text, 'Hello\nWorld !')
def test_single_item(self):
    """``text`` of a one-item file is that item's text."""
    vtt_file = WebVTTFile(
        [WebVTTItem(1, {'seconds': 1}, {'seconds': 2}, 'Hello')])
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(vtt_file.text, 'Hello')
opts, args = getopt.getopt(sys.argv[1:], 'hd:e:', ["help", "encoding=", "delta="]) except getopt.GetoptError, err: print str(err) usage() sys.exit(2) #Settings default values delta = WebVTTTime(milliseconds=500) encoding="utf_8" #- if len(args) <> 3: usage() sys.exit(2) for o, a in opts: if o in ("-d", "--delta"): delta = WebVTTTime(milliseconds=int(a)) elif o in ("-e", "--encoding"): encoding = a elif o in ("-h", "--help"): usage() sys.exit() subs_a = WebVTTFile.open(args[0], encoding=encoding) subs_b = WebVTTFile.open(args[1], encoding=encoding) out = merge_subtitle(subs_a, subs_b, delta) out.save(args[2], encoding=encoding) if __name__ == "__main__": main()