def new_caption_set_from_match(
        self, match: Dict[int, Dict[int, str]]) -> CaptionSet:
    """Build a new caption set whose captions carry the matched text.

    Every caption of every sentence in ``self.sentences`` is visited in
    order.  For each one, its ``raw_caption`` is deep-copied and the copy's
    nodes are replaced by a single text node holding the whitespace-stripped
    string found at ``match[sentence_index][caption_index]``.

    :param match: Nested mapping of sentence index -> caption index -> text
    :return: A ``CaptionSet`` holding the rebuilt captions under the
        ``'en'`` key
    """
    rebuilt = []
    for sentence_idx, sentence in enumerate(self.sentences):
        for caption_idx, caption in enumerate(sentence.captions):
            matched_text = match[sentence_idx][caption_idx]
            # Work on a copy so the caption held by the sentence is untouched.
            clone = deepcopy(caption.raw_caption)
            clone.nodes = [CaptionNode.create_text(matched_text.strip())]
            rebuilt.append(clone)
    return CaptionSet({'en': rebuilt})
def replace_unknown_language(self, lang_code):
    """
    Re-key the captions stored under the "unknown" language.

    A new caption set is built in which the captions filed under
    ``LANGUAGE_CODE_UNKNOWN`` are filed under ``lang_code`` instead; all
    other languages keep their key.  Styles (copied into a fresh dict) and
    layout info are carried over, and the result replaces
    ``self.caption_set``.

    :param lang_code: A string with the language code to replace the
        unknown language with
    """
    source_set = self.get_caption_set()

    remapped = {}
    for lang in source_set.get_languages():
        target = lang
        if lang == LANGUAGE_CODE_UNKNOWN:
            target = lang_code
        remapped[target] = source_set.get_captions(lang)

    # Swap in the rebuilt set, keeping the original styling and layout.
    styles_copy = dict(source_set.get_styles())
    self.caption_set = CaptionSet(
        remapped, styles=styles_copy, layout_info=source_set.layout_info)
def convert(self, lang_code):
    """
    Write out the captions of a single language.

    The captions for ``lang_code`` are wrapped, together with a copy of the
    styles and that language's layout info, in a one-language caption set
    which is then handed to ``self.writer``.  (The previous documentation
    described the output as VTT; the format is whatever ``self.writer``
    produces.)

    :param lang_code: A string with one of the languages to output the
        captions for
    :type: lang_code: str
    :return: A string with the converted caption contents
    :rtype: str
    :raises InvalidSubtitleLanguageError: if the caption set has no
        captions for ``lang_code``
    """
    full_set = self.get_caption_set()
    selected = full_set.get_captions(lang_code)

    if not selected:
        message = "Language '{}' is not present in caption set".format(
            lang_code)
        raise InvalidSubtitleLanguageError(message)

    style_source = full_set.get_styles()
    layout = full_set.get_layout_info(lang_code)
    single_language_set = CaptionSet(
        {lang_code: selected}, styles=dict(style_source), layout_info=layout)
    return self.writer.write(single_language_set)
def run_pipeline(url=None, hmm=None, lm=None, dict=None,
                 caption_format='webvtt', out_file=None):
    """Run a GStreamer/pocketsphinx pipeline on ``url`` and emit captions.

    Each final pocketsphinx hypothesis posted on the pipeline bus becomes one
    caption.  When the stream ends, the captions are serialised and either
    written to ``out_file`` or printed.

    :param url: URI handed to ``uridecodebin``; required
    :param hmm: Optional value for the pocketsphinx ``hmm`` property
    :param lm: Optional value for the pocketsphinx ``lm`` property
    :param dict: Optional value for the pocketsphinx ``dict`` property
        (the name shadows the builtin but is kept for caller compatibility)
    :param caption_format: ``'srt'`` selects ``SRTWriter``; any other value
        selects ``WebVTTWriter``
    :param out_file: Path to write the UTF-8 caption data to; when ``None``
        the data is printed instead
    :raises Exception: if ``url`` is ``None``
    """
    if url is None:
        raise Exception('No URL specified!')

    pipeline = Gst.parse_launch('uridecodebin name=source ! audioconvert !' +
                                ' audioresample ! pocketsphinx name=asr !' +
                                ' fakesink')
    source = pipeline.get_by_name('source')
    source.set_property('uri', url)

    pocketsphinx = pipeline.get_by_name('asr')
    if hmm:
        pocketsphinx.set_property('hmm', hmm)
    if lm:
        pocketsphinx.set_property('lm', lm)
    if dict:
        pocketsphinx.set_property('dict', dict)

    bus = pipeline.get_bus()

    # Start playing
    pipeline.set_state(Gst.State.PLAYING)

    cap_set = CaptionSet()
    captions = []

    try:
        # Wait until error or EOS
        while True:
            try:
                msg = bus.timed_pop(Gst.CLOCK_TIME_NONE)
                if not msg:
                    continue
                # An ERROR message is never followed by EOS, so it must end
                # the loop too; otherwise timed_pop would block forever.
                if msg.type in (Gst.MessageType.EOS, Gst.MessageType.ERROR):
                    break
                struct = msg.get_structure()
                if (struct and struct.get_name() == 'pocketsphinx'
                        and struct['final']):
                    c = Caption()
                    # NOTE(review): presumably converts GStreamer clock
                    # times to microseconds -- confirm the unit Caption
                    # expects.
                    c.start = struct['start_time'] / Gst.USECOND
                    c.end = struct['end_time'] / Gst.USECOND
                    c.nodes.append(
                        CaptionNode.create_text(struct['hypothesis']))
                    captions.append(c)
            except KeyboardInterrupt:
                # Ask the pipeline to finish; the loop exits once the
                # resulting EOS message is popped from the bus.
                pipeline.send_event(Gst.Event.new_eos())
    finally:
        # Free resources even if message handling raised.
        pipeline.set_state(Gst.State.NULL)

    cap_set.set_captions('en-US', captions)

    writer = SRTWriter() if caption_format == 'srt' else WebVTTWriter()
    caption_data = writer.write(cap_set)

    if out_file is not None:
        # Context manager guarantees the file is flushed and closed.
        with codecs.open(out_file, 'w', 'utf-8') as handle:
            handle.write(caption_data)
    else:
        print(caption_data)
# Build output.srt from story.txt and per-line durations read on stdin.
#
# Only stdin lines that do not start with a space and split into exactly
# three whitespace-separated tokens are used: the last path component of the
# first token is a 1-based line number into story.txt, and the third token is
# the duration of that caption.  Captions are laid out back to back starting
# at offset 0.

# Read the story lines up front; the context manager closes the handle.
with codecs.open('story.txt', 'r', 'utf-8') as story_file:
    stories = story_file.readlines()


def microsec(t):
    """Scale ``t`` by 1,000,000 (presumably seconds to microseconds)."""
    return t * 1000000


offset = 0.0
captions = []
for line in sys.stdin:
    if line.startswith(' '):
        continue
    tokens = line.split()
    if len(tokens) != 3:
        continue
    dirname = tokens[0]
    index = int(dirname.split('/')[-1]) - 1
    duration = float(tokens[2])
    # Single-argument call form prints identically on Python 2 and 3.
    print(duration)
    text = stories[index]
    cap = Caption(microsec(offset), microsec(offset + duration),
                  [CaptionNode.create_text(text)])
    offset += duration
    captions.append(cap)

caps = CaptionSet({'en': captions})
# Close output.srt even if serialisation or the write fails.
with codecs.open('output.srt', 'w', 'utf-8') as srt:
    srt.write(SRTWriter().write(caps))