def read(self, content, lang='en-US'):
    """Parse SRT text into a ``CaptionSet``.

    The input is processed cue by cue. Each cue is expected to start with
    a digits-only counter line followed by a ``start --> end`` timing
    line; parsing stops at the first counter position whose line is not
    made of digits only.

    :param content: the whole SRT document as one string
    :param lang: language key under which the captions are stored
    :returns: a ``CaptionSet`` holding the parsed captions for ``lang``
    """
    caption_set = CaptionSet()
    lines = content.splitlines()
    start_line = 0
    captions = []

    while start_line < len(lines):
        if not lines[start_line].isdigit():
            break

        caption = Caption()

        # index where the next cue begins, as reported by the helper
        end_line = self._find_text_line(start_line, lines)

        timing = lines[start_line + 1].split('-->')
        caption.start = self._srttomicro(timing[0].strip(' \r\n'))
        caption.end = self._srttomicro(timing[1].strip(' \r\n'))

        for line in lines[start_line + 2:end_line - 1]:
            # skip extra blank lines (the first text line is always kept)
            if not caption.nodes or line != '':
                caption.nodes.append(CaptionNode.create_text(line))
                caption.nodes.append(CaptionNode.create_break())

        # A cue without any text lines produces no nodes. Popping from the
        # empty list used to raise IndexError; such cues are now skipped.
        if caption.nodes:
            # remove last line break from end of caption list
            caption.nodes.pop()
            captions.append(caption)

        start_line = end_line

    caption_set.set_captions(lang, captions)
    return caption_set
def _convert_to_caption(self, buffer, start):
    """Turn one buffered caption string into a ``Caption`` on ``self.scc``.

    ``buffer`` is split on the ``<$>`` separator; each piece is either a
    ``{break}``, ``{italic}`` or ``{end-italic}`` marker, or plain text.
    The caption is appended to ``self.scc`` only when it ends up with at
    least one node.

    :param buffer: the ``<$>``-separated caption contents
    :param start: start time of the new caption; also used as the end
        time of the previous caption when that one is still open (end 0)
    """
    # the previous caption has no end time yet: it ends where this starts
    if self.scc and self.scc[-1].end == 0:
        self.scc[-1].end = start

    caption = Caption()
    caption.start = start
    caption.end = 0  # unknown for now; set when the next caption arrives
    self.open_italic = False
    self.first_element = True

    for element in buffer.split('<$>'):
        # nothing but whitespace: ignore
        if element.strip() == '':
            continue

        if element == '{break}':
            self._translate_break(caption)
            continue

        if element == '{italic}':
            caption.nodes.append(
                CaptionNode.create_style(True, {'italics': True}))
            self.open_italic = True
            self.first_element = False
            continue

        # NOTE(review): an '{end-italic}' with no italics open is not
        # matched here and is emitted as text below -- confirm intended.
        if element == '{end-italic}' and self.open_italic:
            caption.nodes.append(
                CaptionNode.create_style(False, {'italics': True}))
            self.open_italic = False
            continue

        # plain text: collapse every run of whitespace to a single space
        words = element.split()
        caption.nodes.append(CaptionNode.create_text(' '.join(words)))
        self.first_element = False

    # italics still open at the end of the caption get a closing node
    if self.open_italic:
        caption.nodes.append(
            CaptionNode.create_style(False, {'italics': True}))

    # remove extraneous italics tags in the same caption
    self._remove_italics(caption)

    # captions without any content are dropped
    if caption.nodes:
        self.scc.append(caption)
def _translate_break(self, caption):
    """Append a line-break node to ``caption`` unless it is redundant.

    No break is added when nothing has been added to the caption yet
    (``self.first_element``) or when the last node is already a break.
    Italics that are open are closed before the break is appended.

    :param caption: the caption whose ``nodes`` list is extended
    """
    # leading break: nothing to separate yet
    if self.first_element:
        return

    # never emit two breaks in a row
    if caption.nodes[-1].type == CaptionNode.BREAK:
        return

    # italics are closed before the break
    if self.open_italic:
        caption.nodes.append(
            CaptionNode.create_style(False, {'italics': True}))
        self.open_italic = False

    caption.nodes.append(CaptionNode.create_break())
def _translate_span(self, tag):
    """Translate a span tag and its children into nodes on ``self.line``.

    The children are always translated. They are wrapped in an opening
    and a closing style node only when ``self._translate_attrs`` returns
    something other than the empty string for this tag.

    :param tag: the span element to translate
    """
    args = self._translate_attrs(tag)
    styled = args != ''

    if styled:
        self.line.append(CaptionNode.create_style(True, args))

    # children are handled the same way whether or not a style applies
    for child in tag.contents:
        self._translate_tag(child)

    if styled:
        self.line.append(CaptionNode.create_style(False, args))
def _translate_tag(self, tag):
    """Translate one parsed element into nodes appended to ``self.nodes``.

    Text becomes a text node (whitespace-only text is dropped), ``br``
    becomes a break node, ``span`` is delegated to ``_translate_span``,
    and any other element is walked recursively through its children.

    :param tag: a ``NavigableString`` or an element with ``name`` and
        ``contents``
    """
    if isinstance(tag, NavigableString):
        text = tag.strip()
        if text != '':
            self.nodes.append(CaptionNode.create_text(text))
        return

    if tag.name == 'br':
        self.nodes.append(CaptionNode.create_break())
        return

    if tag.name == 'span':
        self._translate_span(tag)
        return

    # any other element: descend into its children
    for child in tag.contents:
        self._translate_tag(child)
def run_pipeline(url=None, hmm=None, lm=None, dict=None,
                 caption_format="webvtt", out_file=None):
    """Run pocketsphinx over the media at ``url`` and emit captions.

    A GStreamer pipeline decodes ``url`` and feeds it to the
    ``pocketsphinx`` element. Every final hypothesis posted on the bus
    becomes one caption. The loop ends on EOS or on a bus error; in the
    error case the error is reported on stderr and the captions collected
    so far are still written.

    :param url: URI of the media to transcribe (required)
    :param hmm: value for the pocketsphinx ``hmm`` property, if given
    :param lm: value for the pocketsphinx ``lm`` property, if given
    :param dict: value for the pocketsphinx ``dict`` property, if given
    :param caption_format: ``"srt"`` selects ``SRTWriter``; anything else
        selects ``WebVTTWriter``
    :param out_file: path to write the captions to (UTF-8); when ``None``
        the captions are printed instead
    :raises Exception: if ``url`` is ``None``
    """
    import sys

    if url is None:
        raise Exception("No URL specified!")

    pipeline = Gst.parse_launch(
        "uridecodebin name=source ! audioconvert !"
        + " audioresample ! pocketsphinx name=asr !"
        + " fakesink"
    )
    source = pipeline.get_by_name("source")
    source.set_property("uri", url)

    pocketsphinx = pipeline.get_by_name("asr")
    if hmm:
        pocketsphinx.set_property("hmm", hmm)
    if lm:
        pocketsphinx.set_property("lm", lm)
    if dict:
        pocketsphinx.set_property("dict", dict)

    bus = pipeline.get_bus()

    # Start playing
    pipeline.set_state(Gst.State.PLAYING)

    cap_set = CaptionSet()
    captions = []

    try:
        # Wait until error or EOS
        while True:
            try:
                msg = bus.timed_pop(Gst.CLOCK_TIME_NONE)
                if not msg:
                    continue
                if msg.type == Gst.MessageType.EOS:
                    break
                if msg.type == Gst.MessageType.ERROR:
                    # Without this branch a failed pipeline never posts
                    # EOS and the loop would block forever.
                    err, debug = msg.parse_error()
                    sys.stderr.write(
                        "GStreamer error: %s (%s)\n" % (err, debug))
                    break
                struct = msg.get_structure()
                if struct and struct.get_name() == "pocketsphinx":
                    if struct["final"]:
                        c = Caption()
                        c.start = struct["start_time"] / Gst.USECOND
                        c.end = struct["end_time"] / Gst.USECOND
                        c.nodes.append(
                            CaptionNode.create_text(struct["hypothesis"]))
                        captions.append(c)
            except KeyboardInterrupt:
                # ask the pipeline to finish; the loop then ends on EOS
                pipeline.send_event(Gst.Event.new_eos())
    finally:
        # Free resources, even if handling a message raised
        pipeline.set_state(Gst.State.NULL)

    cap_set.set_captions("en-US", captions)

    writer = SRTWriter() if caption_format == "srt" else WebVTTWriter()
    caption_data = writer.write(cap_set)

    if out_file is not None:
        # context manager so the file is flushed and closed deterministically
        with codecs.open(out_file, "w", "utf-8") as handle:
            handle.write(caption_data)
    else:
        print(caption_data)
def _translate_tag(self, tag):
    """Translate one parsed element into nodes appended to ``self.line``.

    Text becomes a text node holding the stripped string, ``br`` becomes
    a break node, ``i`` wraps its translated children in an italics
    open/close pair, ``span`` is delegated to ``_translate_span``, and
    any other element is walked recursively through its children.

    :param tag: a ``NavigableString`` or an element with ``name`` and
        ``contents``
    """
    if isinstance(tag, NavigableString):
        stripped = tag.strip()
        self.line.append(CaptionNode.create_text(stripped))
        return

    name = tag.name

    if name == 'br':
        self.line.append(CaptionNode.create_break())
        return

    if name == 'span':
        self._translate_span(tag)
        return

    if name == 'i':
        # italics: opening node, the children, then the closing node
        self.line.append(CaptionNode.create_style(True, {'italics': True}))
        for child in tag.contents:
            self._translate_tag(child)
        self.line.append(CaptionNode.create_style(False, {'italics': True}))
        return

    # any other element: descend into its children
    for child in tag.contents:
        self._translate_tag(child)
def new_caption_set_from_match(
        self, match: Dict[int, Dict[int, str]]) -> CaptionSet:
    """Build a caption set whose texts are replaced by the matched ones.

    Every caption of every sentence in ``self.sentences`` is deep-copied
    from its ``raw_caption`` and its nodes are replaced by a single text
    node holding the stripped matched string.

    :param match: matched text, indexed first by sentence position and
        then by the caption's position within that sentence
    :returns: a ``CaptionSet`` with the rebuilt captions under ``'en'``
    """
    rebuilt = []
    for sentence_pos, sentence in enumerate(self.sentences):
        for caption_pos, caption in enumerate(sentence.captions):
            replacement = match[sentence_pos][caption_pos]
            # copy so the source caption keeps its original nodes
            clone = deepcopy(caption.raw_caption)
            text_node = CaptionNode.create_text(replacement.strip())
            clone.nodes = [text_node]
            rebuilt.append(clone)
    return CaptionSet({'en': rebuilt})
def _combine_matching_captions(self, caption_set):
    """Merge consecutive captions that share start and end times.

    For each language, a caption whose start and end both equal those of
    the previously kept caption is folded into it: a break node and then
    the caption's nodes are appended to the kept caption's nodes. The
    kept caption object is modified in place.

    :param caption_set: the caption set to process; updated per language
    :returns: the same ``caption_set``
    """
    for lang in caption_set.get_languages():
        captions = caption_set.get_captions(lang)
        merged = captions[:1]

        for caption in captions[1:]:
            previous = merged[-1]
            same_timing = (caption.start == previous.start
                           and caption.end == previous.end)
            if not same_timing:
                merged.append(caption)
                continue
            # identical timing: continue the previous caption on a new line
            previous.nodes.append(CaptionNode.create_break())
            previous.nodes.extend(caption.nodes)

        caption_set.set_captions(lang, merged)

    return caption_set
def run_pipeline(url=None, hmm=None, lm=None, dict=None,
                 caption_format='webvtt', out_file=None):
    """Run pocketsphinx over the media at ``url`` and emit captions.

    A GStreamer pipeline decodes ``url`` and feeds it to the
    ``pocketsphinx`` element. Every final hypothesis posted on the bus
    becomes one caption. The loop ends on EOS or on a bus error; in the
    error case the error is reported on stderr and the captions collected
    so far are still written.

    :param url: URI of the media to transcribe (required)
    :param hmm: value for the pocketsphinx ``hmm`` property, if given
    :param lm: value for the pocketsphinx ``lm`` property, if given
    :param dict: value for the pocketsphinx ``dict`` property, if given
    :param caption_format: ``'srt'`` selects ``SRTWriter``; anything else
        selects ``WebVTTWriter``
    :param out_file: path to write the captions to (UTF-8); when ``None``
        the captions are printed instead
    :raises Exception: if ``url`` is ``None``
    """
    import sys

    if url is None:
        raise Exception('No URL specified!')

    pipeline = Gst.parse_launch('uridecodebin name=source ! audioconvert !' +
                                ' audioresample ! pocketsphinx name=asr !' +
                                ' fakesink')
    source = pipeline.get_by_name('source')
    source.set_property('uri', url)

    pocketsphinx = pipeline.get_by_name('asr')
    if hmm:
        pocketsphinx.set_property('hmm', hmm)
    if lm:
        pocketsphinx.set_property('lm', lm)
    if dict:
        pocketsphinx.set_property('dict', dict)

    bus = pipeline.get_bus()

    # Start playing
    pipeline.set_state(Gst.State.PLAYING)

    cap_set = CaptionSet()
    captions = []

    try:
        # Wait until error or EOS
        while True:
            try:
                msg = bus.timed_pop(Gst.CLOCK_TIME_NONE)
                if not msg:
                    continue
                if msg.type == Gst.MessageType.EOS:
                    break
                if msg.type == Gst.MessageType.ERROR:
                    # Without this branch a failed pipeline never posts
                    # EOS and the loop would block forever.
                    err, debug = msg.parse_error()
                    sys.stderr.write(
                        'GStreamer error: %s (%s)\n' % (err, debug))
                    break
                struct = msg.get_structure()
                if struct and struct.get_name() == 'pocketsphinx':
                    if struct['final']:
                        c = Caption()
                        c.start = struct['start_time'] / Gst.USECOND
                        c.end = struct['end_time'] / Gst.USECOND
                        c.nodes.append(
                            CaptionNode.create_text(struct['hypothesis']))
                        captions.append(c)
            except KeyboardInterrupt:
                # ask the pipeline to finish; the loop then ends on EOS
                pipeline.send_event(Gst.Event.new_eos())
    finally:
        # Free resources, even if handling a message raised
        pipeline.set_state(Gst.State.NULL)

    cap_set.set_captions('en-US', captions)

    writer = SRTWriter() if caption_format == 'srt' else WebVTTWriter()
    caption_data = writer.write(cap_set)

    if out_file is not None:
        # context manager so the file is flushed and closed deterministically
        with codecs.open(out_file, 'w', 'utf-8') as handle:
            handle.write(caption_data)
    else:
        print(caption_data)
# Build output.srt from story.txt and per-clip durations read on stdin.
#
# Each usable stdin line has exactly three whitespace-separated tokens:
# the first is a path whose last '/'-separated component is a 1-based
# story index, the third is a duration. Lines that start with a space or
# have a different token count are ignored. Captions are laid out back to
# back, starting at offset 0.
with codecs.open('story.txt', 'r', 'utf-8') as story_file:
    stories = story_file.readlines()


def microsec(t):
    """Scale ``t`` by one million (presumably seconds to microseconds)."""
    return t * 1000000


offset = 0.0
captions = []
for line in sys.stdin:
    if line.startswith(' '):
        continue
    tokens = line.split()
    if len(tokens) != 3:
        continue
    dirname = tokens[0]
    # the path's last component is 1-based; story lines are 0-based
    index = int(dirname.split('/')[-1]) - 1
    duration = float(tokens[2])
    # call form prints the same single value on Python 2 and Python 3
    print(duration)
    text = stories[index]
    cap = Caption(microsec(offset), microsec(offset + duration),
                  [CaptionNode.create_text(text)])
    offset += duration
    captions.append(cap)

caps = CaptionSet({'en': captions})
# context manager closes the file even if writing fails
with codecs.open('output.srt', 'w', 'utf-8') as srt:
    srt.write(SRTWriter().write(caps))