def break_sentences(args, subs, alternative):
    """Split recognized words into subtitle cues and append them to ``subs``.

    Words from ``alternative.words`` are accumulated into one cue until a
    word contains ``.``, ``!`` or ``?``, the summed word length exceeds
    ``args.max_chars``, or a word that is not the first of its cue contains
    ``,``. A cue runs from the start time of its first word to the end time
    of its last word. Words left over after the last break are emitted as a
    final cue instead of being discarded.

    Args:
        args: Object exposing ``max_chars``, the per-cue character limit.
        subs: List of ``srt.Subtitle`` objects; extended in place.
        alternative: Object whose ``words`` items expose ``word``,
            ``start_time`` and ``end_time`` (both with ``ToTimedelta()``).

    Returns:
        The ``subs`` list that was passed in.
    """
    first_word = True
    char_count = 0
    idx = len(subs) + 1
    content = ""
    start = None
    last_word = None

    def emit(end_time):
        # Reads idx/start/content from the enclosing scope at call time.
        subs.append(
            srt.Subtitle(index=idx,
                         start=start,
                         end=end_time.ToTimedelta(),
                         content=srt.make_legal_content(content)))

    for w in alternative.words:
        if first_word:
            # First word of the cue: remember where the cue starts.
            start = w.start_time.ToTimedelta()
        last_word = w

        char_count += len(w.word)
        content += " " + w.word.strip()

        # Break at sentence punctuation or when the line is too long; a
        # comma also breaks, except on the very first word of a cue.
        if (any(mark in w.word for mark in ".!?")
                or char_count > args.max_chars
                or ("," in w.word and not first_word)):
            emit(w.end_time)
            first_word = True
            idx += 1
            content = ""
            char_count = 0
        else:
            first_word = False

    # Flush the tail: without this, trailing words that never hit a break
    # condition would be silently lost.
    if content:
        emit(last_word.end_time)

    return subs
def merge_srt(zh_file_path, en_file_path, new_file_path):
    """Merge two SRT files cue-by-cue into one bilingual SRT file.

    Each cue of the first file keeps its index and timing; its content
    becomes its own text, a newline, and the text of the cue at the same
    position in the second file.

    Args:
        zh_file_path: Path of the first (upper-line) SRT file, UTF-8.
        en_file_path: Path of the second (lower-line) SRT file, UTF-8.
        new_file_path: Path the merged SRT is written to, UTF-8.

    Returns:
        True when the merged file was written; False when the two files
        contain a different number of cues (nothing is written then).
    """
    # Context managers guarantee the handles are closed even if reading
    # or parsing raises.
    with open(zh_file_path, mode='r', encoding='utf-8') as zh_file:
        zh_srt_s = zh_file.read()
    with open(en_file_path, mode='r', encoding='utf-8') as en_file:
        en_srt_s = en_file.read()

    zh_subs = list(srt.parse(zh_srt_s))
    en_subs = list(srt.parse(en_srt_s))
    if len(zh_subs) != len(en_subs):
        return False

    for zh_sub, en_sub in zip(zh_subs, en_subs):
        zh_sub.content = srt.make_legal_content(
            zh_sub.content + '\n' + en_sub.content)

    # Compose before opening the target so a failure here cannot leave a
    # truncated output file behind.
    srt_s = srt.compose(zh_subs)

    # Write the merged SRT text to the new file.
    with open(new_file_path, 'w', encoding='utf-8') as new_file:
        new_file.write(srt_s)
    return True
def say(self, string, time):
    """Record that ``string`` is the current text as of ``time``.

    The text is stripped and passed through ``srt.make_legal_content``.
    If the result equals the text already stored in ``self.string``
    nothing happens. Otherwise ``self.flush(time)`` is called, and a
    non-empty result is stored in ``self.string`` with ``self.start`` set
    to ``time``. An empty result only triggers the flush.
    """
    text = srt.make_legal_content(string.strip())

    # Same non-empty text as before: leave the current state untouched.
    if text and text == self.string:
        return

    # NOTE(review): flush presumably closes the pending cue - confirm
    # against its definition.
    self.flush(time)
    if text:
        self.string = text
        self.start = time
def generate_new_subtitle(self):
    """Turn the buffered caption lines into the next subtitle.

    The lines in ``self.caption`` are joined with newlines and passed
    through ``srt.make_legal_content``. The new subtitle starts at
    ``self.time_now``, lasts ``self.time_delta``, and is numbered one past
    the current length of ``self.subtitles``. Afterwards ``self.time_now``
    is moved to the subtitle's end and ``self.caption`` is emptied.
    """
    text = srt.make_legal_content('\n'.join(self.caption))

    begin = self.time_now
    finish = begin + self.time_delta
    number = len(self.subtitles) + 1

    entry = srt.Subtitle(number, begin, finish, text)
    self.subtitles.append(entry)

    # The next subtitle begins where this one ends.
    self.time_now = finish
    self.caption.clear()
def translate_srt(file_path, new_file_path, source, target, max_chars=2000):
    """Translate every cue of an SRT file and write the result to a new file.

    The cue texts are stripped of ``<...>`` tags, grouped into batches
    whose combined length stays below ``max_chars``, and each batch is
    passed to ``translate(source, target, batch)``. The translated texts
    replace the cue contents; indices and timings are kept.

    Args:
        file_path: Path of the SRT file to read, UTF-8.
        new_file_path: Path the translated SRT is written to, UTF-8.
        source: Source-language argument forwarded to ``translate``.
        target: Target-language argument forwarded to ``translate``.
        max_chars: Exclusive upper bound on the total characters sent in
            a single ``translate`` call.

    Raises:
        ValueError: If a single cue is ``max_chars`` characters or longer
            (it cannot fit in any request), or if ``translate`` returned
            fewer texts than there are cues.
    """
    # Read the whole file as one string and parse it into Subtitle objects.
    with open(file_path, mode='r', encoding='utf-8') as src_file:
        subs = list(srt.parse(src_file.read()))

    # Cue texts with every <...> tag removed.
    texts = [re.sub(r'<\/?[\s\S]*?(?:".*")*>', '', sub.content)
             for sub in subs]

    # Fail before any request is made if a cue can never fit in one.
    for position, text in enumerate(texts):
        if len(text) >= max_chars:
            raise ValueError(
                'subtitle at position {} has {} characters; the limit per '
                'request is {}'.format(position, len(text), max_chars))

    # A single request must stay below max_chars characters, so send the
    # texts in batches.
    # NOTE(review): assumes translate returns one text per input, in the
    # same order - the positional write-back below relies on it.
    translated = []
    batch = []
    batch_chars = 0
    for text in texts:
        if batch and batch_chars + len(text) >= max_chars:
            translated.extend(translate(source, target, batch))
            batch = []
            batch_chars = 0
        batch.append(text)
        batch_chars += len(text)
    if batch:
        translated.extend(translate(source, target, batch))

    if len(translated) < len(subs):
        raise ValueError(
            'translate returned {} texts for {} subtitles'.format(
                len(translated), len(subs)))

    # Replace each cue's content with its translation.
    for sub, new_text in zip(subs, translated):
        sub.content = srt.make_legal_content(new_text)

    # Compose before opening the target so a failure here cannot leave a
    # truncated output file behind.
    srt_s_ = srt.compose(subs)
    with open(new_file_path, 'w', encoding='utf-8') as new_file:
        new_file.write(srt_s_)
def break_sentences(args, subs, alternative):
    """Split recognized words into subtitle cues and append them to ``subs``.

    Words from ``alternative.words`` are accumulated into one cue until a
    word contains ``.``, ``!`` or ``?``, the summed word length exceeds
    ``args.max_chars``, or a word that is not the first of its cue contains
    ``,``. Cue boundaries are built as ``HH:MM:SS,ms`` strings from each
    word's ``seconds``/``nanos`` fields and converted with
    ``srt.srt_timestamp_to_timedelta``. Words left over after the last
    break are emitted as a final cue instead of being discarded.

    Args:
        args: Object exposing ``max_chars``, the per-cue character limit.
        subs: List of ``srt.Subtitle`` objects; extended in place.
        alternative: Object whose ``words`` items expose ``word``,
            ``start_time`` and ``end_time`` (both with ``seconds`` and
            ``nanos``).

    Returns:
        The ``subs`` list that was passed in.
    """
    first_word = True
    char_count = 0
    idx = len(subs) + 1
    content = ""
    start = None
    last_word = None

    def timestamp_parts(ts):
        # Returns ("HH:MM:SS", "HH:MM:SS,ms"); nanoseconds are truncated
        # to whole milliseconds.
        hhmmss = time.strftime('%H:%M:%S', time.gmtime(ts.seconds))
        return hhmmss, hhmmss + "," + str(int(ts.nanos / 1000000))

    def emit(end_time):
        # Reads idx/start/content from the enclosing scope at call time.
        end_hhmmss, end = timestamp_parts(end_time)
        print("end_hhmmss: " + end_hhmmss)
        subs.append(
            srt.Subtitle(index=idx,
                         start=srt.srt_timestamp_to_timedelta(start),
                         end=srt.srt_timestamp_to_timedelta(end),
                         content=srt.make_legal_content(content)))

    for w in alternative.words:
        if first_word:
            # First word of the cue: remember where the cue starts.
            start_hhmmss, start = timestamp_parts(w.start_time)
            print("start_hhmmss: " + start_hhmmss)
            print("start: " + start)
        last_word = w

        char_count += len(w.word)
        content += " " + w.word.strip()

        # Break at sentence punctuation or when the line is too long; a
        # comma also breaks, except on the very first word of a cue.
        if (any(mark in w.word for mark in ".!?")
                or char_count > args.max_chars
                or ("," in w.word and not first_word)):
            emit(w.end_time)
            first_word = True
            idx += 1
            content = ""
            char_count = 0
        else:
            first_word = False

    # Flush the tail: without this, trailing words that never hit a break
    # condition would be silently lost.
    if content:
        emit(last_word.end_time)

    return subs
def merge_two_subtitles(index, sub_title_left, sub_title_right):
    """Combine two subtitles into one that covers both of them.

    The result starts at the earlier of the two start times, ends at the
    later of the two end times, and carries the left content, a newline,
    and the right content (passed through ``srt.make_legal_content``).

    Args:
        index: Index given to the merged subtitle.
        sub_title_left: Subtitle whose content goes on top.
        sub_title_right: Subtitle whose content goes underneath.

    Returns:
        A new ``srt.Subtitle``.
    """
    combined_text = '{}\n{}'.format(sub_title_left.content,
                                    sub_title_right.content)
    earliest = min(sub_title_left.start, sub_title_right.start)
    latest = max(sub_title_left.end, sub_title_right.end)
    return srt.Subtitle(
        index,
        earliest,
        latest,
        srt.make_legal_content(combined_text),
    )
def get_lines(self):
    """Parse the contents of ``self.file`` and rewind it.

    Everything readable from ``self.file`` is passed through
    ``srt.make_legal_content`` and then ``srt.parse``. The file position
    is reset to 0 before returning.

    Returns:
        Whatever ``srt.parse`` returns for the cleaned text.
    """
    raw_text = self.file.read()
    cleaned = srt.make_legal_content(raw_text)
    parsed = srt.parse(cleaned)
    # Rewind so the file can be read again from the start.
    self.file.seek(0)
    return parsed