示例#1
0
文件: utils.py 项目: Indigo6/pdf_ocr
def simplify_ass(cfg):
    """Clean up the subtitle file at ``cfg.OUT`` and save it back in place.

    Events whose text fails ``is_contain_chinese`` are removed. An event whose
    text equals the text of the most recently kept event is merged into that
    event by extending the kept event's end time, and is then removed.
    """
    subs = SSAFile.load(cfg.OUT)

    kept_text = None  # text of the most recently kept event
    kept_pos = 0      # index of that event inside ``subs``
    pos = 0
    while pos < len(subs):
        event = subs[pos]

        if not is_contain_chinese(event.text):
            # Deleting shifts the next event into ``pos``; do not advance.
            del subs[pos]
            continue

        if event.text == kept_text:
            # Same text as the kept event: stretch it to cover this one.
            subs[kept_pos].end = event.end
            del subs[pos]
            continue

        kept_pos, kept_text = pos, event.text
        pos += 1

    subs.save(cfg.OUT)
示例#2
0
# Accumulates the subtitles of every file found in work_dir; stays None until
# the first file has been loaded.
subtitle_ass: SSAFile = None

# Raw strings keep the Windows backslashes literal without relying on
# unrecognised escape sequences such as "\B" or "\C".
# work_dir = r'D:\BaiduYunDownload\Hyouka\Hyouka-01'
# work_dir = r'D:\BaiduYunDownload\Oreimo\Oreimo-02'
# work_dir = r'D:\BaiduYunDownload\GirlsLastTour\GirlsLastTour-01'
# work_dir = r'D:\BaiduYunDownload\SAOII\mkv\SAOII-08'
work_dir = r'D:\BaiduYunDownload\Children\Children-02'

file_list = os.listdir(work_dir)
# Starts at 1 and is incremented once per regular file that gets loaded.
i = 1
for filename in file_list:
    file_path = os.path.join(work_dir, filename)
    if not os.path.isfile(file_path):
        continue  # only regular files directly inside work_dir are loaded
    i += 1
    sub = SSAFile.load(file_path)
    # c.ass and m.ass are shifted by one second before being merged.
    if filename in ('c.ass', 'm.ass'):
        sub.shift(s=1)
    if subtitle_ass:
        subtitle_ass += sub
    else:
        subtitle_ass = sub
# for parent, dirnames, filenames in os.walk(work_dir,  followlinks=True):
#     for filename in filenames:
#         file_path = os.path.join(parent, filename)
#         sub = SSAFile.load(file_path)
#         if subtitle_ass:
#             subtitle_ass += sub
#         else:
示例#3
0
文件: __main__.py 项目: vxzms/iriya
def main(argv=None):
    """Check that the text shown by the given ASS files exists in its fonts.

    Parses the command line from *argv* (argparse reads ``sys.argv`` when it
    is ``None``). For every non-comment event of every file, the event text is
    split at override-tag blocks matched by ``TAG_RE``; each displayed run of
    text is checked against the ``FontContext`` for the font key active at
    that point. ``\\r``, ``\\fn``, ``\\b`` and ``\\i`` override tags update
    the active font key for the text that follows them.

    Characters reported by ``FontContext.check`` are logged as warnings.

    Always terminates through ``sys.exit``: status 1 if any warning about a
    missing character was logged, 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        prog=ENTRYPOINT,
        description=
        "Checks whether all characters in ASS file exist in declared fonts")
    parser.add_argument("files", nargs="+")
    parser.add_argument("--log-level",
                        "-l",
                        default="info",
                        choices=("debug", "info", "warn", "error"))

    args = parser.parse_args(argv)
    logging.basicConfig(level=args.log_level.upper())
    log = logging.getLogger(ENTRYPOINT)
    contexts = {}  # font key -> FontContext, created lazily and reused
    have_nonexistent_char = False

    def get_context(key):
        """Return the cached FontContext for *key*, creating it on first use."""
        if key not in contexts:
            log.debug("New font: %s", key)
            contexts[key] = FontContext(**key._asdict())

        return contexts[key]

    for name in args.files:
        ssa = SSAFile.load(name)
        # Comment events still consume a line number, so numbering follows
        # the position of the event in the file.
        for line_number, event in enumerate(ssa.events, start=1):
            # Ask the event directly instead of substring-matching its repr,
            # which would also match dialogue text containing "type=Comment".
            if event.is_comment:
                continue
            style = ssa.styles[event.style]
            key = key_from_style(style)
            context = get_context(key)

            text = event.text
            while text:
                m = TAG_RE.search(text)
                # Everything before the next override block is displayed
                # with the currently active font.
                display_text = text if not m else text[:m.start()]
                log.debug("Text block: %s", display_text)
                result = context.check(display_text)
                if result:
                    have_nonexistent_char = True
                    log.warning("%s Dialogue #%s: [%s] does not exist in %s",
                                name, line_number, "".join(result), key)

                text = "" if not m else text[m.end():]
                if not text:
                    # Nothing is displayed after this point, so the tags of a
                    # trailing override block do not need to be interpreted.
                    break

                # Drop the first and last character of the matched block and
                # split the remainder into individual tags.
                ovr_tags = m.group(0)[1:-1].split("\\")
                log.debug("Override tags: %s", ovr_tags)
                for tag in ovr_tags:
                    tag = tag.rstrip()
                    if tag[:1].lower() == "r":
                        # Bare "r" resets to the event's own style; otherwise
                        # the rest of the tag names the style to switch to.
                        if len(tag) == 1:
                            key = key_from_style(style)
                        else:
                            style_str = tag[1:]
                            key = key_from_style(ssa.styles[style_str])
                        continue

                    tag_match = TAG_PART_RE.match(tag)
                    if not tag_match:
                        continue

                    tag_type = tag_match.group(1).lower()
                    tag_value = tag_match.group(2)
                    if tag_type == "fn":
                        key = key._replace(name=tag_value)
                    elif tag_type == "b":
                        key = key._replace(bold=bool(int(tag_value)))
                    elif tag_type == "i":
                        key = key._replace(italics=bool(int(tag_value)))

                context = get_context(key)

    sys.exit(1 if have_nonexistent_char else 0)
示例#4
0
    def add_word(self,
                 word,
                 collection,
                 start,
                 end,
                 name,
                 add_type,
                 word_type,
                 group,
                 word_id='',
                 wordset_id=''):
        """Cut a video clip and matching subtitles for *word* and upload them.

        Searches ``D:\\BaiduYunDownload`` recursively for a file called
        *name*, writes the ``[start, end]`` section of it as an ``.mp4`` into
        the ``videos`` sub-directory, builds ``.srt`` and ``.vtt`` files from
        the subtitles that lie entirely inside that section (with *word*
        wrapped in a ``<c.video-heightlight>`` tag and times shifted so the
        clip starts at zero), and POSTs the clip, the ``.vtt`` file and the
        word metadata to ``http://<server_ip>/video``.

        Args:
            word: the word to highlight; surrounding whitespace is ignored
                for the file name and the highlighting.
            collection: sent as ``wordbase_collection``.
            start: clip start in seconds (anything ``float`` accepts).
            end: clip end in seconds (anything ``float`` accepts).
            name: file name of the source video; the part before the first
                ``.`` also names the sibling ``.srt`` file that is read.
            add_type, word_type, group, word_id, wordset_id: passed through
                unchanged in the POST form data.

        Returns:
            The string ``"true"``.

        Raises:
            FileNotFoundError: if no file called *name* exists below the
                work directory.
        """
        clean_word = word.strip()
        pure_filename = name.split('.')[0]
        puresave_filename = pure_filename + "~" + clean_word

        data = {
            'filename': puresave_filename,
            'wordbase_collection': collection,
            'word': word,
            'add_type': add_type,
            'word_type': word_type,
            'group': group,
            'word_id': word_id,
            'wordset_id': wordset_id,
        }

        start_time = float(start)
        end_time = float(end)

        # Backslashes are escaped explicitly; "\B" is not a valid escape.
        work_dir = "D:\\BaiduYunDownload"
        output_base = work_dir + "\\videos\\" + puresave_filename

        # Locate the first file called ``name`` anywhere below work_dir.
        file_path = ""
        parent_path = ""
        for parent, _dirnames, filenames in os.walk(work_dir, followlinks=True):
            if name in filenames:
                parent_path = parent
                file_path = os.path.join(parent, name)
                break
        if not file_path:
            # Fail with a clear message instead of handing an empty path to
            # the video loader.
            raise FileNotFoundError(
                "video file %r not found under %r" % (name, work_dir))

        subfile_path = os.path.join(parent_path, pure_filename + ".srt")
        target = output_base + ".mp4"

        video_clip = VideoFileClip(file_path)
        try:
            clip = video_clip.subclip(start_time, end_time)
            clip.write_videofile(target,
                                 codec='libx264',
                                 verbose=False,
                                 audio=True)
        finally:
            # Release the reader even when cutting or encoding fails.
            video_clip.close()

        subtitle = SSAFile.load(subfile_path)
        # Placeholder SRT content used to create the output subtitle file;
        # identical to the original multi-line literal.
        placeholder_srt = (
            "\n"
            "        1\n"
            "        00:00:00,000 --> 00:00:00,000\n"
            "        \n"
            "        "
        )
        temp = SSAFile().from_string(placeholder_srt)
        highlighted = '<c.video-heightlight>' + clean_word + '</c>'
        for sub in subtitle:
            # Subtitle times are compared in milliseconds; only lines that
            # fit completely inside the clip are kept.
            if sub.start >= start_time * 1000 and sub.end <= end_time * 1000:
                sub.text = sub.text.replace(clean_word, highlighted)
                sub.shift(s=-start_time)
                temp.append(sub)

        srt_path = output_base + '.srt'
        vtt_path = output_base + '.vtt'
        temp.save(srt_path)
        vtt = WebVTT().from_srt(srt_path)
        vtt.save(vtt_path)

        # Context managers make sure both upload handles are closed again.
        with open(target, "rb") as video_file, \
                open(vtt_path, "rb") as subtitle_file:
            files = {
                "video": video_file,
                "subtitle": subtitle_file,
            }
            requests.post('http://' + server_ip + '/video',
                          data=data,
                          files=files)

        return "true"
示例#5
0
def getalltext(filename):
    """Return the ``text`` of every line in subtitle file *filename*, in order."""
    return [entry.text for entry in SSAFile.load(filename)]
示例#6
0
文件: utils.py 项目: Indigo6/pdf_ocr
def ocr_with_timeline(video, video_path, box, ocr_reader, lang, main_window, progress_bar):
    """Fill the events of a pre-computed timeline with OCR'd subtitle text.

    Loads ``frame/<video name>/split_vision.ass``. For each event the video
    is positioned at the frame in the middle of the event, the frame is
    cropped with *box* and passed to ``ocr_reader.ocr``. Events with an empty
    OCR result are deleted; otherwise the recognised text is written into the
    event (and, for two languages, into the following event as well). The
    subtitles are saved next to the video, with the extension replaced by
    ``.ass``, after every processed step.

    Args:
        video: capture object providing ``get``, ``set`` and ``read``
            (presumably a ``cv.VideoCapture`` -- confirm against caller).
        video_path: path of the video; determines the frame directory name
            and the output file path.
        box: ``box[0]`` holds the slice bounds for the first frame axis and
            ``box[1]`` those for the second axis.
        ocr_reader: object whose ``ocr(image)`` returns a sequence of strings.
        lang: its length is the number of events consumed per OCR result; a
            length of 2 selects the Chinese + ASCII split.
        main_window: parent for the completion message box.
        progress_bar: object with ``setValue``; driven from 0 to 100 and
            reset to 0 at the end.
    """
    fps = video.get(cv.CAP_PROP_FPS)

    _, video_name = os.path.split(video_path)
    video_name = video_name.split('.')[0]
    frame_dir = 'frame/'+video_name+'/'

    subs = SSAFile.load(frame_dir+'/split_vision.ass')

    # splitext only strips a real extension of the last path component, so a
    # path without an extension or with a dotted directory still yields a
    # sensible output file.
    output_path = os.path.splitext(video_path)[0] + '.ass'

    total = len(subs)
    start = time.time()
    count = 0

    re_chinese = re.compile(u"[\u4e00-\u9fa5]+")
    re_ascii = re.compile(r'\w+', re.ASCII)

    step = len(lang)
    i = 0
    length = total
    while i < length:
        line = subs[i]
        # Event times are in milliseconds; sample the frame at the midpoint.
        srt_start = line.start / 1000
        srt_end = line.end / 1000
        srt_mid = (srt_start + srt_end) / 2
        frame_id = int(srt_mid * fps)

        video.set(cv.CAP_PROP_POS_FRAMES, frame_id)  # set the frame number to fetch
        _, frame = video.read()
        clipped_frame = frame[box[0][0]:box[0][1], box[1][0]:box[1][1]]

        result = ocr_reader.ocr(clipped_frame)
        print(result)
        if len(result) == 0:
            # Nothing recognised: drop the event and stay on the same index.
            del subs[i]
            length -= 1
        else:
            if step == 2:
                # Walk the OCR pieces from the end: pieces after the last one
                # containing Chinese are treated as the English line; that
                # piece and everything before it form the Chinese line.
                eng_parts = []
                ch_parts = []
                seen_chinese = False
                for piece in reversed(result):
                    if re_chinese.findall(piece):
                        seen_chinese = True
                    if seen_chinese:
                        ch_parts.append(piece)
                    else:
                        eng_parts.append(piece)
                eng_str = ' '.join(reversed(eng_parts))
                ch_str = ''.join(reversed(ch_parts))
                # NOTE(review): raises IndexError when no Chinese run was
                # recognised, or when event i is the last one -- confirm the
                # desired fallback.
                subs[i].text = re_chinese.findall(ch_str)[0]
                subs[i+1].text = ' '.join(re_ascii.findall(eng_str))
            else:
                subtitle = ''.join(result)
                # NOTE(review): raises IndexError when the OCR text contains
                # no Chinese characters -- confirm the desired fallback.
                subs[i].text = re_chinese.findall(subtitle)[0]

            i += step

        count += step
        progress_bar.setValue(int(count / total * 100) + 1)
        # Progress is reported and the file is saved after every step.
        elapsed = time.time() - start
        eta = (total - count) / count * elapsed
        print("[{}/{}], Elapsed: {}, ETA: {}".format(count, total, fmt_time(elapsed), fmt_time(eta)))
        subs.save(output_path, format_='ass')

    progress_bar.setValue(100)
    QMessageBox.information(main_window, "提示", "字幕生成成功!", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
    progress_bar.setValue(0)
示例#7
0
def split_subtitle():
    """Load ``./CHS_test.srt`` and print the plain text of its first subtitle."""
    first_line = SSAFile.load('./CHS_test.srt')[0]
    print(first_line.plaintext)
示例#8
0
                    cv.imwrite(
                        "{}/f{}_l{}.jpg".format(frame_dir, fc_start,
                                                fc - fc_start), last_srt_frame)
                    if output_segged_frame:
                        cv.imwrite(
                            "{}/f{}_l{}_segged.jpg".format(
                                frame_dir, fc_start, fc - fc_start),
                            last_srt_seg_frame)
                srt_count += 1
                fc_start = 0

        progress_bar.setValue(int(fc / frames_num * 100) + 1)
        # progress_bar.format(Q)
        elapsed = time.time() - time_start
        eta = (frames_num - fc) * elapsed / fc if fc > 0 else 0
        print('[%d/%d] Elapsed: %s, ETA: %s' %
              (fc, frames_num, fmt_time(elapsed), fmt_time(eta)))
    video_path.split('/')
    subs.save(frame_dir + '/split_vision.ass')
    progress_bar.setValue(100)
    QMessageBox.information(main_window, "提示", "时间轴生成成功!",
                            QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes)
    progress_bar.setValue(0)


if __name__ == "__main__":
    # Manual smoke test: load the empty demo file, append one 2.5 s event
    # and print a marker line.
    sub = SSAFile.load('demo/empty.ass')
    first_event = SSAEvent(start=0,
                           end=make_time(s=2.5),
                           text="New first subtitle")
    sub.append(first_event)
    print("test")