示例#1
0
def _index_ass(writer, object_id: str,
               events: Iterable[ass.document.Dialogue]):
    """Add each ASS dialogue event to *writer* as one indexed document.

    Args:
        writer: search-index writer exposing ``add_document``.
        object_id: identifier of the media object the events belong to.
        events: dialogue events parsed from an .ass file.
    """
    # Fix: the parameter was misspelled ``evevnts`` in the original.
    for idx, event in enumerate(events):
        writer.add_document(object_id=object_id,
                            start=srt.timedelta_to_srt_timestamp(event.Start),
                            end=srt.timedelta_to_srt_timestamp(event.End),
                            content=event.Text,
                            idx=idx)
示例#2
0
def _index_srt(writer, object_id: str, subtitles: Iterable[srt.Subtitle]):
    """Index every SRT cue of *subtitles* under the given object id."""
    to_stamp = srt.timedelta_to_srt_timestamp
    for cue in subtitles:
        writer.add_document(object_id=object_id,
                            start=to_stamp(cue.start),
                            end=to_stamp(cue.end),
                            content=cue.content,
                            idx=cue.index)
示例#3
0
def main():
    """Parse CLI arguments, search a .srt file for word(s), and write
    the matches as XML edit-decision entries and/or a pickled cut list.
    """
    parser = argparse.ArgumentParser(prog="find_word",
                                     description="Searching for strings in a subtitle file and generating an edit decision list")

    parser.add_argument("-i", "--inputfile", help="input .srt file", required=True)
    parser.add_argument("-o", "--outputfile", help="output .xml file")
    parser.add_argument("-w", "--word", help="search for word(s)", required=True)
    parser.add_argument("-c",
                        "--cut",
                        action="store_true",
                        help="Automatically cutting the video file. (input video file, output video file)")
    parser.add_argument("-v", "--verbose", action='store_true', help="verbose mode")

    args = parser.parse_args()

    # Verbose mode
    if args.verbose:
        print('Reading subtitle .srt file', CRED + args.inputfile + CEND)
        if args.outputfile is not None:
            print('Output .XML file is', CRED + args.outputfile + CEND)
        print('Search word(s) is/are', CRED + args.word + CEND)
        # BUG FIX: args.cut is a bool (store_true), never None, so the
        # old `is not None` test always fired; only report when --cut set.
        if args.cut:
            print(args.cut)

    # Close the subtitle file deterministically (the original leaked it).
    with open(args.inputfile, "r") as subtitle:
        data = list(srt.parse(subtitle))

    # Collect matching rows in a plain list: DataFrame.append was removed
    # in pandas 2.0, and rebuilding the frame per row was quadratic anyway.
    rows = []
    xml = open(args.outputfile, "w") if args.outputfile is not None else None
    try:
        for cue in data:
            if args.word not in cue.content:
                continue
            start = srt.timedelta_to_srt_timestamp(cue.start)
            end = srt.timedelta_to_srt_timestamp(cue.end)

            # Verbose mode
            if args.verbose:
                print(start, end)
            if xml is not None:
                # XML/EDL timestamps use '.' as the decimal separator.
                start = start.replace(',', '.')
                end = end.replace(',', '.')
                xml.write('''<entry producer="producer0" in="%s" out="%s" />\n''' % (start, end))
                rows.append({'start': start,
                             'end': end,
                             'content': cue.content})
    finally:
        if xml is not None:
            xml.close()

    cut_list = pandas.DataFrame(rows, columns=['start', 'end', 'content'])
    if args.cut:
        cut_list.to_pickle("./cut_list.pkl")
示例#4
0
def srt_to_po_converter(src_fp, dest_fp):
    """Translate an SRT byte stream into PO entries written to *dest_fp*."""
    raw_text = src_fp.read().decode("utf-8-sig")

    for cue in srt.parse(raw_text):
        msgid = NEW_LINE_TAG.join(cue.content.splitlines())
        timestamps = "{} --> {}".format(
            srt.timedelta_to_srt_timestamp(cue.start),
            srt.timedelta_to_srt_timestamp(cue.end))
        # PO comment carries the cue index plus its timing range.
        entry = polib.POEntry(msgid=msgid,
                              comment=f"{cue.index}\n{timestamps}")
        dest_fp.write(f"{entry}\n".encode("utf-8"))
示例#5
0
文件: test_srt.py 项目: smartree/srt
def test_parser_noncontiguous_ignore_errors(subs, fake_idx, garbage,
                                            fake_timedelta):
    """Garbage between blocks must be tolerated with ignore_errors=True."""
    stamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    noisy = srt.compose(subs).replace(
        "\n\n", "\n\n%d\n%s %s" % (fake_idx, stamp, garbage))
    # Should not raise, we have ignore_errors
    list(srt.parse(noisy, ignore_errors=True))
def get_subtitles_from_file(filename, index, url=None, name=None):
    """Load a subtitle file and return its cues wrapped in a JSON response.

    Args:
        filename: name of the .srt file under ``data/subtitles/``.
        index, url, name: forwarded unchanged to ``get_json_response``.
    """
    # BUG FIX: the path literal previously ignored ``filename`` entirely;
    # interpolate the parameter so the requested file is actually opened.
    # (The redundant f.close() inside the with-block was also dropped.)
    with open(f'data/subtitles/{filename}') as f:
        text = f.read()
    subtitles = srt.parse(text)

    transcriptions = [{
        'index': i + 1,
        'start': srt.timedelta_to_srt_timestamp(sub.start),
        'end': srt.timedelta_to_srt_timestamp(sub.end),
        'content': sub.content,
    } for i, sub in enumerate(subtitles)]

    return get_json_response(transcriptions, index, url, name)
示例#7
0
def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
    """Mid-stream garbage that resembles an SRT block must raise."""
    stamp = srt.timedelta_to_srt_timestamp(fake_timedelta)

    # Put some garbage between subs that should trigger our failed parsing
    # detection. Since we do some magic to try and detect blank lines that
    # don't really delimit subtitles, it has to look at least a little like an
    # SRT block.
    broken = srt.compose(subs).replace(
        "\n\n", "\n\n%d\n%s %s" % (fake_idx, stamp, garbage))

    with pytest.raises(srt.SRTParseError):
        list(srt.parse(broken))
示例#8
0
文件: test_srt.py 项目: cdown/srt
def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
    """Parsing must fail when SRT-looking garbage sits between blocks."""
    # Put some garbage between subs that should trigger our failed parsing
    # detection. Since we do some magic to try and detect blank lines that
    # don't really delimit subtitles, it has to look at least a little like an
    # SRT block.
    fake_block = "\n\n%d\n%s %s" % (
        fake_idx, srt.timedelta_to_srt_timestamp(fake_timedelta), garbage)
    corrupted = srt.compose(subs).replace("\n\n", fake_block)

    with assert_raises(srt.SRTParseError):
        list(srt.parse(corrupted))
示例#9
0
文件: test_srt.py 项目: cdown/srt
def test_parser_didnt_match_to_end_raises(subs, fake_idx, garbage, fake_timedelta):
    """Trailing unparsable content must report accurate error offsets."""
    stamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    tail = "\n\n%d\n%s %s" % (fake_idx, stamp, garbage)
    composed = "".join([sub.to_srt() for sub in subs] + [tail])

    with assert_raises(srt.SRTParseError) as thrown_exc:
        list(srt.parse(composed))

    # Since we will consume as many \n as needed until we meet the lookahead
    # assertion, leading newlines in the garbage tail will be stripped.
    stripped_tail = tail.lstrip("\n")

    eq(stripped_tail, thrown_exc.exception.unmatched_content)
    eq(len(composed) - len(stripped_tail), thrown_exc.exception.expected_start)
    eq(len(composed), thrown_exc.exception.actual_start)
示例#10
0
def test_parser_didnt_match_to_end_raises(subs, fake_idx, garbage, fake_timedelta):
    """Unmatched trailing content surfaces precise parse-error positions."""
    blocks = [sub.to_srt() for sub in subs]
    bad_tail = "\n\n%d\n%s %s" % (
        fake_idx, srt.timedelta_to_srt_timestamp(fake_timedelta), garbage)
    blocks.append(bad_tail)
    composed = "".join(blocks)

    with assert_raises(srt.SRTParseError) as thrown_exc:
        list(srt.parse(composed))

    # Leading newlines are consumed by the parser's lookahead, so they
    # never appear in the reported unmatched content.
    expected_unmatched = bad_tail.lstrip("\n")
    exc = thrown_exc.exception

    eq(expected_unmatched, exc.unmatched_content)
    eq(len(composed) - len(expected_unmatched), exc.expected_start)
    eq(len(composed), exc.actual_start)
示例#11
0
#%%
import srt

# %%
# Raw string avoids accidental backslash escapes in the Windows path
# (the original 'video\sub_rus2.srt' only worked because \s is not an
# escape sequence).
with open(r'video\sub_rus2.srt', mode="r", encoding="utf-8") as input_file:
    res = list(
        srt.parse("\n".join(line.rstrip() for line in input_file)))
# %%
res[0]
# %%
srt.compose(res)
# %%
srt.timedelta_to_srt_timestamp(res[0].start)
# %%
import re

res[0].content
# %%
s = "\nasd \tasd\nasd \t33 s\n\n asd"
# BUG FIX: the original pattern r"\r?\n|r" also deleted every literal
# letter 'r'; only line breaks (and tabs, below) should be stripped.
s = re.sub(r"\r?\n", '', s).replace("\t", '')

print(s)
# %%
# NOTE(review): `a` was never defined in this scratch file, so the
# original `print(a)` raised NameError; the broken cell was removed.
# %%
示例#12
0
def test_srt_timestamp_to_timedelta_too_short_raises(ts):
    """A timestamp missing its final digit must be rejected."""
    truncated = srt.timedelta_to_srt_timestamp(ts)[:-1]
    with assert_raises(ValueError):
        srt.srt_timestamp_to_timedelta(truncated)
示例#13
0
def test_timedelta_to_srt_timestamp_can_go_over_24_hours(days):
    """Hour fields must not be wrapped modulo 24."""
    stamp = srt.timedelta_to_srt_timestamp(timedelta(days=days))
    hours_field, _, _rest = stamp.partition(':')
    eq(int(hours_field), days * HOURS_IN_DAY)
 def composeSub(self, sub):
     """Render one subtitle cue as a standard SRT block string.

     Args:
         sub: cue object exposing ``index``, ``start``, ``end`` and
             ``content`` attributes (e.g. an ``srt.Subtitle``).

     Returns:
         str: the cue formatted as an SRT block, ending in a blank line.
     """
     start = srt.timedelta_to_srt_timestamp(sub.start)
     end = srt.timedelta_to_srt_timestamp(sub.end)
     return f"{sub.index}\n{start} --> {end}\n{sub.content}\n\n"
示例#15
0
文件: test_srt.py 项目: vpvn/srt
def test_srt_timestamp_to_timedelta_too_short_raises(ts):
    """Dropping the last character yields an invalid timestamp."""
    full_stamp = srt.timedelta_to_srt_timestamp(ts)
    with assert_raises(ValueError):
        srt.srt_timestamp_to_timedelta(full_stamp[:-1])
示例#16
0
文件: srt.py 项目: themucha/coldtype
def srt_to_frame(fps, st):
    """Convert a subtitle timedelta to a project-local frame number."""
    # Timecode expects '.' as the fractional separator, SRT uses ','.
    stamp = srt.timedelta_to_srt_timestamp(st).replace(",", ".")
    frame = Timecode(fps, stamp).frame_number
    # NOTE(review): 86400 and 21600 look like fixed timeline frame
    # offsets — confirm their meaning against the project setup.
    return frame - 86400 - 21600
示例#17
0
def get_transcriptions(response, bin_size=3):
    """Group word-level speech-recognition timings into subtitle bins.

    Walks ``response.results`` (Google Cloud Speech-to-Text style:
    ``result.alternatives[0].words`` with per-word ``start_time`` /
    ``end_time`` carrying ``seconds`` and ``nanos``) and concatenates
    consecutive words into bins of roughly *bin_size* seconds each.

    Args:
        response: recognition response with word-level timing info.
        bin_size: target bin length in seconds — TODO confirm units.

    Returns:
        list[dict]: one dict per bin with keys 'index', 'start', 'end'
        (SRT-formatted timestamps) and 'content' (the binned text).
    """
    transcriptions = []
    index = 0

    for result in response.results:
        try:
            if result.alternatives[0].words[0].start_time.seconds:
                # bin start -> for first word of result
                start_sec = result.alternatives[0].words[0].start_time.seconds
                # nanos * 0.001 converts nanoseconds -> microseconds
                start_microsec = result.alternatives[0].words[
                    0].start_time.nanos * 0.001
            else:
                # bin start -> For First word of response
                start_sec = 0
                start_microsec = 0
            end_sec = start_sec + bin_size  # bin end sec

            # for last word of result
            last_word_end_sec = result.alternatives[0].words[
                -1].end_time.seconds
            last_word_end_microsec = result.alternatives[0].words[
                -1].end_time.nanos * 0.001

            # bin transcript
            transcript = result.alternatives[0].words[0].word

            index += 1  # subtitle index

            # Walk the remaining words; each iteration looks at word i+1
            # and closes the current bin when it would overrun end_sec.
            for i in range(len(result.alternatives[0].words) - 1):
                try:
                    word = result.alternatives[0].words[i + 1].word
                    word_start_sec = result.alternatives[0].words[
                        i + 1].start_time.seconds
                    word_start_microsec = result.alternatives[0].words[
                        i +
                        1].start_time.nanos * 0.001  # 0.001 to convert nana -> micro
                    word_end_sec = result.alternatives[0].words[
                        i + 1].end_time.seconds
                    # NOTE(review): word_end_microsec is computed but never
                    # read — bin boundaries use whole seconds only.
                    word_end_microsec = result.alternatives[0].words[
                        i + 1].end_time.nanos * 0.001

                    if word_end_sec < end_sec:
                        transcript = transcript + " " + word
                    else:
                        # Bin is full: close it at the end of word i.
                        previous_word_end_sec = result.alternatives[0].words[
                            i].end_time.seconds
                        previous_word_end_microsec = result.alternatives[
                            0].words[i].end_time.nanos * 0.001

                        # append bin transcript
                        # transcriptions.append(srt.Subtitle(index, datetime.timedelta(0, start_sec, start_microsec), datetime.timedelta(0, previous_word_end_sec, previous_word_end_microsec), transcript))
                        transcriptions.append({
                            'index':
                            index,
                            'start':
                            srt.timedelta_to_srt_timestamp(
                                datetime.timedelta(0, start_sec,
                                                   start_microsec)),
                            'end':
                            srt.timedelta_to_srt_timestamp(
                                datetime.timedelta(
                                    0, previous_word_end_sec,
                                    previous_word_end_microsec)),
                            'content':
                            transcript
                        })
                        # reset bin parameters
                        start_sec = word_start_sec
                        start_microsec = word_start_microsec
                        end_sec = start_sec + bin_size
                        transcript = result.alternatives[0].words[i + 1].word

                        index += 1
                except IndexError:
                    pass
            # append transcript of last transcript in bin
            # transcriptions.append(srt.Subtitle(index, datetime.timedelta(0, start_sec, start_microsec), datetime.timedelta(0, last_word_end_sec, last_word_end_microsec), transcript))
            transcriptions.append({
                'index':
                index,
                'start':
                srt.timedelta_to_srt_timestamp(
                    datetime.timedelta(0, start_sec, start_microsec)),
                'end':
                srt.timedelta_to_srt_timestamp(
                    datetime.timedelta(0, last_word_end_sec,
                                       last_word_end_microsec)),
                'content':
                transcript
            })
            index += 1
        except IndexError:
            # results with no words are skipped entirely
            pass

    return transcriptions
示例#18
0
def test_timedelta_to_srt_timestamp_can_go_over_24_hours(days):
    """Hour values past one day must not wrap around."""
    stamp = srt.timedelta_to_srt_timestamp(timedelta(days=days))
    leading_hours = int(stamp.split(":", 1)[0])
    assert leading_hours == days * HOURS_IN_DAY
示例#19
0
文件: test_srt.py 项目: rodericc/srt
def test_timedelta_to_srt_timestamp_can_go_over_24_hours(days):
    """Timestamps beyond 24 hours keep accumulating hours."""
    stamp = srt.timedelta_to_srt_timestamp(timedelta(days=days))
    hour_digits = stamp[:stamp.index(':')]
    eq(int(hour_digits), days * HOURS_IN_DAY)
示例#20
0
def test_bad_timestamp_format_raises(ts):
    """Corrupting the first ':' separator must make parsing fail."""
    bad_ts = srt.timedelta_to_srt_timestamp(ts).replace(":", "t", 1)
    with pytest.raises(srt.TimestampParseError):
        srt.srt_timestamp_to_timedelta(bad_ts)
captionCC = list(cp)

# Indices of cues whose text opens with one of the trigger words.
CCPointList = []
# startswith accepts a tuple of prefixes, so one call covers both triggers.
for i, cue in enumerate(captionCC):
    if cue.content.startswith(("what", "george")):
        CCPointList.append(i)
        print(cue.content)
        print(cue.start)

print(CCPointList)

filename = "Podcast1.mp4"
n = 0
# Each pair of consecutive trigger points delimits one exported clip;
# the final trigger point runs to the end of the captions.
if CCPointList:
    lastNumber = CCPointList[-1]  # hoisted: loop-invariant
    for j, point in enumerate(CCPointList):
        n += 1
        starttime = srt.timedelta_to_srt_timestamp(captionCC[point].start)
        # BUG FIX: the old `CCPointList[j] >= 0` guard was always true
        # (list indices are non-negative), so it has been dropped and the
        # two near-identical branches merged.
        if point != lastNumber:
            endtime = srt.timedelta_to_srt_timestamp(
                captionCC[CCPointList[j + 1]].start)
        else:
            endtime = srt.timedelta_to_srt_timestamp(captionCC[-1].end)
        clip = VideoFileClip(filename).subclip(starttime, endtime)
        clip.write_videofile(f"clip{n}.mp4")
        clip.close()