Пример #1
0
def test_sort_and_reindex_not_in_place_matches(input_subs, start_index):
    """Check that copying and in-place ``sort_and_reindex`` agree.

    Both modes must yield equal subtitles. The copying mode must return only
    new objects, while the in-place mode must return only the objects it was
    handed.
    """

    def clone_input():
        # Independent copies, so that one call cannot influence the other.
        return [srt.Subtitle(**vars(original)) for original in input_subs]

    copied_input = clone_input()
    reused_input = clone_input()

    copied_input_ids = [id(item) for item in copied_input]
    reused_input_ids = [id(item) for item in reused_input]

    copied_result = list(
        srt.sort_and_reindex(copied_input, start_index=start_index))
    reused_result = list(
        srt.sort_and_reindex(reused_input,
                             start_index=start_index,
                             in_place=True))

    # Same content regardless of the mode used.
    subs_eq(copied_result, reused_result)

    # Copying mode: none of the returned objects is one of the inputs.
    assert_false(any(id(item) in copied_input_ids for item in copied_result))

    # In-place mode: every returned object is one of the inputs.
    assert_true(all(id(item) in reused_input_ids for item in reused_result))
Пример #2
0
 def generate_subtitles(self):
     """Group the words of ``self.render_list`` into SRT captions.

     Each item of ``self.render_list`` contributes ``item.word`` to the
     current caption and ``item.get_duration()`` (rounded to two decimals)
     to a running clock. Once a caption reaches 32 characters it is emitted
     as a subtitle spanning the time since the previous caption ended.
     Whatever text is left after the last item becomes a final subtitle.

     The result is stored in ``self.srt_list`` and returned. An empty
     ``render_list`` gives an empty list.

     :return: list of ``srt.Subtitle`` objects, indexed from 1
     """
     max_caption_chars = 32
     srt_list = []
     caption = ''
     start = 0
     end = 0
     counter = 1

     def make_subtitle():
         # Snapshot of the caption being built, spanning [start, end].
         return srt.Subtitle(index=counter,
                             start=timedelta(seconds=start),
                             end=timedelta(seconds=end),
                             content=caption)

     for include in tqdm(self.render_list):
         caption += include.word
         # Round after every step so float error does not accumulate.
         end = round(end + round(include.get_duration(), 2), 2)
         if len(caption) >= max_caption_chars:
             srt_list.append(make_subtitle())
             counter += 1
             start = end
             caption = ''

     # Only flush real leftovers: this avoids an empty, zero-length subtitle
     # when the last word closed a caption, and a crash on an empty list.
     if caption:
         srt_list.append(make_subtitle())

     self.srt_list = srt_list
     return srt_list
Пример #3
0
def merge_close_subtitles(subs: List[srt.Subtitle], dist=0.05) -> List[srt.Subtitle]:
    """
    Usually one long phrase is broken down into a multitude of subtitles having
    some fixed time distance between them. This method reconstructs it from
    the separate pieces.

    A merged subtitle keeps the index, start and proprietary data of its first
    piece, takes the end of its last piece, and joins the contents with
    newlines. Subtitles that start a new group are re-indexed by their position
    in the output; the very first subtitle is passed through unchanged.

    :param subs: List of subtitles, in chronological order
    :param dist: Distance between two subtitles in order to be considered as part of one
    :return: List of subtitles where subtitles having overlapping
     with regard to dist intervals are merged into one; an empty list
     for empty input
    """
    # Nothing to merge; also avoids indexing subs[0] on an empty list.
    if len(subs) == 0:
        return []

    # Gap i is the pause between subs[i] and subs[i + 1].
    starts = np.array([sub.start.total_seconds() for sub in subs])[1:]
    ends = np.array([sub.end.total_seconds() for sub in subs])[:-1]
    # NOTE(review): np.round() without `decimals` rounds the gap to whole
    # seconds before the comparison, so gaps under half a second are treated
    # as 0 regardless of `dist`. Kept as-is; confirm this is intended.
    to_merge = (np.round(starts - ends)) <= dist

    merged_subs = [subs[0]]
    for i, close_enough in enumerate(to_merge, 1):
        current = subs[i]
        if close_enough:
            head = merged_subs[-1]
            merged_subs[-1] = srt.Subtitle(head.index, head.start, end=current.end,
                                           content=f"{head.content}\n{current.content}",
                                           proprietary=head.proprietary)
        else:
            merged_subs.append(
                srt.Subtitle(len(merged_subs) + 1, current.start, end=current.end,
                             content=current.content,
                             proprietary=current.proprietary))
    return merged_subs
Пример #4
0
def add(subs, start, end, content="", adjust=False):
    """
    Inserts a new subtitle into a stream of subtitles at its chronological
    position and re-indexes the whole output starting from 1.

    The new subtitle goes in front of the first existing subtitle that starts
    later, or that starts at the same time but ends later. If there is no such
    subtitle it is emitted last. Because this is a generator, nothing
    (including the argument check) runs until iteration starts.

    :param subs: :py:class:`Subtitle` objects
    :param datetime.timedelta start: The timestamp the subtitle starts at.
    :param datetime.timedelta end: The timestamp the subtitle ends at.
    :param str content: The text of the new subtitle.
    :param boolean adjust: Whether to shift the subtitles after the new one
                           later by the new subtitle's duration.
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    :raises ValueError: If ``end`` is not later than ``start``.
    """
    if end <= start:
        raise ValueError(
            "The end timestamp can't occur before or at the start timestamp.")

    # Wrap plain iterables (e.g. lists) so they can be consumed item by item.
    if not isinstance(subs, GeneratorType):
        subs = (item for item in subs)

    inserted = False
    position = 1
    shift = datetime.timedelta(0)

    # The loop ends once _utils.tryNext() hands back None.
    current = _utils.tryNext(subs)
    while current is not None:
        current_start = current.start

        if not inserted:
            same_start_but_shorter = (start == current_start
                                      and end < current.end)
            if same_start_but_shorter or start < current_start:
                yield srt.Subtitle(position, start, end, content)
                position += 1
                inserted = True
                if adjust:
                    shift = end - start

        yield srt.Subtitle(
            position,
            current_start + shift,
            current.end + shift,
            current.content,
        )
        position += 1
        current = _utils.tryNext(subs)

    # Later than everything else (or the input was empty): emit it last.
    if not inserted:
        yield srt.Subtitle(position, start, end, content)
Пример #5
0
def _extend_or_append(track, text, current_time, step):
    """Record ``text`` for the current frame on a subtitle track.

    If the last subtitle of ``track`` already shows ``text`` its end is moved
    forward; otherwise a new one-frame subtitle is appended. The very first
    subtitle of a track always starts at zero.
    """
    if track:
        start = datetime.timedelta(seconds=current_time) - step
    else:
        start = datetime.timedelta(milliseconds=0)
    end = start + step

    if track and track[-1].content == text:
        track[-1].end = end
    else:
        track.append(
            srt.Subtitle(index=len(track) + 1,
                         start=start,
                         end=end,
                         content=text))


def generate_srt(ctx, result, **kwargs):
    """Accumulate per-frame subtitles and write the srt files near the end.

    Reads ``output_fps``, ``frame_num`` and ``duration`` from
    ``kwargs['metadata']``. The counts of ``result['detection_classes']`` go
    to the module-level ``object_srt`` track; when ``ctx.build_caption`` is
    set and captions are present, the first caption goes to ``caption_srt``.
    As soon as the current time is within two seconds of ``duration``, both
    tracks are written to the files named in ``PARAMS`` (on every such call).
    """
    metadata = kwargs['metadata']
    fps = metadata['output_fps']
    frame = metadata['frame_num']
    current_time = float(frame) / fps
    # Length of a single frame.
    step = datetime.timedelta(milliseconds=1. / fps * 1000)
    duration = metadata['duration']

    global object_srt
    global caption_srt

    class_counts = collections.Counter(result['detection_classes'])
    classes_string = ', '.join(
        [f'{name}: {count}' for name, count in class_counts.items()])
    _extend_or_append(object_srt, classes_string, current_time, step)

    if ctx.build_caption:
        captions = result['captions']
        if len(captions) > 0:
            _extend_or_append(caption_srt, captions[0], current_time, step)

    if current_time + 2 >= duration:
        with open(PARAMS['objects_srt_file'], 'w') as sw:
            sw.write(srt.compose(object_srt))
        with open(PARAMS['captions_srt_file'], 'w') as sw:
            sw.write(srt.compose(caption_srt))
Пример #6
0
def transcribe():
    """Feed the audio from ``process.stdout`` to ``rec`` and build subtitles.

    Audio is read in 4000-byte chunks until the stream is exhausted. Every
    recognizer result that carries a ``result`` word list is cut into lines
    of ``WORDS_PER_LINE`` words, each spanning from the start of its first
    word to the end of its last word.

    Returns:
        List of ``srt.Subtitle`` objects, indexed from 0 in creation order.
    """
    raw_results = []
    chunk = process.stdout.read(4000)
    while len(chunk) != 0:
        if rec.AcceptWaveform(chunk):
            raw_results.append(rec.Result())
        chunk = process.stdout.read(4000)
    raw_results.append(rec.FinalResult())

    subs = []
    for raw in raw_results:
        parsed = json.loads(raw)
        if 'result' not in parsed:
            # No word-level details in this result.
            continue
        words = parsed['result']
        for offset in range(0, len(words), WORDS_PER_LINE):
            group = words[offset:offset + WORDS_PER_LINE]
            subs.append(
                srt.Subtitle(
                    index=len(subs),
                    content=" ".join([entry['word'] for entry in group]),
                    start=datetime.timedelta(seconds=group[0]['start']),
                    end=datetime.timedelta(seconds=group[-1]['end'])))
    return subs
Пример #7
0
def create_subtitles(caption: Groups) -> List[srt.Subtitle]:
    """
    Turn every caption group into one srt.Subtitle.

    The words of a group are joined with spaces, after which the space in
    front of ``, . ? !`` and the space following a newline are removed. The
    subtitle runs from the start of the group's first element to the end of
    its last element and is indexed by the group's position (from 0).

    Args:
        caption: The caption groups; each group is a sequence of elements
            exposing ``text``, ``start`` and ``end``.

    Returns:
        List of srt.Subtitle instances, one per caption group.
    """
    space_before_punctuation = re.compile(r' ([,.?!])')
    space_after_newline = re.compile(r'\n ')

    def to_subtitle(position, group):
        joined = ' '.join(word.text for word in group)
        without_punct_gap = space_before_punctuation.sub(r'\g<1>', joined)
        cleaned = space_after_newline.sub(r'\n', without_punct_gap)
        begins = timedelta(seconds=group[0].start)
        finishes = timedelta(seconds=group[-1].end)
        return srt.Subtitle(position, begins, finishes, cleaned)

    return [to_subtitle(position, group)
            for position, group in enumerate(caption)]
Пример #8
0
async def run_test(uri):
    """Stream the wave file named by ``sys.argv[1]`` to a recognition server.

    The sample rate is sent first as a config message, then the audio in
    0.2-second chunks (one reply is read per chunk), and finally an ``eof``
    marker. Every reply carrying a ``result`` word list is cut into lines of
    ``WORDS_PER_LINE`` words, and the resulting subtitles are printed in SRT
    format.

    :param uri: websocket address of the server
    """
    async with websockets.connect(uri) as websocket:
        results = []
        # Context manager ensures the wave file is closed even if the
        # connection fails mid-stream.
        with wave.open(sys.argv[1], "rb") as wf:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (wf.getframerate()))

            buffer_size = int(wf.getframerate() * 0.2)  # 0.2 seconds of audio
            while True:
                data = wf.readframes(buffer_size)
                if len(data) == 0:
                    break

                await websocket.send(data)
                results.append(await websocket.recv())

        await websocket.send('{"eof" : 1}')
        results.append(await websocket.recv())

        subs = []
        for res in results:
            jres = json.loads(res)
            if 'result' not in jres:
                # No word-level details in this reply.
                continue
            words = jres['result']
            for j in range(0, len(words), WORDS_PER_LINE):
                line = words[j:j + WORDS_PER_LINE]
                subs.append(
                    srt.Subtitle(
                        index=len(subs),
                        content=" ".join([entry['word'] for entry in line]),
                        start=datetime.timedelta(seconds=line[0]['start']),
                        end=datetime.timedelta(seconds=line[-1]['end'])))

        print(srt.compose(subs))
Пример #9
0
 def run(self) -> None:
     """
     Create the translated srt file.

     Every subtitle of the source file is translated from the configured
     source language to the target language, reported on stdout, and the
     result is written under the destination directory with the source
     file's name.

     :return: None
     """
     with self.__src_file.open('rt') as fp:
         source_text = fp.read()
     filename = self.__src_file.name
     translator = google_translator()

     translated_subs = []
     for original in srt.parse(source_text):
         new_content = translator.translate(original.content,
                                            lang_src=self.__lang_src,
                                            lang_tgt=self.__lang_tgt)
         # Index, timing and proprietary data are kept; only the text changes.
         translated = srt.Subtitle(index=original.index,
                                   start=original.start,
                                   end=original.end,
                                   content=new_content,
                                   proprietary=original.proprietary)
         sys.stdout.write('[{0}][{1}]: {2}\n'.format(
             filename, translated.index, translated.content))
         translated_subs.append(translated)

     # Write the translated srt file.
     target = self.__dst_dirpath / self.__src_file.name
     with target.open('wt') as fp:
         fp.write(srt.compose(translated_subs))
Пример #10
0
def break_sentences(args, subs, alternative):
    """Split one recognition alternative into subtitles and append them.

    Words are accumulated until one of them contains ``.``, ``!`` or ``?``,
    the accumulated word length exceeds ``args.max_chars``, or a word that is
    not the first of its sentence contains ``,``. The accumulated text then
    becomes one subtitle, running from the start of its first word to the end
    of its last word. Words left over after the last break are emitted as a
    final subtitle instead of being dropped.

    :param args: object exposing ``max_chars``
    :param subs: list of subtitles to extend; new indices continue after it
    :param alternative: object whose ``words`` each expose ``word``,
        ``start_time`` and ``end_time`` (with ``ToTimedelta()``)
    :return: ``subs``, extended in place
    """
    firstword = True
    charcount = 0
    idx = len(subs) + 1
    content = ""
    start = None
    last_word = None

    for w in alternative.words:
        if firstword:
            # first word in sentence, record start time
            start = w.start_time.ToTimedelta()

        charcount += len(w.word)
        content += " " + w.word.strip()
        last_word = w

        if ("." in w.word or "!" in w.word or "?" in w.word
                or charcount > args.max_chars
                or ("," in w.word and not firstword)):
            # break sentence at: . ! ? or line length exceeded
            # also break if , and not first word
            subs.append(
                srt.Subtitle(index=idx,
                             start=start,
                             end=w.end_time.ToTimedelta(),
                             content=srt.make_legal_content(content)))
            firstword = True
            idx += 1
            content = ""
            charcount = 0
            last_word = None
        else:
            firstword = False

    # Flush words that never reached a break condition (e.g. a transcript
    # without closing punctuation); previously they were silently lost.
    if last_word is not None and content.strip():
        subs.append(
            srt.Subtitle(index=idx,
                         start=start,
                         end=last_word.end_time.ToTimedelta(),
                         content=srt.make_legal_content(content)))
    return subs
Пример #11
0
def add_subtitle(evt):
    """Recognition callback: append one subtitle to ``transcript``.

    The subtitle carries the text remembered from the previous event
    (``last_text``). It starts at the previously remembered end position and
    ends at this event's ``Duration`` plus the running ``last_start``; both
    positions are scaled by ``slow_factor``. Afterwards the module-level
    bookkeeping (index, position, text, offset) is updated from this event.
    """
    global index, last_duration, last_start, last_text, last_offset

    print(evt.result.json)
    payload = json.loads((evt.result.json))

    # A changed offset moves the running start forward by the previous span.
    if last_offset != payload["Offset"]:
        last_start += last_duration
    running_end = payload["Duration"] + last_start

    begin_s, begin_ms = convertduration(last_duration * slow_factor)
    finish_s, finish_ms = convertduration(running_end * slow_factor)
    begins = datetime.timedelta(seconds=begin_s, milliseconds=begin_ms)
    finishes = datetime.timedelta(seconds=finish_s, milliseconds=finish_ms)

    transcript.append(srt.Subtitle(index, begins, finishes, last_text))

    index += 1
    last_duration = running_end
    last_text = payload["Text"]
    last_offset = payload["Offset"]
Пример #12
0
def buildVTT(bookInfoFileName, outputDir):
    """Write one WebVTT subtitle file per chapter of a book-info JSON file.

    For every entry of ``info['chapter']`` the splits numbered from the
    chapter's ``start`` to its ``end`` (inclusive) become subtitles, timed in
    milliseconds relative to the start of the chapter's first split and
    indexed from 0. The file is named after the chapter number and title.

    :param bookInfoFileName: path of the JSON file with the segment data
    :param outputDir: directory the ``.vtt`` files are written to
    """
    # Read the segment information from the JSON file.
    with open(bookInfoFileName, 'r', encoding='UTF-8') as f:
        info = json.load(f)
        print(info['chapter'])
        for chapter in info['chapter']:
            # Times are made relative to the chapter's first split.
            offset = info['split'][chapter['start']]['start']
            split_numbers = range(chapter['start'], chapter['end'] + 1)

            subs = []
            for index, number in enumerate(split_numbers):
                split = info['split'][number]
                begins = timedelta(milliseconds=(split['start'] - offset))
                finishes = timedelta(milliseconds=(split['end'] - offset))
                # NOTE(review): the text is read from the key 'texc' -
                # confirm against the JSON schema that this is not a typo.
                subs.append(
                    srt.Subtitle(index, begins, finishes, split['texc']))

            # Save the vtt subtitle file.
            title = validateTitle('%02d %s' %
                                  (chapter['index'] + 1, chapter['title']))
            vttfilename = '%s/%s.vtt' % (outputDir, title)
            with open(vttfilename, 'w', encoding='UTF-8') as out:
                # Convert the srt text to WebVTT format.
                strVTT = srt2vtt(srt.compose(subs))
                print(strVTT)
                out.write(strVTT)
                print('%s done.' % vttfilename)
Пример #13
0
def srt_offset(subs, td_seconds):
    """Yield copies of ``subs`` with start and end shifted by ``td_seconds``.

    ``td_seconds`` may be a ``timedelta`` or a number of seconds. Only index,
    timing and content are carried over to the copies.
    """
    if isinstance(td_seconds, timedelta):
        shift = td_seconds
    else:
        shift = timedelta(seconds=td_seconds)

    for original in subs:
        yield srt.Subtitle(index=original.index,
                           start=original.start + shift,
                           end=original.end + shift,
                           content=original.content)
Пример #14
0
 def fit(self, subs, *_):
     """Store a time-scaled copy of ``subs`` in ``self.subs_``.

     Start and end of every subtitle are multiplied by
     ``self.scale_factor``; index and content are kept, as is the encoding
     of ``subs``. Returns ``self``.
     """
     rescaled = [
         srt.Subtitle(index=original.index,
                      start=original.start * self.scale_factor,
                      end=original.end * self.scale_factor,
                      content=original.content)
         for original in subs
     ]
     self.subs_ = SrtSubtitles(rescaled, encoding=subs.encoding)
     return self
Пример #15
0
 def fit(self, subs, *_):
     """Store a time-shifted copy of ``subs`` in ``self.subs_``.

     ``self.td_seconds`` is added to the start and end of every subtitle;
     index and content are kept, as is the encoding of ``subs``. Returns
     ``self``.
     """
     shifted = [
         srt.Subtitle(index=original.index,
                      start=original.start + self.td_seconds,
                      end=original.end + self.td_seconds,
                      content=original.content)
         for original in subs
     ]
     self.subs_ = SrtSubtitles(shifted, encoding=subs.encoding)
     return self
Пример #16
0
def gen_sub_data(sub_id, idx, length, single_dur):
    """Return ``length`` generated SRT blocks as UTF-8 encoded bytes.

    Block ``i`` (counting from 1) starts at ``i * single_dur`` seconds, ends
    at ``(i + 1) * single_dur / 2`` seconds and names ``sub_id``, ``idx``
    and ``i`` in its text.
    """
    # NOTE(review): for i > 1 the computed end lies before the start -
    # confirm that this is what the consumers of this data expect.
    blocks = [
        srt.Subtitle(
            index=i,
            start=timedelta(seconds=i * single_dur),
            end=timedelta(seconds=(i + 1) * single_dur / 2),
            content='ID={}, IDX={}, Sentence #{}'.format(sub_id, idx, i),
        ).to_srt()
        for i in range(1, length + 1)
    ]
    return ''.join(blocks).encode('utf-8')
Пример #17
0
def generate_subtitles(sentences, durations_in_seconds):
    """Lay the sentences out back to back, starting at time zero.

    Sentence ``k`` (counting from 1) gets index ``k`` and lasts for the
    matching entry of ``durations_in_seconds``; each subtitle starts where
    the previous one ended. Pairing stops at the shorter of the two inputs.
    """
    subtitles = []
    cursor = timedelta(seconds=0)
    timed_sentences = zip(sentences, durations_in_seconds)
    for number, (text, seconds) in enumerate(timed_sentences, start=1):
        finish = cursor + timedelta(seconds=seconds)
        subtitles.append(
            srt.Subtitle(index=number, start=cursor, end=finish,
                         content=text))
        cursor = finish
    return subtitles
Пример #18
0
def gen_subs(filename):
    """Convert ``<filename>-out.json`` word timings into ``<filename>.srt``.

    Words whose duration is at least ``MAX_WORD_TIME`` are ignored. The
    remaining words are appended to the current subtitle for as long as they
    end within ``MAX_SUB_TIME`` of its start; otherwise the subtitle is
    closed and the word opens a new one. Every subtitle lasts at least
    ``MIN_SUB_TIME``. The final numbering is produced by ``srt.compose``
    with ``reindex=True``.
    """
    with open(filename + "-out.json", "r") as in_file:
        words = json.load(in_file)["words"]
        print(words)

    subtitles = []
    index = 0
    text = ""
    begin = 0
    finish = 0

    def current_subtitle():
        # Snapshot of the subtitle being built.
        return srt.Subtitle(index, timedelta(seconds=begin),
                            timedelta(seconds=finish), text)

    for word in words:
        print(word)
        # NOTE(review): the key "start_time " is spelled with a trailing
        # space here - confirm that this matches the JSON input.
        word["end_time"] = word["start_time "] + word["duration"]
        if not word["duration"] < MAX_WORD_TIME:
            continue

        if text:
            if begin + MAX_SUB_TIME >= word["end_time"]:
                # Still fits: extend the running subtitle.
                text += " "
                text += word["word"]
                finish = max(word['end_time'], begin + MIN_SUB_TIME)
            else:
                # Too late for the running subtitle: close it.
                index += 1
                subtitles.append(current_subtitle())
                text = ""

        if not text:
            # This word opens a new subtitle.
            begin = word['start_time ']
            text += word['word']
            finish = max(word['end_time'], begin + MIN_SUB_TIME)

    if text:
        subtitles.append(current_subtitle())

    with open(filename + ".srt", "w") as f:
        f.write(
            srt.compose(subtitles, reindex=True, start_index=1, strict=True))
 def make_subtitles(self):
     """Append one subtitle per entry of ``self.times_list`` to ``self.srt_list``.

     Subtitle ``k`` (counting from 1) starts at the k-th time, lasts
     ``self.interval_len`` and shows the k-th entry of
     ``self.ascii_content_str``. An ``IndexError`` (e.g. no content for a
     time) is logged and that subtitle is skipped.
     """
     for number, begins in enumerate(self.times_list, start=1):
         finishes = begins + self.interval_len
         try:
             self.srt_list.append(
                 srt.Subtitle(index=number,
                              start=begins,
                              end=finishes,
                              content=self.ascii_content_str[number - 1]))
         except IndexError as err:
             logging.error(err)
 def display_sc(start, end, sc_list):
     """Build one subtitle (index 0) listing the given entries.

     Entries are ordered by descending ``float(entry[0])`` and, for equal
     values, descending ``int(entry[2])``; their ``entry[3]`` texts are
     joined with newlines. Text of 100 characters or more is cut to 98
     characters plus an ellipsis. ``start`` and ``end`` are in seconds.
     """
     max_chars = 100

     def priority(entry):
         return (-float(entry[0]), -int(entry[2]))

     lines = [entry[3] for entry in sorted(sc_list, key=priority)]
     content = "\n".join(lines)
     if len(content) >= max_chars:
         content = content[:max_chars - 2] + "…"

     return srt.Subtitle(index=0,
                         start=timedelta(seconds=start),
                         end=timedelta(seconds=end),
                         content=content)
Пример #21
0
def create_srt_file(srt_chunks):
    """Compose the chunks into SRT text, hand it to ``write_srt_file`` and return it.

    Every chunk provides ``start_time``, ``end_time`` (seconds) and ``text``.
    Each subtitle is stretched to the start of the next chunk; only the last
    one uses its own ``end_time``. Subtitles are indexed from 1 and their
    text is stripped of surrounding whitespace.
    """
    last_position = len(srt_chunks) - 1

    subtitles = []
    for position, chunk in enumerate(srt_chunks):
        own_end = chunk['end_time']
        if position == last_position:
            finish = own_end
        else:
            finish = srt_chunks[position + 1]['start_time']
        subtitles.append(
            srt.Subtitle(position + 1,
                         timedelta(seconds=chunk['start_time']),
                         timedelta(seconds=finish),
                         chunk['text'].strip()))

    srt_content = srt.compose(subtitles)
    write_srt_file(srt_content, "en")
    return srt_content
Пример #22
0
def create_subtitles_and_transcript(speech_to_text_response):
    """Bundle recognized words into subtitles and write them to ``subtitle_output``.

    Each entry of the response provides ``Word`` and ``Offset``. Words are
    collected until the accumulated offset difference, divided by 10000000,
    reaches the 3.0 window; the collected text is then emitted as a subtitle
    that starts at the most recently converted offset and lasts 3 seconds.
    Whatever is left after the last entry is emitted as a final subtitle.
    """
    # NOTE(review): the division by 10000000 suggests offsets in 100 ns
    # ticks - confirm against the producer of the response.
    window = 3.0  # 3 seconds
    transcriptions = []
    index = 0
    elapsed = 0
    previous_offset = 0
    text = ""
    start_sec, start_microsec = 0, 0

    def current_subtitle():
        # Snapshot of the text collected so far, placed at the last offset.
        return srt.Subtitle(
            index,
            datetime.timedelta(0, start_sec, start_microsec),
            datetime.timedelta(0, start_sec + window, 0), text)

    for entry in speech_to_text_response:
        # The word always joins the running text, even if it closes a window.
        text = text + " " + entry["Word"]

        if (int((elapsed / 10000000)) < window):
            # Window not full yet: remember where this word starts.
            start_sec, start_microsec = convertduration(entry["Offset"])
            elapsed = elapsed + entry["Offset"] - previous_offset
            previous_offset = entry["Offset"]
        else:
            index = index + 1
            transcriptions.append(current_subtitle())
            elapsed = 0
            text = ""

    transcriptions.append(current_subtitle())

    subtitles = srt.compose(transcriptions)
    logger.info("Outputting subtitles to " + subtitle_output)
    with open(subtitle_output, "w") as f:
        f.write(subtitles)
Пример #23
0
def get_chunk_subtitles(
    speaker_map_path, audio_in_path, start_index=1, offset=0, lag_correction=-0.001
):
    """Transcribe the speaker segments of one audio chunk into subtitles.

    The speaker-map JSON lists transitions with ``timestamp_start``,
    ``timestamp_end`` (milliseconds) and ``result`` (the speaker). For every
    transition the matching stretch of audio is read; unless the speaker is
    ``"no_speech"`` it is sent to Google speech recognition (``fr-FR``) and,
    if anything comes back, a subtitle ``"<speaker>: <transcript>"`` is
    created. ``offset`` (milliseconds) is added to all timestamps and grows
    by ``lag_correction`` after every transition.

    :return: tuple of (next free subtitle index, ``timestamp_end`` of the
        last transition, list of subtitles)
    """
    with open(speaker_map_path, "r") as in_json:
        channel = json.load(in_json)["channels"]["0"]
        # Parsed but not used further down.
        n_speakers = int(channel["summary"]["speaker-map"]["speaker_count"])
        transitions = channel["transitions"]["speaker-map"]

    chunk_srt = []
    with sr.AudioFile(audio_in_path) as source:
        for entry in transitions:
            shift = timedelta(milliseconds=offset)
            start_time = timedelta(milliseconds=(entry["timestamp_start"])) + shift
            end_time = timedelta(milliseconds=(entry["timestamp_end"])) + shift

            speaker = entry["result"]
            duration = entry["timestamp_end"] / 1000 - entry["timestamp_start"] / 1000

            # The audio is read for every transition, spoken or not, so the
            # read position in the source keeps advancing.
            audio = r.record(source, duration=duration)

            # Only real speakers are transcribed.
            if speaker != "no_speech":
                transcript = r.recognize_google(audio, language="fr-FR", show_all=True)
                if len(transcript) != 0:
                    # The first alternative is used.
                    text = "{}: {}".format(
                        speaker, transcript["alternative"][0]["transcript"]
                    )
                    chunk_srt.append(
                        srt.Subtitle(
                            index=start_index,
                            start=start_time,
                            end=end_time,
                            content=text,
                        )
                    )
                    start_index += 1
            offset += lag_correction
    return start_index, entry["timestamp_end"], chunk_srt
    def flush(self, time):
        """Emit the pending text as a subtitle ending at ``time``.

        Does nothing when there is no pending text or no start time.
        Otherwise the subtitle gets the next index, is passed to the
        notifier, stored in ``self.subs`` and written to ``self.file``
        (flushed immediately); the pending text and start are then reset.
        """
        has_pending = self.string != '' and self.start is not None
        if has_pending:
            self._index += 1
            pending = srt.Subtitle(self._index, self.start, time, self.string)

            self.notifier.notify_subtitle(pending)
            self.subs.append(pending)
            self.file.write(pending.to_srt())
            self.file.flush()

            self.string = ''
            self.start = None
Пример #25
0
def find_by_timestamp(
        subs,
        timestamp_one=datetime.timedelta(0),
        timestamp_two=datetime.timedelta(0),
        adjust=False,
):
    """
    Finds subtitles by timestamp and re-indexes them starting from 1.

    When timestamp one < timestamp two, the subtitles starting within
    [timestamp one, timestamp two) are found. When timestamp one > timestamp
    two, the subtitles up to timestamp two and the subtitles from timestamp
    one onwards are found. When both are equal, every subtitle is found.
    Subtitles are split at both timestamps beforehand.

    :param subs: :py:class:`Subtitle` objects
    :param datetime.timedelta timestamp_one: The timestamp to find from.
    :param datetime.timedelta timestamp_two: The timestamp to find to.
    :param boolean adjust: Whether to move the found subtitles earlier by
                           timestamp one.
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    # Wrap plain iterables (e.g. lists) so they can be consumed item by item.
    if not isinstance(subs, GeneratorType):
        subs = (item for item in subs)

    # Cut the subtitles at both boundaries of the block(s).
    subs = srt.tools.split.split(subs, timestamp_one)
    subs = srt.tools.split.split(subs, timestamp_two)

    current = _utils.tryNext(subs)
    sequential = timestamp_one < timestamp_two

    # Nothing to find: no subtitles at all, or the first one already starts
    # at or after the end of a sequential range.
    if current is None:
        return
    if sequential and timestamp_two <= current.start:
        return

    def is_found(begins):
        if timestamp_one == timestamp_two:
            return True
        if sequential:
            return timestamp_one <= begins and begins < timestamp_two
        return begins < timestamp_two or timestamp_one <= begins

    position = 1
    shift = timestamp_one if adjust else datetime.timedelta(0)
    while current is not None:
        if is_found(current.start):
            yield srt.Subtitle(
                position,
                current.start - shift,
                current.end - shift,
                current.content,
            )
            position += 1

        current = _utils.tryNext(subs)
Пример #26
0
def po_to_srt_converter(src_fp, dest_fp):
    """Convert a PO catalog read from ``src_fp`` into SRT written to ``dest_fp``.

    Both are binary file objects. For every unit the translation (or the
    source text when there is none) is used, with ``NEW_LINE_TAG`` turned
    into newlines. The unit's comment supplies the cue index on its first
    line and the ``start --> end`` timecodes on the rest.
    """
    catalog = polib.pofile(src_fp.read().decode("utf-8-sig"))

    for unit in catalog:
        text = unit.msgstr or unit.msgid
        content = "\n".join(text.split(NEW_LINE_TAG))

        index, timecodes = unit.comment.split("\n", maxsplit=1)
        raw_start, raw_end = timecodes.split(" --> ")
        begins = srt.srt_timestamp_to_timedelta(raw_start)
        finishes = srt.srt_timestamp_to_timedelta(raw_end)

        cue = srt.Subtitle(index=index, start=begins, end=finishes,
                           content=content)
        dest_fp.write(cue.to_srt().encode("utf-8"))
Пример #27
0
def Subtitle(videoFile, duration, thresh, language1):
    """Generate subtitles for a video by transcribing its non-silent parts.

    The audio is extracted with ``videoToWav`` (expected to produce
    ``Extracted.wav``), split into non-silent segments with pydub, and each
    segment is sent to Google speech recognition. Segments shorter than three
    seconds are merged with the segment that follows them. Segments that
    cannot be recognized are reported on stdout and skipped.

    :param videoFile: path of the video, passed to ``videoToWav``
    :param duration: minimum silence length passed to pydub (converted to int)
    :param thresh: silence threshold passed to pydub (converted to int)
    :param language1: language code for the recognizer
    :return: list of ``srt.Subtitle`` objects, timed in whole seconds
    """
    videoToWav(videoFile)

    r = sr.Recognizer()

    myaudio = pydub.AudioSegment.from_wav('Extracted.wav')
    speak = pydub.silence.detect_nonsilent(myaudio,
                                           min_silence_len=int(duration),
                                           silence_thresh=int(thresh))
    # Milliseconds -> whole seconds.
    speak = [(int(start / 1000), int(stop / 1000)) for start, stop in speak]

    speakClear = []
    skip = set()
    for idx, val in enumerate(speak):
        if idx in skip:
            continue
        if val[1] - val[0] < 3 and idx + 1 < len(speak):
            # Too short on its own: merge with the following segment.
            # (list.append takes a single argument, so the pair must be
            # passed as a tuple.)
            speakClear.append((val[0], speak[idx + 1][1]))
            skip.add(idx + 1)
        else:
            speakClear.append(val)

    subs = []
    timer = 0

    with sr.AudioFile("Extracted.wav") as source:
        for i, v in enumerate(speakClear):
            # record() continues from the current position of the source, so
            # read everything up to the end of this segment ...
            audioText = r.record(source, duration=v[1] - timer)
            # ... and remember that position even if recognition fails below;
            # otherwise every later segment would be read from the wrong place.
            timer = v[1]

            try:
                word = r.recognize_google(audioText, language=language1)
            except sr.UnknownValueError:
                print("Something Wrong")
                continue
            except Exception as e:
                # Best effort: report the problem and go on with the rest.
                print(e)
                continue

            subs.append(
                srt.Subtitle(index=(i + 1),
                             start=dt.timedelta(seconds=v[0]),
                             end=dt.timedelta(seconds=v[1]),
                             content=word))
            print(v[1])
    print("Process Finished.")

    return subs
Пример #28
0
    def generate_new_subtitle(self):
        """Turn the collected caption lines into the next subtitle.

        The lines of ``self.caption`` are joined with newlines and passed
        through ``srt.make_legal_content``. The subtitle starts at
        ``self.time_now``, lasts ``self.time_delta`` and is numbered after
        the subtitles already stored. Afterwards the clock moves to the
        subtitle's end and the caption buffer is emptied.
        """
        text = srt.make_legal_content('\n'.join(self.caption))

        begins = self.time_now
        finishes = begins + self.time_delta
        number = len(self.subtitles) + 1
        self.subtitles.append(srt.Subtitle(number, begins, finishes, text))

        self.time_now = finishes
        self.caption.clear()
Пример #29
0
def write_srt_file(transcriptions, id):
    """Write the transcriptions to ``data/subtitles/subtitles_<id>.srt``.

    Every transcription is a mapping with ``index``, ``start`` and ``end``
    (SRT timestamp strings) and ``content``.

    :return: the name of the written file (without the directory)
    """
    parsed = [
        srt.Subtitle(entry['index'],
                     srt.srt_timestamp_to_timedelta(entry['start']),
                     srt.srt_timestamp_to_timedelta(entry['end']),
                     entry['content'])
        for entry in transcriptions
    ]

    new_filename = f'subtitles_{id}.srt'
    body = srt.compose(parsed)
    # The with-block closes the file on exit.
    with open(f'data/subtitles/{new_filename}', "w") as f:
        f.write(body)

    return new_filename
Пример #30
0
def make_videos_from_words(word, srt_file_location):
    """Cut one clip (plus a one-line srt file) for every subtitle containing ``word``.

    The position of the word inside the subtitle text is used to estimate
    when it is spoken: the clip starts 0.2 s before the estimated position
    and is padded by 0.5 s at the end. Clips are cut from ``s.mp4`` and named
    ``<word><n>.mp4`` / ``<word><n>.srt`` with ``n`` counting from 0. The
    search is case-insensitive.

    :param word: the word to look for
    :param srt_file_location: path of the subtitle file belonging to ``s.mp4``
    """
    # Parse everything up front; the with-block closes the file even if
    # parsing or a later step fails.
    with open(srt_file_location) as subtitle_file:
        subs = list(srt.parse(subtitle_file))

    needle = word.lower()
    index = 0
    for sub in subs:
        haystack = sub.content.lower()
        # Empty subtitles are skipped; they would divide by zero below.
        if not haystack or needle not in haystack:
            continue

        # share of the subtitle text taken up by the word
        word_share_in_content = len(word) / len(haystack)
        # relative position at which the word starts
        starting_percentage = haystack.find(needle) / len(haystack)

        # total_seconds() also covers timestamps of a day or more
        start = sub.start.total_seconds()
        end = sub.end.total_seconds()
        time = end - start

        # start slightly before the estimated position of the word
        # NOTE(review): this can be negative for a word at the very
        # beginning of the video - confirm how ffmpeg should handle that.
        start += time * starting_percentage - .2

        word_time = time * word_share_in_content
        end = start + word_time + .5

        # subtitle for the output clip, covering the whole clip
        sub_for_video = srt.Subtitle(1,
                                     start=timedelta(seconds=0),
                                     end=timedelta(seconds=end - start),
                                     content=word)
        with open(f"{word}{index}.srt", "w") as final:
            final.write(srt.compose([sub_for_video]))

        # the output clip itself
        ffmpeg_extract_subclip("s.mp4",
                               start,
                               end,
                               targetname=f"{word}{index}.mp4")

        index += 1