예제 #1
0
파일: import_data.py 프로젝트: ziarn/podv2
 def convert_to_vtt(self, new_file):
     """Convert an SRT subtitle file to WebVTT next to the original.

     The output path is ``new_file`` with its last three characters
     replaced by ``vtt``. When the file cannot be decoded, it is re-read
     as latin-1, rewritten as UTF-8 into a sibling ``.txt`` file and the
     conversion is retried from that copy.

     Returns the path of the ``.vtt`` file, or ``""`` when webvtt reports
     the file as malformed (on the first attempt or on the retry).
     """
     base = new_file[:-3]
     vtt_file = base + "vtt"
     try:
         try:
             webvtt.from_srt(new_file).save(vtt_file)
         except UnicodeDecodeError:
             print("************ codecs ***********")
             txt_file = base + "txt"
             with codecs.open(new_file, "r",
                              encoding="latin-1") as source_file:
                 contents = source_file.read()
             with codecs.open(txt_file, "w", "utf-8") as target_file:
                 target_file.write(contents)
             # The retry sits inside the outer ``try`` on purpose: an
             # exception raised in an ``except`` clause is not seen by the
             # sibling handlers of the same ``try`` statement.
             webvtt.from_srt(txt_file).save(vtt_file)
         return vtt_file
     except webvtt.errors.MalformedFileError:
         print("************ "
               "The file does not have a valid format. !!!!! "
               "************")
         print(new_file)
         print("************ ************")
     return ""
예제 #2
0
def convert_vtt_caption(request, file):
    """Return the captions of a stored SRT file as JSON.

    ``file`` is the primary key used to look up the ``File`` record. The
    captions parsed from its path are rendered by ``WebVTTWriter`` and
    returned as ``{"content": <rendered text>}``.
    """
    from nas.utils.utils2 import WebVTTWriter

    record = File.objects.get(pk=file)
    parsed = webvtt.from_srt(record.file.path)
    rendered = WebVTTWriter().write(parsed.captions)
    return JsonResponse(data={"content": rendered})
예제 #3
0
    def srtToCaptions(self, vttObject):
        """Read an SRT file from S3 and return its captions as dicts.

        ``vttObject`` must provide ``"Bucket"`` and ``"Key"`` entries.
        Each returned dict has ``start`` and ``end`` (converted with
        ``formatTimeVTTtoSeconds``) and ``caption``, which holds only the
        first text line of the caption.

        Any exception raised while reading from S3 propagates unchanged.
        """
        import os

        self.logger.debug("Getting data from s3://" + vttObject["Bucket"] +
                          "/" + vttObject["Key"])
        srt = S3Helper().readFromS3(vttObject["Bucket"], vttObject["Key"])
        self.logger.debug(srt)

        # from_srt is given a file path here, so the text is spilled to a
        # temporary file. It is written as UTF-8 rather than the locale
        # encoding (assumes webvtt reads BOM-less files as UTF-8).
        with NamedTemporaryFile(mode='w+', encoding='utf-8',
                                delete=False) as f:
            f.write(srt)

        captions = []
        try:
            for srtcaption in webvtt.from_srt(f.name):
                self.logger.debug(srtcaption)
                caption = {
                    "start": self.formatTimeVTTtoSeconds(srtcaption.start),
                    "end": self.formatTimeVTTtoSeconds(srtcaption.end),
                    "caption": srtcaption.lines[0],
                }
                self.logger.debug("Caption Object:{}".format(caption))
                captions.append(caption)
        finally:
            # delete=False means nobody else removes the file for us.
            os.remove(f.name)

        return captions
예제 #4
0
def srt_translate(upload_folder, download_folder, file_name,
                  file_name_persian):
    """Translate the captions of an SRT file from English to Persian.

    Reads ``upload_folder + file_name``, translates every caption with
    the module-level ``translator`` and appends numbered caption blocks to
    ``download_folder + file_name_persian``.

    Returns ``file_name_persian``.
    """
    source_path = "%s%s" % (upload_folder, file_name)
    target_path = "%s%s" % (download_folder, file_name_persian)

    # NOTE(review): the file is opened in append mode as in the original,
    # so running twice on the same target duplicates the blocks.
    # UTF-8 is explicit because the translated text is not ASCII.
    with open(target_path, "a+", encoding="utf-8") as f:
        captions = webvtt.from_srt(source_path)
        for counter, caption in enumerate(captions, start=1):
            # Translate before writing so a failed request cannot leave a
            # half-written caption block behind.
            translated = translator.translate(caption.text,
                                              src="en",
                                              dest="fa").text
            f.write(str(counter))
            f.write("\n")
            f.write(caption.start)
            f.write(" --> ")
            f.write(caption.end)
            f.write("\n")
            f.write(translated)
            f.write("\n\n")

    # Report the location that was actually written (the download folder).
    print("Done. --> %s" % target_path)
    return file_name_persian


# #TODO detect file format
# if y == ".srt":
#     srt_translate(file_name, file_name_persian)
# elif y == ".vtt":
#     vtt_translate(file_name, file_name_persian)
# elif y == ".sbv":
#     pass
# else:
#     print("dont detect!!!")
예제 #5
0
def save_media_caption_file(file_guid, language, file_name, f_handle):
    """
    Save an uploaded caption file next to its media file.

    The destination is the path from ``get_media_file_path(file_guid)``
    with ``.mp4`` replaced by ``_<language><lower-cased extension>``. When
    the destination ends in ``srt`` a WebVTT copy is written beside it.

    Returns True on success; on any exception the error is printed and
    False is returned.
    """
    ret = False
    print("Trying to save caption file: " + file_guid + "/" + language + "/" + \
        file_name)

    # Defined up front so the error message below can always be built,
    # even when the failure happens before the path is computed.
    dest_path = ""
    try:
        file_name = os.path.basename(file_name)
        parts = os.path.splitext(file_name)

        dest_path = get_media_file_path(file_guid)
        dest_path = dest_path.replace(".mp4",
                                      "_" + language + parts[1].lower())

        with open(dest_path, 'wb') as out_file:
            out_file.write(f_handle.read())

        # Do we need to convert to VTT?
        if dest_path.lower().endswith("srt"):
            vtt = webvtt.from_srt(dest_path)
            # Swap only the trailing extension; str.replace would also
            # rewrite any "srt" appearing earlier in the path.
            output_caption_file = dest_path[:-3] + "vtt"
            vtt.save(output_caption_file)

        ret = True
    except Exception as ex:
        print("Error saving caption file! " + file_guid + "/" + dest_path +
              "/" + language + "\n" + str(ex))

    return ret
예제 #6
0
 def test_srt_parse_get_caption_data(self):
     """The caption parsed from one_caption.srt exposes timing and text."""
     parsed = webvtt.from_srt(self._get_file('one_caption.srt'))
     first = parsed.captions[0]
     self.assertEqual(first.start_in_seconds, 0.5)
     self.assertEqual(first.start, '00:00:00.500')
     self.assertEqual(first.end_in_seconds, 7)
     self.assertEqual(first.end, '00:00:07.000')
     self.assertEqual(first.lines[0], 'Caption text #1')
     self.assertEqual(len(first.lines), 1)
예제 #7
0
def srt2vtt(srt_filename, vtt_filename):
    """Convert the SRT file at ``srt_filename`` into ``vtt_filename``.

    Both arguments are string paths (per the original note, webvtt does
    not accept buffered IO objects here).

    Returns ``None`` on success, or the error text when webvtt raises
    ``MalformedCaptionError``.
    """
    try:
        converted = webvtt.from_srt(srt_filename)
        converted.save(vtt_filename)
    except webvtt.errors.MalformedCaptionError as err:
        return str(err)
예제 #8
0
 def test_srt_parse_get_caption_data(self):
     """Check start, end and text of the only caption in one_caption.srt."""
     caption = webvtt.from_srt(self._get_file('one_caption.srt')).captions[0]
     # Timing, both as seconds and as formatted timestamps.
     self.assertEqual(caption.start_in_seconds, 0.5)
     self.assertEqual(caption.start, '00:00:00.500')
     self.assertEqual(caption.end_in_seconds, 7)
     self.assertEqual(caption.end, '00:00:07.000')
     # Text: exactly one line.
     self.assertEqual(caption.lines[0], 'Caption text #1')
     self.assertEqual(len(caption.lines), 1)
예제 #9
0
    def to_vtt(self, filename):
        """
        Return the path of a WebVTT version of ``filename``.

        If the name ends in ``.vtt`` (any case), or the file can be read
        as WebVTT, ``filename`` itself is returned. Otherwise the file is
        converted from SRT and the path of the new ``.vtt`` file, which
        sits next to the original, is returned.
        """

        if filename.lower().endswith('.vtt'):
            return filename

        import os
        import webvtt

        with self._file_lock:
            try:
                webvtt.read(filename)
                return filename
            except Exception:
                # Not readable as WebVTT: treat it as SRT. splitext only
                # touches the final extension, so names without a dot or
                # with dots in a directory component still map to the
                # file that is actually written.
                vtt_filename = os.path.splitext(filename)[0] + '.vtt'
                webvtt.from_srt(filename).save(vtt_filename)
                return vtt_filename
예제 #10
0
def craete_vtt_from(srt_file):
    """Write a ``.vtt`` file next to ``srt_file``, ignoring failures.

    The output path is ``srt_file`` with its extension replaced by
    ``.vtt``. Conversion is best effort: any error (for example a file
    that does not have a valid format) is swallowed and ``None`` is
    returned either way.
    """
    try:
        vtt = webvtt.from_srt(srt_file)
        path_vtt_file = os.path.splitext(srt_file)[0] + '.vtt'
        vtt.save(path_vtt_file)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return
예제 #11
0
def get_subtitle_file(filename: str) -> WebVTT:
    """Load a subtitle file, choosing the parser from its extension.

    ``.srt`` and ``.sbv`` files are converted, ``.vtt`` files are read
    directly. The extension is matched case-insensitively.

    Raises:
        ValueError: for any other extension; the argument is ``filename``.
    """
    file_ext = os.path.splitext(filename)[1].lower()
    if file_ext == ".srt":
        return webvtt.from_srt(filename)
    if file_ext == ".sbv":
        return webvtt.from_sbv(filename)
    if file_ext == ".vtt":
        return webvtt.read(filename)
    raise ValueError(filename)
예제 #12
0
    def _init_content(self):
        """Fill ``self.content`` with one dict per caption of ``self.file``.

        Each dict carries ``start``, ``end``, ``text`` (passed through
        ``filter_typos``) and a 1-based ``identifier`` string.
        """
        self.content = []
        parsed = webvtt.from_srt(self.file)

        for position, cue in enumerate(parsed.captions, start=1):
            entry = {
                "start": cue.start,
                "end": cue.end,
                "text": filter_typos(cue.text),
                "identifier": str(position),
            }
            self.content.append(entry)
예제 #13
0
def convert_to_vtt(srt_path):
    """Return the WebVTT text for the SRT file at ``srt_path``.

    The file is decoded as UTF-8, falling back to ISO-8859-1, stripped,
    rewritten as UTF-8 into a temporary ``.srt`` file and converted from
    there. Both temporary files are removed before returning.

    Raises:
        AssertionError: when the file yields no content.
    """
    import os

    srt_content = None
    for encoding in ('utf-8', 'iso-8859-1'):
        try:
            with open(str(srt_path), encoding=encoding) as f:
                srt_content = f.read().strip()
            break
        except UnicodeDecodeError:
            continue
    # Raised explicitly (same exception type as before) so the check is
    # not stripped when Python runs with -O.
    if not srt_content:
        raise AssertionError('no detectable encoding')

    with tempfile.NamedTemporaryFile(mode='w',
                                     suffix='.srt',
                                     encoding='utf-8',
                                     delete=False) as tmp_srt:
        tmp_srt.write(srt_content)

    # Swap only the extension; str.replace would touch every '.srt' in
    # the temporary path.
    vtt_path = os.path.splitext(tmp_srt.name)[0] + '.vtt'
    try:
        webvtt.from_srt(tmp_srt.name).save(vtt_path)
        # Read back as UTF-8 instead of the locale encoding (assumes
        # webvtt writes its output as UTF-8).
        with open(vtt_path, encoding='utf-8') as vtt:
            return vtt.read()
    finally:
        for leftover in (tmp_srt.name, vtt_path):
            if os.path.exists(leftover):
                os.remove(leftover)
예제 #14
0
def search_subs(film, lang):
    """Fetch the first available subtitle for ``film`` and store it.

    Queries subtitle-api.org for SUBRIP subtitles of ``film.imdb_id`` in
    ``lang``. The first item that downloads with status 200 is saved to
    the film's ``<code>_sub_srt`` field, converted to WebVTT beside the
    saved file, and its name is recorded in ``<code>_sub_vtt``. ``code``
    is ``"ru"`` when ``lang == "ru"`` and ``"en"`` for every other value.

    Returns ``None`` in all cases.
    """
    url = 'https://subtitle-api.org/videos/{}/subtitles'.format(film.imdb_id)
    params = dict(
        lang=lang,
        format="SUBRIP",
    )
    r = requests.get(url=url, params=params)
    if r.status_code != 200:
        return
    resp = json.loads(r.text)

    code = "ru" if lang == "ru" else "en"
    srt_attr = code + "_sub_srt"
    vtt_attr = code + "_sub_vtt"

    for item in resp['items']:
        url = 'https://subtitle-api.org/videos/{}/subtitles/{}'.format(
            film.imdb_id, item['id'])
        r = requests.get(url=url, params=params)
        if r.status_code != 200:
            continue

        # The context manager closes (and thereby deletes) the temporary
        # file once the field has copied its content.
        with NamedTemporaryFile(delete=True) as temp_file:
            temp_file.write(r.content)
            temp_file.flush()
            getattr(film, srt_attr).save(film.imdb_id + "_" + code + ".srt",
                                         File(temp_file),
                                         save=True)

        webvtt.from_srt(settings.MEDIA_ROOT + '/' +
                        str(getattr(film, srt_attr))).save()
        setattr(film, vtt_attr, film.imdb_id + "_" + code + ".vtt")
        film.save()
        return
예제 #15
0
    def test_convert_from_srt_to_vtt_and_back_gives_same_file(self):
        """Parsing sample.srt and saving it as SRT reproduces the file.

        The comparison ignores leading and trailing whitespace.
        """
        copy(self._get_file('sample.srt'), OUTPUT_DIR)

        parsed = webvtt.from_srt(os.path.join(OUTPUT_DIR, 'sample.srt'))
        parsed.save_as_srt(os.path.join(OUTPUT_DIR, 'sample_converted.srt'))

        contents = []
        for name in ('sample.srt', 'sample_converted.srt'):
            with open(os.path.join(OUTPUT_DIR, name),
                      'r',
                      encoding='utf-8') as handle:
                contents.append(handle.read())
        original, converted = contents

        self.assertEqual(original.strip(), converted.strip())
예제 #16
0
	def generate_time_data(yt,framesdir="frames"):
		"""Build the time-sorted list of subtitle lines and video frames.

		Every entry is ``[time_in_seconds, data, kind, timestamp]``:
		``kind`` is ``"text"`` (``data`` is one subtitle line, timed at the
		end of its caption) or ``"image"`` (``data`` is the frame number
		taken from a file name in ``framesdir``). Consecutive identical
		subtitle lines are kept only once.

		Sets ``yt.maxseconds``, ``yt.maxframes`` and ``yt.time_data`` and
		returns the list.

		Raises:
			ValueError: when subtitles are enabled but ``yt.subfile`` ends in
				neither ``srt`` nor ``vtt``.
		"""
		import subprocess

		yt.maxseconds=int(yt.meta["duration"])
		# An argument list instead of a shell string, so a video path with
		# spaces or shell metacharacters is passed through verbatim.
		probe=subprocess.run(
			["mediainfo","--Output=Video;%FrameCount%",yt.videofile],
			stdout=subprocess.PIPE,universal_newlines=True)
		yt.maxframes=int(probe.stdout.replace("\n",""))
		time_data=[]
		if(yt.subs==1):
			if(yt.subfile[-3:]=="srt"):
				subvtt=webvtt.from_srt(yt.subfile)
			elif(yt.subfile[-3:]=="vtt"):
				subvtt=webvtt.read(yt.subfile)
			else:
				# Previously this fell through to an unbound-variable error.
				raise ValueError("unsupported subtitle file: "+str(yt.subfile))
			lastline=None
			for cue in subvtt:
				# "HH:MM:SS.mmm" -> seconds
				endsec=sum(x * float(t) for x, t in zip([3600, 60, 1], cue.end.split(":")))
				for line in cue.text.split("\n"):
					if(line.replace(" ","")==""):
						continue
					# drop a line that repeats the previously kept one
					if(line!=lastline):
						time_data.append([endsec,line,"text",cue.start])
						lastline=line

		for name in os.listdir(framesdir):
			framenr=int(name[:name.find(".")])
			# position of the frame in the video, scaled to seconds
			framepos=framenr/yt.maxframes
			framesec=framepos*yt.maxseconds
			hour_=int(framesec/3600)
			minute_=int((framesec/60)-hour_*60)
			second_=int((framesec-(hour_*3600))-(minute_*60))
			tstmp=str(hour_).zfill(2)+":"+str(minute_).zfill(2)+":"+str(second_).zfill(2)
			time_data.append([framesec,framenr,"image", tstmp])
		time_data.sort(key=lambda tup:tup[0])
		yt.time_data=time_data
		return time_data
예제 #17
0
    def test_srt_conversion(self):
        """Saving a parsed SRT file writes the expected one_caption.vtt."""
        os.makedirs(OUTPUT_DIR)
        copy(self._get_file('one_caption.srt'), OUTPUT_DIR)

        srt_path = os.path.join(OUTPUT_DIR, 'one_caption.srt')
        webvtt.from_srt(srt_path).save()

        vtt_path = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(vtt_path))

        with open(vtt_path, 'r', encoding='utf-8') as handle:
            lines = [raw.rstrip() for raw in handle]

        self.assertListEqual(lines, [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
        ])
예제 #18
0
파일: webvtt.py 프로젝트: glut23/webvtt-py
    def test_srt_conversion(self):
        """Convert one_caption.srt and compare the saved .vtt line by line."""
        expected_lines = [
            'WEBVTT',
            '',
            '00:00:00.500 --> 00:00:07.000',
            'Caption text #1',
        ]

        os.makedirs(OUTPUT_DIR)
        copy(self._get_file('one_caption.srt'), OUTPUT_DIR)

        converted = webvtt.from_srt(os.path.join(OUTPUT_DIR, 'one_caption.srt'))
        converted.save()

        output_file = os.path.join(OUTPUT_DIR, 'one_caption.vtt')
        self.assertTrue(os.path.exists(output_file))

        with open(output_file, 'r', encoding='utf-8') as f:
            lines = []
            for line in f:
                lines.append(line.rstrip())

        self.assertListEqual(lines, expected_lines)
def fix_subtitle_sequencing(filename):
    """Remove overlaps between consecutive captions of a subtitle file.

    Supports ``.srt`` and ``.sbv`` input; other extensions are reported
    and skipped, as is a file that already has a ``.bk`` backup. Behaviour
    depends on two flags read from ``sys.argv``:

    * ``--fix-live``: first shift every caption 8 seconds earlier
      (clamped at zero) and give it a fixed 4 second duration.
    * ``--dry``: do everything except the backup and the write.

    Unless ``--dry`` is given, the original is copied to ``filename.bk``
    and the result is written as SRT to the input path with a ``.srt``
    extension.
    """
    if os.path.isfile(filename + ".bk"):
        print("Not overwriting original backup for {}, skipping.".format(
            filename))
        return

    stem, extension = os.path.splitext(filename)
    if extension == ".srt":
        subs = webvtt.from_srt(filename)
    elif extension == ".sbv":
        subs = webvtt.from_sbv(filename)
    else:
        # Previously this fell through with ``subs = None`` and crashed
        # on ``len(None)`` below.
        print("Unsupported subtitle format for {}, skipping.".format(
            filename))
        return

    # Adjust timing and stretch subtitles for fixing the live ones which
    # get messed up by Youtube
    if "--fix-live" in sys.argv:
        zero = timedelta()
        for caption in subs:
            start = max(
                parse_time_stamp(caption.start) - timedelta(seconds=8), zero)
            end = start + timedelta(seconds=4)
            caption.start = format_time_stamp(start)
            caption.end = format_time_stamp(end)

    # Clip each caption so it ends no later than the next one starts.
    for i in range(len(subs) - 1):
        end = parse_time_stamp(subs[i].end)
        next_start = parse_time_stamp(subs[i + 1].start)
        if end > next_start:
            subs[i].end = subs[i + 1].start

    if "--dry" not in sys.argv:
        shutil.copy(filename, filename + ".bk")
        out_srt = stem + ".srt"
        with open(out_srt, "w", encoding="utf8") as f:
            subs.write(f, format="srt")
예제 #20
0
def _ffmpeg_timestamp(moment):
    """Format a pysrt time as ``H:M:S.mmm`` with zero-padded milliseconds."""
    return "{}:{}:{}.{:03d}".format(moment.hours, moment.minutes,
                                    moment.seconds, moment.milliseconds)


def audio_crop(video_id):
    """Extract a video's audio and cut one mp3 clip per subtitle.

    For the ``VideoUpload`` with primary key ``video_id`` this:

    1. writes a WebVTT copy of the subtitle file next to it,
    2. writes ``<MEDIA_ROOT><id>/subtitle/tse.txt`` with one
       ``index, start, end`` line per subtitle,
    3. extracts the audio track to ``<MEDIA_ROOT><id>/audio/audio.mp3``,
    4. cuts ``<MEDIA_ROOT><id>/audio/crop/op_<index>.mp3`` per subtitle.
    """
    import subprocess

    video = VideoUpload.objects.get(pk=video_id)
    subtitle_path = str(video.subfile.path)

    vtt = webvtt.from_srt(subtitle_path)
    vtt.save()

    video_root = settings.MEDIA_ROOT + str(video.id)

    # Milliseconds are zero-padded: written bare, 5 ms would become
    # "...:1.5", which reads as half a second.
    cues = [(index, _ffmpeg_timestamp(sub.start), _ffmpeg_timestamp(sub.end))
            for index, sub in enumerate(pysrt.open(subtitle_path), start=1)]

    time_start_end = video_root + '/subtitle/tse.txt'
    with open(time_start_end, "w") as listing:
        for index, start, end in cues:
            listing.write("{}, {}, {}\n".format(index, start, end))

    ip_video = settings.MEDIA_ROOT + str(video.videofile)
    crop_dir = video_root + '/audio/crop'
    # Creates both <id>/audio and <id>/audio/crop; no error if present.
    os.makedirs(crop_dir, exist_ok=True)
    ip_audio = video_root + '/audio' + "/audio.mp3"

    # Argument lists instead of shell strings, so paths containing spaces
    # or shell metacharacters are passed through verbatim.
    subprocess.run(['ffmpeg', '-hide_banner', '-i', ip_video, '-vn',
                    ip_audio])
    time.sleep(3)  # pause kept from the original implementation

    for index, start, end in cues:
        op_audio = crop_dir + "/op_{}.mp3".format(index)
        subprocess.run(['ffmpeg', '-hide_banner', '-loglevel', 'panic',
                        '-i', ip_audio, '-ss', start, '-to', end, op_audio])
        time.sleep(0.2)  # pause kept from the original implementation
예제 #21
0
 def srt_text(self):
     """Return every caption text of ``self.filename`` joined by spaces.

     Each caption text is passed through ``filter_typos`` first.
     """
     parsed = webvtt.from_srt(self.filename)
     cleaned = []
     for caption in parsed.captions:
         cleaned.append(filter_typos(caption.text))
     return " ".join(cleaned)
예제 #22
0
 def test_srt_empty_caption_text(self):
     """missing_caption_text.srt still parses into a non-empty caption list."""
     parsed = webvtt.from_srt(self._get_file('missing_caption_text.srt'))
     self.assertTrue(parsed.captions)
예제 #23
0
 def test_sbv_parse_captions(self):
     """sample.srt parses into five captions.

     NOTE(review): despite "sbv" in the name, this test loads an SRT file
     through ``from_srt`` -- confirm whether that is intended.
     """
     parsed = webvtt.from_srt(self._get_file('sample.srt'))
     caption_count = len(parsed.captions)
     self.assertEqual(caption_count, 5)
예제 #24
0
 def test_srt_empty_gets_removed(self):
     """missing_caption_text.srt yields exactly four captions."""
     source = self._get_file('missing_caption_text.srt')
     parsed = webvtt.from_srt(source)
     self.assertEqual(len(parsed.captions), 4)
예제 #25
0
 def test_srt_timestamps_format(self):
     """The third caption of sample.srt has dot-separated milliseconds."""
     third = webvtt.from_srt(self._get_file('sample.srt')).captions[2]
     self.assertEqual(third.start, '00:00:11.890')
     self.assertEqual(third.end, '00:00:16.320')
예제 #26
0
 def test_srt_parse_captions(self):
     """sample.srt parses into a non-empty caption list."""
     parsed = webvtt.from_srt(self._get_file('sample.srt'))
     self.assertTrue(parsed.captions)
예제 #27
0
 def test_srt_empty_caption_text(self):
     """Parsing missing_caption_text.srt produces at least one caption."""
     source = self._get_file('missing_caption_text.srt')
     captions = webvtt.from_srt(source).captions
     self.assertTrue(captions)
예제 #28
0
def pull_youtube_caption(yt_url, media_guid):
    """Download every caption track of a YouTube video as SRT and WebVTT.

    For each caption language an ``_<lang>.srt`` file is written beside
    the media file's ``srt`` path and converted to a ``.vtt`` file. A
    language that fails is reported and skipped.

    Returns True when captions were already present or the caption loop
    ran; False when no db record exists for ``media_guid`` or the video
    could not be opened.
    """
    if is_media_captions_present(media_guid):
        print("VTT file present.")
        time.sleep(5)
        return True

    # Pull the db info
    media_file = db(db.media_files.media_guid == media_guid).select().first()
    if media_file is None:
        print("ERROR - Unable to find a db record for " + str(media_guid))
        return False

    # NOTE(review): the folders are not used below; the call is kept in
    # case it has side effects -- confirm and remove if it has none.
    (w2py_folder, applications_folder, app_folder) = get_app_folders()

    target_file = get_media_file_path(media_guid, "srt")
    from pytube import YouTube
    try:
        yt = YouTube(yt_url.replace("/embed/", "/watch?v="),
                     proxies=get_youtube_proxies())
    except HTTPError as ex:
        # Every HTTP error, including 429 (try again later), is reported
        # the same way; no retry is implemented here.
        print("HTTP ERROR: " + str(ex))
        # Slight pause - let scheduler grab output
        time.sleep(5)
        return False
    except Exception as ex:
        msg = "Bad YT URL? " + yt_url + " -- " + str(ex)
        print(msg)
        return False

    for cap in yt.captions:
        lang = cap.code
        srt_caption_file = target_file.replace(".srt", "_" + lang + ".srt")

        try:
            print("Saving " + lang + " to " + srt_caption_file)
            caption_srt = cap.generate_srt_captions()
            # Save SRT file
            with open(srt_caption_file, "wb") as f:
                f.write(caption_srt.encode('utf-8'))

            # Convert to webvtt format
            vtt = webvtt.from_srt(srt_caption_file)
            # Swap only the last "srt"; str.replace would also rewrite
            # any "srt" earlier in the path.
            head, found, tail = srt_caption_file.rpartition("srt")
            vtt_caption_file = (head + "vtt" + tail) if found \
                else srt_caption_file
            vtt.save(vtt_caption_file)
            print("Saved " + lang + " to " + vtt_caption_file)

        except Exception as ex:
            print("Error - unable to grab caption for lang: " + yt_url + " / " + lang + \
                "\n\n" + str(ex))
            continue

    # Slight pause - let scheduler grab output
    time.sleep(5)
    return True
예제 #29
0
 def test_srt_total_length(self):
     """sample.srt reports a total length of 23."""
     parsed = webvtt.from_srt(self._get_file('sample.srt'))
     self.assertEqual(parsed.total_length, 23)
예제 #30
0
 def convert_files_to_vtt(self):
     """Convert every ``.srt`` file under ``self.srt_path`` to WebVTT.

     The tree is walked recursively. Each output is named
     ``self.vtt_path`` + file stem + ``.vtt``; ``self.vtt_path`` is used
     as a plain prefix, so it needs a trailing separator to act as a
     directory. Files whose extension is not ``.srt`` (any case) are
     skipped.
     """
     for path, _directories, files in os.walk(self.srt_path):
         for file in files:
             stem, extension = os.path.splitext(file)
             if extension.lower() != ".srt":
                 # Anything else is not fed to the SRT parser.
                 continue
             # Only the extension is swapped; str.replace would rewrite
             # every "srt" inside the file name.
             webvtt.from_srt(os.path.join(path, file)).save(
                 f"{self.vtt_path}{stem}.vtt"
             )
예제 #31
0
 def test_srt_parse_captions(self):
     """Parsing sample.srt produces at least one caption."""
     captions = webvtt.from_srt(self._get_file('sample.srt')).captions
     self.assertTrue(captions)
예제 #32
0
 def test_srt_empty_gets_removed(self):
     """Four captions are parsed from missing_caption_text.srt."""
     parsed = webvtt.from_srt(self._get_file('missing_caption_text.srt'))
     remaining = len(parsed.captions)
     self.assertEqual(remaining, 4)
예제 #33
0
import os
import webvtt

# Dump the text of every caption file found under a folder into a .txt
# file named after it, written to the current working directory.
path = input('Filepath where your files are located: ')
files = [
    os.path.join(dirpath, name)
    for dirpath, _subdirs, names in os.walk(path)
    for name in names
]
captionfiles = ['.srt', '.webvtt', '.vtt']
for f in files:
    extension = os.path.splitext(f)[-1]
    kind = extension.lower()
    if kind not in captionfiles:
        continue
    try:
        # Compare the lower-cased extension so ".SRT" is parsed as SRT
        # instead of being handed to the WebVTT reader.
        if kind == '.srt':
            caption = webvtt.from_srt(f)
        else:
            caption = webvtt.read(f)
        txtcontent = [cap.text for cap in caption]
        # Swap only the extension; str.replace would hit every occurrence
        # of the extension text inside the name.
        outputtxt = os.path.splitext(os.path.basename(f))[0] + '.txt'
        with open(outputtxt, 'w', encoding='utf-8') as outputfile:
            outputfile.write('\n'.join(txtcontent))
    except Exception as e:
        # Best effort: report the failing file and carry on.
        print("{} for {}".format(e, f))
예제 #34
0
 def test_srt_timestamps_format(self):
     """Check the start and end timestamps of sample.srt's third caption."""
     parsed = webvtt.from_srt(self._get_file('sample.srt'))
     caption = parsed.captions[2]
     self.assertEqual(caption.start, '00:00:11.890')
     self.assertEqual(caption.end, '00:00:16.320')
예제 #35
0
 def test_sbv_parse_captions(self):
     """Five captions are parsed from sample.srt.

     NOTE(review): the name says "sbv" but the test reads an SRT file via
     ``from_srt`` -- confirm whether that is intended.
     """
     captions = webvtt.from_srt(self._get_file('sample.srt')).captions
     self.assertEqual(len(captions), 5)
예제 #36
0
파일: subs.py 프로젝트: frydaiii/vidPlayer
            path = os.path.join(zipDir, dirs.pop())
            if os.path.isdir(path):
                for anotherPath in os.listdir(path):
                    dirs.append(anotherPath)
            elif os.path.isfile(path):
                files.append(path)

        # get all srt file
        srts = []
        for file in files:
            if file[-3:] == 'srt': srts.append(file)

        # convert
        try:
            for srt in srts:
                webvtt.from_srt(srt).save()
        except Exception:
            for srt in srts:
                f = open(srt, 'r', encoding='utf-16')
                sub = "WEBVTT\n\n"
                for line in f:
                    temp = line[:-1]
                    if not temp.isdigit():
                        sub += line.replace(',', '.')
                f.close()

                vtt = srt.replace('.srt', '.vtt')
                f = open(vtt, 'w')
                f.write(sub)
                f.close()
    except Exception as e:
예제 #37
0
#!/usr/bin/env python3

import webvtt

# Convert the SRT file to WebVTT; save() is called without a target path.
# The parsed captions get their own name so the imported ``webvtt`` module
# is not rebound to the parsed object.
captions = webvtt.from_srt('/Users/hepting/Downloads/01_CS-428+828-201930_otter_ai.srt')
captions.save()
예제 #38
0
 def test_srt_total_length(self):
     """The total length reported for sample.srt is 23."""
     total = webvtt.from_srt(self._get_file('sample.srt')).total_length
     self.assertEqual(total, 23)