Пример #1
0
def checkFormat(myFile, verbose):
    """Work out which subtitle format a file is written in.

    Args:
        myFile: Path of the subtitle file to inspect.
        verbose: When truthy, progress messages are printed to stdout.

    Returns:
        "srt", "sami" or "dfxp" when the textual form of the reader detected
        by pycaption contains that word, "kanal5" when pycaption detects
        nothing and the content starts with the kanal5 JSON prefix, and an
        empty string otherwise.
    """
    myFormat = ""

    if verbose:
        print("--- Checking subtitle format")

    # The context manager closes the file even when reading raises.
    with open(myFile) as capsFile:
        caps = capsFile.read()

    reader = detect_format(caps)  # detect format with pycaption

    if verbose:
        print("--- pycaption says: %s" % reader)

    if reader:
        reader_name = str(reader)
        # Checked in this order; the first name found in the reader wins.
        for known_format in ("srt", "sami", "dfxp"):
            if known_format in reader_name:
                myFormat = known_format
                break
    else:
        if verbose:
            print("*** pycaption could not detect format")
            print("--- Checking if it's kanal5's own format...")
        if caps.startswith('[{"startMillis":'):
            if verbose:
                print("--- It probably is kanal5 format")
            myFormat = "kanal5"

    return myFormat
Пример #2
0
def download(request, filename_hash, id, slug, extension):
    """Serve stored closed captions converted to the requested format.

    Responds with 404 when the captions cannot be found for the given id and
    event slug, when ``extension`` is not one of the ``FILE_EXTENSIONS``
    values, or when ``filename_hash`` does not match the stored hash.
    """
    captions = get_object_or_404(
        ClosedCaptions,
        id=id,
        event__slug__iexact=slug,
    )
    if extension not in FILE_EXTENSIONS.values():
        raise http.Http404('Unrecognized extension')
    if captions.filename_hash != filename_hash:
        raise http.Http404('Unrecognized hash')

    # No early exit: if several keys map to the extension, the last one wins.
    for writer_key, known_extension in FILE_EXTENSIONS.items():
        if known_extension == extension:
            output_writer = SUPPORTED_WRITERS[writer_key]

    content = captions.file.read()
    lowered_name = captions.file.name.lower()
    # .ttml and .dfxp content is handed to pycaption without decoding.
    if not lowered_name.endswith(('.ttml', '.dfxp')):
        content = content.decode('utf-8')

    reader = pycaption.detect_format(content)
    assert reader
    converter = pycaption.CaptionConverter()
    converter.read(content, reader())

    response = http.HttpResponse()
    response['Content-Type'] = CONTENT_TYPES.get(extension, 'text/plain')
    converted = converter.write(output_writer())
    response.write(converted)
    return response
Пример #3
0
def download(request, filename_hash, id, slug, extension):
    """Return an event's closed captions rendered in the format of ``extension``.

    Raises ``Http404`` for unknown captions, for an extension that is not a
    ``FILE_EXTENSIONS`` value, and for a filename hash that does not match.
    """
    lookup = {'id': id, 'event__slug__iexact': slug}
    closedcaptions = get_object_or_404(ClosedCaptions, **lookup)

    if extension not in FILE_EXTENSIONS.values():
        raise http.Http404('Unrecognized extension')
    if closedcaptions.filename_hash != filename_hash:
        raise http.Http404('Unrecognized hash')

    # Every matching key is looked up in turn; the last match is the one used.
    matching_keys = [
        key for key, ext in FILE_EXTENSIONS.items() if ext == extension
    ]
    for key in matching_keys:
        output_writer = SUPPORTED_WRITERS[key]

    content = closedcaptions.file.read()
    is_ttml = closedcaptions.file.name.lower().endswith('.ttml')
    if not is_ttml and not closedcaptions.file.name.lower().endswith('.dfxp'):
        content = content.decode('utf-8')

    reader = pycaption.detect_format(content)
    assert reader

    converter = pycaption.CaptionConverter()
    converter.read(content, reader())

    content_type = CONTENT_TYPES.get(extension, 'text/plain')
    response = http.HttpResponse()
    response['Content-Type'] = content_type
    response.write(converter.write(output_writer()))
    return response
Пример #4
0
def convert_subtitles(closedcaption):
    """Download closed captions and store each one as an SRT file.

    Args:
        closedcaption: Iterable of ``(url, index)`` pairs. Entries whose URL
            is None are skipped. Each converted caption is written to
            ``subtitle-<index + 1>.srt`` inside ``ustvpaths.DATAPATH``.

    Failures while fetching, converting or writing one entry are printed and
    do not stop the remaining entries from being processed.
    """
    for closedcaption_url, i in closedcaption:
        count = int(i) + 1
        if closedcaption_url is None:
            continue
        try:
            cc_content = common.smart_unicode(
                connection.getURL(closedcaption_url,
                                  connectiontype=0).replace(' 9137', ''))
            reader = detect_format(cc_content)
            if not reader:
                print("Unknown sub type")
                continue
            str_output = common.smart_utf8(
                SRTWriter().write(reader().read(cc_content)))
            srt_path = os.path.join(ustvpaths.DATAPATH,
                                    'subtitle-%s.srt' % str(count))
            # The context manager closes the file even if the write fails.
            with open(srt_path, 'w') as srt_file:
                srt_file.write(str_output)
        except Exception as e:
            # Best effort: report the problem and carry on with the next entry.
            print("Exception with Subs:  %s" % (e,))
Пример #5
0
def middleware_convert_sub(response, **kwargs):
    """Rewrite the response body as WebVTT when its caption format is detected.

    The body is decoded as UTF-8; when no format is detected the response is
    left untouched. Otherwise the converted text replaces the body and the
    ``content-type`` header is set to ``text/vtt``.
    """
    text = response.stream.content.decode('utf8')
    reader_cls = detect_format(text)
    if not reader_cls:
        return

    vtt_text = WebVTTWriter().write(reader_cls().read(text))
    response.stream.content = vtt_text.encode('utf8')
    response.headers['content-type'] = 'text/vtt'
Пример #6
0
 def select_subtitles_file(self, fn):
   """Load the subtitles file `fn`, add it to the subtitle store and select it.

   A missing file schedules an error dialog through GLib.idle_add and nothing
   else happens. Files ending in 'vtt' are read as they are; any other file is
   decoded (default codec first, latin-1 as the fallback), stripped of a
   leading BOM and converted to WebVTT with pycaption.
   """
   if not os.path.isfile(fn):
     def show_missing_file_dialog():
       dialog = Gtk.MessageDialog(self.win, 0, Gtk.MessageType.ERROR, Gtk.ButtonsType.CLOSE, "File Not Found")
       dialog.format_secondary_text("Could not find subtitles file: %s" % fn)
       dialog.run()
       dialog.destroy()
     GLib.idle_add(show_missing_file_dialog)
     return

   fn = os.path.abspath(fn)
   extension = fn.split('.')[-1]
   display_name = os.path.basename(fn)

   if extension == 'vtt':
     with open(fn) as vtt_file:
       self.subtitles = vtt_file.read()
   else:
     with open(fn, 'rb') as raw_file:
       raw = raw_file.read()
     try:
       caps = raw.decode()
     except UnicodeDecodeError:
       caps = raw.decode('latin-1')
     if caps.startswith('\ufeff'):  # BOM
       caps = caps[1:]
     converter = pycaption.CaptionConverter()
     converter.read(caps, pycaption.detect_format(caps)())
     self.subtitles = converter.write(pycaption.WebVTTWriter())

   position = len(self.subtitle_store)
   self.subtitle_store.append([display_name, position - 2, self.subtitles])
   self.subtitle_combo.set_active(position)
Пример #7
0
def convert_subtitles_to_srt(i: str, o: str):
    """Convert the subtitle file ``i`` into an SRT file written to ``o``.

    The strategy is picked from the file extension, compared
    case-insensitively so that e.g. ``.SRT`` is handled like ``.srt``:

    * ``.srt`` files are copied as they are.
    * ``.ttml``, ``.xml``, ``.dfxp`` and ``.tt`` files go through the
      project's TTML converter.
    * Anything else is parsed with the reader pycaption detects; when no
      format is detected, ffmpeg is used as a last resort.

    Args:
        i: Path of the input subtitle file.
        o: Path of the SRT file to create.

    Raises:
        Exception: If the ffmpeg fallback exits with a non-zero status.
    """
    ext = os.path.splitext(i)[1].lower()
    if ext == '.srt':
        import shutil
        shutil.copy(i, o)
    elif ext in ('.ttml', '.xml', '.dfxp', '.tt'):
        # TTML
        from media_management_scripts.support.ttml2srt import convert_to_srt
        convert_to_srt(i, o)
    else:
        # VTT, SCC, etc

        from pycaption import detect_format, SRTWriter
        subtitle_str = _read_file(i)
        reader = detect_format(subtitle_str)
        if reader:
            subtitle_str = SRTWriter().write(reader().read(subtitle_str))
            with open(o, 'w') as file:
                file.write(subtitle_str)
        else:
            # Attempt to use FFMPEG
            from media_management_scripts.support.executables import ffmpeg
            from media_management_scripts.support.executables import execute_with_output
            args = [
                ffmpeg(), '-loglevel', 'fatal', '-y', '-i', i, '-c:s', 'srt', o
            ]
            ret, output = execute_with_output(args)
            if ret != 0:
                raise Exception(
                    'Exception during subtitle conversion: {}'.format(output))
Пример #8
0
    def gen_thumbnail(self):
        """Create a thumbnail for ``self.fn`` and collect its embedded subtitles.

        ffmpeg is run on ``self.fn``: for 'aac', 'mp3' and 'wav' files only the
        metadata is dumped, any other file gets one JPEG frame written to a
        temporary file. The combined ffmpeg output is stored in
        ``self.ffmpeg_desc`` and scanned for the duration (stored in
        ``self.duration``) and for subtitle stream ids. Every subtitle stream
        is then extracted to a temporary SRT file, converted to WebVTT with
        pycaption and appended to ``self.subtitle_store``.

        Widget updates are scheduled with ``GLib.idle_add`` rather than done
        inline.

        Raises:
            subprocess.CalledProcessError: If an ffmpeg call exits non-zero.
        """
        container = self.fn.lower().split(".")[-1]
        thumbnail_fn = None
        subtitle_ids = []
        if container in ('aac', 'mp3', 'wav'):
            cmd = ['ffmpeg', '-i', self.fn, '-f', 'ffmetadata', '-']
        else:
            # mkstemp hands back an open descriptor; close it so it is not
            # leaked. Only the unique name is needed, so the file is removed.
            fd, thumbnail_fn = tempfile.mkstemp(suffix='.jpg',
                                                prefix='gnomecast_thumbnail_')
            os.close(fd)
            os.remove(thumbnail_fn)
            cmd = [
                'ffmpeg', '-y', '-i', self.fn, '-f', 'mjpeg', '-vframes', '1',
                '-ss', '27', '-vf', 'scale=600:-1', thumbnail_fn
            ]
        self.ffmpeg_desc = output = subprocess.check_output(
            cmd, stderr=subprocess.STDOUT)
        for line in output.decode().split('\n'):
            line = line.strip()
            if line.startswith('Duration:'):
                self.duration = parse_ffmpeg_time(line.split()[1].strip(','))
            if line.startswith('Stream') and 'Subtitle' in line:
                stream_id = line.split()[1].strip('#').replace(':', '.')
                # Keep what precedes the "(lang)" suffix. Streams without a
                # language tag have no parenthesis, only a trailing dot.
                stream_id = stream_id.partition('(')[0].rstrip('.')
                subtitle_ids.append(stream_id)
        print('subtitle_ids', subtitle_ids)

        def show_thumbnail():
            if thumbnail_fn:
                self.thumbnail_image.set_from_file(thumbnail_fn)
                os.remove(thumbnail_fn)
                self.win.resize(1, 1)
            self.update_status()

        GLib.idle_add(show_thumbnail)
        new_subtitles = []
        for subtitle_id in subtitle_ids:
            fd, srt_fn = tempfile.mkstemp(suffix='.srt',
                                          prefix='gnomecast_subtitles_')
            os.close(fd)
            try:
                subprocess.check_output([
                    'ffmpeg', '-y', '-i', self.fn, '-vn', '-an',
                    '-codec:s:%s' % subtitle_id, 'srt', srt_fn
                ], stderr=subprocess.STDOUT)
                with open(srt_fn) as srt_file:
                    caps = srt_file.read()
            finally:
                # The temporary file is removed even when extraction fails.
                os.remove(srt_fn)
            print('caps', caps)
            converter = pycaption.CaptionConverter()
            converter.read(caps, pycaption.detect_format(caps)())
            subtitles = converter.write(pycaption.WebVTTWriter())
            new_subtitles.append((subtitle_id, subtitles))

        def add_subtitles():
            pos = len(self.subtitle_store)
            for stream_id, subs in new_subtitles:
                self.subtitle_store.append([stream_id, pos - 2, subs])
                pos += 1

        GLib.idle_add(add_subtitles)
Пример #9
0
def loadSrt(filename):
    """Read a UTF-8 caption file and parse it with the reader pycaption detects.

    Returns:
        A ``(fileid, caps)`` tuple, where ``fileid`` is the file's base name
        up to its first dot and ``caps`` is the parsed caption set, or None
        when no caption format is detected.
    """
    base_name = os.path.basename(filename)
    fileid = base_name.split('.')[0]

    with codecs.open(filename, 'r', 'utf-8') as srtfile:
        content = srtfile.read()

    reader_cls = pycaption.detect_format(content)
    if not reader_cls:
        return None
    return fileid, reader_cls().read(content)
def Subtitleload(inputfile):
    """Convert the caption file ``inputfile`` to SRT.

    The input is read as UTF-8 and parsed with the reader pycaption detects;
    the SRT result is written, as UTF-8, to the hard-coded output path below.

    Args:
        inputfile: Path of the caption file to convert.
    """
    # Context managers close both files, which also guarantees that the
    # converted output is flushed to disk.
    with io.open(inputfile, "r", encoding='utf-8') as source:
        wttcaps = source.read()

    converter = pycaption.CaptionConverter()
    # The converter keeps the parsed captions as the intermediary between
    # the detected reader and the SRT writer.
    reader = pycaption.detect_format(wttcaps)
    converter.read(wttcaps, reader())

    with io.open('youroutputfiledirectorygoeshere.srt', "w",
                 encoding='utf-8') as srtcaps:
        # Write the converted captions into the srtcaps file.
        srtcaps.write(converter.write(SRTWriter()))
Пример #11
0
def convert_file(input_captions, output_writer):
    """Convert caption text using the given pycaption writer.

    Args:
        input_captions: Caption content whose format pycaption should detect.
        output_writer: Writer object handed to ``CaptionConverter.write``.

    Returns:
        Whatever the converter produces for ``output_writer``.

    Raises:
        RuntimeError: If pycaption cannot detect the input format.
    """
    reader_cls = pycaption.detect_format(input_captions)
    if reader_cls:
        converter = pycaption.CaptionConverter()
        converter.read(input_captions, reader_cls())
        return converter.write(output_writer)
    raise RuntimeError('Unrecognized format')
Пример #12
0
def convert_file(input_captions, output_writer):
    """Run ``input_captions`` through pycaption and emit them with ``output_writer``.

    Raises:
        RuntimeError: When the format of ``input_captions`` is not detected.
    """
    detected = pycaption.detect_format(input_captions)
    if not detected:
        raise RuntimeError('Unrecognized format')

    caption_converter = pycaption.CaptionConverter()
    source_reader = detected()
    caption_converter.read(input_captions, source_reader)
    converted = caption_converter.write(output_writer)
    return converted
def convert_subs_to_vtt(input_subs_path, output_vtt_path):
    """Convert the subtitle file at ``input_subs_path`` to WebVTT.

    The content is decoded with the encoding reported by
    ``utils.get_file_encoding``, parsed with the reader pycaption detects and
    written to ``output_vtt_path``.
    """
    with open(input_subs_path, 'r') as subs_file:
        raw = subs_file.read()
        encoding = utils.get_file_encoding(input_subs_path)

    text = raw.decode(encoding)
    reader_cls = detect_format(text)
    captions = reader_cls().read(text)
    vtt_text = WebVTTWriter().write(captions)

    with open(output_vtt_path, 'w') as vtt_file:
        vtt_file.write(vtt_text)
Пример #14
0
def _webvtt(url, _data_path, _headers, **kwargs):
    """Download subtitles from ``url`` and save them as WebVTT at ``_data_path``.

    The response body is decoded as UTF-8 and converted with the reader
    pycaption detects.

    Returns:
        ``_data_path`` with ``|content-type=text/vtt`` appended.
    """
    reply = Session().get(url, headers=_headers)
    text = reply.content.decode('utf8')

    reader_cls = detect_format(text)
    vtt_text = WebVTTWriter().write(reader_cls().read(text))

    with open(_data_path, 'wb') as out_file:
        out_file.write(vtt_text.encode('utf8'))

    content_type_suffix = '|content-type=text/vtt'
    return _data_path + content_type_suffix
Пример #15
0
def middleware_convert_sub(response, **kwargs):
    """Replace the response body with its WebVTT conversion when possible.

    Nothing changes when pycaption detects no format. When ``ADDON_DEV`` is
    truthy, a copy of the converted subtitles is also written to
    ``special://temp/convert_sub.middleware``.
    """
    text = response.stream.content.decode('utf8')
    reader_cls = detect_format(text)
    if not reader_cls:
        return

    vtt_text = WebVTTWriter().write(reader_cls().read(text))

    if ADDON_DEV:
        real_path = xbmc.translatePath('special://temp/convert_sub.middleware')
        with open(real_path, 'wb') as dump_file:
            dump_file.write(vtt_text.encode('utf8'))

    response.stream.content = vtt_text.encode('utf8')
    response.headers['content-type'] = 'text/vtt'
Пример #16
0
 def clean_file(self):
     """Validate the uploaded ``file`` field as a caption file.

     The upload is accepted only when pycaption detects a format for its
     content and its file extension is one of the known caption extensions.

     Raises:
         forms.ValidationError: If either check fails.
     """
     uploaded = self.cleaned_data['file']
     # it must be possible to read it with pycaption
     reader = pycaption.detect_format(uploaded.read())
     extension = uploaded.name.lower().rsplit('.', 1)[-1]
     valid_extensions = ('sami', 'ttml', 'srt', 'dfxp', 'vtt', 'scc')
     # The extension is checked as well because content detection alone is
     # too permissive: a .py file, for example, is recognized by the
     # WebVTTReader reader.
     if reader and extension in valid_extensions:
         return uploaded
     raise forms.ValidationError(
         'Not a valid caption file that could be recognized')
def download_subtitle(url, destination):
    """Fetch a subtitle from ``url`` and store it as UTF-8 SRT at ``destination``.

    An existing file at ``destination`` is deleted before writing.

    Returns:
        True when the subtitle was written; False when ``url`` is empty, the
        request did not succeed or no caption format was detected.
    """
    if not url:
        return False

    reply = requests.get(url)
    if not reply.ok:
        return False

    text = reply.text
    reader_cls = detect_format(text)
    if not reader_cls:
        return False

    srt_text = SRTWriter().write(reader_cls().read(text))
    if xbmcvfs.exists(destination):
        xbmcvfs.delete(destination)

    out_file = xbmcvfs.File(destination, 'w')
    out_file.write(srt_text.encode("utf-8"))
    out_file.close()
    return True
Пример #18
0
 def clean_file(self):
     """Check that the uploaded ``file`` is a caption file pycaption can read.

     Returns:
         The uploaded file object from ``cleaned_data``.

     Raises:
         forms.ValidationError: When no format is detected for the content
             or the file extension is not a known caption extension.
     """
     value = self.cleaned_data['file']
     # it must be possible to read it with pycaption
     detected_reader = pycaption.detect_format(value.read())
     name_parts = value.name.lower().split('.')
     extension = name_parts[-1]
     # Checking the extension too matters: content detection on its own
     # would, for example, accept a .py file as WebVTT.
     extension_ok = extension in (
         'sami', 'ttml', 'srt', 'dfxp', 'vtt', 'scc'
     )
     if not (detected_reader and extension_ok):
         raise forms.ValidationError(
             'Not a valid caption file that could be recognized'
         )
     return value
Пример #19
0
    def convert_caps_to_vtt(caps):
        """
        Convert raw transcripts of any supported format into WebVTT.

        Supported input formats: DFXP/TTML - SAMI - SCC - SRT - WebVTT.

        Arguments:
            caps (unicode): Raw transcripts.
        Returns:
            unicode: Transcripts converted into WebVTT format, or an empty
            string when ``caps`` is empty or its format is not detected.
        """
        if not caps:
            return u''

        reader_cls = detect_format(caps)
        if not reader_cls:
            return u''

        parsed = reader_cls().read(caps)
        return WebVTTWriter().write(parsed)
Пример #20
0
 def select_subtitles_file(self, fn):
     """Load the subtitles file ``fn``, add it to the subtitle store and select it.

     Files whose name ends in 'vtt' are read as they are; any other file is
     stripped of a leading BOM and converted to WebVTT with pycaption. The
     result is kept in ``self.subtitles``.
     """
     extension = fn.split('.')[-1]
     display_name = os.path.basename(fn)

     if extension != 'vtt':
         with open(fn) as caption_file:
             caps = caption_file.read()
         if caps.startswith('\ufeff'):  # BOM
             caps = caps[1:]
         converter = pycaption.CaptionConverter()
         converter.read(caps, pycaption.detect_format(caps)())
         self.subtitles = converter.write(pycaption.WebVTTWriter())
     else:
         with open(fn) as vtt_file:
             self.subtitles = vtt_file.read()

     position = len(self.subtitle_store)
     self.subtitle_store.append([display_name, position - 2, self.subtitles])
     self.subtitle_combo.set_active(position)
Пример #21
0
    def save_videojs(self, data, suffix=''):
        """
        The saving handler.

        Stores the display name and URL from ``data``. Then, for every
        language in ``self.languages``, the submitted ``subtitle_text_<lang>``
        is converted to WebVTT, HTML-unescaped, stored in ``self.subtitles``
        and passed to ``self.create_subtitles_file``. An empty text stores an
        empty string for that language.

        Returns:
            ``{'result': 'success'}`` when everything was saved, otherwise a
            400 JSON ``Response`` for the first language whose subtitles could
            not be detected or converted.
        """
        i18n_ = self.runtime.service(self, "i18n").ugettext

        def conversion_error(language):
            """Build the 400 response used when subtitles cannot be converted."""
            return Response(json.dumps({
                'error':
                i18n_(
                    "Error occurred while saving VTT subtitles for language %s"
                ) % language.upper()
            }),
                            status=400,
                            content_type='application/json',
                            charset='utf8')

        self.display_name = data['display_name']
        self.url = data['url'].strip()

        for language in self.languages.keys():
            subtitle_text = data['subtitle_text_' + language].strip()
            if not subtitle_text:
                self.subtitles[language] = ""
                continue

            reader = detect_format(subtitle_text)
            if not reader:
                return conversion_error(language)

            try:
                subtitle = WebVTTWriter().write(reader().read(subtitle_text))
            except Exception:
                # Conversion failures are reported to the client; interpreter
                # exits and keyboard interrupts are no longer swallowed.
                return conversion_error(language)

            h = HTMLParser()
            self.subtitles[language] = h.unescape(subtitle)

            self.create_subtitles_file(self.subtitles[language])

        return {'result': 'success'}
Пример #22
0
def parse_subtitles(path):
    """Parse the caption file at ``path`` and return its first language's captions.

    The file is read as latin-1 and ``tt:`` prefixes are removed from opening
    and closing tags before pycaption sees the content.

    Returns:
        The captions of the first language in the parsed set, or None when
        the file does not exist, no format is detected or parsing raises (the
        error is printed in that case).
    """
    if not os.path.exists(path):
        return None

    with open(path, 'r', encoding='latin-1') as fp:
        raw = fp.read()

    content = raw.replace('<tt:', '<').replace('</tt:', '</')
    reader_cls = detect_format(content)
    if not reader_cls:
        return None

    try:
        caption_set = reader_cls().read(content)
        first_language = caption_set.get_languages()[0]
        return caption_set.get_captions(first_language)
    except Exception as ex:
        print('ERROR: {}'.format(ex))
    return None
Пример #23
0
 def transform_old_subtitle_to_new_form_if_exist(self, modify=False):
     """
     Move the legacy ``subtitle_text`` into the per-language subtitles dict
     under the ``'pl'`` key, converted to WebVTT and HTML-unescaped.

     Nothing is converted when ``self.subtitles`` already has a ``'pl'`` entry
     or when ``self.subtitle_url`` or ``self.subtitle_text`` is empty; a deep
     copy of ``self.subtitles`` is returned in that case.

     NOTE(review): the original note said ``self.subtitles`` can only be
     modified from studio_view and save_videojs, which is presumably why the
     change is permanent only with ``modify=True`` - confirm against callers.

     Args:
         modify: When False (default) the updated copy is returned and
             ``self`` is left untouched. When True the copy is stored in
             ``self.subtitles``, ``self.subtitle_url`` and
             ``self.subtitle_text`` are cleared, and None is returned.

     Returns:
         The subtitles dict, None (``modify=True``), or a 400 JSON
         ``Response`` when the legacy text cannot be detected or converted.
     """
     i18n_ = self.runtime.service(self, "i18n").ugettext
     subtitles = copy.deepcopy(self.subtitles)

     if 'pl' in self.subtitles or not (self.subtitle_url
                                       and self.subtitle_text):
         return subtitles

     def conversion_error():
         """Build the 400 response used when the legacy text is unusable."""
         return Response(json.dumps({
             'error':
             i18n_(
                 "Error occurred while saving VTT subtitles for language PL"
             )
         }),
                         status=400,
                         content_type='application/json',
                         charset='utf8')

     reader = detect_format(self.subtitle_text)
     if not reader:
         return conversion_error()

     try:
         subtitle = WebVTTWriter().write(reader().read(self.subtitle_text))
     except Exception:
         # Conversion failures are reported to the client; interpreter exits
         # and keyboard interrupts are no longer swallowed.
         return conversion_error()

     h = HTMLParser()
     subtitles['pl'] = h.unescape(subtitle)
     if not modify:
         return subtitles

     self.subtitles = subtitles
     self.subtitle_url = self.subtitle_text = ""
Пример #24
0
def convert_to_vtt(caps):
    """Convert subtitles to WebVTT format

    Note that if the subtitles are already in VTT format, nothing is done
    beyond stripping a surrounding BOM and line breaks.

    Args:
        caps (bytes or unicode): UTF-8 encoded bytes are decoded first.
    Returns:
        caps (unicode): None if the format could not be detected.
    """
    # ``bytes`` is the same type as ``str`` on Python 2, so behaviour there is
    # unchanged, while text input on Python 3 (which has no ``decode``) is no
    # longer sent down the decoding path.
    if isinstance(caps, bytes):
        caps = caps.decode('utf-8')
    caps = caps.strip(u"\ufeff").strip(u"\n").strip(u"\r")
    sub_reader = pycaption.detect_format(caps)
    if sub_reader is None:
        return None
    if sub_reader != pycaption.WebVTTReader:
        caps = pycaption.WebVTTWriter().write(sub_reader().read(caps))
    return caps
Пример #25
0
def convert_to_vtt(caps):
    """Convert subtitles to WebVTT format

    Subtitles that are already detected as WebVTT are returned without being
    re-written; only a surrounding BOM and line breaks are stripped.

    Args:
        caps (bytes or unicode): Byte input is decoded as UTF-8.
    Returns:
        caps (unicode): None if the format could not be detected.
    """
    if isinstance(caps, bytes):
        # On Python 2 ``bytes`` is ``str``, so this matches the previous
        # check there; on Python 3 it stops text from being "decoded", which
        # used to fail because ``str`` has no ``decode`` method.
        caps = caps.decode('utf-8')
    caps = caps.strip(u"\ufeff").strip(u"\n").strip(u"\r")

    sub_reader = pycaption.detect_format(caps)
    if sub_reader is None:
        return None
    if sub_reader != pycaption.WebVTTReader:
        caps = pycaption.WebVTTWriter().write(sub_reader().read(caps))
    return caps
Пример #26
0
def get_reader(captions, options):
    """Return the detected caption reader class and the options to read with.

    Args:
        captions: Caption content handed to ``pycaption.detect_format``.
        options: Object with ``offset`` and ``lang`` attributes; they are
            consulted only when the detected reader is ``SCCReader``.

    Returns:
        A ``(reader, reader_options)`` tuple; ``reader_options`` always holds
        the content under ``'content'``.

    Raises:
        Exception: If no format is detected, including when detection fails
            with an ``IndexError``.
    """
    try:
        reader = pycaption.detect_format(captions)
    except IndexError:
        reader = None

    if reader is None:
        raise Exception('No caption format detected')

    reader_options = {'content': captions}
    if reader is pycaption.SCCReader:
        # Only truthy settings are forwarded to the reader.
        for option_name in ('offset', 'lang'):
            option_value = getattr(options, option_name)
            if option_value:
                reader_options[option_name] = option_value

    return reader, reader_options
Пример #27
0
def convert_subs(subtitle_path: Path):
    """Convert a subtitle file to SRT with pycaption, then process the result.

    The SRT file is written next to the input, with the suffix replaced by
    ``.srt``, and is then passed to ``srt_to_timestamps``.

    Args:
        subtitle_path -- path of subtitles to convert
    Return True if successful, False when no subtitle format is detected.
    """
    with open(subtitle_path, encoding='utf-8') as source:
        subtitles = source.read()

    reader_cls = pycaption.detect_format(subtitles)
    if not reader_cls:
        return False

    caption_set = reader_cls().read(subtitles)
    srt_subtitles = pycaption.SRTWriter().write(caption_set)

    srt_path = subtitle_path.with_suffix('.srt')
    with open(srt_path, 'w', encoding='utf-8') as target:
        target.write(srt_subtitles)

    srt_to_timestamps(subtitle_path.with_suffix('.srt'))
    return True
Пример #28
0
def convert_subtitles(closedcaption):
	"""Fetch closed captions and save each of them as an SRT file.

	Args:
		closedcaption: Iterable of ``(url, index)`` pairs. Pairs with a None
			URL are skipped. The SRT for a pair is written to
			``subtitle-<index + 1>.srt`` inside ``ustvpaths.DATAPATH``.

	A failure while fetching, converting or writing one entry is printed and
	the loop carries on with the next entry.
	"""
	for closedcaption_url, i in closedcaption:
		count = int(i) + 1
		if closedcaption_url is None:
			continue
		try:
			cc_content = common.smart_unicode(connection.getURL(closedcaption_url, connectiontype = 0).replace(' 9137', ''))
			reader = detect_format(cc_content)
			if not reader:
				print("Unknown sub type")
				continue
			str_output = common.smart_utf8(SRTWriter().write(reader().read(cc_content)))
			srt_path = os.path.join(ustvpaths.DATAPATH, 'subtitle-%s.srt' % str(count))
			# The context manager closes the file even if the write fails.
			with open(srt_path, 'w') as srt_file:
				srt_file.write(str_output)
		except Exception as e:
			# Best effort: report the problem and continue with the next entry.
			print("Exception with Subs:  %s" % (e,))
Пример #29
0
def convert_to_vtt(path):
    """
    Convert a subtitles file from any format (e.g: srt) to vtt. This is
    necessary for use with videojs, which supports only vtt subtitles.

    The file is decoded as UTF-8; if that fails, the encoding guessed by
    chardet is used instead.

    Args:
        path: Path of the subtitles file to convert.

    Returns:
        The subtitles as WebVTT text, or None when pycaption cannot detect
        the format. Content already detected as WebVTT is returned as read,
        apart from a stripped BOM and surrounding line breaks.
    """
    # The context manager closes the file handle as soon as it has been read.
    with open(path, 'rb') as subtitle_file:
        caps = subtitle_file.read()
    try:
        caps = caps.decode('utf8')
    except UnicodeDecodeError:
        # Attempt to read with custom encoding
        detected = chardet.detect(caps)
        caps = caps.decode(detected['encoding'])

    caps = caps.strip("\ufeff").strip("\n").strip("\r")
    sub_reader = pycaption.detect_format(caps)
    if sub_reader is None:
        return None
    if sub_reader != pycaption.WebVTTReader:
        read_caps = sub_reader().read(caps)
        caps = pycaption.WebVTTWriter().write(read_caps)
    return caps
Пример #30
0
def get_captions(url):
    """
    Download the caption file at ``url`` and return the captions it contains.

    The process exits with status 1, after printing an error, when the
    download does not return HTTP 200, when the caption format cannot be
    determined or when the file holds more than one language.
    """
    response = requests.get(url)
    status = response.status_code
    if status != 200:
        print('Error fetching "%s": HTTP response %d' % (url, status))
        sys.exit(1)

    caption_text = response.text
    reader_cls = pycaption.detect_format(caption_text)
    if not reader_cls:
        print("Error: Could not determine caption format!")
        sys.exit(1)

    caption_set = reader_cls().read(caption_text)
    languages = caption_set.get_languages()
    if len(languages) > 1:
        joined = ", ".join(languages)
        print("Error: too many languages in caption file: %s" % joined)
        sys.exit(1)

    return caption_set.get_captions(languages[0])
Пример #31
0
 def update_subtitles(self):
   """Rebuild the subtitle store from the streams embedded in `self.fn`.

   ffprobe output is scanned for subtitle streams; each one is extracted to a
   temporary SRT file with ffmpeg and converted to WebVTT with pycaption. The
   store is then reset (through GLib.idle_add) to its two fixed entries
   followed by the extracted subtitles. Finally, the first existing file that
   shares the name of `self.fn` but ends in 'vtt' or 'srt' is loaded with
   `select_subtitles_file`.

   Raises:
     subprocess.CalledProcessError: If ffprobe or ffmpeg exits non-zero.
   """
   subtitle_ids = []
   cmd = ['ffprobe', '-i', self.fn]
   output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
   for line in output.decode().split('\n'):
     line = line.strip()
     if line.startswith('Stream') and 'Subtitle' in line:
       stream_id = line.split()[1].strip('#').replace(':','.')
       # Keep what precedes the "(lang)" suffix. Streams without a language
       # tag have no parenthesis, only a trailing dot.
       stream_id = stream_id.partition('(')[0].rstrip('.')
       subtitle_ids.append(stream_id)
   print('subtitle_ids', subtitle_ids)
   new_subtitles = []
   for subtitle_id in subtitle_ids:
     # mkstemp hands back an open descriptor; close it so it is not leaked.
     fd, srt_fn = tempfile.mkstemp(suffix='.srt', prefix='gnomecast_subtitles_')
     os.close(fd)
     try:
       subprocess.check_output(['ffmpeg', '-y', '-i', self.fn, '-vn', '-an', '-codec:s:%s' % subtitle_id, 'srt', srt_fn], stderr=subprocess.STDOUT)
       with open(srt_fn) as srt_file:
         caps = srt_file.read()
     finally:
       # The temporary file is removed even when extraction fails.
       os.remove(srt_fn)
     converter = pycaption.CaptionConverter()
     converter.read(caps, pycaption.detect_format(caps)())
     subtitles = converter.write(pycaption.WebVTTWriter())
     new_subtitles.append((subtitle_id, subtitles))
   def refresh_store():
     self.subtitle_store.clear()
     self.subtitle_store.append(["No subtitles.", -1, None])
     self.subtitle_store.append(["Add subtitle file...", -2, None])
     self.subtitle_combo.set_active(0)
     pos = len(self.subtitle_store)
     for stream_id, subs in new_subtitles:
       self.subtitle_store.append([stream_id, pos-2, subs])
       pos += 1
   GLib.idle_add(refresh_store)
   ext = self.fn.split('.')[-1]
   sexts = ['vtt', 'srt']
   for sext in sexts:
     candidate = self.fn[:-len(ext)] + sext
     if os.path.isfile(candidate):
       self.select_subtitles_file(candidate)
       break
Пример #32
0
    def save_videojs(self, data, suffix=''):
        """
        The saving handler.

        Copies the submitted fields from ``data`` onto the block. When
        ``subtitle_text`` is present and pycaption detects its format, the
        text is converted to WebVTT and written to a new, randomly named file
        under ``MEDIA_ROOT + 'subtitle/polish/'``; ``self.subtitle_url`` then
        points at that file, otherwise it is left empty.

        Returns:
            ``{'result': 'success'}``.
        """
        self.display_name = data['display_name']
        self.url = data['url'].strip()
        # Str to Bool translation
        self.allow_download = data['allow_download'] == "True"
        self.source_text = data['source_text']

        subtitle_dir = settings.MEDIA_ROOT + 'subtitle/polish/'
        if not os.path.exists(subtitle_dir):
            os.makedirs(subtitle_dir)

        self.subtitle_url = ''
        if data['subtitle_text']:
            reader = detect_format(data['subtitle_text'])
            if reader:
                subtitle = WebVTTWriter().write(reader().read(
                    data['subtitle_text']))

                filename = str(uuid.uuid4())

                # The context manager closes the file even if the write fails.
                with codecs.open(subtitle_dir + filename, 'w', 'utf-8') as f:
                    f.write(subtitle)

                self.subtitle_url = settings.MEDIA_URL + 'subtitle/polish/' + filename

        self.source_url = data['source_url'].strip()
        self.subtitle_text = data['subtitle_text']
        self.start_time = ''.join(
            data['start_time'].split())  # Remove whitespace
        self.end_time = ''.join(data['end_time'].split())  # Remove whitespace

        return {
            'result': 'success',
        }
Пример #33
0
    def get_subtitles(self, captions):
        """Download the given captions and store each one as an SRT file.

        Args:
            captions: Iterable of dicts with a ``'file'`` URL and a ``'code'``
                that becomes part of the generated file name.

        Returns:
            List of the SRT file paths that were written. Captions that fail
            to download or parse are logged at debug level and skipped.
        """
        subtitles = []

        for idx, caption in enumerate(captions):
            try:
                r = self._session.get(caption['file'])
                reader = detect_format(r.text)
                srt = SRTWriter().write(reader().read(r.text))
            except Exception:
                # Best effort per caption; interpreter exits and keyboard
                # interrupts are no longer swallowed.
                log.debug('Failed to parse subtitle: {}'.format(
                    caption['file']))
            else:
                srtfile = xbmc.translatePath(
                    'special://temp/curiosity{}.{}.srt'.format(
                        idx, caption['code'])).decode('utf-8')

                with codecs.open(srtfile, "w", "utf-8") as f:
                    f.write(srt)

                subtitles.append(srtfile)

        return subtitles
Пример #34
0
def get_captions(url):
    '''
    Fetch the caption file at ``url`` and return the list of captions in it.

    Exits the process with status 1 (after printing the reason) when the
    fetch does not return HTTP 200, the caption format is not detected or
    the file contains more than one language.
    '''
    res = requests.get(url)
    if not res.status_code == 200:
        message = 'Error fetching "%s": HTTP response %d'
        print(message % (url, res.status_code))
        sys.exit(1)

    caps = res.text
    detected = pycaption.detect_format(caps)
    if not detected:
        print('Error: Could not determine caption format!')
        sys.exit(1)

    parsed = detected().read(caps)
    langs = parsed.get_languages()
    if len(langs) > 1:
        print('Error: too many languages in caption file: %s' %
              (', '.join(langs)))
        sys.exit(1)

    only_language = langs[0]
    return parsed.get_captions(only_language)
Пример #35
0
def upload_subtitle(public_video_id, subtitle_public_id, language_code,
                    content):
    """
    Convert subtitle to VTT and upload it through the backend.

    Args:
        public_video_id (str)
        subtitle_public_id (str)
        language_code (str)
        content (bytes): UTF-8 encoded subtitle in any format pycaption
            detects.

    Raises:
        exceptions.SubtitleInvalid: If the subtitle format is not detected.
    """
    # Note: if this ever raises an exception, we should convert it to SubtitleInvalid
    text = content.decode('utf-8')

    # Drop a surrounding BOM and line breaks before detecting the format.
    text = text.strip("\ufeff\n\r")
    reader_cls = pycaption.detect_format(text)
    if reader_cls is None:
        raise exceptions.SubtitleInvalid("Could not detect subtitle format")

    # Convert to VTT, whatever the initial format; VTT input is sent as is.
    already_vtt = reader_cls == pycaption.WebVTTReader
    if not already_vtt:
        parsed = reader_cls().read(text)
        text = pycaption.WebVTTWriter().write(parsed)

    uploader = backend.get()
    uploader.upload_subtitle(public_video_id, subtitle_public_id,
                             language_code, text)
Пример #36
0
    def v2():
        """Print the wall-clock segments of an episode's applied cuts.

        Parses a hard-coded SCC transcript with pycaption, collects the
        (start, end) wall times of every applied ``Cut_List`` row of
        ``episode`` and prints them. ``self`` and ``episode`` are not
        parameters; they are taken from the enclosing scope.

        NOTE(review): the per-episode output file is opened for writing, but
        the code that wrote to it is disabled (see the string literal in the
        loop), so the file is left empty.
        """
        transcript_filename = '12022017 NBPY SCC.scc'
        transcript_pathname = os.path.join( self.show_dir,
              "assets", "transcripts", transcript_filename )

        # Read the whole transcript and let pycaption pick the reader.
        caps = open(transcript_pathname, encoding='iso-8859-1').read()
        reader = pycaption.detect_format(caps)
        transcript = reader().read(caps)
        language = transcript.get_languages()[0] # ['en-US']
        # Only referenced by the disabled code further down.
        lines = transcript.get_captions(language)

        # Applied cuts of the episode, in sequence order.
        cls = Cut_List.objects.filter(
            episode=episode, apply=True).order_by('sequence')

        # video_start = None

        ep_t_out_filename = os.path.join( self.show_dir,
                "transcripts", episode.slug + ".scc" )

# head 12022017\ North\ Bay\ Day\ 1.txt
# 10:06:56:00 >> Hi, everyone!  Welcome to
        # Hard-coded start time; only referenced by the disabled code below.
        transcript_start = datetime.datetime.strptime(
                "12022017 10:06:56", '%m%d%Y %H:%M:%S' )

        with open(ep_t_out_filename,'w') as f:

            segs=[]
            for cl in cls:

                start = cl.get_start_wall()
                end = cl.get_end_wall()
                segs.append( (start,end) )

                # wall time of the start of the first clip:
                # if video_start is None:
                #    video_start = start

                # The string literal below is disabled, unfinished code kept
                # as a bare expression; it has no effect at runtime.
                """
                loffset_ms = (
                        transcript_start - video_start
                        ).total_seconds() * 1000

                print( {start





                for l in transcript:
                  if start <= l['timestamp']  <= end:

                      offset = l['timestamp'] - video_start
                      seconds = int(offset.total_seconds())
                      hms = seconds//3600, (seconds%3600)//60, seconds%60
                      hms = "{}:{}:{}".format(*hms)

                      l = "{}: {}".format( hms, l['text'])
                      f.write(l+'\n')

                """
        pprint( segs )
        # Print each segment as "HH:MM:SS - HH:MM:SS".
        for s,e in segs:
            # print("s: {}".format(s))
            # print("e: {}".format(e))
            # df='%Y-%m-%d %H:%M:%S'
            df="%H:%M:%S"
            print( "{} - {}".format ( s.strftime(df), e.strftime(df) ) )
Пример #37
0
 def set_transcript_from_file(self):
     """Fill ``self.transcript`` from the content of ``self.file``.

     The content is parsed with the reader pycaption detects and serialised
     with ``JSONTranscriptWriter``.
     """
     raw_content = self.file.read()
     reader_cls = pycaption.detect_format(raw_content)

     caption_converter = pycaption.CaptionConverter()
     caption_converter.read(raw_content, reader_cls())

     writer = JSONTranscriptWriter()
     self.transcript = caption_converter.write(writer)
Пример #38
0
    def v2():
        """Collect and print the wall-time ranges of an episode's applied cuts.

        A hard-coded SCC transcript is parsed with pycaption, then the
        (start, end) wall times of each applied ``Cut_List`` entry of
        ``episode`` are gathered into ``segs`` and printed. ``self`` and
        ``episode`` come from the enclosing scope, not from parameters.

        NOTE(review): the output file for the episode is opened for writing,
        yet nothing is written to it because the writing code is disabled
        inside a string literal; the file ends up empty.
        """
        transcript_filename = '12022017 NBPY SCC.scc'
        transcript_pathname = os.path.join( self.show_dir,
              "assets", "transcripts", transcript_filename )

        # The transcript is read in full and parsed by the detected reader.
        caps = open(transcript_pathname, encoding='iso-8859-1').read()
        reader = pycaption.detect_format(caps)
        transcript = reader().read(caps)
        language = transcript.get_languages()[0] # ['en-US']
        # Not used by any live code in this function.
        lines = transcript.get_captions(language)

        # Cut list rows flagged ``apply`` for this episode, by sequence.
        cls = Cut_List.objects.filter(
            episode=episode, apply=True).order_by('sequence')

        # video_start = None

        ep_t_out_filename = os.path.join( self.show_dir,
                "transcripts", episode.slug + ".scc" )

# head 12022017\ North\ Bay\ Day\ 1.txt
# 10:06:56:00 >> Hi, everyone!  Welcome to
        # Not used by any live code in this function.
        transcript_start = datetime.datetime.strptime(
                "12022017 10:06:56", '%m%d%Y %H:%M:%S' )

        with open(ep_t_out_filename,'w') as f:

            segs=[]
            for cl in cls:

                start = cl.get_start_wall()
                end = cl.get_end_wall()
                segs.append( (start,end) )

                # wall time of the start of the first clip:
                # if video_start is None:
                #    video_start = start

                # Disabled, incomplete code follows as a bare string literal;
                # it is evaluated and discarded on every iteration.
                """
                loffset_ms = (
                        transcript_start - video_start
                        ).total_seconds() * 1000

                print( {start





                for l in transcript:
                  if start <= l['timestamp']  <= end:

                      offset = l['timestamp'] - video_start
                      seconds = int(offset.total_seconds())
                      hms = seconds//3600, (seconds%3600)//60, seconds%60
                      hms = "{}:{}:{}".format(*hms)

                      l = "{}: {}".format( hms, l['text'])
                      f.write(l+'\n')

                """
        pprint( segs )
        # One "start - end" line per segment, formatted as HH:MM:SS.
        for s,e in segs:
            # print("s: {}".format(s))
            # print("e: {}".format(e))
            # df='%Y-%m-%d %H:%M:%S'
            df="%H:%M:%S"
            print( "{} - {}".format ( s.strftime(df), e.strftime(df) ) )
Пример #39
0
def event_closed_captions_submit(request, event_id, id):
    """Submit an event's closed captions file to Vid.ly, or show the form.

    On a valid POST the public download URL of the captions (in the chosen
    file format) is sent to Vid.ly, the submission is appended to
    ``closedcaptions.submission_info['submissions']``, and the user is
    redirected to the event's closed captions page.

    On GET, or on a POST with an invalid form, the captions file is read and
    parsed with pycaption so the template can show the last caption timestamp.

    :param request: the incoming HTTP request
    :param event_id: primary key of the ``Event``
    :param id: primary key of the ``ClosedCaptions`` belonging to that event
    :returns: a redirect after a successful submission, otherwise the
        rendered ``manage/event_closed_captions_submit.html`` page
    """
    event = get_object_or_404(Event, id=event_id)
    closedcaptions = get_object_or_404(ClosedCaptions, event=event, id=id)

    # XXX This might change. Instead of using the same tag as the one
    # being used, we might use a custom one just for the transcription
    # service.
    current_tag = event.template_environment['tag']
    # Single-element unpacking: raises ValueError unless exactly one
    # submission matches the event and tag.
    [submission] = VidlySubmission.objects.filter(event=event, tag=current_tag)

    if request.method != 'POST':
        form = forms.SubmitClosedCaptionsForm()
    else:
        form = forms.SubmitClosedCaptionsForm(request.POST)
        if form.is_valid():
            file_format = form.cleaned_data['file_format']
            base_url = get_base_url(request)
            download_args = (
                closedcaptions.filename_hash,
                closedcaptions.id,
                event.slug,
                file_format,
            )
            public_url = base_url + reverse(
                'closedcaptions:download', args=download_args)

            # Send it in
            vidly.update_media_closed_captions(
                submission.tag,
                submission.url,
                public_url,
                hd=submission.hd,
                notify_url=None  # XXX improve this some day
            )

            # Make sure there is a history list to append to before
            # recording this submission.
            if not closedcaptions.submission_info:
                closedcaptions.submission_info = {}
            info = closedcaptions.submission_info
            if not info.get('submissions'):
                info['submissions'] = []
            record = {
                'tag': submission.tag,
                'url': submission.url,
                'public_url': public_url,
                'hd': submission.hd,
                'date': timezone.now().isoformat(),
            }
            info['submissions'].append(record)
            closedcaptions.save()

            success_message = '{} submitted for Vid.ly transcoding'.format(
                public_url)
            messages.success(request, success_message)
            return redirect('manage:event_closed_captions', event.id)

    # Reached on GET and on an invalid POST: parse the captions to find the
    # last timestamp for display.
    content = closedcaptions.file.read()
    # NOTE(review): detect_format() may return None for an unrecognized
    # file, in which case reader() below raises TypeError - confirm that
    # only recognized formats can be uploaded.
    reader = pycaption.detect_format(content)
    converter = pycaption.CaptionConverter()
    converter.read(content, reader())
    last_timestamp = converter.write(LastTimestampWriter())
    # Presumably converts microseconds to seconds - verify against
    # LastTimestampWriter.
    last_caption = last_timestamp / 1000000

    return render(
        request,
        'manage/event_closed_captions_submit.html',
        {
            'form': form,
            'event': closedcaptions.event,
            'closedcaptions': closedcaptions,
            'last_caption': last_caption,
            'submission': submission,
        },
    )
Пример #40
0
def event_closed_captions_submit(request, event_id, id):
    """Handle the "submit closed captions to Vid.ly" management page.

    GET renders the submission form together with the timestamp of the last
    caption in the uploaded file. A valid POST builds the public download
    URL for the captions in the requested format, hands it to Vid.ly, logs
    the submission on the ``ClosedCaptions`` record and redirects back to
    the event's closed captions page. An invalid POST re-renders the form.

    :param request: the incoming HTTP request
    :param event_id: primary key of the ``Event``
    :param id: primary key of the ``ClosedCaptions`` for that event
    :returns: an HTTP redirect on success, otherwise the rendered
        ``manage/event_closed_captions_submit.html`` template
    """
    event = get_object_or_404(Event, id=event_id)
    closedcaptions = get_object_or_404(ClosedCaptions, event=event, id=id)

    # XXX This might change. Instead of using the same tag as the one
    # being used, we might use a custom one just for the transcription
    # service.
    candidates = VidlySubmission.objects.filter(
        event=event,
        tag=event.template_environment['tag'],
    )
    # Exactly one matching submission is required; the unpacking raises
    # ValueError for zero or several matches.
    (submission,) = candidates

    is_post = request.method == 'POST'
    if is_post:
        form = forms.SubmitClosedCaptionsForm(request.POST)
    else:
        form = forms.SubmitClosedCaptionsForm()

    if is_post and form.is_valid():
        file_format = form.cleaned_data['file_format']
        base_url = get_base_url(request)
        download_path = reverse(
            'closedcaptions:download',
            args=(
                closedcaptions.filename_hash,
                closedcaptions.id,
                event.slug,
                file_format,
            ),
        )
        public_url = base_url + download_path

        # Send it in
        vidly.update_media_closed_captions(
            submission.tag,
            submission.url,
            public_url,
            hd=submission.hd,
            notify_url=None  # XXX improve this some day
        )

        # Lazily create the submission history before appending to it.
        if not closedcaptions.submission_info:
            closedcaptions.submission_info = {}
        history = closedcaptions.submission_info
        if not history.get('submissions'):
            history['submissions'] = []
        history['submissions'].append(dict(
            tag=submission.tag,
            url=submission.url,
            public_url=public_url,
            hd=submission.hd,
            date=timezone.now().isoformat(),
        ))
        closedcaptions.save()

        notice = '{} submitted for Vid.ly transcoding'.format(public_url)
        messages.success(request, notice)
        return redirect('manage:event_closed_captions', event.id)

    # Not submitted (GET or invalid form): work out the last caption
    # timestamp to display next to the form.
    content = closedcaptions.file.read()
    # NOTE(review): if pycaption cannot detect the format, reader is None
    # and the reader() call below raises TypeError - confirm uploads are
    # validated beforehand.
    reader = pycaption.detect_format(content)
    converter = pycaption.CaptionConverter()
    converter.read(content, reader())
    raw_last = converter.write(LastTimestampWriter())
    # Presumably microseconds scaled down to seconds - verify against
    # LastTimestampWriter.
    last_caption = raw_last / 1000000

    context = dict(
        form=form,
        event=closedcaptions.event,
        closedcaptions=closedcaptions,
        last_caption=last_caption,
        submission=submission,
    )
    return render(request, 'manage/event_closed_captions_submit.html', context)