Exemplo n.º 1
0
def read_captions(captions, options):
    """Detect the caption format of ``captions`` and parse it.

    Formats are probed in a fixed order: SCC, SRT, SAMI, DFXP, WebVTT.
    The first reader whose ``detect`` accepts the content parses it.

    Args:
        captions: The caption content handed to each reader's ``detect``
            and ``read``.
        options: Object exposing ``read_invalid_positioning``, ``lang`` and
            ``offset``. ``lang`` and ``offset`` are only used for SCC.

    Returns:
        Whatever the matching reader's ``read`` returns.

    Raises:
        Exception: If no reader recognises the content.
    """
    positioning = dict(
        read_invalid_positioning=options.read_invalid_positioning)

    # Every reader is built up front, in probing order.
    scc = pycaption.SCCReader(**positioning)
    fallbacks = (
        pycaption.SRTReader(**positioning),
        pycaption.SAMIReader(**positioning),
        pycaption.DFXPReader(**positioning),
        pycaption.WebVTTReader(**positioning),
    )

    # SCC is the only format that takes a language and a time offset.
    if scc.detect(captions):
        if not options.lang:
            return scc.read(captions, offset=int(options.offset))
        return scc.read(
            captions, lang=options.lang, offset=int(options.offset))

    for candidate in fallbacks:
        if candidate.detect(captions):
            return candidate.read(captions)

    raise Exception('No caption format detected :(')
Exemplo n.º 2
0
def main(argv):
    """Convert a caption file between formats and print the result.

    Recognised options:
        -h                  print the usage line and exit
        -i / --ifile PATH   input caption file
        -f / --sfile TYPE   input format  (srt, scc, webvtt, dfxp)
        -t / --tfile TYPE   output format (srt, scc, webvtt, dfxp)

    Format names are matched case-insensitively. Both formats are
    validated before the input file is opened, so a bad format name is
    reported without touching the file system.

    Args:
        argv: Command-line arguments, without the program name.

    Exits with status 2 on malformed options and status 1 when the
    formats are identical or unsupported.
    """
    usage = 'test.py -i <inputfile> -f <intputType> -t <outputType>'
    # Format name -> pycaption class name. The class is looked up only once
    # the format is known to be valid.
    reader_names = {
        'scc': 'SCCReader',
        'srt': 'SRTReader',
        'dfxp': 'DFXPReader',
        'webvtt': 'WebVTTReader',
    }
    writer_names = {
        'scc': 'SCCWriter',
        'srt': 'SRTWriter',
        'dfxp': 'DFXPWriter',
        'webvtt': 'WebVTTWriter',
    }

    inputfile = ''
    inputType = ''
    outputType = ''

    try:
        # "h" carries no colon: -h is a flag and must not consume an
        # argument. The long options checked below have to be registered
        # here, otherwise getopt rejects them.
        opts, _ = getopt.getopt(
            argv, "hi:f:t:", ["ifile=", "sfile=", "tfile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-f", "--sfile"):
            inputType = arg
        elif opt in ("-t", "--tfile"):
            outputType = arg

    # Normalise before comparing so "SRT" and "srt" count as the same format.
    inputValue = inputType.lower()
    outputValue = outputType.lower()

    if inputValue == outputValue:
        print('Error: input type and output type are same format')
        sys.exit(1)
    if inputValue not in reader_names:
        print('Error: invalid input type. <srt/scc/webvtt/dfxp> allowed')
        sys.exit(1)
    if outputValue not in writer_names:
        print('Error: invalid output type. <srt/scc/webvtt/dfxp> allowed')
        sys.exit(1)

    with io.open(inputfile) as f:
        content = f.read()

    caption_set = getattr(pycaption, reader_names[inputValue])().read(content)
    print(getattr(pycaption, writer_names[outputValue])().write(caption_set))
Exemplo n.º 3
0
def test_captions():
    """Convert a two-cue SRT sample to WebVTT and print the result."""
    # The sample is assembled from explicit lines so that no source-code
    # indentation leaks into the caption data: SRT counters and timestamps
    # must start at the beginning of the line.
    srt_lines = [
        '1',
        '00:00:09,209 --> 00:00:12,312',
        'This is an example SRT file,which, while extremely short,'
        'is still a valid SRT file.',
        '',
        '',
        '2',
        '00:00:19,209 --> 00:00:22,312',
        'This is an example SRT file,which, while extremely short,'
        'is still a valid SRT file.',
        '',
    ]
    srt_caps = '\n'.join(srt_lines)

    converter = pycaption.CaptionConverter()
    converter.read(srt_caps, pycaption.SRTReader())
    print(converter.write(pycaption.WebVTTWriter()))
def read_captions(captions, options):
    """Detect the caption format of ``captions`` and parse it.

    Formats are probed in a fixed order: SCC, SRT, SAMI, DFXP.

    Args:
        captions: The caption content handed to each reader's ``detect``
            and ``read``.
        options: Object exposing ``lang`` and ``offset``; both are only
            used when the content is SCC.

    Returns:
        Whatever the matching reader's ``read`` returns.

    Raises:
        Exception: If no reader recognises the content.
    """
    scc_reader = pycaption.SCCReader()
    srt_reader = pycaption.SRTReader()
    sami_reader = pycaption.SAMIReader()
    dfxp_reader = pycaption.DFXPReader()

    if scc_reader.detect(captions):
        # The offset is converted the same way whether or not a language
        # was given (previously int() in one branch and float() in the
        # other).
        scc_kwargs = {'offset': int(options.offset)}
        if options.lang:
            scc_kwargs['lang'] = options.lang
        return scc_reader.read(captions, **scc_kwargs)

    for reader in (srt_reader, sami_reader, dfxp_reader):
        if reader.detect(captions):
            return reader.read(captions)

    raise Exception('No caption format detected :(')
    def run(self):
        """Convert every file in ``self.input_files`` to ``self.output_type``.

        Each output is written to ``self.output_folder`` under the input's
        base name with the output type as its extension. Existing outputs
        are skipped unless ``self.overwrite_on`` is set. Progress messages
        are sent through ``self.log_signal.emit``.

        The input is read and parsed, and the output rendered, before the
        output file is opened. A failure, or an output path equal to the
        input path, therefore no longer truncates an existing file.

        Raises:
            ValueError: If an input file's extension is neither ``smi`` nor
                ``srt``.
        """
        for input_file in self.input_files:
            input_file_name = os.path.basename(input_file)
            input_type = os.path.splitext(input_file)[1].lower()[1:]
            output_file = os.path.join(
                self.output_folder,
                os.path.splitext(input_file_name)[0] + '.' + self.output_type)
            output_file_name = os.path.basename(output_file)
            if os.path.exists(output_file) and not self.overwrite_on:
                self.log_signal.emit("{}을 건너뜁니다...".format(input_file_name))
                continue
            self.log_signal.emit("{}을 읽습니다...".format(input_file_name))

            # Reject unknown inputs before touching any file; previously
            # None reached the writer after the output had been truncated.
            if input_type not in ("smi", "srt"):
                raise ValueError(
                    "Unsupported input subtitle type {!r}: {}".format(
                        input_type, input_file))

            # First pass sniffs the encoding from the raw bytes, second
            # pass decodes the text with it.
            with open(input_file, 'rb') as file_in:
                encoding = chardet.detect(file_in.read())['encoding']

            with open(input_file, 'r', encoding=encoding) as file_in:
                content = file_in.read()

            if input_type == "smi":
                caption_set = pycaption.SAMIReader().read(content)
            else:
                caption_set = pycaption.SRTReader().read(content)

            if self.output_type == "ats":
                # The ATS payload is written in binary mode through a
                # single handle that is closed by the with-block.
                payload = AtsWriter().write(caption_set)
                with open(output_file, 'wb') as file_out:
                    file_out.write(payload)
            else:
                if self.output_type == "smi":
                    text = pycaption.SAMIWriter().write(caption_set)
                elif self.output_type == "srt":
                    text = pycaption.SRTWriter().write(caption_set)
                elif self.output_type == "txt":
                    text = TextWriter().write(caption_set)
                else:
                    # Unknown output types still produce an empty file,
                    # as before.
                    text = ""
                with open(output_file, 'w',
                          encoding=self.output_encoding) as file_out:
                    file_out.write(text)

            self.log_signal.emit("{}으로 변환했습니다".format(output_file_name))
Exemplo n.º 6
0
def read_captions(input_filename):
    """Read an SRT file, sniffing its text encoding, and parse it.

    A UTF-8 byte-order mark selects ``utf-8-sig``; otherwise the encoding
    reported by ``chardet.detect`` on the raw bytes is used.

    Args:
        input_filename: Path of the SRT file to read.

    Returns:
        Whatever ``pycaption.SRTReader().read`` returns for the decoded
        text.

    Raises:
        UnicodeDecodeError: If the file cannot be decoded with the chosen
            encoding (a message naming the file is printed first).
        Exception: If the decoded text is not recognised as SRT.
    """
    with open(input_filename, 'rb') as raw_file:
        raw = raw_file.read()

    if raw.startswith(codecs.BOM_UTF8):
        encoding = 'utf-8-sig'
    else:
        encoding = chardet.detect(raw)['encoding']

    try:
        with io.open(input_filename, 'r', encoding=encoding) as text_file:
            captions = text_file.read()
    except UnicodeDecodeError as e:
        # Exceptions have no ``.message`` attribute on Python 3; str(e)
        # gives the same text without masking the real error.
        print(str(e) + " from file - " + input_filename)
        raise

    srt_reader = pycaption.SRTReader()
    if srt_reader.detect(captions):
        return srt_reader.read(captions)
    raise Exception('Illegal srt format %s :(' % input_filename)