Пример #1
0
    def __init__(self, sub, ref):
        """Store both inputs and create the correlation state.

        The correlator, the subtitles collector and the statistics holders
        are created here. The translator and every pipeline container start
        out empty (``None`` or ``[]``).

        Args:
            sub: Stored unchanged as ``self.sub``.
            ref: Stored unchanged as ``self.ref``.
        """
        self.sub = sub
        self.ref = ref

        # Default error handler: takes (src, err) and does nothing.
        self.onError = lambda src, err: None

        # Correlator parameters, read from settings() one by one in the
        # order the constructor receives them.
        windowSize = settings().windowSize
        minCorrelation = settings().minCorrelation
        maxPointDist = settings().maxPointDist
        minPointsNo = settings().minPointsNo
        minWordsSim = settings().minWordsSim

        self.correlator = correlator = gizmo.Correlator(
            windowSize, minCorrelation, maxPointDist, minPointsNo,
            minWordsSim)
        correlator.connectStatsCallback(self.onStatsUpdate)

        # Initially reference words are pushed straight into the correlator.
        self.refWordsSink = correlator.pushRefWord
        self.subtitlesCollector = subtitle.SubtitlesCollector()

        # Statistics holder plus the lock created alongside it.
        self.stats = gizmo.CorrelationStats()
        self.effortBegin = None
        self.statsLock = threading.Lock()

        # Nothing is wired up yet: no translator, no pipelines, no dumpers.
        self.translator = self.subPipeline = None
        self.refPipelines, self.pipelines, self.wordsDumpers = [], [], []
Пример #2
0
    def _initInternal(self, options, runCb=None):
        """Create and wire the correlator, producer pipelines and word dumps.

        Args:
            options: Mapping of job options. Keys read here are
                'windowSize', 'minCorrelation', 'maxPointDist',
                'minPointsNo', 'minWordsSim', 'minWordLen', 'minWordProb',
                'jobsNo' and, optionally, 'dumpWords' (an iterable of
                ``(srcId, path)`` pairs).
            runCb: Passed through unchanged to
                ``pipeline.createProducerPipelines`` when the reference
                pipelines are created.
        """
        logger.info('initializing synchronization jobs')
        sub, ref = self.sub, self.ref

        # Detect the encoding only for text subtitles that have no encoding
        # set and whose ``streams`` holds exactly one entry.
        for inputStream in (sub, ref):
            needsDetection = (inputStream.type == 'subtitle/text'
                              and not inputStream.enc
                              and len(inputStream.streams) == 1)
            if needsDetection:
                inputStream.enc = encdetect.detectEncoding(
                    inputStream.path, inputStream.lang)

        # Positional correlator arguments, looked up in constructor order.
        correlatorKeys = ('windowSize', 'minCorrelation', 'maxPointDist',
                          'minPointsNo', 'minWordsSim')
        self.correlator = correlator = gizmo.Correlator(
            *[options[key] for key in correlatorKeys])
        correlator.connectStatsCallback(self.onStatsUpdate)
        self.refWordsSink = correlator.pushRefWord
        self.subtitlesCollector = subtitle.SubtitlesCollector()

        # Subtitle side: subtitles go to the collector and the correlator,
        # words go to the correlator only.
        self.subPipeline = subPipe = pipeline.createProducerPipeline(sub)
        subPipe.connectEosCallback(self.onSubEos)
        subPipe.connectErrorCallback(self.onSubError)
        subPipe.addSubsListener(self.subtitlesCollector.addSubtitle)
        subPipe.addSubsListener(correlator.pushSubtitle)
        subPipe.addWordsListener(correlator.pushSubWord)

        # When both languages are set and differ, reference words are routed
        # through a translator before they reach the correlator.
        if sub.lang and ref.lang and sub.lang != ref.lang:
            self.dictionary = dictionary.loadDictionary(
                ref.lang, sub.lang, options['minWordLen'])
            self.translator = translator = gizmo.Translator(self.dictionary)
            translator.setMinWordsSim(options['minWordsSim'])
            translator.addWordsListener(correlator.pushRefWord)
            self.refWordsSink = translator.pushWord

        # Reference side: every pipeline feeds the current words sink.
        self.refPipelines = pipeline.createProducerPipelines(
            ref, no=options['jobsNo'], runCb=runCb)
        for refPipe in self.refPipelines:
            refPipe.connectEosCallback(self.onRefEos)
            refPipe.connectErrorCallback(self.onRefError)
            refPipe.addWordsListener(self.refWordsSink)

        self.pipelines = [self.subPipeline] + self.refPipelines
        for anyPipe in self.pipelines:
            anyPipe.configure(minWordLen=options['minWordLen'],
                              minWordProb=options['minWordProb'])

        # Word sources selectable by id in options['dumpWords'].
        dumpSources = dict(
            sub=[subPipe],
            subPipe=[subPipe],
            subRaw=[subPipe.getRawWordsSource()],
            ref=[self.translator] if self.translator else self.refPipelines,
            refPipe=self.refPipelines,
            refRaw=[rp.getRawWordsSource() for rp in self.refPipelines],
        )

        for srcId, path in options.get('dumpWords', []):
            sources = dumpSources.get(srcId)
            if not sources:
                # Unknown id or an empty source list: nothing to attach.
                continue

            logger.debug('dumping %s to %s (from %i sources)', srcId, path,
                         len(sources))
            # A path selects a file dump, an empty path selects stdout.
            dumper = (wordsdump.WordsFileDump(path, overwrite=True) if path
                      else wordsdump.WordsStdoutDump(srcId))
            self.wordsDumpers.append(dumper)
            for source in sources:
                source.addWordsListener(dumper.pushWord)
Пример #3
0
    def __init__(self, listener, subs, refs, refsCache=None):
        """Wire the correlator, the subtitle pipeline and the reference pipelines.

        Args:
            listener: Stored unchanged as ``self.listener``.
            subs: Input passed to ``pipeline.createProducerPipeline``; its
                subtitles go to the subtitles collector and its words to the
                correlator.
            refs: Input passed to ``pipeline.createProducerPipelines``; also
                the source of ``self.fps``.
            refsCache: Optional cache of reference words. When it is given
                and ``refsCache.isValid(refs)`` is true, the words in
                ``refsCache.data`` are replayed into the reference words
                sink and the pipelines are created with
                ``timeWindows=refsCache.progress``. Otherwise the cache (if
                any) is initialised with ``refsCache.init(refs)`` and the
                pipelines are created with ``no=getJobsNo()``.
        """
        self.listener = listener
        self.subs = subs
        self.refs = refs
        self.refsCache = refsCache

        # Prefer the frame rate reported by the stream and fall back to
        # refs.fps. The check uses identity, not equality, so a frame-rate
        # object with a custom __eq__ cannot be mistaken for a missing value.
        self.fps = refs.stream().frameRate
        if self.fps is None:
            self.fps = refs.fps

        self.correlator = gizmo.Correlator(settings().windowSize,
                                           settings().minCorrelation,
                                           settings().maxPointDist,
                                           settings().minPointsNo,
                                           settings().minWordsSim)

        # The stats holder and its lock are created before the stats
        # callback is connected.
        self.stats = gizmo.CorrelationStats()
        self.statsLock = threading.Lock()
        self.correlator.connectStatsCallback(self.onStatsUpdate)

        self.subtitlesCollector = subtitle.SubtitlesCollector()

        # Detect the encoding only for text subtitles that have no encoding
        # set and whose ``streams`` holds exactly one entry.
        for stream in (subs, refs):
            if stream.type == 'subtitle/text' and not stream.enc and len(
                    stream.streams) == 1:
                stream.enc = encdetect.detectEncoding(stream.path, stream.lang)

        self.subPipeline = pipeline.createProducerPipeline(subs)
        self.subPipeline.connectEosCallback(self.onSubEos)
        self.subPipeline.connectErrorCallback(self.onSubError)
        self.subPipeline.connectSubsCallback(
            self.subtitlesCollector.addSubtitle)
        self.subPipeline.connectWordsCallback(self.correlator.pushSubWord)

        # When both languages are set and differ, reference words pass
        # through a translator; otherwise they go straight to the correlator.
        if subs.lang and refs.lang and subs.lang != refs.lang:
            self.dictionary = dictionary.loadDictionary(
                refs.lang, subs.lang,
                settings().minWordLen)
            self.translator = gizmo.Translator(self.dictionary)
            self.translator.setMinWordsSim(settings().minWordsSim)
            self.translator.connectWordsCallback(self.correlator.pushRefWord)
            self.refWordsSink = self.translator.pushWord
        else:
            self.refWordsSink = self.correlator.pushRefWord

        if refsCache and refsCache.isValid(self.refs):
            logger.info('restoring cached reference words (%i)',
                        len(refsCache.data))

            # Replay cached words through the same sink live words use.
            for word in refsCache.data:
                self.refWordsSink(word)

            self.refPipelines = pipeline.createProducerPipelines(
                refs, timeWindows=refsCache.progress)

        else:
            if refsCache:
                refsCache.init(refs)

            self.refPipelines = pipeline.createProducerPipelines(
                refs, no=getJobsNo())

        # Reference pipelines deliver words to self.onRefWord (defined
        # outside this block), not directly to self.refWordsSink.
        for p in self.refPipelines:
            p.connectEosCallback(self.onRefEos)
            p.connectErrorCallback(self.onRefError)
            p.connectWordsCallback(self.onRefWord)

        self.pipelines = [self.subPipeline] + self.refPipelines