Exemplo n.º 1
0
    def _initInternal(self, runCb=None):
        """Wire up the subtitle/reference pipelines and optional word dumps.

        Steps, in order:
          1. detect the encoding of single-stream text subtitles that have
             no encoding set,
          2. create the subtitle pipeline and attach its listeners,
          3. if both languages are set and differ, load a dictionary and put
             a translator in front of the correlator for reference words,
          4. create the reference pipelines and attach their listeners,
          5. attach a words dumper for every entry in ``settings().dumpWords``
             whose source id is known and has at least one source.

        Args:
            runCb: forwarded unchanged to
                ``pipeline.createProducerPipelines``.
        """
        logger.info('initializing synchronization jobs')

        for src in (self.sub, self.ref):
            # Only text subtitles lacking an explicit encoding are probed.
            if src.type != 'subtitle/text' or src.enc:
                continue
            if len(src.streams) == 1:
                src.enc = encdetect.detectEncoding(src.path, src.lang)

        subPipe = pipeline.createProducerPipeline(self.sub)
        self.subPipeline = subPipe
        subPipe.connectEosCallback(self.onSubEos)
        subPipe.connectErrorCallback(self.onSubError)
        subPipe.addSubsListener(self.subtitlesCollector.addSubtitle)
        subPipe.addSubsListener(self.correlator.pushSubtitle)
        subPipe.addWordsListener(self.correlator.pushSubWord)

        langsDiffer = (self.sub.lang and self.ref.lang
                       and self.sub.lang != self.ref.lang)
        if langsDiffer:
            # Reference words go through the translator before correlation.
            words = dictionary.loadDictionary(self.ref.lang, self.sub.lang,
                                              settings().minWordLen)
            self.dictionary = words
            translator = gizmo.Translator(words)
            self.translator = translator
            translator.setMinWordsSim(settings().minWordsSim)
            translator.addWordsListener(self.correlator.pushRefWord)
            self.refWordsSink = translator.pushWord

        self.refPipelines = pipeline.createProducerPipelines(
            self.ref, no=getJobsNo(), runCb=runCb)

        for refPipe in self.refPipelines:
            refPipe.connectEosCallback(self.onRefEos)
            refPipe.connectErrorCallback(self.onRefError)
            refPipe.addWordsListener(self.refWordsSink)

        self.pipelines = [self.subPipeline] + self.refPipelines

        # Map of dump source ids to the objects that emit the matching words.
        subRawSources = [self.subPipeline.getRawWordsSource()]
        if self.translator:
            refSources = [self.translator]
        else:
            refSources = self.refPipelines
        refRawSources = [rp.getRawWordsSource() for rp in self.refPipelines]

        dumpSources = {
            'sub': [self.subPipeline],
            'subPipe': [self.subPipeline],
            'subRaw': subRawSources,
            'ref': refSources,
            'refPipe': self.refPipelines,
            'refRaw': refRawSources,
        }

        for srcId, path in settings().dumpWords:
            sources = dumpSources.get(srcId)
            if not sources:
                # Unknown id or nothing to listen to.
                continue

            logger.debug('dumping %s to %s (from %i sources)', srcId, path,
                         len(sources))

            # An empty path means "dump to stdout".
            dumper = (wordsdump.WordsFileDump(path, overwrite=True)
                      if path else wordsdump.WordsStdoutDump(srcId))
            self.wordsDumpers.append(dumper)

            for source in sources:
                source.addWordsListener(dumper.pushWord)
Exemplo n.º 2
0
    def init(self, runCb=lambda: True):
        """Create and connect the pipelines for a synchronization run.

        ``runCb`` is polled between setup stages; as soon as it returns a
        falsy value the method returns, leaving the remaining stages
        unconfigured. It is also forwarded to
        ``pipeline.createProducerPipelines``.

        Args:
            runCb: zero-argument callable; a truthy result means "keep
                going". Defaults to a callable that always returns True.
        """
        logger.info('initializing synchronization jobs')

        for src in (self.sub, self.ref):
            # Only text subtitles lacking an explicit encoding are probed.
            if src.type != 'subtitle/text' or src.enc:
                continue
            if len(src.streams) == 1:
                src.enc = encdetect.detectEncoding(src.path, src.lang)

        if not runCb():
            return

        subPipe = pipeline.createProducerPipeline(self.sub)
        self.subPipeline = subPipe
        subPipe.connectEosCallback(self.onSubEos)
        subPipe.connectErrorCallback(self.onSubError)
        subPipe.connectSubsCallback(self.subtitlesCollector.addSubtitle)
        subPipe.connectWordsCallback(self.correlator.pushSubWord)

        if not runCb():
            return

        langsDiffer = (self.sub.lang and self.ref.lang
                       and self.sub.lang != self.ref.lang)
        if langsDiffer:
            # Reference words go through the translator before correlation.
            words = dictionary.loadDictionary(self.ref.lang, self.sub.lang,
                                              settings().minWordLen)
            self.dictionary = words
            translator = gizmo.Translator(words)
            self.translator = translator
            translator.setMinWordsSim(settings().minWordsSim)
            translator.connectWordsCallback(self.correlator.pushRefWord)
            self.refWordsSink = translator.pushWord

        if not runCb():
            return

        self.refPipelines = pipeline.createProducerPipelines(
            self.ref, no=getJobsNo(), runCb=runCb)

        for refPipe in self.refPipelines:
            refPipe.connectEosCallback(self.onRefEos)
            refPipe.connectErrorCallback(self.onRefError)
            refPipe.connectWordsCallback(self.refWordsSink)

        self.pipelines = [self.subPipeline] + self.refPipelines
Exemplo n.º 3
0
    def __init__(self, listener, subs, refs, refsCache=None):
        """Set up the correlator and all producer pipelines.

        Args:
            listener: stored as ``self.listener``; not used further here.
            subs: subtitle stream description fed to the subtitle pipeline.
            refs: reference stream description fed to the reference
                pipelines; also the source of the frame rate.
            refsCache: optional cache of reference words. When it is valid
                for ``refs`` its words are replayed and the reference
                pipelines are created with ``timeWindows=refsCache.progress``;
                otherwise it is (re)initialised for ``refs`` and the
                pipelines are created with ``no=getJobsNo()``.
        """
        self.listener = listener
        self.subs = subs
        self.refs = refs
        self.refsCache = refsCache

        # Prefer the frame rate reported by the selected reference stream,
        # falling back to the one stored on the reference description.
        self.fps = refs.stream().frameRate
        if self.fps is None:
            self.fps = refs.fps

        self.correlator = gizmo.Correlator(settings().windowSize,
                                           settings().minCorrelation,
                                           settings().maxPointDist,
                                           settings().minPointsNo,
                                           settings().minWordsSim)

        self.stats = gizmo.CorrelationStats()
        self.statsLock = threading.Lock()
        self.correlator.connectStatsCallback(self.onStatsUpdate)

        self.subtitlesCollector = subtitle.SubtitlesCollector()

        # Detect the encoding of single-stream text subtitles that do not
        # have one set yet.
        for stream in (subs, refs):
            if stream.type == 'subtitle/text' and not stream.enc and len(
                    stream.streams) == 1:
                stream.enc = encdetect.detectEncoding(stream.path, stream.lang)

        self.subPipeline = pipeline.createProducerPipeline(subs)
        self.subPipeline.connectEosCallback(self.onSubEos)
        self.subPipeline.connectErrorCallback(self.onSubError)
        self.subPipeline.connectSubsCallback(
            self.subtitlesCollector.addSubtitle)
        self.subPipeline.connectWordsCallback(self.correlator.pushSubWord)

        if subs.lang and refs.lang and subs.lang != refs.lang:
            # Languages differ: reference words pass through a translator
            # before reaching the correlator.
            self.dictionary = dictionary.loadDictionary(
                refs.lang, subs.lang,
                settings().minWordLen)
            self.translator = gizmo.Translator(self.dictionary)
            self.translator.setMinWordsSim(settings().minWordsSim)
            self.translator.connectWordsCallback(self.correlator.pushRefWord)
            self.refWordsSink = self.translator.pushWord
        else:
            self.refWordsSink = self.correlator.pushRefWord

        if refsCache and refsCache.isValid(self.refs):
            logger.info('restoring cached reference words (%i)',
                        len(refsCache.data))

            # Replay the cached words into the sink before creating the
            # pipelines from the cached progress.
            for word in refsCache.data:
                self.refWordsSink(word)

            self.refPipelines = pipeline.createProducerPipelines(
                refs, timeWindows=refsCache.progress)

        else:
            if refsCache:
                refsCache.init(refs)

            self.refPipelines = pipeline.createProducerPipelines(
                refs, no=getJobsNo())

        for p in self.refPipelines:
            p.connectEosCallback(self.onRefEos)
            p.connectErrorCallback(self.onRefError)
            # NOTE(review): words are routed to self.onRefWord, not directly
            # to self.refWordsSink; presumably onRefWord forwards to the sink
            # (and the cache) -- confirm against its definition.
            p.connectWordsCallback(self.onRefWord)

        self.pipelines = [self.subPipeline] + self.refPipelines