def _initInternal(self, runCb=None):
    """Create the subtitle/reference pipelines and wire up their listeners.

    Steps, in order:
      1. detect the encoding of single-stream text subtitles lacking one,
      2. build the subtitle pipeline and attach its listeners,
      3. when both languages are set and differ, load a dictionary and
         route reference words through a translator,
      4. build the reference pipelines and attach their listeners,
      5. attach a words dumper for every entry of settings().dumpWords
         whose source id is known.

    runCb is forwarded unchanged to pipeline.createProducerPipelines.
    """
    logger.info('initializing synchronization jobs')

    # Only single-stream text subtitles without a known encoding are probed.
    for src in (self.sub, self.ref):
        if src.type != 'subtitle/text':
            continue
        if src.enc or len(src.streams) != 1:
            continue
        src.enc = encdetect.detectEncoding(src.path, src.lang)

    # Subtitle side: subtitles go to the collector and the correlator,
    # words go to the correlator only.
    self.subPipeline = pipeline.createProducerPipeline(self.sub)
    subPipe = self.subPipeline
    subPipe.connectEosCallback(self.onSubEos)
    subPipe.connectErrorCallback(self.onSubError)
    subPipe.addSubsListener(self.subtitlesCollector.addSubtitle)
    subPipe.addSubsListener(self.correlator.pushSubtitle)
    subPipe.addWordsListener(self.correlator.pushSubWord)

    differentLangs = (
        self.sub.lang
        and self.ref.lang
        and self.sub.lang != self.ref.lang
    )
    if differentLangs:
        # Reference words are translated before reaching the correlator.
        self.dictionary = dictionary.loadDictionary(
            self.ref.lang, self.sub.lang, settings().minWordLen)
        self.translator = gizmo.Translator(self.dictionary)
        translator = self.translator
        translator.setMinWordsSim(settings().minWordsSim)
        translator.addWordsListener(self.correlator.pushRefWord)
        self.refWordsSink = translator.pushWord

    # Reference side: every pipeline feeds the current reference words sink
    # (left as it was before this call when no translator was installed).
    self.refPipelines = pipeline.createProducerPipelines(
        self.ref, no=getJobsNo(), runCb=runCb)
    for refPipe in self.refPipelines:
        refPipe.connectEosCallback(self.onRefEos)
        refPipe.connectErrorCallback(self.onRefError)
        refPipe.addWordsListener(self.refWordsSink)

    self.pipelines = [self.subPipeline] + self.refPipelines

    # Word sources that can be tapped by id; the table is built eagerly,
    # regardless of which ids settings().dumpWords actually requests.
    dumpSources = {
        'sub': [self.subPipeline],
        'subPipe': [self.subPipeline],
        'subRaw': [self.subPipeline.getRawWordsSource()],
        'ref': [self.translator] if self.translator else self.refPipelines,
        'refPipe': self.refPipelines,
        'refRaw': [rp.getRawWordsSource() for rp in self.refPipelines],
    }

    for srcId, path in settings().dumpWords:
        tapped = dumpSources.get(srcId)
        if not tapped:
            # Unknown id or empty source list: nothing to dump.
            continue

        logger.debug('dumping %s to %s (from %i sources)',
                     srcId, path, len(tapped))

        # A path selects a file dump; no path selects the stdout dump.
        dumper = (wordsdump.WordsFileDump(path, overwrite=True)
                  if path
                  else wordsdump.WordsStdoutDump(srcId))
        self.wordsDumpers.append(dumper)
        for tap in tapped:
            tap.addWordsListener(dumper.pushWord)
def init(self, runCb=lambda: True):
    """Prepare the subtitle and reference pipelines in interruptible stages.

    runCb is called with no arguments after each stage except the last;
    as soon as it returns a falsy value the method returns immediately,
    leaving whatever was set up so far in place and skipping the rest.
    It is also forwarded to pipeline.createProducerPipelines.
    """
    logger.info('initializing synchronization jobs')

    # Stage 1: detect the encoding of single-stream text subtitles that
    # have none set.
    for candidate in (self.sub, self.ref):
        isText = candidate.type == 'subtitle/text'
        if isText and not candidate.enc and len(candidate.streams) == 1:
            candidate.enc = encdetect.detectEncoding(
                candidate.path, candidate.lang)

    if not runCb():
        return

    # Stage 2: subtitle pipeline and its callbacks.
    self.subPipeline = pipeline.createProducerPipeline(self.sub)
    self.subPipeline.connectEosCallback(self.onSubEos)
    self.subPipeline.connectErrorCallback(self.onSubError)
    self.subPipeline.connectSubsCallback(self.subtitlesCollector.addSubtitle)
    self.subPipeline.connectWordsCallback(self.correlator.pushSubWord)

    if not runCb():
        return

    # Stage 3: install a translator only when both languages are set and
    # they are not the same.
    if self.sub.lang and self.ref.lang:
        if self.sub.lang != self.ref.lang:
            self.dictionary = dictionary.loadDictionary(
                self.ref.lang,
                self.sub.lang,
                settings().minWordLen,
            )
            self.translator = gizmo.Translator(self.dictionary)
            self.translator.setMinWordsSim(settings().minWordsSim)
            self.translator.connectWordsCallback(self.correlator.pushRefWord)
            self.refWordsSink = self.translator.pushWord

    if not runCb():
        return

    # Stage 4: reference pipelines, all feeding the current words sink.
    self.refPipelines = pipeline.createProducerPipelines(
        self.ref, no=getJobsNo(), runCb=runCb)

    for refPipe in self.refPipelines:
        refPipe.connectEosCallback(self.onRefEos)
        refPipe.connectErrorCallback(self.onRefError)
        refPipe.connectWordsCallback(self.refWordsSink)

    self.pipelines = [self.subPipeline] + self.refPipelines
def __init__(self, listener, subs, refs, refsCache=None):
    """Set up the correlator, the subtitle pipeline and the reference pipelines.

    Parameters:
        listener:  stored as self.listener; not used further in this method.
        subs:      subtitle stream description passed to
                   pipeline.createProducerPipeline.
        refs:      reference stream description passed to
                   pipeline.createProducerPipelines; also the source of the
                   frame rate stored in self.fps.
        refsCache: optional cache of reference words. When it is truthy and
                   reports itself valid for refs, its cached words are
                   replayed and only refsCache.progress is handed to the
                   pipeline factory as timeWindows; otherwise the cache (if
                   any) is re-initialised for refs.
    """
    self.listener = listener
    self.subs = subs
    self.refs = refs
    self.refsCache = refsCache

    # Prefer the frame rate reported by the stream; fall back to refs.fps
    # only when it is missing. Identity test so that a legitimate value
    # with a custom __eq__ (or a falsy rate) is never mistaken for None.
    self.fps = refs.stream().frameRate
    if self.fps is None:
        self.fps = refs.fps

    self.correlator = gizmo.Correlator(
        settings().windowSize,
        settings().minCorrelation,
        settings().maxPointDist,
        settings().minPointsNo,
        settings().minWordsSim)
    self.stats = gizmo.CorrelationStats()
    self.statsLock = threading.Lock()
    self.correlator.connectStatsCallback(self.onStatsUpdate)
    self.subtitlesCollector = subtitle.SubtitlesCollector()

    # Detect the encoding of single-stream text subtitles that have none.
    for stream in (subs, refs):
        if (stream.type == 'subtitle/text'
                and not stream.enc
                and len(stream.streams) == 1):
            stream.enc = encdetect.detectEncoding(stream.path, stream.lang)

    # Subtitle side: subtitles go to the collector, words to the correlator.
    self.subPipeline = pipeline.createProducerPipeline(subs)
    self.subPipeline.connectEosCallback(self.onSubEos)
    self.subPipeline.connectErrorCallback(self.onSubError)
    self.subPipeline.connectSubsCallback(self.subtitlesCollector.addSubtitle)
    self.subPipeline.connectWordsCallback(self.correlator.pushSubWord)

    # Reference words pass through a translator only when both languages
    # are set and differ; otherwise they go straight to the correlator.
    if subs.lang and refs.lang and subs.lang != refs.lang:
        self.dictionary = dictionary.loadDictionary(
            refs.lang, subs.lang, settings().minWordLen)
        self.translator = gizmo.Translator(self.dictionary)
        self.translator.setMinWordsSim(settings().minWordsSim)
        self.translator.connectWordsCallback(self.correlator.pushRefWord)
        self.refWordsSink = self.translator.pushWord
    else:
        self.refWordsSink = self.correlator.pushRefWord

    if refsCache and refsCache.isValid(self.refs):
        logger.info('restoring cached reference words (%i)',
                    len(refsCache.data))

        # Replay cached words directly into the sink, bypassing onRefWord.
        for word in refsCache.data:
            self.refWordsSink(word)

        self.refPipelines = pipeline.createProducerPipelines(
            refs, timeWindows=refsCache.progress)
    else:
        if refsCache:
            refsCache.init(refs)

        self.refPipelines = pipeline.createProducerPipelines(
            refs, no=getJobsNo())

    # Live reference words are delivered to self.onRefWord (defined outside
    # this block), not to refWordsSink directly.
    for p in self.refPipelines:
        p.connectEosCallback(self.onRefEos)
        p.connectErrorCallback(self.onRefError)
        p.connectWordsCallback(self.onRefWord)

    self.pipelines = [self.subPipeline] + self.refPipelines