def procureModel(self):
    """Obtain the model requested by ``self.options`` and post-process it.

    The model comes from the first of these sources that is configured:

    1. ``options.resume_from_checkpoint`` -- resumed via
       ``ModelTemplate.resume``.
    2. ``options.modelFile`` -- unpickled from that file.
    3. otherwise no model yet; ``self.sequitur`` becomes a fresh
       ``Sequitur`` and the model has to come from training.

    Afterwards the optional steps run in this order: ramp-up, training
    (``options.trainSample``), transposition, strip-and-save to
    ``options.newModelFile``, and a self test on ``self.develSample``.

    Returns:
        The resulting model, or ``None`` when training was requested but
        did not yield a model (a message is written to ``self.log``).

    Raises:
        UsageError: if there is neither a loaded model nor a training sample.
        ValueError: re-raised from ``pickle.load`` on Python 2 after a hint
            has been printed to ``sys.stderr``.
    """
    if self.options.resume_from_checkpoint:
        model = ModelTemplate.resume(self.options.resume_from_checkpoint)
        self.sequitur = model.sequitur
    elif self.options.modelFile:
        # NOTE(security): unpickling executes code stored in the file; only
        # load model files from trusted sources.
        # The context manager closes the handle even if unpickling fails.
        with open(self.options.modelFile, 'rb') as modelStream:
            if sys.version_info[:2] >= (3, 0):
                # 'latin1' controls how pickle decodes 8-bit string
                # instances that were written by Python 2.
                model = pickle.load(modelStream, encoding='latin1')
            else:
                try:
                    model = pickle.load(modelStream)
                except ValueError:
                    print('This error most likely occured because the '
                          'loaded model was created in python3.\n',
                          file=sys.stderr)
                    raise
        self.sequitur = model.sequitur
    else:
        self.sequitur = Sequitur()
        model = None

    # NOTE(review): with no loaded model this dereferences None -- ramp-up
    # appears to require a checkpoint or model file; confirm against callers.
    if self.options.shouldRampUp:
        model.rampUp()

    if self.options.trainSample:
        model = self.trainModel(model)
        if not model:
            print('failed to estimate or load model', file=self.log)
            return

    if not model:
        raise UsageError

    # Debugging aid:
    # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

    if self.options.shouldTranspose:
        model.transpose()

    if self.options.newModelFile:
        oldSize, newSize = model.strip()
        print('stripped number of multigrams from %d to %d'
              % (oldSize, newSize), file=self.log)
        # Closed on every exit path, including a failing dump.
        with open(self.options.newModelFile, 'wb') as outStream:
            pickle.dump(model, outStream, pickle.HIGHEST_PROTOCOL)

    if self.options.shouldSelfTest:
        print('warning: --self-test does not treat pronunciation variants '
              'correctly', file=self.log)
        if not self.develSample:
            print('error: cannot do --self-test without --devel sample',
                  file=self.log)
        else:
            translator = Translator(model)
            evaluator = Evaluator()
            evaluator.setSample(self.develSample)
            evaluator.verboseLog = self.log
            result = evaluator.evaluate(translator)
            print(result, file=self.log)

    return model
def procureModel(self):
    """Load, resume or train a model as directed by ``self.options``.

    Source of the model, in order of precedence:

    * ``options.resume_from_checkpoint`` -- ``ModelTemplate.resume`` is used;
    * ``options.modelFile`` -- the model is unpickled from that path;
    * neither -- ``self.sequitur`` is set to a new ``Sequitur`` and the
      model starts out as ``None``.

    Then, each only if the matching option is set: ``rampUp``, training via
    ``self.trainModel``, ``transpose``, ``strip`` followed by pickling to
    ``options.newModelFile``, and an evaluation on ``self.develSample``.

    Returns:
        The model, or ``None`` if training was requested and produced
        nothing (a message is written to ``self.log``).

    Raises:
        UsageError: if no model was loaded and no training sample was given.
    """
    if self.options.resume_from_checkpoint:
        model = ModelTemplate.resume(self.options.resume_from_checkpoint)
        self.sequitur = model.sequitur
    elif self.options.modelFile:
        # NOTE(security): unpickling executes code stored in the file; only
        # load model files from trusted sources.
        # ``with`` guarantees the handle is released (it used to leak).
        with open(self.options.modelFile, "rb") as modelStream:
            model = pickle.load(modelStream)
        self.sequitur = model.sequitur
    else:
        self.sequitur = Sequitur()
        model = None

    # NOTE(review): with no loaded model this dereferences None -- ramp-up
    # appears to require a checkpoint or model file; confirm against callers.
    if self.options.shouldRampUp:
        model.rampUp()

    if self.options.trainSample:
        model = self.trainModel(model)
        if not model:
            print('failed to estimate or load model', file=self.log)
            return

    if not model:
        raise UsageError

    # Debugging aid:
    # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

    if self.options.shouldTranspose:
        model.transpose()

    if self.options.newModelFile:
        oldSize, newSize = model.strip()
        print('stripped number of multigrams from %d to %d'
              % (oldSize, newSize), file=self.log)
        # Closed on every exit path, including a failing dump.
        with open(self.options.newModelFile, 'wb') as outStream:
            pickle.dump(model, outStream)

    if self.options.shouldSelfTest:
        print('warning: --self-test does not treat pronunciation variants '
              'correctly', file=self.log)
        if not self.develSample:
            print('error: cannot do --self-test without --devel sample',
                  file=self.log)
        else:
            translator = Translator(model)
            evaluator = Evaluator()
            evaluator.setSample(self.develSample)
            evaluator.verboseLog = self.log
            result = evaluator.evaluate(translator)
            print(result, file=self.log)

    return model
def procureModel(self):
    """Load, resume or train a model as directed by ``self.options``.

    Source of the model, in order of precedence:

    * ``options.resume_from_checkpoint`` -- ``ModelTemplate.resume`` is used;
    * ``options.modelFile`` -- the model is unpickled from that path;
    * neither -- ``self.sequitur`` is set to a new ``Sequitur`` and the
      model starts out as ``None``.

    Then, each only if the matching option is set: ``rampUp``, training via
    ``self.trainModel``, ``transpose``, ``strip`` followed by pickling to
    ``options.newModelFile``, and an evaluation on ``self.develSample``.

    Returns:
        The model, or ``None`` if training was requested and produced
        nothing (a message is written to ``self.log``).

    Raises:
        UsageError: if no model was loaded and no training sample was given.
    """
    if self.options.resume_from_checkpoint:
        model = ModelTemplate.resume(self.options.resume_from_checkpoint)
        self.sequitur = model.sequitur
    elif self.options.modelFile:
        # NOTE(security): unpickling executes code stored in the file; only
        # load model files from trusted sources.
        # Pickle data is binary, so the file must be opened with 'rb';
        # ``with`` also makes sure the handle is closed.
        with open(self.options.modelFile, 'rb') as modelStream:
            model = pickle.load(modelStream)
        self.sequitur = model.sequitur
    else:
        self.sequitur = Sequitur()
        model = None

    # NOTE(review): with no loaded model this dereferences None -- ramp-up
    # appears to require a checkpoint or model file; confirm against callers.
    if self.options.shouldRampUp:
        model.rampUp()

    if self.options.trainSample:
        model = self.trainModel(model)
        if not model:
            print('failed to estimate or load model', file=self.log)
            return

    if not model:
        raise UsageError

    # Debugging aid:
    # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

    if self.options.shouldTranspose:
        model.transpose()

    if self.options.newModelFile:
        oldSize, newSize = model.strip()
        print('stripped number of multigrams from %d to %d'
              % (oldSize, newSize), file=self.log)
        # HIGHEST_PROTOCOL is a binary format: write with 'wb', not 'w'.
        with open(self.options.newModelFile, 'wb') as outStream:
            pickle.dump(model, outStream, pickle.HIGHEST_PROTOCOL)

    if self.options.shouldSelfTest:
        print('warning: --self-test does not treat pronunciation variants '
              'correctly', file=self.log)
        if not self.develSample:
            print('error: cannot do --self-test without --devel sample',
                  file=self.log)
        else:
            translator = Translator(model)
            evaluator = Evaluator()
            evaluator.setSample(self.develSample)
            evaluator.verboseLog = self.log
            result = evaluator.evaluate(translator)
            print(result, file=self.log)

    return model
def trainModel(self, initialModel):
    """Estimate a model from the samples configured in ``self.options``.

    Loads the samples, compiles the training and development samples with
    ``self.sequitur``, configures a ``ModelTemplate`` from the options
    (discount strategy, length constraints, iteration limits, checkpointing,
    online testers) and runs the estimation.

    Args:
        initialModel: model to start from, or ``None``; it is handed to
            ``template.makeContext`` and, when ``options.shouldWipeModel``
            is set, wiped first.

    Returns:
        ``estimationContext.bestModel`` after the run, or ``None`` when
        ``options.minIterations`` exceeds ``options.maxIterations`` (a
        message is written to ``self.log``).
    """
    # ``collections.Sequence`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back for interpreters that predate it.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    self.loadSamples()
    compiledTrainSample = self.sequitur.compileSample(self.trainSample)
    compiledDevelSample = self.sequitur.compileSample(self.develSample)
    # Only the compiled form of the training sample is used from here on.
    del self.trainSample

    if self.options.fixed_discount:
        # NOTE(security): eval() runs arbitrary code; acceptable only while
        # the option string comes from a trusted operator.
        discount = eval(self.options.fixed_discount)
        if not isinstance(discount, Sequence):
            # A single scalar is promoted to a one-element list.
            discount = [discount]
        discount = num.array(discount)
    else:
        discount = None

    template = ModelTemplate(self.sequitur)
    if self.options.fixed_discount:
        template.DiscountAdjustmentStrategy = FixedDiscounts(discount)
    elif self.develSample:
        if self.options.eager_discount_adjustment:
            template.DiscountAdjustmentStrategy = EagerDiscountAdjuster
        else:
            template.DiscountAdjustmentStrategy = DefaultDiscountAdjuster
    else:
        template.DiscountAdjustmentStrategy = StaticDiscounts

    if self.options.lengthConstraints:
        spec = self.options.lengthConstraints.strip()
        if spec.startswith('['):
            # Bracketed form "[l:r,l:r,...]": explicit size templates.
            assert spec.endswith(']')
            pairs = [item.split(':') for item in spec[1:-1].split(',')]
            sizeTemplates = [(int(left), int(right)) for left, right in pairs]
            template.setSizeTemplates(sizeTemplates)
        else:
            # Plain form "a,b,...": integers passed positionally.
            limits = tuple(map(int, spec.split(',')))
            template.setLengthConstraints(*limits)

    template.allowEmergenceOfNewMultigrams(
        not bool(self.options.shouldSuppressNewMultigrams))
    template.useMaximumApproximation(bool(self.options.viterbi))

    if self.options.minIterations > self.options.maxIterations:
        print('invalid limits on number of iterations %d > %d'
              % (self.options.minIterations, self.options.maxIterations),
              file=self.log)
        return
    template.minIterations = self.options.minIterations
    template.maxIterations = self.options.maxIterations

    if self.options.checkpoint and self.options.newModelFile:
        # presumably seconds, i.e. eight hours -- TODO confirm the unit
        template.checkpointInterval = 8 * 60 * 60
        base, ext = os.path.splitext(self.options.newModelFile)
        # '%d' is left in the name as a placeholder for the template.
        template.checkpointFile = base + '-cp%d' + ext

    if self.options.shouldWipeModel:
        initialModel.wipeOut(template.nPossibleMultigrams())

    if self.options.shouldTestContinuously:
        if self.develSample:
            template.observers.append(
                OnlineTester('devel', self.develSample))
        if self.options.testSample:
            template.observers.append(
                OnlineTester('test',
                             self.loadSample(self.options.testSample)))

    estimationContext = template.makeContext(
        compiledTrainSample, compiledDevelSample, initialModel)
    del initialModel
    estimationContext.log = self.log

    if self.options.shouldInitializeWithCounts:
        template.initializeWithOverlappingCounts(estimationContext)

    template.run(estimationContext)
    return estimationContext.bestModel
def trainModel(self, initialModel):
    """Train a model on the samples selected through ``self.options``.

    Steps: load the samples, compile the training and development samples
    with ``self.sequitur``, build and configure a ``ModelTemplate`` (discount
    strategy, length constraints, iteration limits, checkpoint file, online
    testers), create an estimation context and run the template on it.

    Args:
        initialModel: starting model or ``None``; passed on to
            ``template.makeContext`` (and wiped beforehand when
            ``options.shouldWipeModel`` is set).

    Returns:
        ``estimationContext.bestModel``, or ``None`` if
        ``options.minIterations`` is larger than ``options.maxIterations``
        (the problem is reported on ``self.log``).
    """
    # ``operator.isSequenceType`` exists only on Python 2. The abstract
    # ``Sequence`` check matches the other variant of this routine and
    # covers lists and tuples.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    self.loadSamples()
    compiledTrainSample = self.sequitur.compileSample(self.trainSample)
    compiledDevelSample = self.sequitur.compileSample(self.develSample)
    # Only the compiled form of the training sample is used from here on.
    del self.trainSample

    if self.options.fixed_discount:
        # NOTE(security): eval() runs arbitrary code; acceptable only while
        # the option string comes from a trusted operator.
        discount = eval(self.options.fixed_discount)
        if not isinstance(discount, Sequence):
            # A single scalar is promoted to a one-element list.
            discount = [discount]
        discount = num.array(discount)
    else:
        discount = None

    template = ModelTemplate(self.sequitur)
    if self.options.fixed_discount:
        template.DiscountAdjustmentStrategy = FixedDiscounts(discount)
    elif self.develSample:
        if self.options.eager_discount_adjustment:
            template.DiscountAdjustmentStrategy = EagerDiscountAdjuster
        else:
            template.DiscountAdjustmentStrategy = DefaultDiscountAdjuster
    else:
        template.DiscountAdjustmentStrategy = StaticDiscounts

    if self.options.lengthConstraints:
        spec = self.options.lengthConstraints.strip()
        if spec.startswith('['):
            # Bracketed form "[l:r,l:r,...]": explicit size templates.
            assert spec.endswith(']')
            pairs = [item.split(':') for item in spec[1:-1].split(',')]
            sizeTemplates = [(int(left), int(right)) for left, right in pairs]
            template.setSizeTemplates(sizeTemplates)
        else:
            # Plain form "a,b,...": integers passed positionally.
            limits = tuple(map(int, spec.split(',')))
            template.setLengthConstraints(*limits)

    template.allowEmergenceOfNewMultigrams(
        not bool(self.options.shouldSuppressNewMultigrams))
    template.useMaximumApproximation(bool(self.options.viterbi))

    if self.options.minIterations > self.options.maxIterations:
        # print() with file= replaces the Python-2-only ``print >>`` form.
        print('invalid limits on number of iterations %d > %d'
              % (self.options.minIterations, self.options.maxIterations),
              file=self.log)
        return
    template.minIterations = self.options.minIterations
    template.maxIterations = self.options.maxIterations

    if self.options.checkpoint and self.options.newModelFile:
        # presumably seconds, i.e. eight hours -- TODO confirm the unit
        template.checkpointInterval = 8 * 60 * 60
        base, ext = os.path.splitext(self.options.newModelFile)
        # '%d' is left in the name as a placeholder for the template.
        template.checkpointFile = base + '-cp%d' + ext

    if self.options.shouldWipeModel:
        initialModel.wipeOut(template.nPossibleMultigrams())

    if self.options.shouldTestContinuously:
        if self.develSample:
            template.observers.append(
                OnlineTester('devel', self.develSample))
        if self.options.testSample:
            template.observers.append(
                OnlineTester('test',
                             self.loadSample(self.options.testSample)))

    estimationContext = template.makeContext(
        compiledTrainSample, compiledDevelSample, initialModel)
    del initialModel
    estimationContext.log = self.log

    if self.options.shouldInitializeWithCounts:
        template.initializeWithOverlappingCounts(estimationContext)

    template.run(estimationContext)
    return estimationContext.bestModel
def procureModel(self):
    """Provide the model selected by ``self.options``, training if asked to.

    The model is taken from ``options.resume_from_checkpoint`` (via
    ``ModelTemplate.resume``) if set, else unpickled from
    ``options.modelFile`` if set; otherwise ``self.sequitur`` becomes a new
    ``Sequitur`` and the model starts as ``None``.

    Optional follow-up steps, each controlled by its option: ``rampUp``,
    training through ``self.trainModel``, ``transpose``, ``strip`` plus
    pickling to ``options.newModelFile``, and a self test on
    ``self.develSample`` whose result is written to ``self.log``.

    Returns:
        The model, or ``None`` when training was requested but returned
        nothing (a message is written to ``self.log``).

    Raises:
        UsageError: if no model was loaded and no training sample was given.
    """
    if self.options.resume_from_checkpoint:
        model = ModelTemplate.resume(self.options.resume_from_checkpoint)
        self.sequitur = model.sequitur
    elif self.options.modelFile:
        # NOTE(security): unpickling executes code stored in the file; only
        # load model files from trusted sources.
        # Pickle data is binary, so the file must be opened with 'rb';
        # ``with`` also makes sure the handle is closed.
        with open(self.options.modelFile, 'rb') as modelStream:
            model = pickle.load(modelStream)
        self.sequitur = model.sequitur
    else:
        self.sequitur = Sequitur()
        model = None

    # NOTE(review): with no loaded model this dereferences None -- ramp-up
    # appears to require a checkpoint or model file; confirm against callers.
    if self.options.shouldRampUp:
        model.rampUp()

    if self.options.trainSample:
        model = self.trainModel(model)
        if not model:
            print('failed to estimate or load model', file=self.log)
            return

    if not model:
        raise UsageError

    # Debugging aid:
    # model.sequenceModel.showMostProbable(sys.stdout, model.sequitur.symbol, limit=250)

    if self.options.shouldTranspose:
        model.transpose()

    if self.options.newModelFile:
        oldSize, newSize = model.strip()
        print('stripped number of multigrams from %d to %d'
              % (oldSize, newSize), file=self.log)
        # HIGHEST_PROTOCOL is a binary format: write with 'wb', not 'w'.
        with open(self.options.newModelFile, 'wb') as outStream:
            pickle.dump(model, outStream, pickle.HIGHEST_PROTOCOL)

    if self.options.shouldSelfTest:
        print('warning: --self-test does not treat pronunciation variants '
              'correctly', file=self.log)
        if not self.develSample:
            print('error: cannot do --self-test without --devel sample',
                  file=self.log)
        else:
            translator = Translator(model)
            evaluator = Evaluator()
            evaluator.setSample(self.develSample)
            evaluator.verboseLog = self.log
            result = evaluator.evaluate(translator)
            print(result, file=self.log)

    return model