def verifysettings(self):
    """Run the parent's settings check, then fill in any missing settings.

    Keys already present in ``self.settings`` are never overwritten. Once
    all defaults are in place, ``getcache`` is called with the settings and
    a default size of 1000, and its result is stored on ``self.cache``.
    """
    super().verifysettings()

    cfg = self.settings

    # Defaults that do not depend on any other setting, applied in order.
    for key, fallback in (
        ('class', 'nonworderror'),
        ('runonclass', 'runonerror'),
        ('runon', True),
        ('maxdistance', 2),
        ('maxdistance_short', 1),
        ('maxlength', 25),  # longer words will be ignored
        ('minlength', 5),  # shorter words will be ignored
    ):
        if key not in cfg:
            cfg[key] = fallback

    # 'shortlength' falls back to whatever 'minlength' ended up being.
    if 'shortlength' not in cfg:
        cfg['shortlength'] = cfg['minlength']

    if 'maxnrclosest' not in cfg:
        cfg['maxnrclosest'] = 5

    # A fresh list is created per key and per call, so nothing is shared.
    for key in ('suffixes', 'prefixes'):
        if key not in cfg:
            cfg[key] = []

    self.cache = getcache(self.settings, 1000)  # 2nd arg is default cache size
def verifysettings(self):
    """Apply default settings and validate the configured model files.

    The ``'class'`` default is set before delegating to the parent's
    ``verifysettings``; the remaining defaults are applied afterwards.
    ``threshold``, ``freqthreshold``, ``minlength``, ``probfactor`` and
    ``debug`` are exposed as instance attributes rather than written back
    into ``self.settings``.

    Raises:
        Exception: if ``self.models`` is empty, if the first model does not
            end in ``.ibase``, or if a second model is given that does not
            end in ``colibri.patternmodel``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    # These values live on the instance; self.settings is left untouched.
    self.threshold = self.settings['threshold'] if 'threshold' in self.settings else 0.9
    self.freqthreshold = self.settings['freqthreshold'] if 'freqthreshold' in self.settings else 2
    self.minlength = self.settings['minlength'] if 'minlength' in self.settings else 5
    self.probfactor = self.settings['probfactor'] if 'probfactor' in self.settings else 10

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self, self.settings)
    self.cache = getcache(self.settings, 1000)

    # Only the lookup of the first model is guarded. The extension checks
    # below raise outside the try block so that their specific messages
    # reach the caller instead of being replaced by the generic one.
    try:
        modelfile = self.models[0]
    except IndexError as err:
        raise Exception("Expected one or two models, the first a TIMBL instance base, and the optional second a colibri patternmodel, got " + str(len(self.models))) from err

    if not modelfile.endswith(".ibase"):
        raise Exception("First model must be a TIMBL instance base model, which must have the extension '.ibase', got " + modelfile + " instead")

    if len(self.models) > 1:
        lexiconfile = self.models[1]
        if not lexiconfile.endswith("colibri.patternmodel"):
            # Report the offending second file, not the first model.
            raise Exception("Second model must be a Colibri pattern model, which must have the extensions '.colibri.patternmodel', got " + lexiconfile + " instead")
def verifysettings(self):
    """Fill in default settings and normalise the delimiter alias.

    The ``'class'`` default is set before delegating to the parent's
    ``verifysettings``. A ``'delimiter'`` given as ``space``, ``tab`` or
    ``comma`` (case-insensitive) is replaced by the corresponding character;
    any other value is kept as is. Keys already present in
    ``self.settings`` are never overwritten. ``getcache`` is called with
    the settings and a default size of 1000, and its result is stored on
    ``self.cache``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'nonworderror'

    super().verifysettings()

    if 'delimiter' not in self.settings:
        self.settings['delimiter'] = "\t"
    else:
        aliases = {'space': " ", 'tab': "\t", 'comma': ","}
        alias = self.settings['delimiter'].lower()
        if alias in aliases:
            self.settings['delimiter'] = aliases[alias]

    if 'reversedformat' not in self.settings:
        # Reversed format has (word, freq) pairs rather than (freq, word).
        self.settings['reversedformat'] = False

    # The key tested here must match the key assigned. Testing 'ordered '
    # (with a trailing space) made this condition always true, so a
    # caller-supplied 'ordered' value was overwritten with True.
    if 'ordered' not in self.settings:
        self.settings['ordered'] = True  # model file is ordered in descending frequency

    if 'freqthreshold' not in self.settings:
        self.settings['freqthreshold'] = 100
    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2
    if 'maxdistance_short' not in self.settings:
        self.settings['maxdistance_short'] = 1
    if 'maxlength' not in self.settings:
        self.settings['maxlength'] = 25  # longer words will be ignored
    if 'minlength' not in self.settings:
        self.settings['minlength'] = 5  # shorter words will be ignored
    if 'shortlength' not in self.settings:
        # Falls back to the (possibly just defaulted) 'minlength'.
        self.settings['shortlength'] = self.settings['minlength']
    if 'minfreqthreshold' not in self.settings:
        self.settings['minfreqthreshold'] = 10000
    if 'freqfactor' not in self.settings:
        self.settings['freqfactor'] = 10000
    if 'maxnrclosest' not in self.settings:
        self.settings['maxnrclosest'] = 5

    self.cache = getcache(self.settings, 1000)  # 2nd arg is default cache size

    if 'suffixes' not in self.settings:
        self.settings['suffixes'] = []
    if 'prefixes' not in self.settings:
        self.settings['prefixes'] = []
def verifysettings(self):
    """Fill in default settings and translate delimiter keywords.

    ``'class'`` is defaulted first, then the parent's ``verifysettings``
    runs, then the remaining defaults are applied. The delimiter keywords
    ``space``, ``tab`` and ``comma`` (matched case-insensitively) are
    replaced by the literal character; other delimiter values are left
    untouched. Settings the caller already supplied are preserved. The
    result of ``getcache(self.settings, 1000)`` is stored on ``self.cache``.
    """
    settings = self.settings
    if 'class' not in settings:
        settings['class'] = 'nonworderror'

    super().verifysettings()

    if 'delimiter' not in self.settings:
        self.settings['delimiter'] = "\t"
    else:
        keyword = self.settings['delimiter'].lower()
        if keyword == 'space':
            self.settings['delimiter'] = " "
        elif keyword == 'tab':
            self.settings['delimiter'] = "\t"
        elif keyword == 'comma':
            self.settings['delimiter'] = ","

    # Reversed format has (word, freq) pairs rather than (freq, word) pairs.
    if 'reversedformat' not in self.settings:
        self.settings['reversedformat'] = False

    # Test the same key that is assigned: the earlier check looked for
    # 'ordered ' (trailing space), which never matched and therefore reset
    # an explicitly configured 'ordered' value to True on every call.
    if 'ordered' not in self.settings:
        self.settings['ordered'] = True  # model file is ordered in descending frequency

    for key, default in (
        ('freqthreshold', 100),
        ('maxdistance', 2),
        ('maxdistance_short', 1),
        ('maxlength', 25),  # longer words will be ignored
        ('minlength', 5),  # shorter words will be ignored
    ):
        if key not in self.settings:
            self.settings[key] = default

    # 'shortlength' defaults to the effective 'minlength'.
    if 'shortlength' not in self.settings:
        self.settings['shortlength'] = self.settings['minlength']

    for key, default in (
        ('minfreqthreshold', 10000),
        ('freqfactor', 10000),
        ('maxnrclosest', 5),
    ):
        if key not in self.settings:
            self.settings[key] = default

    self.cache = getcache(self.settings, 1000)  # 2nd arg is default cache size

    # Fresh list objects per call; never shared between instances.
    if 'suffixes' not in self.settings:
        self.settings['suffixes'] = []
    if 'prefixes' not in self.settings:
        self.settings['prefixes'] = []
def verifysettings(self):
    """Apply default settings and check that a TIMBL model is configured.

    The ``'class'`` default is set before delegating to the parent's
    ``verifysettings``. ``threshold`` and ``debug`` are exposed as instance
    attributes; the other defaults are written into ``self.settings``.

    Raises:
        Exception: if the first entry of ``self.models`` cannot be
            retrieved, or if it does not end in ``.ibase``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'contexterror'

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    # Use the configured threshold when present, 0.9 otherwise. The test was
    # previously inverted: a missing key raised KeyError and a configured
    # value was silently replaced by 0.9.
    if 'threshold' in self.settings:
        self.threshold = self.settings['threshold']
    else:
        self.threshold = 0.9

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    # NOTE(review): other verifysettings implementations call
    # gethapaxer(self, self.settings) -- confirm which signature applies here.
    self.hapaxer = gethapaxer(self.settings)
    self.cache = getcache(self.settings, 1000)

    # Guard only the lookup; the extension check raises outside the try so
    # its specific message is not replaced by the generic one.
    try:
        modelfile = self.models[0]
    except (AttributeError, LookupError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")
def verifysettings(self):
    """Apply default settings and check that a model is configured.

    The ``'class'`` default is set before delegating to the parent's
    ``verifysettings``. ``maxcontext`` is the larger of the left and right
    context sizes. ``freqthreshold``, ``threshold`` and ``debug`` are
    exposed as instance attributes; the other defaults are written into
    ``self.settings``.

    Raises:
        Exception: if the first entry of ``self.models`` cannot be
            retrieved.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'contexterror'

    super().verifysettings()

    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3
    self.maxcontext = max(self.settings['leftcontext'], self.settings['rightcontext'])

    # The frequency threshold gets its own attribute. The default used to be
    # written to self.threshold, where the threshold logic below discarded
    # it, and a configured value was never copied at all.
    if 'freqthreshold' in self.settings:
        self.freqthreshold = self.settings['freqthreshold']
    else:
        self.freqthreshold = 25

    # Use the configured threshold when present, 0.9 otherwise. The test was
    # previously inverted: a missing key raised KeyError and a configured
    # value was silently replaced by 0.9.
    if 'threshold' in self.settings:
        self.threshold = self.settings['threshold']
    else:
        self.threshold = 0.9

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    # NOTE(review): other verifysettings implementations call
    # gethapaxer(self, self.settings) -- confirm which signature applies here.
    self.hapaxer = gethapaxer(self.settings)
    self.cache = getcache(self.settings, 1000)

    # Only existence of a first model is checked; its value is not used.
    try:
        self.models[0]
    except (AttributeError, LookupError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err
def verifysettings(self):
    """Apply default settings and check that a model is configured.

    The ``'class'`` default is set before delegating to the parent's
    ``verifysettings``. ``maxcontext`` is the larger of the left and right
    context sizes. ``freqthreshold``, ``threshold`` and ``debug`` are
    exposed as instance attributes; the other defaults are written into
    ``self.settings``.

    Raises:
        Exception: if the first entry of ``self.models`` cannot be
            retrieved.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3
    self.maxcontext = max(self.settings['leftcontext'], self.settings['rightcontext'])

    # Always define self.freqthreshold. Previously it was only assigned when
    # the setting was absent, leaving the attribute unset whenever a value
    # was actually configured.
    if 'freqthreshold' in self.settings:
        self.freqthreshold = self.settings['freqthreshold']
    else:
        self.freqthreshold = 25

    # Use the configured threshold when present, 0.9 otherwise. The test was
    # previously inverted: a missing key raised KeyError and a configured
    # value was silently replaced by 0.9.
    if 'threshold' in self.settings:
        self.threshold = self.settings['threshold']
    else:
        self.threshold = 0.9

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self, self.settings)
    self.cache = getcache(self.settings, 1000)

    # Only existence of a first model is checked; its value is not used.
    try:
        self.models[0]
    except (AttributeError, LookupError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err