def __init__(self):
    Ruleset.__init__(self, 'Sample ruleset')
    # Load NLTK's pre-trained Punkt sentence tokenizer for English.
    self.tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    # Rules that will be run against the tokenized sentences.
    self.rules = [
        ShortSentencesRule(self),
        LongSentencesRule(self),
        AllSentencesRule(self)
    ]
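
For context, a minimal sketch of what the loaded tokenizer provides; it assumes the Punkt model has been fetched beforehand (e.g. with nltk.download('punkt')):

import nltk

# Assumes the Punkt model is available locally (via nltk.download('punkt')).
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

text = "Short one. This second sentence is noticeably longer than the first."
# tokenize() splits raw text into sentences, which rules like the ones
# above can then inspect.
print(tokenizer.tokenize(text))
# ['Short one.', 'This second sentence is noticeably longer than the first.']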
Example #2
def __init__(self, data, uid):
    # Pass the ruleset name, optional comments, and prefix/suffix props
    # through to the base class.
    Ruleset.__init__(self, data['ruleset'], data.get('comments', []), {
        'prefix': data.get('prefix', ''),
        'suffix': data.get('suffix', ''),
    })
    # Normalize Windows path separators so uids are platform-independent.
    self.uid = str(uid).replace('\\', '/')
    self.data = data
    self.flags = process_flags(data.get('flags', ''), DEFAULT_FLAGS)
    self.replace = data.get('replace', {})
    # Build one RegularExpressionRule per entry in the 'rules' list.
    self.rules = [
        RegularExpressionRule(self, self.props, d, self.flags, self.replace)
        for d in data['rules']
    ]
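
The constructor expects a dict-shaped payload; the sketch below illustrates the keys it reads. The example values and the shape of the entries inside 'rules' are assumptions for illustration, not taken from the original project:

# Hypothetical payload showing the keys the constructor above reads.
data = {
    'ruleset': 'Sample regex ruleset',     # required: ruleset name
    'comments': ['strips double spaces'],  # optional comment list
    'prefix': '',                          # optional, folded into props
    'suffix': '',                          # optional, folded into props
    'flags': 'i',                          # optional, parsed by process_flags()
    'replace': {},                         # optional shared replacement table
    'rules': [{}],                         # required: one dict per rule
}

# The uid normalization mirrors the constructor: backslashes become slashes,
# so uids derived from Windows paths stay platform-independent.
uid = 'rulesets\\sample.json'
print(str(uid).replace('\\', '/'))  # -> rulesets/sample.json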