def __init__(self, leftInventory=None, rightInventory=None):
    """
    Set up the symbol inventories and the multigram inventory.

    leftInventory / rightInventory: symbol inventories for the two
    sides of a multigram.  When an argument is None, a fresh
    SymbolInventory is created for that side.

    Sets self.term to the index, in the multigram inventory, of the
    multigram made of the two inventories' term symbols.
    """
    # Compare against None explicitly: a truthiness test would also
    # replace a caller-supplied inventory that evaluates as false.
    if leftInventory is None:
        leftInventory = SymbolInventory()
    if rightInventory is None:
        rightInventory = SymbolInventory()
    self.leftInventory = leftInventory
    self.rightInventory = rightInventory
    self.inventory = MultigramInventory()
    # Register the (term, term) multigram right away so its index is known.
    self.term = self.inventory.index(
        ((self.leftInventory.term,), (self.rightInventory.term,))
    )
def __init__(self, leftInventory=None, rightInventory=None):
    """
    Initialise the left/right symbol inventories and the multigram inventory.

    leftInventory: inventory used for the left side; a new
        SymbolInventory is created when None is given.
    rightInventory: inventory used for the right side; a new
        SymbolInventory is created when None is given.

    After construction, self.term holds the index that the multigram
    inventory assigns to the pair of one-element sequences built from
    each side's term symbol.
    """
    # "is None" rather than "not ...": only a missing argument should
    # trigger the default, never a supplied object that happens to be falsy.
    self.leftInventory = (
        SymbolInventory() if leftInventory is None else leftInventory
    )
    self.rightInventory = (
        SymbolInventory() if rightInventory is None else rightInventory
    )
    self.inventory = MultigramInventory()
    termMultigram = (
        (self.leftInventory.term,),
        (self.rightInventory.term,),
    )
    self.term = self.inventory.index(termMultigram)
class Sequitur:
    """
    Multigram / sequence model

    tokens / indices:
    0 (zero) indicates VOID, and is only used internally as a sentinel.
    term is the index of the (term,term) multigram which is the
    end-of-string token.  (Also used as begin-of-string token.)
    """

    def __init__(self, leftInventory=None, rightInventory=None):
        """
        Set up the symbol inventories and the multigram inventory.

        leftInventory / rightInventory: symbol inventories for the two
        sides of a multigram.  When an argument is None, a fresh
        SymbolInventory is created for that side.
        """
        # Compare against None explicitly: a truthiness test would also
        # replace a caller-supplied inventory that evaluates as false.
        if leftInventory is None:
            leftInventory = SymbolInventory()
        if rightInventory is None:
            rightInventory = SymbolInventory()
        self.leftInventory = leftInventory
        self.rightInventory = rightInventory
        self.inventory = MultigramInventory()
        # Register the (term, term) multigram right away so its index is known.
        self.term = self.inventory.index(
            ((self.leftInventory.term,), (self.rightInventory.term,))
        )

    def compileSample(self, sample):
        """
        Parse every (left, right) pair of sample with the matching inventory.

        Returns a list of (parsedLeft, parsedRight) tuples in input order.
        """
        return [
            (self.leftInventory.parse(left), self.rightInventory.parse(right))
            for left, right in sample
        ]

    def symbol(self, i):
        """
        multigramFromTokenIndex: look up multigram i and return its two
        sides as a (left, right) tuple, each formatted by its inventory.
        """
        left, right = self.inventory.symbol(i)
        left = self.leftInventory.format(left)
        right = self.rightInventory.format(right)
        return (left, right)

    def symbols(self):
        """
        Return the formatted multigram for every index from 1 up to and
        including the inventory size (index 0 is skipped).
        """
        return [self.symbol(i) for i in range(1, self.inventory.size() + 1)]

    def index(self, left, right):
        """
        tokenIndexFromMultigram: parse both sides with their inventories
        and return the multigram inventory's index for the parsed pair.
        """
        left = self.leftInventory.parse(left)
        right = self.rightInventory.parse(right)
        return self.inventory.index((left, right))

    def makeStringInventory(self):
        """
        Build a sequitur_.StringInventory holding one 'left:right' string
        per multigram, for indices 1 through the inventory size.
        """
        labels = [
            '%s:%s' % self.symbol(i)
            for i in range(1, self.inventory.size() + 1)
        ]
        return sequitur_.StringInventory(labels)