Пример #1
0
 def __init__(self, leftInventory = None, rightInventory = None):
     """Attach the two symbol inventories and create the multigram inventory.

     A falsy ``leftInventory`` / ``rightInventory`` (including the default
     ``None``) is replaced by a newly created ``SymbolInventory``.
     ``self.term`` is set to the index that the multigram inventory
     assigns to the (term, term) multigram.
     """
     self.leftInventory = leftInventory or SymbolInventory()
     self.rightInventory = rightInventory or SymbolInventory()
     self.inventory = MultigramInventory()
     # One-element tuples: each side of the multigram is a sequence.
     termMultigram = ((self.leftInventory.term,), (self.rightInventory.term,))
     self.term = self.inventory.index(termMultigram)
Пример #2
0
 def __init__(self, leftInventory=None, rightInventory=None):
     """Store both symbol inventories and build the multigram inventory.

     Either inventory argument is replaced by a fresh ``SymbolInventory``
     when it is falsy (the default ``None`` included).  The index of the
     (term, term) multigram is looked up in the new multigram inventory
     and kept as ``self.term``.
     """
     self.leftInventory = leftInventory or SymbolInventory()
     self.rightInventory = rightInventory or SymbolInventory()
     self.inventory = MultigramInventory()
     # Each side of a multigram is a tuple, hence the 1-tuples here.
     leftTerm = (self.leftInventory.term, )
     rightTerm = (self.rightInventory.term, )
     self.term = self.inventory.index((leftTerm, rightTerm))
Пример #3
0
class Sequitur:
    """
    Couples a left and a right symbol inventory with a multigram
    inventory.

    Multigram / sequence model tokens / indices: 0 (zero) indicates
    VOID, and is only used internally as a sentinel. term is the index
    of the (term,term) multigram which is the end-of-string token.
    (Also used as begin-of-string token.)
    """

    def __init__(self, leftInventory = None, rightInventory = None):
        """Attach the symbol inventories and create the multigram inventory.

        A falsy leftInventory / rightInventory (including the default
        None) is replaced by a newly created SymbolInventory.
        """
        self.leftInventory = leftInventory or SymbolInventory()
        self.rightInventory = rightInventory or SymbolInventory()
        self.inventory = MultigramInventory()
        # One-element tuples: each side of the multigram is a sequence.
        termMultigram = (
            (self.leftInventory.term,), (self.rightInventory.term,))
        self.term = self.inventory.index(termMultigram)

    def compileSample(self, sample):
        """Return a list with each (left, right) pair of sample parsed.

        The left element goes through the left inventory's parse(), the
        right element through the right inventory's parse().
        """
        compiled = []
        for left, right in sample:
            parsedLeft = self.leftInventory.parse(left)
            parsedRight = self.rightInventory.parse(right)
            compiled.append((parsedLeft, parsedRight))
        return compiled

    def symbol(self, i):
        """multigramFromTokenIndex

        Look up multigram i and return its two sides as formatted by
        the left and right inventory respectively.
        """
        leftPart, rightPart = self.inventory.symbol(i)
        return (self.leftInventory.format(leftPart),
                self.rightInventory.format(rightPart))

    def symbols(self):
        """Return symbol(i) for every index from 1 to inventory.size()."""
        # Index 0 is skipped on purpose; counting starts at 1.
        lastIndex = self.inventory.size()
        collected = []
        for i in range(1, lastIndex + 1):
            collected.append(self.symbol(i))
        return collected

    def index(self, left, right):
        """tokenIndexFromMultigram

        Parse both sides with their inventories and return the index
        the multigram inventory reports for the resulting pair.
        """
        multigram = (self.leftInventory.parse(left),
                     self.rightInventory.parse(right))
        return self.inventory.index(multigram)

    def makeStringInventory(self):
        """Build a sequitur_.StringInventory of 'left:right' labels.

        One label is produced per multigram index from 1 to
        inventory.size(), by formatting the pair returned by symbol().
        """
        lastIndex = self.inventory.size()
        labels = ['%s:%s' % self.symbol(i) for i in range(1, lastIndex + 1)]
        return sequitur_.StringInventory(labels)