示例#1
0
文件: cfg.py 项目: daleloogn/speechAD
    def __init__(self, terminals, non_terminals, productions, starts=()):
        """Store the four grammar components as normalized tuples, then verify.

        terminals, non_terminals and starts are iterables of symbols.
        productions is an iterable of (lhs, rhs) pairs; each rhs is converted
        to a tuple before being stored.

        Every component is passed through sorteduniquetuple (defined elsewhere;
        presumably it returns a sorted, de-duplicated tuple -- confirm there).
        The result is kept in self.data in the order
        (terminals, non_terminals, productions, starts), after which
        self._verify() is called on the new instance.
        """
        # Components are normalized in the same order as they appear in
        # self.data, so the inputs are consumed left to right.
        self.data = (
            sorteduniquetuple(terminals),
            sorteduniquetuple(non_terminals),
            sorteduniquetuple((head, tuple(body)) for head, body in productions),
            sorteduniquetuple(starts),
        )

        self._verify()
示例#2
0
    def __init__(self, terminals, non_terminals, productions, starts=()):
        """Store the four grammar components as normalized tuples, then verify.

        terminals, non_terminals and starts are iterables of symbols.
        productions is an iterable of (lhs, rhs) pairs; each rhs is converted
        to a tuple before being stored.

        Every component is passed through sorteduniquetuple (defined elsewhere;
        presumably it returns a sorted, de-duplicated tuple -- confirm there).
        The result is kept in self.data in the order
        (terminals, non_terminals, productions, starts), after which
        self._verify() is called on the new instance.
        """
        # Components are normalized in the same order as they appear in
        # self.data, so the inputs are consumed left to right.
        self.data = (
            sorteduniquetuple(terminals),
            sorteduniquetuple(non_terminals),
            sorteduniquetuple((head, tuple(body)) for head, body in productions),
            sorteduniquetuple(starts),
        )

        self._verify()
示例#3
0
文件: cfg.py 项目: daleloogn/speechAD
    def __init__(self, grammar_sets, debug=False):
        """Build the integer-indexed lookup tables for a grammar.

        grammar_sets is either a GrammarTables instance or another Grammar; in
        the latter case its grammar_sets attribute is used, which gives copy
        construction.  The tables are verified and unpacked into terminals,
        non_terminals, productions and starts.

        When debug is true, each intermediate table is printed as it is built.

        Attributes assigned here: _grammar_sets, terminal_by_id,
        id_by_terminal, non_terminal_by_id, id_by_non_terminal, rhs_by_id,
        ruleset_by_id, ruleset_id_by_lhs_id and starts.  self._verify() is
        called last.

        Raises AssertionError (when assertions are enabled) if grammar_sets is
        not a GrammarTables after the copy-construction step; the message is
        the name of the offending type.
        """
        # allow copy construction
        # NOTE(review): this reads .grammar_sets while the method below stores
        # ._grammar_sets; presumably a property elsewhere in the class -- confirm
        if isinstance(grammar_sets, Grammar):
            grammar_sets = grammar_sets.grammar_sets

        assert isinstance(grammar_sets, GrammarTables), repr(type(grammar_sets).__name__)
        grammar_sets._verify()
        # unpack the four components and keep a reference to the source tables
        terminals, non_terminals, productions, starts = self._grammar_sets = grammar_sets

        if debug:
            print "productions:", tuple(enumerate(productions))
            print "starts:", tuple(starts)

        # bijective maps with non-negative ids
        # (frozenbijection is defined elsewhere; as used here it returns a pair:
        # an enumerable sequence of items and a mapping from item to its id)
        self.terminal_by_id, self.id_by_terminal = frozenbijection(terminals)
        self.non_terminal_by_id, self.id_by_non_terminal = frozenbijection(non_terminals)

        if debug:
            print "terminal_by_id:", tuple(enumerate(self.terminal_by_id))
            print "terminal by integer:", tuple(
                (self.symbol_to_integer(terminal), terminal) for terminal in self.terminal_by_id
            )
            print "non_terminal_by_id:", tuple(enumerate(self.non_terminal_by_id))

        # create the index-based/integer-based left-hand sides and integer-based right-hand sides
        # note: we put completion_id at end of each rhs as a cursor-indexable sentinel
        # sanity check: for every lhs, its non-terminal id must equal symbol_to_integer(lhs)
        for lhs, rhs in productions:
            assert self.id_by_non_terminal[lhs] == self.symbol_to_integer(lhs)
        # prod1: (lhs id, tuple of integer symbols ending with completion_id)
        prod1 = sorteduniquetuple(
            (
                self.id_by_non_terminal[lhs],
                tuple(self.symbol_to_integer(symbol) for symbol in rhs) + (self.completion_id,),
            )
            for lhs, rhs in productions
        )
        if debug:
            print "prod1:", prod1

        # introduce two further levels of indirection: sharing of rhs sequences, sharing of rule sets

        # first level: give each rhs sequence of prod1 an id
        self.rhs_by_id, id_by_rhs = frozenbijection(rhs for lhs, rhs in prod1)
        if debug:
            print "rhs_by_id:", tuple(enumerate(self.rhs_by_id))
            # NOTE(review): the ratio uses `/`; without `from __future__ import division`
            # this is integer division under Python 2, and it raises ZeroDivisionError
            # if prod1 is empty -- confirm against the module's imports and _verify()
            print "rhs uniqueness:", len(self.rhs_by_id), "of", len(prod1), ":", len(self.rhs_by_id) / len(prod1)

        # each production as a pair: this uses an index for each rhs sequence
        prod2 = sorteduniquetuple((lhs, id_by_rhs[rhs]) for lhs, rhs in prod1)
        if debug:
            print "prod2:", prod2

        # group the rhs ids by lhs id
        proddict = defaultdict(set)
        for lhs, rhs in prod2:
            proddict[lhs].add(rhs)
        # here, for each production, a tuple of the set of rhs indices is used for rhs
        # NOTE(review): iterating xrange(len(proddict)) assumes the lhs ids present are
        # exactly 0..len(proddict)-1; because proddict is a defaultdict, a missing id
        # would silently yield an empty ruleset -- confirm that every non-terminal is
        # guaranteed to have at least one production
        prod3 = sorteduniquetuple((lhs, sorteduniquetuple(proddict[lhs])) for lhs in xrange(len(proddict)))

        # second level: give each ruleset (tuple of rhs ids) an id
        self.ruleset_by_id, id_by_ruleset = frozenbijection(rhs for lhs, rhs in prod3)
        if debug:
            print "ruleset_by_id:", tuple(enumerate(self.ruleset_by_id))
            print "ruleset uniqueness:", len(self.ruleset_by_id), "of", len(prod3), ":", len(self.ruleset_by_id) / len(
                prod3
            )

        # note: lhs are now a (useless) enumeration
        # so the ruleset id for an lhs is found by position in this tuple
        self.ruleset_id_by_lhs_id = tuple(id_by_ruleset[rhs] for lhs, rhs in prod3)
        if debug:
            print "self.ruleset_id_by_lhs_id:", tuple(enumerate(self.ruleset_id_by_lhs_id))

        # start symbols, expressed as non-terminal ids
        self.starts = sorteduniquetuple(self.id_by_non_terminal[start] for start in starts)
        if debug:
            print "starts:", self.starts

        self._verify()
示例#4
0
    def __init__(self, grammar_sets, debug=False):
        """Build the integer-indexed lookup tables for a grammar.

        grammar_sets is either a GrammarTables instance or another Grammar; in
        the latter case its grammar_sets attribute is used, which gives copy
        construction.  The tables are verified and unpacked into terminals,
        non_terminals, productions and starts.

        When debug is true, each intermediate table is printed as it is built.

        Attributes assigned here: _grammar_sets, terminal_by_id,
        id_by_terminal, non_terminal_by_id, id_by_non_terminal, rhs_by_id,
        ruleset_by_id, ruleset_id_by_lhs_id and starts.  self._verify() is
        called last.

        Raises AssertionError (when assertions are enabled) if grammar_sets is
        not a GrammarTables after the copy-construction step; the message is
        the name of the offending type.
        """
        # allow copy construction
        # NOTE(review): this reads .grammar_sets while the method below stores
        # ._grammar_sets; presumably a property elsewhere in the class -- confirm
        if isinstance(grammar_sets, Grammar):
            grammar_sets = grammar_sets.grammar_sets

        assert isinstance(grammar_sets,
                          GrammarTables), repr(type(grammar_sets).__name__)
        grammar_sets._verify()
        # unpack the four components and keep a reference to the source tables
        terminals, non_terminals, productions, starts = self._grammar_sets = grammar_sets

        if debug:
            print 'productions:', tuple(enumerate(productions))
            print 'starts:', tuple(starts)

        # bijective maps with non-negative ids
        # (frozenbijection is defined elsewhere; as used here it returns a pair:
        # an enumerable sequence of items and a mapping from item to its id)
        self.terminal_by_id, self.id_by_terminal = frozenbijection(terminals)
        self.non_terminal_by_id, self.id_by_non_terminal = frozenbijection(
            non_terminals)

        if debug:
            print 'terminal_by_id:', tuple(enumerate(self.terminal_by_id))
            print 'terminal by integer:', tuple(
                (self.symbol_to_integer(terminal), terminal)
                for terminal in self.terminal_by_id)
            print 'non_terminal_by_id:', tuple(
                enumerate(self.non_terminal_by_id))

        # create the index-based/integer-based left-hand sides and integer-based right-hand sides
        # note: we put completion_id at end of each rhs as a cursor-indexable sentinel
        # sanity check: for every lhs, its non-terminal id must equal symbol_to_integer(lhs)
        for lhs, rhs in productions:
            assert self.id_by_non_terminal[lhs] == self.symbol_to_integer(lhs)
        # prod1: (lhs id, tuple of integer symbols ending with completion_id)
        prod1 = sorteduniquetuple(
            (self.id_by_non_terminal[lhs],
             tuple(self.symbol_to_integer(symbol)
                   for symbol in rhs) + (self.completion_id, ))
            for lhs, rhs in productions)
        if debug:
            print 'prod1:', prod1

        # introduce two further levels of indirection: sharing of rhs sequences, sharing of rule sets

        # first level: give each rhs sequence of prod1 an id
        self.rhs_by_id, id_by_rhs = frozenbijection(rhs for lhs, rhs in prod1)
        if debug:
            print 'rhs_by_id:', tuple(enumerate(self.rhs_by_id))
            # NOTE(review): the ratio uses `/`; without `from __future__ import division`
            # this is integer division under Python 2, and it raises ZeroDivisionError
            # if prod1 is empty -- confirm against the module's imports and _verify()
            print 'rhs uniqueness:', len(
                self.rhs_by_id), 'of', len(prod1), ':', len(
                    self.rhs_by_id) / len(prod1)

        # each production as a pair: this uses an index for each rhs sequence
        prod2 = sorteduniquetuple((lhs, id_by_rhs[rhs]) for lhs, rhs in prod1)
        if debug:
            print 'prod2:', prod2

        # group the rhs ids by lhs id
        proddict = defaultdict(set)
        for lhs, rhs in prod2:
            proddict[lhs].add(rhs)
        # here, for each production, a tuple of the set of rhs indices is used for rhs
        # NOTE(review): iterating xrange(len(proddict)) assumes the lhs ids present are
        # exactly 0..len(proddict)-1; because proddict is a defaultdict, a missing id
        # would silently yield an empty ruleset -- confirm that every non-terminal is
        # guaranteed to have at least one production
        prod3 = sorteduniquetuple((lhs, sorteduniquetuple(proddict[lhs]))
                                  for lhs in xrange(len(proddict)))

        # second level: give each ruleset (tuple of rhs ids) an id
        self.ruleset_by_id, id_by_ruleset = frozenbijection(
            rhs for lhs, rhs in prod3)
        if debug:
            print 'ruleset_by_id:', tuple(enumerate(self.ruleset_by_id))
            print 'ruleset uniqueness:', len(
                self.ruleset_by_id), 'of', len(prod3), ':', len(
                    self.ruleset_by_id) / len(prod3)

        # note: lhs are now a (useless) enumeration
        # so the ruleset id for an lhs is found by position in this tuple
        self.ruleset_id_by_lhs_id = tuple(id_by_ruleset[rhs]
                                          for lhs, rhs in prod3)
        if debug:
            print 'self.ruleset_id_by_lhs_id:', tuple(
                enumerate(self.ruleset_id_by_lhs_id))

        # start symbols, expressed as non-terminal ids
        self.starts = sorteduniquetuple(self.id_by_non_terminal[start]
                                        for start in starts)
        if debug:
            print 'starts:', self.starts

        self._verify()