def __init__(self, terminals, non_terminals, productions, starts=()):
    """Store the four grammar collections on ``self.data`` and verify them.

    terminals, non_terminals, starts: iterables, each passed through
        ``sorteduniquetuple``.
    productions: iterable of ``(lhs, rhs)`` pairs; every ``rhs`` is turned
        into a tuple before the pairs are passed through
        ``sorteduniquetuple``.

    ``self._verify()`` is called once ``self.data`` has been assigned.
    """
    # A tuple display evaluates its items left to right, so the inputs are
    # processed in the order: terminals, non_terminals, productions, starts.
    self.data = (
        sorteduniquetuple(terminals),
        sorteduniquetuple(non_terminals),
        sorteduniquetuple((head, tuple(body)) for head, body in productions),
        sorteduniquetuple(starts),
    )
    self._verify()
def __init__(self, terminals, non_terminals, productions, starts=()):
    """Normalise the grammar inputs, keep them in ``self.data``, then verify.

    terminals, non_terminals, starts: iterables handed to
        ``sorteduniquetuple`` unchanged.
    productions: iterable of ``(lhs, rhs)`` pairs; each ``rhs`` is converted
        to a tuple first.

    ``self.data`` ends up as the 4-tuple
    ``(terminals, non_terminals, productions, starts)`` of the normalised
    values, after which ``self._verify()`` runs.
    """
    terminal_symbols = sorteduniquetuple(terminals)
    non_terminal_symbols = sorteduniquetuple(non_terminals)
    # right-hand sides may arrive as any iterable; store them as tuples
    rules = sorteduniquetuple(
        (left, tuple(right)) for left, right in productions
    )
    start_symbols = sorteduniquetuple(starts)

    self.data = (terminal_symbols, non_terminal_symbols, rules, start_symbols)
    self._verify()
def __init__(self, grammar_sets, debug=False): # allow copy construction if isinstance(grammar_sets, Grammar): grammar_sets = grammar_sets.grammar_sets assert isinstance(grammar_sets, GrammarTables), repr(type(grammar_sets).__name__) grammar_sets._verify() terminals, non_terminals, productions, starts = self._grammar_sets = grammar_sets if debug: print "productions:", tuple(enumerate(productions)) print "starts:", tuple(starts) # bijective maps with non-negative ids self.terminal_by_id, self.id_by_terminal = frozenbijection(terminals) self.non_terminal_by_id, self.id_by_non_terminal = frozenbijection(non_terminals) if debug: print "terminal_by_id:", tuple(enumerate(self.terminal_by_id)) print "terminal by integer:", tuple( (self.symbol_to_integer(terminal), terminal) for terminal in self.terminal_by_id ) print "non_terminal_by_id:", tuple(enumerate(self.non_terminal_by_id)) # create the index-based/integer-based left-hand sides and integer-based right-hand sides # note: we put completion_id at end of each rhs as a cursor-indexible sentinel for lhs, rhs in productions: assert self.id_by_non_terminal[lhs] == self.symbol_to_integer(lhs) prod1 = sorteduniquetuple( ( self.id_by_non_terminal[lhs], tuple(self.symbol_to_integer(symbol) for symbol in rhs) + (self.completion_id,), ) for lhs, rhs in productions ) if debug: print "prod1:", prod1 # introduce two further levels of indirection: sharing of rhs sequences, sharing of rule sets self.rhs_by_id, id_by_rhs = frozenbijection(rhs for lhs, rhs in prod1) if debug: print "rhs_by_id:", tuple(enumerate(self.rhs_by_id)) print "rhs uniqueness:", len(self.rhs_by_id), "of", len(prod1), ":", len(self.rhs_by_id) / len(prod1) # each production as a pair: this uses an index for each rhs sequence prod2 = sorteduniquetuple((lhs, id_by_rhs[rhs]) for lhs, rhs in prod1) if debug: print "prod2:", prod2 proddict = defaultdict(set) for lhs, rhs in prod2: proddict[lhs].add(rhs) # here, for each production, a tuple of the set of rhs indices is 
used for rhs prod3 = sorteduniquetuple((lhs, sorteduniquetuple(proddict[lhs])) for lhs in xrange(len(proddict))) self.ruleset_by_id, id_by_ruleset = frozenbijection(rhs for lhs, rhs in prod3) if debug: print "ruleset_by_id:", tuple(enumerate(self.ruleset_by_id)) print "ruleset uniqueness:", len(self.ruleset_by_id), "of", len(prod3), ":", len(self.ruleset_by_id) / len( prod3 ) # note: lhs are now a (useless) enumeration self.ruleset_id_by_lhs_id = tuple(id_by_ruleset[rhs] for lhs, rhs in prod3) if debug: print "self.ruleset_id_by_lhs_id:", tuple(enumerate(self.ruleset_id_by_lhs_id)) self.starts = sorteduniquetuple(self.id_by_non_terminal[start] for start in starts) if debug: print "starts:", self.starts self._verify()
def __init__(self, grammar_sets, debug=False): # allow copy construction if isinstance(grammar_sets, Grammar): grammar_sets = grammar_sets.grammar_sets assert isinstance(grammar_sets, GrammarTables), repr(type(grammar_sets).__name__) grammar_sets._verify() terminals, non_terminals, productions, starts = self._grammar_sets = grammar_sets if debug: print 'productions:', tuple(enumerate(productions)) print 'starts:', tuple(starts) # bijective maps with non-negative ids self.terminal_by_id, self.id_by_terminal = frozenbijection(terminals) self.non_terminal_by_id, self.id_by_non_terminal = frozenbijection( non_terminals) if debug: print 'terminal_by_id:', tuple(enumerate(self.terminal_by_id)) print 'terminal by integer:', tuple( (self.symbol_to_integer(terminal), terminal) for terminal in self.terminal_by_id) print 'non_terminal_by_id:', tuple( enumerate(self.non_terminal_by_id)) # create the index-based/integer-based left-hand sides and integer-based right-hand sides # note: we put completion_id at end of each rhs as a cursor-indexible sentinel for lhs, rhs in productions: assert self.id_by_non_terminal[lhs] == self.symbol_to_integer(lhs) prod1 = sorteduniquetuple( (self.id_by_non_terminal[lhs], tuple(self.symbol_to_integer(symbol) for symbol in rhs) + (self.completion_id, )) for lhs, rhs in productions) if debug: print 'prod1:', prod1 # introduce two further levels of indirection: sharing of rhs sequences, sharing of rule sets self.rhs_by_id, id_by_rhs = frozenbijection(rhs for lhs, rhs in prod1) if debug: print 'rhs_by_id:', tuple(enumerate(self.rhs_by_id)) print 'rhs uniqueness:', len( self.rhs_by_id), 'of', len(prod1), ':', len( self.rhs_by_id) / len(prod1) # each production as a pair: this uses an index for each rhs sequence prod2 = sorteduniquetuple((lhs, id_by_rhs[rhs]) for lhs, rhs in prod1) if debug: print 'prod2:', prod2 proddict = defaultdict(set) for lhs, rhs in prod2: proddict[lhs].add(rhs) # here, for each production, a tuple of the set of rhs indices is 
used for rhs prod3 = sorteduniquetuple((lhs, sorteduniquetuple(proddict[lhs])) for lhs in xrange(len(proddict))) self.ruleset_by_id, id_by_ruleset = frozenbijection( rhs for lhs, rhs in prod3) if debug: print 'ruleset_by_id:', tuple(enumerate(self.ruleset_by_id)) print 'ruleset uniqueness:', len( self.ruleset_by_id), 'of', len(prod3), ':', len( self.ruleset_by_id) / len(prod3) # note: lhs are now a (useless) enumeration self.ruleset_id_by_lhs_id = tuple(id_by_ruleset[rhs] for lhs, rhs in prod3) if debug: print 'self.ruleset_id_by_lhs_id:', tuple( enumerate(self.ruleset_id_by_lhs_id)) self.starts = sorteduniquetuple(self.id_by_non_terminal[start] for start in starts) if debug: print 'starts:', self.starts self._verify()