def factorize_grammar(self):
    """Left-factorize the grammar.

    Whenever several productions of a non-terminal share the same first
    right-hand symbol, they are merged into one production that defers the
    differing tails to a freshly created non-terminal; the fresh symbol is
    queued for further factoring.  Returns a new ContextFreeGrammar;
    ``self`` is not modified.
    """
    pending: Set[NonTerminal] = set(self.non_terminals)
    rules_by_left: Dict[NonTerminal, List[GrammarRule]] = dict(self.rules_dict)
    result_rules: Set[GrammarRule] = set()
    result_symbols: List[NonTerminal] = list(self.non_terminals)
    # Counter used to give each fresh non-terminal a unique suffix.
    fresh_index = len(self.non_terminals)

    def terminal_only(rule: GrammarRule) -> bool:
        # A right side with no non-terminals (including the empty one)
        # is emitted as-is.
        return all(isinstance(s, Terminal) for s in rule.right_symbols)

    while pending:
        left = pending.pop()
        grouped: Dict[GrammarSymbol, List[GrammarRule]] = defaultdict(list)
        for rule in rules_by_left[left]:
            if terminal_only(rule) or len(rule.right_symbols) == 1:
                result_rules.add(rule)
            else:
                # Group the remaining rules by their first symbol.
                grouped[rule.right_symbols[0]].append(rule)
        for head, bucket in grouped.items():
            if len(bucket) > 1:
                # Common prefix found: left -> head fresh, and fresh
                # carries the tails of every grouped rule.
                fresh = FromNonTerminal(left, f"{fresh_index}")
                fresh_index += 1
                rules_by_left[fresh] = [
                    GrammarRule(fresh, r.right_symbols[1:]) for r in bucket
                ]
                result_symbols.append(fresh)
                result_rules.add(GrammarRule(left, [head, fresh]))
                # The fresh symbol's tails may themselves share prefixes.
                pending.add(fresh)
            elif bucket:
                result_rules.update(bucket)
    return ContextFreeGrammar(
        self.terminals,
        result_symbols,
        list(result_rules),
        self.start_non_terminal,
    )
def transform_to_greibach_form(self):
    """Eliminate disappearing (nullable) non-terminals from the grammar.

    Every rule is expanded into its variants with nullable symbols removed
    (via ``remove_disappearing_from_rule``).  If the start symbol itself is
    nullable, a fresh start symbol is introduced with the rules
    ``S' -> eps | S`` so the empty word stays derivable.  Returns a new
    ContextFreeGrammar; ``self`` is not modified.
    """
    vanishing = self.detect_disappearing_non_terminals()
    start: NonTerminal = self.start_non_terminal
    symbols: Sequence[NonTerminal] = self.non_terminals
    if not vanishing:
        # Nothing is nullable: keep the rule set as-is.
        rules: Set[GrammarRule] = set(self.rules)
    else:
        rules = set()
        for rule in self.rules:
            rules.update(rule.remove_disappearing_from_rule(vanishing))
        if self.start_non_terminal in vanishing:
            # Preserve derivability of the empty word with a new start.
            start = FromNonTerminal(start, str(len(self.non_terminals)))
            rules.add(GrammarRule(start, []))
            rules.add(GrammarRule(start, [self.start_non_terminal]))
            symbols = [start] + list(symbols)
    return ContextFreeGrammar(
        self.terminals,
        symbols,
        list(rules),
        start,
    )
def __remove_direct_left_recursion__(self, symbol: NonTerminal):
    """Remove direct left recursion ``A -> A alpha`` for *symbol*.

    Standard transformation: recursive rules ``A -> A a_i`` and
    non-recursive rules ``A -> b_j`` become ``A -> b_j A'`` plus
    ``A' -> a_i A' | eps``.  Returns ``self`` unchanged when *symbol*
    has no directly recursive rule, otherwise a new ContextFreeGrammar.
    """
    symbol_rules = self.rules_dict[symbol]
    # Guard against epsilon rules: an empty right side has no first
    # symbol, cannot be left-recursive, and indexing [0] on it would
    # raise IndexError (same guard as in __remove_indirect_recursion_for__).
    has_direct_recursion = [
        rule for rule in symbol_rules
        if rule.right_symbols and rule.right_symbols[0] == symbol
    ]
    no_direct_recursion = [
        rule for rule in symbol_rules
        if not rule.right_symbols or rule.right_symbols[0] != symbol
    ]
    if not has_direct_recursion:
        return self
    new_symbol = FromNonTerminal(symbol, str(len(self.non_terminals)))
    # A' -> a_i A'  for every recursive rule A -> A a_i, plus A' -> eps.
    new_symbol_rules: List[GrammarRule] = [
        GrammarRule(new_symbol, rule.right_symbols[1:] + [new_symbol])
        for rule in has_direct_recursion
    ]
    new_symbol_rules.append(GrammarRule(new_symbol, []))
    # A -> b_j A'  for every non-recursive rule A -> b_j.
    old_symbol_rules = [
        GrammarRule(symbol, rule.right_symbols + [new_symbol])
        for rule in no_direct_recursion
    ]
    # All rules of other non-terminals are kept untouched.
    old_rules = [rule for rule in self.rules if rule.left_symbol != symbol]
    return ContextFreeGrammar(
        self.terminals,
        list(self.non_terminals) + [new_symbol],
        old_rules + new_symbol_rules + old_symbol_rules,
        self.start_non_terminal)
def addRule(pattern="", suggestion="", short_comment="", full_comment="", exceptions=None):
    """Register a new GrammarRule in the module-level ``rules`` list.

    All arguments are passed straight through to the GrammarRule
    constructor; ``exceptions`` defaults to an empty dict.
    """
    global rules
    # Use a None sentinel instead of a mutable {} default: a shared dict
    # default would be the same object across every call that omits
    # *exceptions*, so a mutation by one rule would leak into all others.
    if exceptions is None:
        exceptions = {}
    rules.append(
        GrammarRule(pattern, suggestion, short_comment, full_comment,
                    exceptions))
def __remove_indirect_recursion_for__(
    self,
    lower_symbol: NonTerminal,
    greater_symbol: NonTerminal,
):
    """Remove indirect recursion from *greater_symbol* through *lower_symbol*.

    Every rule ``G -> L tail`` is replaced by ``G -> body tail`` for each
    production ``L -> body`` of *lower_symbol*; rules of *greater_symbol*
    not starting with *lower_symbol* are kept.  Returns a new
    ContextFreeGrammar; ``self`` is not modified.
    """
    greater_rules = self.rules_dict[greater_symbol]
    starts_with_lower = [
        rule for rule in greater_rules
        if rule.right_symbols and rule.right_symbols[0] == lower_symbol
    ]
    # Keep the rules that do NOT start with lower_symbol.  The empty-rule
    # guard mirrors the one above: an epsilon rule has no first symbol,
    # and the unguarded [0] here previously raised IndexError whenever
    # greater_symbol had an epsilon production.
    new_rules_for_greater: Set[GrammarRule] = {
        rule for rule in greater_rules
        if not rule.right_symbols or rule.right_symbols[0] != lower_symbol
    }
    # Substitute every production of lower_symbol for its occurrence at
    # the head of each matching rule.
    for rule_with_lower in starts_with_lower:
        for rule_for_lower in self.rules_dict[lower_symbol]:
            new_rules_for_greater.add(
                GrammarRule(
                    greater_symbol,
                    rule_for_lower.right_symbols +
                    rule_with_lower.right_symbols[1:]))
    # Rules of all other non-terminals are preserved unchanged.
    new_rules = {
        rule for rule in self.rules if rule.left_symbol != greater_symbol
    }
    new_rules.update(new_rules_for_greater)
    return ContextFreeGrammar(
        self.terminals,
        self.non_terminals,
        list(new_rules),
        self.start_non_terminal,
    )
def on_next_token(current_symbol) -> bool:
    """Expand *current_symbol* against the upcoming input token.

    NOTE(review): this is a closure — ``self`` (grammar, FIRST/FOLLOW
    tables), ``get_cur_token`` and ``word_queue`` come from the enclosing
    scope, which is not visible in this chunk.
    """
    # Commit to the first production whose FIRST set contains the
    # current lookahead token.
    for rule in self.grammar.rules_dict[current_symbol]:
        if Terminal(get_cur_token()) in self.first(rule.right_symbols):
            for symbol in rule.right_symbols:
                if isinstance(symbol, Terminal):
                    if symbol != Terminal(get_cur_token()):
                        # Lookahead mismatch inside the chosen rule.
                        return False
                    # Consume the matched token.
                    # NOTE(review): pop() removes the queue's last element —
                    # presumably word_queue is ordered so the current token
                    # is at the tail; confirm against the enclosing scope.
                    word_queue.pop()
                else:
                    # Recurse into the non-terminal.
                    if not on_next_token(symbol):
                        return False
            return True
    # No production matched: accept via an epsilon rule, provided the
    # lookahead is in FOLLOW(current_symbol).
    if GrammarRule(current_symbol, []) in self.grammar.rules_dict[current_symbol] \
            and Terminal(get_cur_token()) in self.follows_dict[current_symbol]:
        return True
    return False
# https://sourceforge.net/projects/graviax/ from grammar_rule import GrammarRule rules = [] # rules.append( GrammarRule(pattern, suggestion, short_comment, full_comment, exceptions) ) # *** en-badstyle.xml # Stressed 'And' before 'Therefore' rules.append( GrammarRule( r"\band(,|, | ,| , )therefore(,| ,|)", # pattern "and therefore", # suggestion "Bad Style", # short comment "The comma emphasizes the 'and.' Therefore, it should be removed.", # full comment {} # exceptions )) # Double negatives rules.append( GrammarRule( r"\b(are|is|has|was|do|does|wo|would|could|should|have)n't not\b", # pattern r"\1", # suggestion "Double Negative", # short comment "Double Negative", # full comment {} # exceptions )) # Vowels
            # NOTE(review): continuation of a method (is_in_language) whose
            # start lies outside this chunk; indentation reconstructed.
            if self.is_in_language(word, rule.right_symbols + cur_symbols[1:]):
                return True
    return False


if __name__ == "__main__":
    # Demo grammar for arithmetic expressions:
    #   E -> T + E | T ;  T -> F * T | F ;  F -> n | ( E )
    cfg_for_factorization = ContextFreeGrammar([
        Terminal("+"),
        Terminal("*"),
        Terminal("n"),
        Terminal("("),
        Terminal(")")
    ], [NonTerminal("E"), NonTerminal("T"), NonTerminal("F")], [
        GrammarRule(NonTerminal("E"),
                    [NonTerminal("T"), Terminal("+"), NonTerminal("E")]),
        GrammarRule(NonTerminal("E"), [NonTerminal("T")]),
        GrammarRule(NonTerminal("T"),
                    [NonTerminal("F"), Terminal("*"), NonTerminal("T")]),
        GrammarRule(NonTerminal("T"), [NonTerminal("F")]),
        GrammarRule(NonTerminal("F"), [Terminal("n")]),
        GrammarRule(
            NonTerminal("F"),
            [Terminal("("), NonTerminal("E"), Terminal(")")]),
    ], NonTerminal("E"))
    parser = Parser(cfg_for_factorization)
                # NOTE(review): tail of factorize_grammar — the start of the
                # function lies outside this chunk; indentation reconstructed.
                if rules_for_symb:
                    new_rules.update(rules_for_symb)
    return ContextFreeGrammar(
        clean_grammar.terminals,
        new_symbols,
        list(new_rules),
        clean_grammar.start_non_terminal,
    )


if __name__ == "__main__":
    # Minimal grammar: A -> chr ast (B is unreachable/unused).
    cfg = ContextFreeGrammar(
        [Terminal("chr"), Terminal("ast")],
        [NonTerminal("A"), NonTerminal("B")],
        [GrammarRule(NonTerminal("A"), [Terminal("chr"), Terminal("ast")])],
        NonTerminal("A"),
    )
    # Grammar with nullable non-terminals: S -> A B ; A -> eps ; B -> eps.
    # NOTE(review): this call is cut off at the end of the chunk.
    cfg_non_terminals = ContextFreeGrammar(
        [Terminal("chr"), Terminal("ast")],
        [NonTerminal("A"), NonTerminal("B"), NonTerminal("S")],
        [
            GrammarRule(NonTerminal("S"), [NonTerminal("A"), NonTerminal("B")]),
            GrammarRule(NonTerminal("A"), []),
            GrammarRule(NonTerminal("B"), [])
        ],
        NonTerminal("S"),