def delete_unreachable_variables(G: Grammar):
    """Drop every symbol and production that cannot be reached from the start symbol.

    Starting at ``G.startSymbol``, the bodies of the productions of each
    reached non-terminal are walked; every symbol found there is marked as
    reachable.  Afterwards ``G.Productions``, ``G.nonTerminals`` and
    ``G.terminals`` are replaced by lists that keep only the reachable
    entries.

    The surviving entries keep their original relative order (duplicates are
    collapsed to their first occurrence), so the result is deterministic
    instead of depending on set iteration order.

    The ``productions`` list of each individual non-terminal is left as is.

    :param G: grammar to prune; it is modified in place.
    :return: the same ``G`` object.
    """
    reachable_nonterminals = {G.startSymbol}
    reachable_terminals = set()

    # Depth-first walk over the non-terminals reachable from the start symbol.
    pending = [G.startSymbol]
    while pending:
        current = pending.pop()
        for _, body in current.productions:
            for symbol in body:
                if not symbol.IsNonTerminal:
                    reachable_terminals.add(symbol)
                elif symbol not in reachable_nonterminals:
                    reachable_nonterminals.add(symbol)
                    pending.append(symbol)

    # dict.fromkeys removes duplicates (as the former set round-trip did)
    # while preserving the order in which the entries were declared.
    G.Productions = list(dict.fromkeys(
        production for production in G.Productions
        if production.Left in reachable_nonterminals
    ))
    G.nonTerminals = list(dict.fromkeys(
        nonterminal for nonterminal in G.nonTerminals
        if nonterminal in reachable_nonterminals
    ))
    G.terminals = list(dict.fromkeys(
        terminal for terminal in G.terminals
        if terminal in reachable_terminals
    ))

    return G
def delete_immediate_left_recursion(G: Grammar):
    """Eliminate immediate left recursion from the non-terminals of ``G``.

    For every non-terminal ``A`` that has at least one production whose body
    starts with ``A`` itself, a new non-terminal named ``A'`` is created and
    the productions of ``A`` are replaced as follows::

        A  -> b A'          for every old body b that does not start with A
        A' -> a A'          for every old body  A a
        A' -> epsilon

    The old productions of ``A`` are removed both from ``A.productions`` and
    from ``G.Productions``.  The remaining productions keep their relative
    order, so the outcome does not depend on set iteration order.

    :param G: grammar to transform; it is modified in place.
    :return: the same ``G`` object.
    """
    # Iterate over a snapshot: G.NonTerminal() is called inside the loop and
    # is expected to register the new symbol on the grammar, which would
    # otherwise mutate the list being iterated.
    for symbol in list(G.nonTerminals):
        is_left_recursive = any(
            not body.IsEpsilon and body[0] == symbol
            for _, body in symbol.productions
        )
        if not is_left_recursive:
            continue

        replaced = set(symbol.productions)
        primed = G.NonTerminal(f"{symbol}'")

        # Bodies that do not start with `symbol`, each followed by the new
        # non-terminal.  Computed before any production is added below.
        new_sents = [
            body + primed
            for _, body in symbol.productions
            if body.IsEpsilon or body[0] != symbol
        ]

        for _, body in symbol.productions:
            if not body.IsEpsilon and body[0] == symbol:
                # A' -> a A'
                primed %= Sentence(*(body[1:] + (primed, )))
        primed %= G.Epsilon

        for sent in new_sents:
            # A -> b A'
            symbol %= sent

        # Drop the replaced productions; dict.fromkeys collapses duplicates
        # (as the former set difference did) but keeps the original order.
        symbol.productions = [
            production for production in dict.fromkeys(symbol.productions)
            if production not in replaced
        ]
        G.Productions = [
            production for production in dict.fromkeys(G.Productions)
            if production not in replaced
        ]

    return G
def remove_unit(G: Grammar):
    """Remove unit productions (``A -> B`` with ``B`` a non-terminal) from ``G``.

    Steps:

    1. Collect, for each non-terminal that has unit productions, the names
       of the non-terminals it reaches through them, and close that relation
       transitively.
    2. For every pair ``A`` reaches ``B``, add ``A -> body`` for each non-unit
       production ``B -> body``.
    3. Rebuild the grammar from the resulting list, leaving out every unit
       production.

    Cycles made only of unit productions disappear as a consequence.

    :param G: grammar to transform; it is modified in place.
    :return: ``None``.
    """

    def is_unit(p: Production) -> bool:
        """Return True if the production has the form ``A -> B``."""
        return len(p.Right) == 1 and p.Right[0].IsNonTerminal

    prods = list(G.Productions)

    # unit_targets[A] = names reachable from A through unit productions.
    # Targets are accumulated per head so that a non-terminal with several
    # unit productions (A -> B | C) keeps all of them.
    unit_targets = {}
    for p in prods:
        if is_unit(p):
            unit_targets.setdefault(p.Left.Name, set()).add(p.Right[0].Name)

    # Transitive closure by fixed-point iteration.
    changed = True
    while changed:
        changed = False
        for head, targets in unit_targets.items():
            size_before = len(targets)
            for target in list(targets):
                if target == head:
                    # Do not expand a variable through its own set.
                    continue
                # A target that is not itself the head of a unit production
                # has no entry and contributes nothing.
                targets.update(
                    name for name in unit_targets.get(target, ())
                    if name != head  # never add a key to its own set
                )
            if len(targets) != size_before:
                changed = True

    # Copy the non-unit productions of every reachable target onto the head.
    # sorted() makes the order of the generated productions deterministic.
    for head, targets in unit_targets.items():
        for target in sorted(targets):
            if target == head:
                # A -> A: copying A's productions onto A only duplicates them.
                continue
            for p in G[target].productions:
                if not is_unit(p):
                    prods.append(Production(G[head], p.Right))

    # Replace the old productions by the new ones, skipping unit productions.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []
    for p in prods:
        if not is_unit(p):
            G.Add_Production(p)
def change_grammar_from_productions(gramm: Grammar, new_productions):
    """Replace all productions of ``gramm`` with those in ``new_productions``.

    The ``productions`` list of every non-terminal and the grammar-wide
    ``Productions`` list are reset to empty lists; afterwards each item of
    ``new_productions`` is registered through ``gramm.Add_Production`` in
    iteration order.

    :param gramm: grammar to rebuild; it is modified in place.
    :param new_productions: iterable with the productions to install.
    :return: the same ``gramm`` object.
    """
    for non_terminal in gramm.nonTerminals:
        non_terminal.productions = []
    gramm.Productions = []

    for production in new_productions:
        gramm.Add_Production(production)

    return gramm
def remove_unreachable(G: Grammar):
    """Remove from ``G`` the symbols that cannot be reached from the start symbol.

    Reachability is tracked by symbol name.  Productions whose head or any
    body symbol is not reachable are discarded, the grammar is rebuilt from
    the remaining ones, and unreachable terminals / non-terminals are removed
    from ``G.nonTerminals``, ``G.terminals`` and ``G.symbDict``.

    Note that the list originally bound to ``G.Productions`` is edited in
    place (discarded entries are overwritten with ``None``) before the
    grammar is rebuilt.

    :param G: grammar to prune; it is modified in place.
    :return: ``None``.
    """
    productions = G.Productions
    reachable_names = {G.startSymbol.Name}

    # Fixed point: keep expanding while some production with a reachable
    # head contributes a name that was not known yet.
    expanded = set()  # indexes of productions already processed
    grew = True
    while grew:
        grew = False
        for index, production in enumerate(productions):
            if index in expanded:
                continue
            if production.Left.Name not in reachable_names:
                continue
            body_names = {symbol.Name for symbol in production.Right}
            if not body_names <= reachable_names:
                reachable_names = reachable_names | body_names
                grew = True
            expanded.add(index)

    # Mark the productions that mention an unreachable symbol.
    for index, production in enumerate(productions):
        keep = production.Left.Name in reachable_names and all(
            symbol.Name in reachable_names for symbol in production.Right
        )
        if not keep:
            productions[index] = None

    # Rebuild the grammar from the surviving productions.
    G.Productions = []
    for non_terminal in G.nonTerminals:
        non_terminal.productions = []
    for production in productions:
        if production is None:
            continue
        G.Add_Production(production)

    # Drop the unreachable symbols themselves.  The names are collected up
    # front because the lists are modified inside the loop.
    names = [non_terminal.Name for non_terminal in G.nonTerminals]
    names.extend(terminal.Name for terminal in G.terminals)
    for name in names:
        if name in reachable_names:
            continue
        try:
            G.nonTerminals.remove(G[name])
        except ValueError:
            # Not a non-terminal, so it has to be removed from the terminals.
            G.terminals.remove(G[name])
        G.symbDict.pop(name)
def remove_ambiguity(G: Grammar):
    """Left-factor the productions of ``G`` that share their first symbol.

    For each non-terminal ``A``, its non-epsilon productions are grouped by
    the name of their first symbol.  Every group ``A -> x b1 | x b2 | ...``
    with more than one member is replaced by::

        A  -> x A'
        A' -> b1 | b2 | ...      (an empty tail becomes epsilon)

    The process is repeated until a full pass makes no change, because the
    tails moved to ``A'`` may share a prefix themselves.

    The name of each new non-terminal is ``A`` followed by one or more ``'``.
    Names already present in ``G.symbDict`` are skipped, so a later pass that
    factors ``A'`` cannot produce a second symbol with the name of an
    ``A''`` created earlier.

    :param G: grammar to transform; it is modified in place.
    :return: ``None``.
    """
    changed = True
    while changed:
        changed = False
        prods = G.Productions

        # Snapshot: G.NonTerminal() is called inside the loop and is expected
        # to register the new symbol on the grammar.
        for nt in list(G.nonTerminals):
            # first-symbol name -> productions of nt starting with it
            groups = {}
            for p in nt.productions:
                if p.IsEpsilon:
                    continue
                groups.setdefault(p.Right[0].Name, []).append(p)

            suffix = "'"
            for group in groups.values():
                if len(group) < 2:
                    continue  # a single production has nothing to factor

                # Extend the suffix until the resulting name is unused.
                while nt.Name + suffix in G.symbDict:
                    suffix += "'"
                new_left = G.NonTerminal(nt.Name + suffix)
                suffix += "'"

                for p in group:
                    tail = p.Right[1:]
                    if len(tail) == 0:
                        prods.append(Production(new_left, G.Epsilon))
                    else:
                        prods.append(Production(new_left, Sentence(*tail)))
                    prods.remove(p)

                # Every member of the group starts with the same-named symbol.
                prods.append(
                    Production(nt, Sentence(group[-1].Right[0], new_left)))
                changed = True

        # Rebuild the grammar so that the next pass sees the new productions
        # through each non-terminal's own list.
        G.Productions = []
        for nt in G.nonTerminals:
            nt.productions = []
        for p in prods:
            G.Add_Production(p)
def remove_vars_nothing(G: Grammar):
    """Remove the non-terminals of ``G`` that derive no string of terminals.

    A symbol name is "generating" when it is a terminal, or when it is the
    head of a production whose body consists only of generating names (an
    empty body qualifies).  Productions that have a non-generating head, or
    mention a non-generating non-terminal, are discarded; the grammar is
    rebuilt from the rest and the non-generating non-terminals are removed
    from ``G.nonTerminals`` and ``G.symbDict``.

    Note that the list originally bound to ``G.Productions`` is edited in
    place (discarded entries are overwritten with ``None``) before the
    grammar is rebuilt.

    :param G: grammar to prune; it is modified in place.
    :return: ``None``.
    """
    productions = G.Productions
    generating = {terminal.Name for terminal in G.terminals}

    # Fixed point: a head becomes generating once one of its bodies is made
    # only of generating names.
    settled = set()  # indexes of productions already used to mark their head
    progress = True
    while progress:
        progress = False
        for index, production in enumerate(productions):
            if index in settled:
                continue
            if any(symbol.Name not in generating for symbol in production.Right):
                continue
            generating.add(production.Left.Name)
            settled.add(index)
            progress = True

    # Mark the productions that involve a non-generating variable.
    variable_names = [non_terminal.Name for non_terminal in G.nonTerminals]
    for index, production in enumerate(productions):
        if production.Left.Name not in generating or any(
            symbol.Name in variable_names and symbol.Name not in generating
            for symbol in production.Right
        ):
            productions[index] = None

    # Rebuild the grammar from the surviving productions.
    G.Productions = []
    for non_terminal in G.nonTerminals:
        non_terminal.productions = []
    for production in productions:
        if production is None:
            continue
        G.Add_Production(production)

    # Finally drop the variables that were never marked as generating.
    for name in variable_names:
        if name in generating:
            continue
        G.nonTerminals.remove(G[name])
        G.symbDict.pop(name)
def remove_left_recursion(G: Grammar):
    """Eliminate immediate left recursion from every non-terminal of ``G``.

    A non-terminal ``A`` is treated as left-recursive when one of its
    productions has ``A`` as the first symbol of its body.  For each such
    ``A`` a new non-terminal named ``A'`` is created and the productions of
    ``A`` are rewritten as::

        A  -> y A'          for every A -> y that does not start with A
        A' -> x A'          for every A -> A x
        A' -> epsilon

    Productions of the other non-terminals are kept unchanged.  Only direct
    recursion (``A -> A ...``) is detected; the grammar is expected to be
    free of cycles.  An epsilon production of a recursive non-terminal is
    handled as an empty ``y`` (it becomes ``A -> A'``), consistently with
    ``delete_immediate_left_recursion``.

    :param G: grammar to transform; it is modified in place.
    :return: ``None``.
    """

    def starts_with_head(p: Production) -> bool:
        """Return True if the body of ``p`` begins with its own head."""
        # The epsilon check comes first so an empty body is never indexed.
        return not p.IsEpsilon and p.Left == p.Right[0]

    def has_lr(nt: NonTerminal) -> bool:
        """Return True if ``nt`` has immediate left recursion."""
        return any(starts_with_head(p) for p in nt.productions)

    new_prods = []
    # Snapshot: G.NonTerminal() is called inside the loop and is expected to
    # register the new symbol on the grammar.
    for nt in list(G.nonTerminals):
        if not has_lr(nt):
            new_prods.extend(nt.productions)
            continue

        new_symbol = G.NonTerminal(nt.Name + "'")
        for p in nt.productions:
            if starts_with_head(p):
                # A -> A x   becomes   A' -> x A'
                new_prods.append(
                    Production(new_symbol, Sentence(*p.Right[1:], new_symbol)))
            else:
                # A -> y   becomes   A -> y A'   (y is empty for epsilon)
                prefix = () if p.IsEpsilon else p.Right[0:]
                new_prods.append(
                    Production(p.Left, Sentence(*prefix, new_symbol)))
        new_prods.append(Production(new_symbol, G.Epsilon))

    # Replace the old productions by the new ones.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []
    for p in new_prods:
        G.Add_Production(p)
def remove_epsilon(G: Grammar):
    """Remove epsilon productions from ``G``.

    Steps:

    1. Find the nullable non-terminals: heads of a production whose body is
       made only of nullable symbols / epsilon (an empty body qualifies).
    2. For every production, add each variant obtained by dropping a
       non-empty subset of the nullable symbols of its body, then re-add the
       original productions.
    3. Rebuild the grammar without epsilon productions and without the
       productions that mention a "useless" symbol, i.e. a nullable
       non-terminal all of whose productions are epsilon.  Those symbols are
       also removed from ``G.nonTerminals`` and ``G.symbDict``.

    Both ``G.Productions`` and every non-terminal's ``productions`` list are
    reset before the final rebuild, so the two views stay consistent.

    NOTE(review): if the start symbol is nullable, the empty string is no
    longer derivable after this transformation -- confirm callers expect it.

    :param G: grammar to transform; it is modified in place.
    :return: ``None``.
    """
    prods = G.Productions

    # 1. Nullable non-terminals, by fixed-point iteration.
    nullables = []
    changed = True
    while changed:
        changed = False
        for prod in prods:
            if prod.Left in nullables:
                continue
            if all(symbol in nullables or symbol.IsEpsilon
                   for symbol in prod.Right):
                nullables.append(prod.Left)
                changed = True

    # 2. Start from an empty grammar and add the decomposed productions.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []

    for prod in prods:
        nullable_positions = [
            index for index, symbol in enumerate(prod.Right)
            if symbol in nullables
        ]
        for size in range(1, len(nullable_positions) + 1):
            for dropped in it.combinations(nullable_positions, size):
                right_part = [
                    prod.Right[j] for j in range(len(prod.Right))
                    if j not in dropped
                ]
                if right_part:
                    new_prod = Production(prod.Left, Sentence(*right_part))
                else:
                    new_prod = Production(prod.Left, G.Epsilon)
                if new_prod not in G.Productions:
                    G.Add_Production(new_prod)

    # Re-add the original productions after the generated ones.
    for prod in prods:
        G.Add_Production(prod)

    # 3. Must be computed while the per-symbol production lists are still
    #    populated by step 2.
    expanded = G.Productions
    useless_symbols = [
        symbol for symbol in nullables
        if all(prod.IsEpsilon for prod in symbol.productions)
    ]

    # Reset both the grammar-wide list and the per-non-terminal lists;
    # otherwise the latter would keep the epsilon productions and receive
    # every surviving production a second time.
    G.Productions = []
    for nt in G.nonTerminals:
        nt.productions = []

    for prod in expanded:
        if prod.IsEpsilon or any(symbol in useless_symbols
                                 for symbol in prod.Right):
            continue
        G.Add_Production(prod)

    # Remove the non-terminals that are left without productions.
    for symbol in useless_symbols:
        G.nonTerminals.remove(symbol)
        G.symbDict.pop(symbol.Name)