def cky_parse(grammar, tokens):
    """CKY chart parse of `tokens` under `grammar` (assumed to be in CNF).

    Returns the filled ParseTable: entry (i, j) maps each nonterminal that
    can derive tokens[i:j] to a (production, back-pointer) pair, where the
    back-pointer names the child cells used in the derivation.
    NOTE(review): original indentation was lost in this chunk; nesting was
    reconstructed from the CKY structure -- confirm against the original.
    """
    #Initialize parse table
    table = ParseTable(tokens)
    #Move left to right across table
    for j in xrange(1, table.n + 1):
        #Add nonterminal symbols that correspond to terminal symbol
        rules = grammar.productions(rhs = tokens[j-1])
        table[(j-1,j)] = TableEntry()
        for rule in rules:
            # NOTE(review): rule.lhs() is already a Nonterminal, so wrapping
            # it again looks redundant, and table[(table.n, j)] looks like it
            # was meant to be table[(j-1, j)] (the cell created just above) --
            # verify against ParseTable's indexing before relying on this.
            new_rule = nltk.grammar.Production(nltk.grammar.Nonterminal(rule.lhs()), table[(table.n,j)].symbols + [None])
            table[(j-1,j)].add_entry({rule.lhs(): (new_rule, ((table.n,j), None))})
        #Iterate over all non-(nonterminal -> terminal) cells of table
        #Move up rows
        for i in reversed(xrange(0, j)):
            #Analyze all reachable cells
            #Reset new symbols list and children dict
            symbols = []
            probs = []  # NOTE(review): never read below -- dead variable
            children = []
            #Iterate over all possible children
            for k in xrange(i+1, j):
                #Analyze all cells to left in row
                try:
                    left_possibilities = table[i,k].symbols
                except KeyError:
                    # Empty cell: no constituent spans (i, k).
                    continue
                for left in left_possibilities:
                    #Analyze all cells below in column
                    try:
                        down_possibilities = table[k,j].symbols
                    except KeyError:
                        continue
                    for down in down_possibilities:
                        #Determine all possible nonterminals A in rules A -> B C where B is left and C is down
                        rules = [x for x in grammar.productions() if x.rhs() == (left, down)]
                        if rules != []:
                            for rule in rules:
                                if rule.lhs() not in symbols:
                                    # Only admit the start symbol in the full-sentence cell (0, n).
                                    if not (rule.lhs() == grammar.start() and (i,j) != (0,table.n)):
                                        symbols.append(rule.lhs())
                                        new_rule = nltk.grammar.Production(rule.lhs(), [left, down])
                                        children.append({rule.lhs(): (new_rule, ((i,k),(k,j)))})
                                else:
                                    # Same nonterminal reached via another split point k:
                                    # record the additional derivation.
                                    new_rule = nltk.grammar.Production(rule.lhs(), [left, down])
                                    children.append({rule.lhs(): (new_rule, ((i,k),(k,j)))})
            #Add new entry to table
            if symbols != []:
                table[(i,j)] = TableEntry()
                for child in children:
                    table[(i,j)].add_entry(child)
    return table
def generate_sample(grammar, items, frags):
    """Recursively expand `items` under the PCFG `grammar`, appending the
    terminal leaves to `frags` (which is mutated in place and returned).

    For each Nonterminal, one production is sampled according to the
    productions' probabilities; terminal items are collected as-is.

    Raises:
        ValueError: if a nonterminal's production probabilities do not sum
            to (approximately) 1.0.
    """
    for item in items:
        if isinstance(item, Nonterminal):
            prods = grammar.productions(lhs=item)
            probs = [prod.prob() for prod in prods]
            total = sum(probs)
            # The probabilities hardly ever sum to exactly 1.0; fold the
            # rounding slack into the first production so choice() accepts
            # the distribution.
            if abs(1 - total) < 0.1:
                probs[0] = probs[0] + (1 - total)
            else:
                # BUG FIX: the original did `raise("...")`, which raises a
                # string literal and fails with TypeError at runtime.
                raise ValueError("Probabilities don't sum even near to 1.0")
            chosen_prod = choice(prods, p=probs)
            if str(item) == 'S':
                print ("chosen prod ", chosen_prod.rhs())
                # Record the top-level choice for later inspection.
                # (with-block replaces the unclosed open/write/close trio)
                with open("chosen_prod.txt", "w") as grammar_file:
                    grammar_file.write(str(chosen_prod.rhs()))
            #It passes the right hand items of the chosen production and the same grammar
            generate_sample(grammar, chosen_prod.rhs(), frags)
        else:
            frags.append(item)
    return frags
def find_join(grammar):
    """Find a pair of productions whose right-hand sides differ in exactly
    one position (candidates for merging via a "join" nonterminal).

    Returns:
        (diff_index, prod1): the differing position and the first of the
        pair, or (None, None) if no such pair exists.  Productions with
        fewer than two RHS symbols are never considered as prod1.
    """
    # PERF FIX: the original re-called grammar.productions() inside both
    # loop headers and bodies, re-fetching the production list O(n^2) times;
    # hoist the (read-only) list once.
    prods = grammar.productions()
    for i, prod1 in enumerate(prods):
        if len(prod1.rhs()) < 2:
            continue
        for prod2 in prods[i + 1:]:
            if len(prod1.rhs()) != len(prod2.rhs()):
                continue
            diff_index = None
            for k, (a, b) in enumerate(zip(prod1.rhs(), prod2.rhs())):
                if a != b:
                    if diff_index is None:
                        diff_index = k
                    else:
                        # More than one mismatch: not a join candidate.
                        diff_index = None
                        break
            if diff_index is not None:
                return (diff_index, prod1)
    return (None, None)
def main():
    """Load a grammar from the path in sys.argv[1], index its production
    right-hand sides by LHS symbol, and print one generated sentence."""
    grammar = nltk.data.load(sys.argv[1])
    # Group every production's RHS under its LHS symbol name.
    grammar_dict = {}
    for production in grammar.productions():
        grammar_dict.setdefault(production.lhs().symbol(), []).append(production.rhs())
    # Generate a sentence starting from the grammar's start symbol.
    print(generate(grammar.start().symbol(), grammar_dict))
def get_best_n_gram(grammar, n = 2):
    '''Finds the best n-gram to replace with a new non-terminal, along with
    the relative frequency of that n-gram.

    Scans every production RHS (weighted by its `freq`), skipping
    productions whose RHS is exactly n symbols long, and returns the
    (n_gram, frequency) pair with the highest total frequency, or
    (None, None) when no n-gram was found.
    '''
    counts = {}  # maps each n-gram (tuple of symbols) to its weighted count
    for prod in grammar.productions():
        body = prod.rhs()
        # An RHS of exactly n symbols is left alone (replacing it whole
        # would only create a unit production).
        if len(body) == n:
            continue
        for start in range(len(body) - n + 1):
            window = body[start:start + n]
            counts[window] = counts.get(window, 0) + prod.freq
    if counts:
        return max(counts.items(), key = lambda pair: pair[1])
    return (None, None)
def produceStk(grammar):
    """Drain the module-level `stack` of partially-expanded productions.

    Pops a production; if its RHS still contains a nonterminal, pushes one
    new production per expansion of the FIRST nonterminal encountered and
    moves on.  Productions whose RHS is all terminals are printed as
    finished sentences.  Relies on module-level `stack`, `is_terminal` and
    `replace` (defined outside this chunk).
    NOTE(review): original indentation was lost; the trailing `return` is
    placed after the while-loop (so the whole stack is drained) -- confirm
    against the original layout.
    """
    while (True):
        if (stack.empty()):
            break
        prodrule = stack.get()
        # flag stays True iff every symbol on the RHS is a terminal.
        flag = True
        for prod in prodrule.rhs():
            if (is_terminal(prod)):
                continue
            else:
                flag = False
                prodsagain = grammar.productions(lhs=prod)
                for prodagain in prodsagain:
                    prodrulenew = nltk.grammar.Production(
                        prodrule.lhs(), replace(prodrule.rhs(), prod, prodagain.rhs()))
                    stack.put(prodrulenew)
                # Only the first nonterminal is expanded per pass.
                break
        if (flag):
            print "one of the sentence :", prodrule
    return
def add_sub_nonterminal(grammar, n_gram, freq):
    '''Replaces an n-gram in a grammar with a new non-terminal.

    Every occurrence of `n_gram` inside a production RHS is collapsed to a
    fresh "sub" nonterminal, and one new production expanding that
    nonterminal back to the n-gram (prob 1.0, frequency `freq`) is added.
    Mutates grammar.new_symbol_count and returns a new WeightedGrammar.
    '''
    target = list(n_gram)
    width = len(target)
    # Fresh, self-describing nonterminal name, e.g. sub_(a)_(b)_0.
    sub_nt = nltk.grammar.Nonterminal('sub_%s_%s' % ('_'.join("(%s)" % symbol for symbol in target), grammar.new_symbol_count))
    grammar.new_symbol_count += 1
    rewritten = []
    for old in grammar.productions():
        body = list(old.rhs())
        pos = 0
        # Collapse every occurrence of the n-gram in place; the slice
        # assignment shrinks the list, so pos advances one symbol at a time.
        while pos < len(body):
            if body[pos:pos + width] == target:
                body[pos:pos + width] = [sub_nt]
            pos += 1
        replacement = nltk.grammar.WeightedProduction(old.lhs(), body, prob = old.prob())
        replacement.freq = old.freq
        rewritten.append(replacement)
    # The expansion rule for the new nonterminal itself.
    expansion = nltk.grammar.WeightedProduction(sub_nt, target, prob = 1.0)
    expansion.freq = freq
    rewritten.append(expansion)
    result = nltk.grammar.WeightedGrammar(grammar.start(), rewritten)
    result.new_symbol_count = grammar.new_symbol_count
    return result
def add_join_nonterminal(grammar, diff_index, base_prod):
    '''Merge all productions that match base_prod everywhere except at
    position diff_index into productions over a fresh "join" nonterminal.

    The join nonterminal expands to each differing symbol with probability
    proportional to the matching production's frequency.  Mutates
    grammar.new_symbol_count and returns a new WeightedGrammar.
    NOTE(review): prod.freq / total_freq is integer division under
    Python 2 when freq counts are ints -- confirm frequencies are floats.
    '''
    prods = []      # productions matching base_prod except at diff_index
    new_prods = []  # productions of the rewritten grammar
    for prod in grammar.productions():
        if len(prod.rhs()) == len(base_prod.rhs()) and all(prod.rhs()[i] == base_prod.rhs()[i] or i == diff_index for i in range(len(prod.rhs()))):
            prods.append(prod)
        else:
            new_prods.append(prod)
    # Fresh, self-describing nonterminal name, e.g. join_(a)_(b)_0.
    nt = nltk.grammar.Nonterminal('join_%s_%s' % ('_'.join("(%s)" % prod.rhs()[diff_index] for prod in prods), grammar.new_symbol_count))
    grammar.new_symbol_count += 1
    total_freq = sum(prod.freq for prod in prods)
    new_rhs = list(base_prod.rhs())
    new_rhs[diff_index] = nt
    for prod in prods:
        # Re-emit each matching production with the join nonterminal in
        # place of the differing symbol.  NOTE(review): one such production
        # is emitted per match, so duplicates arise when LHSs coincide --
        # confirm this is intended.
        new_prod = nltk.grammar.WeightedProduction(prod.lhs(), new_rhs, prob = prod.prob())
        new_prod.freq = prod.freq
        new_prods.append(new_prod)
        # Expansion of the join nonterminal to this match's differing symbol.
        new_prod = nltk.grammar.WeightedProduction(nt, [prod.rhs()[diff_index]], prob = prod.freq / total_freq)
        new_prod.freq = prod.freq
        new_prods.append(new_prod)
    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), new_prods)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar
prodsagain = grammar.productions(lhs=prod) for prodagain in prodsagain: prodrulenew = nltk.grammar.Production( prodrule.lhs(), replace(prodrule.rhs(), prod, prodagain.rhs())) stack.put(prodrulenew) break if (flag): print "one of the sentence :", prodrule return grammar = CFG.fromstring(''' S -> A OP B OP -> '+'| '-' | '*' | '/' | '%' B -> '1'|'2' A -> '3' | '4' ''') parser = ChartParser(grammar) gr = parser.grammar() print "GR:", gr print "gr.start():", gr.start() globalmaxindex = 0 print "gr.start() : " print gr.start() productions = grammar.productions(lhs=gr.start()) for production in productions: stack.put(production) produceStk(gr) print "+++++++++++++++++"
def print_grammar(grammar, nonterminal=""):
    """Print the grammar's productions, one per line.

    If `nonterminal` is non-empty, only productions whose LHS symbol
    matches it are printed; otherwise every production is printed.
    """
    show_all = nonterminal == ""
    for production in grammar.productions():
        if show_all or production.lhs().symbol() == nonterminal:
            print(production)