Пример #1
0
def cky_parse(grammar, tokens):
    '''Fill a CKY chart for ``tokens`` using the binary rules of ``grammar``.

    Cell ``(i, j)`` of the returned table describes the span of tokens from
    position ``i`` up to (not including) position ``j``.  For every column
    ``j`` the function first stores the rules that rewrite to the token
    ``tokens[j-1]`` in cell ``(j-1, j)``, then walks up the column combining
    the entries of ``(i, k)`` and ``(k, j)`` for each split point ``k``.

    Each entry handed to ``TableEntry.add_entry`` is a one-item dict mapping
    the rule's left-hand side to ``(production, (left_cell, right_cell))``.

    grammar -- object providing ``productions()``, ``productions(rhs=...)``
               and ``start()`` (presumably an nltk grammar in Chomsky normal
               form -- TODO confirm with callers).
    tokens  -- sequence of terminal symbols to parse.

    Returns the filled ``ParseTable``.
    '''
    #Initialize parse table
    table = ParseTable(tokens)

    #Index the productions by right-hand side once, so that the inner loop
    #does a dictionary lookup instead of rescanning the whole grammar for
    #every (left, down) pair.
    rules_by_rhs = {}
    for production in grammar.productions():
        rules_by_rhs.setdefault(production.rhs(), []).append(production)

    #Move left to right across table
    for j in range(1, table.n + 1):
        #Add nonterminal symbols that correspond to terminal symbol
        terminal_rules = grammar.productions(rhs = tokens[j-1])
        table[(j-1,j)] = TableEntry()
        for rule in terminal_rules:
            #NOTE(review): row ``table.n`` is read here as if ParseTable
            #pre-populates it with the input tokens -- confirm in ParseTable.
            new_rule = nltk.grammar.Production(nltk.grammar.Nonterminal(rule.lhs()), table[(table.n,j)].symbols + [None])
            table[(j-1,j)].add_entry({rule.lhs(): (new_rule, ((table.n,j), None))})

        #Iterate over all non-(nonterminal -> terminal) cells of table
        #Move up rows
        for i in reversed(range(0, j)):
            #Symbols accepted for cell (i, j) and the entries that derive them
            symbols = []
            children = []

            #Iterate over all possible split points
            for k in range(i + 1, j):
                #Analyze all cells to left in row
                try:
                    left_possibilities = table[i,k].symbols
                except KeyError:
                    continue
                for left in left_possibilities:
                    #Analyze all cells below in column
                    try:
                        down_possibilities = table[k,j].symbols
                    except KeyError:
                        continue
                    for down in down_possibilities:
                        #All rules A -> B C where B is left and C is down
                        for rule in rules_by_rhs.get((left, down), ()):
                            lhs = rule.lhs()
                            if lhs in symbols:
                                #Already accepted: record one more derivation
                                new_rule = nltk.grammar.Production(lhs, [left, down])
                                children.append({lhs: (new_rule, ((i,k),(k,j)))})
                            elif not (lhs == grammar.start() and (i,j) != (0,table.n)):
                                #The start symbol is only accepted in the
                                #cell that spans the whole input.
                                symbols.append(lhs)
                                new_rule = nltk.grammar.Production(lhs, [left, down])
                                children.append({lhs: (new_rule, ((i,k),(k,j)))})

            #Add new entry to table
            if symbols:
                table[(i,j)] = TableEntry()
                for child in children:
                    table[(i,j)].add_entry(child)

    return table
Пример #2
0
def main():
    '''Load the grammar named by the first command-line argument and print
    one sentence generated from its start symbol.'''
    # Read the grammar file given on the command line.
    grammar = nltk.data.load(sys.argv[1])

    # Group every right-hand side under the name of its left-hand-side symbol.
    grammar_dict = {}
    for rule in grammar.productions():
        grammar_dict.setdefault(rule.lhs().symbol(), []).append(rule.rhs())

    # Expand the start symbol into a sentence and show it.
    start_name = grammar.start().symbol()
    print(generate(start_name, grammar_dict))
Пример #3
0
def add_sub_nonterminal(grammar, n_gram, freq):
    '''Replaces an n-gram in a grammar with a new non-terminal.

    Every occurrence of ``n_gram`` on the right-hand side of a production is
    replaced by a freshly named non-terminal, and a production rewriting that
    non-terminal to ``n_gram`` (probability 1.0, frequency ``freq``) is added.

    grammar -- weighted grammar whose productions carry a ``freq`` attribute
               and which has a ``new_symbol_count`` counter.  The counter on
               this input grammar is incremented as a side effect.
    n_gram  -- non-empty sequence of grammar symbols to factor out.
    freq    -- frequency to record on the new ``nt -> n_gram`` production.

    Returns a new ``WeightedGrammar``; the productions of ``grammar`` itself
    are not modified.

    Raises ValueError if ``n_gram`` is empty.
    '''
    n_gram = list(n_gram)
    if not n_gram:
        # An empty slice always equals [], so the replacement loop below would
        # keep inserting the new symbol and never terminate.
        raise ValueError('n_gram must contain at least one symbol')
    width = len(n_gram)

    prods = []
    nt = nltk.grammar.Nonterminal('sub_%s_%s' % ('_'.join("(%s)" % symbol for symbol in n_gram), grammar.new_symbol_count))
    grammar.new_symbol_count += 1
    for prod in grammar.productions():
        rhs = list(prod.rhs())
        i = 0
        # len(rhs) is re-evaluated on purpose: each replacement shortens rhs.
        while i < len(rhs):
            if rhs[i:i + width] == n_gram:
                rhs[i:i + width] = [nt]
            i += 1
        new_prod = nltk.grammar.WeightedProduction(prod.lhs(), rhs, prob = prod.prob())
        new_prod.freq = prod.freq
        prods.append(new_prod)

    # The production that expands the new non-terminal back to the n-gram.
    new_prod = nltk.grammar.WeightedProduction(nt, n_gram, prob = 1.0)
    new_prod.freq = freq
    prods.append(new_prod)

    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), prods)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar
Пример #4
0
def add_join_nonterminal(grammar, diff_index, base_prod):
    '''Merges productions that differ only at one position into a new non-terminal.

    A production is selected when its right-hand side has the same length as
    that of ``base_prod`` and agrees with it everywhere except possibly at
    ``diff_index``.  Each selected production is rewritten so that position
    ``diff_index`` holds a freshly named non-terminal, and for each one a
    production from that non-terminal to the original symbol is added, with
    probability equal to its share of the selected productions' total
    frequency.  All other productions are carried over unchanged.

    grammar    -- weighted grammar whose productions carry a ``freq``
                  attribute and which has a ``new_symbol_count`` counter.
                  The counter on this input grammar is incremented as a
                  side effect.
    diff_index -- index in the right-hand side at which productions may differ.
    base_prod  -- production used as the template for matching.

    Returns a new ``WeightedGrammar``.
    '''
    base_rhs = base_prod.rhs()
    prods = []
    new_prods = []
    for prod in grammar.productions():
        rhs = prod.rhs()
        if len(rhs) == len(base_rhs) and all(ours == theirs or i == diff_index for i, (ours, theirs) in enumerate(zip(rhs, base_rhs))):
            prods.append(prod)
        else:
            new_prods.append(prod)
    nt = nltk.grammar.Nonterminal('join_%s_%s' % ('_'.join("(%s)" % prod.rhs()[diff_index] for prod in prods), grammar.new_symbol_count))
    grammar.new_symbol_count += 1
    total_freq = sum(prod.freq for prod in prods)
    new_rhs = list(base_rhs)
    new_rhs[diff_index] = nt
    for prod in prods:
        # NOTE(review): selected productions sharing a left-hand side each
        # yield their own copy of ``lhs -> new_rhs`` -- confirm that
        # duplicates are intended.
        new_prod = nltk.grammar.WeightedProduction(prod.lhs(), new_rhs, prob = prod.prob())
        new_prod.freq = prod.freq
        new_prods.append(new_prod)

        # Force true division: with integer frequencies on Python 2, ``/``
        # would truncate every share below 1 to 0.
        new_prod = nltk.grammar.WeightedProduction(nt, [prod.rhs()[diff_index]], prob = float(prod.freq) / total_freq)
        new_prod.freq = prod.freq
        new_prods.append(new_prod)
    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), new_prods)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar