示例#1
0
文件: cky.py 项目: atitus5/cky_parse
def cky_parse(grammar, tokens):
    """Build and return the CKY parse table for ``tokens`` under ``grammar``.

    Cells are keyed by ``(i, j)`` spans. For every column ``j`` the cell
    ``(j-1, j)`` is first filled from the productions whose right-hand side
    is the token ``tokens[j-1]``. The cells above it (``i`` decreasing) are
    then filled by pairing a symbol from the cell to the left ``(i, k)`` with
    a symbol from the cell below ``(k, j)`` and looking for productions whose
    right-hand side is exactly that pair.

    Every entry added to a cell is a one-item dict
    ``{lhs: (production, (left_span, right_span))}`` so the spans act as
    back-pointers to the cells the production was built from.

    Args:
        grammar: Object exposing ``productions()`` (optionally filtered with
            ``rhs=``) and ``start()``. Only rules whose right-hand side is a
            pair are matched when combining cells, so the grammar is
            presumably in Chomsky normal form -- TODO confirm.
        tokens: Sequence of terminal symbols to parse.

    Returns:
        The populated ``ParseTable``.
    """
    # Initialize parse table
    table = ParseTable(tokens)

    # Looked up once: neither depends on the cell being filled.
    all_productions = grammar.productions()
    start_symbol = grammar.start()

    # Move left to right across table
    for j in range(1, table.n + 1):
        # Add nonterminal symbols that correspond to terminal symbol
        rules = grammar.productions(rhs=tokens[j - 1])
        table[(j - 1, j)] = TableEntry()
        for rule in rules:
            # NOTE(review): this reads table[(table.n, j)], presumably a cell
            # seeded by ParseTable(tokens) with the terminal of column j --
            # confirm against ParseTable.
            new_rule = nltk.grammar.Production(
                nltk.grammar.Nonterminal(rule.lhs()),
                table[(table.n, j)].symbols + [None])
            table[(j - 1, j)].add_entry(
                {rule.lhs(): (new_rule, ((table.n, j), None))})

        # Iterate over all non-(nonterminal -> terminal) cells of table,
        # moving up the rows of column j.
        for i in reversed(range(0, j)):
            # Symbols and back-pointer entries found for cell (i, j)
            symbols = []
            children = []

            # Iterate over all possible split points
            for k in range(i + 1, j):
                # Analyze the cell to the left in the row
                try:
                    left_possibilities = table[i, k].symbols
                except KeyError:
                    continue
                for left in left_possibilities:
                    # Analyze the cell below in the column
                    try:
                        down_possibilities = table[k, j].symbols
                    except KeyError:
                        continue
                    for down in down_possibilities:
                        # All rules A -> B C where B is left and C is down
                        for rule in all_productions:
                            if rule.rhs() != (left, down):
                                continue
                            lhs = rule.lhs()
                            if lhs not in symbols:
                                # The start symbol is only accepted in the
                                # cell that spans the whole input.
                                if lhs == start_symbol and (i, j) != (0, table.n):
                                    continue
                                symbols.append(lhs)
                            new_rule = nltk.grammar.Production(lhs, [left, down])
                            children.append(
                                {lhs: (new_rule, ((i, k), (k, j)))})

            # Add new entry to table only when something was derived
            if symbols != []:
                table[(i, j)] = TableEntry()
                for child in children:
                    table[(i, j)].add_entry(child)

    return table
def generate_sample(grammar, items, frags):
    """Expand ``items`` depth-first and collect the resulting terminals.

    Each ``Nonterminal`` in ``items`` is replaced by the right-hand side of
    one of its productions, drawn with ``choice(prods, p=probs)``
    (presumably ``numpy.random.choice`` -- confirm against the file's
    imports), and expanded recursively. Every other item is appended to
    ``frags`` as is.

    Side effects: whenever the expanded item's string form is ``'S'`` the
    chosen right-hand side is printed and written to ``chosen_prod.txt``
    (overwriting the file).

    Args:
        grammar: Grammar whose ``productions(lhs=...)`` returns productions
            exposing ``prob()`` and ``rhs()``.
        items: Iterable of symbols to expand.
        frags: List that receives the terminals; mutated in place.

    Returns:
        ``frags``, the same list object that was passed in.

    Raises:
        ValueError: If the probabilities of a nonterminal's productions are
            not within 0.1 of summing to 1.0 (this includes a nonterminal
            with no productions).
    """
    for item in items:
        if not isinstance(item, Nonterminal):
            frags.append(item)
            continue

        prods = grammar.productions(lhs=item)
        probs = [prod.prob() for prod in prods]

        # The probabilities hardly ever sum to exactly 1.0, so the small
        # residue is folded into the first production.
        remaining = 1 - sum(probs)
        if not abs(remaining) < 0.1:
            raise ValueError("Probabilities don't sum even near to 1.0")
        probs[0] = probs[0] + remaining

        chosen_prod = choice(prods, p=probs)
        if str(item) == 'S':
            print("chosen prod ", chosen_prod.rhs())
            with open("chosen_prod.txt", "w") as grammar_file:
                grammar_file.write(str(chosen_prod.rhs()))

        # Recurse on the chosen right-hand side with the same grammar;
        # results accumulate in frags.
        generate_sample(grammar, chosen_prod.rhs(), frags)
    return frags
示例#3
0
def find_join(grammar):
    '''Finds the first pair of productions whose right-hand sides have the
       same length (at least 2) and differ in exactly one position.

       Pairs are examined in grammar order: each production is compared
       with the productions that follow it. Left-hand sides are not
       compared.

       Returns (diff_index, prod1), where prod1 is the earlier production of
       the pair and diff_index is the position at which the two right-hand
       sides differ, or (None, None) when no such pair exists.'''
    productions = grammar.productions()
    for i, prod1 in enumerate(productions):
        rhs1 = prod1.rhs()
        if len(rhs1) < 2:
            continue
        for prod2 in productions[i + 1:]:
            rhs2 = prod2.rhs()
            if len(rhs1) != len(rhs2):
                continue
            diff_indices = [k for k, (sym1, sym2) in enumerate(zip(rhs1, rhs2))
                            if sym1 != sym2]
            # Zero differences (duplicates) and two or more do not qualify.
            if len(diff_indices) == 1:
                return (diff_indices[0], prod1)
    return (None, None)
示例#4
0
def main():
    """Load the grammar named by sys.argv[1] and print one generated sentence.

    The productions are grouped into a dict that maps each left-hand symbol
    name to the list of its right-hand sides; that dict and the start symbol
    name are handed to ``generate`` and the result is printed.
    """
    # Load the grammar and group right-hand sides by left-hand symbol.
    grammar = nltk.data.load(sys.argv[1])
    grammar_dict = {}
    for production in grammar.productions():
        grammar_dict.setdefault(production.lhs().symbol(), []).append(production.rhs())

    # Generate sentence from start symbol.
    sentence = generate(grammar.start().symbol(), grammar_dict)
    print(sentence)
示例#5
0
def get_best_n_gram(grammar, n = 2):
    '''Picks the n-gram to replace with a new non-terminal.

       Every window of n consecutive right-hand-side symbols is credited
       with the freq of the production it occurs in; productions whose
       right-hand side has exactly n symbols are skipped. Returns the
       (n_gram, summed_freq) pair with the largest sum, or (None, None)
       when no n-gram was seen.'''
    totals = {} # maps n-grams to summed freqs
    for production in grammar.productions():
        rhs = production.rhs()
        if len(rhs) == n:
            continue
        for start in range(len(rhs) - n + 1):
            window = rhs[start:start + n]
            totals[window] = totals.get(window, 0) + production.freq
    if totals:
        return max(totals.items(), key = lambda pair: pair[1])
    return (None, None)
示例#6
0
def produceStk(grammar):
    """Drain the module-level ``stack`` and print every fully terminal rule.

    Each production taken from ``stack`` is scanned left to right. At the
    first symbol for which ``is_terminal`` is false, one new production per
    alternative of that symbol is built with
    ``replace(prodrule.rhs(), prod, prodagain.rhs())`` and put back on
    ``stack``. A production made of terminals only is printed.

    ``stack`` is used through ``empty()``, ``get()`` and ``put()``, so it is
    presumably a ``queue``-style object -- confirm against its definition.

    Args:
        grammar: Grammar whose ``productions(lhs=...)`` lists the
            alternatives of a symbol.

    Returns:
        None.
    """
    while not stack.empty():
        prodrule = stack.get()
        for prod in prodrule.rhs():
            if is_terminal(prod):
                continue
            # Expand only the first non-terminal symbol; the new productions
            # are picked up again from the stack later.
            for prodagain in grammar.productions(lhs=prod):
                prodrulenew = nltk.grammar.Production(
                    prodrule.lhs(),
                    replace(prodrule.rhs(), prod, prodagain.rhs()))
                stack.put(prodrulenew)
            break
        else:
            # No symbol needed expanding. A single formatted argument prints
            # the same text under Python 2 and Python 3.
            print("one of the sentence : %s" % (prodrule,))
    return
示例#7
0
def add_sub_nonterminal(grammar, n_gram, freq):
    '''Builds a new grammar in which every occurrence of n_gram on a
       right-hand side is replaced by a fresh non-terminal.

       The non-terminal is named from the n-gram's symbols and
       grammar.new_symbol_count, which is incremented on the grammar passed
       in. The existing productions keep their prob and freq; one extra
       production (new non-terminal -> n_gram, prob 1.0, the given freq) is
       appended. The counter is copied onto the returned grammar.'''
    pattern = list(n_gram)
    width = len(pattern)
    label = 'sub_%s_%s' % ('_'.join("(%s)" % symbol for symbol in pattern), grammar.new_symbol_count)
    nt = nltk.grammar.Nonterminal(label)
    grammar.new_symbol_count += 1

    rewritten = []
    for prod in grammar.productions():
        rhs = list(prod.rhs())
        # Left-to-right scan; the list shrinks as matches are collapsed, so
        # the bound is re-read on every step.
        pos = 0
        while pos < len(rhs):
            if rhs[pos:pos + width] == pattern:
                rhs[pos:pos + width] = [nt]
            pos += 1
        replacement = nltk.grammar.WeightedProduction(prod.lhs(), rhs, prob = prod.prob())
        replacement.freq = prod.freq
        rewritten.append(replacement)

    # Production that defines the new non-terminal itself.
    definition = nltk.grammar.WeightedProduction(nt, pattern, prob = 1.0)
    definition.freq = freq
    rewritten.append(definition)

    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), rewritten)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar
示例#8
0
def add_join_nonterminal(grammar, diff_index, base_prod):
    '''Builds a new grammar in which the productions that agree with
       base_prod everywhere except at diff_index share a new non-terminal
       at that position.

       A production matches when its right-hand side has the same length as
       base_prod's and equals it at every index other than diff_index
       (left-hand sides are not compared). For each match two productions
       are added: its left-hand side -> base_prod's right-hand side with the
       new non-terminal at diff_index (same prob and freq), and
       new non-terminal -> the symbol the match had at diff_index, with prob
       freq / total freq of all matches. Non-matching productions are kept
       unchanged.

       grammar.new_symbol_count is incremented on the grammar passed in and
       copied onto the returned grammar. Raises ZeroDivisionError when there
       are matches whose freqs sum to zero.'''
    base_rhs = base_prod.rhs()
    prods = []      # productions that are merged through the new non-terminal
    new_prods = []  # productions of the resulting grammar
    for prod in grammar.productions():
        rhs = prod.rhs()
        if len(rhs) == len(base_rhs) and all(
                symbol == base_symbol or i == diff_index
                for i, (symbol, base_symbol) in enumerate(zip(rhs, base_rhs))):
            prods.append(prod)
        else:
            new_prods.append(prod)
    nt = nltk.grammar.Nonterminal('join_%s_%s' % ('_'.join("(%s)" % prod.rhs()[diff_index] for prod in prods), grammar.new_symbol_count))
    grammar.new_symbol_count += 1
    total_freq = sum(prod.freq for prod in prods)
    new_rhs = list(base_rhs)
    new_rhs[diff_index] = nt
    for prod in prods:
        new_prod = nltk.grammar.WeightedProduction(prod.lhs(), new_rhs, prob = prod.prob())
        new_prod.freq = prod.freq
        new_prods.append(new_prod)

        # float() forces true division: with integer freqs, Python 2's "/"
        # would truncate every probability below 1 to 0.
        new_prod = nltk.grammar.WeightedProduction(nt, [prod.rhs()[diff_index]], prob = float(prod.freq) / total_freq)
        new_prod.freq = prod.freq
        new_prods.append(new_prod)
    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), new_prods)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar
示例#9
0
                prodsagain = grammar.productions(lhs=prod)
                for prodagain in prodsagain:
                    prodrulenew = nltk.grammar.Production(
                        prodrule.lhs(),
                        replace(prodrule.rhs(), prod, prodagain.rhs()))
                    stack.put(prodrulenew)
                break
        if (flag):
            print "one of the sentence :", prodrule
    return


# Demo driver: build a small arithmetic grammar, seed the stack with the
# productions of the start symbol and print every sentence it derives.
grammar = CFG.fromstring('''
S -> A OP B
OP -> '+'| '-' | '*' | '/' | '%'
B -> '1'|'2'
A -> '3' | '4' 
''')
parser = ChartParser(grammar)
gr = parser.grammar()
# Each print below takes a single argument so the output is the same under
# the Python 2 print statement and the Python 3 print function.
print("GR: %s" % (gr,))
print("gr.start(): %s" % (gr.start(),))
globalmaxindex = 0
print("gr.start() : ")
print(gr.start())
productions = grammar.productions(lhs=gr.start())
for production in productions:
    stack.put(production)
produceStk(gr)
print("+++++++++++++++++")
示例#10
0
def print_grammar(grammar, nonterminal=""):
    """Print the productions of ``grammar``, one per line.

    With the default empty ``nonterminal`` every production is printed;
    otherwise only those whose left-hand symbol name equals ``nonterminal``.
    """
    show_all = nonterminal == ""
    for production in grammar.productions():
        if not show_all and production.lhs().symbol() != nonterminal:
            continue
        print(production)