def add_to_shrg_rules(shrg_rules, lhs, rhs_prev, rhs_next, s, t):
    """Store a (previous, next) rule pair under the nonterminal built from ``lhs``.

    The nonterminal label is ``str(len(lhs))``. When a stored pair has
    right-hand sides isomorphic to both new ones, each rule of that pair gets
    its ``weight`` doubled and ``iso`` set to True; if no stored pair matches,
    the new pair is appended. ``shrg_rules`` is updated in place.
    """
    lhs_he = grammar.Nonterminal(str(len(lhs)))
    rule_prev = grammar.Rule(lhs_he, make_rule(rhs_prev, lhs, s), t)
    rule_next = grammar.Rule(lhs_he, make_rule(rhs_next, lhs, s), t)
    new_pair = (rule_prev, rule_next)

    # First rule pair seen for this nonterminal: just start its list.
    if lhs_he not in shrg_rules:
        shrg_rules[lhs_he] = [new_pair]
        return

    def same_shape(known, candidate):
        return nx.is_isomorphic(known, candidate,
                                edge_match=edge_isomorph,
                                node_match=node_isomorph)

    match = None
    for stored in shrg_rules[lhs_he]:
        # Both sides of the pair must be isomorphic, previous side first.
        if not same_shape(stored[0].rhs, rule_prev.rhs):
            continue
        if not same_shape(stored[1].rhs, rule_next.rhs):
            continue
        match = stored
        for side in (stored[0], stored[1]):
            side.weight *= 2
            side.iso = True

    if not match:
        shrg_rules[lhs_he] += [new_pair]
def test_next_category(self):
    """Check ``next_category`` with the dot before and after the only symbol."""
    # Dot at position 0: 'VP' has not been consumed yet.
    pending = psr.State(rule=gmr.Rule('S', ['VP']),
                        span_start=0,
                        span_stop=0,
                        dot_position=0)
    self.assertEqual('VP', pending.next_category)

    # Dot at position 1: nothing is left on the right-hand side.
    finished = psr.State(rule=gmr.Rule('S', ['VP']),
                         span_start=0,
                         span_stop=1,
                         dot_position=1)
    self.assertEqual('', finished.next_category)
def test_incomplete(self):
    """Check ``incomplete`` with the dot before and after the only symbol."""
    # Dot at position 0: the state is expected to report incomplete.
    before_symbol = psr.State(rule=gmr.Rule('S', ['VP']),
                              span_start=0,
                              span_stop=0,
                              dot_position=0)
    self.assertTrue(before_symbol.incomplete)

    # Dot at position 1: the state is expected to report not incomplete.
    after_symbol = psr.State(rule=gmr.Rule('S', ['VP']),
                             span_start=0,
                             span_stop=0,
                             dot_position=1)
    self.assertFalse(after_symbol.incomplete)
def R():
    """Parse one rule: ``Nonterminal "->" Production ("|" Production)* ";"``.

    Returns False when no nonterminal starts here. Otherwise builds a
    ``grammar.Rule`` from the value on top of ``stack``, adds every parsed
    production to it, pushes the rule back on ``stack`` and returns True.
    Syntax problems are reported through ``error``.
    """
    if not Nonterminal():
        return False
    if not lexer.token(r'\->'):
        error('rules LHSs must be followed by "->"')
    rule = grammar.Rule(stack.pop())

    def attach_production():
        # Production() left a (rhs, actions, prec, assoc) tuple on the stack.
        rhs, actions, prec, assoc = stack.pop()
        rule.addProduction(rhs=rhs, actions=actions, prec=prec, assoc=assoc)

    # The first production is mandatory.
    if not Production():
        error('rule "{0}" has no productions'.format(rule.lhs))
    attach_production()

    # Any number of "|"-separated alternatives may follow.
    while lexer.token(r'\|'):
        if not Production():
            error('(%s) "|" must be followed by a production' % (rule.lhs))
        attach_production()

    if not lexer.token(';'):
        error('(%s) rules must be ended by ";"' % (rule.lhs))
    stack.append(rule)
    return True
def test_empty_words(self):
    """Parsing an empty word list is expected to yield an empty tree list."""
    single_rule_grammar = gmr.Grammar(
        gmr.Rule('N', ['Nothing'], preterminal=True))
    no_words = []
    result = psr.EarleyParser(single_rule_grammar).parse(no_words)
    self.assertEqual(0, len(result))
    self.assertEqual([], result)
def test_regex_rule(self):
    """A preterminal whose right-hand side is a ``gmr.Regex`` covers a word."""
    letter_rule = gmr.Rule('S', [gmr.Regex(r'[a-z]')], preterminal=True)
    regex_grammar = gmr.Grammar(letter_rule)
    result = psr.EarleyParser(regex_grammar).parse(['hello'])
    self.assertEqual(1, len(result))
    self.assertEqual([['S', 'hello']], result)
def test_programming_language_parsing(self):
    """A three-token assignment parses under the 'program' start symbol."""
    rules = [
        gmr.Rule('program', ['variable', 'operator', 'value']),
        gmr.Rule('variable', [gmr.Regex(r'x')], preterminal=True),
        gmr.Rule('operator', [gmr.Regex(r'[+\-=*/]')], preterminal=True),
        gmr.Rule('value', [gmr.Regex(r'\d+')], preterminal=True),
    ]
    language = gmr.Grammar(*rules, distinguished_symbol='program')
    tokens = ['x', '=', '599993949']
    result = psr.EarleyParser(language).parse(tokens)

    expected_tree = [
        'program',
        ['variable', 'x'],
        ['operator', '='],
        ['value', '599993949'],
    ]
    self.assertEqual([expected_tree], result)
def markNodesInMatrix(t, cky_matrix, displacement):
    """Write the nodes of binary tree ``t`` into ``cky_matrix`` and return it.

    A pre-terminal stores its root label at ``[0, displacement]``. Any other
    node stores a ``grammar.Rule`` (root -> roots of its first two children)
    at ``[conta_terminali(t) - 1, displacement]`` and then recurses: the first
    child keeps ``displacement``, the second is shifted by
    ``conta_terminali`` of the first.
    """
    if t.isPreTerminal():
        cky_matrix[0, displacement] = t.root
        return cky_matrix

    left, right = t.children[0], t.children[1]
    cky_matrix[conta_terminali(t) - 1, displacement] = grammar.Rule(
        t.root, [left.root, right.root])
    markNodesInMatrix(left, cky_matrix, displacement)
    markNodesInMatrix(right, cky_matrix,
                      displacement + conta_terminali(left))
    return cky_matrix
def augment_grammar(g):
    """Augment grammar ``g`` in place with a fresh start rule ``S' -> S``.

    ``S`` is the current ``g.start``. The new start symbol is ``S`` followed
    by one or more primes: primes are added until the name is not already
    listed in ``g.nonterm``, so an existing ``S'`` nonterminal is never
    reused as the augmented start symbol. The new symbol is appended to
    ``g.nonterm``, becomes ``g.start``, and the new rule is appended to
    ``g.rules``.
    """
    old_start = g.start
    new_start = old_start + "'"
    # Avoid clashing with a nonterminal that already carries this name.
    while new_start in g.nonterm:
        new_start += "'"
    g.start = new_start
    g.nonterm.append(new_start)
    g.rules.append(grammar.Rule([new_start, [old_start]]))
def test_initializer(self):
    """Every rule passed to ``gmr.Grammar`` is contained in it and counted."""
    built = gmr.Grammar(gmr.Rule('S', ['VP']),
                        gmr.Rule('VP', ['V']),
                        gmr.Rule('V', ['initialize'], preterminal=True))

    # Membership is checked with freshly built, equal rules.
    expected_rules = (
        ('S', ['VP'], {}),
        ('VP', ['V'], {}),
        ('V', ['initialize'], {'preterminal': True}),
    )
    for left, right, options in expected_rules:
        self.assertIn(gmr.Rule(left, right, **options), built)
    self.assertEqual(3, len(built))
def markNodesInMatrix(t, cky_matrix, displacement, returnTree=False):
    """Append the nodes of tree ``t`` to the cells of ``cky_matrix``.

    Each node is appended to the cell ``[displacement, width - 1]``, where
    ``width`` is 1 for a pre-terminal and ``conta_terminali(node)`` otherwise.
    The stored entry is the node itself when ``returnTree`` is true, else a
    ``grammar.Rule`` mapping the node's root to the roots of its children.
    Children are visited left to right; each one starts where the terminals
    of its earlier siblings end.

    Returns ``cky_matrix`` (modified in place).
    """
    is_leaf = t.isPreTerminal()
    column = 0 if is_leaf else conta_terminali(t) - 1
    if returnTree:
        entry = t
    else:
        entry = grammar.Rule(t.root, [x.root for x in t.children])
    cky_matrix[displacement, column].append(entry)

    if not is_leaf:
        # Running offset instead of re-summing every earlier sibling's width
        # for each child.
        offset = displacement
        for child in t.children:
            markNodesInMatrix(child, cky_matrix, offset, returnTree)
            offset += conta_terminali(child)
    return cky_matrix
def test_multiple_parses(self):
    """'duck' is both N and V, so the sentence gets two parses, V first."""
    lexicon = [
        gmr.Rule(tag, [word], preterminal=True)
        for tag, word in (('N', 'I'), ('V', 'made'), ('N', 'her'),
                          ('V', 'duck'), ('N', 'duck'))
    ]
    sentence_rules = [
        gmr.Rule('S', ['N', 'V', 'N', 'V']),
        gmr.Rule('S', ['N', 'V', 'N', 'N']),
    ]
    ambiguous = gmr.Grammar(*(lexicon + sentence_rules))
    result = psr.EarleyParser(ambiguous).parse(['I', 'made', 'her', 'duck'])

    shared_prefix = [['N', 'I'], ['V', 'made'], ['N', 'her']]
    verb_reading = ['S'] + shared_prefix + [['V', 'duck']]
    noun_reading = ['S'] + shared_prefix + [['N', 'duck']]
    self.assertEqual(2, len(result))
    self.assertEqual([verb_reading, noun_reading], result)
def R(self, states, s: earley.Situation, j: int):
    """Append rule numbers to ``self.pi`` by walking situation ``s`` backwards.

    The rule ``s.left -> s.beforeDot`` is looked up with
    ``find_rule_number`` and appended to ``self.pi``. The symbols before the
    dot are then visited from right to left: a terminal moves one step back
    in ``states``; a nonterminal triggers a recursive call on a matching
    finished situation found in ``states[c]``.

    Args:
        states: indexable collection of situation lists (``states[c]``).
        s: situation whose ``left`` / ``beforeDot`` describe the rule.
        j: index into ``states`` where ``s`` ends.

    Returns:
        ``self.pi``.

    NOTE(review): if a nonterminal has no matching pair of situations, or a
    symbol is neither terminal nor nonterminal, ``k`` is never decremented
    and the ``while`` loop does not terminate -- confirm this cannot happen
    for valid input.
    """
    breaked = False
    rule = gr.Rule(copy.deepcopy(s.left), copy.deepcopy(s.beforeDot))
    self.pi.append(self.__grammar.find_rule_number(rule))
    print(self.pi)
    k = len(s.beforeDot)  # number of symbols still to be explained
    print("k = " + str(k))
    c = j  # current index into states
    print("c = " + str(c))
    if k == 0:
        return self.pi
    while k != 0:
        breaked = False
        rightterm = rule.right[k - 1]
        print(rightterm.value)
        if self.__grammar.is_terminal(rightterm):
            # A terminal consumes one input position.
            k = k - 1
            c = c - 1
            print("k = " + str(k))
            print("c = " + str(c))
        elif self.__grammar.is_nonterminal(rightterm):
            # find the situation in I[c]
            Xk = s.beforeDot[k - 1].value
            A = s.left.value
            for st in states[c]:
                if breaked:
                    break
                # Candidate: nothing after the dot and left side equal to Xk.
                if not st.afterDot and st.left.value == Xk:
                    r = st.get_k()
                    print("r = " + str(r))
                    # find the situation in I[r]
                    print("-------")
                    for nst in states[r]:
                        if breaked:
                            break
                        # Situation for A in I[r] that expects Xk next.
                        if nst.left.value == A and nst.afterDot and nst.afterDot[
                                0].value == Xk:
                            self.R(states, st, c)
                            k = k - 1
                            c = r
                            breaked = True
    return self.pi
def R(self, pi, states, s: earley.Situation, j: int):
    """Append rule numbers to ``pi`` by walking situation ``s`` backwards.

    The rule ``s.left -> s.beforeDot`` is looked up with
    ``find_rule_number`` and appended to ``pi``. The symbols before the dot
    are then visited from right to left: a terminal moves one step back in
    ``states``; a nonterminal triggers a recursive call on the finished
    situation selected from ``states[c]``.

    Args:
        pi: list that receives the rule numbers (mutated in place).
        states: indexable collection of situation lists (``states[c]``).
        s: situation whose ``left`` / ``beforeDot`` describe the rule.
        j: index into ``states`` where ``s`` ends.

    Returns:
        ``pi``.

    NOTE(review): when no situation matches, ``searchstate`` stays ``None``
    (and ``r`` may be unbound) at the recursive call -- confirm this cannot
    happen for valid input.
    """
    rule = gr.Rule(copy.deepcopy(s.left), copy.deepcopy(s.beforeDot))
    pi.append(self.__grammar.find_rule_number(rule))
    # print(pi)
    k = len(s.beforeDot)  # number of symbols still to be explained
    print("k = " + str(k))
    c = j  # current index into states
    print("c = " + str(c))
    while k > 0:
        rightterm = rule.right[k - 1]
        print(rightterm.value)
        if self.__grammar.is_terminal(rightterm):
            # A terminal consumes one input position.
            k = k - 1
            c = c - 1
            print("k = " + str(k))
            print("c = " + str(c))
        elif self.__grammar.is_nonterminal(rightterm):
            # find the situation in I[c]
            Xk = s.beforeDot[k - 1].value
            A = s.left.value
            searchstate = None
            searchflag = False
            for st in states[c]:
                if searchflag:
                    break
                # Candidate: nothing after the dot and left side equal to Xk.
                if not st.afterDot and st.left.value == Xk:
                    r = st.get_k()
                    print("r = " + str(r))
                    # find the situation in I[r]
                    print("-------")
                    for nst in states[r]:
                        # Same left side as s, expects Xk next, and has
                        # exactly k - 1 symbols before the dot.
                        if nst.left.value == s.left.value \
                                and nst.afterDot and nst.afterDot[0].value == Xk \
                                and len(nst.beforeDot) == k - 1:
                            searchstate = st
                            searchflag = True
                            break
            self.R(pi, states, searchstate, c)
            k = k - 1
            c = r
    return pi
def test_parse(self):
    """'Book that flight' produces a single S -> VP -> V NP tree."""
    rules = [
        gmr.Rule('S', ['VP']),
        gmr.Rule('VP', ['V', 'NP']),
        gmr.Rule('NP', ['Det', 'Nominal']),
        gmr.Rule('Det', ['that'], preterminal=True),
        gmr.Rule('Nominal', ['flight'], preterminal=True),
        gmr.Rule('V', ['Book'], preterminal=True),
    ]
    result = psr.EarleyParser(gmr.Grammar(*rules)).parse(
        ['Book', 'that', 'flight'])

    noun_phrase = ['NP', ['Det', 'that'], ['Nominal', 'flight']]
    verb_phrase = ['VP', ['V', 'Book'], noun_phrase]
    self.assertEqual([['S', verb_phrase]], result)
frhs = networkx.DiGraph(root="i1") frhs.add_node("i1", label="instance") frhs.add_node("want", label="want") frhs.add_edge("i1", "want", label="id") frhs.add_node("x", label="instance") frhs.add_edge("i1", "x", label="agent") hypergraphs.add_hyperedge(frhs, ("x", ), label=grammar.Nonterminal("Entity"), link=0) frhs.add_node("i2", label="instance") frhs.add_edge("i1", "i2", label="theme") hypergraphs.add_hyperedge(frhs, ("i2", "x"), label=grammar.Nonterminal("Truth"), link=1) frules.append(grammar.Rule(lhs, frhs, id=0)) erhs = networkx.DiGraph(root="0") erhs.add_node("0", label="S") erhs.add_node("1", label="NP") erhs.add_node("2", label="VP") hypergraphs.add_hyperedge(erhs, ("0", "1", "2")) hypergraphs.add_hyperedge(erhs, ("1", ), label=grammar.Nonterminal("Entity"), link=0) erhs.add_node("21", label="VBP") erhs.add_node("22", label="SBAR") hypergraphs.add_hyperedge(erhs, ("2", "21", "22")) erhs.add_node("211", label="want") hypergraphs.add_hyperedge(erhs, ("21", "211")) hypergraphs.add_hyperedge(erhs, ("22", ),
def test_equality(self):
    """Rules compare equal only when the right-hand side order matches too."""
    reference = gmr.Rule('S', ['NP', 'VP'])
    same_content = gmr.Rule('S', ['NP', 'VP'])
    swapped_rhs = gmr.Rule('S', ['VP', 'NP'])
    self.assertEqual(reference, same_content)
    self.assertNotEqual(reference, swapped_rhs)
def fromPosListToRule(posList):
    """Return one ``grammar.Rule(pos, ['None'])`` per entry of ``posList``."""
    rules = []
    for pos in posList:
        # Each rule gets its own right-hand-side list.
        rules.append(grammar.Rule(pos, ['None']))
    return rules
def parser_with_reconstruction3(sentence, grammar, k_best, distributed_vector=None, dtk_generator=None, referenceTable=None, rule_filter=2):
    """CYK-style k-best parse of ``sentence`` scored against ``distributed_vector``.

    The table ``P`` is an ``n x n`` object array; ``P[start][length - 1]``
    holds the candidate trees covering ``length`` words from ``start``.

    Args:
        sentence: whitespace-separated words.
        grammar: object exposing ``symbols`` and ``nonterminalrules``.
        k_best: number of candidates kept per cell after sorting.
        distributed_vector: vector every tree encoding is dotted with.
        dtk_generator: object exposing ``sn``, ``dt`` and ``dtf``.
        referenceTable: optional table used only by the debug section.
        rule_filter: forwarded to ``filterRule``.

    Returns:
        ``(True, trees, P)`` with ``trees`` sorted by ``dt`` score when the
        top cell is non-empty; ``(False, None, P)`` otherwise. This includes
        an empty sentence (which used to raise ``IndexError``) and the case
        where memory usage is above 95%.
    """
    # uses the new grammar (grammar_2)
    words = sentence.split()
    n = len(words)
    #initialize TABLE
    P = numpy.zeros((n, n), dtype=object)
    for i, _ in numpy.ndenumerate(P):
        P[i] = []
    if n == 0:
        # Nothing to parse: an empty table has no P[0][-1] cell to read.
        return False, None, P
    #unit production
    for i, word in enumerate(words):
        # to prevent uncovered words we create rule of the form X -> w
        # for each symbol X in the grammar and for each word w in the sentence
        for symbol in grammar.symbols:
            rule = gramm.Rule(symbol,[word]) # create a new rule
            rt = rule.toTree() # and transform into tree
            score = numpy.dot(dtk_generator.sn(rt), distributed_vector)
            ## NORMALIZATION
            score = score/numpy.sqrt(numpy.dot(dtk_generator.sn(rt), dtk_generator.sn(rt)))
            rt.score = score
            P[i][0].append(rt)
        # keep only the two best-scoring one-word trees
        P[i][0] = sorted(P[i][0], key = lambda x: x.score, reverse=True)[:2]
    #non terminal rules
    numero_dtk = 0 #count iterations for debugging purpose
    for i in range(2, n + 1):
        #TODO: add a check if numero_dtk is too high and break returning "not parsed"
        if psutil.virtual_memory().percent > 95:
            return False, None, P
        for j in range(1, n - i + 2):
            for k in range(1, i):
                # look for combination of a tree in leftCell with a tree in rightCell
                leftCell = P[j - 1][k - 1]
                rightCell = P[j + k - 1][i - k - 1]
                for (subtree1, subtree2) in itertools.product(leftCell, rightCell):
                    stringa = subtree1.root + " " + subtree2.root
                    for rule in grammar.nonterminalrules[stringa]:
                        #FILTER on rules with too low score
                        passed, ruleScore = filterRule(rule, dtk_generator, distributed_vector, rule_filter)
                        if passed:
                            rtt = tree(root=rule.left, children=[subtree1, subtree2])
                            score = numpy.dot(dtk_generator.sn(rtt), distributed_vector)
                            ## NORMALIZATION
                            score = score/ruleScore
                            rtt.score = score
                            P[j-1][i-1].append(rtt)
                            numero_dtk = numero_dtk + 1
            #sort rules
            P[j-1][i-1] = sorted(P[j-1][i-1], key=lambda x: x.score, reverse=True)
            #another k_best rules where the root is different than the first rule selected before
            lista_diversi = [x for x in P[j-1][i-1] if x.root != P[j-1][i-1][0].root][:k_best]
            P[j-1][i-1] = P[j-1][i-1][:k_best]
            #if the new rules weren't already selected, add them
            if lista_diversi:
                for a in lista_diversi:
                    if a not in P[j-1][i-1]:
                        P[j-1][i-1].append(a)
            # DEBUG SECTION
            # With a reference table, print the trimmed cell next to the
            # reference cell whenever the reference entry is missing.
            # NOTE(review): cells are filled with tree objects above, yet this
            # section indexes entries as x[0][0] / x[1][0]; it looks written
            # for an older (rule, tree) tuple layout -- confirm before use.
            if referenceTable is not None:
                if P[j-1][i-1] and referenceTable[i-1][j-1]:
                    lista_alberi = [x[0][0] for x in P[j-1][i-1]]
                    if referenceTable[i-1][j-1] not in lista_alberi:
                        print ("cella: ", (i-1, j-1))
                        print ([x[0][0] for x in P[j-1][i-1]], referenceTable[i-1][j-1]) # <- this case is a FAIL
                        alberi_sbagliati = [x[1][0] for x in P[j-1][i-1]]
                        dtk_generator.dt_cache = {}
                        print ("SN: ")
                        for albero_sbagliato in alberi_sbagliati:
                            rtt = tree(root = referenceTable[i-1][j-1].left, children=alberi_sbagliati[0].children)
                            score1 = numpy.dot(dtk_generator.sn(albero_sbagliato), distributed_vector)
                            print (score1, albero_sbagliato)
                            score2 = numpy.dot(dtk_generator.sn(rtt), distributed_vector)
                            print (score2, rtt)
                        dtk_generator.dtf_cache = {}
                        print ("DTF: ")
                        for albero_sbagliato in alberi_sbagliati:
                            score1 = numpy.dot(dtk_generator.dtf(albero_sbagliato), distributed_vector)
                            regola = tree(root=albero_sbagliato.root, children=[tree(albero_sbagliato.children[0].root, None),tree(albero_sbagliato.children[1].root, None)])
                            print ("punteggio regola: ", numpy.dot(dtk_generator.dtf(regola), distributed_vector), regola)
                            print (score1, albero_sbagliato)
                            score2 = numpy.dot(dtk_generator.dtf(rtt), distributed_vector)
                            print (score2, rtt)
                else:
                    if referenceTable[i-1][j-1]: # and the P cell is empty: FAIL case
                        pass
                    if P[j-1][i-1]: # and the reference cell is 0: may be fine
                        pass
            # END OF DEBUG SECTION
    #list of tree in the final cell of the table
    finalList = P[0][-1]
    if finalList:
        #final sort (by DTK)
        finalList = sorted(finalList, key=lambda x: numpy.dot(dtk_generator.dt(x),distributed_vector), reverse=True)
        return True, finalList , P
    else:
        return False, None, P
def test_ambiguity(self):
    """PP attachment ambiguity yields two parses: verb-attached, then noun-attached."""
    rules = [
        gmr.Rule('S', ['NP', 'VP']),
        gmr.Rule('NP', ['Det', 'Nominal']),
        gmr.Rule('NP', ['Det', 'Nominal', 'PP']),
        gmr.Rule('NP', ['Nominal']),
        gmr.Rule('VP', ['VP', 'PP']),
        gmr.Rule('VP', ['V', 'NP']),
        gmr.Rule('PP', ['Prep', 'NP']),
        gmr.Rule('Det', ['a'], preterminal=True),
        gmr.Rule('Nominal', ['I'], preterminal=True),
        gmr.Rule('Nominal', ['man'], preterminal=True),
        gmr.Rule('Nominal', ['telescope'], preterminal=True),
        gmr.Rule('V', ['saw'], preterminal=True),
        gmr.Rule('Prep', ['with'], preterminal=True),
    ]
    sentence = ['I', 'saw', 'a', 'man', 'with', 'a', 'telescope']
    result = psr.EarleyParser(gmr.Grammar(*rules)).parse(sentence)

    subject = ['NP', ['Nominal', 'I']]
    saw = ['V', 'saw']
    with_telescope = [
        'PP', ['Prep', 'with'],
        ['NP', ['Det', 'a'], ['Nominal', 'telescope']]
    ]
    # ... saw ... with a telescope
    verb_attached = [
        'S', subject,
        ['VP',
         ['VP', saw, ['NP', ['Det', 'a'], ['Nominal', 'man']]],
         with_telescope]
    ]
    # ... man with a telescope
    noun_attached = [
        'S', subject,
        ['VP', saw,
         ['NP', ['Det', 'a'], ['Nominal', 'man'], with_telescope]]
    ]
    self.assertEqual(2, len(result))
    self.assertEqual([verb_attached, noun_attached], result)
def parse(self, sentence, k_best, distributed_vector=None, referenceTable=None):
    """CYK-style k-best parse of ``sentence`` scored against ``distributed_vector``.

    The table ``P`` is an ``n x n`` object array; ``P[start, length - 1]``
    holds the candidate trees covering ``length`` words from ``start``.
    Reads ``self.grammar``, ``self.dtk_generator`` and ``self.filter``.

    Args:
        sentence: whitespace-separated words.
        k_best: number of candidates kept per cell after sorting.
        distributed_vector: vector every tree encoding is dotted with.
        referenceTable: accepted but not used by this method.

    Returns:
        ``(True, trees, P)`` with ``trees`` sorted by ``dt`` score when the
        top cell is non-empty; ``(False, None, P)`` otherwise. This includes
        an empty sentence (which used to raise ``IndexError``) and the case
        where memory usage is above 95%.
    """
    words = sentence.split()
    n = len(words)
    #initialize TABLE
    P = numpy.zeros((n, n), dtype=object)
    for i, _ in numpy.ndenumerate(P):
        P[i] = []
    if n == 0:
        # Nothing to parse: an empty table has no P[0, -1] cell to read.
        return False, None, P
    #unit production
    for i, word in enumerate(words):
        # to prevent uncovered words we create rule of the form X -> w
        # for each symbol X in the grammar and for each word w in the sentence
        for symbol in self.grammar.symbols:
            rule = gramm.Rule(symbol,[word]) # create a new rule
            rt = rule.toTree() # and transform into tree
            score = numpy.dot(self.dtk_generator.sn(rt), distributed_vector)
            ## NORMALIZATION
            score = score/numpy.sqrt(numpy.dot(self.dtk_generator.sn(rt), self.dtk_generator.sn(rt)))
            rt.score = score
            P[i][0].append(rt)
        # keep only the two best-scoring one-word trees
        P[i, 0] = sorted(P[i, 0], key = lambda x: x.score, reverse=True)[:2]
    #non terminal rules
    numero_dtk = 0 #count iterations for debugging purpose
    for i in range(2, n + 1):
        #TODO: add a check if numero_dtk is too high and break returning "not parsed"
        if psutil.virtual_memory().percent > 95:
            return False, None, P
        for j in range(1, n - i + 2):
            for k in range(1, i):
                # look for combination of a tree in leftCell with a tree in rightCell
                leftCell = P[j - 1, k - 1]
                rightCell = P[j + k - 1, i - k - 1]
                for (subtree1, subtree2) in itertools.product(leftCell, rightCell):
                    stringa = subtree1.root + " " + subtree2.root
                    for rule in self.grammar.nonterminalrules[stringa]:
                        #FILTER on rules with too low score
                        passed, ruleScore = self.filterRule(rule, distributed_vector, self.filter)
                        if passed:
                            rtt = tree(root=rule.left, children=[subtree1, subtree2])
                            score = numpy.dot(self.dtk_generator.sn(rtt), distributed_vector)
                            ## NORMALIZATION
                            score = score/ruleScore
                            rtt.score = score
                            P[j-1, i-1].append(rtt)
                            numero_dtk = numero_dtk + 1
            #sort rules
            P[j-1, i-1] = sorted(P[j-1, i-1], key=lambda x: x.score, reverse=True)
            #another k_best rules where the root is different than the first rule selected before
            lista_diversi = [x for x in P[j-1, i-1] if x.root != P[j-1, i-1][0].root][:k_best]
            P[j-1, i-1] = P[j-1, i-1][:k_best]
            #if the new rules weren't already selected, add them
            if lista_diversi:
                for a in lista_diversi:
                    if a not in P[j-1, i-1]:
                        P[j-1, i-1].append(a)
    #list of tree in the final cell of the table
    finalList = P[0, -1]
    if finalList:
        #final sort (by DTK)
        finalList = sorted(finalList, key=lambda x: numpy.dot(self.dtk_generator.dt(x),distributed_vector), reverse=True)
        return True, finalList , P
    else:
        return False, None, P
def parse(self, sentence, k_best=2, distributed_vector=None, referenceTable=None, rule_filter=2):
    """Return the k-best parses of ``sentence``, allowing rules of any arity.

    ``C`` is an ``n x n`` object array; cell ``C[i, j]`` covers words ``i``
    to ``j`` inclusive and holds two lists:

    * ``C[i, j][0]`` -- complete trees (A -> B C D ...), each with a ``score``;
    * ``C[i, j][1]`` -- partial rules, lists of complete trees followed by the
      marker ``"•"`` for which some longer grammar rule exists.

    Reads ``self.grammar``, ``self.dtk_generator`` and ``self.filter``.
    ``referenceTable`` and ``rule_filter`` are accepted but not used here.

    Returns:
        ``(True, trees, C)`` with ``trees`` sorted by ``dt`` score when the
        top cell has complete trees, else ``(False, None, C)``. An empty
        sentence returns the latter instead of raising ``IndexError``.
    """
    words = sentence.split()
    n = len(words)
    #initialize TABLE
    C = numpy.zeros((n, n), dtype=object)
    for i, _ in numpy.ndenumerate(C):
        C[i] = [[], []]
    #parsing step
    numero_dtk = 0
    for span in range(0, n):
        for i in range(0, n - span):
            j = i + span
            if i == j:
                # to prevent uncovered words we create rule of the form X -> w
                # for each symbol X in the grammar and for each word w in the sentence
                for sym in self.grammar.symbols:
                    rule = gramm.Rule(sym, [words[i]])
                    rt = rule.toTree()
                    score = numpy.dot(self.dtk_generator.sn(rt),
                                      distributed_vector)
                    ## NORMALIZATION
                    score = score / numpy.sqrt(
                        numpy.dot(self.dtk_generator.sn(rt),
                                  self.dtk_generator.sn(rt)))
                    rt.score = score
                    C[i][j][0].append(rt)
                C[i, j][0] = sorted(C[i, j][0],
                                    key=lambda x: x.score,
                                    reverse=True)[:k_best]
                #self-filling part: the list may grow while it is iterated,
                #so freshly added unary trees are expanded too
                for B in C[i, j][0]:
                    B_string = B.root
                    rules = self.grammar.nonterminalrules[B_string]
                    for r in rules:
                        if B_string != " ".join(r.right):
                            # B only starts a longer rule: remember it as partial
                            if [B, "•"] not in C[i, j][1]:
                                C[i, j][1].append([B, "•"])
                        else:
                            # unary rule X -> B
                            new_tree = tree(root=r.left, children=[B])
                            score = numpy.dot(
                                self.dtk_generator.sn(new_tree),
                                distributed_vector)
                            numero_dtk = numero_dtk + 1
                            if score > B.score:
                                new_tree.score = score
                                C[i, j][0].append(new_tree)
                    # NOTE(review): guard placed at the end of each outer
                    # iteration -- confirm against the intended nesting.
                    if len(C[i, j][0]) > 10:
                        break
                #sort and trimming (the partial list is left unsorted)
                C[i, j][0] = sorted(C[i, j][0],
                                    key=lambda x: x.score,
                                    reverse=True)[:k_best]
            if j > i:
                # Split points start at i: cells C[i, k] with k < i lie below
                # the diagonal, are never filled, and contribute nothing.
                for k in range(i, j):
                    first_cell_C = C[i, k]
                    second_cell_C = C[k + 1, j]
                    for (x, y) in itertools.product(first_cell_C[1],
                                                    second_cell_C[0]):
                        xx = " ".join(c.root for c in x[:-1])
                        yy = y.root
                        string = xx + " " + yy
                        rules = self.grammar.nonterminalrules[string]
                        for r in rules:
                            #rule filtering
                            passed, ruleScore = self.filterRule(
                                r, distributed_vector, self.filter)
                            if passed:
                                if " ".join(r.right) == string:
                                    # the partial rule is now complete
                                    children = x[:-1]
                                    children.append(y)
                                    new_tree = tree(root=r.left,
                                                    children=children)
                                    score = numpy.dot(
                                        self.dtk_generator.sn(new_tree),
                                        distributed_vector)
                                    numero_dtk = numero_dtk + 1
                                    new_tree.score = score
                                    if new_tree not in C[i, j][0]:
                                        C[i, j][0].append(new_tree)
                                else:
                                    # still partial: extend it by y
                                    new_list = x[:-1] + [y] + ["•"]
                                    if new_list not in C[i, j][1]:
                                        C[i, j][1].append(new_list)
                #self-filling part
                for B in C[i, j][0]:
                    B_string = B.root
                    rules = self.grammar.nonterminalrules[B_string]
                    for r in rules:
                        #TODO: add another rule filter here?
                        passed, ruleScore = self.filterRule(
                            r, distributed_vector, self.filter)
                        if passed:
                            if B_string != " ".join(r.right):
                                if [B, "•"] not in C[i, j][1]:
                                    C[i, j][1].append([B, "•"])
                            else:
                                # to avoid infinite loops a tree is added only
                                # if its score beats the score of its child
                                new_tree = tree(root=r.left, children=[B])
                                score = numpy.dot(
                                    self.dtk_generator.sn(new_tree),
                                    distributed_vector)
                                numero_dtk = numero_dtk + 1
                                if score > B.score:
                                    new_tree.score = score
                                    C[i, j][0].append(new_tree)
                    # give up when too many trees are being added
                    # NOTE(review): guard placed at the end of each outer
                    # iteration -- confirm against the intended nesting.
                    if len(C[i, j][0]) > 20:
                        break
                # sort (no trimming yet) the list of complete trees
                C[i, j][0] = sorted(C[i, j][0],
                                    key=lambda x: x.score,
                                    reverse=True)
                # as in plain CYK, collect trees whose root differs from the best one
                lista_diversi = [
                    x for x in C[i, j][0] if x.root != C[i, j][0][0].root
                ][:k_best]
                # and only then trim to k_best
                C[i, j][0] = C[i, j][0][:k_best]
                #if the new rules weren't already selected, add them
                if lista_diversi:
                    for a in lista_diversi:
                        # Check the cell being filled; the transposed cell
                        # C[j, i] is always empty and let duplicates through.
                        if a not in C[i, j][0]:
                            C[i, j][0].append(a)
    print(numero_dtk)
    # same output shape as the plain CYK parser
    finalList = C[0][-1][0] if n else []
    if finalList:
        #final sort (by DTK)
        finalList = sorted(
            finalList,
            key=lambda x: numpy.dot(self.dtk_generator.dt(x),
                                    distributed_vector),
            reverse=True)
        return True, finalList, C
    else:
        return False, None, C
def parse(self, sentence, k_best=2, distributed_vector=None, referenceTable=None, rule_filter=2, realTree=None):
    """Return the k-best parses of ``sentence``, seeding words with POS tags.

    ``C`` is an ``n x n`` object array; ``C[start, length - 1]`` covers
    ``length`` words from ``start`` and holds two lists:

    * ``[0]`` -- complete trees (A -> B C D ...), each with a ``score``;
    * ``[1]`` -- partial rules, lists of complete trees for which some
      longer grammar rule exists.

    Reads ``self.grammar``, ``self.dtk_generator``, ``self.filter`` and
    ``self.LAMBDA``. ``referenceTable``, ``rule_filter`` and ``realTree``
    are accepted but not used here.

    Returns:
        ``(True, trees, C)`` with ``trees`` sorted by ``dt`` score when the
        top cell has complete trees, else ``(False, None, C)``. An empty
        sentence returns the latter instead of raising ``IndexError``.
    """
    words = sentence.split()
    n = len(words)
    #initialize TABLE
    C = numpy.zeros((n, n), dtype=object)
    for i, _ in numpy.ndenumerate(C):
        C[i] = [[], []]
    if n == 0:
        # Nothing to parse: an empty table has no C[0][-1] cell to read.
        return False, None, C

    #unit production
    for i, word in enumerate(words):
        # to prevent uncovered words we create rule of the form X -> w
        # for each POS tag X in the grammar and for each word w in the sentence
        # TODO: also, do more clever stuff: i.e if w is a number always do CD -> w
        # 1) parsing step
        # some special cases: punctuation is tagged directly, without filtering
        if word == ",":
            tree = gramm.Rule(",", word)
            rt = tree.toTree()
            score = numpy.dot(self.dtk_generator.dtf(rt), distributed_vector)
            rt.score = score
            C[i, 0][0].append(rt)
        elif word in "`'":
            # the tag is the quote character doubled
            tree = gramm.Rule(2 * word, word)
            rt = tree.toTree()
            score = numpy.dot(self.dtk_generator.dtf(rt), distributed_vector)
            rt.score = score
            C[i, 0][0].append(rt)
        else:
            for symbol in self.grammar.posTags:
                tree = gramm.Rule(symbol, [word])  # create a new rule
                rt = tree.toTree()  # and transform into tree
                # score is used without normalization
                score = numpy.dot(self.dtk_generator.dtf(rt),
                                  distributed_vector)
                rt.score = score
                if score > self.LAMBDA / self.filter:
                    C[i, 0][0].append(rt)
        C[i, 0][0] = sorted(C[i, 0][0],
                            key=lambda x: x.score,
                            reverse=True)[:k_best]

        # 2) self-filling step: the list may grow while it is iterated
        for tree in C[i, 0][0]:
            treeString = tree.root
            rules = self.grammar.nonterminalrules[treeString]
            incompleteRules = False
            completeRules = []
            for rule in rules:
                if treeString != " ".join(rule.right):
                    incompleteRules = True
                else:
                    completeRules.append(rule)
            if incompleteRules:
                C[i, 0][1].append([tree])
            for completeRule in completeRules:
                passed, score = self.filterRule(completeRule,
                                                distributed_vector,
                                                self.filter)
                if passed:
                    # it's a complete rule (of the form X -> A )
                    newTree = Tree(root=completeRule.left, children=[tree])
                    newTreescore = numpy.dot(
                        self.dtk_generator.sn(newTree), distributed_vector)
                    passed, score = self.filterTree(newTree,
                                                    distributed_vector,
                                                    self.filter)
                    if passed:
                        newTree.score = newTreescore
                        C[i, 0][0].append(newTree)
            # NOTE(review): guard placed at the end of each outer iteration
            # -- confirm against the intended nesting.
            if len(C[i, 0][0]) > 100:
                print('aiuto')
                break

        #sort and trimming
        if len(C[i, 0][0]) > k_best:
            C[i, 0][0] = sorted(C[i, 0][0],
                                key=lambda x: x.score,
                                reverse=True)[:k_best]

    # after unit rules
    for i in range(2, n + 1):
        for j in range(1, n - i + 2):
            # 1) parsing
            for k in range(1, i):
                # look for combination of a partial rule in leftCell with a
                # complete tree in rightCell
                leftCell = C[j - 1, k - 1]
                rightCell = C[j + k - 1, i - k - 1]
                for (partialRule, completeRule) in itertools.product(
                        leftCell[1], rightCell[0]):
                    ruleString = " ".join(
                        c.root for c in partialRule) + " " + completeRule.root
                    rules = self.grammar.nonterminalrules[ruleString]
                    newPartialRule = False
                    newCompleteRule = []
                    for rule in rules:
                        if " ".join(rule.right) == ruleString:
                            newCompleteRule.append(rule)
                        else:
                            newPartialRule = True
                    children = partialRule + [completeRule]
                    if newPartialRule:
                        C[j - 1, i - 1][1].append(children)
                    for rule in newCompleteRule:
                        passed, ruleScore = self.filterRule(
                            rule, distributed_vector, self.filter)
                        # NOTE(review): looks like leftover debugging output
                        # for one specific rule -- confirm it is still wanted.
                        if rule == Rule(
                                left="NP",
                                right=[
                                    "NP , NP , NP , NP , NP , NP CC NP"
                                ]):
                            t = rule.toTree()
                            v = numpy.linalg.norm(
                                self.dtk_generator.dtf(t))
                            print(v)
                        if passed:
                            newTree = Tree(root=rule.left, children=children)
                            score = numpy.dot(
                                self.dtk_generator.sn(newTree),
                                distributed_vector)
                            newTree.score = score
                            if newTree not in C[j - 1, i - 1][0]:
                                C[j - 1, i - 1][0].append(newTree)

            # 2) self-filling
            for tree in C[j - 1, i - 1][0]:
                ruleString = tree.root
                rules = self.grammar.nonterminalrules[ruleString]
                incompleteRules = False
                completeRules = []
                for rule in rules:
                    if ruleString != " ".join(rule.right):
                        incompleteRules = True
                    else:
                        completeRules.append(rule)
                if incompleteRules:
                    C[j - 1, i - 1][1].append([tree])
                for completeRule in completeRules:
                    # filter on rule with low score
                    passed, ruleScore = self.filterRule(
                        completeRule, distributed_vector, self.filter)
                    if passed:
                        # skip unary chains that would repeat X -> X -> X
                        if (len(tree.children) == 1) and (
                                completeRule.left == tree.root ==
                                tree.children[0].root):
                            continue
                        newTree = Tree(root=completeRule.left,
                                       children=[tree])
                        score = numpy.dot(self.dtk_generator.sn(newTree),
                                          distributed_vector)
                        newTree.score = score
                        C[j - 1, i - 1][0].append(newTree)
                # NOTE(review): guard placed at the end of each outer
                # iteration -- confirm against the intended nesting.
                if len(C[j - 1][i - 1][0]) > 50:
                    break

            # 3) sorting and trimming
            if len(C[j - 1, i - 1][0]) > k_best:
                C[j - 1, i - 1][0] = sorted(C[j - 1, i - 1][0],
                                            key=lambda x: x.score,
                                            reverse=True)
                # as in plain CYK, collect trees whose root differs from the best one
                lista_diversi = [
                    x for x in C[j - 1, i - 1][0]
                    if x.root != C[j - 1, i - 1][0][0].root
                ][:k_best]
                # and only then trim to k_best
                C[j - 1, i - 1][0] = C[j - 1, i - 1][0][:k_best]
                #if the new rules weren't already selected, add them
                if lista_diversi:
                    for a in lista_diversi:
                        if a not in C[j - 1, i - 1][0]:
                            C[j - 1, i - 1][0].append(a)

            # finally sort and trim the list of partial rules
            if len(C[j - 1, i - 1][1]) > k_best:
                C[j - 1, i - 1][1] = sorted(
                    C[j - 1, i - 1][1],
                    key=lambda x: self.scorePartialRule(
                        x, self.filter, distributed_vector),
                    reverse=True)[:k_best]

    # same output shape as the plain CYK parser
    finalList = C[0][-1][0]
    if finalList:
        #final sort (by DTK)
        finalList = sorted(
            finalList,
            key=lambda x: numpy.dot(self.dtk_generator.dt(x),
                                    distributed_vector),
            reverse=True)
        return True, finalList, C
    else:
        return False, None, C
def set_rules():
    """Build the list of grammar productions and return it.

    Every entry is ``gr.Rule(gr.Term(lhs), [gr.Term(sym), ...])``; entries
    are appended in a fixed order, which callers may depend on.

    NOTE(review): the symbols "значение", "объявление константы",
    "имя константы", "имя функции" and "лог значение" occur only on
    right-hand sides in this function (their productions are absent or were
    disabled) -- confirm whether they are meant to be produced elsewhere.

    Returns:
        list: the productions, in declaration order.
    """
    # First 52 characters are the Latin letters; indices 52..157 are used
    # for the "other symbols" productions below.
    terms_str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZабвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ0123456789(){}[]+-*/%><=!&|;“‘,_#@$^~№:?"
    productions = []

    def add(head, *body):
        """Append the production ``head -> body`` to the result list."""
        productions.append(
            gr.Rule(gr.Term(head), [gr.Term(symbol) for symbol in body]))

    # --- program ---------------------------------------------------------
    # Alternatives that prefixed the program with variable / function /
    # constant declarations were disabled by the original author.
    add("программа", "главная функция")

    # --- main function ---------------------------------------------------
    add("главная функция",
        "R3", "ID", "D6", "D7", "D4",
        "блок кода", "возврат значения", "D5")

    # --- variable declaration --------------------------------------------
    # (The source document calls the type here "variable data type".)
    add("объявление переменной", "тип данных", "идентификатор", "D3")
    add("объявление переменной",
        "тип данных", "идентификатор", "O15", "значение", "D3")
    add("объявление переменной",
        "тип данных", "идентификатор", "O15", "выражение", "D3")

    # Constant-declaration and "value" productions were disabled by the
    # original author and are intentionally not generated.

    # --- function declaration --------------------------------------------
    add("объявление функции",
        "тип данных функции", "идентификатор", "D6", "параметры функции",
        "D7", "D4", "тело функции", "D5")
    add("объявление функции",
        "тип данных функции", "идентификатор", "D6", "D7", "D4",
        "тело функции", "D5")

    # --- function parameters ---------------------------------------------
    add("параметры функции", "тип данных", "идентификатор")
    add("параметры функции",
        "тип данных", "идентификатор", "D2", "параметры функции")

    # --- value data type -------------------------------------------------
    for code in ("R1", "R2", "R3", "R4", "R5"):
        add("значимый тип данных", code)

    # --- data type modifier ----------------------------------------------
    for code in ("K6", "K5", "K7", "K8"):
        add("модификатор типа данных", code)

    # --- data type -------------------------------------------------------
    add("тип данных", "модификатор типа данных", "значимый тип данных")
    add("тип данных", "значимый тип данных")

    # --- function data type ----------------------------------------------
    add("тип данных функции", "тип данных")
    add("тип данных функции", "R6")

    # --- letter: one production per Latin letter -------------------------
    for letter in terms_str[:52]:
        add("буква", letter)

    # --- digit -----------------------------------------------------------
    for digit in range(10):
        add("цифра", str(digit))

    # --- integer ---------------------------------------------------------
    add("целое число", "цифра", "целое число")
    add("целое число", "цифра")

    # --- real number -----------------------------------------------------
    add("вещественное число", "N", "D1", "N")

    # --- number ----------------------------------------------------------
    add("число", "целое число")
    add("число", "вещественное число")

    # --- other symbols: everything after the Latin letters ---------------
    for position in range(52, 158):
        add("прочие символы", terms_str[position])

    # --- identifier ------------------------------------------------------
    # Character-level identifier productions were disabled by the original
    # author; an identifier is the single symbol "ID".
    add("идентификатор", "ID")

    # --- function body ---------------------------------------------------
    add("тело функции", "блок кода", "возврат значения")
    add("тело функции", "блок кода")

    # --- return value ----------------------------------------------------
    for returned in ("выражение", "ID", "имя константы"):
        add("возврат значения", "K10", returned, "D3")

    # --- code block ------------------------------------------------------
    add("блок кода", "инструкция", "блок кода")
    add("блок кода", "инструкция")

    # --- loop ------------------------------------------------------------
    add("цикл",
        "K9", "D6", "выражение", "D7", "D4", "тело цикла", "D5")
    add("цикл",
        "K1", "D4", "тело цикла", "D5", "K9", "D6", "выражение", "D7")
    add("цикл",
        "K3", "D6", "инструкция", "лог выражение", "D3", "присваивание",
        "D7", "D4", "тело цикла", "D5")
    add("цикл",
        "K3", "D6", "ID", "D3", "лог выражение", "D3", "присваивание",
        "D7", "D4", "тело цикла", "D5")

    # --- loop body -------------------------------------------------------
    # Variants involving a loop-control operator (break / continue) were
    # disabled by the original author.
    add("тело цикла", "блок кода")

    # --- branching -------------------------------------------------------
    # All three alternatives share the same leading sequence.
    branch_head = ("K4", "D6", "выражение", "D7", "D4", "блок кода", "D5")
    add("ветвление", *branch_head)
    add("ветвление", *(branch_head + ("K2", "D4", "блок кода", "D5")))
    add("ветвление", *(branch_head + ("K2", "ветвление")))

    # --- character value -------------------------------------------------
    add("символьное значение", "C")

    # --- comparison operator ---------------------------------------------
    for code in ("O13", "O14", "O11", "O12", "O9", "O10"):
        add("оператор сравнения", code)

    # --- additive math operator ------------------------------------------
    for code in ("O1", "O2"):
        add("мат знак типа сложения", code)

    # --- multiplicative math operator ------------------------------------
    for code in ("O3", "O4", "O5"):
        add("мат знак типа умножения", code)

    # --- math expression -------------------------------------------------
    add("мат выражение", "E1")

    # --- E1 --------------------------------------------------------------
    add("E1", "T1", "мат знак типа сложения", "E1")
    add("E1", "T1")

    # --- T1 --------------------------------------------------------------
    add("T1", "F1", "мат знак типа умножения", "T1")
    add("T1", "F1")
    # Open question left by the original author: shouldn't this follow the
    # shape of the previous group, i.e. "F1 <math operator> T1"?

    # --- F1 --------------------------------------------------------------
    # NOTE(review): literal "(" / ")" are used here, while every other
    # production in this function brackets with "D6" / "D7" -- confirm
    # which symbols the tokenizer actually emits.
    add("F1", "(", "E1", ")")
    add("F1", "N")
    add("F1", "вещественное число")
    add("F1", "ID")

    # --- boolean value ---------------------------------------------------
    for code in ("R10", "R11"):
        add("логическое значение", code)

    # --- additive logical operator ---------------------------------------
    add("лог знак типа сложения", "O6")
    add("лог знак типа сложения", "оператор сравнения")

    # --- multiplicative logical operator ---------------------------------
    add("лог знак типа умножения", "O7")

    # --- unary logical operator ------------------------------------------
    add("лог знак унарной операции", "O8")

    # --- logical expression ----------------------------------------------
    add("лог выражение",
        "мат выражение", "оператор сравнения", "мат выражение")
    add("лог выражение",
        "символьное значение", "оператор сравнения", "символьное значение")
    # NOTE(review): "лог значение" has no production here; the boolean
    # literals are defined under "логическое значение" -- possibly a
    # naming mismatch, confirm before changing.
    add("лог выражение", "лог значение")

    # The E2 / T2 / F2 productions for compound logical expressions were
    # disabled by the original author.

    # --- expression ------------------------------------------------------
    add("выражение", "лог выражение")
    add("выражение", "мат выражение")
    add("выражение", "символьное значение")

    # --- statement -------------------------------------------------------
    add("инструкция", "присваивание", "D3")
    add("инструкция", "объявление переменной")
    add("инструкция", "объявление константы")
    add("инструкция", "вызов функции", "D3")
    add("инструкция", "выражение", "D3")
    add("инструкция", "цикл")
    add("инструкция", "ветвление")

    # --- function call ---------------------------------------------------
    add("вызов функции", "имя функции", "D6", "D7")
    add("вызов функции",
        "имя функции", "D6", "параметры вызова функции", "D7")

    # --- function call arguments -----------------------------------------
    # NOTE(review): the separator here is a literal ",", whereas
    # "параметры функции" separates with "D2" -- confirm this is intended.
    add("параметры вызова функции", "выражение")
    add("параметры вызова функции",
        "выражение", ",", "параметры вызова функции")

    # --- assignment operator ---------------------------------------------
    # Compound forms (+=, -=, *=, /=, %=) were disabled by the original
    # author.
    add("оператор присваивания", "O15")

    # --- assignment ------------------------------------------------------
    add("присваивание",
        "идентификатор", "оператор присваивания", "выражение")
    add("присваивание",
        "идентификатор", "оператор присваивания", "идентификатор")

    return productions
def topRule(t):
    """Return the grammar rule corresponding to the top level of tree ``t``.

    The rule's left side is ``t.root`` and its right side is the list of
    ``root`` values of ``t.children``, in child order.
    """
    rule_factory = grammar.Rule
    left_side = t.root
    right_side = []
    for child in t.children:
        right_side.append(child.root)
    return rule_factory(left_side, right_side)