def decode_cyk(self):
    """Decode by visiting every span and expanding it bottom-up.

    After ``self.initialize()``, each span produced by
    ``cyk_spans(self.N)`` gets its binary expansion first and its unary
    expansion second.

    Returns whatever ``self.get_goal()`` returns; per the original note
    that is None if no goal item found.
    """
    self.initialize()
    for span in cyk_spans(self.N):
        start, end = span
        # Binary expansion runs before unary expansion on the same span.
        self.binary_expand(start, end)
        self.unary_expand(start, end)
    goal = self.get_goal()
    return goal
def decode_earley(self):
    """Decode span by span using dot charts and cubes.

    For each span ``(i, j)`` from ``cyk_spans(self.N)``:

    1. Expand every dot chart on the span and, for each dot item whose
       trie node is ``filled``, build one cube per ``(lhs, rulebin)``
       pair.  Cubes go to a separate collection when ``is_virtual(lhs)``
       is true.
    2. Pop items from the regular cubes, then from the virtual cubes,
       and add them to ``self.chart``.
    3. Apply unary rules to the chart on the span.
    4. Run ``unary_expand`` on every dot chart for the span.

    Counters updated along the way: ``self.cubes_built`` (one per cube
    added) and ``self.nonunary_edges_proposed`` (one per popped item).

    Returns the result of ``self.get_goal(True)``.
    Returns None if no goal item found
    """
    self.initialize_earley()
    # Debugging toggle left by the author: uncomment to force verbose logs.
    #logger.level = 5
    for i, j in cyk_spans(self.N):
        # Everything guarded by ``logger.level >= 4`` is trace output only.
        if logger.level >= 4:
            logger.writeln()
            logger.writeln('---- span (%s %s) ----' % (i, j))
        # finish dot chart, build a cube
        # Regular and virtual left-hand sides are collected separately so
        # that they can be popped in two passes below.
        new_items = Cube()
        new_virtual_items = Cube()
        for dotchart in self.dotcharts:
            if logger.level >= 4:
                logger.writeln()
                logger.writeln('dot chart for %s' % dotchart.grammar.name)
            dotchart.expand(i, j)
            for dotitem in dotchart.bins[i][j]:
                # Only dot items whose node is marked ``filled`` contribute.
                if dotitem.node.filled:
                    for lhs, rulebin in dotitem.node.iter_rulebins():
                        # First cube dimension is the rule bin; the rest
                        # are the dot item's antecedents.
                        bins = (rulebin, ) + dotitem.ants
                        if is_virtual(lhs):
                            new_virtual_items.add_cube(
                                bins, self.get_cube_op(i, j))
                        else:
                            new_items.add_cube(bins, self.get_cube_op(i, j))
                        self.cubes_built += 1
        if logger.level >= 4:
            logger.writeln(' -- cubes --')
            logger.writeln(new_items)
            logger.writeln(' -- cubes for virtual items--')
            logger.writeln(new_virtual_items)
        # pop new items from the cube
        # Order matters: regular cubes are drained before virtual ones.
        for cube in [new_items, new_virtual_items]:
            #print '================'
            #print cube
            # NOTE(review): presumably FLAGS.bin_size bounds how many items
            # iter_top_univar yields -- confirm against Cube.
            for new_item in cube.iter_top_univar(FLAGS.bin_size):
                self.nonunary_edges_proposed += 1
                #if logger.level >= 4:
                #    logger.writeln('cube pop: %s' % new_item)
                #    logger.writeln(new_item.incoming[0])
                # The result of chart.add is only used to decide whether
                # to log the item below.
                added = self.chart.add(new_item)
                #if logger.level >= 4:
                #    logger.writeln('added: %s' % added)
                if logger.level >= 4 and added:
                    logger.writeln('cube pop and add: %s' % new_item)
                    logger.writeln(new_item.incoming[0])
            #print '----------------'
        # apply unary rules
        self.unary_expand(i, j)
        # generate dot items like A->B.C (first nonterminal matched)
        # after the unary derivations are all finished
        for dotchart in self.dotcharts:
            if logger.level >= 4:
                logger.writeln()
                logger.writeln('unary expand for dot chart %s' % dotchart.grammar.name)
            dotchart.unary_expand(i, j)
    return self.get_goal(True)
def initialize_with_lexgrammar(self, lexgrammar):
    """Seed the chart and the agenda using a purely lexical grammar.

    Follows the trie rooted at ``lexgrammar.root`` along the words of
    ``self.fwords``.  Whenever the words covered by a span lead to a
    trie node, an item is created for every rule on that node, pushed
    onto ``self.agenda`` and added to ``self.chart``.
    """
    size = self.N + 1
    # trie_at[i][j] is the trie node reached for span (i, j), or None
    # when no node has been recorded for that span.
    trie_at = [[None] * size for _ in range(size)]

    # Every empty span (i, i) starts at the root of the trie.
    for start in range(self.N):
        trie_at[start][start] = lexgrammar.root

    #TODO: insertion?
    for start, end in cyk_spans(self.N):
        parent = trie_at[start][end - 1]
        word = self.fwords[end - 1]
        if not parent or word not in parent:
            # No trie path covers this span; leave its cell as None.
            continue
        # Scan one word: step from the shorter span's node to its child.
        node = parent[word]
        trie_at[start][end] = node
        for rule in node.iter_rules():
            item = self.features.make_new_item(rule, (), start, end)
            self.agenda.push(item)
            self.chart.add(item)
def iter_bins(self):
    """Lazily yield ``self.bins[i][j]`` for each span of the chart.

    Spans are taken from ``cyk_spans(self.chart.N)`` and bins are
    yielded in that same order.
    """
    for span in cyk_spans(self.chart.N):
        start, end = span
        yield self.bins[start][end]