Пример #1
0
 def decode_earley(self):
     """Run the Earley-style chart decoder over every span.

     After `initialize_earley()`, each span `(i, j)` produced by
     `cyk_spans(self.N)` goes through four phases:

     1. every dot chart is expanded for the span, and each filled dot item
        contributes one cube per rule bin (virtual left-hand sides go to a
        separate cube collection);
     2. the best candidates are popped from both collections and offered
        to the chart;
     3. unary rules are applied to the span via `self.unary_expand`;
     4. each dot chart performs its own unary expansion for the span.

     Returns the result of `self.get_goal(True)`; per the original
     docstring this is None if no goal item is found.
     """
     self.initialize_earley()
     for i, j in cyk_spans(self.N):
         if logger.level >= 4:
             logger.writeln()
             logger.writeln('---- span (%s %s) ----' % (i, j))

         # Phase 1: finish the dot charts and collect cubes for this span.
         regular_cubes = Cube()
         virtual_cubes = Cube()
         for chart in self.dotcharts:
             if logger.level >= 4:
                 logger.writeln()
                 logger.writeln('dot chart for %s' % chart.grammar.name)
             chart.expand(i, j)
             for item in chart.bins[i][j]:
                 if not item.node.filled:
                     continue
                 for lhs, rulebin in item.node.iter_rulebins():
                     operands = (rulebin, ) + item.ants
                     if is_virtual(lhs):
                         target = virtual_cubes
                     else:
                         target = regular_cubes
                     target.add_cube(operands, self.get_cube_op(i, j))
                     self.cubes_built += 1

         if logger.level >= 4:
             logger.writeln(' -- cubes --')
             logger.writeln(regular_cubes)
             logger.writeln(' -- cubes for virtual items--')
             logger.writeln(virtual_cubes)

         # Phase 2: pop candidates, regular cubes first, then virtual ones.
         for cubes in (regular_cubes, virtual_cubes):
             for candidate in cubes.iter_top_univar(FLAGS.bin_size):
                 self.nonunary_edges_proposed += 1
                 added = self.chart.add(candidate)
                 if logger.level >= 4:
                     if added:
                         logger.writeln('cube pop and add: %s' % candidate)
                         logger.writeln(candidate.incoming[0])

         # Phase 3: apply unary rules to the span.
         self.unary_expand(i, j)

         # Phase 4: generate dot items like A->B.C (first nonterminal
         # matched); this has to wait until all unary derivations are done.
         for chart in self.dotcharts:
             if logger.level >= 4:
                 logger.writeln()
                 logger.writeln('unary expand for dot chart %s' %
                                chart.grammar.name)
             chart.unary_expand(i, j)
     return self.get_goal(True)
Пример #2
0
 def binary_expand(self, i, j):
     """Combine adjacent sub-spans of `(i, j)` through binary rules.

     For every split point strictly between `i` and `j`, each pair of
     (left, right) non-terminal bins found in the chart is looked up in
     every grammar's `itg` rule table. Each non-empty rule bin is added as
     a cube, and the candidates yielded by
     `iter_top(FLAGS.bin_size)` are then added to the chart.
     """
     if logger.level >= 4:
         logger.writeln('span %s %s' % (i, j))

     candidates = Cube()
     for split in range(i + 1, j):
         for left_nt, left_bin in self.chart.iter_items_by_nts(i, split):
             for right_nt, right_bin in self.chart.iter_items_by_nts(split,
                                                                     j):
                 nt_pair = (left_nt, right_nt)
                 for grammar in self.grammars:
                     rules = grammar.itg.get_sorted_rules(nt_pair)
                     if not rules:
                         continue
                     candidates.add_cube((rules, left_bin, right_bin),
                                         self.get_cube_op(i, j))

     for item in candidates.iter_top(FLAGS.bin_size):
         if logger.level >= 4:
             logger.writeln(item)
         self.chart.add(item)
Пример #3
0
class Node(object):
    """A hypergraph node that keeps track of its incoming hyperedges.

    Edges are expected to expose `tail` (an iterable of nodes) and
    `make_path`; their `head` attribute is set by `add_incoming`.
    """

    def __init__(self):
        self.incoming = []  # hyperedges whose head is this node
        self.nout = 0  # number of outgoing edges
        self.hg = None  # hypergraph this node belongs to
        self.best_paths_list = None  # built lazily by best_paths()

    def add_incoming(self, edge):
        """Append `edge` to the incoming edges and make this node its head."""
        self.incoming.append(edge)
        edge.head = self

    def best_paths(self):
        """returns a virtual list (class Cube) of best paths.
        node.best_paths()[i] is the i'th best path.

        The list is built on first use and cached. A node without incoming
        edges (a leaf) gets the plain list `[None]`."""
        # Identity test: `== None` would go through any user-defined
        # comparison method on the cached object.
        if self.best_paths_list is None:
            if self.incoming:
                self.best_paths_list = Cube()
                for edge in self.incoming:
                    cube = [n.best_paths() for n in edge.tail]
                    self.best_paths_list.add_cube(cube, edge.make_path)
            else:
                # initialization for leaf nodes
                self.best_paths_list = [None]
        return self.best_paths_list

    def id_str(self):
        """The id of a node is assigned after topological sort in reversed
        topological order. (Root has id 0.) Use python object id if this node
        is not assigned a id"""
        if hasattr(self, 'id'):
            return str(self.id)
        else:
            return 'obj%s' % id(self)

    def dot_label(self, detailed=True):
        """Returns label used in dot representation.

        With `detailed` the label is the id followed by the quote-escaped
        `str(self)`; otherwise it is the id alone."""
        if detailed:
            return '%s: %s' % (self.id_str(), escape_quote(str(self)))
        else:
            return '%s' % self.id_str()

    def dot(self, color='', detailed=True):
        """dot language representation of this node and its incoming edges"""
        # Collect the fragments and join once instead of repeated `+=`.
        parts = ['n%s [label="%s" style="filled" color="%s"];\n' %
                 (self.id_str(),
                  self.dot_label(detailed=detailed),
                  color)]
        # write hyperedges
        for i, edge in enumerate(self.incoming):
            edgename = 'e%s_%s' % (self.id_str(), i)
            # graph node for hyperedge
            parts.append('%s [shape="point"]\n' % edgename)
            # hyperedge head
            parts.append('%s -> n%s [label="%s"]\n' %
                         (edgename,
                          edge.head.id_str(),
                          escape_quote(str(edge)) if detailed else ''))
            # hyperedge tails
            for tailnode in edge.tail:
                parts.append('n%s -> %s [dir="none"]\n' %
                             (tailnode.id_str(), edgename))
        return ''.join(parts)

    def neighbors(self, max_dist=3):
        """return a set of nodes who are within max_dist of self

        The search is breadth-first. Tail nodes of incoming edges are always
        followed; heads of outgoing edges are followed only for nodes that
        actually carry an `outgoing` attribute."""
        # Local import keeps this block self-contained.
        from collections import deque

        # TODO: this may have problems because the set doesn't
        # compare object id but uses user defined comparison methods
        found = set()
        found.add(self)
        queue = deque([(self, 0)])
        while queue:
            node, d = queue.popleft()
            if d < max_dist:
                # Outgoing edges are no longer saved on nodes, so a missing
                # `outgoing` attribute is treated as "no outgoing edges"
                # rather than raising AttributeError.
                for edge in getattr(node, 'outgoing', ()):
                    if edge.head not in found:
                        found.add(edge.head)
                        queue.append((edge.head, d + 1))
                for edge in node.incoming:
                    for tailnode in edge.tail:
                        if tailnode not in found:
                            found.add(tailnode)
                            queue.append((tailnode, d + 1))
        return found

    def serialize(self):
        """Return the node id wrapped in square brackets, e.g. '[3]'.

        Requires `self.id` to have been assigned."""
        return '[%s]' % self.id

    def deserialize(self, s):
        """Set `self.id` from a string of the form '[<int>]'.

        Surrounding whitespace is ignored."""
        s = s.strip()
        assert s.startswith('[') and s.endswith(']')
        self.id = int(s[1:-1])

    def show_neighborhood(self, max_dist=3, detailed=True):
        """show the neighborhood of this node in a picture

        Writes the dot source to /tmp/dotty, renders it to /tmp/dotty.gif
        with `dot`, and opens the result with `eog`."""
        fragments = []
        for node in self.neighbors(max_dist):
            if node is self:
                # highlight the node the picture is centred on
                fragments.append(node.dot(color='dodgerblue',
                                          detailed=detailed))
            else:
                fragments.append(node.dot(detailed=detailed))
        dotstr = 'digraph hypergraph {\nrankdir=BT\n%s}\n' % ''.join(fragments)
        # Context manager closes the file even if the write fails.
        with open('/tmp/dotty', 'w') as f:
            f.write(dotstr)
        os.system('cat /tmp/dotty | dot -Tgif > /tmp/dotty.gif')
        os.system('eog /tmp/dotty.gif')