Пример #1
0
def compute_crossings(a, r, cumul):
    """Report which nonterminal slots of rule ``r`` have crossing alignments.

    Every source position of the rule is paired with every target position.
    For a pair of coindexed nonterminals (both positions are tuples and
    ``sym.getindex`` agrees on the two symbols), two regions inside the rule
    span ``r.span`` are queried through ``region_aligned``; a positive total
    sets the flag of that nonterminal.

    Args:
        a: alignment object; ``a.aligned[fi][ei]`` is read for terminal pairs.
        r: rule object providing ``f``, ``e``, ``fpos``, ``epos`` and ``span``.
        cumul: passed through unchanged to ``region_aligned``.

    Returns:
        A two-element list of 0/1 flags, indexed by ``sym.getindex(x) - 1``
        of the source-side nonterminal.
    """
    crossed = [0, 0]
    (pfi, pfj, pei, pej) = r.span
    for rfi in range(len(r.f)):
        for rei in range(len(r.e)):
            fpos = r.fpos[rfi]
            epos = r.epos[rei]
            if type(fpos) is int and type(epos) is int:
                # Both terminals.
                # NOTE(review): the outcome of this check does not influence
                # the returned flags; the calls and the early exit are kept
                # because it cannot be confirmed from here that
                # ``region_aligned`` is free of side effects.
                if not a.aligned[fpos][epos]:
                    continue
                if (region_aligned(cumul, pfi, fpos, epos + 1, pej) +
                        region_aligned(cumul, fpos + 1, pfj, pei, epos)) > 0:
                    break
            elif type(fpos) is tuple and type(epos) is tuple:
                if sym.getindex(r.f[rfi]) != sym.getindex(r.e[rei]):
                    continue
                # Coindexed nonterminals: at most one partner per source
                # position, so stop scanning once it has been examined.
                (fi, fj) = fpos
                (ei, ej) = epos
                if (region_aligned(cumul, pfi, fi, ej, pej) +
                        region_aligned(cumul, fj, pfj, pei, ei)) > 0:
                    crossed[sym.getindex(r.f[rfi]) - 1] = 1
                break
    return crossed
Пример #2
0
 def _tree_helper(t, antvalues):
     """Parse ``t`` into a tree and attach antecedent values to variable leaves.

     ``t`` is handed to ``tree.str_to_tree``.  Each frontier node whose label
     converts (via ``sym.fromstring``) to a variable symbol receives, as its
     first child, the entry of ``antvalues`` selected by that variable's
     index.  The resulting tree is returned.
     """
     parsed = tree.str_to_tree(t)
     for leaf in parsed.frontier():
         label = sym.fromstring(leaf.label)
         if not sym.isvar(label):
             continue
         # getindex() is shifted down by one to address antvalues.
         leaf.insert_child(0, antvalues[sym.getindex(label) - 1])
     return parsed
Пример #3
0
 def _tree_helper(t, antvalues):
     """Build the tree described by ``t`` with antecedents spliced in.

     The string ``t`` is converted with ``tree.str_to_tree``.  For every
     frontier node labelled with a variable symbol, the matching element of
     ``antvalues`` is inserted as child 0 of that node.  Returns the tree.
     """
     root = tree.str_to_tree(t)
     for frontier_node in root.frontier():
         symbol = sym.fromstring(frontier_node.label)
         if sym.isvar(symbol):
             # Position in antvalues is the variable index minus one.
             slot = sym.getindex(symbol) - 1
             frontier_node.insert_child(0, antvalues[slot])
     return root
Пример #4
0
        def visit(item):
            ded = self.ded[id(item)]
            if ded.rule:
                align = collections.defaultdict(list)
                if 'align' in ded.rule.attrs:
                    for fi, ei in ded.rule.attrs['align']:
                        align[ei].append(fi)

                result = []
                j1 = None
                for ei, e in enumerate(ded.rule.e):
                    if sym.isvar(e):
                        result.extend(visit(ded.ants[sym.getindex(e) - 1]))
                    else:
                        if len(ded.ants) == 2:
                            j1 = ded.ants[0].j
                        else:
                            j1 = None
                        result.append([
                            ded.rule.f.stringpos(fi, item.i, item.j, j1)
                            for fi in align[ei]
                        ])
                print ded.rule, item.i, item.j, j1, result
                return result
            else:
                return visit(ded.ants[0])
Пример #5
0
def _ded_to_xml(node, result, memo, mode, models, weights):
    if weights:
        result.append('<and label=%s cost=%s>' % (xml.sax.saxutils.quoteattr(str(id(node.rule))),
                                                  xml.sax.saxutils.quoteattr(str(weights.dot(node.dcost)))))
    else:
        result.append('<and label=%s>' % (xml.sax.saxutils.quoteattr(str(id(node)))))

    result.append('<features>')
    for f,v in node.dcost.iteritems():
        result.append('<feature name=%s value=%s/>' % (xml.sax.saxutils.quoteattr(f), xml.sax.saxutils.quoteattr(str(v))))
    result.append('</features>')

    if mode == 'french':
        children = node.rule.f if node.rule else node.ants
    elif mode == 'english':
        children = node.rule.e if node.rule else node.ants
    else:
        children = node.ants

    for child in children:
        if isinstance(child, Item):
            _item_to_xml(child, result, memo, mode=mode, models=models, weights=weights)
        elif sym.isvar(child):
            _item_to_xml(node.ants[sym.getindex(child)-1], result, memo, mode=mode, models=models, weights=weights)
        else:
            result.append('<leaf label=%s/>' % xml.sax.saxutils.quoteattr(sym.tostring(child)))
    result.append('</and>')
Пример #6
0
def _ded_to_text(node, result, memo, mode=None, weights=None):
    # Convert rule and features into single tokens
    #vstr = ",".join("%s:%s" % (quotefeature(f),node.dcost[f]) for f in node.dcost)
    vstr = "cost:%s" % weights.dot(node.dcost)
    #rstr = id(node.rule)
    rstr = id(node)
    s = "%s<%s>" % (rstr,vstr)
    if False and len(node.ants) == 0: # the format allows this but only if we don't tag with an id. but we tag everything with an id
        result.append(s)
    else:
        result.append('(')
        result.append(s)
        if mode == 'french':
            children = node.rule.f if node.rule else node.ants
        elif mode == 'english':
            children = node.rule.e if node.rule else node.ants
        else:
            children = node.ants

        for child in children:
            if isinstance(child, Item):
                result.append(' ')
                _item_to_text(child, result, memo, mode=mode, weights=weights)
            elif sym.isvar(child):
                result.append(' ')
                _item_to_text(node.ants[sym.getindex(child)-1], result, memo, mode=mode, weights=weights)
            else:
                result.append(' ')
                result.append(quoteattr(sym.tostring(child)))
        result.append(')')
Пример #7
0
 def _fake_tree_helper(lhs, rhs, antvalues):
     """Build a ``tree.Node`` for ``lhs`` whose children follow ``rhs``.

     Each variable symbol in ``rhs`` is replaced by the entry of
     ``antvalues`` selected by its index; every other symbol becomes a
     childless node labelled with ``sym.tostring`` of the symbol.  The
     parent node is labelled with ``sym.totag(lhs)``.
     """
     children = [
         antvalues[sym.getindex(x) - 1] if sym.isvar(x)
         else tree.Node(sym.tostring(x), [])
         for x in rhs
     ]
     return tree.Node(sym.totag(lhs), children)
Пример #8
0
 def _fake_tree_helper(lhs, rhs, antvalues):
     """Assemble a one-level tree from a rule's left- and right-hand side.

     Returns ``tree.Node(sym.totag(lhs), kids)`` where ``kids`` holds, in
     ``rhs`` order, the antecedent value for each variable symbol and a
     fresh leaf node for each non-variable symbol.
     """
     kids = []
     for symbol in rhs:
         if not sym.isvar(symbol):
             # Plain symbol: wrap its string form in a leaf node.
             kids.append(tree.Node(sym.tostring(symbol), []))
             continue
         # Variable: its index minus one selects the antecedent value.
         kids.append(antvalues[sym.getindex(symbol) - 1])
     return tree.Node(sym.totag(lhs), kids)
Пример #9
0
def forest_to_json(root, fwords=None, mode=None, models=None, weights=None):
    """Serialise the forest reachable from ``root`` as a JSON-formatted string.

    The output object contains, in order: an optional ``"source"`` array
    (only when ``fwords`` is non-empty; integer entries are converted with
    ``sym.tostring``), a ``"nodes"`` array with one object per item yielded
    by iterating ``root``, the ``"root"`` index, and an ``"edges"`` array
    with one object per deduction.

    ``mode`` picks each deduction's tails: ``'french'`` / ``'english'`` use
    the rule's ``f`` / ``e`` side when the deduction has a rule, otherwise
    the antecedents are used.  ``models`` and ``weights`` are accepted but
    not used here.
    """
    pieces = ['{\n']

    if fwords:
        words = []
        for fword in fwords:
            words.append(sym.tostring(fword) if type(fword) is int else fword)
        quoted_words = [quotejson(word) for word in words]
        pieces.append('  "source": [%s],\n' % ",".join(quoted_words))

    items = list(root)
    index_of = {}
    node_lines = []
    for position, item in enumerate(items):
        index_of[item] = position
        if item is root:
            root_position = position
        if item.x is not None:
            node_lines.append('    {"label": %s}' % quotejson(sym.totag(item.x)))
        else:
            node_lines.append('    {}')
    pieces.append('  "nodes": [\n%s\n  ],\n' % ",\n".join(node_lines))

    pieces.append('  "root": %d,\n' % root_position)

    edge_lines = []
    for head, item in enumerate(items):
        for ded in item.deds:
            if mode == 'french' and ded.rule:
                children = ded.rule.f
            elif mode == 'english' and ded.rule:
                children = ded.rule.e
            else:
                children = ded.ants

            tails = []
            for child in children:
                if isinstance(child, Item):
                    tails.append(str(index_of[child]))
                elif sym.isvar(child):
                    # Variables refer to the antecedent at index - 1.
                    antecedent = ded.ants[sym.getindex(child) - 1]
                    tails.append(str(index_of[antecedent]))
                else:
                    tails.append(quotejson(sym.tostring(child)))

            feature_pairs = ",".join("%s:%s" % (quotejson(name), value)
                                     for (name, value) in ded.dcost.iteritems())
            edge_lines.append(
                '    {"head": %s, "tails": [%s], "features": %s}\n' %
                (head, ",".join(tails), "{%s}" % feature_pairs))

    pieces.append('  "edges": [\n%s\n  ]\n' % ",\n".join(edge_lines))
    pieces.append('}')
    return "".join(pieces)
Пример #10
0
def forest_to_json(root, fwords=None, mode=None, models=None, weights=None):
    """Render a forest as a JSON-formatted string.

    Iterating ``root`` yields the items; each gets a slot in ``"nodes"``
    (with a ``"label"`` when ``item.x`` is not ``None``).  ``"root"`` is the
    slot of ``root`` itself and ``"edges"`` lists every deduction with its
    head slot, its tails and its ``dcost`` features.  A ``"source"`` array
    is written first when ``fwords`` is non-empty.

    ``mode`` of ``'french'`` / ``'english'`` takes the tails from the rule's
    ``f`` / ``e`` side for deductions that have a rule; all other cases use
    ``ded.ants``.  ``models`` and ``weights`` are not used by this function.
    """
    out = []
    out.append('{\n')

    if fwords:
        fwords = [fword if type(fword) is not int else sym.tostring(fword)
                  for fword in fwords]
        source = ",".join(quotejson(fword) for fword in fwords)
        out.append('  "source": [%s],\n' % source)

    items = list(root)
    slots = {}
    node_entries = []
    for slot, item in enumerate(items):
        slots[item] = slot
        if item is root:
            root_slot = slot
        if item.x is None:
            entry = '    {}'
        else:
            entry = '    {"label": %s}' % quotejson(sym.totag(item.x))
        node_entries.append(entry)
    out.append('  "nodes": [\n%s\n  ],\n' % ",\n".join(node_entries))

    out.append('  "root": %d,\n' % root_slot)

    edge_entries = []
    for slot, item in enumerate(items):
        for ded in item.deds:
            children = ded.ants
            if mode == 'french':
                if ded.rule:
                    children = ded.rule.f
            elif mode == 'english':
                if ded.rule:
                    children = ded.rule.e

            tail_strs = []
            for child in children:
                if isinstance(child, Item):
                    tail = str(slots[child])
                elif sym.isvar(child):
                    tail = str(slots[ded.ants[sym.getindex(child) - 1]])
                else:
                    tail = quotejson(sym.tostring(child))
                tail_strs.append(tail)

            feature_strs = ("%s:%s" % (quotejson(f), v)
                            for (f, v) in ded.dcost.iteritems())
            features = "{%s}" % ",".join(feature_strs)
            edge_entries.append('    {"head": %s, "tails": [%s], "features": %s}\n'
                                % (slot, ",".join(tail_strs), features))

    out.append('  "edges": [\n%s\n  ]\n' % ",\n".join(edge_entries))

    out.append('}')
    return "".join(out)
Пример #11
0
def compute_crossings(a, r, cumul):
    """Compute per-nonterminal crossing flags for rule ``r``.

    All (source position, target position) pairs of the rule are examined.
    When both positions are tuples and the two symbols carry the same
    ``sym.getindex`` value, ``region_aligned`` is asked about two regions of
    the rule span ``r.span``; if their sum is positive the flag for that
    nonterminal is set.

    Args:
        a: alignment object; ``a.aligned[fi][ei]`` is consulted for pairs of
            terminals (integer positions).
        r: rule object with ``f``, ``e``, ``fpos``, ``epos`` and ``span``.
        cumul: forwarded unchanged to ``region_aligned``.

    Returns:
        ``[flag1, flag2]`` where each flag is 0 or 1 and the slot is
        ``sym.getindex(symbol) - 1`` of the source-side nonterminal.
    """
    flags = [0, 0]
    (pfi, pfj, pei, pej) = r.span
    for rfi in range(len(r.f)):
        for rei in range(len(r.e)):
            src = r.fpos[rfi]
            tgt = r.epos[rei]
            if type(src) is int and type(tgt) is int:
                # Both terminals.
                # NOTE(review): nothing computed here reaches the returned
                # flags; the lookups and the early exit are retained because
                # ``region_aligned`` cannot be shown to be side-effect free
                # from this block alone.
                if a.aligned[src][tgt]:
                    crossing = (region_aligned(cumul, pfi, src, tgt + 1, pej) +
                                region_aligned(cumul, src + 1, pfj, pei, tgt))
                    if crossing > 0:
                        break
            elif type(src) is tuple and type(tgt) is tuple:
                if sym.getindex(r.f[rfi]) == sym.getindex(r.e[rei]):
                    # Coindexed nonterminals: examine the pair once, then
                    # move on to the next source position.
                    (fi, fj) = src
                    (ei, ej) = tgt
                    crossing = (region_aligned(cumul, pfi, fi, ej, pej) +
                                region_aligned(cumul, fj, pfj, pei, ei))
                    if crossing > 0:
                        flags[sym.getindex(r.f[rfi]) - 1] = 1
                    break
    return flags
Пример #12
0
def _ded_to_xml(node, result, memo, mode, models, weights):
    if weights:
        result.append(
            '<and label=%s cost=%s>' %
            (xml.sax.saxutils.quoteattr(str(id(node.rule))),
             xml.sax.saxutils.quoteattr(str(weights.dot(node.dcost)))))
    else:
        result.append('<and label=%s>' %
                      (xml.sax.saxutils.quoteattr(str(id(node)))))

    result.append('<features>')
    for f, v in node.dcost.iteritems():
        result.append('<feature name=%s value=%s/>' %
                      (xml.sax.saxutils.quoteattr(f),
                       xml.sax.saxutils.quoteattr(str(v))))
    result.append('</features>')

    if mode == 'french':
        children = node.rule.f if node.rule else node.ants
    elif mode == 'english':
        children = node.rule.e if node.rule else node.ants
    else:
        children = node.ants

    for child in children:
        if isinstance(child, Item):
            _item_to_xml(child,
                         result,
                         memo,
                         mode=mode,
                         models=models,
                         weights=weights)
        elif sym.isvar(child):
            _item_to_xml(node.ants[sym.getindex(child) - 1],
                         result,
                         memo,
                         mode=mode,
                         models=models,
                         weights=weights)
        else:
            result.append('<leaf label=%s/>' %
                          xml.sax.saxutils.quoteattr(sym.tostring(child)))
    result.append('</and>')
Пример #13
0
def _ded_to_text(node, result, memo, mode=None, weights=None):
    # Convert rule and features into single tokens
    #vstr = ",".join("%s:%s" % (quotefeature(f),node.dcost[f]) for f in node.dcost)
    # lhuang: in case no weights
    vstr = "cost:%s" % weights.dot(node.dcost) if weights is not None \
           else "_"
    rstr = id(node.rule)
    #rstr = id(node)
    s = "ruleid=%s<value=%s>" % (rstr,vstr)
    print "\truleid=%s" % rstr,
    
    if False and len(node.ants) == 0: # the format allows this but only if we don't tag with an id. but we tag everything with an id
        result.append(s)
    else:
        result.append('(')
        result.append(s)
        if mode == 'french':
            children = node.rule.f if node.rule else node.ants
        elif mode == 'english':
            # lhuang: default mode: english side
            children = node.rule.e if node.rule else node.ants
        else:
            children = node.ants

        for child in children:
            if isinstance(child, Item):
                result.append(' it ')
                _item_to_text(child, result, memo, mode=mode, weights=weights)
            elif sym.isvar(child):
                # lhuang: variable, do recursion
                result.append(' var ')
                _item_to_text(node.ants[sym.getindex(child)-1], result, memo, mode=mode, weights=weights)
            else:
                # lhuang: english word
                result.append(' word ')
                w = quoteattr(sym.tostring(child))
                result.append(w)
                print w,
        result.append(')')

    print # end of a hyperedge
Пример #14
0
        def visit(item):
            ded = self.ded[id(item)]
            if ded.rule:
                align = collections.defaultdict(list)
                if 'align' in ded.rule.attrs:
                    for fi, ei in ded.rule.attrs['align']:
                        align[ei].append(fi)

                result = []
                j1 = None
                for ei, e in enumerate(ded.rule.e):
                    if sym.isvar(e):
                        result.extend(visit(ded.ants[sym.getindex(e)-1]))
                    else:
                        if len(ded.ants) == 2:
                            j1 = ded.ants[0].j
                        else:
                            j1 = None
                        result.append([ded.rule.f.stringpos(fi, item.i, item.j, j1) for fi in align[ei]])
                print ded.rule, item.i, item.j, j1, result
                return result
            else:
                return visit(ded.ants[0])
Пример #15
0
def _ded_to_text(node, result, memo, mode=None, weights=None):
    # Convert rule and features into single tokens
    #vstr = ",".join("%s:%s" % (quotefeature(f),node.dcost[f]) for f in node.dcost)
    vstr = "cost:%s" % weights.dot(node.dcost)
    #rstr = id(node.rule)
    rstr = id(node)
    s = "%s<%s>" % (rstr, vstr)
    if False and len(
            node.ants
    ) == 0:  # the format allows this but only if we don't tag with an id. but we tag everything with an id
        result.append(s)
    else:
        result.append('(')
        result.append(s)
        if mode == 'french':
            children = node.rule.f if node.rule else node.ants
        elif mode == 'english':
            children = node.rule.e if node.rule else node.ants
        else:
            children = node.ants

        for child in children:
            if isinstance(child, Item):
                result.append(' ')
                _item_to_text(child, result, memo, mode=mode, weights=weights)
            elif sym.isvar(child):
                result.append(' ')
                _item_to_text(node.ants[sym.getindex(child) - 1],
                              result,
                              memo,
                              mode=mode,
                              weights=weights)
            else:
                result.append(' ')
                result.append(quoteattr(sym.tostring(child)))
        result.append(')')