Exemplo n.º 1
0
def parse(tree, classifier, tagger=False, tagsize=5, singlestep=False, silent=True, stackwindow=2, qwindow=3, keepscore=True, preclassified=False, justParse=False):
    """
    Parse one sentence by repeatedly applying classifier-chosen transitions.

    Parameters:
        tree: either a tb.SENTENCE (its dtree, or its goldstandard and
            leaves, are used to build the initial state) or raw input that
            is handed to tagger.tag() to obtain (form, tag) pairs.
        classifier: object providing .features, .qwindow, .stackwindow and
            .classify(instance, printing=False).
        tagger: object with a .tag() method; only needed when tree is not
            a tb.SENTENCE.
        tagsize: every word's tag is truncated to this many characters.
        singlestep: if true, show each state and call ss() before every
            transition.
        silent: value assigned to malt.SILENT.
        stackwindow, qwindow: accepted for backward compatibility but not
            used; the state descriptor is built with classifier.stackwindow
            and classifier.qwindow.
        keepscore: if false, return the built tree instead of a score.
        preclassified: if true-ish, it is passed to usePreclassifiedHDs()
            together with the relations found, and the parse is re-scored.
        justParse: if true, return the sentence with .parsed set to the
            relations found and do no scoring.

    Returns:
        - the tb.SENTENCE (with .parsed set) when justParse is true;
        - malt.buildtree(relations, words) when the input was not a
          tb.SENTENCE (there is no gold standard to score against) or when
          keepscore is false;
        - otherwise the tuple (score, len(goldstandard), len(final stack)).
    """
    malt.SILENT = silent
    # The previous test, type(tree) == "SENTENCE", compared a type object
    # with a string and so was always False: every input fell through to
    # the tagger branch. Use a real type check instead.
    is_sentence = isinstance(tree, tb.SENTENCE)
    if is_sentence:
        if tree.dtree:
            s, goldstandard = malt.dtree2state(tree)
        else:
            goldstandard = tree.goldstandard
            s = malt.STATE(text=tree.leaves)
    else:
        words = [malt.WORD(w[0], w[1]) for w in tagger.tag(tree)]
        tree = tb.SENTENCE(False, False, False, False)
        tree.leaves = words
        s = malt.STATE(text=words)
        goldstandard = False
    for w in s.words:
        w.tag = w.tag[:tagsize]
    features = sorted(classifier.features.keys())
    agenda = [s]
    s.score = 0.0
    warn = True
    while agenda:
        s = agenda.pop()
        if s.queue == []:
            # What we'd like to do (pace Sardar) is to insist that the
            # stack should have only one item on it, and if not then we go
            # on to the next task on the agenda. That works nicely if we
            # use the head percolation table that has CC as the head
            # whenever possible; but overall the one that has CC as worst
            # choice for the head does better (as with Maytham), and in
            # that case trying to get to a nice terminal state by choosing
            # other options from the agenda gets stuck. So we have to do
            # something more simple-minded: attaching the things that
            # haven't been attached to anything to their neighbours seems
            # to work quite nicely. It's not exactly systematic, but it
            # gets a surprising number of things right.
            for dtr, hd in zip(s.stack, s.stack[1:]):
                s.relations[dtr.position] = malt.RELATION(hd.position, dtr.position, 'mod')
        d = s.stateDescriptor(False, qwindow=classifier.qwindow, stackwindow=classifier.stackwindow)
        t = id3.INSTANCE(features, d)
        # The classifier can return several options: it would be nice to
        # weigh them up, using influences from the grammar, and then order
        # the agenda to pay due attention to the confidence of the
        # classifier and the constraints from the grammar, but no useful
        # influences have been found. So the best one is chosen and used.
        # It is left as a loop, even though the list currently only ever
        # has one item on it, so that it can be revisited later.
        actions = sortTable(classifier.classify(t, printing=False))
        if actions == []:
            print("no action found")
        for action in actions:
            s1 = s.copy()
            s1.score = s.score+10
            if singlestep:
                print('***********************************')
                s1.showState()
                ss("action %s\ns.relations %s\nd %s\n"%(action, s.relations, d))
            # Look the transition up by name rather than eval()-ing a
            # string built from classifier output.
            transition = getattr(malt.STATE, str(action[0]))
            try:
                if transition(s1, warnings=warn, throwException=True):
                    agenda.append(s1)
                    break
            except Exception:
                # Deliberate best effort: a transition that cannot be
                # applied falls back to a shift while there is still input,
                # and otherwise ends the search.
                if agenda == []:
                    if not s1.queue == []:
                        malt.STATE.shift(s1, warnings=warn)
                        agenda.append(s1)
                    else:
                        break
        agenda.sort(cmp=compstates)
    tree.parsed = s.relations
    if justParse:
        return tree
    # The previous test, type(tree) == "str", could never succeed (same
    # type-vs-string comparison, and tree has been rebound to a SENTENCE by
    # now), so unscored input went on to len(goldstandard) with
    # goldstandard = False and raised TypeError.
    if not is_sentence or not keepscore:
        return malt.buildtree(s.relations, s.words)
    right = scoreState(goldstandard, s, tree)
    if preclassified:
        usePreclassifiedHDs(preclassified, s.relations)
        right = scoreState(goldstandard, s, tree)
    return right, len(goldstandard), len(s.stack)
Exemplo n.º 2
0
def forceparse(s, goldstandard=False, irelns=False, top=False):
    """
    Search recursively for a sequence of leftArc/rightArc/shift steps on
    state s that is licensed by goldstandard, backtracking with s.undo().

    Parameters:
        s: the state to work on. On the top-level call with no goldstandard
            supplied it is read as a sentence-like object (s.dtree,
            s.goldstandard, s.leaves) and replaced by a parser state.
        goldstandard: mapping indexed by dependent position whose values
            expose .hd, .dtr and .rel; False means "derive it from s"
            (only done when top is true).
        irelns: inverse index {head: {dependent: True}} built from
            goldstandard; False means "build it" (only done when top is
            true).
        top: true on the outermost call only; triggers the one-off set-up.

    Returns:
        The state s once its queue is empty, or False if no licensed step
        (including the final shift) leads to such a state.
    """
    if top:
        # One-off initialisation for the outermost call.
        malt.SILENT = True
        s.actions = []
        if goldstandard == False:
            if s.dtree:
                x, goldstandard = malt.dtree2state(s)
                s.goldstandard = goldstandard
                s = x
            else:
                goldstandard = s.goldstandard
                s = malt.STATE(queue=s.leaves)
                s.words = [w for w in s.queue]
        else:
            # A gold standard was supplied: copy each relation's label onto
            # the word at the relation's dependent index.
            for r in goldstandard:
                r = goldstandard[r]
                (s.words[r.dtr]).label = r.rel
        if irelns == False:
            # Build head -> {dependent: True} from the gold standard.
            irelns = {}
            for d in goldstandard:
                h = goldstandard[d].hd
                if not h in irelns:
                    irelns[h] = {}
                irelns[h][d] = True
    malt.REPLAY = False
    # NOTE: these are references taken before any step below mutates s.
    queue = s.queue
    stack = s.stack
    if queue == []:
        # NOTE(review): this unconditional return makes the single-item
        # stack check and the satisfied() check below unreachable. It looks
        # like the stricter termination test was switched off on purpose -
        # confirm before removing either the return or the dead code.
        return s
        if not len(stack) == 1:
            return False
        if satisfied(goldstandard, s):
            return s
        else:
            return False
    if not stack == [] and not queue == []:
        q0 = queue[0]
        s0 = stack[0]
        """
        leftArc: hd of the queue is hd, top of the stack is dtr.
        We want to check that this is indeed a targeted relation, i.e. that

            goldstandard[s[0].position] = q[0].position > s[0].position

        AND that all q[0] daughters have already been found
        
        """
        for i in range(len(stack)):
            # Comment this out for LDD (currently broken)
            # With the break in place only stack[0] is ever considered.
            if i > 0: break  
            d = stack[i]
            # alldtrsfound is called with the candidate dependent's own
            # position (d.position), irelns and the relations found so far.
            if d.position in goldstandard and goldstandard[d.position].hd == q0.position and alldtrsfound(d.position, irelns, s.relations):
                s.leftArc(d.label, i=i)
                r = forceparse(s, goldstandard=goldstandard, irelns=irelns)
                if r:
                    return r
                else:                    
                    # Dead end: take the leftArc back and try other options.
                    s.undo()
        """
        rightArc: top of the stack is hd, hd of the queue is dtr.
        We want to check that this is indeed a targeted relation, i.e. that

            goldstandard[q[0].position] = s[0].position > q[0].position

        AND that all q[0] daughters have already been found
        """
        for i in range(len(queue)):
            # Comment this out for LDD (currently broken)
            # With the break in place only queue[0] is ever considered.
            if i > 0: break
            d = queue[i]
            if d.position in goldstandard and goldstandard[d.position].hd == s0.position and alldtrsfound(d.position, irelns, s.relations):
                s.rightArc(d.label, i=i)
                r = forceparse(s, goldstandard=goldstandard, irelns=irelns)
                if r:
                    return r
                else:
                    # Dead end: take the rightArc back before falling
                    # through to shift.
                    s.undo()
    # No arc led to a result (or none was licensed): try a shift.
    s.shift()
    r = forceparse(s, goldstandard=goldstandard, irelns=irelns)
    if r:
        return r
    else:
        # Restore the state for the caller and report failure.
        s.undo()
        return False