Exemplo n.º 1
0
def work(line, i):
    global totalscore, totalstates, totaledges, totaluniq, totaltime, totalprec

    line = line.strip()
    if line[0] == "(":
        # input is a gold tree (so that we can evaluate)
        reftree = DepTree.parse(line)
        sentence = DepTree.sent  # assigned in DepTree.parse()
    else:
        # input is word/tag list
        reftree = None
        sentence = [tuple(x.rsplit("/", 1))
                    for x in line.split()]  # split by default returns list
        DepTree.sent = sentence

    if FLAGS.debuglevel >= 1:
        print >> logs, sentence
        print >> logs, reftree

    mytime.zero()

    if FLAGS.sim is not None:  # simulation, not parsing
        actions = map(int, sequencefile.readline().split())
        goal, feats = parser.simulate(actions,
                                      sentence)  #if model is None score=0
        print >> logs, feats
        score, tree = goal.score, goal.top()
        (nstates, nedges, nuniq) = (0, 0, 0)
    else:
        # real parsing
        if True:  #FLAGS.earlystop:
            refseq = reftree.seq() if reftree is not None else None
            tree, myseq, score = parser.try_parse(sentence,
                                                  refseq,
                                                  early_stop=FLAGS.earlystop)
            if FLAGS.earlystop:
                print >> logs, "ref=", refseq
                print >> logs, "myt=", myseq

                refseq = refseq[:len(myseq)]  # truncate
                _, reffeats = parser.simulate(refseq, sentence)
                _, myfeats = parser.simulate(myseq, sentence)
                print >> logs, "feat diff=", Model.trim(reffeats - myfeats)

            nstates, nedges, nuniq = parser.stats()
        else:
            goal = parser.parse(sentence)
            nstates, nedges, nuniq = parser.stats()

    dtime = mytime.period()

    if not FLAGS.earlystop and not FLAGS.profile:
        if FLAGS.forest:
            parser.dumpforest(i)
        else:
            if not FLAGS.kbest:
                toprint = str(tree)
            else:
                stuff = parser.beams[-1][:FLAGS.kbest]
                toprint = "sent.%d\t%d" % (i, len(stuff))
                toprint += [
                    "%.2f\t%s" % (state.score, state.tree()) for state in stuff
                ]

        if FLAGS.oracle:
            oracle, oracletree = parser.forestoracle(reftree)
            totaloracle += oracle

    prec = DepTree.compare(tree, reftree)  # OK if either is None

    searched = sum(x.derivation_count()
                   for x in parser.beams[-1]) if FLAGS.forest else 0
    print >> logs, "sent {i:-4} (len {l}):\tmodelcost= {c:.2f}\tprec= {p:.2%}"\
          "\tstates= {ns} (uniq {uq})\tedges= {ne}\ttime= {t:.3f}\tsearched= {sp}" \
          .format(i=i, l=len(sentence), c=score, p=prec.prec(), \
                  ns=nstates, uq=nuniq, ne=nedges, t=dtime, sp=searched)
    if FLAGS.seq:
        actions = goal.all_actions()
        print >> logs, " ".join(actions)
        check = simulate(actions, sentence, model)  #if model is None score=0
        checkscore = check.score
        checktree = check.top()
        print >> logs, checktree
        checkprec = checktree.evaluate(reftree)
        print >> logs, "verify: tree:%s\tscore:%s\tprec:%s" % \
              (tree == checktree, score == checkscore, prec == checkprec)
        print >> logs, "sentence %-4d (len %d): modelcost= %.2lf\tprec= %.2lf\tstates= %d (uniq %d)\tedges= %d\ttime= %.3lf" % \
              (i, len(sentence), checkscore, checkprec.prec100(), nstates, nuniq, nedges, dtime)

    totalscore += score
    totalstates += nstates
    totaledges += nedges
    totaluniq += nuniq
    totaltime += dtime

    totalprec += prec

    return toprint
Exemplo n.º 2
0
def main():

    if FLAGS.sim is not None:
        sequencefile = open(FLAGS.sim)

    parser = Parser(model, b=FLAGS.beam)

    print >> logs, "memory usage before parsing: ", human(memory(start_mem))

    totalscore = 0
    totalstates = 0
    totaluniq = 0
    totaledges = 0
    totaltime = 0

    totalprec = DepVal()    
    totaloracle = DepVal()

    print >> logs, "gc.collect unreachable: %d" % gc.collect()

    if FLAGS.manual_gc:
        gc.disable()
    
    i = 0
    gctime = 0
    for i, line in enumerate(shell_input(), 1):

        if FLAGS.manual_gc and i % FLAGS.gc == 0:
            print >> logs, "garbage collection...",
            tt = time.time()
            print >> logs, "gc.collect unreachable: %d" % gc.collect()
            tt = time.time() - tt
            print >> logs, "took %.1f seconds" % tt
            gctime += tt

        line = line.strip()
        if line[0]=="(":
            # input is a gold tree (so that we can evaluate)
            reftree = DepTree.parse(line)
            sentence = DepTree.sent # assigned in DepTree.parse()            
        else:
            # input is word/tag list
            reftree = None
            sentence = [tuple(x.rsplit("/", 1)) for x in line.split()]   # split by default returns list            
            DepTree.sent = sentence

        if FLAGS.debuglevel >= 1:
            print >> logs, sentence
            print >> logs, reftree

        mytime.zero()
        
        if FLAGS.sim is not None: # simulation, not parsing
            actions = map(int, sequencefile.readline().split())
            goal, feats = parser.simulate(actions, sentence) #if model is None score=0
            print >> logs, feats
            score, tree = goal.score, goal.top()
            (nstates, nedges, nuniq) = (0, 0, 0)
        else:
            # real parsing
            if True: #FLAGS.earlystop:
                refseq = reftree.seq() if reftree is not None else None
                tree, myseq, score, _ = parser.try_parse(sentence, refseq, update=False)
                if FLAGS.early:
                    print >> logs, "ref=", refseq
                    print >> logs, "myt=", myseq

                    refseq = refseq[:len(myseq)] # truncate
                    _, reffeats = parser.simulate(refseq, sentence) 
                    _, myfeats = parser.simulate(myseq, sentence)
                    print >> logs, "+feats", reffeats
                    print >> logs, "-feats", myfeats
                    
                nstates, nedges, nuniq = parser.stats()
            else:
                goal = parser.parse(sentence)
                nstates, nedges, nuniq = parser.stats()

##        score, tree = goal.score, goal.top()
#        score, tree = mytree
            
        dtime = mytime.period()

        if not FLAGS.early and not FLAGS.profile:
            if FLAGS.forest:
                parser.dumpforest(i)
            elif FLAGS.output:
                if not FLAGS.kbest:
                    print tree
                else:
                    stuff = parser.beams[-1][:FLAGS.kbest]
                    print "sent.%d\t%d" % (i, len(stuff))
                    for state in stuff:
                        print "%.2f\t%s" % (state.score, state.tree())
                    print
                    
            if FLAGS.oracle:
                oracle, oracletree = parser.forestoracle(reftree)
                totaloracle += oracle

        prec = DepTree.compare(tree, reftree) # OK if either is None

        searched = sum(x.derivation_count() for x in parser.beams[-1]) if FLAGS.forest else 0
        print >> logs, "sent {i:-4} (len {l}):\tmodelcost= {c:.2f}\tprec= {p:.2%}"\
              "\tstates= {ns} (uniq {uq})\tedges= {ne}\ttime= {t:.3f}\tsearched= {sp}" \
              .format(i=i, l=len(sentence), c=score, p=prec.prec(), \
                      ns=nstates, uq=nuniq, ne=nedges, t=dtime, sp=searched)
        if FLAGS.seq:
            actions = goal.all_actions()
            print >> logs, " ".join(actions)
            check = simulate(actions, sentence, model) #if model is None score=0
            checkscore = check.score
            checktree = check.top()
            print >> logs, checktree
            checkprec = checktree.evaluate(reftree)
            print >> logs, "verify: tree:%s\tscore:%s\tprec:%s" % (tree == checktree, score == checkscore, prec == checkprec)
            print >> logs, "sentence %-4d (len %d): modelcost= %.2lf\tprec= %.2lf\tstates= %d (uniq %d)\tedges= %d\ttime= %.3lf" % \
                  (i, len(sentence), checkscore, checkprec.prec100(), nstates, nuniq, nedges, dtime)

        totalscore += score
        totalstates += nstates
        totaledges += nedges
        totaluniq += nuniq
        totaltime += dtime

        totalprec += prec

    if i == 0:
        print >> logs, "Error: empty input."
        sys.exit(1)

    if FLAGS.featscache:
        print >> logs, "feature constructions: tot= %d shared= %d (%.2f%%)" % (State.tot, State.shared, State.shared / State.tot * 100)

    print >> logs, "beam= {b}, avg {a} sents,\tmodelcost= {c:.2f}\tprec= {p:.2%}" \
          "\tstates= {ns:.1f} (uniq {uq:.1f})\tedges= {ne:.1f}\ttime= {t:.4f}\n{d:s}" \
          .format(b=FLAGS.b, a=i, c=totalscore/i, p=totalprec.prec(), 
                  ns=totalstates/i, uq=totaluniq/i, ne=totaledges/i, t=totaltime/i, 
                  d=totalprec.details())
    
    if FLAGS.uniqstat:
        for i in sorted(uniqstats):
            print >> logs, "%d\t%.1lf\t%d\t%d" % \
                  (i, sum(uniqstats[i]) / len(uniqstats[i]), \
                   min(uniqstats[i]), max(uniqstats[i]))

    if FLAGS.oracle:
        print >> logs, "oracle= ", totaloracle

    if FLAGS.manual_gc:
        print >> logs, "garbage collection took %.1f seconds" % gctime

    print >> logs, "memory usage after parsing: ", human(memory(start_mem))
    if FLAGS.mydouble:
        from mydouble import counts
        print >> logs, "mydouble usage and freed: %d %d" % counts()