示例#1
0
        writer.writerow([unicode(x) for x in ['id'] + [x[0] for x in labels] + ['diff', 'snippet']])
    else:
        writer.writerow([unicode(x) for x in ['id', 'predicted', 'coded', 'confidence', 'correct?', 'diff', 'snippet']])
    vecs = map(lambda x: x.vector, vectors)
    output = {}
    for (lname, labs) in labels:
        m = models[lname]
        if m == None:
            print >>sys.stderr, lname
            continue
        print lname + ': '

        lab,acc,val = liblinear.linearutil.predict(labs, vecs, m, '-b 1')

        # print performances and failure cases
        pn = pn_t({True: 0, False: 0},
                  {True: 0, False: 0})
        for (i,pred) in enumerate(lab):
            ok = bool(pred) == labs[i]
            res = 'Yes' if ok else 'No'
            if labs[i] == None:
                res = 'Unknown'
            else:
                if pred > 0:
                    pn.p[ok] += 1
                else:
                    pn.n[ok] += 1
            revid = vectors[i].raw['id']['rev_id'] if vectors[i].raw['id'].has_key('rev_id') else None
            link = 'http://enwp.org/?diff=prev&oldid=%s' % revid
            ls = [lname,
                  repr(vectors[i].raw['id']),
                  bool(pred),
示例#2
0
                        dest='verbose', action='store_true', default=False,
                        help='turn on verbose message output')
    parser.add_argument('input', nargs='+', type=lambda x: open(x))
    
    options = parser.parse_args()

    # Load the raw table of coded examples from every input file and tally,
    # per label, how many predictions matched the human-assigned code.

    # Raise the csv module's maximum field size so that very large cells
    # do not make csv.reader raise an error.
    csv.field_size_limit(1000000000)
    table = []
    for f in options.input:
        # Each entry of options.input is iterated directly by csv.reader.
        # NOTE(review): nothing in this section closes these inputs.
        t = list(csv.reader(f, delimiter=options.delimiter))
        # Drop the first row of every file -- presumably a header row;
        # TODO confirm against the files this script is fed.
        table += t[1:]

    # pns maps a label name to its pn_t counter pair.  pn_t is defined
    # outside this section; from its use below it exposes `.p` and `.n`,
    # each a dict keyed by True/False (prediction agreed with the code or not).
    pns = {}
    for cols in table:
        # options.label / options.pred / options.code are used as column
        # indices into the row.
        lab,pred,code = [cols[x] for x in [options.label, options.pred, options.code]]
        # setdefault runs before the ignore check, so a label whose rows are
        # all ignored still gets an entry (with all counters at zero).
        pn = pns.setdefault(lab, pn_t({True: 0, False: 0},
                                      {True: 0, False: 0}))
        if code == options.ignore:
            # Rows coded with the "ignore" value are not counted.  The bare
            # `None` is a no-op expression statement (it behaves like `pass`).
            None
        else:
            # A prediction is correct when it equals the coded value exactly.
            ok = (pred == code)
            # Bucket by the predicted class: `.p` for rows predicted as
            # options.positive, `.n` for everything else.
            if pred == options.positive:
                pn.p[ok] += 1
            else:
                pn.n[ok] += 1
    for (lab,pn) in sorted(pns.items(), key=lambda x: x[0]):
        print lab
        numcorrect = pn.p[True] + pn.n[True]
        numwrong   = pn.p[False] + pn.n[False]
        print ' accuracy  = %f (%d/%d)' % (float(numcorrect) / (numcorrect + numwrong) if numcorrect + numwrong > 0 else float('nan'),
                                           numcorrect,
                                           (numcorrect + numwrong))