Пример #1
0
def confusion_matrix(log, plcf, col):
    """Build a confusion matrix of instance ids keyed by class label.

    The log is parsed with ``weka.log_parse`` and the file at ``plcf`` is read
    as one JSON object per line. Each object is indexed by its ``col`` value,
    and the set of distinct ``col`` values forms the class labels.

    Args:
        log: Passed straight to ``weka.log_parse``; expected to yield
            mapping-like instances with ``'refN'``, ``'prdN'`` and ``'id'``.
        plcf: Path of the JSON-lines file describing the places.
        col: Key of the place attribute used as the class label.

    Returns:
        A nested dict where ``result[ref][hyp]`` is the list of integer
        instance ids whose reference class is ``ref`` and whose predicted
        class is ``hyp``. Every (ref, hyp) pair of known classes is present,
        with an empty list when no instance falls in that cell.

    Raises:
        KeyError: If an instance refers to a place that is not in ``plcf``
            (assuming ``dataset.DataItem`` behaves like a dict -- TODO confirm).
    """
    instances = weka.log_parse(log)
    labels = set()
    index = dataset.DataItem()
    with open(plcf) as handle:
        for raw in handle:
            record = json.loads(raw)
            label = record[col]
            index[label] = record
            labels.add(label)

    # Debug output of the discovered class labels; the parenthesised form
    # prints the same text under the Python 2 print statement.
    print(labels)

    # Pre-populate every cell so callers can index any class pair safely.
    matrix = {ref: {prd: [] for prd in labels} for ref in labels}

    for entry in instances:
        actual = index[entry['refN']][col]
        predicted = index[entry['prdN']][col]
        matrix[actual][predicted].append(int(entry['id']))

    return matrix
Пример #2
0
def thirgest(log, threshold=3):
    """Return the share of instances whose reference is among the top scores.

    Each instance parsed from ``log`` carries a ``'score'`` sequence and a
    ``'ref'`` value. Scores are ranked from highest to lowest (ties keep their
    original order) and the instance counts as a hit when ``'ref'`` equals the
    1-based position of one of the ``threshold`` best scores.

    Args:
        log: Passed straight to ``weka.log_parse``.
        threshold: How many top-ranked positions to accept; defaults to 3,
            the value that used to be hard-coded.

    Returns:
        The hit ratio as a float in ``[0.0, 1.0]``. ``0.0`` is returned when
        the log contains no instances, instead of dividing by zero.
    """
    ins_lst = weka.log_parse(log)
    hits, total = 0, 0
    # A non-positive threshold accepts nothing, as range(threshold) did.
    top_n = max(threshold, 0)
    for ins in ins_lst:
        # Pair every score with its 1-based position, best score first.
        ranked = sorted(enumerate(ins['score'], 1),
                        key=lambda pair: pair[1], reverse=True)
        # Slicing (rather than indexing) tolerates instances that have fewer
        # than ``threshold`` scores, which previously raised IndexError.
        if any(position == ins['ref'] for position, _ in ranked[:top_n]):
            hits += 1
        total += 1
    if not total:
        return 0.0
    return hits / float(total)