def confusion_matrix(log, plcf, col):
    """Generate a confusion matrix over the values of one place attribute.

    Args:
        log: Passed unchanged to ``weka.log_parse``; each parsed instance
            is read through its ``'refN'``, ``'prdN'`` and ``'id'`` keys.
        plcf: Path of a text file holding one JSON object per line.
            Blank lines are ignored.
        col: Key whose value in each JSON object is used as the class.

    Returns:
        A nested dict ``cmat[ref][hyp]`` holding the list of integer
        instance ids whose reference class is ``ref`` and whose predicted
        class is ``hyp``. Every pair of known classes has an entry, empty
        when no instance falls into it.

    Side effects:
        Prints the set of classes found in ``plcf`` to stdout.
    """
    ins_lst = weka.log_parse(log)
    classes = set()
    # NOTE(review): dataset.DataItem is presumably a dict-like container;
    # only item assignment and item lookup are used here -- confirm.
    places = dataset.DataItem()
    with open(plcf) as fplc:
        for line in fplc:
            # A trailing or stray empty line would make json.loads raise.
            if not line.strip():
                continue
            place = json.loads(line)
            places[place[col]] = place
            classes.add(place[col])
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(classes)

    # Pre-populate every (reference, prediction) cell with its own list so
    # that cells without any instance are still present in the result.
    cmat = dict()
    for ref in classes:
        cmat[ref] = dict((prd, list()) for prd in classes)

    for ins in ins_lst:
        # Places are indexed by their `col` value, so 'refN' / 'prdN' are
        # looked up there and mapped back to the class stored in the record.
        ref = places[ins['refN']][col]
        hyp = places[ins['prdN']][col]
        cmat[ref][hyp].append(int(ins['id']))
    return cmat
def thirgest(log, threshold=3):
    """Return the fraction of instances whose reference ranks near the top.

    For every instance parsed from ``log`` the entries of ``ins['score']``
    are ranked from highest to lowest (ties keep their original order),
    and the instance counts as a hit when the 1-based position
    ``ins['ref']`` is among the first ``threshold`` ranked positions.

    Args:
        log: Passed unchanged to ``weka.log_parse``.
        threshold: How many top-ranked positions count as a hit; expected
            to be a positive integer. Defaults to 3.

    Returns:
        ``hits / instances`` as a float, or ``0.0`` when the log contains
        no instances.
    """
    ins_lst = weka.log_parse(log)
    pos, cnt = 0, 0
    for ins in ins_lst:
        # Pair each score with its 1-based position, best score first.
        # sorted() is stable, so equal scores stay in their original order.
        rnk = sorted(enumerate(ins['score'], 1),
                     key=lambda pair: pair[1], reverse=True)
        # Slicing (rather than indexing 0..threshold-1) tolerates instances
        # that carry fewer than `threshold` scores.
        if any(idx == ins['ref'] for idx, _ in rnk[:threshold]):
            pos += 1
        cnt += 1
    if cnt == 0:
        # Nothing was parsed; avoid dividing by zero.
        return 0.0
    return pos / float(cnt)