def analyseData(data):
    """Summarise the combo scores of every member in ``data``.

    Each member's ``comboScore`` attribute is rounded to three decimals. The
    rounded scores are passed to ``generateStatsVector`` and tallied with
    ``Counter``, and a one-line summary (mean, std, median) is printed.

    Args:
        data: iterable of objects exposing a ``comboScore`` attribute.

    Returns:
        A ``(stats, histogram)`` tuple: the object returned by
        ``generateStatsVector`` for the rounded scores, and a ``Counter``
        mapping each rounded score to its number of occurrences.
    """
    # NOTE: an earlier variant skipped scores that round to 0.290; that
    # filter is disabled, so every member contributes.
    rounded_scores = []
    for member in data:
        rounded_scores.append(round(member.comboScore, 3))

    score_stats = generateStatsVector(rounded_scores)
    score_histogram = Counter(rounded_scores)

    summary = "Combo Score: Mean - %.3f (+/- %.3f) Median - %.4f " % (
        score_stats.mean,
        score_stats.std,
        score_stats.median,
    )
    # Single parenthesised argument: same output as the print statement.
    print(summary)

    return score_stats, score_histogram
def calculateWinner(filename1, filename2, alg, metricIndex):
    """Tell which of two result files has the higher mean for one metric.

    For each file, the metric vector is fetched with ``getVector`` and
    summarised with ``generateStatsVector``; the two means are printed on
    one line and compared.

    Args:
        filename1: first file name, forwarded to ``getVector``.
        filename2: second file name, forwarded to ``getVector``.
        alg: forwarded unchanged to ``getVector``.
        metricIndex: forwarded to ``getVector``; per the original note,
            2 selects Acc and 3 selects f1.

    Returns:
        0 when the first file's mean is strictly greater, 1 when the second
        file's mean is strictly greater, and -1 otherwise (e.g. a tie).
    """
    # Fetch both vectors first, then summarise both, keeping the call order
    # of the helpers unchanged.
    vectors = [getVector(name, alg, metricIndex)
               for name in (filename1, filename2)]
    summaries = [generateStatsVector(vector) for vector in vectors]

    mean_first = summaries[0].mean
    mean_second = summaries[1].mean

    if mean_first > mean_second:
        outcome = 0
    elif mean_second > mean_first:
        outcome = 1
    else:
        outcome = -1

    # Equivalent to printing both means separated by a single space.
    print("%s %s" % (mean_first, mean_second))

    # NOTE: a paired t-test (scipy.stats.ttest_rel) that would reset the
    # outcome to -1 when p > 0.05 exists only as disabled code; the result
    # is therefore based on the means alone.
    return outcome
def compareValues(minIndex):
    """Print a baseline-versus-classifier comparison over ``returnedValues``.

    Reads the module-level ``returnedValues``. For every group in it,
    positions 0, 1 and 2 are collected as the baseline metrics and positions
    ``minIndex``, ``minIndex + 1`` and ``minIndex + 2`` as the classifier
    metrics (accuracy, F1 and "wF1" going by the original variable names;
    presumably weighted F1 -- confirm against the producer of the data).

    For each of the three metrics the function runs a paired t-test
    (``stats.ttest_rel``) between baseline and classifier values, summarises
    both columns with ``generateStatsVector`` and prints, in this order:
    the three baseline summaries, the three classifier summaries, the three
    relative gains in percent, and the three t-test results followed by
    whether the p-value exceeds 0.05.

    Args:
        minIndex: index within each group of the first classifier metric.

    Returns:
        None. All results are printed.
    """
    metric_count = 3
    baseline_columns = [[] for _ in range(metric_count)]
    classifier_columns = [[] for _ in range(metric_count)]

    for group in returnedValues:
        for offset in range(metric_count):
            baseline_columns[offset].append(group[offset])
            classifier_columns[offset].append(group[minIndex + offset])

    # Paired t-tests are computed before any summary, as in the original flow.
    paired_tests = []
    for baseline_values, classifier_values in zip(baseline_columns,
                                                  classifier_columns):
        t_value, p_value = stats.ttest_rel(baseline_values, classifier_values)
        paired_tests.append((t_value, p_value))

    baseline_stats = [generateStatsVector(column)
                      for column in baseline_columns]
    classifier_stats = [generateStatsVector(column)
                        for column in classifier_columns]

    # The first baseline line is labelled "BL" although it reports the
    # baseline accuracy; the labels are kept exactly as they were.
    baseline_formats = (
        "BL = %.3f (+/- %.5f)",
        "F1 = %.3f (+/- %.5f)",
        "wF1 = %.3f (+/- %.5f)",
    )
    classifier_formats = (
        "Classifier ACC = %.3f (+/- %.5f)",
        "Classifier F1 = %.3f (+/- %.5f)",
        "Classifier wF1 = %.3f (+/- %.5f)",
    )
    gain_formats = (
        "ACC GAIN --> %0.2f%% ",
        "F1 GAIN --> %0.2f%% ",
        "WF1 GAIN --> %0.2f%% ",
    )
    test_labels = (
        "Tacc, Probacc = ",
        "Tf1, probF1 = ",
        "wtF1, probwf1 = ",
    )

    for line_format, summary in zip(baseline_formats, baseline_stats):
        print(line_format % (summary.mean, summary.std))

    for line_format, summary in zip(classifier_formats, classifier_stats):
        print(line_format % (summary.mean, summary.std))

    # Relative gain of the classifier mean over the baseline mean, in percent.
    for line_format, baseline, classifier in zip(gain_formats,
                                                 baseline_stats,
                                                 classifier_stats):
        gain = 100.0 * (classifier.mean - baseline.mean) / baseline.mean
        print(line_format % gain)

    # Items are joined by single spaces, matching a comma-separated print.
    for label, (t_value, p_value) in zip(test_labels, paired_tests):
        print("%s %s %s %s" % (label, t_value, p_value, p_value > 0.05))
for classify, listOfValues in clfs.items(): print classify, listOfValues k = len(listOfValues) for classify, listOfValues in clfs.items(): vs = defaultdict(list) for n in range(nMeasures): for values in listOfValues: vs[n].append(values[n]) for n in range(nMeasures): clfs2[classify][n] = vs[n] for v in vs.values(): npv = generateStatsVector(v) means[classify].append( (npv.mean, npv.std) ) # all the keys in pais (A,B), (A,C), (B,C)... for clf in list(it.combinations(clfs.keys(), 2)): print "=== Comparing", clf[0], "and", clf[1], "===" for n in range(nMeasures): print "============" print "Measure ", n vec1 = clfs2[clf[0]][n] vec2 = clfs2[clf[1]][n] mean0 = means[clf[0]][n][0] mean1 = means[clf[1]][n][0] print "%s %.3f (%.3f) " % (clf[0], mean0, means[clf[0]][n][1]) print "%s %.3f (%.3f) " % (clf[1], mean1, means[clf[1]][n][1])