return float(len(Set(t).intersection(Set(p)))) / len(t) def fscore(t,p): per = precision(t,p) rec = recall(t,p) return 2.0 * (per*rec) / (per+rec) if per+rec > 0 else 0 def avgFscore(t,p): l = len(t) sum = 0 for i in xrange(l): sum += fscore(t[i],p[i]) return sum/l labels = data.getLabelsArray()[:1000] rawData = data.getDataArray()[:1000] stPrimerov = len(labels) bad = data.getBadAttributes(rawData,10) rawData = data.filterArr(rawData,bad) k = 10; print "starting %d fold cross validation" % k print "number of cases: %d" % len(rawData) print "number of attributes: %d" % len(rawData[0]) #tolerance = [a/100.0 for a in range(40,50) if a%2 == 0] #meja = [10,12,14,16,18,20]
from collections import Counter
from itertools import chain
from random import shuffle
from sets import Set

import matplotlib.pyplot as plot

import data

# Labels and data exactly as returned by the project's `data` module.
rawL = data.getLabelsArray()
rawD = data.getDataArray()

# Three variants, each built directly from rawD / rawL.
# NOTE(review): the meaning of the numeric arguments (5, 20, 50) is defined
# in the `data` module and is not visible here.
remLD, remLL = data.removeLeastCommonData(rawD, rawL, 5)
remMD, remML = data.removeMostCommonData(rawD, rawL, 20)
addD, addL = data.addFakeData(rawD, rawL, 50)

# Earlier ordering of the combined pipeline, kept for reference:
#expD, expL = data.removeLeastCommonData(rawD, rawL,5)
#expD, expL = data.removeMostCommonData(expD, expL,20)
#expD, expL = data.addFakeData(expD, expL,80)

# Combined pipeline currently in use: add fake data first, then apply the
# two removal steps to its result.
expD, expL = data.addFakeData(rawD, rawL, 80)
expD, expL = data.removeLeastCommonData(expD, expL, 5)
expD, expL = data.removeMostCommonData(expD, expL, 20)

# Disabled histogram plotting of the flattened label lists.
# NOTE(review): the axis labels are presumably Slovenian for
# "number of labels" / "number of examples" -- confirm.
#plot.hist(list(chain(*addL)),bins=83)
#plot.hist(list(chain(*rawL)),bins=83)
#plot.hist(list(chain(*remLL)),bins=83)
#plot.hist(list(chain(*remML)),bins=83)
#plot.hist(list(chain(*expL)),bins=83)
#plot.xlabel("stevilo oznak")
#plot.ylabel("stevilo primerov")
#plot.show()
#plot.close()
return float(len(Set(t).intersection(Set(p)))) / len(t) def fscore(t,p): per = precision(t,p) rec = recall(t,p) return 2.0 * (per*rec) / (per+rec) if per+rec > 0 else 0 def avgFscore(t,p): l = len(t) sum = 0 for i in xrange(l): sum += fscore(t[i],p[i]) return sum/l labels = data.getLabelsArray() rawData = data.getDataArray() stPrimerov = len(labels) #bad = data.getBadAttributes(rawData,10) #rawData = data.filterArr(rawData,bad) k = 10; print "starting %d fold cross validation" % k print "number of cases: %d" % len(rawData) print "number of attributes: %d" % len(rawData[0]) #tolerance = [a/100.0 for a in range(40,50) if a%2 == 0] #meja = [10,12,14,16,18,20]
return float(len(Set(t).intersection(Set(p)))) / len(t) def fscore(t,p): per = precision(t,p) rec = recall(t,p) return 2.0 * (per*rec) / (per+rec) if per+rec > 0 else 0 def avgFscore(t,p): l = len(t) sum = 0 for i in xrange(l): sum += fscore(t[i],p[i]) return sum/l labels = data.getLabelsArray() rawData = data.getDataArray() stPrimerov = len(labels) #bad = data.getBadAttributes(rawData,10) #rawData = data.filterArr(rawData,bad) k = 10; print "starting %d fold cross validation" % k print "number of cases: %d" % len(rawData) print "number of attributes: %d" % len(rawData[0]) aaa = 0 allPred = []