def fscore(t, p):
    """Combine precision and recall of one case into a single score.

    Computes ``2 * P * R / (P + R)`` where ``P = precision(t, p)`` and
    ``R = recall(t, p)``; both helpers are defined elsewhere and receive
    ``t`` and ``p`` unchanged (presumably the true and the predicted labels
    of one case -- confirm against those helpers).

    Returns:
        The score as a float, or 0.0 when ``P + R`` is not positive (which
        would otherwise be a division by zero).
    """
    prec = precision(t, p)
    rec = recall(t, p)
    denom = prec + rec
    if denom > 0:
        return 2.0 * (prec * rec) / denom
    # Always return a float so callers get a consistent type.
    return 0.0


def avgFscore(t, p):
    """Return the mean of ``fscore(t[i], p[i])`` over every index of ``t``.

    ``p`` is indexed in lockstep with ``t``, so it must have at least
    ``len(t)`` elements (an IndexError is raised otherwise).

    Returns:
        The average score as a float; 0.0 when ``t`` is empty, instead of
        failing with ZeroDivisionError.
    """
    count = len(t)
    if count == 0:
        return 0.0
    # Float accumulator: keeps the final division a true division even when
    # every individual score is zero, and avoids shadowing the builtin sum().
    total = 0.0
    for i in range(count):
        total += fscore(t[i], p[i])
    return total / count


# --- Cross-validation set-up ------------------------------------------------
# This variant works on the first 1000 labels / data rows only.
labels = data.getLabelsArray()[:1000]
rawData = data.getDataArray()[:1000]
stPrimerov = len(labels)  # presumably Slovenian shorthand for "number of examples"

# Remove the attributes reported by data.getBadAttributes for the value 10.
# NOTE(review): what makes an attribute "bad" and what 10 means is defined in
# the data module -- confirm there.
bad = data.getBadAttributes(rawData, 10)
rawData = data.filterArr(rawData, bad)

k = 10  # number of cross-validation folds

# Parenthesised single-argument print behaves the same as a Python 2 print
# statement and as the Python 3 print function.
print("starting %d fold cross validation" % k)
print("number of cases: %d" % len(rawData))
print("number of attributes: %d" % len(rawData[0]))

# Disabled parameter sweep, kept from the original:
#tolerance = [a/100.0 for a in range(40,50) if a%2 == 0]
#meja = [10,12,14,16,18,20]
#allTests = {}
def fscore(t, p):
    """Combine precision and recall of one case into a single score.

    Computes ``2 * P * R / (P + R)`` where ``P = precision(t, p)`` and
    ``R = recall(t, p)``; both helpers are defined elsewhere and receive
    ``t`` and ``p`` unchanged (presumably the true and the predicted labels
    of one case -- confirm against those helpers).

    Returns:
        The score as a float, or 0.0 when ``P + R`` is not positive (which
        would otherwise be a division by zero).
    """
    prec = precision(t, p)
    rec = recall(t, p)
    denom = prec + rec
    if denom > 0:
        return 2.0 * (prec * rec) / denom
    # Always return a float so callers get a consistent type.
    return 0.0


def avgFscore(t, p):
    """Return the mean of ``fscore(t[i], p[i])`` over every index of ``t``.

    ``p`` is indexed in lockstep with ``t``, so it must have at least
    ``len(t)`` elements (an IndexError is raised otherwise).

    Returns:
        The average score as a float; 0.0 when ``t`` is empty, instead of
        failing with ZeroDivisionError.
    """
    count = len(t)
    if count == 0:
        return 0.0
    # Float accumulator: keeps the final division a true division even when
    # every individual score is zero, and avoids shadowing the builtin sum().
    total = 0.0
    for i in range(count):
        total += fscore(t[i], p[i])
    return total / count


# --- Cross-validation set-up ------------------------------------------------
# This variant uses every label / data row returned by the data module.
labels = data.getLabelsArray()
rawData = data.getDataArray()
stPrimerov = len(labels)  # presumably Slovenian shorthand for "number of examples"

# Attribute filtering is disabled in this variant:
#bad = data.getBadAttributes(rawData,10)
#rawData = data.filterArr(rawData,bad)

k = 10  # number of cross-validation folds

# Parenthesised single-argument print behaves the same as a Python 2 print
# statement and as the Python 3 print function.
print("starting %d fold cross validation" % k)
print("number of cases: %d" % len(rawData))
print("number of attributes: %d" % len(rawData[0]))

# Disabled parameter sweep, kept from the original:
#tolerance = [a/100.0 for a in range(40,50) if a%2 == 0]
#meja = [10,12,14,16,18,20]
#allTests = {}
# Derives several resampled variants of the labelled data set from the raw
# arrays. Presumably used to compare label distributions (see the disabled
# histogram calls at the end) -- confirm with the author.
import matplotlib.pyplot as plot
import data
from sets import Set
from itertools import chain
from collections import Counter
from random import shuffle

# Labels and data rows exactly as returned by the project-local data module.
rawL = data.getLabelsArray()
rawD = data.getDataArray()

# Three variants, each computed directly from the raw arrays.
# NOTE(review): the numeric arguments (5, 20, 50) are interpreted inside the
# data module; their exact meaning cannot be told from here -- confirm there.
# NOTE(review): statement order is kept as-is because it is not visible here
# whether these helpers modify rawD / rawL in place.
remLD, remLL = data.removeLeastCommonData(rawD, rawL,5)
remMD, remML = data.removeMostCommonData(rawD, rawL,20)
addD , addL = data.addFakeData(rawD, rawL,50)

# Earlier ordering of the combined experiment (remove, remove, then add),
# currently disabled:
#expD, expL = data.removeLeastCommonData(rawD, rawL,5)
#expD, expL = data.removeMostCommonData(expD, expL,20)
#expD, expL = data.addFakeData(expD, expL,80)

# Active combined experiment: add fake data first, then apply both removals
# to the result of the previous step.
expD, expL = data.addFakeData(rawD, rawL,80)
expD, expL = data.removeLeastCommonData(expD, expL,5)
expD, expL = data.removeMostCommonData(expD, expL,20)

# Disabled plotting: one histogram (83 bins) over the flattened label lists of
# a chosen variant. The axis labels are Slovenian, presumably
# "number of labels" / "number of examples".
#plot.hist(list(chain(*addL)),bins=83)
#plot.hist(list(chain(*rawL)),bins=83)
#plot.hist(list(chain(*remLL)),bins=83)
#plot.hist(list(chain(*remML)),bins=83)
#plot.hist(list(chain(*expL)),bins=83)
#plot.xlabel("stevilo oznak")
#plot.ylabel("stevilo primerov")
#plot.show()
#plot.close()
# Extends every training and test row with two derived copies of its
# attributes (a 0/1 indicator and an integer log bucket) and writes both sets
# to tab-separated files.
import data
import math


def _binary_row(row):
    """Return a 0/1 list marking which values of *row* are greater than zero."""
    return [int(x > 0) for x in row]


def _log_row(row):
    """Return ``int(ceil(ln(x)))`` for each positive x in *row*, and 0 otherwise."""
    return [int(math.ceil(math.log(x) if x > 0 else 0)) for x in row]


def _augment(rows, bin_rows, log_rows):
    """Concatenate each row with its binary and log counterparts."""
    return [list(r) + list(b) + list(g)
            for r, b, g in zip(rows, bin_rows, log_rows)]


def _format_rows(rows):
    """Render *rows* as newline-separated, tab-delimited text.

    Every 'c' character is stripped from each rendered value, exactly as the
    original script did.
    """
    return "\n".join(
        "\t".join(str(x).replace("c", "") for x in row) for row in rows
    )


d = data.getDataArray()
t = data.getTestArray()

# The attribute selection is computed from the training rows only and the same
# selection is then applied to both sets.
# NOTE(review): what makes an attribute "bad" and what 10 means is defined in
# the data module -- confirm there.
a10 = data.getBadAttributes(d, 10)
d10 = data.filterArr(d, a10)
t10 = data.filterArr(t, a10)

binD = [_binary_row(row) for row in d10]
binT = [_binary_row(row) for row in t10]
logD = [_log_row(row) for row in d10]
logT = [_log_row(row) for row in t10]

# Each set is built over its own length. The original looped over
# range(len(t10)) for both, which truncated the training output to the size of
# the test set (or raised IndexError when the training set was smaller).
newD = _augment(d10, binD, logD)
newT = _augment(t10, binT, logT)

# open() is equivalent to the Python 2 file() builtin; the with-blocks close
# each file even if the write fails. The misspelt training file name is kept
# unchanged because other code may rely on that exact path.
with open("plusBinLogTraingingData.csv", "w") as f:
    f.write(_format_rows(newD))

with open("plusBinLogTestData.csv", "w") as f:
    f.write(_format_rows(newT))
def fscore(t,p): per = precision(t,p) rec = recall(t,p) return 2.0 * (per*rec) / (per+rec) if per+rec > 0 else 0 def avgFscore(t,p): l = len(t) sum = 0 for i in xrange(l): sum += fscore(t[i],p[i]) return sum/l labels = data.getLabelsArray() rawData = data.getDataArray() stPrimerov = len(labels) #bad = data.getBadAttributes(rawData,10) #rawData = data.filterArr(rawData,bad) k = 10; print "starting %d fold cross validation" % k print "number of cases: %d" % len(rawData) print "number of attributes: %d" % len(rawData[0]) aaa = 0 allPred = [] for i in xrange(k):