def avgFscore(t, p):
    """Return the mean of ``fscore(t[i], p[i])`` over every index of ``t``.

    Args:
        t: Sized, indexable sequence; ``t[i]`` is passed to ``fscore`` as its
            first argument.
        p: Indexable sequence read at the same indices as ``t``; ``p[i]`` is
            passed to ``fscore`` as its second argument. Items beyond
            ``len(t)`` are ignored.

    Returns:
        The sum of the per-index scores divided by ``len(t)``, as a float.

    Raises:
        ZeroDivisionError: If ``t`` is empty.
        IndexError: If ``p`` is a list-like object shorter than ``t``.
    """
    count = len(t)
    # Use the built-in sum() instead of a local variable that shadows it.
    total = sum(fscore(t[i], p[i]) for i in range(count))
    # float() forces true division; under Python 2 an integer total divided
    # by an integer count would silently truncate.
    return total / float(count)


# Only the first 1000 entries of the labels and of the data are used.
labels = data.getLabelsArray()[:1000]
rawData = data.getDataArray()[:1000]
stPrimerov = len(labels)  # number of labels kept above

# Filter rawData with whatever data.getBadAttributes reports for the
# argument 10 (the meaning of that argument is defined in the data module).
bad = data.getBadAttributes(rawData, 10)
rawData = data.filterArr(rawData, bad)

k = 10  # number of cross-validation folds

# Single-argument print(...) produces the same output on Python 2 and 3.
print("starting %d fold cross validation" % k)
print("number of cases: %d" % len(rawData))
print("number of attributes: %d" % len(rawData[0]))

#tolerance = [a/100.0 for a in range(40,50) if a%2 == 0]
#meja = [10,12,14,16,18,20]
#allTests = {}
#for tol in tolerance:
#    for mej in meja:
#        aaa = 0
#        allPred = []
#        for i in xrange(k):
import data
import math


def _binarize(rows):
    """Return, for each row, a list holding ``int(x > 0)`` for every value."""
    return [[int(x > 0) for x in row] for row in rows]


def _log_scale(rows):
    """Return, for each row, ``int(ceil(log(x)))`` per value (0 when x <= 0)."""
    return [
        [int(math.ceil(math.log(x) if x > 0 else 0)) for x in row]
        for row in rows
    ]


def _concat_features(raw, binary, logged):
    """Join the raw, binary and log columns of each row into one list."""
    return [
        list(r) + list(b) + list(g)
        for r, b, g in zip(raw, binary, logged)
    ]


def _to_tsv(rows):
    """Serialise rows as tab-separated lines, stripping every "c" from values."""
    return "\n".join(
        "\t".join(str(x).replace("c", "") for x in row) for row in rows
    )


d = data.getDataArray()
t = data.getTestArray()

# Apply the same attribute filter (computed on the training data with the
# argument 10) to both the training and the test arrays.
a10 = data.getBadAttributes(d, 10)
d10 = data.filterArr(d, a10)
t10 = data.filterArr(t, a10)

binD = _binarize(d10)
binT = _binarize(t10)

logD = _log_scale(d10)
logT = _log_scale(t10)

# Each set is built from its own rows. The previous single loop over
# range(len(t10)) indexed both arrays, so the training output was cut to the
# test-set row count (or raised IndexError when the test set was larger).
newD = _concat_features(d10, binD, logD)
newT = _concat_features(t10, binT, logT)

# NOTE(review): "Trainging" looks like a typo for "Training"; the file name
# is left unchanged because other scripts may read this exact path.
# open() replaces the Python 2-only file() built-in, and the with-blocks
# flush and close each file even if the write fails.
with open("plusBinLogTraingingData.csv", "w") as f:
    f.write(_to_tsv(newD))

with open("plusBinLogTestData.csv", "w") as f:
    f.write(_to_tsv(newT))