def artificialTest():
    dataType = 1   # 0 synthetic, 1 real
    modelType = 1  # 0 classification, 1 regression
    dataPath = "data/"
    dataSets = ut.constructDatasetNames(dataType, modelType, dataPath)
    #dataSets = dataSets[22:24]
    #print dataSets
    i = 0
    verboseClassifiers = True
    for f in dataSets:
        # best score so far: 0 for classification (maximize accuracy),
        # a large value for regression (minimize error)
        maxAcc = 1000000 * modelType
        bestRun = False
        data = read_csv(f)
        #data = data[0:2000]
        X = np.array(data.ix[:, 0:-1])
        y = np.array(data.ix[:, -1])
        print f
        startTime = time.time()
        acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
        endTime = time.time()
        if modelType == 0:
            print "original:", str(acc*100)+"%", "#"+str(X.shape[1]), "n:"+str(X.shape[0]), str(round(endTime-startTime, 3))+"s"
        else:
            print "original:", "e: "+str(acc), "#"+str(X.shape[1]), "n:"+str(X.shape[0]), str(round(endTime-startTime, 3))+"s"
        for minRed in [0, 1]:  #range(0,2)
            for binMethod in [0]:  #range(0,2)
                for cutMethod in [3]:  #range(0,4)
                    for measure in [0, 1, 2, 3, 4]:  #range(0,6)
                        startTime = time.time()
                        rank = fs.featureSelection(X=X, y=y, modelType=modelType, runs=3, processes=0,
                                                   measure=measure, binMethod=binMethod, cutMethod=cutMethod,
                                                   minRed=minRed, rrThreshold=0.9, debug=False)
                        endTime = time.time()
                        timefs = round(endTime-startTime, 3)
                        X = np.array(data.ix[:, rank])
                        startTime = time.time()
                        acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
                        endTime = time.time()
                        timecf = round(endTime-startTime, 3)
                        if modelType == 0:
                            print "[", minRed, binMethod, cutMethod, measure, "]", str(acc*100)+"%", str(timefs)+"s", str(timecf)+"s", "#"+str(len(rank)), rank[0:10]
                            bestRun = acc > maxAcc
                        else:
                            print "[", minRed, binMethod, cutMethod, measure, "]", "e: "+str(acc), str(timefs)+"s", str(timecf)+"s", "#"+str(len(rank)), rank[0:10]
                            bestRun = acc < maxAcc
                        if bestRun:
                            maxAcc = acc
                            maxRank = rank
                            maxTimefs = timefs
                            maxTimecf = timecf
                            configuration = [minRed, binMethod, cutMethod, measure]
                        bestRun = False
                        # restore the full feature matrix before the next configuration
                        X = np.array(data.ix[:, 0:-1])
        if modelType == 0:
            print "best:", configuration, str(maxAcc*100)+"%", str(maxTimefs)+"s", str(maxTimecf)+"s", "#"+str(len(maxRank)), maxRank[0:10]
        else:
            print "best:", configuration, "e: "+str(maxAcc), str(maxTimefs)+"s", str(maxTimecf)+"s", "#"+str(len(maxRank)), maxRank[0:10]
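# Reading the per-configuration output of artificialTest (values below are purely
# illustrative, not from a real run): each line has the form
#   [ minRed binMethod cutMethod measure ] score  time_featsel  time_model  #selected  rank[0:10]
# e.g. for classification something like
#   [ 0 0 3 1 ] 87.5% 1.204s 0.312s #12 [4, 0, 7, ...]
# The final "best:" line reports the configuration [minRed, binMethod, cutMethod,
# measure] that maximized accuracy (classification) or minimized the error (regression).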
def evaluteDataset(filepath, modelType=2, measure=1, cutMethod=1, minRed=0, comporative=True):
    data = read_csv(filepath)
    X = np.array(data.ix[:, 0:-1])
    y = np.array(data.ix[:, -1])
    if modelType >= 2 or modelType < 0:
        modelType = ut.datesetType(y)
    if comporative:
        startTime = time.time()
        acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
        endTime = time.time()
        print "original:", acc, X.shape[1], str(round(endTime - startTime, 3)) + "s"
    startTime = time.time()
    rank = fs.featureSelection(X=X, y=y, modelType=modelType, runs=3, processes=0,
                               measure=measure, binMethod=0, cutMethod=cutMethod,
                               minRed=minRed, rrThreshold=0.9, debug=False)
    endTime = time.time()
    timefs = round(endTime - startTime, 3)
    X = np.array(data.ix[:, rank])
    startTime = time.time()
    acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
    endTime = time.time()
    timecf = round(endTime - startTime, 3)
    print "result:", acc, str(timefs) + "s", str(timecf) + "s", len(rank), rank
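# Minimal usage sketch for evaluteDataset (the CSV path is hypothetical; the file
# is expected to hold the feature columns first and the target in the last column,
# as read above):
#
#   evaluteDataset("data/example.csv")                    # infer model type, print baseline and result
#   evaluteDataset("data/example.csv", modelType=1,       # force regression, skip the baseline run
#                  measure=2, cutMethod=1, comporative=False)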
def searchValidationCut(X, y, rank, modelType=0, consecutives=7, runs=3):
    bestScore = 0
    rankPositions = []
    featuresAccepted = []
    counter = 0
    for i in range(0, len(rank)):
        rankPositions.append(i)
        featuresAccepted.append(rank[i])
        if modelType == 0:
            score = ml.modelJudge(X=X[:, featuresAccepted], y=y, modelType=modelType, testPerc=0.4, runs=runs)
        else:
            # regression: invert the error so that a higher score is always better
            score = 1.0 / (ml.modelJudge(X=X[:, featuresAccepted], y=y, modelType=modelType, testPerc=0.4, runs=runs) + 1)
        if bestScore >= score:
            # candidate feature did not improve the score: reject it
            rankPositions.remove(i)
            featuresAccepted.remove(rank[i])
            counter = counter + 1
            if counter >= consecutives:
                break
        else:
            bestScore = score
            counter = 0
    return [featuresAccepted, rankPositions]
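# Standalone sketch of the same greedy forward validation cut, with scikit-learn's
# cross_val_score and a decision tree standing in for ml.modelJudge (an assumption:
# any estimator and scorer could be plugged in). It mirrors the loop in
# searchValidationCut above: features are taken in rank order and kept only while
# they improve the validated score, stopping after `consecutives` rejections in a
# row; it returns only the accepted feature indices.
def sklearnValidationCutSketch(X, y, rank, consecutives=7):
    from sklearn.model_selection import cross_val_score
    from sklearn.tree import DecisionTreeClassifier
    bestScore = 0
    counter = 0
    accepted = []
    for feature in rank:
        accepted.append(feature)
        score = cross_val_score(DecisionTreeClassifier(), X[:, accepted], y, cv=3).mean()
        if score <= bestScore:
            # no improvement: drop the candidate and count the rejection
            accepted.pop()
            counter = counter + 1
            if counter >= consecutives:
                break
        else:
            bestScore = score
            counter = 0
    return accepted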
def monotonicValidationCut(X, y, rank, modelType=0, consecutives=5, runs=3):
    bestScore = 0
    cutpos = 0
    counter = 0
    for i in range(1, len(rank)):
        if modelType == 0:
            score = ml.modelJudge(X=X[:, rank[0:i]], y=y, modelType=modelType, testPerc=0.4, runs=runs)
        else:
            # regression: invert the error so that a higher score is always better
            score = 1.0 / (ml.modelJudge(X=X[:, rank[0:i]], y=y, modelType=modelType, testPerc=0.4, runs=runs) + 1)
        #print bestScore, score, cutpos
        if bestScore >= score:
            counter = counter + 1
            if counter >= consecutives:
                cutpos = i - consecutives
                break
        else:
            counter = 0
            bestScore = score
            cutpos = i
    if cutpos <= 0:
        cutpos = 1
    return cutpos
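# Example usage of monotonicValidationCut (a sketch; `rank` is assumed to be the
# ordering returned by fs.featureSelection, and X, y numpy arrays as above):
#
#   cutpos = monotonicValidationCut(X, y, rank, modelType=0, consecutives=5)
#   selected = rank[0:cutpos]
#
# Unlike searchValidationCut, which can reject unhelpful features anywhere in the
# ranking and keep going, this variant only decides where to cut the leading prefix
# of the rank.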