import time
import numpy as np
from pandas import read_csv
# Project-local modules; these import paths are assumptions based on the
# ut./ml./fs. prefixes used below.
import utils as ut
import machineLearning as ml
import featureSelection as fs

def artificialTest():
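	"""Sweep feature-selection configurations over a collection of datasets.

	For each dataset, a baseline model on all features is scored first, then
	every (minRed, binMethod, cutMethod, measure) combination is tried and
	the best-scoring configuration is reported.
	"""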
	dataType = 1   # 0: synthetic, 1: real
	modelType = 1  # 0: classification, 1: regression
	dataPath = "data/"
	dataSets = ut.constructDatasetNames(dataType,modelType,dataPath)
	#dataSets = dataSets[22:24]
	#print(dataSets)
	for f in dataSets:
		# Classification maximises accuracy (sentinel 0); regression minimises
		# error (large sentinel), so the starting value depends on modelType.
		maxAcc = 1000000*modelType
		bestRun = False
		data = read_csv(f)
		#data = data[0:2000]
		X = np.array(data.iloc[:,0:-1])
		y = np.array(data.iloc[:,-1])
		print(f)
		startTime = time.time()
		acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
		endTime = time.time()
		if modelType==0:
			print("original:", str(acc*100)+"%", "#"+str(X.shape[1]), "n:"+str(X.shape[0]), str(round(endTime-startTime,3))+"s")
		else:
			print("original:", "e: "+str(acc), "#"+str(X.shape[1]), "n:"+str(X.shape[0]), str(round(endTime-startTime,3))+"s")
		for minRed in [0, 1]:  # full sweep: range(0, 2)
			for binMethod in [0]:  # full sweep: range(0, 2)
				for cutMethod in [3]:  # full sweep: range(0, 4)
					for measure in [0, 1, 2, 3, 4]:  # full sweep: range(0, 6)
						startTime = time.time()
						rank = fs.featureSelection(X=X, y=y, modelType=modelType, runs=3, processes=0, measure=measure, binMethod=binMethod, cutMethod=cutMethod, minRed=minRed, rrThreshold=0.9, debug=False)
						endTime = time.time()
						timefs = round(endTime-startTime,3)
						# Keep only the columns selected by the ranking.
						X = np.array(data.iloc[:,rank])
						startTime = time.time()
						acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
						endTime = time.time()
						timecf = round(endTime-startTime,3)
						if modelType==0:
							print("[",minRed, binMethod, cutMethod, measure, "]", str(acc*100)+"%", str(timefs)+"s", str(timecf)+"s", "#"+str(len(rank)), rank[0:10])
							bestRun = acc > maxAcc
						else:
							print("[",minRed, binMethod, cutMethod, measure, "]", "e: "+str(acc), str(timefs)+"s", str(timecf)+"s", "#"+str(len(rank)), rank[0:10])
							bestRun = acc < maxAcc
						if bestRun:
							# Remember the best configuration seen so far.
							maxAcc = acc
							maxRank = rank
							maxTimefs = timefs
							maxTimecf = timecf
							configuration = [minRed,binMethod,cutMethod,measure]
						# Restore the full feature matrix for the next configuration.
						X = np.array(data.iloc[:,0:-1])
		if modelType==0:
			print("best:", configuration, str(maxAcc*100)+"%", str(maxTimefs)+"s", str(maxTimecf)+"s", "#"+str(len(maxRank)), maxRank[0:10])
		else:
			print("best:", configuration, "e: "+str(maxAcc), str(maxTimefs)+"s", str(maxTimecf)+"s", "#"+str(len(maxRank)), maxRank[0:10])
def evaluteDataset(filepath,
                   modelType=2,
                   measure=1,
                   cutMethod=1,
                   minRed=0,
                   comporative=True):
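    """Run feature selection on one CSV file and print before/after scores.

    The last column of the CSV is treated as the target. When modelType is
    outside {0, 1}, the task type is inferred from the target values; with
    comporative=True a baseline score on the full feature set is printed
    first for comparison.
    """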
    data = read_csv(filepath)
    X = np.array(data.iloc[:, 0:-1])
    y = np.array(data.iloc[:, -1])
    if modelType >= 2 or modelType < 0:
        # Infer the task type (classification vs. regression) from the target.
        modelType = ut.datesetType(y)
    if comporative:
        startTime = time.time()
        acc = ml.modelJudge(X=X,
                            y=y,
                            modelType=modelType,
                            testPerc=0.4,
                            runs=3)
        endTime = time.time()
        print "original:", acc, X.shape[1], str(round(endTime - startTime,
                                                      3)) + "s"
    startTime = time.time()
    rank = fs.featureSelection(X=X,
                               y=y,
                               modelType=modelType,
                               runs=3,
                               processes=0,
                               measure=measure,
                               binMethod=0,
                               cutMethod=cutMethod,
                               minRed=minRed,
                               rrThreshold=0.9,
                               debug=False)
    endTime = time.time()
    timefs = round(endTime - startTime, 3)
    # Keep only the columns selected by the ranking.
    X = np.array(data.iloc[:, rank])
    startTime = time.time()
    acc = ml.modelJudge(X=X, y=y, modelType=modelType, testPerc=0.4, runs=3)
    endTime = time.time()
    timecf = round(endTime - startTime, 3)
    print "result:", acc, str(timefs) + "s", str(timecf) + "s", len(rank), rank
def searchValidationCut(X,y,rank,modelType=0,consecutives=7,runs=3):
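	"""Greedy forward cut of a feature ranking, validated by model score.

	Walks the ranking in order, keeping a feature only if it improves the
	validation score; stops after `consecutives` rejections in a row.
	Returns [featuresAccepted, rankPositions].
	"""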
	bestScore = 0
	rankPositions = []
	featuresAccepted = []
	counter = 0
	for i in range(len(rank)):
		# Tentatively accept the next ranked feature.
		rankPositions.append(i)
		featuresAccepted.append(rank[i])
		if modelType==0:
			score = ml.modelJudge(X=X[:,featuresAccepted], y=y, modelType=modelType, testPerc=0.4, runs=runs)
		else:
			# Regression returns an error; map it to (0, 1] so higher is better.
			score = 1/(ml.modelJudge(X=X[:,featuresAccepted], y=y, modelType=modelType, testPerc=0.4, runs=runs)+1)
		if bestScore >= score:
			# No improvement: undo the tentative acceptance of this feature.
			rankPositions.pop()
			featuresAccepted.pop()
			counter += 1
			if counter >= consecutives:
				break
		else:
			bestScore = score
			counter = 0
	return [featuresAccepted, rankPositions]
def monotonicValidationCut(X,y,rank,modelType=0,consecutives=5,runs=3):
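	"""Find a cut position in a ranking by scoring growing prefixes.

	Evaluates rank[0:i] for increasing i and stops after `consecutives`
	non-improving steps, returning the prefix length with the best score
	(at least 1).
	"""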
	bestScore = 0
	cutpos = 0
	counter = 0
	# Score growing prefixes rank[0:i], including the full ranking.
	for i in range(1,len(rank)+1):
		if modelType==0:
			score = ml.modelJudge(X=X[:,rank[0:i]], y=y, modelType=modelType, testPerc=0.4, runs=runs)
		else:
			# Regression returns an error; map it to (0, 1] so higher is better.
			score = 1/(ml.modelJudge(X=X[:,rank[0:i]], y=y, modelType=modelType, testPerc=0.4, runs=runs)+1)
		#print(bestScore, score, cutpos)
		if bestScore >= score:
			counter = counter + 1
			if counter >= consecutives:
				# Too many non-improving steps: cut at the last improvement.
				cutpos = i - consecutives
				break
		else:
			counter = 0
			bestScore = score
			cutpos = i
	if cutpos <= 0:
		# Always keep at least the top-ranked feature.
		cutpos = 1
	return cutpos
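# Minimal usage sketch (assumptions: a CSV file "data/example.csv" whose last
# column is the target, and the project-local ut/ml/fs modules importable as
# above). Illustrative only, not part of the benchmark driver.
if __name__ == "__main__":
	evaluteDataset("data/example.csv", modelType=2, measure=1, cutMethod=1)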