def kFoldErrorChoose(x,y,maxOrder,k): 
	"""Choose the best polynomial order by k-fold cross-validation.

	x, y     -- training inputs and outputs (parallel sequences).
	maxOrder -- highest polynomial order to consider (orders 1..maxOrder).
	k        -- number of folds.

	Returns (lowest mean CV error, order achieving it).
	"""
	e = [0 for i in range(0,maxOrder)]
	d = kSplit([x,y],k)
	for order in range(1,maxOrder+1):
		sumError = 0
		for i in range(0,k): #The current partition to use: the ith partition is used as test data.
			Dcopy = copy.copy(d)
			dtest = Dcopy.pop(i)
			# BUG FIX: train on ALL remaining k-1 partitions, not just the
			# first one (the original used only Dcopy[0], discarding most
			# of the training data).
			trainX = [xv for part in Dcopy for xv in part[0]]
			trainY = [yv for part in Dcopy for yv in part[1]]
			f = Regression.polyTrain(trainX,trainY,order)
			sumError += meanSquaredError(dtest[0],dtest[1],f)
		# Mean error across the k folds (float division in Python 2).
		e[order-1] = sumError/(k * 1.0)
	# min(e) is equivalent to the original generator form; order is 1-based.
	return min(e),(argmin(e)+1)
# --- Example #2 (scraped-page artifact "示例#2" / vote count replaced with this comment) ---
	raise Exception("Invalid command line argument")


#In the following, D is the data set which has all the x values as its first entry and the y values as its second.

# Select the best polynomial order by 5-fold cross-validation over orders 1..10.
# NOTE(review): this runs BEFORE noise is added below, so CV sees noise-free data — confirm intended.
error,order = CV.kFoldErrorChoose(D[0],D[1],10,5)

#Graph the points on the base polynomial (the noise-free curve, drawn in red)
Graph.lineColor(D[0],D[1],'red')

#Add Gaussian noise to the data outputs (mutates D[1] in place; 1.0/2000 is presumably the variance — verify against Data.addGaussianNoise)
D[1] = Data.addGaussianNoise(D[1],1.0/2000)

#Graph them as points in blue
Graph.pointsSimple(D[0],D[1])

#Estimate the coefficients of the polynomial with best order (fit on the noisy outputs)
fit = Regression.polyTrain(D[0],D[1],order)

#Get the function's estimates for the training x values
z = [fit(i) for i in D[0]]

#Graph the fitted curve in green
Graph.lineColor(D[0],z,'g')

#Show the plot
Graph.show()

# When run with no command-line arguments, report the true vs. fitted order.
# NOTE(review): trueOrder is defined elsewhere in the file — not visible in this chunk.
if(len(sys.argv) == 1):
	print "True function was an order " + str(trueOrder) + " polynomial, fit with order " + str(order)
def squaredErrorChoose(x,y,maxOrder):
	"""Pick the polynomial order (1..maxOrder) with the lowest training MSE.

	x, y     -- training inputs and outputs (parallel sequences).
	maxOrder -- highest polynomial order to try.

	Returns (lowest mean squared error, 1-based order achieving it).
	"""
	errors = []
	for degree in range(1,maxOrder+1):
		model = Regression.polyTrain(x,y,degree)
		errors.append(meanSquaredError(x,y,model))
	# Orders are 1-based while list indices are 0-based, hence the +1.
	return min(errors),(argmin(errors)+1)