import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.decomposition import PCA

from OLSgradientDescent import gradientDescent

# ---------------------------------------------------------------------------
# Part 1: synthetic sanity check for the gradient-descent solver.
# ---------------------------------------------------------------------------
nInstances = 500
nParams = 100
noiseLevel = 500

# Ground-truth weights are 0, 1, ..., nParams - 1.
trueWeights = np.asarray(range(nParams))
# Disabled variants that zero out part of the weight vector:
# trueWeights[5:] = 0
# trueWeights[:4] = 0

X = np.random.rand(nInstances, nParams)
# Draw one independent noise value per instance.  Using
# np.random.rand(np.size(nInstances)) here would draw a single value
# (np.size of a scalar is 1) and broadcast it to every row as a constant
# offset instead of per-instance noise.
Y = np.dot(X, trueWeights) + noiseLevel * np.random.rand(nInstances)

# NOTE(review): the meaning of the positional arguments (0, 1000, 0.01) is
# defined by OLSgradientDescent.gradientDescent, which is not visible here.
test = gradientDescent(Y, X, 0, 1000, 0.01)

# ---------------------------------------------------------------------------
# Part 2: load the Online News Popularity data and fit a 3-component PCA.
# ---------------------------------------------------------------------------
# NOTE(review): `input` shadows the builtin of the same name; the name is
# kept because other parts of the script may refer to it.
input = pd.read_csv('OnlineNewsPopularity/OnlineNewsPopularity.csv')
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# ndarray and is available on both old and new pandas versions.
inp = input.values

# Columns 1..59 are used as features and column 60 as the target; column 0
# is skipped.
X = inp[:, 1:60]
Y = inp[:, 60]
# Turn the target into an explicit (n, 1) column vector.
Y = Y.reshape(Y.shape[0], 1)
# NOTE(review): `shuffle` is not imported in the visible part of the file --
# presumably sklearn.utils.shuffle, which permutes X and Y consistently;
# confirm.
X, Y = shuffle(X, Y)

pca = PCA(n_components=3)
pca.fit(X)
print(pca.explained_variance_ratio_)
X_new = pca.transform(X)
# NOTE(review): this fragment reads i, j, lambArray, Xtrain/Ytrain,
# Xvalid/Yvalid, mse, OLSClosed and the four *ErrorArray matrices, none of
# which are defined in the visible part of the file.  The per-model
# statements presumably run inside nested loops over i and j whose headers
# are not visible; re-indent them under those loops when re-attaching --
# confirm.

# Regularisation strength shared by all four estimators in this step.
reg_strength = lambArray[j]

# --- Lasso (scikit-learn) ---------------------------------------------------
# NOTE(review): clf.predict may return a 1-D array while Yvalid may be a
# column vector; whether mse copes with that without broadcasting cannot be
# checked from here -- confirm.
clf = linear_model.Lasso(max_iter=10000, alpha=reg_strength).fit(Xtrain, Ytrain)
Ypred = clf.predict(Xvalid)
error = mse(Ypred, Yvalid)
lassoErrorArray[i, j] = error

# --- Closed-form solution with an L2 penalty --------------------------------
wEst = OLSClosed(Xtrain, Ytrain, L2=reg_strength)
Ypred = np.dot(Xvalid, wEst)
error = mse(Ypred, Yvalid)
closedErrorArray[i, j] = error

# --- Gradient descent in 'ridge' mode ---------------------------------------
# The solver's result is indexed by key; the estimate is under 'Weights'.
wEst = gradientDescent(Ytrain, Xtrain, 0.0001, 10000, 0.01, 'ridge', reg_strength)
Ypred = np.dot(Xvalid, wEst['Weights'])
error = mse(Ypred, Yvalid)
gradientErrorArray[i, j] = error

# --- Closed-form solution on PCA-projected features -------------------------
# The first column of each design matrix is dropped before projecting
# (presumably an existing bias column -- confirm) and a fresh column of ones
# is prepended to the projected features.
train_design = np.hstack((
    np.ones((np.size(Xtrain, axis=0), 1)),
    pca.transform(Xtrain[:, 1:]),
))
valid_design = np.hstack((
    np.ones((np.size(Xvalid, axis=0), 1)),
    pca.transform(Xvalid[:, 1:]),
))
wEst = OLSClosed(train_design, Ytrain, L2=reg_strength)
Ypred = np.dot(valid_design, wEst)
error = mse(Ypred, Yvalid)
pcaErrorArray[i, j] = error

# Average each error matrix over axis 0 (the dimension indexed by i), leaving
# one mean error per j.
aveLassoError, aveClosedError, aveGradientError, avePCAError = (
    np.mean(errors, axis=0)
    for errors in (
        lassoErrorArray,
        closedErrorArray,
        gradientErrorArray,
        pcaErrorArray,
    )
)