Example #1
# Problem 2 -- Model Scoring -- for Homework 3 of CS107
# Author: Max Li

from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

linear_model = reg.LinearRegression()
ridge_model = reg.RidgeRegression()
ridge_model.set_params(alpha=0.1)
models = [linear_model, ridge_model]
scores = []

for model in models:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    scores.append(score)
    print("R-squared: " + str(score))
    print(model.get_params())
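# -----------------------------------------------------------------------------
# Note: most examples in this collection import a student-written Regression
# module that is not reproduced here. The calls used throughout are fit(X, y),
# score(X, y) returning R^2, get_params() and set_params(alpha=...).
# The sketch below is only a hypothetical stand-in with that interface
# (closed-form least-squares and ridge solutions), not the course's reference
# code; it is included so the examples can be read end to end.
# -----------------------------------------------------------------------------
# regression_sketch.py  (hypothetical)
import numpy as np


class LinearRegression:
    def __init__(self):
        self.params = {}  # filled with 'intercept' and 'coef' by fit()

    def fit(self, X, y):
        Xb = np.hstack([np.ones((X.shape[0], 1)), X])  # prepend a bias column
        beta = np.linalg.pinv(Xb.T @ Xb) @ Xb.T @ y    # ordinary least squares
        self.params = {'intercept': beta[0], 'coef': beta[1:]}

    def predict(self, X):
        return X @ self.params['coef'] + self.params['intercept']

    def score(self, X, y):
        # R^2 = 1 - SS_res / SS_tot
        y_pred = self.predict(X)
        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        return 1.0 - ss_res / ss_tot

    def get_params(self):
        return self.params

    def set_params(self, **kwargs):
        self.params.update(kwargs)


class RidgeRegression(LinearRegression):
    def __init__(self):
        super().__init__()
        self.alpha = 1.0

    def set_params(self, alpha=None, **kwargs):
        if alpha is not None:
            self.alpha = alpha
        super().set_params(**kwargs)

    def fit(self, X, y):
        Xb = np.hstack([np.ones((X.shape[0], 1)), X])
        penalty = self.alpha * np.eye(Xb.shape[1])
        penalty[0, 0] = 0.0  # do not penalize the intercept
        beta = np.linalg.pinv(Xb.T @ Xb + penalty) @ Xb.T @ y
        self.params = {'intercept': beta[0], 'coef': beta[1:]}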
Example #2
## model_performance.py
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import Regression as myReg

dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

alpha = 0.1
olsreg = myReg.LinearRegression()
rigreg = myReg.RidgeRegression()

rigreg.set_params(alpha=alpha)

models = [olsreg, rigreg]

alpha_array = np.logspace(-2, 1, 10)
score_array_ols = np.zeros(alpha_array.shape)
score_array_rig = np.zeros(alpha_array.shape)

cnt = 0
for alpha_i in alpha_array:
    for model in models:
        model.set_params(alpha=alpha_i)
        model.fit(X_train, y_train)
    # record the test-set R^2 of each model at this alpha
    score_array_ols[cnt] = olsreg.score(X_test, y_test)
    score_array_rig[cnt] = rigreg.score(X_test, y_test)
    cnt += 1
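# Possible continuation for this example (an assumption, since the original is
# cut off here): matplotlib is imported above and the two score arrays are
# indexed by alpha, so a natural next step is to plot test R^2 against alpha.
plt.semilogx(alpha_array, score_array_ols, label='LinearRegression')
plt.semilogx(alpha_array, score_array_rig, label='RidgeRegression')
plt.xlabel('alpha')
plt.ylabel('Test R^2')
plt.legend()
plt.show()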
Example #3
from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

alpha = 0.1
#initialize LR model
LRModel = reg.LinearRegression()

#initialize the RR model
RRModel = reg.RidgeRegression()
RRModel.set_params(alpha=alpha)

#put both models into a list
models = [LRModel, RRModel]

#initialize empty list to store the scores of the models
score = []

#iterate over the models
for model in models:
    model.fit(X_train, y_train)
    score.append(model.score(X_test, y_test))
    print(model.get_params())

#print the computed scores for the different models in nice format
for model, s in zip(models, score):
    print("{}: R^2 = {:.4f}".format(type(model).__name__, s))
Example #4
from sklearn import datasets
from sklearn.model_selection import train_test_split
#import regression classes
import Regression as Reg

dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

alpha = 0.1
linreg = Reg.LinearRegression()
ridreg = Reg.RidgeRegression()
ridreg.set_params(alpha=alpha)
models = [linreg, ridreg]

model_scores = []
for model in models:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    model_scores.append(score)
    print(str(type(model).__name__) + " has R^2 score of: " + str(score))

best_model = models[model_scores.index(max(model_scores))]
print("The best model is " + str(type(best_model).__name__))
print("And params for the best model are: ")
print(best_model.get_params())
Example #5
plt.axhline(y=85, color='k', linestyle='--', label='85%')
plt.xticks(np.arange(1, features + 1, 1))

plt.xlabel('Number of Components')
plt.ylabel('Variance Explained')
plt.legend()

plt.show()

#####################

#####################
## Linear Regression
#####################

linear = R.LinearRegression()
X_train, X_test, y_train, y_test = data.getSplitData()
linear.train(features,
             X_train,
             X_test,
             y_train,
             y_test,
             n_jobs=1,
             verbose=True,
             startIndex=1)
linear.fit(X, y)
#func = linear.function(columnNames=['D','E', 'F', 'G', 'L', 'P', 'U', 'AA', 'AB', 'AD'], featureStartIndex = 3)
#func = linear.function(columnNames=['D','E', 'F', 'G', 'P','W','X','Y','AA', 'AB', 'AD'], featureStartIndex = 3)

linear.function(columnNames=[
    feature_columns[letter - ord('A')]
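# The plot at the top of this example draws a cumulative variance-explained
# curve against the number of components, but the code that computes the curve
# is not part of the fragment. One common way to obtain it (an assumption --
# sklearn's PCA is used here purely for illustration, and X_train stands in
# for whatever feature matrix the omitted code analyzed):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

pca = PCA().fit(X_train)
cum_var = np.cumsum(pca.explained_variance_ratio_) * 100  # cumulative % of variance explained
features = X_train.shape[1]
plt.plot(np.arange(1, features + 1), cum_var, marker='o')
# ...followed by the threshold line, ticks, labels, and legend shown above.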
Example #6
from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

alpha = 0.1
rdg_regress = reg.RidgeRegression()
rdg_regress.set_params(alpha=alpha)
models = [reg.LinearRegression(), rdg_regress]

model_scores = {}
model_params = {}

for model in models:
    model.fit(X_train, y_train)
    name = model.__class__.__name__
    model_scores[name] = model.score(X_test, y_test)
    model_params[name] = model.get_params()
    print("The model is : {}. The R-squared value on the test dataset is : {}.".format(
        name, model_scores[name]))

best_model = max(model_scores, key=model_scores.get)
print("The best model is : {} \nParameters are : \n{}".format(
    best_model, model_params[best_model]))
Example #7
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
#Regression and Evaluation below come from the student's own module (not shown here)

irisdata = datasets.load_iris()
X_train, X_test, Target_train, Target_test = train_test_split(irisdata.data,
                                                              irisdata.target,
                                                              test_size=.4)
Batch_size = 10
epoch_num = int(len(X_train) / Batch_size)
MeanSquareError = np.zeros((3, epoch_num))
R2Score = np.zeros((3, epoch_num))
for epoch in range(epoch_num):
    start = epoch * Batch_size  # non-overlapping mini-batches
    X_batch = X_train[start:start + Batch_size, :]
    Y_batch = Target_train[start:start + Batch_size]
    Reg = Regression(X_batch, Y_batch)
    LinReg, _ = Reg.LinearRegression(X_batch)
    RigReg, _ = Reg.RidgeRegression(X_batch, alpha=0.1)
    LasReg, _ = Reg.LassoRegression(X_batch, alpha=0.1)
    LinReg_Eval = Evaluation(LinReg, Y_batch)
    RigReg_Eval = Evaluation(RigReg, Y_batch)
    LasReg_Eval = Evaluation(LasReg, Y_batch)

    MeanSquareError[0, epoch] = LinReg_Eval.MeanSquarErr()
    MeanSquareError[1, epoch] = RigReg_Eval.MeanSquarErr()
    MeanSquareError[2, epoch] = LasReg_Eval.MeanSquarErr()
    R2Score[0, epoch] = LinReg_Eval.R2Square()
    R2Score[1, epoch] = RigReg_Eval.R2Square()
    R2Score[2, epoch] = LasReg_Eval.R2Square()

fig = plt.figure()
fig1 = fig.add_subplot(1, 1, 1)
for i, name in enumerate(['Linear', 'Ridge', 'Lasso']):
    fig1.plot(MeanSquareError[i, :], label=name)
fig1.set_xlabel('Batch')
fig1.set_ylabel('Mean squared error')
fig1.legend()
plt.show()
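# Example #7 relies on Regression and Evaluation classes that are not shown.
# Judging from the calls above, Evaluation is built from a vector of
# predictions and the matching targets; the sketch below is a hypothetical
# stand-in implementing the two metrics the example reads off it.
import numpy as np


class Evaluation:
    def __init__(self, y_pred, y_true):
        self.y_pred = np.asarray(y_pred)
        self.y_true = np.asarray(y_true)

    def MeanSquarErr(self):
        # mean squared error: average squared residual
        return np.mean((self.y_true - self.y_pred) ** 2)

    def R2Square(self):
        # coefficient of determination: 1 - SS_res / SS_tot
        ss_res = np.sum((self.y_true - self.y_pred) ** 2)
        ss_tot = np.sum((self.y_true - np.mean(self.y_true)) ** 2)
        return 1.0 - ss_res / ss_tot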
Example #8
from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg

import numpy as np

dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(dataset['data'],
                                                    dataset['target'],
                                                    test_size=0.2,
                                                    random_state=42)

#instantiate the linear model
linear = reg.LinearRegression()
linear.fit(X_train, y_train)
linear_score = linear.score(X_test, y_test)
print(linear_score)

alpha = 0.5
ridge = reg.RidgeRegression()
ridge.set_params(alpha=alpha)
ridge.fit(X_train, y_train)
ridge_score = ridge.score(X_test, y_test)
print(ridge_score)

#refit both models and compare their test scores
models = [linear, ridge]

for model in models:
    model.fit(X_train, y_train)
    print(type(model).__name__, model.score(X_test, y_test))