Example #1
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


class SgdLibraryLinearRegression:
    def __init__(self, data):
        self.df = pd.read_csv(data)
        self.regressor = SGDRegressor(max_iter=40,
                                      tol=1e-5,
                                      learning_rate='constant',
                                      eta0=0.06)

    def preprocess(self):
        # Removing Null values
        self.df.dropna(inplace=True)

        # Removing Duplicates
        self.df.drop_duplicates(inplace=True)

        # Checking the type of input data
        self.df.dtypes

        # Since horsepower is of object type, we inspect its unique values to understand the attribute
        self.df['horsepower'].unique()

        # The horsepower column contains '?' placeholders among the numeric values, so we drop those rows
        self.df = self.df[self.df.horsepower != '?']

        # We are then casting the object to float for further processing
        self.df['horsepower'] = self.df['horsepower'].astype('float')

        # Remove the car name attribute since it is an identifier and does not help predict mpg
        self.df.drop(['car name'], axis=1, inplace=True)

        # The target (mpg) is the first column; the feature attributes start at column 1
        self.X = self.df.iloc[:, 1:].values
        self.Y = self.df.iloc[:, 0].values

        # Scaling the input attributes
        self.X = StandardScaler().fit_transform(self.X)

        # Splitting the data into training and test data set of the proportion 70:30
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
            self.X, self.Y, test_size=0.3, random_state=1)

    def train(self, epoch_count=40, learning_rate=0.06):
        self.regressor = SGDRegressor(max_iter=epoch_count,
                                      tol=1e-5,
                                      learning_rate='constant',
                                      eta0=learning_rate)
        # Running the training by calling the library method
        self.regressor.fit(self.X_train, self.Y_train)

    def predictTrain(self):
        # Predicting the values based on the training data
        self.Y_pred = self.regressor.predict(self.X_train)

        # Getting the R^2 score from the library score method
        self.accuracy_score = self.regressor.score(self.X_train, self.Y_train)
        # print("accuracy score ", accuracy_score)

        # Getting the mean squared error by comparing the predicted values with the actual training values
        self.calculated_mse = mean_squared_error(self.Y_train, self.Y_pred)
        # print("mean square error ",calculated_mse)

        # Getting the r2 score by comparing the predicted values with the actual training values
        self.r2_scor = r2_score(self.Y_train, self.Y_pred)
        # print("r2_score ", r2_scor)

        return self.calculated_mse

    def print(self):
        print("accuracy score ", self.accuracy_score)
        print("mean square error ", self.calculated_mse)
        print("r2_score ", self.r2_scor)

    def predictTest(self):
        # Predicting the values based on the test data
        self.Y_pred = self.regressor.predict(self.X_test)

        # Getting the R^2 score from the library score method
        self.accuracy_score = self.regressor.score(self.X_test, self.Y_test)
        # print("accuracy score ", accuracy_score)

        # Getting the mean squared error by comparing the predicted value with the actual test value
        self.calculated_mse = mean_squared_error(self.Y_test, self.Y_pred)
        # print("mean square error ", calculated_mse)

        # Getting the r2 score by comparing the predicted value with the actual test value
        self.r2_scor = r2_score(self.Y_test, self.Y_pred)
        # print("r2_score ", r2_scor)

        return self.calculated_mse

    def plotLearningRate(self, epoch_count, min_rate, max_rate, step, color):
        # Sweep the learning rate from max_rate down to min_rate and record the test MSE
        mse_error = list()
        x_scale = list()
        learning_rate = max_rate
        label = "epoch = " + str(epoch_count)
        while learning_rate >= min_rate:
            self.train(epoch_count, learning_rate)
            mse_error.append(self.predictTest())
            x_scale.append(learning_rate)
            learning_rate -= step
        return plt.scatter(x_scale, mse_error, color=color, label=label)
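
A minimal usage sketch for the class above, assuming the UCI auto-mpg data has been saved locally as auto-mpg.csv (a hypothetical file name) with mpg as the first column, a horsepower column, and a 'car name' column:

# Usage sketch (not part of the original example); 'auto-mpg.csv' is a hypothetical local file
model = SgdLibraryLinearRegression('auto-mpg.csv')
model.preprocess()
model.train(epoch_count=40, learning_rate=0.06)
model.predictTest()
model.print()

# Compare test MSE across learning rates for two epoch budgets
model.plotLearningRate(40, min_rate=0.01, max_rate=0.1, step=0.01, color='blue')
model.plotLearningRate(80, min_rate=0.01, max_rate=0.1, step=0.01, color='red')
plt.xlabel('learning rate')
plt.ylabel('test MSE')
plt.legend()
plt.show()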
Example #2
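This example starts from an already fitted SGDRegressor on simulated data. Below is a minimal setup sketch under that assumption; make_regression, the sample sizes, and the SGDRegressor settings are assumptions, while the names X, y, coef, train_x, test_x, train_y, test_y, and linear_SGD are taken from the snippet (plt_helper and myfont are plotting helpers assumed to be defined elsewhere).

# Setup sketch (assumed, not from the original snippet)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor, LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score

# Simulated linear data; coef holds the true coefficients used to generate y
X, y, coef = make_regression(n_samples=200, n_features=2, noise=10.0,
                             coef=True, random_state=0)
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

linear_SGD = SGDRegressor(max_iter=1000, tol=1e-4)
linear_SGD.fit(train_x, train_y)
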
y_SGD = linear_SGD.predict(test_x)

linear_rg = LinearRegression(
    fit_intercept=True,  # compute the intercept
    normalize=False,  # do not normalize the data before fitting (this parameter was removed in scikit-learn 1.2)
    copy_X=True,  # copy X so the original data is not modified
    n_jobs=-1)  # use all CPUs
linear_rg.fit(train_x, train_y)
y_rg = linear_rg.predict(test_x)

print('True (simulated) coefficients:', coef)
print('SGDRegressor coefficients:', linear_SGD.coef_)
print('LinearRegression coefficients:', linear_rg.coef_)

scores = cross_val_score(linear_SGD, train_x, train_y, cv=5)
print('SGDRegressor cross-validation R^2 scores:', scores)
print('SGDRegressor mean cross-validation R^2:', np.mean(scores))
print('SGDRegressor test-set R^2:', linear_SGD.score(test_x, test_y))

scores = cross_val_score(linear_rg, train_x, train_y, cv=5)
print('LinearRegression cross-validation R^2 scores:', scores)
print('LinearRegression mean cross-validation R^2:', np.mean(scores))
print('LinearRegression test-set R^2:', linear_rg.score(test_x, test_y))

#%%
ax2 = plt_helper('ax1', 'Comparing the regression models')
ax2.plot(X[:, 0], y, 'r*', label='simulated data')
ax2.plot(test_x[:, 0], y_SGD, '-k', label='SGDRegressor model')
ax2.plot(test_x[:, 0], y_rg, '--b', label='LinearRegression model')  # dashed so it is distinguishable from the SGD line
ax2.legend(loc='best', prop=myfont)