Exemplo n.º 1
0
def train_regression_using_standardisation(data, x_col, y_col, model):
    """
    Fit ``model`` on a train split of the pipeline output, predict on the
    held-out split and plot the residuals of those predictions.

    :param data: the dataset, forwarded to ``apply_pipeline``
    :param x_col: the x inputs, forwarded to ``apply_pipeline``
    :param y_col: the y outputs, forwarded to ``apply_pipeline``
    :param model: an object exposing ``fit`` and ``predict``
    :return: a tuple ``(model, error, fig)``: the fitted model, the square
        root of ``mean_squared_error`` on the held-out split, and the
        figure returned by ``plot_residual``
    """
    # NOTE(review): the standardisation itself presumably happens inside
    # apply_pipeline - nothing in this function scales the data; confirm.
    # The first value returned by the pipeline is not needed here.
    _, features, targets = apply_pipeline(data, x_col, y_col)

    # Hold out the fraction given by test_size=0.3; no random_state is
    # passed, so the split is not pinned to a seed in this function.
    train_x, test_x, train_y, test_y = train_test_split(
        features, targets, test_size=0.3)

    model.fit(train_x, train_y)
    predicted = model.predict(test_x)

    title = 'Linear Regression using Standardisation'
    residual_fig, _ = plot_residual(test_y.to_numpy(), predicted, title)
    residual_fig.show()

    error = sqrt(mean_squared_error(test_y, predicted))
    return model, error, residual_fig
Exemplo n.º 2
0
from pandas import read_csv
from os.path import join as path_join

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score

from models import train_linear_regression_pca
from pipeline import apply_pipeline
from utils.reduce_vif import ReduceVIF
import logging

# Script body: load the training CSV, run it through the project pipeline,
# plot the PCA regression steps, then compare cross-validated R^2 of a plain
# LinearRegression on the full feature set against the ReduceVIF-transformed
# feature set.

# Make INFO-level messages from the root logger visible.
logging.root.setLevel(logging.INFO)

# Every column except 'critical_temp' is passed as the inputs and
# 'critical_temp' as the output; apply_pipeline hands back three values.
data = read_csv(path_join('./', 'data', 'train.csv'))
data, x, y = apply_pipeline(data, data.columns.difference(['critical_temp']),
                            ['critical_temp'])

# train_linear_regression_pca returns a (fig, ax) pair; only fig is used.
fig, ax = train_linear_regression_pca(x, y)
# fig.set_title('PCA improvements with the addition of components')
fig.show()
# NOTE(review): assumes the 'figs' directory already exists relative to the
# working directory - confirm, nothing here creates it.
fig.savefig('figs/pcasteps.png')

# NOTE(review): ReduceVIF presumably drops features with a high variance
# inflation factor - its implementation is not visible here; confirm.
vif = ReduceVIF()
x_reduced = vif.fit_transform(x)
# NOTE(review): the name says 10 but n_splits=5 - confirm which is intended.
# shuffle=True with random_state=2 fixes the seed used for shuffling.
kf_10 = KFold(n_splits=5, shuffle=True, random_state=2)
regr = LinearRegression()
# Both scores use the same estimator object, the same splitter and
# scoring='r2'; each is the mean over the folds.
vif_score = cross_val_score(regr, x_reduced, y, cv=kf_10, scoring='r2').mean()
benchmark = cross_val_score(regr, x, y, cv=kf_10, scoring='r2').mean()
logging.info(
    f'Benchmark regression training scores {benchmark} vif_score {vif_score}')
logging.info(
Exemplo n.º 3
0
def process_image(img):
    """Pass ``img`` through ``apply_pipeline`` and return its result.

    The second positional argument is always the literal string
    ``"unknown"``; its meaning is defined by ``apply_pipeline``, which is
    not visible here.

    :param img: the object forwarded as the first argument
    :return: whatever ``apply_pipeline`` returns
    """
    result = apply_pipeline(img, "unknown")
    return result