def pls_crossval(X, y, n_comp, **kwargs):
    """Run the component-count search and explained-variance check on (X, y).

    Calls ``optimal_n_comp`` and then ``variance_explained`` with the same
    ``X``, ``y`` and ``n_comp``. Neither result is used or returned.

    Args:
        X: Predictor data, forwarded unchanged to both helpers.
        y: Response data, forwarded unchanged to both helpers.
        n_comp: Component count forwarded to both helpers (presumably the
            maximum number of PLS components to evaluate -- TODO confirm).
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        None.
    """
    # NOTE(review): this file holds another definition named
    # ``pls_crossval``; whichever one executes last rebinds the name.

    # The value returned by optimal_n_comp was previously bound to a local
    # that was never read, so the binding is dropped. The call itself is kept
    # because it may plot or print (TODO confirm).
    optimal_n_comp(X, y, n_comp)
    variance_explained(X, y, n_comp)
# Perform variable selection (from the regression on the whole dataset) by
# applying the "variable_selection" step of pip_sel to the pre-processed data.
# NOTE(review): pip_sel is assumed to be fitted before this cell runs -- confirm.
X_train_0_sel = pip_sel["variable_selection"].transform(X_train_0)
X_test_0_sel = pip_sel["variable_selection"].transform(X_test_0)
data_en_sel = {
    "X": X_train_0_sel,
    "y": y_train,
    "X_test": X_test_0_sel,
    "y_test": y_test,
}

# %%
# Variance explained and MSECV for train and test set for each PLS component,
# computed on the full (unselected) set of variables.
var, comp = variance_explained(X_train_0, y_train, plot=False)
var_2, comp_2 = variance_explained(X_test_0, y_test, plot=False)
extra_plot_variance_explained(var, comp, var_2, comp_2)

mse, comp = mse_minimum(X_train_0, y_train, plot=False)
mase_2, comp_2 = mse_minimum(X_test_0, y_test, plot=False)
extra_plot_mse(mse, comp, mase_2, comp_2)

# %%
# Variance explained and MSECV for train and test set for each component,
# computed on the selected variables only.
var, comp = variance_explained(X_train_0_sel, y_train, plot=False)
var_2, comp_2 = variance_explained(X_test_0_sel, y_test, plot=False)
extra_plot_variance_explained(var, comp, var_2, comp_2)

# The MSE curves must use the *_sel arrays as well; with the unselected
# arrays this cell only redraws the MSE plot of the previous cell.
mse, comp = mse_minimum(X_train_0_sel, y_train, plot=False)
mase_2, comp_2 = mse_minimum(X_test_0_sel, y_test, plot=False)
extra_plot_mse(mse, comp, mase_2, comp_2)
# Pre-processing pipeline. Step names are kept exactly as spelled because
# steps are looked up by name.
pip_dev0 = Pipeline(
    [
        ("scaleing_X", GlobalStandardScaler()),
        ("scatter_correction", EmscScaler()),
        ("smmothing", SavgolFilter(polyorder=2, deriv=0)),
        # ("variable_selection", EnetSelect())
    ]
)

# Fit on the training data only, then apply the same transform to the test data.
X_train_0 = pip_dev0.fit_transform(X_train, y_train)
X_test_0 = pip_dev0.transform(X_test)

# %%
variance_explained(X_train_0, y_train)

# %%
mse_minimum(X_train_0, y_train)


# %%
def pls_crossval(X, y, n_comp, **kwargs):
    """Fit a PLS model with ``n_comp`` components, score it, and return it.

    Args:
        X: Predictor data passed to ``pls_regression`` and ``pls_scores``.
        y: Response data passed to ``pls_regression`` and ``pls_scores``.
        n_comp: Number of PLS components for the fit.
        **kwargs: Accepted but not used.

    Returns:
        The model object returned by ``pls_regression``.
    """
    # Fit the PLS model with the requested number of components.
    fitted = pls_regression(X, y, n_comp)
    # Compute score and MSE for the fitted model; the return value is discarded.
    pls_scores(X, y, fitted)
    return fitted
"""returns a model with the optimsed number of pls components""" # returns n_comp with lowest loss opt_comp = optimal_n_comp(X, y, n_comp, plot=plot) # performs regression with n_comp opt_model = pls_regression(X,y, opt_comp, plot=plot) # returns regression scores pls_scores(X,y, opt_model) return opt_model # %% variance_explained(X_train_en, y_train, n_comp=20, plot=True) variance_explained(X_train_pip, y_train, n_comp=20, plot=True) # %% # %% model_en = pls_regression(**data_en, n_comp =3) model_pip = pls_regression(**data_pip, n_comp =2) # %%