def compare_models(X_stand, y_fromStand, modelsToCompare):
    """Repeat two-level cross-validation for seven linear-regression inputs.

    One ``LinearRegression`` estimator is evaluated ``K3`` times on each of
    the following inputs; the error of each goes into its own column:

    0. the data as given (``twoLevelCV_single``)
    1. the data as given (``twoLevelCV_single_PCA``)
    2. output of ``x_add_features``
    3. output of ``x_tilda_poly``
    4. output of ``x_tilda_transform``
    5. output of ``x_tilda_downSample`` applied to the transformed data of
       column 4 with ``features = [1, 2]``
    6. output of ``x_tilda_transform`` (``twoLevelCV_single_PCA``)

    Args:
        X_stand: Feature matrix (presumably standardized, judging by the
            name -- not verified here).
        y_fromStand: Target values belonging to ``X_stand``.
        modelsToCompare: Sized collection with one entry per compared model.
            Only its length is used, to size the error table; it must hold
            at least seven entries.

    Returns:
        1-D array with ``len(modelsToCompare)`` entries: the square root of
        each column's error averaged over the ``K3`` repetitions (an RMSE if
        the CV helpers return mean squared errors -- TODO confirm). Columns
        beyond the seventh are never written and therefore come out as 0.

    Raises:
        IndexError: If ``modelsToCompare`` has fewer than seven entries.
    """
    # The loop below always writes columns 0..6. Reject a too-short list up
    # front instead of failing with an opaque IndexError after several
    # expensive cross-validation runs.
    n_filled_columns = 7
    if len(modelsToCompare) < n_filled_columns:
        raise IndexError(
            "modelsToCompare must contain at least {} entries, got {}".format(
                n_filled_columns, len(modelsToCompare)
            )
        )

    # -------------------------------------------------------
    # Initialize comparison relevant parameters
    model = lm.LinearRegression()
    K1 = 10
    K2 = 10
    K3 = 10  # Number of total comparison loops
    features = np.array([1, 2])  # selection passed to x_tilda_downSample

    modelErrors = np.zeros((K3, len(modelsToCompare)))

    for i in range(K3):
        # Regular model
        modelErrors[i, 0] = twoLevelCV_single(
            X_stand, y_fromStand, model, K1, K2
        )

        # PCA model
        modelErrors[i, 1] = twoLevelCV_single_PCA(
            X_stand, y_fromStand, model, K1, K2
        )

        # Added features model
        xIn, yIn = x_add_features(X_stand, y_fromStand)
        modelErrors[i, 2] = twoLevelCV_single(xIn, yIn, model, K1, K2)

        # Polynomial regression model
        xIn, yIn = x_tilda_poly(X_stand, y_fromStand)
        modelErrors[i, 3] = twoLevelCV_single(xIn, yIn, model, K1, K2)

        # Transformed features model
        xTrans, yTrans = x_tilda_transform(X_stand, y_fromStand)
        modelErrors[i, 4] = twoLevelCV_single(xTrans, yTrans, model, K1, K2)

        # Chosen features model.
        # NOTE(review): this selects from the *transformed* data of the
        # previous step, not from X_stand -- confirm that this is intended.
        xIn, yIn = x_tilda_downSample(xTrans, yTrans, features)
        modelErrors[i, 5] = twoLevelCV_single(xIn, yIn, model, K1, K2)

        # Transformed + PCA features model (transform is recomputed so the
        # sequence of helper calls stays exactly as before)
        xIn, yIn = x_tilda_transform(X_stand, y_fromStand)
        modelErrors[i, 6] = twoLevelCV_single_PCA(xIn, yIn, model, K1, K2)

    # Root of the per-column mean error, used for the plots
    modelErrorsAvg = np.sqrt(np.mean(modelErrors, axis=0))
    return modelErrorsAvg
Created: 13.11.2020 """ import matplotlib.pyplot as plt import numpy as np import torch from sklearn import model_selection import sklearn.linear_model as lm from scipy import stats from ANN_functions import * from concNoZero_config import * from featureTransform import x_add_features from regularization import rlr_validate, regmultinominal_regression xIn,yIn = x_add_features(X_stand, y_fromStand) M = xIn.shape[1] attributeNames.append('Xf1') attributeNames.append('Xf2') attributeNames.append('Xf3') classNames = classNames.tolist() #%% # Unbalanced Dataset print("Observations of Low Concrete = {}".format(np.sum(y_class.squeeze()==0))) print("Observations of Medium Concrete = {}".format(np.sum(y_class.squeeze()==1))) print("Observations of High Concrete = {}".format(np.sum(y_class.squeeze()==2))) # BASELINE CLASSIFICATION MODEL baseline_class = np.array((np.sum(y_class.squeeze()==0), np.sum(y_class.squeeze()==1), np.sum(y_class.squeeze()==2))) baseline_model_prediction = np.argmax(baseline_class)*np.ones(y_class.shape[0])
def correlated_ttest(r, rho, alpha=0.05):
    """Correlated t-test on a sample of performance differences.

    The spread of ``r`` is inflated by ``sqrt(1/J + rho/(1 - rho))`` and a
    Student t distribution with ``J - 1`` degrees of freedom is used for
    both the confidence interval and the two-sided p-value.

    Args:
        r: Sequence of ``J`` differences (e.g. per-split differences between
            two models -- the function itself only needs numbers).
        rho: Correlation term of the variance correction; must be below 1,
            since ``rho == 1`` divides by zero.
        alpha: Significance level; the interval has coverage ``1 - alpha``.

    Returns:
        Tuple ``(p, CI)``: ``p`` is the two-sided p-value for a mean of
        zero, ``CI`` the ``(lower, upper)`` interval around ``mean(r)``.
    """
    # Bound locally so the function does not rely on an ``st`` alias
    # leaking in from one of the file's star imports.
    import scipy.stats as st

    rhat = np.mean(r)
    # NOTE(review): np.std defaults to ddof=0 (population standard
    # deviation); confirm whether the sample estimate (ddof=1) was intended.
    shat = np.std(r)
    J = len(r)
    sigmatilde = shat * np.sqrt(1 / J + rho / (1 - rho))

    CI = st.t.interval(1 - alpha, df=J - 1, loc=rhat, scale=sigmatilde)  # Confidence interval
    p = 2 * st.t.cdf(-np.abs(rhat) / sigmatilde, df=J - 1)  # p-value
    return p, CI


#_______CREATE DATASET WITH ADDED FEATURES_______
xIn, yIn = x_add_features(X_stand, y_class)

# Initialize 2 layer CV parameters
K1 = 5
K2 = 5

# Values of lambda: 20 log-spaced candidates from 1e-5 to 1e5
# (presumably regularization strengths -- confirm against the consumer)
lambdas = np.logspace(-5, 5, 20)

# Range of hidden units
hidden_units = np.array((1, 3, 6, 8, 11, 15))

# Parameters for ANN training part
CV_ann = 2
n_replicates = 1
max_iter = 15000
tolerance = 1e-7