def scale_data(train_data, test_data):
    # Fit the scaler on the training features only, then apply the same
    # transformation (scaling plus the x0 = 1 bias column) to both sets
    Z_train, y_train = zip(*train_data)
    scale = Scaler()
    scale.fit(Z_train)
    transform = compose(prepend_x0, scale.transform)
    scaledX_train = transform(Z_train)
    scaled_train = list(zip(scaledX_train, y_train))
    Z_test, y_test = zip(*test_data)
    scaledX_test = transform(Z_test)
    scaled_test = list(zip(scaledX_test, y_test))
    return scaled_train, scaled_test
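The scale_data helper relies on compose and prepend_x0 from the accompanying utility code, which this listing does not show. A minimal sketch of what those two helpers are assumed to do (right-to-left function composition, and prepending the bias feature x0 = 1) is:

from functools import reduce

def compose(*funcs):
    """Right-to-left composition: compose(f, g)(x) == f(g(x))."""
    return reduce(lambda f, g: lambda x: f(g(x)), funcs)

def prepend_x0(X):
    """Prepend the bias feature x0 = 1 to every observation."""
    return [[1.0] + list(row) for row in X]

With these assumed definitions, compose(prepend_x0, scale.transform) first standardizes each row and then adds the leading 1 that the intercept term h_theta[0] multiplies.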
import numpy as np
from utility import Scaler
from ml_util import train_test_split
from out_utils import plot_cost, plot_errors
# pluck is assumed to come from the book's functional helpers (e.g. toolz.pluck)

# Get the data
input = np.loadtxt('./data/Folds.csv', delimiter=',', skiprows=1)
Z = np.array(list(pluck(list(range(0, len(input[0]) - 1)), input)))
y = np.array(list(pluck(len(input[0]) - 1, input)))
data = list(zip(Z, y))

# Split into a train set and test set
train_data, test_data = train_test_split(data, 0.33)

# Scale the training data
scale = Scaler()
Z_train, y_train = zip(*train_data)
scale.fit(Z_train)
X_train = scale.transform(Z_train)
scaledtrain_data = list(zip(X_train, y_train))

# Scale the testing data using the same scaling parameters
# used for the training data
Z_test, y_test = zip(*test_data)
X_test = scale.transform(Z_test)

print('****Minibatch Gradient Descent****')
print('\n--Training--\n')
hyperparam = {'eta': 0.3, 'epochs': 300, 'minibatches': 1, 'adaptive': 0.99}
print('\nHyperparameters\n')
for k, v in hyperparam.items():
    print(k, '=', v)
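The listing configures minibatch gradient descent, but glm.fit's internals are not shown. As a rough sketch (an assumption, not the book's implementation) of how one epoch's minibatches could be formed from scaledtrain_data, with minibatches=1 collapsing to ordinary full-batch gradient descent:

import random

def minibatch_iter(data, n_batches, seed=0):
    """Yield roughly equal, shuffled slices of the training data for one epoch.

    Sketch only; the actual glm.fit may partition the data differently.
    """
    rows = list(data)
    random.Random(seed).shuffle(rows)
    size = max(1, len(rows) // n_batches)
    for start in range(0, len(rows), size):
        yield rows[start:start + size]

Each yielded slice would drive one parameter update; the 'adaptive' factor of 0.99 suggests the step size eta is shrunk between epochs, though that is not visible from this fragment.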
# Get Sepal Length and Petal Length features
Zp = list(pluck([0, 2], Z))

# Get only the Iris Setosa (0) and Iris Versicolour (1) classes
datap = [[f, o] for f, o in zip(Zp, y) if o != 2.0]
Xp, yp = zip(*datap)
y = list(yp)
Xpp = [list(e) for e in Xp]
print(Xpp)
print(y)

# Split set into training and testing data
train_data, test_data = train_test_split(list(zip(Xpp, y)), 0.33)

# Scale the data
X_train, y_train = zip(*train_data)
scale = Scaler()
scale.fit(X_train)
transform = compose(prepend_x0, scale.transform)
scaledX_train = transform(X_train)
scaled_train = list(zip(scaledX_train, y_train))

# Fit the training data
h_theta0 = [1., 1., 1.]
print('****Gradient Descent****\n')
print('--Training--\n')
h_thetaf, cost = glm.fit(logr.logistic_log_likelihood, logr.grad_logistic,
                         h_theta0, scaled_train, eta=0.03, it_max=500, gf='gd')
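After fitting, predictions come from the logistic (sigmoid) of the linear score theta^T x. The book's logr module presumably exposes an equivalent; the following is only a minimal sketch, using the fitted h_thetaf and the scaled training pairs from above:

import math

def logistic(z):
    return 1.0 / (1.0 + math.exp(-z))

def predict_proba(h_theta, x):
    """P(y = 1 | x) for one scaled observation x (bias term x0 = 1 included)."""
    return logistic(sum(t * xi for t, xi in zip(h_theta, x)))

# Classify with a 0.5 threshold, e.g. on the scaled training pairs
preds = [1 if predict_proba(h_thetaf, x) >= 0.5 else 0 for x, _ in scaled_train]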
import metrics
import linear_regression as lr
import glm
from ml_util import train_test_split
from utility import Scaler
import numpy as np
from numpy.linalg import lstsq
# csv_reader, compose and prepend_x0 are assumed available from the book's helper modules

# Get the data
Z, y = csv_reader('./data/Folds_small.csv', ['AT', 'V', 'AP', 'RH'], 'PE')
data = list(zip(Z, y))

# Split into a train set and test set
train_data, test_data = train_test_split(data, 0.33)

# Scale the training data
scale = Scaler()
Z_train, y_train = zip(*train_data)
scale.fit(Z_train)
transform = compose(prepend_x0, scale.transform)
X_train = transform(Z_train)
scaledtrain_data = list(zip(X_train, y_train))

# Scale the testing data using the same scaling parameters
# used for the training data
Z_test, y_test = zip(*test_data)
X_test = transform(Z_test)

h_theta0 = [0., 0., 0., 0., 0.]
print('****Gradient Descent****')
# The original call is truncated here; the training data argument and gf='gd'
# are assumed below, mirroring the logistic-regression listing
h_thetaf, cost = glm.fit(lr.J, lr.gradJ, h_theta0, scaledtrain_data,
                         eta=0.3, it_max=5000, gf='gd')
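numpy's lstsq is imported but the fragment ends before it is used; presumably it provides a closed-form least-squares baseline against which the gradient-descent coefficients can be checked. A sketch of that comparison, assuming X_train and y_train as built above:

# Closed-form ordinary least squares on the scaled training design matrix
A = np.array(X_train, dtype=float)
b = np.array(y_train, dtype=float)
theta_lstsq, residuals, rank, sv = lstsq(A, b, rcond=None)
print('lstsq coefficients:           ', theta_lstsq)
print('gradient-descent coefficients:', h_thetaf)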