Example #1
from utility import Scaler
# compose and prepend_x0 are helpers from this project's utility modules


def scale_data(train_data, test_data):
    """Scale train and test features with parameters fit on the train set only."""
    Z_train, y_train = zip(*train_data)
    # Fit the scaler on the training features only, to avoid test-set leakage
    scale = Scaler()
    scale.fit(Z_train)
    # Standardize each feature vector, then prepend the bias term x0 = 1
    transform = compose(prepend_x0, scale.transform)
    scaledX_train = transform(Z_train)
    scaled_train = list(zip(scaledX_train, y_train))
    # Apply the same train-fitted transform to the test features
    Z_test, y_test = zip(*test_data)
    scaledX_test = transform(Z_test)
    scaled_test = list(zip(scaledX_test, y_test))
    return scaled_train, scaled_test
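A minimal usage sketch (hypothetical: it assumes feature rows Z and targets y are already loaded, and borrows train_test_split from ml_util as in Example #2):

from ml_util import train_test_split

# Z: feature rows, y: targets, loaded elsewhere in the project
data = list(zip(Z, y))
train_data, test_data = train_test_split(data, 0.33)
scaled_train, scaled_test = scale_data(train_data, test_data)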
Example #2
import numpy as np
from toolz import pluck  # assuming pluck is the toolz helper; swap for the project's own if different
from utility import Scaler
from ml_util import train_test_split
from out_utils import plot_cost, plot_errors


# Get the data; the last column is the target, the rest are features
# (renamed from `input` to avoid shadowing the builtin)
raw = np.loadtxt('./data/Folds.csv', delimiter=',', skiprows=1)
Z = np.array(list(pluck(list(range(0, len(raw[0]) - 1)), raw)))
y = np.array(list(pluck(len(raw[0]) - 1, raw)))
data = list(zip(Z, y))  # materialize so the split can traverse it safely
# Split into a train set and test set
train_data, test_data = train_test_split(data, 0.33)
# Scale the training data
scale = Scaler()
Z_train, y_train = zip(*train_data)
scale.fit(Z_train)
X_train = scale.transform(Z_train)
scaledtrain_data = list(zip(X_train, y_train))
# Scale the testing data using the same scaling parameters
# used for the training data
Z_test, y_test = zip(*test_data)
X_test = scale.transform(Z_test)

print('****Minibatch Gradient Descent****')
print('\n--Training--\n')
hyperparam = {'eta': 0.3,
              'epochs': 300,
              'minibatches': 1,
              'adaptive': 0.99}
print('\nHyperparameters\n')
for k, v in hyperparam.items():
    print(f'{k}: {v}')
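The Scaler imported from utility is not shown in these examples. A minimal sketch of a class with the same fit/transform interface, assuming it performs ordinary z-score standardization (a guess, not the project's actual implementation):

import numpy as np

class Scaler:
    """Hypothetical stand-in: standardize features to zero mean, unit variance."""
    def fit(self, Z):
        Z = np.asarray(Z, dtype=float)
        self.mu = Z.mean(axis=0)
        self.sigma = Z.std(axis=0)
        return self

    def transform(self, Z):
        # Uses the statistics captured by fit(), so test data is scaled
        # with the training set's mean and standard deviation
        return [(np.asarray(z, dtype=float) - self.mu) / self.sigma for z in Z]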
Example #3
import glm
import logistic_regression as logr  # module name assumed, by analogy with Example #4

# Z holds the Iris feature rows and q the class labels (0, 1, 2),
# both loaded earlier in the script
# Get the Sepal Length and Petal Length features
Zp = list(pluck([0, 2], Z))
# Keep only the Iris Setosa (0) and Iris Versicolour (1) classes
datap = [[f, o] for f, o in zip(Zp, q) if o != 2.0]
Xp, yp = zip(*datap)
y = list(yp)
Xpp = [list(e) for e in Xp]
print(Xpp)
print(y)

# Split set into training and testing data
train_data, test_data = train_test_split(list(zip(Xpp, y)), 0.33)
# Scale the data
X_train, y_train = zip(*train_data)
scale = Scaler()
scale.fit(X_train)
transform = compose(prepend_x0, scale.transform)
scaledX_train = transform(X_train)
scaled_train = list(zip(scaledX_train, y_train))  # a list, not a one-shot iterator: the fit sweeps the data repeatedly
# Fit the training data
h_theta0 = [1., 1., 1.]
print('****Gradient Descent****\n')
print('--Training--\n')
h_thetaf, cost = glm.fit(logr.logistic_log_likelihood,
                         logr.grad_logistic,
                         h_theta0,
                         scaled_train,
                         eta=0.03,
                         it_max=500,
                         gf='gd')
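compose and prepend_x0 are project helpers whose definitions are not shown. Under the obvious reading (right-to-left function composition, and prepending the bias feature x0 = 1 to every row, which is why h_theta0 has one more entry than the number of features), minimal sketches might be:

def compose(f, g):
    """compose(f, g)(x) == f(g(x)); here: scale first, then prepend x0."""
    return lambda x: f(g(x))

def prepend_x0(X):
    """Prepend the bias term x0 = 1.0 to every feature vector."""
    return [[1.0] + list(x) for x in X]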
Example #4
import metrics
import linear_regression as lr
import glm
from ml_util import train_test_split
from utility import Scaler
import numpy as np
from numpy.linalg import lstsq

# Get the data (csv_reader, compose and prepend_x0 are helpers from
# this project's utility modules)
Z, y = csv_reader('./data/Folds_small.csv', ['AT', 'V', 'AP', 'RH'], 'PE')
data = list(zip(Z, y))
# Split into a train set and test set
train_data, test_data = train_test_split(data, 0.33)
# Scale the training data
scale = Scaler()
Z_train, y_train = zip(*train_data)
scale.fit(Z_train)
transform = compose(prepend_x0, scale.transform)
X_train = transform(Z_train)
scaledtrain_data = list(zip(X_train, y_train))
# Scale the testing data using the same scaling parameters
# used for the training data
Z_test, y_test = zip(*test_data)
X_test = transform(Z_test)

h_theta0 = [0., 0., 0., 0., 0.]
print('****Gradient Descent****')
# Training-data argument and gf='gd' restored to match the glm.fit call above
h_thetaf, cost = glm.fit(lr.J,
                         lr.gradJ,
                         h_theta0,
                         scaledtrain_data,
                         eta=0.3,
                         it_max=5000,
                         gf='gd')
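lstsq is imported but never reached in this excerpt; presumably the example goes on to cross-check the gradient-descent fit against the closed-form least-squares solution. A hypothetical sketch of that check, assuming X_train rows already include the bias column added by prepend_x0:

# Hypothetical cross-check: ordinary least squares on the scaled training data
A = np.array(X_train)
b = np.array(y_train)
theta_ls, residuals, rank, sv = lstsq(A, b, rcond=None)
print('closed-form theta:      ', theta_ls)
print('gradient-descent theta: ', h_thetaf)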