Example #1
def fit(
    features,
    labels,
    ## params:
    batch_size=1,
    max_epochs=100,
    learning_rate=.1,
    initial_weights=None,
    convergence_threshold=1e-5,
    convergence_look_back=2,
    ham_label=0,
    spam_label=1,
):
    '''
    Returns the optimal weights for a given training set (features
    and corresponding label inputs) for the ADALINE model.
    These weights are found using the gradient descent method.
    
    /!\ Assumes bias term is already in the features input.
        
    Inputs:
    - features: N * D Numpy matrix of binary values (0 and 1)
                with N: the number of training examples
                and  D: the number of features for each example
    - labels:   N * 1 Numpy vector of binary values (0 and 1)
    - learning_rate: float between 0 and 1
    - initial_weights: D * 1 Numpy vector, beginning weights
    - convergence_threshold: float, very small number; e.g. 1e-5
    - convergence_look_back: int, >= 1
                             stops if the error difference hasn't exceeded the
                             threshold for the last convergence_look_back epochs
        
    Output:
    - W: D * 1 Numpy vector of real values
    '''
    ## notation
    X, Y = features, labels
    N, D = X.shape  # N #training samples; D #features

    W = gradient_descent(
        X,
        Y,
        calculate_output,
        computeCost,
        predict,
        batch_size=N,
        learning_rate=learning_rate,
        max_epochs=max_epochs,
        initial_weights=initial_weights,
        convergence_threshold=convergence_threshold,
        convergence_look_back=convergence_look_back,
    )

    return W
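The helpers gradient_descent, calculate_output, computeCost and predict are defined elsewhere in this repository and are not shown here. As a rough guide to what the docstring describes, below is a minimal, self-contained sketch of a full-batch ADALINE update with a squared-error cost; the function name and every implementation detail are assumptions for illustration, not the original helpers.

import numpy as np

def adaline_fit_sketch(X, Y, learning_rate=0.1, max_epochs=100,
                       convergence_threshold=1e-5):
    # Illustrative full-batch gradient descent for ADALINE (assumed, not the original helpers).
    N, D = X.shape
    W = np.zeros((D, 1))
    previous_error = np.inf
    for _ in range(max_epochs):
        O = X @ W                            # linear (ADALINE) output
        error = np.mean((Y - O) ** 2) / 2    # squared-error cost
        gradient = -(X.T @ (Y - O)) / N      # gradient of the cost w.r.t. W
        W = W - learning_rate * gradient
        if abs(previous_error - error) < convergence_threshold:
            break
        previous_error = error
    return W

# Toy usage: a leading bias column plus two binary features, as the docstring assumes.
X = np.array([[1, 0, 1], [1, 1, 0], [1, 1, 1], [1, 0, 0]], dtype=float)
Y = np.array([[1], [0], [1], [0]], dtype=float)
print(adaline_fit_sketch(X, Y).shape)  # (3, 1)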
Example #2
def fit(features, labels,
        ## params:
        batch_size=1,
        max_epochs=100,
        learning_rate=.1,
        initial_weights=None,
        convergence_threshold=1e-5,
        convergence_look_back=2,
        ham_label=0,
        spam_label=1,
        ):
    '''
    Returns the optimal weights for a given training set (features
    and corresponding label inputs) for the ADALINE model.
    These weights are found using the gradient descent method.
    
    /!\ Assumes bias term is already in the features input.
        
    Inputs:
    - features: N * D Numpy matrix of binary values (0 and 1)
                with N: the number of training examples
                and  D: the number of features for each example
    - labels:   N * 1 Numpy vector of binary values (0 and 1)
    - learning_rate: float between 0 and 1
    - initial_weights: D * 1 Numpy vector, beginning weights
    - convergence_threshold: float, very small number; e.g. 1e-5
    - convergence_look_back: int, >= 1
                             stops if the error difference hasn't exceeded the
                             threshold for the last convergence_look_back epochs
        
    Output:
    - W: D * 1 Numpy vector of real values
    '''           
    ## notation
    X, Y = features, labels
    N, D = X.shape   # N #training samples; D #features
    
    W = gradient_descent(X, Y,
                         calculate_output,
                         computeCost,
                         predict,
                         batch_size=N,
                         learning_rate=learning_rate,
                         max_epochs=max_epochs,
                         initial_weights=initial_weights,
                         convergence_threshold=convergence_threshold,
                         convergence_look_back=convergence_look_back,
                         )

    return W
Example #3
def fit(
    features,
    labels,
    ## params
    batch_size=1,
    max_epochs=100,
    learning_rate=.1,
    initial_weights=None,
    convergence_threshold=1e-5,
    convergence_look_back=1,
    ham_label=0,
    spam_label=1,
):
    '''
    Implementation of logistic regression classifier with stochastic
    gradient descent.
    
    /!\ Assumes bias term is already in the features input.
    
    Input:
    - features: N * D Numpy matrix of binary feature values (0 and 1)
                with N: the number of training examples
                and  D: the number of features for each example
    - labels: N * 1 Numpy vector of binary feature values (0 and 1)
    - learning_rate: float between 0 and 1
    - initial_weights: D * 1 Numpy vector, beginning weights
    
    Output:
    - W: D * 1 Numpy vector of float weights of trained classifier
    '''
    ## notation
    X, Y = features, labels
    N, D = X.shape  # N #training samples; D #features

    W = gradient_descent(
        X,
        Y,
        calculate_output,
        computeCost,
        predict,
        batch_size=batch_size,
        learning_rate=learning_rate,
        max_epochs=max_epochs,
        initial_weights=initial_weights,
        convergence_threshold=convergence_threshold,
        convergence_look_back=convergence_look_back,
    )

    return W
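As above, the helper functions are not shown. Since this example is described as logistic regression trained with stochastic gradient descent, calculate_output is presumably the sigmoid of the linear score. Below is a minimal sketch of a single stochastic update under that assumption; sigmoid and sgd_step_sketch are illustrative names, not the original code.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sgd_step_sketch(W, x_batch, y_batch, learning_rate=0.1):
    # One illustrative stochastic update for logistic regression (assumed, not the original code).
    output = sigmoid(x_batch @ W)              # predicted probability of the spam class (label 1)
    gradient = x_batch.T @ (output - y_batch)  # gradient of the log loss on this mini-batch
    return W - learning_rate * gradient / x_batch.shape[0]

# One step on a single example (batch_size=1, the default above); the bias term is already in x.
W = np.zeros((3, 1))
x = np.array([[1.0, 1.0, 0.0]])
y = np.array([[1.0]])
W = sgd_step_sketch(W, x, y)
print(W.ravel())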
Example #4
def fit(features, labels,
        ## params
        batch_size=1,
        max_epochs=100,
        learning_rate=.1,
        initial_weights=None,
        convergence_threshold=1e-5,
        convergence_look_back=1,
        ham_label=0,
        spam_label=1,
        ):
    '''
    Implementation of logistic regression classifier with stochastic
    gradient descent.
    
    /!\ Assumes bias term is already in the features input.
    
    Input:
    - features: N * D Numpy matrix of binary feature values (0 and 1)
                with N: the number of training examples
                and  D: the number of features for each example
    - labels: N * 1 Numpy vector of binary feature values (0 and 1)
    - learning_rate: float between 0 and 1
    - initial_weights: D * 1 Numpy vector, beginning weights
    
    Output:
    - W: D * 1 Numpy vector of float weights of trained classifier
    '''  
    ## notation
    X, Y = features, labels
    N, D = X.shape   # N #training samples; D #features
    
    W = gradient_descent(X, Y,
                         calculate_output,
                         computeCost,
                         predict,
                         batch_size=batch_size,
                         learning_rate=learning_rate,
                         max_epochs=max_epochs,
                         initial_weights=initial_weights,
                         convergence_threshold=convergence_threshold,
                         convergence_look_back=convergence_look_back,
                         )
    
    return W
Example #5
    def fit(self, X, y):
        """A reference implementation of a fitting function.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The training input samples.
        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
            The target values (class labels in classification, real numbers in
            regression).

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y)
        X, y = self.holdout(X, y)
        X = self.standard_scaler_x.fit_transform(X)
        y = self.standard_scaler_y.fit_transform(y.reshape(-1, 1)).reshape(-1)
        batch_size = X.shape[0] if self.batch_size is None else self.batch_size
        self.w = gradient_descent(self.loss_gradient, adjust(X), y, batch_size,
                                  self.n_epochs, self.shuffle, self.l2,
                                  self.learning_rate, self.decay)
        # Return the estimator
        return self
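One detail worth calling out is the reshape around y: sklearn's StandardScaler only accepts 2-D input, so the target is turned into a column vector for scaling and flattened back afterwards. A standalone illustration of just that step:

import numpy as np
from sklearn.preprocessing import StandardScaler

y = np.array([3.0, 5.0, 7.0, 9.0])
scaler_y = StandardScaler()
# Reshape to a column for the scaler, then flatten back to 1-D.
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).reshape(-1)
print(y_scaled.mean(), y_scaled.std())  # approximately 0.0 and 1.0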
Example #6
    def __init__(self, data, labels):
        # Fit the weight vector via gradient descent; the third argument is presumably the learning rate.
        self.w = gd.gradient_descent(labels, data, 0.000015)
Example #7
if not os.path.exists("train_features.npy"):
  createdata.convert_to_features(train_dir, "train_features")

if not os.path.exists("test_features.npy"):
  createdata.convert_to_features(test_dir, "test_features")

#Load training/test data
train_data = createdata.read_feature_data("train_features.npy")
test_data = createdata.read_feature_data("test_features.npy")

#Create variables for number of test/training samples
num_train_samples = train_data.shape[0]
num_test_samples = test_data.shape[0]

#Check to see if the models have already been created
if not os.path.exists("trained_models.npy"):
  trained_models = gd.gradient_descent(train_data[:, 1:], train_data[:, 0], 1, True)
  np.save("trained_models", trained_models)

#Load trained models
trained_models = np.load("trained_models.npy")

#Format test data into samples and expected results
test_X = test_data[:, 1:]
test_Y = test_data[:, 0]

#Get relevant variables
samples, features = test_data.shape  # rows are samples, columns are label + features
num_genres = int(np.ptp(test_Y)) + 1

#Normalize test_X input data
for i in range (0, features):
Example #8
from datetime import datetime
import numpy as np
from sklearn import linear_model
from sklearn.datasets import make_regression

if __name__ == '__main__':
    # Comparison of Batch vs Stochastic Gradient Descent
    # Start Batch Gradient Descent

    a = datetime.now()
    X, y = make_regression(n_samples=10000, n_features=1, n_informative=1,
                        random_state=0, noise=35)

    m, n = np.shape(X)
    X = np.c_[ np.ones(m), X] # insert column
    iterations = 500 # about minimum to converge to truth
    alpha = 0.01 # learning rate
    theta = gradient_descent(alpha, X, y, iterations)

    print('Parameters from and duration of Batch Gradient Descent:')
    print(theta)
    b = datetime.now()
    print(b - a)  # 0.046 seconds

    # Start Stochastic Gradient Descent
    c = datetime.now()
    # Note: SGDRegressor's alpha is the L2 regularization strength, not the learning rate.
    clf = linear_model.SGDRegressor(loss="squared_error", max_iter=5, alpha=alpha)
    clf.fit(X, y)

    print('Parameters from and duration of Stochastic Gradient Descent:')
    print(clf.coef_)
    d = datetime.now()
    print(d - c)  # 0.006 seconds
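The gradient_descent function called above comes from the author's own module and is not included in the snippet. Below is a self-contained sketch with the same call signature (alpha, X, y, iterations), assuming a plain least-squares linear regression objective; it is an illustrative stand-in, not the original implementation.

import numpy as np

def gradient_descent(alpha, X, y, num_iterations):
    # Illustrative batch gradient descent for linear regression (assumed, not the original).
    m, n = np.shape(X)
    theta = np.zeros(n)
    for _ in range(num_iterations):
        predictions = X.dot(theta)               # current model output
        gradient = X.T.dot(predictions - y) / m  # gradient of the mean squared error
        theta = theta - alpha * gradient
    return theta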
Example #9
# Create a random forest classifier and evaluate it on the held-out weed data
RF = RandomForestClassifier(n_estimators=50)
RF = RF.fit(weed_x_train, weed_y_train)
RF_score = RF.score(weed_x_test, weed_y_test)
print(f"Accuracy of Random Forest: {RF_score}")

# In[5]:

# Exercise 6
x = np.linspace(-1.5, 1.5, 100)
rates = [0.1, 0.01, 0.001, 0.0001]
iterations = []
function_values = []

for rate in rates:
    n_iter, value = gd.gradient_descent(x, rate)
    iterations.append(n_iter)
    function_values.append(value)

for i in range(len(rates)):
    print(
        f"Learning Rate: {rates[i]}, Iterations: {iterations[i]}, Value: {function_values[i]}"
    )
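The gd module and the function it minimizes are not shown, so gd.gradient_descent(x, rate) cannot be reconstructed exactly; the exercise is evidently about how the learning rate affects convergence. Below is a self-contained sketch in the same spirit, assuming a simple convex objective f(x) = x**2 and a fixed starting point (both assumptions), returning an (iterations, final value) pair like the loop above expects.

def gradient_descent_1d(learning_rate, x0=1.5, tol=1e-8, max_iterations=100000):
    # Minimize f(x) = x**2 by gradient descent; an illustrative stand-in for gd.gradient_descent.
    x = x0
    for iteration in range(1, max_iterations + 1):
        step = learning_rate * 2 * x  # f'(x) = 2 * x
        x -= step
        if abs(step) < tol:
            break
    return iteration, x ** 2

for rate in [0.1, 0.01, 0.001, 0.0001]:
    n_iter, value = gradient_descent_1d(rate)
    print(f"Learning Rate: {rate}, Iterations: {n_iter}, Value: {value}")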

# In[6]:

# Exercise 7
Iris2D1_train = np.loadtxt("data/Iris2D1_train.txt")
Iris2D1_test = np.loadtxt("data/Iris2D1_test.txt")
Iris2D2_train = np.loadtxt("data/Iris2D2_train.txt")
Iris2D2_test = np.loadtxt("data/Iris2D2_test.txt")