Python SGD.step示例，optimizer.SGD.step Python示例

示例#1

0

显示文件

文件： tests.py 项目： iliailmer/numpy_learn

def testNetwork():  # noqa D103
    """Smoke-test one forward/backward/optimizer step on a small network.

    Builds a Linear(10, 64) -> ReLU -> Linear(64, 2) -> Sigmoid network,
    feeds it a random (32, 10) batch against random (32, 2) targets, and
    runs a single SGD step. Nothing is asserted; the test passes when no
    call raises.
    """
    layers = [Linear(10, 64), ReLU(), Linear(64, 2), Sigmoid()]
    net = Network(layers)

    inputs = np.random.randn(32, 10)
    targets = np.random.randn(32, 2)

    criterion = MSE()
    optimizer = SGD(0.001, 0.001)

    # The loss must be evaluated before its ``grad`` attribute is read below.
    criterion(net(inputs), targets)
    net.backward(criterion.grad)
    optimizer.step(net)

示例#2

0

显示文件

文件： main.py 项目： ChengyaoWang/myTorch

from tensor import Tensor
from optimizer import SGD
from layer import MSELoss, Linear, Tanh, Sigmoid
from model import Sequential

import numpy as np

# Toy example of using the Tensor class.
np.random.seed(0)

# Four 2-feature input rows and one target value per row.
data = Tensor(
    np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
    requires_grad=True,
)
target = Tensor(
    np.array([[0], [1], [0], [1]]),
    requires_grad=True,
)

# Linear(2, 3) -> Tanh -> Linear(3, 3) -> Tanh -> Linear(3, 1).
# Per the original note, each weight matrix is held as a Tensor object.
model = Sequential(Linear(2, 3), Tanh(), Linear(3, 3), Tanh(), Linear(3, 1))
optim = SGD(parameters=model.get_parameters(), lr=0.1)
criterion = MSELoss()

# Ten forward / backward / step iterations, printing the loss each time.
for i in range(10):
    loss = criterion(model(data), target)
    # Backpropagation is seeded with a gradient of ones shaped like the loss.
    seed_grad = Tensor(np.ones_like(loss.data), is_grad=True)
    loss.backward(seed_grad)
    optim.step()
    print(loss.data)
print(
    "------------------------------------------------------------------------")

示例#3

0

显示文件

文件： test.py 项目： janbenzing/deep-learning-project2

mini_batch_size = 100
optimizer1 = SGD(model1.param(), lr=lr)
optimizer2 = SGD(model2.param(), lr=lr)

# Number of mini-batches per epoch (true division, so it may be fractional);
# used to turn the summed loss into a per-batch average.
nb_batches = train_input.shape[0] / mini_batch_size

print("#" * 50)
print("Training model 1")
# Train model 1
for e in range(nb_epochs):
    sum_loss = 0
    for b in range(0, train_input.size(0), mini_batch_size):
        batch_input = train_input.narrow(0, b, mini_batch_size)
        batch_target = train_target.narrow(0, b, mini_batch_size)

        # forward() takes the targets too and returns (output, loss).
        output, loss = model1.forward(batch_input, batch_target)
        optimizer1.zero_grad()
        grad = model1.backward()
        sum_loss += loss.item()
        optimizer1.step()

    # '\r' keeps the progress report on a single, overwritten console line.
    mean_loss = sum_loss / nb_batches
    print("Iteration {0:}: loss = {1:.3f}".format(e+1, mean_loss),
          end='\r',
          flush=True)

# Average training loss of the last epoch.
loss_train1 = sum_loss / nb_batches

print()
print("#" * 50)

# Test model 1
output_test1, loss_test1 = model1.forward(test_input, test_target)
nb_err_test1 = compute_nb_errors(output_test1, test_target)

# Print results

示例#4

0

显示文件

文件： backprop_learner.py 项目： macetheace96/toolkitPython

class BackPropLearner(SupervisedLearner):
    """Feed-forward network trained one sample at a time with SGD.

    Training holds out a validation split, keeps a deep copy of the layers
    with the best validation loss seen so far, and restores that copy when
    training ends (normally, by Ctrl-C, or through an exception).
    """

    def __init__(self):
        # Hyper-parameters handed to SGD(lr, momentum) in train().
        self.lr = .01
        self.momentum = .9

        # Number and width of hidden layers built by init_layers().
        self.n_layers = 1
        self.hidden_dim = 8

        # Fraction of the data held out for validation; the one-hot encoder
        # is created lazily in prep_data().
        self.val_split = .2
        self.encoder = None

        # Early stopping: number of non-improving epochs tolerated, hard cap
        # on epochs, and relative slack applied to the best validation loss.
        self.threshold = 50
        self.max_epochs = 3000
        self.allowance = .0

        # Pairs passed as-is to LayerList.add_layer
        # (presumably (activation, derivative) -- TODO confirm).
        self.hidden_activation = (sigmoid, anti_sigmoid)
        # self.output_activation = (softmax, anti_sigmoid)
        self.output_activation = (sigmoid, anti_sigmoid)
        # self.loss_function = cross_entropy
        # Raw error (target - output); score() squares it.
        self.loss_function = lambda z, t: t - z
        self.opt = None
        self.layers = None

    def train(self, features, labels):
        """Fit the network, then restore the best-validation weights.

        Runs at most ``self.max_epochs`` epochs. An epoch counts as an
        improvement when its validation loss is below
        ``lowest_loss * (1 + self.allowance)``. Training stops once a
        non-improving epoch occurs after ``self.threshold`` earlier ones.
        A plot of validation loss and accuracy is always shown at the end.
        """
        in_dim = features.cols
        out_dim = labels.value_count(0)

        full_x, full_y = self.prep_data(features, labels)
        train_x, train_y, val_x, val_y = self.split_data(
            full_x, full_y, self.val_split)

        self.layers = self.init_layers(in_dim, out_dim)
        best_weights = deepcopy(self.layers)

        self.opt = SGD(self.lr, self.momentum)

        train_losses = []
        train_accuracies = []
        val_losses = []
        val_accuracies = []
        lowest_loss = np.inf
        stagnant_rounds = 0
        n_epochs = 0
        try:
            while n_epochs < self.max_epochs:
                train_x, train_y = self.shuffle(train_x, train_y)

                self.run_epoch(train_x, train_y)

                train_loss, train_accuracy = self.score(train_x, train_y)
                val_loss, val_accuracy = self.score(val_x, val_y)
                train_losses.append(train_loss)
                train_accuracies.append(train_accuracy)
                val_losses.append(val_loss)
                val_accuracies.append(val_accuracy)
                print(f"EPOCH {n_epochs}")
                print(f"Train:\t{train_losses[-1]}\t{train_accuracies[-1]}")
                print(f"Val:  \t{val_losses[-1]}\t{val_accuracies[-1]}")
                print()

                # Written as a product on purpose: the additive form
                # ``lowest + allowance * lowest`` evaluates 0.0 * inf = nan
                # while lowest_loss is still inf, which made this test
                # always False and the best weights were never updated.
                if val_loss < lowest_loss * (1 + self.allowance):
                    lowest_loss = val_loss
                    best_weights = deepcopy(self.layers)
                # NOTE(review): stagnant_rounds is never reset on improvement,
                # so the threshold counts stagnant epochs in total rather than
                # consecutively -- confirm this is intended.
                elif stagnant_rounds < self.threshold:
                    stagnant_rounds += 1
                else:
                    break

                n_epochs += 1
        except KeyboardInterrupt:
            # Ctrl-C is treated as "stop training now and keep the best".
            pass
        finally:
            self.layers = best_weights
            self._plot_validation_history(val_losses, val_accuracies)

    def _plot_validation_history(self, val_losses, val_accuracies):
        """Show validation loss and accuracy per epoch on twin y-axes."""
        fig, ax1 = plt.subplots()

        ax1.set_title("Iris Validation Set Loss vs Accuracy")

        color = 'tab:red'
        ax1.set_xlabel('Epochs')
        ax1.set_ylabel('Loss (MSE)', color=color)
        ax1.plot(val_losses, color=color)
        ax1.tick_params(axis='y', labelcolor=color)

        # Second axes sharing the same x-axis; the x-label is set on ax1.
        ax2 = ax1.twinx()

        color = 'tab:blue'
        ax2.set_ylabel('Accuracy', color=color)
        ax2.plot(val_accuracies, color=color)
        ax2.tick_params(axis='y', labelcolor=color)

        # Otherwise the right y-label is slightly clipped.
        fig.tight_layout()
        plt.show()

    def shuffle(self, a, b):
        """Return copies of ``a`` and ``b`` shuffled with one shared order."""
        temp = list(zip(a, b))
        np.random.shuffle(temp)
        new_a, new_b = zip(*temp)
        return np.array(new_a), np.array(new_b)

    def prep_data(self, features, labels):
        """Return ``(instances, one_hot_targets)`` for the given data.

        The encoder is created on first use and re-fitted on every call.
        """
        instances = features.to_numpy()

        if self.encoder is None:
            self.encoder = self._make_encoder()
        targets = self.encoder.fit_transform(labels.data)

        return instances, targets

    @staticmethod
    def _make_encoder():
        """Build a dense-output OneHotEncoder on old and new scikit-learn."""
        try:
            # Keyword used by newer scikit-learn releases.
            return OneHotEncoder(sparse_output=False, categories='auto')
        except TypeError:
            # Older releases only know the original ``sparse`` keyword.
            return OneHotEncoder(sparse=False, categories='auto')

    def split_data(self, x, y, split):
        """Randomly split ``x``/``y`` into training and held-out parts.

        ``split`` is the held-out fraction. Returns
        ``(train_x, train_y, held_out_x, held_out_y)``.
        """
        n_samples = int(len(x) * split)
        rand_indices = np.random.permutation(range(len(x)))

        old_x = x[rand_indices[n_samples:]]
        old_y = y[rand_indices[n_samples:]]
        new_x = x[rand_indices[:n_samples]]
        new_y = y[rand_indices[:n_samples]]

        return old_x, old_y, new_x, new_y

    def score(self, X, Y):
        """Return ``(mean squared error, accuracy)`` over the samples.

        A sample is correct when the arg-max of the network output matches
        the arg-max of its target row.
        """
        losses = []
        accuracy_count = 0
        for x, y in zip(X, Y):
            logits = self.layers.forward(x)
            loss = self.loss_function(logits, y)
            accuracy_count += np.argmax(y) == np.argmax(logits)
            losses.append(loss**2)

        return np.mean(losses), accuracy_count / len(Y)

    def run_epoch(self, X, Y):
        """Run one pass of per-sample forward, backward and optimizer step.

        Returns the mean of the raw (unsquared) per-sample errors.
        """
        losses = []
        for x, y in zip(X, Y):
            logits = self.layers.forward(x)
            loss = self.loss_function(logits, y)
            losses.append(loss)
            self.layers.backward(loss)
            self.opt.step(self.layers)
        return np.mean(losses)

    def init_layers(self, in_dim, out_dim):
        """Build ``n_layers`` hidden layers followed by one output layer."""
        layers = LayerList()

        # Each layer's input width is the previous layer's output width,
        # starting from the feature dimension.
        prev_dim = in_dim
        for _ in range(self.n_layers):
            layers.add_layer(prev_dim, self.hidden_dim,
                             self.hidden_activation)
            prev_dim = self.hidden_dim
        layers.add_layer(prev_dim, out_dim, self.output_activation)

        return layers

    def predict(self, features, labels):
        """Predict one instance; ``labels`` is cleared and given the result."""
        self.in_training = False
        del labels[:]
        pred = self.layers.forward(np.array(features))
        # Compute the maximum once instead of once per output element.
        top = max(pred)
        one_hot = [[1 if x == top else 0 for x in pred]]
        decoded = self.encoder.inverse_transform(one_hot)
        labels.append(decoded[0][0])

示例#5

0

显示文件

def _print_section_header(model_line, loss_line, optimizer_line):
    """Print the banner block that introduces one model's training run."""
    banner = '*************************************************************************'
    for _ in range(5):
        print(banner)
    print(model_line)
    print(loss_line)
    print(optimizer_line)
    print(banner)
    print('Training')
    print(banner)


def _is_nan(value):
    """Return True when ``value`` is neither ``>= 0`` nor ``< 0`` (NaN)."""
    return (not value >= 0) and (not value < 0)


def _train_one_epoch(model, criterion, optimizer, features, labels, batch_size):
    """Run one training pass and return ``(loss_sum, num_correct, last_grad)``.

    Gradients are cleared once per batch, accumulated over the samples of
    that batch, then applied with a single ``optimizer.step()``.
    ``last_grad`` is the loss gradient of the final sample processed
    (``None`` if there were no samples).
    """
    loss_sum = 0.
    num_correct = 0
    last_grad = None
    for start in range(0, len(features), batch_size):
        batch_features = features.narrow(0, start, batch_size)
        batch_labels = labels.narrow(0, start, batch_size)

        # Clear once per batch. Clearing per sample would discard everything
        # but the last sample's gradient whenever batch_size > 1.
        optimizer.zero_grad()
        for feature, label in zip(batch_features, batch_labels):
            # Forward pass to compute the loss.
            pred = model.forward(feature)
            loss_sum += criterion.forward(pred, label)

            _, pred_cat = torch.max(pred, 0)
            _, label_cat = torch.max(label, 0)
            if pred_cat == label_cat:
                num_correct += 1

            # Backward pass: loss gradient, then parameter gradients.
            last_grad = criterion.backward(pred, label)
            model.backward(last_grad)

        # Update parameters by optimizer.
        optimizer.step()
    return loss_sum, num_correct, last_grad


def _evaluate(model, criterion, features, labels):
    """Forward-only pass; return ``(loss_sum, num_correct)``."""
    loss_sum = 0.
    num_correct = 0
    for feature, label in zip(features, labels):
        pred = model.forward(feature)
        loss_sum += criterion.forward(pred, label)

        _, pred_cat = torch.max(pred, 0)
        _, label_cat = torch.max(label, 0)
        if pred_cat == label_cat:
            num_correct += 1
    return loss_sum, num_correct


def _report_epoch(epoch, total, train_loss, train_acc, test_loss, test_acc):
    """Print one line of per-epoch loss and accuracy figures."""
    print("Epoch: {}/{}..".format(epoch, total),
          "Training Loss: {:.4f}..".format(train_loss),
          "Training Accuracy: {:.4f}..".format(train_acc),
          "Validation/Test Loss: {:.4f}..".format(test_loss),
          "Validation/Test Accuracy: {:.4f}..".format(test_acc),  )


def _plot_classification(model, test_features, test_targets, title):
    """Scatter-plot the test points, coloured by prediction outcome.

    Correctly classified points are coloured by their target value;
    misclassified points get the extra colour index 2.
    """
    point_colors = []
    for feature, target in zip(test_features, test_targets):
        pred = model.forward(feature)
        _, pred_cat = torch.max(pred, 0)
        if target.int() == pred_cat.int():
            point_colors.append(int(target))
        else:
            point_colors.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=point_colors)
    axes.set_title(title)
    plt.show()


def _build_model_2():
    """Return a fresh ``(model, optimizer, criterion)`` triple for Model 2."""
    design = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
              Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2),
              Sigmoid()]
    model = Sequential(design)
    return model, Adam(model, lr=1e-3), CrossEntropy()


def main():
    """Train and visualise two small classifiers on generated 2-D data.

    Model 1 uses MSE loss with SGD for 50 epochs. Model 2 uses cross
    entropy with Adam for 25 epochs and is rebuilt from scratch if the last
    loss gradient of an epoch turns out to be NaN.
    """
    # Generate data and translate labels.
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)

    num_train = len(train_features)
    num_test = len(test_features)
    batch_size = 1

    # ------------------------------ Model 1 ------------------------------
    # NOTE(review): the printed description omits the Dropout layer that
    # the design below contains -- confirm which of the two is intended.
    _print_section_header(
        'Model: Linear + ReLU + Linear +ReLU + Linear + ReLU + Linear + Tanh',
        'Loss: MSE',
        'Optimizer: SGD')

    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    num_epochs_1 = 50
    for epoch in range(num_epochs_1):
        train_loss_sum, train_correct, _ = _train_one_epoch(
            my_model_1, criterion_1, optimizer_1,
            train_features, train_labels, batch_size)
        # Evaluate the current model on the testing set (forward pass only).
        test_loss_sum, test_correct = _evaluate(
            my_model_1, criterion_1, test_features, test_labels)

        _report_epoch(epoch + 1, num_epochs_1,
                      train_loss_sum / num_train, train_correct / num_train,
                      test_loss_sum / num_test, test_correct / num_test)

    # Visualize the classification performance of Model 1 on the testing set.
    _plot_classification(my_model_1, test_features, test_targets,
                         'Classification Performance of Model 1')

    # ------------------------------ Model 2 ------------------------------
    _print_section_header(
        'Model: Linear + ReLU + Linear + Dropout+ SeLU + Linear + Dropout + ReLU + Linear + Sigmoid',
        'Loss: Cross Entropy',
        'Optimizer: Adam')

    my_model_2, optimizer_2, criterion_2 = _build_model_2()

    num_epochs_2 = 25
    epoch = 0
    while epoch < num_epochs_2:
        train_loss_sum, train_correct, last_grad = _train_one_epoch(
            my_model_2, criterion_2, optimizer_2,
            train_features, train_labels, batch_size)
        test_loss_sum, test_correct = _evaluate(
            my_model_2, criterion_2, test_features, test_labels)

        # In case of a gradient explosion (NaN gradient), initialise the
        # model again and restart training from epoch 0. The rebuild uses
        # the same design as the first build.
        if _is_nan(last_grad[0]):
            epoch = 0
            my_model_2, optimizer_2, criterion_2 = _build_model_2()
            for _ in range(5):
                print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        _report_epoch(epoch + 1, num_epochs_2,
                      train_loss_sum / num_train, train_correct / num_train,
                      test_loss_sum / num_test, test_correct / num_test)
        epoch += 1

    # Visualize the classification performance of Model 2 on the testing set.
    _plot_classification(my_model_2, test_features, test_targets,
                         'Classification Performance of Model 2')

示例#6

0

显示文件

文件： train_cross_ent.py 项目： iliailmer/numpy_learn

# Per-batch validation accuracies, accumulated across all epochs.
acc_val: list = []

for epoch in progress_bar:
    offset = 0
    val_err = 0
    err = 0
    # Training pass over full batches only; a trailing partial batch is
    # never visited because of the loop condition.
    while offset + batch_size <= len(x_train):
        data = x_train[offset : offset + batch_size, :]
        label = y_train[offset : offset + batch_size, :]
        try:
            pred = net(data)
        except RuntimeWarning:
            # The forward pass failed, so there is no valid prediction for
            # this batch. Skip it rather than continue with a stale or
            # undefined ``pred``.
            print(f"Runtime warning on {offset}")
            offset += batch_size
            continue
        # Each batch contributes its loss divided by the number of batches.
        err += loss(pred, label) / (len(x_train) / batch_size)
        g = net.backward(loss.grad)
        optim.step(net)
        offset += batch_size
        acc_train.append(accuracy_score(label.argmax(axis=1), pred.argmax(axis=1)))
    offset = 0
    # Validation pass: forward only, no backward call and no optimizer step.
    while offset + batch_size <= len(x_val):
        val_data = x_val[offset : offset + batch_size, :]
        val_label = y_val[offset : offset + batch_size]
        pred = net(val_data)
        val_err += loss(pred, val_label) / (len(x_val) / batch_size)
        offset += batch_size
        acc_val.append(accuracy_score(val_label.argmax(axis=1), pred.argmax(axis=1)))
    # Refresh the progress-bar summary on every second epoch.
    if (epoch) % 2 == 0:
        progress_bar.set_postfix(
            {"loss_train": err, "loss_val": val_err, "acc_val": np.mean(acc_val)}
        )
    # NOTE(review): acc_train is not reset inside this loop, so this looks
    # like a running mean over all epochs so far -- confirm against the
    # code that creates acc_train.
    accuracies["train"].append(np.mean(acc_train))

示例#7

0

显示文件

    # Fragment of a LeNet training routine: sample random mini-batches, run
    # forward/backward/SGD step, then evaluate on the test set and save the
    # weights after every epoch.
    # NOTE(review): max_iter is not referenced in the visible lines.
    max_iter = 10000

    # batcher parameters
    batch_size = 64

    lenet = LeNet(layers)
    # The freshly built model is saved once before any training step runs.
    lenet.save_model("../models/my_model.model")
    #optimizer = SGDMomentum(lenet, **opt_params)
    optimizer = SGD(lenet.parameters(), lr=0.1)
    epochs = 10
    # Ceiling division: number of batches needed to cover xtrain once.
    per_epoch = -(-xtrain.shape[0] // batch_size)
    # Global step counter across epochs, passed to the learning-rate update.
    iter_cnt = 0
    for epoch in range(epochs):
        for ix in tqdm(range(per_epoch)):
            optimizer.update_lr(iter_cnt)
            # Row indices are drawn at random with replacement, so one
            # "epoch" is per_epoch random batches, not an exact pass over
            # the data.
            rand_ix = np.random.randint(0, xtrain.shape[0], (batch_size,))
            batch_x = xtrain[rand_ix]
            batch_y = ytrain[rand_ix]
            # Each sample is flattened to a single row before the forward
            # pass; forward() also receives the labels.
            my_loss = lenet.forward(batch_x.reshape((batch_size, -1)), batch_y)
            lenet.backward(my_loss)
            optimizer.step()
            # Gradients are cleared after the step, ready for the next batch.
            optimizer.zero_grad()
            iter_cnt += 1
        print("Epoch {0} of {1} Done. Starting Testing".format(
            epoch + 1, epochs))
        start = time.time()
        # The forward result is indexed by "acc" and "loss" below.
        test_loss = lenet.forward(xtest, ytest)
        print("Testing Done. Took {0:.2f}s. Accuracy: {1:.4f}, Loss: {2:.2f}".format(
            time.time() - start, test_loss["acc"], test_loss["loss"]))
        # The same file is overwritten at the end of every epoch.
        lenet.save_model("weights.npy")