def run_feed_forward_back_propagation(model, epochs, dataloaders, criterion, device, optimizer):
    """Train `model` for `epochs` epochs, printing training, validation and
    testing metrics after every epoch.

    Per-epoch work is delegated to `update_from_training_data`,
    `evaluate_model_on_validation` and `evaluate_model_on_testing`;
    this function only orchestrates and reports.
    """
    model.to(device)
    train_losses, validation_losses = [], []
    # Loader lengths are constant across epochs; look them up once.
    n_train = len(dataloaders['training'])
    n_valid = len(dataloaders['validation'])
    n_test = len(dataloaders['testing'])
    for epoch_idx in keep_awake(range(epochs)):
        running_loss = update_from_training_data(
            model, dataloaders, device, criterion, optimizer, train_losses)
        validation_loss, validation_accuracy = evaluate_model_on_validation(
            model, dataloaders, device, criterion, validation_losses)
        test_loss, test_accuracy = evaluate_model_on_testing(
            model, dataloaders, device, criterion)
        print("Epoch: {}/{}".format(epoch_idx + 1, epochs))
        print("Training Loss: {:.3f}..".format(running_loss / n_train))
        print("Validation Loss: {:.3f}..".format(validation_loss / n_valid))
        print("Validation Accuracy: {:.3f}..".format(validation_accuracy / n_valid))
        print("Testing Loss: {:.3f}..".format(test_loss / n_test))
        print("Test Accuracy: {:.3f}..\n".format(test_accuracy / n_test))
def train(model, trainloader, validloader, device, epochs=5):
    """Train the model's fully connected layers.

    Every `print_every` training steps a validation pass runs and the running
    train loss, validation loss and validation accuracy are printed.

    BUG FIX: previously, after an in-epoch validation pass the model stayed in
    eval() mode (dropout disabled) for the rest of the epoch and `running_loss`
    kept accumulating, inflating later reports. The model is now switched back
    to train() and the loss window reset after each report.

    Returns the trained model.
    """
    print_every = 25
    steps = 0
    optimizer = model.optimizer
    criterion = model.criterion
    for e in keep_awake(range(epochs)):
        running_loss = 0
        # turn on dropout for training
        model.train()
        for images, labels in trainloader:
            steps += 1
            # move images and labels to same device as model
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            logps = model.forward(images)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # print progress periodically
            if steps % print_every == 0:
                # turn off dropout for validation
                model.eval()
                accuracy = 0
                valid_loss = 0
                with torch.no_grad():
                    for images, labels in validloader:
                        images, labels = images.to(device), labels.to(device)
                        logps = model.forward(images)
                        valid_loss += criterion(logps, labels).item()
                        # top-1 accuracy for this batch
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()
                print(f"Epoch {e+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Validation loss: {valid_loss/len(validloader):.3f}.. "
                      f"Validation accuracy: {accuracy/len(validloader):.3f}")
                # Reset the loss window and re-enable dropout for training.
                running_loss = 0
                model.train()
    return model
def train_and_validate(self):
    """Run the full training + validation loop for `self.epochs` epochs.

    Prints batch-count progress while iterating, then the total training
    loss, total validation loss and mean validation accuracy per epoch.
    """
    optimizer = self.optimizer_class(self.classifier.parameters(),
                                     lr=self.learning_rate)
    self.arch.to(self.device)
    for ep in keep_awake(range(self.epochs)):
        print(f"\nStarting epoch # {ep + 1} of {self.epochs}")
        print(f"Batch progress", end="...")
        # Training pass: dropout/batchnorm in train mode.
        self.arch.train()
        training_loss = 0.0
        for count, (images, labels) in enumerate(self.dataloaders["train"]):
            optimizer.zero_grad()
            images = images.to(self.device)
            labels = labels.to(self.device)
            if count % 10 == 0:
                print(count, end="...")
            log_ps = self.arch.forward(images)
            loss = self.criterion(log_ps, labels)
            training_loss += loss.item()
            loss.backward()
            optimizer.step()
        print(f"\nTotal training loss: {training_loss}")

        print(f"\nBeginning evaluation for epoch #{ep + 1}")
        print(f"Batch progress", end="...")
        # Evaluation pass: eval mode, no gradient tracking.
        self.arch.eval()
        accuracy = 0.0
        validation_loss = 0.0
        with torch.no_grad():
            for count, (images, labels) in enumerate(
                    self.dataloaders["validation"]):
                images = images.to(self.device)
                labels = labels.to(self.device)
                if count % 10 == 0:
                    print(count, end="...")
                log_ps = self.arch.forward(images)
                validation_loss += self.criterion(log_ps, labels).item()
                # Top-1 accuracy for the batch.
                ps = torch.exp(log_ps)
                top_class = ps.topk(1, dim=1)[1]
                equals = torch.eq(top_class, labels.view(*top_class.shape))
                accuracy += equals.type(torch.FloatTensor).mean()
        print(
            f"\n\tTotal validation loss: {validation_loss}"
            f"\n\tAccuracy: {accuracy / len(self.dataloaders['validation'])}"
        )
def train_nn_model(nn_model, optimizer, trainloader, validloader, device, epochs, report_every=20):
    """Run training loop.

    Trains with NLL loss, logging running train loss plus validation loss and
    accuracy every `report_every` steps, and total elapsed time at the end.
    """
    criterion = nn.NLLLoss()
    nn_model.to(device)
    running_loss = 0.0
    start_time = time.time()
    for epoch in keep_awake(range(epochs)):
        steps = 0
        for inputs, labels in trainloader:
            steps += 1
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            logps = nn_model(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if steps % report_every:
                continue
            # Periodic validation pass (gradients off, eval mode).
            val_loss = 0
            accuracy = 0
            nn_model.eval()
            with torch.no_grad():
                for inputs, labels in validloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    logps = nn_model(inputs)
                    val_loss += criterion(logps, labels).item()
                    ps = torch.exp(logps)  # inverse of log
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    # Cast on the CUDA tensor type when training on GPU.
                    device_type = torch.FloatTensor
                    if device == torch.device('cuda'):
                        device_type = torch.cuda.FloatTensor
                    accuracy += torch.mean(equals.type(device_type)).item()
            val_size = len(validloader)
            logging.info(
                f"Epoch {epoch+1}/{epochs}.. "
                f"Steps: {steps}.. "
                f"Time: {(time.time() - start_time):.3f}s.. "
                f"Running loss: {running_loss/report_every:.3f}.. "
                f"Validation loss: {val_loss/val_size:.3f}.. "
                f"Validation accuracy: {100 * accuracy/val_size:.3f}%")
            running_loss = 0
            nn_model.train()
    logging.info(f"Total training time: {(time.time() - start_time):.3f}s")
def train_model(model, trainloader, validloader, device, optimizer, epochs):
    """Train `model` with NLL loss, validating every `print_every` steps.

    Prints per-window train loss and per-pass validation loss/accuracy,
    then returns the trained model.
    """
    criterion = nn.NLLLoss()
    step = 0
    print_every = 5
    train_loss = 0
    for e in keep_awake(range(epochs)):
        for inputs, labels in trainloader:
            step += 1
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            if step % print_every != 0:
                continue
            # Validation pass: eval mode + no gradient tracking.
            valid_loss = 0
            accuracy = 0
            model.eval()
            with torch.no_grad():
                for inputs, labels in validloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    log_ps = model(inputs)
                    valid_loss += criterion(log_ps, labels).item()
                    # Top-1 accuracy for the batch.
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(
                        torch.FloatTensor)).item()
            model.train()
            print(f'Epoch: {e + 1}/{epochs}',
                  f'Training Loss: {train_loss/print_every:.3f}',
                  f'Valid Loss: {valid_loss/len(validloader):.3f}',
                  f'Valid Accuracy: {accuracy/len(validloader):.3f}')
            train_loss = 0
    return model
def train_model(epochs, dropout, model, criterion, optimizer, device, train_dataloader, valid_dataloader):
    """Train `model` on `train_dataloader`, validating every few batches.

    Args:
        epochs: number of passes over the training data.
        dropout: unused here; kept for backward compatibility with callers.
        model: network to train (expected to output log-probabilities).
        criterion: loss function applied to the model output.
        optimizer: optimizer updating the model parameters.
        device: torch device the batches are moved to.
        train_dataloader: training set loader.
        valid_dataloader: validation set loader.

    Returns:
        (model, optimizer) after training.
    """
    steps = 0
    running_loss = 0
    print_every = 5
    for epoch in keep_awake(range(epochs)):
        for images, labels in train_dataloader:
            steps += 1
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            logps = model.forward(images)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if steps % print_every == 0:
                test_loss = 0
                accuracy = 0
                model.eval()  # disable dropout for validation
                with torch.no_grad():
                    for images, labels in valid_dataloader:
                        images, labels = images.to(device), labels.to(device)
                        logps = model.forward(images)
                        batch_loss = criterion(logps, labels)
                        test_loss += batch_loss.item()
                        # Calculate accuracy (fraction of top-1 hits).
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()
                # BUG FIX: the report line previously misspelled
                # "Vaildation accuracy".
                print(
                    f"Epoch {epoch+1}/{epochs}.. "
                    f"Train loss: {running_loss/print_every:.3f}.. "
                    f"Validation loss: {test_loss/len(valid_dataloader):.3f}.. "
                    f"Validation accuracy: {accuracy/len(valid_dataloader):.3f}"
                )
                running_loss = 0
                model.train()
    print('Training done')
    return model, optimizer
def train_network(model, device, optimizer, train_loader, validation_loader, epochs):
    """Train `model` and report validation metrics after every epoch.

    NOTE(review): `criterion` is not defined or passed in here — this function
    relies on a module-level `criterion` being set before it is called;
    confirm against the rest of the module.

    Returns (model, optimizer) after training.
    """
    running_loss = 0
    for epoch in keep_awake(range(epochs)):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = model.forward(inputs)
            # Set gradients to zero before computing new ones.
            optimizer.zero_grad()
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        else:
            # for/else: runs once the training loop completes each epoch.
            validation_loss, validation_accuracy = test_network(
                model, validation_loader, device)
            print(
                f"Epoch {epoch+1}/{epochs}.. "
                f"Train loss: {running_loss/len(train_loader):.3f}.. "
                f"Validation loss: {validation_loss/len(validation_loader):.3f}.. "
                f"Validation accuracy: {validation_accuracy/len(validation_loader):.3f}"
            )
            running_loss = 0
    return model, optimizer
valid_data = datasets.ImageFolder(valid_dir, transform=data_transforms[1]) test_data = datasets.ImageFolder(test_dir, transform=data_transforms[2]) image_datasets = [train_data, valid_data, test_data] # TODO: Using the image datasets and the trainforms, define the dataloaders trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True) validloader = torch.utils.data.DataLoader(valid_data, batch_size=64, shuffle=True) testloader = torch.utils.data.DataLoader(test_data, batch_size=64) dataloaders = [trainloader, validloader, testloader] ############################################TODO: Build and train your network for i in keep_awake(range(5)): #Loading the pre-trained net if args.arch == "vgg": model = models.vgg16(pretrained=True) mod_classifier_input = 25088 # Freeze parameters so we don't backprop through them for parameter in model.parameters(): parameter.requires_grad = False elif args.arch == "densenet": model = models.densenet161(pretrained=True) mod_classifier_input = 2208 # Freeze parameters so we don't backprop through them for parameter in model.parameters(): parameter.requires_grad = False else: print(
def main():
    """Parse CLI args, confirm them with the user, prepare the data, build
    and train the classifier, and save a checkpoint.

    Relies on module-level helpers: `get_input_args`, `setupmodel`,
    `datasetprep`.
    """
    debug = True  # was the string 'true'; a real bool reads the same in `if debug:`
    confirmed = False
    while not confirmed:
        in_args = get_input_args()
        print("***************Training Starting***************")
        print(" data_dir: ", in_args.data_dir)
        print(" arch: = {!r}".format(in_args.arch))
        print(" learning_rate: = {!r}".format(in_args.lr))
        print(" scheduler: = {!r}".format(in_args.schdlr))
        print(" dropout: = {!r}".format(in_args.dropout))
        print(" hidden_layers: = {!r}".format(in_args.hidden_layers))
        print(" epochs: = {!r}".format(in_args.epochs))
        print(" batch_size = {!r}".format(in_args.batch_size))
        print(" gpu: = {!r}".format(in_args.gpu))
        print(" checkpoint: = {!r}".format(in_args.save_dir))
        print(" log: = {!r}".format(in_args.log))
        if in_args.batch_size > 64 or in_args.batch_size < 1:
            print("--batch_size: must range from 1 to 64.")
            sys.exit(1)
        yn = str(
            input(
                "Would you like to continue training with these choices Y/N? "
            ))
        # BUG FIX: the original test `yn == 'y' or 'Y' or 'YES' or 'yes'` was
        # always true (non-empty string literals are truthy), so ANY answer
        # continued. Compare against the accepted answers instead.
        if yn.strip().lower() in ('y', 'yes'):
            confirmed = True
        else:
            sys.exit(1)

    print("\nSet the directory, Path and Name for the Checkpoint-------------")
    if in_args.save_dir:
        # Create save directory if required, then save checkpoint there.
        if not os.path.exists(in_args.save_dir):
            os.makedirs(in_args.save_dir)
        chkpoint_filepath = in_args.save_dir + '/' + in_args.arch + '_checkpoint.pth'
    else:
        # Save checkpoint in the current directory.
        chkpoint_filepath = in_args.arch + '_checkpoint.pth'

    # Append run logs to the file named by --log.
    logging.basicConfig(filename=in_args.log,
                        filemode='a',
                        level=logging.INFO,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p')
    logging.info('train logging started')
    logging.info('%s %s %s %s', "architecture:", in_args.arch,
                 "checkpoint:", chkpoint_filepath)

    # Check gpu availability; `gpu` is only used for the status print below.
    device = setupmodel.gpu_check()
    print(device)
    if torch.cuda.is_available() and in_args.gpu:
        gpu = True
        print("gpu is ENABLED and set, Training on GPU")
    else:
        gpu = False
    print("device:= ", device, "and gpu is:", gpu, "in_args.gpu:", in_args.gpu)

    # Load and transform the training, validation, and testing sets.
    print("\ncall load_and_transform-----------------------")
    data_dir = in_args.data_dir
    dataloaders, trainloader, vloader, testloader, class_to_idx, dataset_sizes = \
        datasetprep.load_and_transform(in_args.data_dir, in_args.batch_size)
    # Map label numbers to actual flower names.
    cat_to_name = datasetprep.map_catalog_to_name()
    if debug:
        # Explore the current batch tensor, ids and labels.
        print("Now show only data batch tensor, ids, and labels")
        inputs, labels = next(iter(dataloaders['training']))
        print(inputs.size())
        print(labels)
    print(
        "\nData load_and_transforms completed-----------------------------------"
    )
    logging.info(
        '%s', "Data load_and_transforms completed-------------------------")

    print("\nget the model and features sizes-----------------------",
          in_args.arch)
    model, input_size = setupmodel.get_model(in_args.arch)
    # BUG FIX: output_size was previously assigned only inside the debug
    # branch, but it is needed unconditionally below.
    output_size = len(class_to_idx)
    if debug:
        print("architecture:", in_args.arch)
        print("output_size:= ", output_size)
        print("input_size:= ", input_size)
        print("output_size= ", len(class_to_idx))
        print("hidden_layers: = {!r}".format(in_args.hidden_layers))
    learning_rate = in_args.lr
    print("hyperparameters:")
    print("batch_size: =", in_args.batch_size, "epochs: =", in_args.epochs,
          "dropout: =", in_args.dropout, "learning_rate= ", learning_rate)
    logging.info('%s %s %s %s %s %s %s %s', "architecture:", in_args.arch,
                 "input_size:= ", input_size, "output_size:= ", output_size,
                 "hidden_layers: ", in_args.hidden_layers)
    logging.info('%s %s %s %s %s %s %s %s', "batch_size: =",
                 in_args.batch_size, "epochs: =", in_args.epochs,
                 "dropout: =", in_args.dropout, "learning_rate= ",
                 learning_rate)
    print(
        "\n apply hyperparameters and run the classifier-----------------------"
    )

    # Create the classifier.
    print("\nSetting Neural Network / create Classifer------")
    print('class_to_idx: ', class_to_idx)
    model, criterion, optimizer = setupmodel.create_classifier(
        model, input_size, in_args.hidden_layers, output_size, learning_rate,
        in_args.dropout, class_to_idx)
    print('criterion=', criterion)
    logging.info('%s %s %s %s %s %s %s %s', "learning_rate: =", in_args.lr,
                 "hidden_layers: =", in_args.hidden_layers, " batch_size: =",
                 in_args.batch_size, "checkpoint:", chkpoint_filepath)
    print("model", model)

    # Train the final layers; losses/accuracy are printed as the network trains.
    print("\nTraining Neural Network------------------------")
    start_time = time()
    epochs = in_args.epochs
    print("Network architecture:", in_args.arch)
    print("Number of epochs: ", in_args.epochs)
    print('Learning rate: ', in_args.lr)
    print("dropout:= ", in_args.dropout)
    print('device= ', device)
    logging.info('%s %s %s %s', "architecture:", in_args.arch,
                 "Number of epochs: ", in_args.epochs)
    print("\nTrain the network---")
    logging.info('%s', " Training Starting-----")
    from workspace_utils import keep_awake  # keeps the workspace session alive
    for i in keep_awake(range(1)):
        print("active session started")
        model, criterion, optimizer = setupmodel.train_model(
            model, criterion, optimizer, epochs, trainloader, vloader)
    logging.info('%s', " Training Completed-----")

    # Save trained model.
    print("\n Save the checkpoint -------------------")
    setupmodel.save_checkpoint(model, chkpoint_filepath, in_args.arch,
                               in_args.epochs, criterion, optimizer)
    print('Model saved at {}'.format(chkpoint_filepath))
    logging.info('%s %s %s %s %s', " checkpoint saved-----", in_args.arch,
                 model, chkpoint_filepath, in_args.epochs)
    print("\n checkpoint saved-------------------")

    # Calculate and print overall runtime.
    time_elapsed = time() - start_time
    print('Training time_elapsed: {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logging.info(
        '%s', 'Training time_elapsed: {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
def train(data_dir, save_dir=os.path.dirname(os.path.abspath(__file__)), arch='resnet50', learning_rate=0.003, hidden_units=512, epochs=3, device=None):
    """Build a flower_model and train it on the data under `data_dir`.

    Args:
        data_dir: root directory of the image dataset.
        save_dir: directory for checkpoints (defaults to this file's dir).
        arch: backbone architecture name passed to flower_model.
        learning_rate: optimizer learning rate.
        hidden_units: classifier hidden layer size.
        epochs: number of training epochs.
        device: treated as a boolean flag — any truthy value selects 'cuda',
            otherwise 'cpu' (kept for backward compatibility).

    Returns:
        The trained model wrapper.
    """
    device = 'cuda' if device else 'cpu'
    model = flower_model(save_dir=save_dir,
                         arch=arch,
                         learning_rate=learning_rate,
                         hidden_units=hidden_units)
    ts, tl = build_dataloaders(data_dir)
    train_dataset = ts[0]
    trainloader, validloader, testloader = tl

    model.model.to(device)
    steps = 0         # batches processed since training began
    print_every = 3   # validate every `print_every` batches
    running_loss = 0  # train loss accumulated since the last report

    for e in keep_awake(range(epochs)):
        for inputs, labels in trainloader:
            steps += 1
            inputs, labels = inputs.to(device), labels.to(device)
            # Gradients accumulate across backward() calls by default;
            # clear them before each batch.
            model.optimizer.zero_grad()
            logps = model.model.forward(inputs)
            loss = model.criterion(logps, labels)
            loss.backward()
            model.optimizer.step()
            running_loss += loss.item()

            if steps % print_every == 0:
                model.model.eval()  # disable dropout for validation
                accuracy = 0
                valid_loss = 0
                with torch.no_grad():
                    for inputs, labels in validloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = model.model.forward(inputs)
                        valid_loss += model.criterion(logps, labels).item()
                        # probs = e^log(probs); take the top-1 class and
                        # compare against the labels for batch accuracy.
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()
                # BUG FIX: the averages below previously divided by
                # len(testloader) even though the loop iterates validloader.
                print(
                    "Epoch {}/{}".format(e + 1, epochs),
                    "Train Loss: {:.3f}".format(running_loss / print_every),
                    "Validation Loss: {:.3f}".format(valid_loss /
                                                     len(validloader)),
                    "Validation Accuracy: {:.3f}".format(accuracy /
                                                         len(validloader)))
                running_loss = 0
                # Back to training mode for the next batches.
                model.model.train()
    return model
def main():
    """Parse args, build a pretrained backbone with a custom classifier,
    train it, and save a checkpoint.

    Relies on module-level helpers/globals: `get_cmd_args`,
    `default_data_dir`, `create_datasets`, `create_dataloaders`,
    `create_classifier`, `validation`, `num_of_classes`, `train_loader`,
    `valid_loader`, `train_dataset`, `keep_awake`.
    """
    global model, device, data_dir, train_dir, valid_dir, test_dir
    global batch_size
    # Measure total program runtime.
    start_time = time()
    print("start time {}".format(start_time))
    in_arg = get_cmd_args()
    if in_arg.data_dir != default_data_dir:
        print("ERROR ERROR only allowed data_dir is 'flowers'")
    data_dir = in_arg.data_dir
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'
    gpu = in_arg.gpu
    arch = in_arg.arch
    lr = in_arg.learning_rate
    save_dir = in_arg.save_dir  # checkpoint directory (relative name)
    epochs = in_arg.epochs
    batch_size = in_arg.batch_size
    hidden_layers = in_arg.hidden_units

    # Create save_dir under the workspace root.
    save_dir = os.path.join("/home/workspace/ImageClassifier/", save_dir)
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir, mode=0o755)
    # BUG FIX: `timestamp` is a method; the original stored the bound method
    # object instead of calling it.
    time_suffix = datetime.now().timestamp()

    print("Running train.py with:", "\n data_dir = ", in_arg.data_dir,
          "\n gpu =", in_arg.gpu, "\n arch =", in_arg.arch,
          " learning_rate =", in_arg.learning_rate, "\n save_dir=",
          in_arg.save_dir, "\n epochs =", in_arg.epochs, "\n batch_size =",
          in_arg.batch_size)
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)
    create_datasets()
    create_dataloaders()

    device = torch.device("cpu")
    cuda = torch.cuda.is_available()
    if gpu and cuda:
        device = torch.device("cuda:0")
    print("CUDA:{}".format(cuda))

    # Select the pretrained backbone.
    # BUG FIX: the first branch previously tested
    # `arch == 'vgg13' or arch == 'vgg16'` and loaded vgg13, so asking for
    # vgg16 silently produced a vgg13 model.
    if arch == 'vgg13':
        model = models.vgg13(pretrained=True)
        no_input_layer = 25088
    elif arch == 'vgg16':
        model = models.vgg16(pretrained=True)
        no_input_layer = 25088
    elif arch == 'alexnet':
        model = models.alexnet(pretrained=True)
        no_input_layer = 9216
    else:
        print("train.py does not support model:{}".format(arch))
        print("train.py supports only vgg13 , vgg16,alexnet")
        print("Defaulting to vgg16")
        model = models.vgg16(pretrained=True)
        no_input_layer = 25088

    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())
    print("\nOur model:\n\n", model, '\n')
    print("State dict keys:\n\n", model.state_dict().keys())
    print("DEVICE being used is:", device)
    model.classifier.out_features = num_of_classes
    # Replace the stock classifier with our own head.
    model.classifier = create_classifier(model, hidden_layers)
    print("DEVICE:{}".format(device))
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=lr)

    # Training loop; keep_awake keeps the workspace session alive.
    print("Start training")
    steps = 0
    running_loss = 0
    print_every = 40
    for i in keep_awake(range(5)):
        for e in range(epochs):
            model.train()
            for images, labels in train_loader:
                steps += 1
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                output = model.forward(images)
                output = output.to(device)
                loss = criterion(output, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                if steps % print_every == 0:
                    # Eval mode for inference; gradients off to save memory.
                    model.eval()
                    with torch.no_grad():
                        test_loss, accuracy = validation(
                            model, valid_loader, criterion, device)
                    print(
                        "Epoch: {}/{}.. ".format(e + 1, epochs),
                        "Training Loss: {:.3f}.. ".format(running_loss /
                                                          print_every),
                        "Test Loss: {:.3f}.. ".format(test_loss /
                                                      len(valid_loader)),
                        "Test Accuracy: {:.3f}".format(accuracy /
                                                       len(valid_loader)))
                    running_loss = 0
                    # Make sure training is back on.
                    model.train()
    end_time = time()
    print("start time:{};end time:{}".format(start_time, end_time))
    print("End training")

    # Save the checkpoint.
    model.class_to_idx = train_dataset.class_to_idx
    checkpoint = {
        'arch': arch,
        'input_size': no_input_layer,
        'output_size': 102,
        'class_to_idx': model.class_to_idx,
        'state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epochs': epochs,
        'classifier': model.classifier
    }
    # BUG FIX: `save_dir` is already an absolute path under the workspace
    # root; the original prepended the root a second time when saving.
    torch.save(checkpoint, os.path.join(save_dir, 'checkpoint.pth'))
    print("END saving checkpoint")
# Set the liquidation time lqt = 60 # Set the number of trades n_trades = 60 # Set trader's risk aversion tr = 1e-6 # Set the number of episodes to run the simulation episodes = 10000 shortfall_hist = np.array([]) shortfall_deque = deque(maxlen=100) for episode in keep_awake(range(episodes)): # Reset the enviroment cur_state = env.reset(seed=episode, liquid_time=lqt, num_trades=n_trades, lamb=tr) # set the environment to make transactions env.start_transactions() for i in range(n_trades + 1): # Predict the best action for the current state. action = agent.act(cur_state, add_noise=True) # Action is performed and new state, reward, info are received.
def main():
    """CLI entry point: parse args, build dataloaders, train the chosen
    architecture, evaluate on the test set, and save a checkpoint.
    """
    argparser = argparse.ArgumentParser(description='Train the Classifier')
    argparser.add_argument('data_directory',
                           help='The directory of training data.')
    argparser.add_argument('--save_dir',
                           default='checkpoint.pth',
                           help='The directory for saving checkpoints.')
    argparser.add_argument('--arch',
                           default='densenet121',
                           help='The model name.')
    argparser.add_argument('--hidden_units',
                           type=int,
                           help='The number of hidden units.')
    # Typo fix in help text: "traning" -> "training".
    argparser.add_argument('--epochs',
                           type=int,
                           default=15,
                           help='The number of training epochs.')
    argparser.add_argument('--learning_rate',
                           type=float,
                           default=0.003,
                           help='The learning rate.')
    argparser.add_argument('--gpu', action='store_true', help='Enable gpu')
    args = argparser.parse_args()
    checkpoint_file = args.save_dir
    data_dir = args.data_directory
    model_name = args.arch
    hidden_units = args.hidden_units
    epochs = args.epochs
    learning_rate = args.learning_rate
    gpu_enabled = args.gpu

    # Load training data. Validation and test share the deterministic
    # resize/crop transform; only training gets augmentation.
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])
    test_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])
    train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_data = datasets.ImageFolder(valid_dir, transform=test_transforms)
    test_data = datasets.ImageFolder(test_dir, transform=test_transforms)
    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=64,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_data, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=64)

    # Build training model; only the classifier head's parameters train.
    model, classifier_name, hidden_layers = create_model(
        model_name, hidden_units)
    criterion = nn.NLLLoss()
    if classifier_name == 'classifier':
        optimizer = optim.Adam(model.classifier.parameters(),
                               lr=learning_rate)
    elif classifier_name == 'fc':
        optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)
    device = 'cpu'
    if gpu_enabled:
        if torch.cuda.is_available():
            device = 'cuda'
        else:
            print("gpu is not available")
    model.to(device)

    # Start training; for/else runs the validation pass after each full
    # epoch over trainloader.
    for e in keep_awake(range(epochs)):
        running_loss = 0
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            log_ps = model.forward(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        else:
            test_loss = 0
            accuracy = 0
            # Turn off gradients for validation.
            with torch.no_grad():
                model.eval()
                for images, labels in validloader:
                    images, labels = images.to(device), labels.to(device)
                    log_ps = model.forward(images)
                    test_loss += criterion(log_ps, labels)
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor))
                model.train()
            print("Epoch: {}/{}.. ".format(e + 1, epochs),
                  "Training Loss: {:.3f}.. ".format(running_loss /
                                                    len(trainloader)),
                  "validation Loss: {:.3f}.. ".format(test_loss /
                                                      len(validloader)),
                  "validation Accuracy: {:.3f}".format(accuracy /
                                                       len(validloader)))

    # Do validation on the test set.
    with active_session():
        test_loss = 0
        accuracy = 0
        with torch.no_grad():
            model.eval()
            for images, labels in testloader:
                images, labels = images.to(device), labels.to(device)
                log_ps = model.forward(images)
                test_loss += criterion(log_ps, labels)
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
            model.train()
        print("Test Loss: {:.3f}.. ".format(test_loss / len(testloader)),
              "Test Accuracy: {:.3f}".format(accuracy / len(testloader)))

    # Save the checkpoint.
    if classifier_name == 'classifier':
        model_classifier_state_dict = model.classifier.state_dict()
    elif classifier_name == 'fc':
        model_classifier_state_dict = model.fc.state_dict()
    else:
        model_classifier_state_dict = None
    checkpoint = {
        'class_to_idx': train_data.class_to_idx,
        # BUG FIX: the original stored the bound method `optimizer.state_dict`
        # (uncalled), so the saved checkpoint held no optimizer state.
        'optimizer_state_dict': optimizer.state_dict(),
        'learning_rate': learning_rate,
        'device': device,
        'model_name': model_name,
        'hidden_layers': hidden_layers,
        'model_classifier_state_dict': model_classifier_state_dict
    }
    torch.save(checkpoint, checkpoint_file)
def train_image_classifier(parameters, device, model, criterion, optimizer, train_loader, valid_loader, train_losses, valid_losses, valid_accuracies):
    """
    Train and validate an image classification model.

    :param parameters: dictionary providing model parameters
        ('epochs' and 'train_loss_accuracy_batch_interval', both default 5)
    :param device: torch.device indicating if GPU or CPU will be used
    :param model: the neural network model
    :param criterion: the loss / error function
    :param optimizer: the optimizer for backpropagation
    :param train_loader: training torch.utils.data.DataLoader
    :param valid_loader: validation torch.utils.data.DataLoader
    :param train_losses: list to store training loss at each report interval
    :param valid_losses: list to store validation loss at each report interval
    :param valid_accuracies: list to store validation accuracy at each
        report interval (appended by test_image_classifier)
    :return:
    """
    interval = parameters.get('train_loss_accuracy_batch_interval', 5)
    epochs = parameters.get('epochs', 5)
    # keep_awake keeps the workspace session active while this loop runs.
    for e in keep_awake(range(epochs)):
        # Training mode enables dropout.
        model.train()
        print("***************************")
        print("Epoch {}/{}".format(e + 1, epochs))
        running_training_loss = 0
        batch_count = 0
        train_image_count = 0
        for train_images, train_labels in train_loader:
            train_image_count += len(train_images)
            batch_count += 1
            # Move input and label tensors to the active device.
            train_images = train_images.to(device)
            train_labels = train_labels.to(device)
            # Don't let gradients accumulate across iterations.
            optimizer.zero_grad()
            train_loss = criterion(model(train_images), train_labels)
            train_loss.backward()
            optimizer.step()
            running_training_loss += train_loss.item()

            if batch_count % interval == 0:
                # Validate, then report epoch-so-far averages; the helper
                # appends to valid_losses / valid_accuracies.
                test_image_classifier(device, model, criterion, valid_loader,
                                      valid_losses, valid_accuracies, False)
                train_losses.append(running_training_loss / batch_count)
                print("----")
                print("Processed {} training batches with {} processed images".
                      format(batch_count, train_image_count))
                print("Training loss: {}".format(train_losses[-1]))
                print("Validation loss: {}".format(valid_losses[-1]))
                print("Validation accuracy: {}".format(valid_accuracies[-1]))
def main():
    """Train three MADDPG agents in parallel environments.

    Runs the full training loop: collects transitions into a replay buffer,
    periodically updates actor/critic networks, logs mean episode rewards to
    TensorBoard, and saves model checkpoints plus episode gifs to ./model_dir.
    """
    # Seed RNGs for reproducibility (project helper).
    seeding()
    parallel_envs = 4
    number_of_episodes = 1000
    episode_length = 80
    batchsize = 1000
    save_interval = 1000
    t = 0  # global step counter across all parallel envs

    # amplitude of OU noise, which slowly decreases to 0
    noise = 2
    noise_reduction = 0.9999

    # how many episodes before update
    episode_per_update = 2 * parallel_envs

    log_path = os.getcwd() + "/log"
    model_dir = os.getcwd() + "/model_dir"
    os.makedirs(model_dir, exist_ok=True)

    torch.set_num_threads(parallel_envs)
    """
    `env` controls three agents, two blue, one red.
    env.observation_space: [Box(14,), Box(14,), Box(14,)]
    env.action_sapce: [Box(2,), Box(2,), Box(2,)]
    Box(14,) can be broken down into 2+3*2+3*2=14
    (2) location coordinates of the target landmark
    (3*2) the three agents' positions w.r.t. the target landmark
    (3*2) the three agents' velocities w.r.t. the target landmark
    """
    env = envs.make_parallel_env(parallel_envs)

    # keep 5000 episodes worth of replay
    buffer = ReplayBuffer(int(5000 * episode_length))

    # initialize policy and critic
    maddpg = MADDPG()
    logger = SummaryWriter(log_dir=log_path)
    # Per-agent reward history, reset every 100 episodes when averaged.
    agent0_reward = []
    agent1_reward = []
    agent2_reward = []

    # training loop
    # show progressbar
    import progressbar as pb
    widget = [
        'episode: ', pb.Counter(), '/', str(number_of_episodes), ' ',
        pb.Percentage(), ' ', pb.ETA(), ' ',
        pb.Bar(marker=pb.RotatingMarker()), ' '
    ]
    timer = pb.ProgressBar(widgets=widget, maxval=number_of_episodes).start()

    # use keep_awake to keep workspace from disconnecting
    for episode in keep_awake(range(0, number_of_episodes, parallel_envs)):
        timer.update(episode)
        # Reward sums for this episode: one row per parallel env, one
        # column per agent.
        reward_this_episode = np.zeros((parallel_envs, 3))
        # Consult `env_wrapper.py` line 19.
        all_obs = env.reset()
        """
        `all_abs` is a list of size `parallel_envs`,
        each item in the list is another list of size two,
        first is env.observation_space: [Box(14,), Box(14,), Box(14,)],
        second is [Box(14,)], which is added to faciliate training
        https://goo.gl/Xtr6sF
        `obs` and `obs_full` are both lists of size `parallel_envs`,
        `obs` has the default observation space [Box(14,), Box(14,), Box(14,)]
        `obs_full` has the compounded observation space [Box(14,)]
        """
        obs, obs_full = transpose_list(all_obs)

        # for calculating rewards for one episode - addition of all time steps
        # save info or not
        save_info = ((episode) % save_interval < parallel_envs
                     or episode == number_of_episodes - parallel_envs)
        frames = []
        tmax = 0
        if save_info:
            frames.append(env.render('rgb_array'))

        for episode_t in range(episode_length):
            t += parallel_envs
            # explore = only explore for a certain number of steps
            # action input needs to be transposed
            actions = maddpg.act(transpose_to_tensor(obs), noise=noise)
            noise *= noise_reduction
            # `actions_array` has shape (3, parallel_envs, 2)
            actions_array = torch.stack(actions).detach().numpy()
            # `actions_for_env` has shape (parallel_envs, 3, 2), because
            # input to `step` requires the first index to be `parallel_envs`
            actions_for_env = np.rollaxis(actions_array, axis=1)

            # step forward one frame
            next_obs, next_obs_full, rewards, dones, info = \
                env.step(actions_for_env)

            # add data to buffer
            transition = (obs, obs_full, actions_for_env, rewards, next_obs,
                          next_obs_full, dones)
            buffer.push(transition)
            reward_this_episode += rewards
            obs, obs_full = next_obs, next_obs_full

            # save gif frame
            if save_info:
                frames.append(env.render('rgb_array'))
                tmax += 1

        # update the target network `parallel_envs`=4 times
        # after every `episode_per_update`=2*4
        if len(buffer
               ) > batchsize and episode % episode_per_update < parallel_envs:
            # update the local network for all agents, `a_i` refers to agent no.
            for a_i in range(3):
                samples = buffer.sample(batchsize)
                maddpg.update(samples, a_i, logger)
            # soft update the target network towards the actual networks
            maddpg.update_targets()

        # Record each parallel env's summed episode reward per agent.
        for i in range(parallel_envs):
            agent0_reward.append(reward_this_episode[i, 0])
            agent1_reward.append(reward_this_episode[i, 1])
            agent2_reward.append(reward_this_episode[i, 2])

        # Log running mean rewards every 100 episodes, then reset histories.
        if episode % 100 == 0 or episode == number_of_episodes - 1:
            avg_rewards = [
                np.mean(agent0_reward),
                np.mean(agent1_reward),
                np.mean(agent2_reward)
            ]
            agent0_reward = []
            agent1_reward = []
            agent2_reward = []
            for a_i, avg_rew in enumerate(avg_rewards):
                logger.add_scalar('agent%i/mean_episode_rewards' % a_i,
                                  avg_rew, episode)

        # Saves the model.
        save_dict_list = []
        if save_info:
            for i in range(3):
                save_dict = {
                    'actor_params':
                    maddpg.maddpg_agent[i].actor.state_dict(),
                    'actor_optim_params':
                    maddpg.maddpg_agent[i].actor_optimizer.state_dict(),
                    'critic_params':
                    maddpg.maddpg_agent[i].critic.state_dict(),
                    'critic_optim_params':
                    maddpg.maddpg_agent[i].critic_optimizer.state_dict()
                }
                save_dict_list.append(save_dict)

            torch.save(
                save_dict_list,
                os.path.join(model_dir, 'episode-{}.pt'.format(episode)))

            # Save gif files.
            imageio.mimsave(os.path.join(model_dir,
                                         'episode-{}.gif'.format(episode)),
                            frames,
                            duration=.04)

    env.close()
    logger.close()
    timer.finish()
def main():
    """Train an image classifier from command-line arguments and save a checkpoint.

    Reads all configuration (data directory, architecture, hidden/output layer
    sizes, learning rate, epochs, checkpoint path) via get_input_args(), trains
    the classifier head of a frozen pretrained backbone, and writes a checkpoint
    dictionary to in_arg.save_dir.
    """
    ### Get input from user
    in_arg = get_input_args()
    print(in_arg)
    train_dir = in_arg.dir + '/train'
    valid_dir = in_arg.dir + '/valid'
    test_dir = in_arg.dir + '/test'

    # Datasets with their respective (module-level) transforms.
    train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_data = datasets.ImageFolder(valid_dir, transform=valid_transforms)
    test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

    # Dataloaders; only train/validation are shuffled.
    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=64,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_data,
                                              batch_size=64,
                                              shuffle=True)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=64)

    #### Define the model
    # Map architecture names to constructors and instantiate ONLY the model
    # the user asked for.  (The original eagerly built all four pretrained
    # networks — downloading every set of weights — just to pick one.)
    models_dic = {
        'resnet': models.resnet18,
        'alexnet': models.alexnet,
        'vgg': models.vgg16,
        'densenet': models.densenet121
    }
    model_name = in_arg.arch
    ### Load the user-defined model
    model = models_dic[model_name](pretrained=True)

    # Freeze parameters so we don't backprop through them
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    for param in model.parameters():
        param.requires_grad = False

    # NOTE(review): the 1024 input size matches densenet121's feature output
    # only; other architectures expose different feature sizes (and resnet
    # uses .fc, not .classifier) — confirm before selecting another arch.
    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(1024, in_arg.hidden_nodes)),
                     ('relu', nn.ReLU()),
                     ('fc2', nn.Linear(in_arg.hidden_nodes,
                                       in_arg.output_nodes)),
                     ('output', nn.LogSoftmax(dim=1))]))
    model.classifier = classifier

    # NLLLoss pairs with the LogSoftmax output above.
    criterion = nn.NLLLoss()
    # Only train the classifier parameters, feature parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(),
                           lr=in_arg.learning_rate)
    model.to(device)

    epochs = in_arg.epocs
    steps = 0
    running_loss = 0
    print_every = 5
    # NOTE(review): the outer keep_awake(range(5)) loop repeats the whole
    # epoch loop five times, so the model actually trains for 5 * epochs
    # epochs — confirm this is intended and not a copy-paste artifact.
    for i in keep_awake(range(5)):
        for epoch in range(epochs):
            for inputs, labels in trainloader:
                steps += 1
                # Move input and label tensors to the default device
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                logps = model.forward(inputs)
                loss = criterion(logps, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                # Periodically evaluate on the validation set.
                if steps % print_every == 0:
                    valid_loss = 0
                    accuracy = 0
                    model.eval()  # disable dropout for evaluation
                    with torch.no_grad():
                        for inputs, labels in validloader:
                            inputs, labels = inputs.to(device), labels.to(
                                device)
                            logps = model.forward(inputs)
                            batch_loss = criterion(logps, labels)
                            valid_loss += batch_loss.item()

                            # Calculate accuracy
                            ps = torch.exp(logps)
                            top_p, top_class = ps.topk(1, dim=1)
                            equals = top_class == labels.view(
                                *top_class.shape)
                            accuracy += torch.mean(
                                equals.type(torch.FloatTensor)).item()
                    # BUGFIX: the averages below were divided by
                    # len(testloader) even though both metrics were
                    # accumulated over validloader.
                    print(
                        f"Epoch {epoch+1}/{epochs}.. "
                        f"Train loss: {running_loss/print_every:.3f}.. "
                        f"Validation loss: {valid_loss/len(validloader):.3f}.. "
                        f"Validation accuracy: {accuracy/len(validloader):.3f}")
                    running_loss = 0
                    model.train()  # re-enable dropout for training

    ######## Save the model
    model.class_to_idx = train_data.class_to_idx
    checkpoint = {
        'input_size': 1024,
        'output_size': in_arg.output_nodes,
        'hidden_layers': [each for each in model.classifier],
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'class_to_idx': model.class_to_idx
    }
    torch.save(checkpoint, in_arg.save_dir)
def build_and_train_model(data_path, checkpoint_dir, base_model='vgg16',
                          learning_rate=0.05, hidden_units=4096, epochs=1,
                          device='cuda'):
    """Build a VGG-based flower classifier, train it, and save a checkpoint.

    :param data_path: root directory of the image data (consumed by loading_data)
    :param checkpoint_dir: directory in which checkpoint_terminal.pth is written
    :param base_model: backbone to use — 'vgg11', 'vgg13' or 'vgg16'
    :param learning_rate: Adam learning rate for the classifier head
    :param hidden_units: width of the hidden layer in the new classifier
    :param epochs: epochs per keep_awake iteration (outer loop runs 5 times)
    :param device: preferred device ('cuda' or 'cpu'); falls back to CPU
        when CUDA is unavailable
    """
    # Load the data exactly once.  (The original called loading_data three
    # separate times, re-reading the dataset for each element it needed.)
    loaded = loading_data(data_path)
    train_loader = loaded[0]
    valid_loader = loaded[1]

    # Instantiate only the requested backbone; eagerly constructing every
    # dict entry would download all three pretrained VGG variants.
    models_options = {
        'vgg11': models.vgg11,
        'vgg13': models.vgg13,
        'vgg16': models.vgg16
    }
    model = models_options[base_model](pretrained=True)
    l_rate = learning_rate

    # Honor the caller's device choice, falling back to CPU when no GPU is
    # available.  (BUGFIX: the original overwrote and ignored the `device`
    # argument entirely.)
    device = torch.device(device if torch.cuda.is_available() else "cpu")

    # do not backpropagate through the parameters of the loaded model
    for param in model.parameters():
        param.requires_grad = False

    # define my feed forward classifier head (102 flower classes)
    model.classifier = nn.Sequential(nn.Linear(25088, hidden_units),
                                     nn.Dropout(0.3),
                                     nn.ReLU(),
                                     nn.Linear(hidden_units, 102),
                                     nn.LogSoftmax(dim=1))

    # define the criterion — NLLLoss pairs with the LogSoftmax output
    criterion = nn.NLLLoss()

    # define optimizer of parameters - only for the classifier, not the
    # imported (frozen) model
    optimizer = optim.Adam(model.classifier.parameters(), lr=l_rate)

    model.to(device)

    steps = 0
    running_loss = 0
    print_every = 10

    # Train the NW
    # NOTE(review): keep_awake(range(5)) repeats the epoch loop five times,
    # so the model actually trains for 5 * epochs epochs — confirm intended.
    for i in keep_awake(range(5)):
        # keep the workspace session alive during long-running work
        with active_session():
            for epoch in range(epochs):
                for images, labels in train_loader:
                    steps += 1
                    # Move input and label tensors to the selected device
                    images, labels = images.to(device), labels.to(device)

                    # reset gradient to 0
                    optimizer.zero_grad()
                    # forward step
                    result = model.forward(images)
                    # calculate loss
                    loss = criterion(result, labels)
                    # backpropagate
                    loss.backward()
                    optimizer.step()

                    running_loss += loss.item()

                    if steps % print_every == 0:
                        eval_loss = 0
                        accuracy = 0
                        # Set the model in validation mode (disables dropout)
                        model.eval()
                        # turn off gradient during validation
                        with torch.no_grad():
                            for images, labels in valid_loader:
                                images, labels = images.to(
                                    device), labels.to(device)
                                result = model.forward(images)
                                batch_loss = criterion(result, labels)
                                eval_loss += batch_loss.item()

                                # Calculate accuracy
                                ps = torch.exp(result)
                                top_p, top_class = ps.topk(1, dim=1)
                                equals = top_class == labels.view(
                                    *top_class.shape)
                                accuracy += torch.mean(
                                    equals.type(torch.FloatTensor)).item()

                        # print training vs validation loss and accuracy.
                        # (The original used a for/else with no break,
                        # which is equivalent to printing after the loop.)
                        print(f"Epoch {epoch+1}/{epochs}.. "
                              f"Train loss: {running_loss/print_every:.3f}.. "
                              f"Eval loss: {eval_loss/len(valid_loader):.3f}.. "
                              f"Eval accuracy: {accuracy/len(valid_loader):.3f}")
                        running_loss = 0
                        model.train()

    # EXPORT THE CHECKPOINT!
    model.class_to_idx = loaded[3].class_to_idx
    checkpoint = {'network': base_model,
                  'input_size': 25088,
                  'output_size': 102,
                  'learning_rate': learning_rate,
                  'batch_size': 64,
                  'classifier': model.classifier,
                  'epochs': epochs,
                  'optimizer': optimizer.state_dict(),
                  'state_dict': model.state_dict(),
                  'class_to_idx': model.class_to_idx}
    torch.save(checkpoint, checkpoint_dir + '/checkpoint_terminal.pth')
    print('Your model has been trained and saved as checkpoint in the folder you indicated.',
          checkpoint_dir)
def train_model(arch="vgg16", hidden_units=4069, checkpoint=" ", epochs=3, learning_rate=0.003): for i in keep_awake(range(1)): device = torch.device(mode) dataset, dataloader = loadData(data_dir) trainloader = dataloader['train'] train_dataset = dataset['train'] validloader = dataloader['valid'] valid_dataset = dataset['valid'] classes_num = len(train_dataset.classes) model = load_checkpoint(hidden_units, arch, classes_num) optimizer = optim.Adam(model.classifier.parameters(), learning_rate) criterian = nn.NLLLoss() model.to(device) epochs_num = epochs step = 0 running_loss = 0 print_every = 5 for epoch in range(epochs_num): for images, labels in trainloader: step += 1 images, labels = images.to(device), labels.to(device) optimizer.zero_grad() logps = model(images) loss = criterian(logps, labels) loss.backward() optimizer.step() running_loss += loss.item() if step % print_every == 0: vaild_loss = 0 accuracy = 0 model.eval() with torch.no_grad(): #Validation Loop for images, labels in validloader: images, labels = images.to(device), labels.to( device) logps = model(images) batch_loss = criterian(logps, labels) vaild_loss += batch_loss.item() #Claculate The Accuracy ps = torch.exp(logps) top_ps, top_class = ps.topk(1, dim=1) equality = top_class == labels.view( *top_class.shape) accuracy += torch.mean( equality.type(torch.FloatTensor)).item() print( f"Epoch {epoch+1}/{epochs}.." f"Train loss: {running_loss/print_every:.3f}.. " f"Validation loss: {vaild_loss/len(validloader):.3f}.. " f"Validation accuracy: {accuracy/len(validloader):.3f}" ) running_loss = 0 model.train() model.class_to_idx = train_dataset.class_to_idx checkpoint_dictionary = { 'hidden_units': hidden_units, 'arch': arch, 'class_to_idx': model.class_to_idx, 'state_dict': model.state_dict() } torch.save(checkpoint_dictionary, checkpoint) return model
def main():
    """Run MADDPG training on the parallel multi-agent environment.

    Trains three agents for `number_of_episodes` episodes across
    `parallel_envs` environments, periodically logging mean rewards to
    TensorBoard and saving checkpoints and episode gifs under ./model_dir.
    """
    seeding()
    # number of parallel agents
    parallel_envs = 4
    # number of training episodes.
    # change this to higher number to experiment. say 30000.
    number_of_episodes = 1000
    episode_length = 80
    batchsize = 1000
    # how many episodes to save policy and gif
    save_interval = 1000
    t = 0  # global step counter across all parallel envs

    # amplitude of OU noise
    # this slowly decreases to 0
    noise = 2
    noise_reduction = 0.9999

    # how many episodes before update
    episode_per_update = 2 * parallel_envs

    log_path = os.getcwd() + "/log"
    model_dir = os.getcwd() + "/model_dir"
    os.makedirs(model_dir, exist_ok=True)

    torch.set_num_threads(parallel_envs)
    env = envs.make_parallel_env(parallel_envs)

    # keep 5000 episodes worth of replay
    buffer = ReplayBuffer(int(5000 * episode_length))

    # initialize policy and critic
    maddpg = MADDPG()
    logger = SummaryWriter(log_dir=log_path)
    agent0_reward = []
    agent1_reward = []
    agent2_reward = []

    # training loop
    # show progressbar
    import progressbar as pb
    widget = [
        'episode: ', pb.Counter(), '/', str(number_of_episodes), ' ',
        pb.Percentage(), ' ', pb.ETA(), ' ',
        pb.Bar(marker=pb.RotatingMarker()), ' '
    ]
    timer = pb.ProgressBar(widgets=widget, maxval=number_of_episodes).start()

    # use keep_awake to keep workspace from disconnecting
    for episode in keep_awake(range(0, number_of_episodes, parallel_envs)):
        timer.update(episode)
        # for calculating rewards for this particular episode - addition of
        # all time steps
        reward_this_episode = np.zeros((parallel_envs, 3))

        all_obs = env.reset()
        # BUGFIX: this unpacking was commented out, leaving `obs` and
        # `obs_full` undefined and crashing the first maddpg.act() call
        # with a NameError.
        obs, obs_full = transpose_list(all_obs)

        # save info or not
        save_info = ((episode) % save_interval < parallel_envs
                     or episode == number_of_episodes - parallel_envs)
        frames = []
        tmax = 0

        if save_info:
            frames.append(env.render('rgb_array'))

        for episode_t in range(episode_length):
            t += parallel_envs
            # explore = only explore for a certain number of episodes
            # action input needs to be transposed
            actions = maddpg.act(transpose_to_tensor(obs), noise=noise)
            noise *= noise_reduction

            actions_array = torch.stack(actions).detach().numpy()

            # transpose the list of list
            # flip the first two indices
            # input to step requires the first index to correspond to
            # number of parallel agents
            actions_for_env = np.rollaxis(actions_array, 1)

            # step forward one frame
            next_obs, next_obs_full, rewards, dones, info = env.step(
                actions_for_env)

            # add data to buffer
            transition = (obs, obs_full, actions_for_env, rewards, next_obs,
                          next_obs_full, dones)
            buffer.push(transition)

            reward_this_episode += rewards
            obs, obs_full = next_obs, next_obs_full

            # save gif frame
            if save_info:
                frames.append(env.render('rgb_array'))
                tmax += 1

        # update once after every episode_per_update
        if len(buffer
               ) > batchsize and episode % episode_per_update < parallel_envs:
            for a_i in range(3):
                samples = buffer.sample(batchsize)
                maddpg.update(samples, a_i, logger)
            # soft update the target network towards the actual networks
            maddpg.update_targets()

        # Record each parallel env's summed episode reward per agent.
        for i in range(parallel_envs):
            agent0_reward.append(reward_this_episode[i, 0])
            agent1_reward.append(reward_this_episode[i, 1])
            agent2_reward.append(reward_this_episode[i, 2])

        # Log running mean rewards every 100 episodes, then reset histories.
        if episode % 100 == 0 or episode == number_of_episodes - 1:
            avg_rewards = [
                np.mean(agent0_reward),
                np.mean(agent1_reward),
                np.mean(agent2_reward)
            ]
            agent0_reward = []
            agent1_reward = []
            agent2_reward = []
            for a_i, avg_rew in enumerate(avg_rewards):
                logger.add_scalar('agent%i/mean_episode_rewards' % a_i,
                                  avg_rew, episode)

        # saving model
        save_dict_list = []
        if save_info:
            for i in range(3):
                save_dict = {
                    'actor_params':
                    maddpg.maddpg_agent[i].actor.state_dict(),
                    'actor_optim_params':
                    maddpg.maddpg_agent[i].actor_optimizer.state_dict(),
                    'critic_params':
                    maddpg.maddpg_agent[i].critic.state_dict(),
                    'critic_optim_params':
                    maddpg.maddpg_agent[i].critic_optimizer.state_dict()
                }
                save_dict_list.append(save_dict)

            torch.save(
                save_dict_list,
                os.path.join(model_dir, 'episode-{}.pt'.format(episode)))

            # save gif files
            imageio.mimsave(os.path.join(model_dir,
                                         'episode-{}.gif'.format(episode)),
                            frames,
                            duration=.04)

    env.close()
    logger.close()
    timer.finish()
('relu1', nn.ReLU()), ('hidden_layer1', nn.Linear(hidden_units, 90)), ('relu2', nn.ReLU()), ('hidden_layer2', nn.Linear(90, 80)), ('relu3', nn.ReLU()), ('hidden_layer3', nn.Linear(80, 102)), ('output', nn.LogSoftmax(dim=1))])) model.classifier = classifier criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(model.classifier.parameters(), learning_rate, momentum=0.9) if gpu: model.cuda() # Train the network print_every = 10 for epoch in keep_awake(range(1, num_epochs + 1)): t_loss = 0.0 for i, (t_image, t_label) in enumerate(trainloader, 1): if gpu: t_image = t_image.cuda() t_label = t_label.cuda() #Reset gradients optimizer.zero_grad() #Forward output = model.forward(t_image) loss = criterion(output, t_label) #Backword loss.backward() #parameter update optimizer.step() t_loss += loss.item()