def load_checkpoint(filepath):
    """Rebuild an ``fc_model.Network`` from a saved checkpoint.

    The checkpoint is a dict holding the constructor arguments under
    ``'input_size'`` and ``'output_size'`` (and optionally
    ``'hidden_layers'``) together with the trained weights under
    ``'state_dict'``.

    Args:
        filepath: Checkpoint location, passed straight to ``torch.load``.

    Returns:
        A new network with the stored weights loaded into it.

    Raises:
        KeyError: If ``'input_size'``, ``'output_size'`` or ``'state_dict'``
            is missing from the checkpoint.
    """
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    # map_location='cpu' lets a checkpoint that was saved from a GPU be read
    # on a CPU-only machine; load_state_dict copies the values into the
    # freshly built model's own parameters either way.
    checkpoint = torch.load(filepath, map_location='cpu')

    network_args = [checkpoint['input_size'], checkpoint['output_size']]
    # Some checkpoints store only the input/output sizes. In that case the
    # network is built without an explicit layer list (assumes
    # fc_model.Network can be constructed from two arguments -- TODO confirm).
    if 'hidden_layers' in checkpoint:
        network_args.append(checkpoint['hidden_layers'])

    model = fc_model.Network(*network_args)
    model.load_state_dict(checkpoint['state_dict'])
    return model
import torch
from torch import nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms

import fc_model

# The images only need to be converted to tensors for this script.
transform = transforms.Compose([transforms.ToTensor()])

# Both splits are read from the same local folder (no download is attempted).
_dataset_options = dict(download=False, transform=transform)
train_set = datasets.FashionMNIST('FashionMNIST_data/', train=True,
                                  **_dataset_options)
test_set = datasets.FashionMNIST('FashionMNIST_data/', train=False,
                                 **_dataset_options)

# Identical batching/shuffling settings for the two loaders.
_loader_options = dict(batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_set, **_loader_options)
test_loader = torch.utils.data.DataLoader(test_set, **_loader_options)

# Pull one batch from the training loader.
image, label = next(iter(train_loader))

# Network with three hidden layers, plus its loss and optimizer.
model = fc_model.Network(784, 10, [512, 256, 128])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training is currently switched off:
# fc_model.train(model, train_loader, test_loader, criterion, optimizer, epochs=2)

print("Our model: \n\n", model, '\n')
print("The state dict keys: \n\n", model.state_dict().keys())
# Validate the paths taken from the parsed arguments. Explicit checks are used
# instead of ``assert`` so the validation still runs under ``python -O``.
image_path = results.image_path
if not os.path.exists(image_path):
    raise FileNotFoundError("image_path must be exist")

checkpoint_path = results.checkpoint_path
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError("checkpoint_path must be exist")

# Load cat_to_name from the JSON file named by results.category_names
with open(results.category_names, 'r') as f:
    cat_to_name = json.load(f)

# --------------------------------
# --- Create model from the checkpoint data
# map_location='cpu' lets a checkpoint saved from a GPU be read on a machine
# without one; load_state_dict below copies the values into the parameters
# of the model that fc_model.Network built.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
data_checkpoint = torch.load(checkpoint_path, map_location='cpu')
network = fc_model.Network(data_checkpoint['arch'],
                           cat_to_name,
                           hidden_units=data_checkpoint['hidden_units'],
                           gpu=results.gpu)
loaded_model = network.model
loaded_model.load_state_dict(data_checkpoint['state_dict'])

# Show prediction info
utils.view_classify(image_path,
                    loaded_model,
                    cat_to_name,
                    data_checkpoint['class_to_idx'],
                    topk=results.top_k,
                    show_plot=results.show_plot)
# %% [markdown]
# Here we can see one of the images.

# %%
# Grab a single batch and display its first image.
image, label = next(iter(trainloader))
helper.imshow(image[0, :])

# %% [markdown]
# # Train a network
#
# To make things more concise here, I moved the model architecture and training code from the last part to a file called `fc_model`. Importing this, we can easily create a fully-connected network with `fc_model.Network`, and train the network using `fc_model.train`. I'll use this model (once it's trained) to demonstrate how we can save and load models.

# %%
# Create the network, define the criterion and optimizer
model = fc_model.Network(784, 10, [512, 256, 128])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# %%
# Run two epochs of training with the objects created above.
fc_model.train(model, trainloader, testloader, criterion, optimizer,
               epochs=2)

# %% [markdown]
# ## Saving and loading networks
#
# As you can imagine, it's impractical to train a network every time you need to use it. Instead, we can save trained networks then load them later to train more or use them for predictions.
#
# The parameters for PyTorch networks are stored in a model's `state_dict`. We can see the state dict contains the weight and bias matrices for each of our layers.

# %%
print("Our model: \n\n", model, '\n')
def main():
    """Train a network on MNIST, save a checkpoint and optionally verify it.

    Command-line flags control batch sizes, epochs, learning rate, seed,
    CUDA usage, the checkpoint path, and two switches: ``--verify-model``
    (reload the saved checkpoint and run it on one test batch) and
    ``--debug`` (print extra diagnostics).

    ``--momentum`` and ``--log-interval`` are accepted but not used by this
    function.  ``load_checkpoint`` is expected to be defined elsewhere in
    this module.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--checkpoint', type=str,
                        default="./model_checkpoint/checkpoint.pth",
                        help='Path to save Check point')
    parser.add_argument('--verify-model', action="store_true", default=False,
                        help='use for to verify model file')
    parser.add_argument('--debug', action="store_true", default=False,
                        help='use for to print debug logs')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)

    devname = "cuda" if use_cuda else "cpu"
    device = torch.device(devname)
    print("\n\n----------------\nDevice Used to process:", devname)

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    transform = transforms.Compose([
        transforms.ToTensor(),
        # MNIST images have a single channel, so one mean/std value is
        # required; three-channel statistics fail to broadcast on current
        # torchvision releases.
        transforms.Normalize((0.5,), (0.5,)),
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transform),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, download=True,
                       transform=transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # os.path.join keeps an absolute --checkpoint intact instead of gluing
    # it onto the working directory.
    checkpoint_file = os.path.join(os.getcwd(), args.checkpoint)
    if args.debug:
        print("\n\nPath for checkpoint file :{}\n\n".format(checkpoint_file))

    if os.path.isfile(checkpoint_file):
        # NOTE: despite the message, execution continues and the existing
        # file is overwritten by torch.save below.
        print(
            "File Check Status: File is already present \nRetry with different file name\n----------------\n"
        )
    else:
        print(
            "\n----------------\nFile Check Status: File is not present!!!\nCreating new with name:{}\n----------------\n\n"
            .format(checkpoint_file))

    # Make sure the target directory exists before spending time on
    # training; torch.save does not create missing directories.
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    model = fc_model.Network(784, 10)
    criterion = nn.NLLLoss()
    # Honour --lr (its default matches the previously hard-coded 0.001).
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Train and validation
    fc_model.train(model, train_loader, test_loader, criterion, optimizer,
                   device, args.epochs)

    print("\n----------------\nOur model: \n\n", model,
          "\n----------------\n")
    if args.debug:
        print("The state dict keys: \n\n", model.state_dict().keys(),
              "\n----------------\n")

    # Store the constructor arguments alongside the weights so the network
    # can be rebuilt from the file alone.
    checkpoint = {
        'input_size': 784,
        'output_size': 10,
        'state_dict': model.state_dict(),
    }
    torch.save(checkpoint, args.checkpoint)

    if args.verify_model:
        model1 = load_checkpoint(args.checkpoint)
        model1.to(device)
        print("\n\nloaded model\n\n", model1)

        # Test out your network!
        model1.eval()

        # next(iterator) works on every PyTorch version; the iterator's
        # .next() method is not available on current releases.
        images, labels = next(iter(test_loader))

        # Move the batch to the selected device (any flattening is left to
        # the network itself -- TODO confirm).
        img, labels = images.to(device), labels.to(device)

        # Calculate the class probabilities for img; the output is
        # exponentiated, i.e. treated as log-probabilities.
        with torch.no_grad():
            output = model1(img)
            ps = torch.exp(output)

        # True where the most probable class matches the label.
        equality = (labels.data == ps.max(1)[1])
        print(ps)
        print(equality)
### LOAD # load the previously saved state dict with all trained parameters state_dict = torch.load('checkpoint.pth') print(state_dict.keys()) # Then you need as well to load the state dict into the network itself model.load_state_dict(state_dict) '''WARNING: Loading the state dict works only if the model architecture is exactly the same as the checkpoint architecture. If I create a model with a different architecture, this fails. This means we need to rebuild the model exactly as it was when trained. ''' # Try this model = fc_model.Network(784, 10, [400, 200, 100]) # This will throw an error because the tensor sizes are wrong! model.load_state_dict(state_dict) '''Information about the model architecture needs to be saved in the checkpoint, along with the state dict. To do this, you build a dictionary with all the information you need to completely rebuild the model.''' ### LOAD with FUNCTION '''The following is just a demo function. You need to build your own load function for each of the models you want to learn. ''' def load_checkpoint(filepath): checkpoint = torch.load(filepath) model = fc_model.Network(checkpoint['input_size'], checkpoint['output_size'],
results = parser.parse_args() # -------------------------------- # Check to ensure that data_dir is present first before proceeding data_dir = results.data_dir assert os.path.exists(data_dir), "data_dir must be exist" # Load cat_to_name with open(results.category_names, 'r') as f: cat_to_name = json.load(f) #--------------------------------- network = fc_model.Network(results.arch, cat_to_name, hidden_units=results.hidden_units, gpu=results.gpu) network.train(results.data_dir, learning_rate=results.learning_rate, epochs=results.epochs) # Save the checkpoint checkpoint = { 'arch': results.arch, 'data_dir': results.data_dir, 'cat_to_name': cat_to_name, 'gpu': results.gpu, 'hidden_units': results.hidden_units, 'epochs': results.epochs, 'learning_rate': results.learning_rate,