def cnn_finetune(device, dataloaders, dataset_sizes, class_names):
    """Fine-tune every layer of a pretrained ResNet-18.

    The pretrained network's final fully connected layer is replaced by a
    new linear layer with 2 outputs, the model is moved to ``device`` and
    all of its parameters are handed to an SGD optimizer. Training runs for
    25 epochs through ``train_model``; the trained model is then passed to
    ``visualize_model``.

    Args:
        device: Target passed to ``model.to(...)`` and forwarded to
            ``train_model`` / ``visualize_model``.
        dataloaders: Forwarded unchanged to ``train_model`` and
            ``visualize_model``.
        dataset_sizes: Forwarded unchanged to ``train_model``.
        class_names: Forwarded unchanged to ``visualize_model``.

    Returns:
        None. The trained model is not returned to the caller.
    """
    network = models.resnet18(pretrained=True)

    # Swap the classification head for a fresh, randomly initialised one.
    in_features = network.fc.in_features
    network.fc = nn.Linear(in_features, 2)
    network.to(device)

    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(network.parameters(), lr=0.001, momentum=0.9)
    # StepLR scales the learning rate by 0.1 every 7 scheduler steps.
    scheduler = lr_scheduler.StepLR(sgd, step_size=7, gamma=0.1)

    network = train_model(
        network,
        loss_fn,
        sgd,
        scheduler,
        device,
        dataloaders,
        dataset_sizes,
        num_epochs=25,
    )
    visualize_model(network, device, dataloaders, class_names)
def cnn_feature_extractor(device, dataloaders, dataset_sizes, class_names):
    """Train only a new final layer on top of a frozen pretrained ResNet-18.

    Every pretrained parameter gets ``requires_grad = False`` so that no
    gradients are computed for it in ``backward()``. The final fully
    connected layer is then replaced by a new 2-output linear layer (new
    modules have ``requires_grad=True`` by default), and only that layer's
    parameters are given to the optimizer. Training runs for 25 epochs
    through ``train_model``; the result is passed to ``visualize_model``.

    Args:
        device: Target passed to ``model.to(...)`` and forwarded to
            ``train_model`` / ``visualize_model``.
        dataloaders: Forwarded unchanged to ``train_model`` and
            ``visualize_model``.
        dataset_sizes: Forwarded unchanged to ``train_model``.
        class_names: Forwarded unchanged to ``visualize_model``.

    Returns:
        None. The trained model is not returned to the caller.
    """
    model_conv = models.resnet18(pretrained=True)

    # Freeze the pretrained backbone. The flag has to be set on each
    # parameter object; assigning to a bare local name freezes nothing and
    # leaves autograd computing gradients for the whole network.
    for param in model_conv.parameters():
        param.requires_grad = False

    # Parameters of newly constructed modules have requires_grad=True by
    # default, so the replacement head stays trainable.
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 2)
    model_conv.to(device)

    criterion = nn.CrossEntropyLoss()

    # Only the final layer's parameters are optimized.
    optimizer_conv = optim.SGD(
        model_conv.fc.parameters(), lr=0.001, momentum=0.9
    )
    # StepLR scales the learning rate by 0.1 every 7 scheduler steps.
    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer_conv, step_size=7, gamma=0.1
    )

    model_conv = train_model(
        model_conv,
        criterion,
        optimizer_conv,
        exp_lr_scheduler,
        device,
        dataloaders,
        dataset_sizes,
        num_epochs=25,
    )
    visualize_model(model_conv, device, dataloaders, class_names)
def main():
    """Train two 6-output ResNet-18 models with an MSE loss.

    Stage 1 trains a pretrained ResNet-18 with every parameter optimized
    and passes the result to ``visualize_model``. Stage 2 loads a second
    pretrained ResNet-18, freezes all of its pretrained parameters, attaches
    a new 6-output final layer and optimizes only that layer.

    Relies on the module-level names ``use_gpu``, ``dset_loaders``,
    ``exp_lr_scheduler``, ``train_model`` and ``visualize_model``.

    Returns:
        The model returned by ``train_model`` for the stage-2 network.
    """

    def _on_device(net):
        # Move to the GPU only when the module-level flag asks for it.
        return net.cuda() if use_gpu else net

    # ---- Stage 1: fine-tune the whole network --------------------------
    finetuned = models.resnet18(pretrained=True)
    finetuned.fc = nn.Linear(finetuned.fc.in_features, 6)
    finetuned = _on_device(finetuned)

    mse = nn.MSELoss()
    # All parameters are handed to the optimizer here.
    full_sgd = optim.SGD(finetuned.parameters(), lr=0.001, momentum=0.9)

    finetuned = train_model(
        finetuned, mse, full_sgd, exp_lr_scheduler, num_epochs=25
    )
    visualize_model(finetuned, dset_loaders)

    # ---- Stage 2: frozen backbone, trainable head ----------------------
    # Setting requires_grad = False on the pretrained parameters keeps
    # backward() from computing gradients for them; the forward pass is
    # still evaluated for the whole network.
    frozen = torchvision.models.resnet18(pretrained=True)
    for weight in frozen.parameters():
        weight.requires_grad = False

    # A newly constructed module has requires_grad=True by default, so the
    # replacement final layer remains trainable.
    frozen.fc = nn.Linear(frozen.fc.in_features, 6)
    frozen = _on_device(frozen)

    mse = nn.MSELoss()
    # In contrast to stage 1, only the final layer's parameters are optimized.
    head_sgd = optim.SGD(frozen.fc.parameters(), lr=0.001, momentum=0.9)

    return train_model(frozen, mse, head_sgd, exp_lr_scheduler, num_epochs=25)
def main(argv):
    """Fit an MFA model with EM, save it, and write diagnostic figures.

    The dataset name is taken from ``argv[1]`` when ``argv`` has exactly two
    entries; otherwise ``'celeba'`` is used. Supported names are
    ``'celeba'`` and ``'mnist'``. After fitting, the model's state dict is
    saved under ``./models/<dataset>`` and three images (model
    visualization, random samples, log-likelihood plot) are written under
    ``./figures/<dataset>``.

    Args:
        argv: Command-line style argument list (program name first).

    Raises:
        AssertionError: If the installed torch is older than 1.2.0, or the
            dataset name is not one of the supported ones.
    """
    assert version.parse(torch.__version__) >= version.parse('1.2.0')

    dataset = 'celeba'
    if len(argv) == 2:
        dataset = argv[1]
    print('Preparing dataset and parameters for', dataset, '...')
    assert dataset in ('celeba', 'mnist'), 'Unknown dataset: ' + dataset

    # Settings that are the same for both supported datasets.
    batch_size = 1000       # EM batch size
    num_iterations = 30     # EM iterations (= epochs)
    mfa_sgd_epochs = 0      # extra SGD epochs with diagonal noise covariance

    if dataset == 'celeba':
        image_shape = [64, 64, 3]     # input image shape
        n_components = 300            # mixture components
        n_factors = 10                # latent dimension of every component
        feature_sampling = 0.2        # random coordinate sampling, for speed
        init_method = 'rnd_samples'   # init each component from a few samples
        train_set, test_set = celeba_train_val_datasets(with_mask=False)
    elif dataset == 'mnist':
        image_shape = [28, 28]        # input image shape
        n_components = 50             # mixture components
        n_factors = 6                 # latent dimension of every component
        feature_sampling = False      # no coordinate sampling
        init_method = 'kmeans'        # init via k-means clustering
        train_set, test_set = mnist_train_val_datasets(with_mask=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model_dir = './models/' + dataset
    os.makedirs(model_dir, exist_ok=True)
    figures_dir = './figures/' + dataset
    os.makedirs(figures_dir, exist_ok=True)
    model_name = 'c_{}_l_{}_init_{}'.format(n_components, n_factors,
                                            init_method)

    def _figure_path(prefix):
        # All figures share the '<prefix><model_name>.jpg' naming scheme.
        return os.path.join(figures_dir, prefix + model_name + '.jpg')

    print('Defining the MFA model...')
    mfa = MFA(
        n_components=n_components,
        n_features=np.prod(image_shape),
        n_factors=n_factors,
        init_method=init_method,
    )
    model = mfa.to(device=device)

    print('EM fitting: {} components / {} factors / batch size {} ...'.format(
        n_components, n_factors, batch_size))
    ll_log = model.batch_fit(
        train_set,
        test_set,
        batch_size=batch_size,
        max_iterations=num_iterations,
        feature_sampling=feature_sampling,
    )

    if mfa_sgd_epochs > 0:
        print(
            'Continuing training using SGD with diagonal (instead of isotropic) noise covariance...'
        )
        model.isotropic_noise = False
        ll_log_sgd = model.sgd_mfa_train(
            train_set,
            test_size=256,
            max_epochs=mfa_sgd_epochs,
            feature_sampling=feature_sampling,
        )
        ll_log += ll_log_sgd

    print('Saving the model...')
    weights_path = os.path.join(model_dir, 'model_' + model_name + '.pth')
    torch.save(model.state_dict(), weights_path)

    print('Visualizing the trained model...')
    model_image = visualize_model(model, image_shape=image_shape,
                                  end_component=10)
    imwrite(_figure_path('model_'), model_image)

    print('Generating random samples...')
    rnd_samples, _ = model.sample(100, with_noise=False)
    mosaic = samples_to_mosaic(rnd_samples, image_shape=image_shape)
    imwrite(_figure_path('samples_'), mosaic)

    print('Plotting test log-likelihood graph...')
    curve_label = 'c{}_l{}_b{}'.format(n_components, n_factors, batch_size)
    plt.plot(ll_log, label=curve_label)
    plt.grid(True)
    plt.savefig(_figure_path('training_graph_'))
    print('Done')
'step%d.read_rnn' % (step + 1)) write_rnn = utils.layer_by_name(canvas_next, 'step%d.write_rnn' % (step + 1)) sample = utils.layer_by_name(canvas_next, 'step%d.sample' % (step + 1)) output = ll.NonlinearityLayer(canvas_next, ln.sigmoid, name='output') return output if __name__ == '__main__': mnist = utils.load_mnist(process=lambda x: (x > 0.8).astype('float32')) model = make_model() logger.info('visualize model to model.svg') utils.visualize_model(model, 'model.svg') image = utils.layer_by_name(model, 'step1.image').input_var output_layers = [ utils.layer_by_name(model, name) for name in ( ['output'] + ['step%d.sample_mean' % j for j in range(1, TIME_ROUNDS + 1)] + ['step%d.sample_logvar2' % j for j in range(1, TIME_ROUNDS + 1)]) ] output_tensors = ll.get_output(output_layers) output = output_tensors[0] mean = output_tensors[1:1 + TIME_ROUNDS] logvar2 = output_tensors[1 + TIME_ROUNDS:1 + 2 * TIME_ROUNDS]