def run(count, memory_size, iteration, device='cuda'):
    traintransform = transforms.Compose(
        [transforms.RandomRotation(20), transforms.ToTensor()])
    trainset = torchvision.datasets.MNIST(root='./data/mnist', train=True,
                                          download=True, transform=traintransform)
    trainloader = torch.utils.data.DataLoader(trainset, pin_memory=True,
                                              batch_size=128, shuffle=True,
                                              num_workers=10)

    testtransform = transforms.Compose([transforms.ToTensor()])
    testset = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                         download=True, transform=testtransform)
    testloader = torch.utils.data.DataLoader(testset, pin_memory=True,
                                             batch_size=128, shuffle=True,
                                             num_workers=10)

    base_dir = os.path.join('mnist_' + str(memory_size), "4")

    model = MnistDraw(count, memory_size)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=1e-3)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    trial = Trial(
        model, optimizer, nn.MSELoss(reduction='sum'), ['loss'],
        pass_state=True,
        callbacks=[
            tm.kl_divergence(MU, LOGVAR),
            callbacks.MostRecent(
                os.path.join(base_dir, 'iter_' + str(iteration) + '.{epoch:02d}.pt')),
            callbacks.GradientClipping(5),
            callbacks.ExponentialLR(0.99),
            callbacks.TensorBoardImages(comment=current_time, name='Prediction',
                                        write_each_epoch=True,
                                        key=torchbearer.Y_PRED),
            callbacks.TensorBoardImages(comment=current_time + '_mnist',
                                        name='Target', write_each_epoch=True,
                                        key=torchbearer.Y_TRUE)
        ]).with_generators(train_generator=trainloader,
                           val_generator=testloader).to(device)
    trial.run(100)
def evaluate(file, device='cuda'):
    transform_test = transforms.Compose([transforms.ToTensor()])
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    base_dir = 'cifarss_base'

    model = CifarVAE()
    model = SelfTaught(model.encoder, model.mu, 32)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=0)

    trial = Trial(
        model, optimizer, nn.NLLLoss(), ['acc', 'loss']).load_state_dict(
            torch.load(os.path.join(base_dir, file)),
            resume=False).with_generators(val_generator=testloader).to(device)
    return trial.evaluate()
def test_model(model: nn.Module, test_loader: DataLoader):
    loss_function = nn.MSELoss()
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    trial = Trial(model, None, loss_function, metrics=["loss"]).to(device)
    # Only a test generator is needed for evaluation (the original passed an
    # undefined train_loader here)
    trial.with_generators(test_generator=test_loader)
    results = trial.evaluate(data_key=torchbearer.TEST_DATA)
    return results, trial
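# Example call for test_model above; the toy linear model and random
# TensorDataset are illustrative assumptions, not part of the original code.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

toy_model = nn.Linear(10, 1)
toy_loader = DataLoader(TensorDataset(torch.randn(64, 10), torch.randn(64, 1)),
                        batch_size=16)
results, _trial = test_model(toy_model, toy_loader)
print(results)  # e.g. {'test_loss': ...}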
def draw(count, memory_size, file, device='cuda'):
    testtransform = transforms.Compose([transforms.ToTensor()])
    testset = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                         download=True, transform=testtransform)
    testloader = torch.utils.data.DataLoader(testset, pin_memory=True,
                                             batch_size=128, shuffle=True,
                                             num_workers=10)

    base_dir = os.path.join('mnist_' + str(memory_size), "6")

    model = MnistDraw(count, memory_size, output_stages=True)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=0)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    from visualise import StagesGrid
    trial = Trial(model, optimizer, nn.MSELoss(reduction='sum'), ['loss'],
                  pass_state=True,
                  callbacks=[
                      callbacks.TensorBoardImages(comment=current_time, nrow=10,
                                                  num_images=20, name='Prediction',
                                                  write_each_epoch=True,
                                                  key=torchbearer.Y_PRED,
                                                  pad_value=1),
                      callbacks.TensorBoardImages(comment=current_time + '_mnist',
                                                  nrow=10, num_images=20,
                                                  name='Target',
                                                  write_each_epoch=False,
                                                  key=torchbearer.Y_TRUE,
                                                  pad_value=1),
                      StagesGrid('mnist_stages.png', STAGES, 20)
                  ]).load_state_dict(torch.load(os.path.join(base_dir, file)),
                                     resume=False).with_generators(
                      train_generator=testloader,
                      val_generator=testloader).for_train_steps(1).for_val_steps(1).to(device)
    # Evaluate doesn't work with tensorboard in torchbearer; seems to have been
    # fixed in the most recent version, so run a single step instead
    trial.run()
def run(count, memory_size, file, device='cuda'):
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
        transforms.ToTensor()
    ])
    transform_test = transforms.Compose([transforms.ToTensor()])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=10)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    base_dir = os.path.join('cifarss_' + str(memory_size), "16")

    model = CifarDraw(count, memory_size)
    model.load_state_dict(torch.load(file)[torchbearer.MODEL])
    model = SelfTaught(count, 512, memory_size, model.memory)

    # Freeze the pre-trained memory, but leave its decay/learn parameters trainable
    for param in model.memory.parameters():
        param.requires_grad = False
    model.memory.decay.requires_grad = True
    model.memory.learn.requires_grad = True
    model.memory.learn2.requires_grad = True

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=1e-3)

    trial = Trial(model, optimizer, nn.NLLLoss(), ['acc', 'loss'],
                  pass_state=True,
                  callbacks=[
                      callbacks.MultiStepLR([25, 40, 45]),
                      callbacks.MostRecent(os.path.join(base_dir, '{epoch:02d}.pt')),
                      callbacks.GradientClipping(5)
                  ]).with_generators(
                      train_generator=trainloader,
                      val_generator=testloader).for_val_steps(5).to(device)
    trial.run(50)
def draw(file, device='cuda'):
    transform_test = transforms.Compose([transforms.ToTensor()])
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    base_dir = 'cifar_vae'

    model = CifarVAE()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=0)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    trial = Trial(
        model, optimizer, nn.MSELoss(reduction='sum'), ['acc', 'loss'],
        pass_state=True,
        callbacks=[
            callbacks.TensorBoardImages(comment=current_time, name='Prediction',
                                        write_each_epoch=True,
                                        key=torchbearer.Y_PRED, pad_value=1,
                                        nrow=16),
            callbacks.TensorBoardImages(comment=current_time + '_cifar_vae',
                                        name='Target', write_each_epoch=False,
                                        key=torchbearer.Y_TRUE, pad_value=1,
                                        nrow=16)
        ]).load_state_dict(
            torch.load(os.path.join(base_dir, file)),
            resume=False).with_generators(
            train_generator=testloader,
            val_generator=testloader).for_train_steps(1).to(device)
    # Evaluate doesn't work with tensorboard in torchbearer; seems to have been
    # fixed in the most recent version, so run a single step instead
    trial.run()
def _direct_estimator_predict(self, df):
    _df = preprocess_interactions_data_frame(
        df.copy(), self.direct_estimator.project_config)
    transform_with_indexing(_df, self.direct_estimator.index_mapping,
                            self.direct_estimator.project_config)

    dataset = InteractionsDataset(
        data_frame=_df,
        embeddings_for_metadata=self.direct_estimator.embeddings_for_metadata,
        project_config=self.direct_estimator.project_config,
        index_mapping=self.direct_estimator.index_mapping)
    batch_sampler = FasterBatchSampler(dataset,
                                       self.direct_estimator.batch_size,
                                       shuffle=False)
    data_loader = NoAutoCollationDataLoader(dataset, batch_sampler=batch_sampler)

    trial = (Trial(
        self.direct_estimator.get_trained_module(),
        criterion=lambda *args: torch.zeros(
            1, device=self.direct_estimator.torch_device, requires_grad=True),
    ).with_generators(val_generator=data_loader).to(
        self.direct_estimator.torch_device).eval())

    with torch.no_grad():
        rewards_tensor: torch.Tensor = trial.predict(
            verbose=0, data_key=torchbearer.VALIDATION_DATA)
    rewards: np.ndarray = rewards_tensor[:, 0].cpu().numpy()

    return rewards
def _get_arm_scores(self, agent: BanditAgent, ob_dataset: Dataset) -> List[float]:
    batch_sampler = FasterBatchSampler(ob_dataset, self.batch_size, shuffle=False)
    generator = NoAutoCollationDataLoader(
        ob_dataset,
        batch_sampler=batch_sampler,
        num_workers=self.generator_workers,
        pin_memory=self.pin_memory if self.device == "cuda" else False,
    )

    trial = (
        Trial(
            agent.bandit.reward_model,
            criterion=lambda *args: torch.zeros(
                1, device=self.torch_device, requires_grad=True
            ),
        )
        .with_test_generator(generator)
        .to(self.torch_device)
        .eval()
    )

    with torch.no_grad():
        model_output: Union[torch.Tensor, Tuple[torch.Tensor]] = trial.predict(
            verbose=0
        )

    scores_tensor: torch.Tensor = model_output if isinstance(
        model_output, torch.Tensor
    ) else model_output[0][0]
    scores: List[float] = scores_tensor.cpu().numpy().reshape(-1).tolist()

    return scores
def _save_score_log(self, i, trial) -> None:
    val_loader = self.get_val_generator()

    trial = (
        Trial(
            self.agent.bandit.reward_model,
            criterion=lambda *args: torch.zeros(
                1, device=self.torch_device, requires_grad=True
            ),
        )
        .with_generators(val_generator=val_loader)
        .to(self.torch_device)
        .eval()
    )

    with torch.no_grad():
        model_output: Union[torch.Tensor, Tuple[torch.Tensor]] = trial.predict(
            verbose=0, data_key=torchbearer.VALIDATION_DATA
        )

    scores_tensor: torch.Tensor = model_output if isinstance(
        model_output, torch.Tensor
    ) else model_output[0][0]
    scores: List[float] = scores_tensor.cpu().numpy().reshape(-1).tolist()

    plot_scores(scores).savefig(
        os.path.join(self.output().path, "plot_history", "scores_{}.jpg".format(i))
    )
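# The three functions above share one pattern: a Trial built for inference only,
# with a dummy criterion returning a constant zero loss so torchbearer never
# needs a real objective. A minimal self-contained sketch of that pattern
# (the toy model and data are illustrative assumptions, not the original code):
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchbearer import Trial

reward_model = nn.Linear(8, 1)  # stand-in for the real reward model
loader = DataLoader(TensorDataset(torch.randn(32, 8), torch.zeros(32)),
                    batch_size=16)

trial = (
    Trial(reward_model,
          criterion=lambda *args: torch.zeros(1, requires_grad=True))
    .with_test_generator(loader)
    .eval()
)
with torch.no_grad():
    predictions = trial.predict(verbose=0)  # aggregated model outputs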
def create_trial(self, module: nn.Module) -> Trial:
    loss_function = self._get_loss_function()
    trial = Trial(
        module,
        self._get_optimizer(module),
        loss_function,
        callbacks=self._get_callbacks(),
        metrics=self.metrics,
    ).to(self.torch_device)

    # Some loss functions need access to the shared torchbearer state; wire it
    # up when the loss exposes the hook
    if hasattr(loss_function, "torchbearer_state"):
        loss_function.torchbearer_state = trial.state
    return trial
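# Sketch of a loss that opts into the torchbearer_state hook checked by
# create_trial above. The class and its use of the EPOCH key are illustrative
# assumptions; the hook itself only requires a `torchbearer_state` attribute.
import torch.nn as nn
import torch.nn.functional as F
import torchbearer

class StateAwareLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.torchbearer_state = None  # populated by create_trial

    def forward(self, y_pred, y_true):
        base = F.mse_loss(y_pred, y_true)
        if self.torchbearer_state is None:
            return base
        # Example use of the shared state: linearly warm the loss in over
        # the first 10 epochs (purely illustrative behaviour)
        epoch = self.torchbearer_state.get(torchbearer.EPOCH, 0)
        return base * min(1.0, (epoch + 1) / 10)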
def run(count, memory_size, device='cuda'):
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=10)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    base_dir = os.path.join('cifar_' + str(memory_size), str(count))

    model = nn.DataParallel(CifarClassifier(count, memory_size))
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=0.001, momentum=0.9, weight_decay=5e-4)

    trial = Trial(model, optimizer, nn.NLLLoss(),
                  [torchbearer.metrics.CategoricalAccuracy(), 'loss'],
                  callbacks=[
                      callbacks.MostRecent(os.path.join(base_dir, '{epoch:02d}.pt')),
                      callbacks.GradientClipping(5),
                      callbacks.MultiStepLR(milestones=[150, 250]),
                      callbacks.TensorBoard(write_graph=False, comment=base_dir)
                  ]).with_train_generator(trainloader).to(device)
    trial.run(350)
    trial.with_test_generator(testloader).evaluate(data_key=torchbearer.TEST_DATA)
def train(self, model, **kwargs) -> (float, float):
    # Get transfer model and put it in training mode
    net = model.net
    net.train()

    # Create optimiser
    optimiser = optim.Adam(net.parameters(), lr=1e-4)

    # Check for cuda
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Training using", device)

    # Setup loss function
    if self.class_weight_method != ClassWeightMethod.Unweighted:
        distribution = class_distribution("data/processed/train")
        if self.class_weight_method == ClassWeightMethod.SumBased:
            inv_distribution = [1 - x / np.sum(distribution) for x in distribution]
            inv_distribution = torch.from_numpy(np.array(inv_distribution)).float()
        elif self.class_weight_method == ClassWeightMethod.MaxBased:
            inv_distribution = [np.max(distribution) / x for x in distribution]
            inv_distribution = torch.from_numpy(np.array(inv_distribution)).float()
        else:
            raise IndexError("Unknown class weight method "
                             + str(self.class_weight_method))
        loss_function = self.loss(inv_distribution.to(device))
    else:
        loss_function = self.loss()

    # Setup trial
    trial = Trial(net, optimiser, loss_function,
                  metrics=["loss", "accuracy"]).to(device)
    trial.with_generators(
        self.image_datasets.train_loader,
        test_generator=self.image_datasets.validation_loader,
    )

    # Actually run the training
    trial.run(epochs=self.num_epochs)

    # Evaluate and show results
    time.sleep(0.1)  # Ensure training has finished
    net.eval()
    results = trial.evaluate(data_key=torchbearer.TEST_DATA)
    acc = float(results["test_acc"])
    loss = float(results["test_loss"])

    return acc, loss
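# Worked example of the two class-weight schemes in train above, a sketch with
# an assumed 3-class count distribution (not data from the original project):
import numpy as np

distribution = np.array([100., 50., 10.])

# SumBased: weight_c = 1 - count_c / total  -> [0.375, 0.6875, 0.9375]
sum_based = 1 - distribution / distribution.sum()

# MaxBased: weight_c = max(counts) / count_c -> [1.0, 2.0, 10.0]
max_based = distribution.max() / distribution

print(sum_based, max_based)  # rarer classes receive larger weights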
def run(count, glimpse_size, memory_size, iteration, device='cuda'):
    base_dir = os.path.join('celeba_' + str(memory_size), str(glimpse_size))
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    transform_train = transforms.Compose([transforms.ToTensor()])
    dataset = torchvision.datasets.ImageFolder(root='./cropped_celeba/',
                                               transform=transform_train)
    splitter = DatasetValidationSplitter(len(dataset), 0.05)
    trainset = splitter.get_train_dataset(dataset)

    # Save the ids
    torch.save((splitter.train_ids, splitter.valid_ids),
               os.path.join(base_dir, 'split.dat'))

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=10)

    model = CelebDraw(count, glimpse_size, memory_size)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=1e-4)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    call_a = callbacks.TensorBoardImages(comment=current_time, name='Prediction',
                                         write_each_epoch=True,
                                         key=torchbearer.Y_PRED)
    call_a.on_step_training = call_a.on_step_validation  # Hack to make this log training samples
    call_b = callbacks.TensorBoardImages(comment=current_time + '_celeba',
                                         name='Target', write_each_epoch=True,
                                         key=torchbearer.Y_TRUE)
    call_b.on_step_training = call_b.on_step_validation  # Hack to make this log training samples

    trial = Trial(model, optimizer, nn.MSELoss(reduction='sum'), ['acc', 'loss'],
                  pass_state=True,
                  callbacks=[
                      joint_kl_divergence(MU, LOGVAR),
                      callbacks.MostRecent(
                          os.path.join(base_dir,
                                       'iter_' + str(iteration) + '.{epoch:02d}.pt')),
                      callbacks.GradientClipping(5),
                      call_a,
                      call_b
                  ]).with_generators(train_generator=trainloader).to(device)
    trial.run(100)
def test_save_checkpoint_save_filename(self, mock_save):
    torchmodel = Mock()
    optim = Mock()
    state = {
        torchbearer.SELF: Trial(torchmodel, optim, None, []),
        torchbearer.METRICS: {}
    }

    file_format = 'test_file.pt'
    check = _Checkpointer(file_format)
    check.save_checkpoint(state)

    self.assertEqual(mock_save.call_count, 1)
    self.assertTrue(mock_save.call_args[0][1] == 'test_file.pt')
def draw(count, glimpse_size, memory_size, file, device='cuda'):
    base_dir = os.path.join('celeba_' + str(memory_size), str(glimpse_size))

    transform = transforms.Compose([transforms.ToTensor()])
    dataset = torchvision.datasets.ImageFolder(root='./cropped_celeba/',
                                               transform=transform)
    splitter = DatasetValidationSplitter(len(dataset), 0.05)
    # load the ids
    splitter.train_ids, splitter.valid_ids = torch.load(
        os.path.join(base_dir, 'split.dat'))
    testset = splitter.get_val_dataset(dataset)
    testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                             shuffle=True, num_workers=10)

    model = CelebDraw(count, glimpse_size, memory_size, output_stages=True)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=0)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    from visualise import StagesGrid
    trial = Trial(model, optimizer, nn.MSELoss(reduction='sum'), ['loss'],
                  pass_state=True,
                  callbacks=[
                      callbacks.TensorBoardImages(comment=current_time, nrow=10,
                                                  num_images=20, name='Prediction',
                                                  write_each_epoch=True,
                                                  key=torchbearer.Y_PRED,
                                                  pad_value=1),
                      callbacks.TensorBoardImages(comment=current_time + '_celeb',
                                                  nrow=10, num_images=20,
                                                  name='Target',
                                                  write_each_epoch=False,
                                                  key=torchbearer.Y_TRUE,
                                                  pad_value=1),
                      callbacks.TensorBoardImages(comment=current_time + '_celeb_mask',
                                                  nrow=10, num_images=20,
                                                  name='Masked Target',
                                                  write_each_epoch=False,
                                                  key=MASKED_TARGET,
                                                  pad_value=1),
                      StagesGrid('celeb_stages.png', STAGES, 20)
                  ]).load_state_dict(torch.load(os.path.join(base_dir, file)),
                                     resume=False).with_generators(
                      train_generator=testloader,
                      val_generator=testloader).for_train_steps(1).for_val_steps(1).to(device)
    # Evaluate doesn't work with tensorboard in torchbearer; seems to have been
    # fixed in the most recent version, so run a single step instead
    trial.run()
def test_save_checkpoint_subformatting(self, mock_save):
    torchmodel = Mock()
    optim = Mock()
    state = {
        torchbearer.SELF: Trial(torchmodel, optim, None, []),
        torchbearer.METRICS: {'test_metric': 0.001},
        torchbearer.EPOCH: 2
    }

    file_format = 'test_file_{test_metric:.01f}.pt'
    check = _Checkpointer(file_format)
    check.save_checkpoint(state)

    self.assertEqual(mock_save.call_count, 1)
    self.assertTrue(mock_save.call_args[0][1] == 'test_file_0.0.pt')
def test_save_checkpoint_model_only(self, mock_save):
    torchmodel = Mock()
    optim = Mock()
    state = {
        torchbearer.SELF: Trial(torchmodel, optim, None, []),
        torchbearer.METRICS: {'test_metric': 0.001},
        torchbearer.EPOCH: 2,
        torchbearer.MODEL: torchmodel,
    }

    file_format = 'test_file_{test_metric:.01f}.pt'
    check = _Checkpointer(file_format, save_model_params_only=True)
    check.save_checkpoint(state)

    self.assertEqual(mock_save.call_count, 1)
    self.assertTrue(mock_save.call_args[0][0] == torchmodel.state_dict())
    self.assertTrue(mock_save.call_args[0][1] == 'test_file_0.0.pt')
def test_save_checkpoint_wrong_format(self, _):
    torchmodel = Mock()
    optim = Mock()
    state = {
        torchbearer.SELF: Trial(torchmodel, optim, None, []),
        torchbearer.METRICS: {'test_metric': 0.001},
        torchbearer.EPOCH: 2
    }

    file_format = 'test_file_{test_metric:d}.pt'
    check = _Checkpointer(file_format)

    try:
        check.save_checkpoint(state)
    except Exception:
        return
    self.fail('No error was thrown when wrong format chosen for save file format')
def test_save_checkpoint_overwrite_recent(self, _, __):
    torchmodel = Mock()
    optim = Mock()
    state = {
        torchbearer.SELF: Trial(torchmodel, optim, None, []),
        torchbearer.EPOCH: 0,
        torchbearer.METRICS: {}
    }

    file_format = 'test_file_{epoch}.pt'
    check = _Checkpointer(file_format)

    check.save_checkpoint(state, True)
    self.assertTrue(check.most_recent == 'test_file_0.pt')

    state[torchbearer.EPOCH] = 1
    check.save_checkpoint(state, True)
    self.assertTrue(check.most_recent == 'test_file_1.pt')
def train_model(train, valid, test, classes, batch, num_epochs):
    # model = CNN(3, classes)
    model = models.resnet18()

    train_loader = DataLoader(train, batch_size=batch)
    val_loader = DataLoader(valid, batch_size=batch)
    test_loader = DataLoader(test, batch_size=batch)

    # define the loss function and the optimiser
    loss_function = nn.CrossEntropyLoss()  # equation 8 in original paper
    optimiser = optim.Adam(model.parameters())

    trial = Trial(model, optimiser, loss_function,
                  metrics=['loss', 'accuracy'], verbose=1).to(device)
    trial.with_generators(train_loader, val_generator=val_loader,
                          test_generator=test_loader)
    trial.run(epochs=num_epochs)

    results_test = trial.evaluate(data_key=torchbearer.TEST_DATA)
    results_val = trial.evaluate(data_key=torchbearer.VALIDATION_DATA)
    print(results_test)
    return results_test, results_val
def run(count, memory_size, device='cuda'):
    traintransform = transforms.Compose([
        transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    trainset = torchvision.datasets.MNIST(root='./data/mnist', train=True,
                                          download=True, transform=traintransform)
    trainloader = torch.utils.data.DataLoader(trainset, pin_memory=True,
                                              batch_size=128, shuffle=True,
                                              num_workers=10)

    testtransform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    testset = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                         download=True, transform=testtransform)
    testloader = torch.utils.data.DataLoader(testset, pin_memory=True,
                                             batch_size=128, shuffle=False,
                                             num_workers=10)

    base_dir = os.path.join('mnist_' + str(memory_size), str(count))

    model = MnistClassifier(count, memory_size)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=0.001)

    trial = Trial(
        model, optimizer, nn.NLLLoss(), ['acc', 'loss'],
        callbacks=[
            callbacks.MostRecent(os.path.join(base_dir, '{epoch:02d}.pt')),
            callbacks.GradientClipping(5),
            callbacks.MultiStepLR(milestones=[50, 100, 150, 190, 195]),
            callbacks.ExponentialLR(0.99),
            callbacks.TensorBoard(write_graph=False, comment=base_dir)
        ]).with_train_generator(trainloader).to(device)
    trial.run(200)
    trial.with_test_generator(testloader).evaluate(data_key=torchbearer.TEST_DATA)
def train(self):
    scheduler = torch_scheduler.StepLR(self.num_epochs_decay,
                                       gamma=self.decay_factor)
    loss_plot_plan = os.path.join(
        self.result_path,
        'live_loss_plot%s-%d-%.4f-%d-%.4f.png' %
        (self.model_type, self.num_epochs, self.lr, self.num_epochs_decay,
         self.augmentation_prob))

    callbacks = [scheduler]
    # imaging.FromState(torchbearer.X).on_val().cache(16).make_grid().to_pyplot(),
    # imaging.FromState(torchbearer.Y_TRUE).on_val().cache(16).make_grid().to_pyplot(),
    # imaging.FromState(torchbearer.Y_PRED).on_val().cache(16).make_grid().to_pyplot(),
    # imaging.FromState(torchbearer.X).on_test().cache(16).make_grid().to_pyplot(),
    # imaging.FromState(torchbearer.Y_TRUE).on_test().cache(16).make_grid().to_pyplot(),
    # imaging.FromState(torchbearer.Y_PRED).on_test().cache(16).make_grid().to_pyplot(),
    # TensorBoard(write_batch_metrics=True),

    trial = Trial(
        self.unet,
        self.optimizer,
        self.criterion,
        metrics=['loss', 'binary_acc'],  # binary_acc for debugging certain things
        callbacks=callbacks).to(self.device)
    trial.with_generators(train_generator=self.train_loader,
                          val_generator=self.valid_loader,
                          test_generator=self.test_loader)

    start = time.time()
    history = trial.run(epochs=self.num_epochs, verbose=2)
    stop = time.time()
    train_time = stop - start

    state = self.unet.state_dict()
    unet_path = os.path.join(
        self.model_path,
        '%s-%d-%.4f-%d-%.4f_Index_BCE_Dropout_STAREIndex.pkl' % (
            self.model_type,
            self.num_epochs,
            self.lr,
            self.num_epochs_decay,
            self.augmentation_prob,
        ))
    torch.save(state, unet_path)
    print(history)

    ### Testing
    results = trial.evaluate(data_key=torchbearer.TEST_DATA)
    print("Test result:")
    print(results)

    return history, results
def evaluate(self):
    self.cache_cleanup()

    module = self.get_trained_module()
    val_loader = self.get_val_generator()

    print("================== Evaluate ========================")
    trial = (Trial(
        module,
        self._get_optimizer(module),
        self._get_loss_function(),
        callbacks=[],
        metrics=self.metrics,
    ).to(self.torch_device).with_generators(
        val_generator=val_loader).eval())

    print(json.dumps(trial.evaluate(data_key=torchbearer.VALIDATION_DATA),
                     indent=4))

    if self.run_evaluate:
        self.run_evaluate_task()
def run(train_batch_size, val_batch_size, epochs, lr, log_interval,
        input_size=10, hidden_size=100, out_size=4):
    dataset = FuzzBuzzDataset(input_size)
    splitter = DatasetValidationSplitter(len(dataset), 0.1)
    train_set = splitter.get_train_dataset(dataset)
    val_set = splitter.get_val_dataset(dataset)

    train_loader = DataLoader(train_set, pin_memory=True,
                              batch_size=train_batch_size, shuffle=True,
                              num_workers=2)
    val_loader = DataLoader(val_set, pin_memory=True,
                            batch_size=val_batch_size, shuffle=False,
                            num_workers=2)

    model = FuzzBuzzModel(input_size, hidden_size, out_size)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=lr)
    loss = nn.CrossEntropyLoss()

    trial = Trial(model, optimizer, criterion=loss,
                  metrics=['acc', 'loss']).to(device)
    trial = trial.with_generators(train_generator=train_loader,
                                  val_generator=val_loader)
    trial.run(epochs=epochs)
    trial.evaluate(data_key=VALIDATION_DATA)
def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader = None,
    output_path: str = None,
):
    # Define the loss function and the optimiser
    loss_function = nn.MSELoss()
    optimiser = optim.Adam(model.parameters())

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    trial = Trial(model, optimiser, loss_function, metrics=["loss"]).to(device)
    trial.with_generators(train_loader, val_generator=val_loader)
    trial.run(epochs=100)

    if output_path:
        torch.save(model.state_dict(), output_path)

    return trial
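# How train_model above might be called, a sketch: the toy regression model and
# random TensorDataset data are illustrative, not from the original code.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

features, targets = torch.randn(256, 10), torch.randn(256, 1)
train_loader = DataLoader(TensorDataset(features[:192], targets[:192]),
                          batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(features[192:], targets[192:]),
                        batch_size=32)

regressor = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 1))
trained_trial = train_model(regressor, train_loader, val_loader,
                            output_path="regressor.pt")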
def run(hidden_size: int, file_prefix: str, epochs: int = 20):
    # Flatten 28*28 images to a 784 vector for each image
    transform = transforms.Compose([
        transforms.ToTensor(),                    # convert to tensor
        transforms.Lambda(lambda x: x.view(-1)),  # flatten into vector
    ])

    trainset = MNIST(".", train=True, download=True, transform=transform)
    testset = MNIST(".", train=False, download=True, transform=transform)
    data_size = torch.numel(trainset[0][0])

    # Create data loaders
    trainloader = DataLoader(trainset, batch_size=128, shuffle=True,
                             drop_last=False)
    testloader = DataLoader(testset, batch_size=128, shuffle=True,
                            drop_last=False)

    model = SingleHiddenLayerMLP(data_size, hidden_size, 10)
    loss_function = nn.CrossEntropyLoss()
    optimiser = optim.Adam(model.parameters())
    device = "cuda" if torch.cuda.is_available() else "cpu"

    cm = (PyCM().on_train().with_handler(
        to_pandas_seaborn(normalize=True, title="Confusion Matrix: {epoch}")))
    callbacks = [cm]

    trial = Trial(
        model,
        optimiser,
        loss_function,
        metrics=["loss", "accuracy"],
        callbacks=callbacks,
    )
    trial.to(device)
    trial.with_generators(trainloader, val_generator=testloader, val_steps=1)
    history = trial.run(epochs=epochs)

    return history
for d_vvs in ventral_depths:
    for t in range(n_trials):
        model_file = f'./models/{cmode}/model_{n_bn}_{d_vvs}_{t}.pt'
        log_file = f'./logs/{cmode}/model_{n_bn}_{d_vvs}_{t}.csv'
        pathlib.Path(model_file).parents[0].mkdir(parents=True, exist_ok=True)
        pathlib.Path(log_file).parents[0].mkdir(parents=True, exist_ok=True)

        model = BaselineModel(n_bn, d_vvs, nch)
        optimiser = optim.RMSprop(model.parameters(), alpha=0.9, lr=0.0001,
                                  weight_decay=1e-6)
        loss_function = nn.CrossEntropyLoss()
        device = "cuda:0" if torch.cuda.is_available() else "cpu"

        trial = Trial(model, optimiser, loss_function,
                      metrics=['loss', 'accuracy'],
                      callbacks=[
                          torchbearer.callbacks.csv_logger.CSVLogger(log_file)
                      ]).to(device)
        trial.with_generators(trainloader, val_generator=testloader)
        trial.run(epochs=20)

        torch.save(model.state_dict(), model_file)
if mode == 'cutmix':
    aug = [callbacks.CutMix(1, classes=10)]
if mode == 'fmix':
    aug = [FMix(alpha=1, decay_power=3)]

model = VAE(64, var=args.var)

trial = Trial(model, optim.Adam(model.parameters(), lr=5e-2), nll,
              metrics=[
                  metrics.MeanSquaredError(pred_key=SAMPLE),
                  metrics.mean(NLL),
                  metrics.mean(KL),
                  'loss'
              ],
              callbacks=[
                  sample,
                  kld(distributions.Normal(0, 1)),
                  init.XavierNormal(targets=['Conv']),
                  callbacks.MostRecent(args.dir + '/' + mode + '_' + str(args.i) + '.pt'),
                  callbacks.MultiStepLR([40, 80]),
                  callbacks.TensorBoard(write_graph=False,
                                        comment=mode + '_' + str(args.i),
                                        log_dir='vae_logs'),
                  *aug
              ])

if mode in ['base', 'mix', 'cutmix']:
    trial = trial.load_state_dict(
        torch.load('vaes/' + '/' + mode + '_' + str(args.i) + '.pt'))

trial.with_generators(train_loader,
def run(iteration, device='cuda:1'):
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
        transforms.ToTensor()
    ])
    transform_test = transforms.Compose([transforms.ToTensor()])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True, num_workers=10)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True,
                                           transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    base_dir = 'cifar_vae'

    model = CifarVAE()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=5e-4)

    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')

    trial = Trial(
        model, optimizer, nn.MSELoss(reduction='sum'), ['acc', 'loss'],
        pass_state=True,
        callbacks=[
            tm.kl_divergence(MU, LOGVAR, beta=2),
            callbacks.MultiStepLR([50, 90]),
            callbacks.MostRecent(
                os.path.join(base_dir, 'iter_' + str(iteration) + '.{epoch:02d}.pt')),
            callbacks.GradientClipping(5),
            callbacks.TensorBoardImages(comment=current_time, name='Prediction',
                                        write_each_epoch=True,
                                        key=torchbearer.Y_PRED),
            callbacks.TensorBoardImages(comment=current_time + '_cifar_vae',
                                        name='Target', write_each_epoch=False,
                                        key=torchbearer.Y_TRUE),
        ]).with_generators(
            train_generator=trainloader,
            val_generator=testloader).for_val_steps(5).to(device)
    trial.run(100)
# test_set must be defined for the loader below (this line was commented out
# in the original snippet)
test_set = CIFAR10('./data', train=False, download=True,
                   transform=transforms.Compose([transform_test]))
test_gen = torch.utils.data.DataLoader(test_set, pin_memory=True,
                                       batch_size=128, shuffle=False,
                                       num_workers=4)

optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

from torchbearer.callbacks import MultiStepLR, TensorBoard
from torchbearer import Trial
from datetime import datetime
current_time = datetime.now().strftime('%b%d_%H-%M-%S')

trial = Trial(net, optimizer, nn.CrossEntropyLoss(), metrics=['acc', 'loss'],
              callbacks=[
                  UnpackState(),
                  TensorBoard(write_graph=False, comment=current_time),
                  MultiStepLR([100, 150])
              ])
trial.with_generators(train_generator=train_gen,
                      val_generator=test_gen).to('cuda')
trial.run(200, verbose=1)