train_valid_dataset, [50_000, 10_000], generator=torch.Generator().manual_seed(42))

# Select CUDA device if available
cuda_device = 0
device = torch.device("cuda:%d" % cuda_device if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print(f"Running on {torch.cuda.get_device_name(device)}")
else:
    print("Running on CPU")

# Define the network: a flat 784 -> 100 -> 10 fully-connected classifier for MNIST.
network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 100), nn.ReLU(), nn.Linear(100, 10))

# Train: wrap the network in a Poutyne Model with SGD + cross-entropy,
# tracking per-batch accuracy and an epoch-level F1 score.
model = Model(network, 'sgd', 'cross_entropy', batch_metrics=['accuracy'], epoch_metrics=['f1'], device=device)
# Change the number of epochs to find the optimum value for your work
model.fit_dataset(train_dataset, valid_dataset, epochs=1, batch_size=32, num_workers=2)
model.evaluate_dataset(test_dataset)
# TRAIN model_path = results_path / "model.pickle" if model_path.exists(): model = torch.load(model_path) else: model = CNN((1, 28, 28)) poutyne_model = Model(model, optimizer='adam', loss_function='cross_entropy', batch_metrics=['accuracy'], device=device) poutyne_model.fit_dataset(train_dataset, test_dataset, epochs=5, batch_size=128, num_workers=2, dataloader_kwargs={"pin_memory": True}) torch.save(model, model_path) # Measure model's invariance to rotations # Iterate over images from MNIST without labels # Using same name as before to avoid double download class MNIST(datasets.MNIST): def __getitem__(self, index): x, y = super().__getitem__(index) return x dataset_nolabels = MNIST( path,
# Instantiate the MNIST datasets: official train split (later subdivided) and test split.
train_valid_dataset = MNIST('./datasets', train=True, download=True, transform=ToTensor())
test_dataset = MNIST('./datasets', train=False, download=True, transform=ToTensor())

# Deterministic 50k/10k train/validation split (seeded for reproducibility).
split_generator = torch.Generator().manual_seed(42)
train_dataset, valid_dataset = random_split(train_valid_dataset, [50_000, 10_000], generator=split_generator)

# Select CUDA device if available, otherwise fall back to the CPU.
cuda_device = 0
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:%d' % cuda_device if use_cuda else 'cpu')

# Define the network: 784 -> 100 -> 10 fully-connected classifier.
network = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)

epochs = 5

# Define the Poutyne Model (SGD + cross-entropy) and train it.
model = Model(network, 'sgd', 'cross_entropy', device=device)
model.fit_dataset(train_dataset, valid_dataset, epochs=epochs)
class ModelDatasetMethodsTest(ModelFittingTestCase):
    """Tests for Model.fit_dataset / evaluate_dataset / predict_dataset on MNIST."""

    @classmethod
    def setUpClass(cls):
        # Download MNIST once per class into a temp dir shared by all tests.
        cls.temp_dir_obj = TemporaryDirectory()
        cls.train_dataset = MNIST(cls.temp_dir_obj.name, train=True, download=True, transform=ToTensor())
        cls.test_dataset = MNIST(cls.temp_dir_obj.name, train=False, download=True, transform=ToTensor())
        # Deterministic 50k/10k train/validation split.
        cls.train_sub_dataset, cls.valid_sub_dataset = random_split(
            cls.train_dataset, [50_000, 10_000], generator=torch.Generator().manual_seed(42))

    @classmethod
    def tearDownClass(cls):
        cls.temp_dir_obj.cleanup()

    def setUp(self):
        super().setUp()
        torch.manual_seed(42)
        # Minimal linear classifier; metric names/values mirror what Poutyne reports.
        self.pytorch_network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        self.batch_metrics = ['accuracy']
        self.batch_metrics_names = ['acc']
        self.batch_metrics_values = [ANY]
        self.epoch_metrics = ['f1']
        self.epoch_metrics_names = ['fscore_micro']
        self.epoch_metrics_values = [ANY]
        self.model = Model(self.pytorch_network, 'sgd', 'cross_entropy',
                           batch_metrics=self.batch_metrics, epoch_metrics=self.epoch_metrics)

    def assertStdoutContains(self, values):
        # Assert every expected fragment appears in the captured stdout
        # (self.test_out is set up by _capture_output in the base class).
        for value in values:
            self.assertIn(value, self.test_out.getvalue().strip())

    def test_fitting_mnist(self):
        """fit_dataset with train + validation datasets triggers the expected callbacks."""
        logs = self.model.fit_dataset(self.train_sub_dataset,
                                      self.valid_sub_dataset,
                                      epochs=ModelTest.epochs,
                                      steps_per_epoch=ModelTest.steps_per_epoch,
                                      validation_steps=ModelTest.steps_per_epoch,
                                      callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch,
            'valid_steps': ModelTest.steps_per_epoch
        }
        self._test_callbacks_train(params, logs, valid_steps=ModelTest.steps_per_epoch)

    def test_fitting_mnist_without_valid(self):
        """fit_dataset without a validation dataset still runs the train callbacks."""
        # NOTE(review): validation_steps is passed even though no validation
        # dataset is given — presumably ignored by fit_dataset; confirm.
        logs = self.model.fit_dataset(self.train_dataset,
                                      epochs=ModelTest.epochs,
                                      steps_per_epoch=ModelTest.steps_per_epoch,
                                      validation_steps=ModelTest.steps_per_epoch,
                                      callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch,
            'valid_steps': ModelTest.steps_per_epoch
        }
        self._test_callbacks_train(params, logs, has_valid=False)

    def test_evaluate_dataset(self):
        """evaluate_dataset returns (float loss, metrics array, predictions array)."""
        num_steps = 10
        loss, metrics, pred_y = self.model.evaluate_dataset(self.test_dataset,
                                                            batch_size=ModelTest.batch_size,
                                                            steps=num_steps,
                                                            return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(), self.batch_metrics_values + self.epoch_metrics_values)
        self.assertEqual(type(pred_y), np.ndarray)
        # One 10-way logit row per evaluated sample.
        self.assertEqual(pred_y.shape, (num_steps * ModelTest.batch_size, 10))

    def test_evaluate_dataset_with_progress_bar_coloring(self):
        """The evaluation progress bar emits ANSI color codes and block characters."""
        num_steps = 10
        self._capture_output()
        self.model.evaluate_dataset(self.test_dataset, batch_size=ModelTest.batch_size, steps=num_steps)
        # ANSI SGR color sequences plus the full-block bar character.
        self.assertStdoutContains(["%", "[32m", "[35m", "[36m", "[94m", "\u2588"])

    def test_evaluate_dataset_with_callback(self):
        """evaluate_dataset forwards test-phase events to the given callbacks."""
        num_steps = 10
        self.model.evaluate_dataset(self.test_dataset,
                                    batch_size=ModelTest.batch_size,
                                    steps=num_steps,
                                    callbacks=[self.mock_callback])
        # NOTE(review): 'steps' is set from ModelTest.epochs, not num_steps —
        # looks like a copy-paste slip unless the two values coincide; confirm.
        params = {'steps': ModelTest.epochs}
        self._test_callbacks_test(params)

    def test_evaluate_dataset_with_return_dict(self):
        """return_dict_format=True yields a logs dict instead of a tuple."""
        num_steps = 10
        logs = self.model.evaluate_dataset(self.test_dataset,
                                           batch_size=ModelTest.batch_size,
                                           steps=num_steps,
                                           return_dict_format=True)
        self._test_return_dict_logs(logs)

    def test_evaluate_dataset_with_ground_truth(self):
        """return_ground_truth=True adds the true labels to the returned tuple."""
        num_steps = 10
        loss, metrics, pred_y, true_y = self.model.evaluate_dataset(self.test_dataset,
                                                                    batch_size=ModelTest.batch_size,
                                                                    steps=num_steps,
                                                                    return_pred=True,
                                                                    return_ground_truth=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(), self.batch_metrics_values + self.epoch_metrics_values)
        self.assertEqual(type(pred_y), np.ndarray)
        self.assertEqual(type(true_y), np.ndarray)
        self.assertEqual(pred_y.shape, (num_steps * ModelTest.batch_size, 10))
        self.assertEqual(true_y.shape, (num_steps * ModelTest.batch_size, ))

    def test_predict_dataset(self):
        """predict_dataset works on a dataset yielding inputs only (no labels)."""

        class PredictDataset(Dataset):
            # Wraps a (x, y) dataset and exposes only x, as predict expects.
            def __init__(self, dataset):
                super().__init__()
                self.dataset = dataset

            def __getitem__(self, index):
                return self.dataset[index][0]

            def __len__(self):
                return len(self.dataset)

        num_steps = 10
        pred_y = self.model.predict_dataset(PredictDataset(self.test_dataset),
                                            batch_size=ModelTest.batch_size,
                                            steps=num_steps)
        self.assertEqual(type(pred_y), np.ndarray)
        self.assertEqual(pred_y.shape, (num_steps * ModelTest.batch_size, 10))