Пример #1
0
    def test_not_fitting_scaler_but_normalising_data(self):
        """A previously fitted scaler normalises new input without refitting."""
        n_trials = 10
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        first = self.create_dataset(n_trials, training_cols, class_cols,
                                    seed=0)
        second = self.create_dataset(n_trials, training_cols, class_cols,
                                     seed=71)

        # Fit a scaler on the first dataset only.
        _, scaler = dataset.prepare_dataset([first],
                                            class_cols,
                                            training_columns=training_cols,
                                            normalise_data=True,
                                            scaler=None)
        self.assertTrue(scaler is not None)

        # Reuse that scaler on both datasets; the first dataset sits at
        # index 1 of the returned loaders and must come out normalised.
        loaders, scaler = dataset.prepare_dataset(
            [second, first],
            class_cols,
            training_columns=training_cols,
            normalise_data=True,
            scaler=scaler)
        self.assertTrue(scaler is not None)

        normalised = loaders[1].dataset.tensors[0].numpy()
        self.assertTrue(
            TestNormalise.are_mean_and_variance_correct(normalised,
                                                        atol=1e-7))
Пример #2
0
    def test_transforms(self):
        """A transform passed to prepare_dataset is applied to every trial."""

        def add_one(dataframe):
            # Silence pandas warnings triggered by the in-place assignment.
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                dataframe.loc[:, dataframe.columns] = (
                    dataframe[dataframe.columns] + 1)

        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]
        test_dataset = self.create_dataset(10, training_cols, class_cols)

        loader, _ = dataset.prepare_dataset([test_dataset],
                                            class_cols,
                                            training_columns=training_cols,
                                            normalise_data=False,
                                            transforms=[add_one])

        actual = loader.dataset.tensors[0].numpy()
        expected = np.array(
            [np.array(trial[training_cols]) for trial in test_dataset]) + 1
        self.assertTrue(np.isclose(actual, expected).all())
Пример #3
0
    def test_multiclass(self):
        """prepare_dataset handles several groups of class columns at once."""
        n_trials = 10
        training_cols = [f"col_{i}" for i in range(8)]
        class_cols = [f"class_{i}" for i in range(6)]
        multiclass_list = [class_cols[:3], class_cols[3:]]

        test_dataset = self.create_dataset(n_trials,
                                           training_cols,
                                           multiclass_list,
                                           multiclass=True)
        loader, scaler = dataset.prepare_dataset(
            [test_dataset],
            multiclass_list,
            multiclass=True,
            training_columns=training_cols)

        self.assertEqual(scaler, None)
        self.assertEqual(len(loader.dataset), n_trials)

        # Passing None for the class columns checks only the training tensors.
        self.assertTrue(
            self.are_trials_correctly_set(loader, test_dataset, training_cols,
                                          None))
        self.assertTrue(
            self.is_multiclass_correctly_set(loader, test_dataset,
                                             multiclass_list))
Пример #4
0
    def test_class_columnns_not_in_every_dataset(self):
        """Datasets missing some of the class columns raise a KeyError."""
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(5)]

        complete = self.create_dataset(20, training_cols, class_cols)
        # This dataset only carries the first three class columns.
        incomplete = self.create_dataset(10, training_cols, class_cols[:3])

        with self.assertRaises(KeyError):
            dataset.prepare_dataset([complete, incomplete],
                                    class_cols,
                                    training_columns=training_cols)
Пример #5
0
    def test_normalisation_cols(self):
        """Only the columns listed in normalisation_cols get normalised."""
        n_trials = 10
        training_cols = [f"col_{i}" for i in range(6)]
        categorical_columns = training_cols[:2]
        non_categorical_columns = training_cols[2:]
        is_categorical = np.array(
            [col in categorical_columns for col in training_cols])
        class_cols = [f"class_{i}" for i in range(3)]

        test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
        loader, _ = dataset.prepare_dataset(
            [test_dataset],
            class_cols,
            training_columns=training_cols,
            normalise_data=True,
            normalisation_cols=non_categorical_columns)

        actual = loader.dataset.tensors[0].numpy()
        original = np.array(
            [np.array(trial[training_cols]) for trial in test_dataset])

        # Categorical columns must be passed through untouched.
        self.assertTrue(
            np.isclose(actual[:, :, is_categorical],
                       original[:, :, is_categorical]).all())

        # The remaining columns must pass the mean/variance normalisation check.
        self.assertTrue(
            TestNormalise.are_mean_and_variance_correct(
                actual[:, :, ~is_categorical], atol=1e-7))
Пример #6
0
    def test_non_categorical_class_columns(self):
        """Continuous-valued class columns are rejected with a ValueError."""
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        test_dataset = self.create_dataset(10, training_cols, class_cols)
        # Overwrite the one-hot class columns with random floats.
        for trial in test_dataset:
            trial[class_cols] = np.random.rand(*trial[class_cols].shape)

        with self.assertRaisesRegex(ValueError,
                                    "Classes are not one-hot encoded"):
            dataset.prepare_dataset([test_dataset],
                                    class_cols,
                                    training_columns=training_cols)
Пример #7
0
    def test_all_training_columns_are_categorical(self):
        """Normalising when every training column is categorical is an error."""
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        test_dataset = self.create_dataset(10, training_cols, class_cols)

        # Declaring every training column categorical leaves nothing to
        # normalise, which prepare_dataset must refuse.
        with self.assertRaises(ValueError):
            dataset.prepare_dataset([test_dataset],
                                    class_cols,
                                    training_columns=training_cols,
                                    normalise_data=True,
                                    categorical_columns=training_cols)
Пример #8
0
    def test_columns_in_both_categorical_and_normalisation_cols(self):
        """Overlapping categorical and normalisation column sets are rejected."""
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]
        # training_cols[2:4] appear in both lists below.
        categorical_columns = training_cols[:4]
        non_categorical_columns = training_cols[2:]

        test_dataset = self.create_dataset(10, training_cols, class_cols)

        with self.assertRaises(ValueError):
            dataset.prepare_dataset([test_dataset],
                                    class_cols,
                                    training_columns=training_cols,
                                    normalise_data=True,
                                    normalisation_cols=non_categorical_columns,
                                    categorical_columns=categorical_columns)
Пример #9
0
    def test_non_categorical_class_columnns_in_multiclass_setting(self):
        """Continuous class values also raise in the multiclass setting."""
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(6)]
        multiclass_list = [class_cols[:3], class_cols[3:]]

        test_dataset = self.create_dataset(10,
                                           training_cols,
                                           multiclass_list,
                                           multiclass=True)
        # Replace the one-hot class columns with random floats.
        for trial in test_dataset:
            trial[class_cols] = np.random.rand(*trial[class_cols].shape)

        with self.assertRaises(ValueError):
            dataset.prepare_dataset([test_dataset],
                                    multiclass_list,
                                    multiclass=True,
                                    training_columns=training_cols)
Пример #10
0
    def test_passing_scaler_but_not_normalising_data(self):
        """With normalise_data=False a provided scaler is kept but not applied."""
        n_trials = 10
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        first = self.create_dataset(n_trials, training_cols, class_cols,
                                    seed=0)
        second = self.create_dataset(n_trials, training_cols, class_cols,
                                     seed=71)

        # Fit a scaler on the first dataset.
        _, scaler = dataset.prepare_dataset([first],
                                            class_cols,
                                            training_columns=training_cols,
                                            normalise_data=True,
                                            scaler=None)
        self.assertTrue(scaler is not None)

        # Pass the scaler but disable normalisation for the second dataset.
        loader, scaler = dataset.prepare_dataset(
            [second],
            class_cols,
            training_columns=training_cols,
            normalise_data=False,
            scaler=scaler)
        self.assertTrue(scaler is not None)

        # The second dataset must come through unchanged.
        actual = loader.dataset.tensors[0].numpy()
        expected = np.array(
            [np.array(trial[training_cols]) for trial in second])
        self.assertTrue(np.isclose(actual, expected).all())
Пример #11
0
    def test_one_dataset(self):
        """A single dataset yields one loader of the right size and no scaler."""
        n_trials = 10
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
        loader, scaler = dataset.prepare_dataset(
            [test_dataset], class_cols, training_columns=training_cols)

        self.assertEqual(scaler, None)
        self.assertEqual(len(loader.dataset), n_trials)
        self.assertTrue(
            self.are_trials_correctly_set(loader, test_dataset, training_cols,
                                          class_cols))
Пример #12
0
    def test_different_batch_sizes(self):
        """The requested batch size is propagated to the returned loader."""
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]
        test_dataset = self.create_dataset(10, training_cols, class_cols)

        for batch_size in (1, 4, 16):
            loader, _ = dataset.prepare_dataset(
                [test_dataset],
                class_cols,
                training_columns=training_cols,
                batch_size=batch_size)

            self.assertEqual(loader.batch_size, batch_size)
Пример #13
0
    def test_cpu_device(self):
        """Tensors prepared for the CPU device must not live on the GPU.

        Bug fix: ``cpu_device`` was created but never passed to
        ``prepare_dataset``, so the ``device`` argument went untested. It is
        now forwarded explicitly, mirroring ``test_gpu_device``.
        """
        n_trials = 10
        n_training_cols = 6
        n_class_cols = 3
        training_cols = ["col_" + str(i) for i in range(n_training_cols)]
        class_cols = ["class_" + str(i) for i in range(n_class_cols)]
        cpu_device = torch.device("cpu")

        test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
        dataset_loader, _ = dataset.prepare_dataset(
            [test_dataset],
            class_cols,
            training_columns=training_cols,
            device=cpu_device)

        # Both the training and the class tensors must stay off the GPU.
        self.assertFalse(dataset_loader.dataset.tensors[0].is_cuda)
        self.assertFalse(dataset_loader.dataset.tensors[1].is_cuda)
Пример #14
0
    def test_class_columnns_integer(self):
        """Integer-typed (rather than float) class columns are accepted.

        Bug fix: the loop previously assigned ``trial[class_cols]`` to
        itself, a no-op that left the class columns with their original
        dtype. The cast to ``int`` now actually happens, so the test
        exercises what its name promises.
        """
        n_trials = 10
        n_training_cols = 6
        n_class_cols = 3
        training_cols = ["col_" + str(i) for i in range(n_training_cols)]
        class_cols = ["class_" + str(i) for i in range(n_class_cols)]

        test_dataset = self.create_dataset(n_trials, training_cols, class_cols)

        # Convert the one-hot class columns to an integer dtype.
        for trial in test_dataset:
            trial[class_cols] = trial[class_cols].astype(int)

        dataset_loader, _ = dataset.prepare_dataset(
            [test_dataset], class_cols, training_columns=training_cols)

        self.assertTrue(
            self.are_trials_correctly_set(dataset_loader, test_dataset,
                                          training_cols, class_cols))
Пример #15
0
    def test_save_plot_is_not_none(self):
        """Plotting a confusion matrix returns an Axes and writes the file."""
        self.create_and_save_model(self.network_params, self.model_path)
        # Named `trials` to avoid shadowing the `dataset` module.
        trials = self.create_dataset(self.n_trials, self.training_columns,
                                     self.class_columns)
        test_loader, _ = prepare_dataset(
            [trials],
            class_columns=self.class_columns,
            training_columns=self.training_columns)

        self.write_dataframes_to_file(trials, self.dataset_path)
        _, predicted = evaluation.evaluate_saved_model(self.model_path,
                                                       self.network_params,
                                                       self.dataset_path,
                                                       self.training_columns,
                                                       self.class_columns,
                                                       trials=None)

        ax = visualization.plot_confusion_matrix_given_predicted_and_test_loader(
            predicted, test_loader, self.class_columns, self.plot_path)
        self.assertTrue(isinstance(ax, Axes))
        self.assertTrue(os.path.exists(self.plot_path))
Пример #16
0
    def test_gpu_device(self):
        """Tensors prepared for a CUDA device end up on the GPU."""
        if not torch.cuda.is_available():
            # Skip gracefully on machines without a GPU.
            warnings.warn("No Cuda device available to run this test.")
            return

        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        test_dataset = self.create_dataset(10, training_cols, class_cols)
        loader, _ = dataset.prepare_dataset(
            [test_dataset],
            class_cols,
            training_columns=training_cols,
            device=torch.device("cuda:0"))

        self.assertTrue(loader.dataset.tensors[0].is_cuda)
        self.assertTrue(loader.dataset.tensors[1].is_cuda)
Пример #17
0
    def test_more_than_one_dataset(self):
        """Several datasets yield one loader each, in the order given."""
        n_trials = 10
        training_cols = [f"col_{i}" for i in range(6)]
        class_cols = [f"class_{i}" for i in range(3)]

        train_dataset = self.create_dataset(2 * n_trials, training_cols,
                                            class_cols)
        test_dataset = self.create_dataset(n_trials, training_cols, class_cols)

        loaders, scaler = dataset.prepare_dataset(
            [train_dataset, test_dataset],
            class_cols,
            training_columns=training_cols)

        self.assertEqual(scaler, None)
        self.assertEqual(len(loaders[0].dataset), 2 * n_trials)
        self.assertEqual(len(loaders[1].dataset), n_trials)
        for loader, source in zip(loaders, (train_dataset, test_dataset)):
            self.assertTrue(
                self.are_trials_correctly_set(loader, source, training_cols,
                                              class_cols))