示例#1
0
    def test_do_inference(self):
        """Inference on a single test-mode graph yields output/label pairs
        whose squeezed shapes match the flattened adjacency labels."""
        # Given
        data_preprocessor = DataPreprocessor()
        device = "cpu"
        inferencer = Inferencer(data_preprocessor, device)
        # (features size, graph size, flattened-graph size) drive the model shape.
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(), BASE_GRAPH.size(), BASE_GRAPH.view(-1).size())
        model = ModelSelector.load_model("RNN")
        model = model(time_steps=1,
                      number_of_nodes=data_dimensions[1][0],
                      number_of_node_features=data_dimensions[0][1],
                      fully_connected_layer_input_size=data_dimensions[1][0] * data_dimensions[0][1],
                      fully_connected_layer_output_size=data_dimensions[2][0])
        all_neighbors = to.tensor([[1, 2, -1, -1],
                                   [0, 2, -1, -1],
                                   [0, 1, 3, -1],
                                   [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        tag = 'tag'
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors, BASE_GRAPH.view(-1), tag)]
        # Reuse the preprocessor built above instead of constructing a second,
        # identical DataPreprocessor just for the split.
        inference_data, _, _ = data_preprocessor.train_validation_test_split(dataset, 1, 0.0, 0.0)
        output_label_pairs_expected = [BASE_GRAPH.view(-1), BASE_GRAPH.view(-1)]

        # When
        output_label_pairs = inferencer.do_inference(model, inference_data)

        # Then
        self.assertEqual(output_label_pairs[0][0].squeeze().size(), output_label_pairs_expected[0].size())
        self.assertEqual(output_label_pairs[0][1].squeeze().size(), output_label_pairs_expected[1].size())
 def _prepare_dataset(
     self, configuration_dictionary: Dict
 ) -> Tuple[DataLoader, DataLoader, DataLoader, Tuple]:
     """Load the graph dataset and split it into training, validation and
     test DataLoaders, also returning the dataset's dimension tuple.

     :param configuration_dictionary: must contain 'batch_size',
         'validation_split' and 'test_split' entries.
     :return: (training_data, validation_data, test_data, data_dimensions)
     """
     dataset = GraphDataset(self.data_path, test_mode=self.test_mode)
     # NOTE(review): test mode is enabled unconditionally even though
     # test_mode was already passed to the constructor — confirm intended.
     dataset.enable_test_mode()
     self.get_logger().info("Calculating all neighbors for each node")
     # Split ratios and batch size come straight from the configuration.
     training_data, validation_data, test_data = self.data_preprocessor \
         .train_validation_test_split(dataset,
                                      configuration_dictionary['batch_size'],
                                      configuration_dictionary['validation_split'],
                                      configuration_dictionary['test_split'])
     # Dimensions are extracted from the full dataset, not from a split.
     data_dimensions = self.data_preprocessor.extract_data_dimensions(
         dataset)
     return training_data, validation_data, test_data, data_dimensions
    def test_start(self):
        """End-to-end inference run: a distance-maps pickle named after the
        current timestamp must appear in the results directory."""
        # Given
        tests_model_directory = os.path.join(
            "tests", "test_data", "model-checkpoints-test",
            "configuration&id__model&RNN__epochs&10__loss_function&MSE__optimizer"
            "&Adagrad__batch_size&100__validation_split&0.2__test_split"
            "&0.1__time_steps&1__validation_period&5",
            "Epoch_5_model_state_dictionary.pth")
        tests_results_directory = os.path.join('tests', 'results_inference')
        device = "cpu"
        data_preprocessor = DataPreprocessor()
        loader = Loader("RNN")
        inferencer = Inferencer(data_preprocessor, device)
        saver = Saver(tests_model_directory, tests_results_directory)
        self.postgres_connector = PostgresConnector()
        self._insert_test_data(dataset_size=1)
        # Truncate even if an assertion fails; the original trailing
        # tear-down only ran on success and leaked rows into later tests.
        self.addCleanup(self._truncate_table)
        dataset = GraphDataset(self.postgres_connector)
        inference = Inference(dataset, data_preprocessor, loader, inferencer,
                              saver)

        # When
        inference.start()

        # Then
        # Minute-resolution timestamp; assumes the run completes within the
        # same minute it started.
        filename_expected = datetime.now().strftime(
            "%d-%b-%YT%H_%M") + "_distance_maps.pickle"
        self.assertTrue(
            os.path.isfile(
                os.path.join(tests_results_directory, filename_expected)))
 def _build_grid_search(self, grid_search_parameters) -> GridSearch:
     """Wire up a GridSearch with freshly-built collaborators.

     :param grid_search_parameters: parameter grid expanded into individual
         configurations via _get_all_grid_search_configurations.
     :return: a fully assembled GridSearch instance.
     """
     preprocessor = DataPreprocessor()
     connector = PostgresConnector()
     graph_dataset = GraphDataset(connector)
     trainer = Trainer(preprocessor, self.device, connector)
     results_saver = Saver(self.model_directory, self.results_directory)
     configurations = self._get_all_grid_search_configurations(grid_search_parameters)
     return GridSearch(graph_dataset, preprocessor, trainer, configurations,
                       results_saver)
 def _build_inference(self) -> Inference:
     """Wire up an Inference pipeline.

     The model name is read from the MODEL environment variable and stored
     on the instance as a side effect (raises KeyError if unset).

     :return: a fully assembled Inference instance.
     """
     self.model = os.environ['MODEL']
     preprocessor = DataPreprocessor()
     model_loader = Loader(self.model)
     model_inferencer = Inferencer(preprocessor, self.device)
     connector = PostgresConnector()
     graph_dataset = GraphDataset(connector)
     results_saver = Saver(self.model_directory, self.results_directory)
     return Inference(graph_dataset, preprocessor, model_loader,
                      model_inferencer, results_saver)
    def test_extract_data_dimensions(self):
        """extract_data_dimensions reports the node-feature and label sizes
        of a test-mode dataset."""
        # Given
        node_features = BASE_GRAPH_NODE_FEATURES
        neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                               [0, 1, 3, -1], [2, -1, -1, -1]])
        flattened_labels = BASE_GRAPH.view(-1)
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        dataset.dataset = [(node_features, neighbors, flattened_labels, index)
                           for index in range(1)]
        expected_dimensions = (node_features.size(), flattened_labels.size())

        # When
        actual_dimensions = self.data_preprocessor.extract_data_dimensions(
            dataset)

        # Then
        self.assertEqual(expected_dimensions, actual_dimensions)
示例#7
0
    def test_do_evaluate(self):
        """Evaluating a single test-mode graph yields a strictly positive
        validation loss."""
        # Given
        dimensions = (BASE_GRAPH_NODE_FEATURES.size(),
                      BASE_GRAPH.view(-1).size())
        self.model_trainer.instantiate_attributes(
            dimensions, self.configuration_dictionary)
        neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                               [0, 1, 3, -1], [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, neighbors,
                            BASE_GRAPH.view(-1), 'tag')]
        # All data goes to the training split (no validation/test fraction).
        training_data, _, _ = DataPreprocessor().train_validation_test_split(
            dataset, 1, 0.0, 0.0)

        # When
        validation_loss = self.model_trainer.do_evaluate(
            evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
    def test_train_validation_test_split(self):
        """A 10-item dataset splits 7/2/1 for validation_split=0.2 and
        test_split=0.1."""
        # Given
        number_of_entries = 10
        neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                               [0, 1, 3, -1], [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, neighbors,
                            BASE_GRAPH.view(-1), index)
                           for index in range(number_of_entries)]
        expected_lengths = [7, 2, 1]

        # When
        splits = self.data_preprocessor.train_validation_test_split(
            dataset, batch_size=1, validation_split=0.2, test_split=0.1)
        actual_lengths = [len(split) for split in splits]

        # Then
        self.assertEqual(expected_lengths, actual_lengths)
示例#9
0
    def test_extract_data_dimensions(self):
        """extract_data_dimensions derives node/feature counts and the
        fully connected layer sizes from a one-graph database table."""
        # Given
        self._insert_test_data(dataset_size=1)
        # Truncate even if the assertion fails; the original tear-down only
        # ran on success and leaked rows into subsequent tests.
        self.addCleanup(self._truncate_table)
        dataset = GraphDataset(self.postgres_connector)
        data_dimensions_expected = {"number_of_nodes": 4,
                                    "number_of_node_features": 4,
                                    "fully_connected_layer_input_size": 16,
                                    "fully_connected_layer_output_size": 8}

        # When
        data_dimensions = self.data_preprocessor.extract_data_dimensions(dataset)

        # Then
        self.assertEqual(data_dimensions_expected, data_dimensions)
示例#10
0
    def test_train_validation_test_split(self):
        """A 10-row database dataset splits 7/2/1 for validation_split=0.2
        and test_split=0.1."""
        # Given
        self._insert_test_data(dataset_size=10)
        # Truncate even if the assertion fails; the original tear-down only
        # ran on success and leaked rows into subsequent tests.
        self.addCleanup(self._truncate_table)
        dataset = GraphDataset(self.postgres_connector)
        train_validation_test_split_expected = [7, 2, 1]

        # When
        train_validation_test_split = self.data_preprocessor.train_validation_test_split(dataset,
                                                                                         batch_size=1,
                                                                                         validation_split=0.2,
                                                                                         test_split=0.1)
        train_validation_test_split = [len(dataset) for dataset in train_validation_test_split]

        # Then
        self.assertEqual(train_validation_test_split_expected, train_validation_test_split)
    def test_do_evaluate(self):
        """Evaluating a single database-backed graph yields a strictly
        positive validation loss."""
        # Given
        data_dimensions = {"number_of_nodes": 4,
                           "number_of_node_features": 4,
                           "fully_connected_layer_input_size": 16,
                           "fully_connected_layer_output_size": 8}
        self.model_trainer.build(data_dimensions, self.configuration_dictionary)
        self._insert_test_data(1)
        # Truncate even if the assertion fails; the original tear-down only
        # ran on success and leaked rows into subsequent tests.
        self.addCleanup(self._truncate_table)
        dataset = GraphDataset(self.postgres_connector)
        # All data goes to the training split (no validation/test fraction).
        training_data, _, _ = DataPreprocessor().train_validation_test_split(dataset, 1, 0.0, 0.0)

        # When
        validation_loss = self.model_trainer.do_evaluate_step(evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
示例#12
0
    def test_start_for_multiple_batches_of_differing_size(self):
        """Grid search over a 5-graph dataset with batch_size 3 (so the last
        batch is smaller) records positive training, validation and test
        losses for the single configuration."""
        # Given
        dataset_size = 5
        grid_search_dictionary = {
            "model": ["RNN"],
            "epochs": [10],
            "batch_size": [3],
            "validation_split": [0.2],
            "test_split": [0.1],
            "loss_function": ["MSE"],
            "optimizer": ["Adagrad"],
            "time_steps": [1],
            "validation_period": [5],
            "scaling_factor": [0.1],
            "penalty_decimals": [0]
        }
        self._insert_test_data(dataset_size)
        # Truncate even if an assertion fails; the original trailing
        # tear-down only ran on success and leaked rows into later tests.
        self.addCleanup(self._truncate_table)
        dataset = GraphDataset(self.postgres_connector)
        grid_search_configurations = self._get_all_grid_search_configurations(
            grid_search_dictionary)

        grid_search = GridSearch(dataset, self.data_preprocessor,
                                 self.model_trainer,
                                 grid_search_configurations, self.saver)

        # When
        losses = grid_search.start()
        configuration_id = list(losses["training_loss"].keys())[0]

        # Then
        # Training loss is keyed by the final epoch, validation loss by the
        # validation period, test loss by the literal "final_epoch" key.
        self.assertTrue(losses["training_loss"][configuration_id][
            grid_search_dictionary["epochs"][0]] > 0.0)
        self.assertTrue(losses["validation_loss"][configuration_id][
            grid_search_dictionary["validation_period"][0]] > 0.0)
        self.assertTrue(
            losses["test_loss"][configuration_id]["final_epoch"] > 0.0)
    def test_do_inference(self):
        """Inference on a single database-backed graph yields output/label
        pairs whose squeezed shapes match the expected flattened labels."""
        # Given
        data_preprocessor = DataPreprocessor()
        device = "cpu"
        inferencer = Inferencer(data_preprocessor, device)
        data_dimensions = {
            "number_of_nodes": 4,
            "number_of_node_features": 4,
            "fully_connected_layer_input_size": 16,
            "fully_connected_layer_output_size": 8
        }
        model = load_model("RNN")
        model = model(
            time_steps=1,
            number_of_nodes=data_dimensions["number_of_nodes"],
            number_of_node_features=data_dimensions["number_of_node_features"],
            fully_connected_layer_input_size=data_dimensions[
                "fully_connected_layer_input_size"],
            fully_connected_layer_output_size=data_dimensions[
                "fully_connected_layer_output_size"])
        self.postgres_connector = PostgresConnector()
        self._insert_test_data(1)
        # Unlike its sibling DB tests, this test never truncated the table,
        # leaking rows into subsequent tests; clean up unconditionally.
        self.addCleanup(self._truncate_table)
        dataset = GraphDataset(self.postgres_connector)
        # Reuse the preprocessor built above instead of constructing a
        # second, identical DataPreprocessor just for the split.
        inference_data, _, _ = data_preprocessor.train_validation_test_split(
            dataset, 1, 0.0, 0.0)
        output_label_pairs_expected = [
            to.tensor((0, 1, 0, 2, 1, 2, 0, 0)),
            to.tensor((0, 1, 0, 2, 1, 2, 0, 0))
        ]

        # When
        output_label_pairs = inferencer.do_inference(model, inference_data)

        # Then
        self.assertEqual(output_label_pairs[0][0].squeeze().size(),
                         output_label_pairs_expected[0].size())
        self.assertEqual(output_label_pairs[0][1].squeeze().size(),
                         output_label_pairs_expected[1].size())
 def _prepare_dataset(self) -> Tuple[DataLoader, Tuple]:
     """Build the inference DataLoader and the dataset's dimension tuple.

     :return: (inference_dataset, data_dimensions)
     """
     dataset = GraphDataset(self.data_path, test_mode=self.test_mode)
     loader = self.data_preprocessor.get_dataloader(dataset)
     dimensions = self.data_preprocessor.extract_data_dimensions(dataset)
     return loader, dimensions