def setUp(self) -> None:
    """Create the trainer, connector and configuration shared by the tests."""
    self.configuration_dictionary = {
        "model": "RNN",
        "loss_function": "MSE",
        "optimizer": "SGD",
        "time_steps": 1,
    }
    self.postgres_connector = PostgresConnector()
    self.model_trainer = Trainer(DataPreprocessor(), "cpu")
def create_grid_search(dataset_name: str,
                       data_directory: str,
                       model_directory: str,
                       results_directory: str,
                       model: str,
                       device: str,
                       epochs: str,
                       loss_function_selection: str,
                       optimizer_selection: str,
                       batch_size: str,
                       validation_split: str,
                       test_split: str,
                       time_steps: str,
                       validation_period: str) -> MessagePassingNN:
    """Wire together a GridSearch from raw CLI-style parameters.

    Parses the grid-search parameter strings, builds the collaborators
    (preprocessor, trainer, saver) and returns the assembled facade.
    """
    parameters = GridSearchParametersParser().get_grid_search_dictionary(
        model,
        epochs,
        loss_function_selection,
        optimizer_selection,
        batch_size,
        validation_split,
        test_split,
        time_steps,
        validation_period)
    preprocessor = DataPreprocessor()
    # The trainer shares the same preprocessor instance as the grid search.
    grid_search = GridSearch(_get_data_path(data_directory, dataset_name),
                             preprocessor,
                             Trainer(preprocessor, device),
                             parameters,
                             Saver(model_directory, results_directory))
    return MessagePassingNN(grid_search)
 def _build_grid_search(self, grid_search_parameters) -> GridSearch:
     """Assemble a GridSearch backed by Postgres using this instance's settings."""
     preprocessor = DataPreprocessor()
     connector = PostgresConnector()
     configurations = self._get_all_grid_search_configurations(grid_search_parameters)
     return GridSearch(GraphDataset(connector),
                       preprocessor,
                       Trainer(preprocessor, self.device, connector),
                       configurations,
                       Saver(self.model_directory, self.results_directory))
Example #4
0
 def setUp(self) -> None:
     """Prepare graph fixtures, a file-system repository, trainer and saver."""
     self.features = BASE_GRAPH_NODE_FEATURES
     self.adjacency_matrix = BASE_GRAPH
     self.labels = BASE_GRAPH.view(-1)
     self.dataset = 'training-test-data'
     self.tests_data_directory = 'tests/test_data/'
     self.data_path = self.tests_data_directory + self.dataset + "/"
     self.repository = FileSystemRepository(self.tests_data_directory,
                                            self.dataset)
     self.data_preprocessor = DataPreprocessor()
     self.data_preprocessor.enable_test_mode()
     self.model_trainer = Trainer(self.data_preprocessor, "cpu")
     self.saver = Saver('tests/model_checkpoints', 'tests/grid_search_results')
Example #5
0
 def setUp(self) -> None:
     """Prepare graph fixtures and Postgres-backed collaborators for the tests."""
     self.features = BASE_GRAPH_NODE_FEATURES
     self.adjacency_matrix = BASE_GRAPH
     self.labels = BASE_GRAPH.view(-1)
     self.dataset_name = 'training-test-data'
     self.tests_data_directory = os.path.join('tests', 'test_data')
     self.data_path = os.path.join("./", self.tests_data_directory,
                                   self.dataset_name)
     self.repository = FileSystemRepository(self.tests_data_directory,
                                            self.dataset_name)
     self.data_preprocessor = DataPreprocessor()
     self.postgres_connector = PostgresConnector()
     self.model_trainer = Trainer(self.data_preprocessor, "cpu",
                                  self.postgres_connector)
     self.saver = Saver(os.path.join('tests', 'model_checkpoints'),
                        os.path.join('tests', 'grid_search_results'))
Example #6
0
class TestTrainer(TestCase):
    """Tests for Trainer driven by an in-memory (test-mode) GraphDataset."""

    def setUp(self) -> None:
        """Create a normalizing Trainer and the configuration shared by all tests."""
        self.configuration_dictionary = {
            "model": "RNN",
            "loss_function": "MSE",
            "optimizer": "SGD",
            "time_steps": 1,
        }
        data_preprocessor = DataPreprocessor()
        self.model_trainer = Trainer(data_preprocessor, "cpu", normalize=True)

    def _build_training_data(self):
        """Instantiate the trainer's model/optimizer and return a one-graph
        training split built from the base-graph fixtures.

        Shared by test_do_train and test_do_evaluate, which previously
        duplicated this setup verbatim.
        """
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(),
                           BASE_GRAPH.view(-1).size())
        self.model_trainer.instantiate_attributes(
            data_dimensions, self.configuration_dictionary)
        # Rows are padded with -1 for nodes that have fewer neighbors.
        all_neighbors = to.tensor([[1, 2, -1, -1], [0, 2, -1, -1],
                                   [0, 1, 3, -1], [2, -1, -1, -1]])
        dataset = GraphDataset("")
        dataset.enable_test_mode()
        dataset.dataset = [(BASE_GRAPH_NODE_FEATURES, all_neighbors,
                            BASE_GRAPH.view(-1), 'tag')]
        # Splits 0.0/0.0 send the single example entirely to training.
        training_data, _, _ = DataPreprocessor().train_validation_test_split(
            dataset, 1, 0.0, 0.0)
        return training_data

    def test_instantiate_attributes(self):
        # Given
        number_of_nodes = BASE_GRAPH.size()[0]
        number_of_node_features = BASE_GRAPH_NODE_FEATURES.size()[1]
        data_dimensions = (BASE_GRAPH_NODE_FEATURES.size(),
                           BASE_GRAPH.view(-1).size())

        # When
        self.model_trainer.instantiate_attributes(
            data_dimensions, self.configuration_dictionary)

        # Then
        self.assertTrue(
            self.model_trainer.model.number_of_nodes == number_of_nodes)
        self.assertTrue(self.model_trainer.model.number_of_node_features ==
                        number_of_node_features)
        self.assertTrue(self.model_trainer.optimizer.param_groups)

    def test_do_train(self):
        # Given
        training_data = self._build_training_data()

        # When
        training_loss = self.model_trainer.do_train(
            training_data=training_data, epoch=1)

        # Then
        self.assertTrue(training_loss > 0.0)

    def test_do_evaluate(self):
        # Given
        training_data = self._build_training_data()

        # When
        validation_loss = self.model_trainer.do_evaluate(
            evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
class TestTrainer(TestCase):
    """Tests for Trainer backed by a Postgres-stored graph dataset."""

    # Model-input dimensions shared by every test (4-node graph with
    # 4 features per node); previously duplicated in three test methods.
    DATA_DIMENSIONS = {"number_of_nodes": 4,
                       "number_of_node_features": 4,
                       "fully_connected_layer_input_size": 16,
                       "fully_connected_layer_output_size": 8}

    def setUp(self) -> None:
        """Create the trainer, connector and configuration shared by all tests."""
        self.configuration_dictionary = {
            "model": "RNN",
            "loss_function": "MSE",
            "optimizer": "SGD",
            "time_steps": 1,
        }
        self.postgres_connector = PostgresConnector()
        data_preprocessor = DataPreprocessor()
        self.model_trainer = Trainer(data_preprocessor, "cpu")

    def _prepare_training_data(self):
        """Build the trainer, insert one test graph into Postgres and return
        its training split.

        Shared by test_do_train and test_do_evaluate, which previously
        duplicated this setup verbatim. Callers must still invoke
        _truncate_table() afterwards to clean up the inserted rows.
        """
        self.model_trainer.build(self.DATA_DIMENSIONS,
                                 self.configuration_dictionary)
        self._insert_test_data(1)
        dataset = GraphDataset(self.postgres_connector)
        # Splits 0.0/0.0 send the single example entirely to training.
        training_data, _, _ = DataPreprocessor().train_validation_test_split(
            dataset, 1, 0.0, 0.0)
        return training_data

    def test_instantiate_attributes(self):
        # When
        self.model_trainer.build(self.DATA_DIMENSIONS,
                                 self.configuration_dictionary)

        # Then
        self.assertTrue(self.model_trainer.model.number_of_nodes ==
                        self.DATA_DIMENSIONS["number_of_nodes"])
        self.assertTrue(self.model_trainer.model.number_of_node_features ==
                        self.DATA_DIMENSIONS["number_of_node_features"])
        self.assertTrue(self.model_trainer.optimizer.param_groups)

    def test_do_train(self):
        # Given
        training_data = self._prepare_training_data()

        # When
        training_loss = self.model_trainer.do_train_step(
            training_data=training_data, epoch=1)

        # Then
        self.assertTrue(training_loss > 0.0)
        self._truncate_table()

    def test_do_evaluate(self):
        # Given
        training_data = self._prepare_training_data()

        # When
        validation_loss = self.model_trainer.do_evaluate_step(
            evaluation_data=training_data, epoch=1)

        # Then
        self.assertTrue(validation_loss > 0.0)
        self._truncate_table()

    def _insert_test_data(self, dataset_size):
        """Create the test tables and insert dataset_size serialized graphs
        plus one penalty row per amino-acid residue."""
        self.postgres_connector.open_connection()
        dataset_values = """(pdb_code varchar primary key, features float[][], neighbors float[][], labels float[])"""
        penalty_values = """(residue varchar primary key, matrix float[][], penalty float[][])"""
        self.postgres_connector.create_table(TEST_DATASET, dataset_values)
        self.postgres_connector.create_table(TEST_PENALTY, penalty_values)
        features = FEATURES_SERIALIZED
        neighbors = NEIGHBORS_SERIALIZED
        labels = LABELS_SERIALIZED
        penalty = PENALTY_SERIALIZED
        # The stringified index serves as the primary-key pdb_code.
        for index in range(dataset_size):
            self.postgres_connector.execute_insert_dataset(
                str(index), features, neighbors, labels)
        for residue in map_amino_acid_codes:
            self.postgres_connector.execute_insert_penalty(residue, penalty)
        self.postgres_connector.close_connection()

    def _truncate_table(self) -> None:
        """Remove all rows inserted into the test dataset table."""
        self.postgres_connector.open_connection()
        self.postgres_connector.truncate_table(TEST_DATASET)
        self.postgres_connector.close_connection()