def test_experiment_not_transformed_test(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        data_manager = DataManager()
        data_manager.set_data(loaded_data)
        data_manager.split_data(test_split=0.19, train_split=0.62)
        learning_model = FakePredictionModel()
        exp = Experiment(data_manager, learning_model)

        exp.run_experiment()

        self.assertEquals(0, exp.get_r2(SplitTypes.Test))
    def test_experiment(self):
        output_filename_header = FileLoader.create_output_file()
        time.sleep(1)
        loaded_algorithm_combinations = FileLoader.read_csv_file("../Datasets/test.csv")
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        # feature_eliminator = SelectKBest(f_regression,k=k_value)

        print (loaded_algorithm_combinations[0])
        output_filename = FileLoader.create_output_file()

        for i in range(0, 80):
            normalizer = self.getnormalizer(loaded_algorithm_combinations[i][0])

            feature_eliminator = self.getfeature_eliminator(loaded_algorithm_combinations[i][1])
            the_model = self.get_model(loaded_algorithm_combinations[i][2])

            print "taking ", type(normalizer).__name__, "and feature selector ", type(
                feature_eliminator
            ).__name__, "model", type(the_model).__name__
            FileLoader.write_model_in_file(
                output_filename_header,
                type(normalizer).__name__,
                type(feature_eliminator).__name__,
                type(the_model).__name__,
                "",
                "",
                "",
                "",
                "",
            )

            the_data_manager = DataManager(feature_eliminator, normalizer=normalizer)
            the_data_manager.set_data(loaded_data)
            the_data_manager.split_data(test_split=0.15, train_split=0.70)
            exp = Experiment(the_data_manager, the_model)

            exp.run_experiment()
            # arr_selected = feature_eliminator.get_support(indices=True)

            # if(exp.get_r2(SplitTypes.Train) > 0 and exp.get_r2(SplitTypes.Valid) > 0 and exp.get_r2(SplitTypes.Test) >  0):
            FileLoader.write_model_in_file(
                output_filename,
                type(normalizer).__name__,
                type(feature_eliminator).__name__,
                type(the_model).__name__,
                "",
                exp.fitness_matrix[0],
                exp.get_r2(SplitTypes.Train),
                exp.get_r2(SplitTypes.Valid),
                exp.get_r2(SplitTypes.Test),
            )
    def test_experiment_sum_of_squares_zeros_test(self):
        the_data_manager = DataManager()
        an_array_of_all_ones = np.ones((37, 397))
        the_model = svm.SVR()
        the_data_manager.set_data(an_array_of_all_ones)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        exp = Experiment(the_data_manager, the_model)

        exp.run_experiment()
        sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)

        expected = 0
        self.assertEquals(expected, sum_of_squares_test)
    def test_experiment_svm_svr_37dataset_r2_train(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        the_data_manager = DataManager()
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        the_model = svm.SVR()
        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()

        r2_train = exp.get_r2(SplitTypes.Train)
        expected_svm_r2_value = 0.93994377385638073
        self.assertEqual(r2_train, expected_svm_r2_value)
    def test_experiment_all_zeros_r2_1(self):
        the_data_manager = DataManager()
        array_all_zeroes = np.zeros((37, 397))
        the_data_manager.set_data(array_all_zeroes)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)

        the_model = svm.SVR()
        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()

        r2_train = exp.get_r2(SplitTypes.Train)
        expected = 1.0
        self.assertEqual(r2_train, expected)
    def test_experiment_svr_37dataset_r2_test(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        the_data_manager = DataManager()
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        the_model = svm.SVR()
        exp = Experiment(the_data_manager, the_model)

        exp.run_experiment()

        r2_test = exp.get_r2(SplitTypes.Test)
        expected_svm_r2_value = -0.33005242525900247
        self.assertEqual(r2_test, expected_svm_r2_value)
    def test_split_merge_csv_4_25_8(self):
        file_loader = FileLoader()
        data_manager = DataManager()
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        result = file_loader.load_file(file_path)
        data_manager.set_data(result)
        data_manager.split_data(test_split=0.11,train_split=0.22)

        test_shapes = np.zeros((4, 397)).shape
        valid_shapes = np.zeros((25,397)).shape
        train_shapes = np.zeros((8, 397)).shape
        expected = np.array([test_shapes, valid_shapes, train_shapes])
        result = np.array([data_manager.datum[SplitTypes.Test].shape, data_manager.datum[SplitTypes.Valid].shape, data_manager.datum[SplitTypes.Train].shape])
        self.assertTrue(np.array_equal(result, expected))
    def test_experiment_sum_of_squares_real37_test(self):
        file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
        loaded_data = FileLoader.load_file(file_path)
        the_data_manager = DataManager()
        the_data_manager.set_data(loaded_data)
        the_model = svm.SVR()
        the_data_manager.split_data(test_split=0.19, train_split=0.62)
        exp = Experiment(the_data_manager, the_model)

        exp.run_experiment()
        sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)

        expected = 6.708898437500002

        self.assertAlmostEqual(expected, sum_of_squares_test)
    def test_split_merge_csv_7_7_23(self):

         file_loader = FileLoader()
         data_manager = DataManager()
         file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
         result = file_loader.load_file(file_path)
         data_manager.set_data(result)
         data_manager.split_data(test_split=0.19,train_split=0.62)


         valid_and_test_shapes = (7, 397)
         train_shapes = (23, 397)
         expected = np.array([valid_and_test_shapes, valid_and_test_shapes, train_shapes])
         result = np.array([data_manager.datum[SplitTypes.Test].shape, data_manager.datum[SplitTypes.Valid].shape, data_manager.datum[SplitTypes.Train].shape])
         self.assertTrue(np.array_equal(result, expected))
 def test_split_into_target_and_input(self):
     file_loader = FileLoader()
     data_manager = DataManager()
     file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
     result = file_loader.load_file(file_path)
     data_manager.set_data(result)
     data_manager.split_data(test_split=0.11,train_split=0.22)
     test_shapes_input = np.zeros((4, 396)).shape
     valid_shapes_input = np.zeros((25,396)).shape
     train_shapes_input = np.zeros((8, 396)).shape
     test_shapes_target = np.zeros((4, )).shape
     valid_shapes_target = np.zeros((25,)).shape
     train_shapes_target = np.zeros((8, )).shape
     expected = np.array([test_shapes_input, valid_shapes_input, train_shapes_input, test_shapes_target, valid_shapes_target, train_shapes_target])
     result = np.array([data_manager.inputs[SplitTypes.Test].shape, data_manager.inputs[SplitTypes.Valid].shape, data_manager.inputs[SplitTypes.Train].shape, data_manager.targets[SplitTypes.Test].shape, data_manager.targets[SplitTypes.Valid].shape, data_manager.targets[SplitTypes.Train].shape])
     self.assertTrue(np.array_equal(result, expected))
Пример #11
0
from FileLoader import FileLoader
from DataManager import DataManager
from src.Population import Population

file_path = "../Dataset/00-91-Drugs-All-In-One-File.csv"
loaded_data = FileLoader.load_file(file_path)

data_manager = DataManager(normalizer=None)
data_manager.set_data(loaded_data)
data_manager.split_data(test_split=0.15, train_split=0.70)

population = Population()
population.load_data()
for i in range (1,50):
    print("row", i, population.data[i].sum())