Exemplo n.º 1
0
    def test_compute_prediction(self):
        """Train a model, then check prediction quality and dataset cleanup.

        Fits a single-algorithm ('rfc') experiment, computes predictions via
        ``Mljar.compute_prediction`` and checks that the project's dataset
        count is unchanged after a default prediction call and grows by one
        when ``keep_dataset=True`` is passed.
        """
        model = Mljar(project=self.proj_title,
                      experiment=self.expt_title,
                      algorithms=['rfc'],
                      metric='logloss',
                      validation_kfolds=3,
                      tuning_mode='Normal',
                      single_algorithm_time_limit=1)
        self.assertIsNotNone(model)
        # fit models and wait till all models are trained
        model.fit(X=self.X, y=self.y, dataset_title='My dataset')

        # identifiers required by compute_prediction, which is called on the
        # Mljar class rather than on the fitted instance
        project_id = model.project.hid
        model_id = model.selected_algorithm.hid

        dc = DatasetClient(project_id)
        init_datasets_cnt = len(dc.get_datasets())
        # compute predictions
        pred = Mljar.compute_prediction(self.X, model_id, project_id)
        # compute score; assertLess reports the actual score on failure
        score = self.mse(pred, self.y)
        self.assertLess(score, 0.9)
        # check if dataset was removed
        self.assertEqual(init_datasets_cnt, len(dc.get_datasets()))
        # run predictions again, but keep dataset; the returned predictions
        # are not needed here, only the effect on the dataset count
        Mljar.compute_prediction(self.X,
                                 model_id,
                                 project_id,
                                 keep_dataset=True)
        self.assertEqual(init_datasets_cnt + 1,
                         len(dc.get_datasets()))  # should be one more
Exemplo n.º 2
0
 def test_add_dataset_for_prediction(self):
     """Add a dataset without a target and read it back by its hid.

     Expects the project to start with no datasets, adds ``self.X`` with
     ``None`` as the second argument, then checks that the dataset fetched
     with ``get_dataset`` has the same ``hid`` and ``title``.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get datasets, there should be none
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 0)
     # add dataset
     my_dataset = dc.add_dataset_if_not_exists(self.X, None)
     self.assertIsNotNone(my_dataset)
     # get datasets
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 1)
     # fetching by hid must return the dataset that was just added
     my_dataset_2 = dc.get_dataset(my_dataset.hid)
     self.assertEqual(my_dataset.hid, my_dataset_2.hid)
     self.assertEqual(my_dataset.title, my_dataset_2.title)
Exemplo n.º 3
0
 def test_add_existing_dataset(self):
     """Check that adding the same dataset twice creates it only once.

     Calls ``add_dataset_if_not_exists`` twice with identical ``self.X`` and
     ``self.y`` and verifies that the project's dataset count increases by
     exactly one relative to the initial count.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get initial number of datasets
     init_datasets_cnt = len(dc.get_datasets())
     # add dataset
     dc.add_dataset_if_not_exists(self.X, self.y)
     # get datasets
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 1)
     # add the same dataset
     # it shouldn't be added
     dc.add_dataset_if_not_exists(self.X, self.y)
     # dataset count must still be only one above the initial count
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 1)
Exemplo n.º 4
0
 def test_create_and_delete(self):
     """Add two datasets, delete one, and check the dataset counts.

     Adds ``self.X`` once with ``self.y`` and once with ``y=None``, expects
     the count to rise by two, then deletes the first dataset and expects
     the count to be one above the initial value.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get initial number of datasets
     init_datasets_cnt = len(dc.get_datasets())
     # add one dataset with a target and one without; only the first is
     # referenced later, so the second return value is not kept
     my_dataset_1 = dc.add_dataset_if_not_exists(self.X, self.y)
     dc.add_dataset_if_not_exists(self.X, y=None)
     # get datasets
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 2)
     # delete added dataset
     dc.delete_dataset(my_dataset_1.hid)
     # check number of datasets
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), init_datasets_cnt + 1)
Exemplo n.º 5
0
 def test_add_dataset_for_training(self):
     """Add a dataset with a target, read it back, and check its ``str()``.

     Expects the project to start with no datasets, adds ``self.X`` together
     with ``self.y``, checks that the dataset fetched with ``get_dataset``
     has the same ``hid`` and ``title``, and that its string form contains
     the substrings 'id', 'title' and 'file'.
     """
     # setup dataset client
     dc = DatasetClient(self.project.hid)
     self.assertIsNotNone(dc)
     # get datasets, there should be none
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 0)
     # add dataset
     my_dataset = dc.add_dataset_if_not_exists(self.X, self.y)
     self.assertIsNotNone(my_dataset)
     # get datasets
     datasets = dc.get_datasets()
     self.assertEqual(len(datasets), 1)
     # fetching by hid must return the dataset that was just added
     my_dataset_2 = dc.get_dataset(my_dataset.hid)
     self.assertEqual(my_dataset.hid, my_dataset_2.hid)
     self.assertEqual(my_dataset.title, my_dataset_2.title)
     # test __str__ method; assertIn shows the full string on failure
     dataset_text = str(my_dataset_2)
     self.assertIn('id', dataset_text)
     self.assertIn('title', dataset_text)
     self.assertIn('file', dataset_text)