def test_create_with_validation_dataset(self):
    """Create an experiment that is validated with a separate dataset.

    Uploads ``tests/data/test_1_vald.csv`` as the validation dataset,
    creates an experiment that references it, and checks the title and
    the reported validation scheme of the returned experiment.
    """
    # Upload the validation dataset to the test project.
    feature_cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
    target_col = 'class'
    vald_df = pd.read_csv('tests/data/test_1_vald.csv')
    dataset_client = DatasetClient(self.project.hid)
    vald_dataset = dataset_client.add_dataset_if_not_exists(
        vald_df[feature_cols], vald_df[target_col])

    ec = ExperimentClient(self.project.hid)
    self.assertIsNotNone(ec)

    # The test expects the project to hold no experiments at this point.
    self.assertEqual(ec.get_experiments(), [])

    # NOTE(review): this value is passed positionally right after the
    # stratify flag; presumably it is the train-split ratio -- confirm
    # against the signature of add_experiment_if_not_exists.
    train_split = 0.72

    experiment = ec.add_experiment_if_not_exists(
        self.dataset,
        vald_dataset,
        self.expt_title,
        self.project.task,
        self.validation_kfolds,
        self.validation_shuffle,
        self.validation_stratify,
        train_split,
        self.algorithms,
        self.metric,
        self.tuning_mode,
        self.time_constraint,
        self.create_enseble,
    )

    self.assertIsNotNone(experiment)
    self.assertEqual(experiment.title, self.expt_title)
    self.assertEqual(experiment.validation_scheme, "With dataset")
def test_add_existing_dataset(self):
    """Adding the same data twice must leave a single new dataset.

    The dataset count is compared against the count observed at the
    start of the test, so datasets already in the project do not matter.
    """
    dc = DatasetClient(self.project.hid)
    self.assertIsNotNone(dc)

    # Baseline: number of datasets before this test adds anything.
    init_datasets_cnt = len(dc.get_datasets())

    # First insertion creates exactly one dataset.
    dc.add_dataset_if_not_exists(self.X, self.y)
    self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 1)

    # Adding the very same data again should not create another dataset,
    # so the count stays at baseline + 1.
    dc.add_dataset_if_not_exists(self.X, self.y)
    self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 1)
def test_create_and_delete(self):
    """Add two datasets, delete one, and check the dataset count each time.

    Counts are compared against the number of datasets seen at the start
    of the test.
    """
    dc = DatasetClient(self.project.hid)
    self.assertIsNotNone(dc)

    # Baseline: number of datasets before this test adds anything.
    init_datasets_cnt = len(dc.get_datasets())

    # One dataset with a target and one without (y=None); the test expects
    # them to be stored as two separate datasets.
    my_dataset_1 = dc.add_dataset_if_not_exists(self.X, self.y)
    dc.add_dataset_if_not_exists(self.X, y=None)
    self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 2)

    # Deleting the first dataset must leave only the second one behind.
    dc.delete_dataset(my_dataset_1.hid)
    self.assertEqual(len(dc.get_datasets()), init_datasets_cnt + 1)
def test_add_dataset_for_prediction(self):
    """Add a dataset without a target and fetch it back by its hid.

    The dataset is added with ``None`` as the second argument, then read
    back with ``get_dataset`` and compared on ``hid`` and ``title``.
    """
    dc = DatasetClient(self.project.hid)
    self.assertIsNotNone(dc)

    # The test expects the project to start without any datasets.
    self.assertEqual(len(dc.get_datasets()), 0)

    # Features only: None is passed in place of the target.
    my_dataset = dc.add_dataset_if_not_exists(self.X, None)
    self.assertIsNotNone(my_dataset)
    self.assertEqual(len(dc.get_datasets()), 1)

    # The dataset fetched by hid must match the one returned on creation.
    fetched = dc.get_dataset(my_dataset.hid)
    self.assertEqual(my_dataset.hid, fetched.hid)
    self.assertEqual(my_dataset.title, fetched.title)
def test_add_dataset_for_training(self):
    """Add a dataset with a target, fetch it back, and check its str().

    The dataset is added with ``self.X`` and ``self.y``, read back with
    ``get_dataset`` and compared on ``hid`` and ``title``. The string
    form of the fetched dataset must mention 'id', 'title' and 'file'.
    """
    dc = DatasetClient(self.project.hid)
    self.assertIsNotNone(dc)

    # The test expects the project to start without any datasets.
    self.assertEqual(len(dc.get_datasets()), 0)

    my_dataset = dc.add_dataset_if_not_exists(self.X, self.y)
    self.assertIsNotNone(my_dataset)
    self.assertEqual(len(dc.get_datasets()), 1)

    # The dataset fetched by hid must match the one returned on creation.
    fetched = dc.get_dataset(my_dataset.hid)
    self.assertEqual(my_dataset.hid, fetched.hid)
    self.assertEqual(my_dataset.title, fetched.title)

    # Exercise __str__; assertIn reports the missing substring on failure,
    # unlike assertTrue on an `in` expression.
    dataset_text = str(fetched)
    for expected in ('id', 'title', 'file'):
        self.assertIn(expected, dataset_text)
def setUp(self):
    """Prepare experiment settings, a project and its training dataset.

    Stores the experiment parameters on the instance, creates a project
    through ``ProjectClient`` and uploads ``tests/data/test_1.csv`` to it
    as ``self.dataset``.
    """
    # Project title gets a postfix from get_postfix().
    project_title = 'Test project-01' + get_postfix()
    project_task = 'bin_class'

    # Experiment description.
    self.expt_title = 'Test experiment-01'
    self.algorithms = ['xgb']
    self.metric = 'logloss'
    self.tuning_mode = 'Normal'
    self.time_constraint = 1
    self.create_enseble = False

    # Validation settings.
    self.validation_kfolds = 5
    self.validation_shuffle = True
    self.validation_stratify = True
    self.validation_train_split = None

    # Create the project the tests operate on.
    self.project_client = ProjectClient()
    self.project = self.project_client.create_project(
        title=project_title, task=project_task)

    # Upload the training data: four feature columns plus the 'class' target.
    train_df = pd.read_csv('tests/data/test_1.csv')
    feature_cols = ['sepal length', 'sepal width', 'petal length', 'petal width']
    target_col = 'class'
    features = train_df[feature_cols]
    labels = train_df[target_col]
    dataset_client = DatasetClient(self.project.hid)
    self.dataset = dataset_client.add_dataset_if_not_exists(features, labels)