def setUp(self):
    """
    Build the test fixture and run the example approach once.

    Sets the project path, creates a project in the test directory, saves
    a dataset read from the mock dataset file and a ``k_fold`` (``by=5``)
    subdataset of it, saves a "logistic_regression" approach whose script
    is copied from the example approach, and finally imports
    ``LogisticRegressionApproach`` and calls its ``run`` method.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Names and locations kept on the instance for the test methods
    self.path_to_dataset = testenv.MOCK_DATASET
    self.path_to_test_dir = testenv.TEST_PATH
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME

    # Project
    self.p = Project(name=self.aux_project_name, path=self.path_to_test_dir)

    # Dataset
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach: the example script is copied to the approach's script path
    approach_dir = str(Path(testenv.TEST_PATH, "lr"))
    self.approach = Approach(
        self.p, "logistic_regression", self.sbds, path=approach_dir
    )
    script_destination = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_destination)
    self.approach.save()

    # Runs
    approach_class = import_from(
        "test.lr.logistic_regression", "LogisticRegressionApproach"
    )
    approach_class().run()
def setUp(self):
    """
    Build the iris test fixture and run the decision tree approach once.

    Sets the project path, creates a project in the test directory, saves
    a dataset read from the iris dataset file and a ``k_fold`` (``by=5``)
    subdataset of it, saves a "decision_tree" approach whose script is
    copied from the iris approach, and finally imports
    ``DecisionTreeApproach`` and calls its ``run`` method.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Names and locations kept on the instance for the test methods
    self.path_to_dataset = testenv.IRIS_DATASET
    self.path_to_test_dir = testenv.TEST_PATH
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME

    # Project
    self.p = Project(name=self.aux_project_name, path=self.path_to_test_dir)

    # Dataset
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach: the iris script is copied to the approach's script path
    approach_dir = str(Path(testenv.TEST_PATH, "dt"))
    self.approach = Approach(
        self.p, "decision_tree", self.sbds, path=approach_dir
    )
    script_destination = str(self.approach.script_path)
    shutil.copyfile(testenv.IRIS_APPROACH, script_destination)
    self.approach.save()

    # Runs
    approach_class = import_from("test.dt.decision_tree", "DecisionTreeApproach")
    approach_class().run()
def generate_subdataset(dataset, method, by):
    """
    Create and save a SubDataset of a stored dataset, then print its id.

    Parameters
    ----------
    dataset
        Value handed to ``Dataset.load`` to obtain the parent dataset.
    method
        Split method forwarded to ``SubDataset``.
    by
        Split parameter. Converted with ``int`` when ``method`` is
        ``"k_fold"`` and with ``float`` for any other method.
    """
    # The conversion happens before the dataset is loaded
    split_by = int(by) if method == "k_fold" else float(by)

    subdataset = SubDataset(
        dataset=Dataset.load(dataset), method=method, by=split_by
    )
    subdataset.save()
    print("Subdataset with id {} created".format(subdataset.id))
def test_generate_subdataset(self, method="train_test", by=0.8):
    """
    Build a SubDataset from a freshly saved Dataset.

    A Project is instantiated, the dataset file is read and saved, and a
    SubDataset is created from it using the ``method`` and ``by``
    parameters.

    Asserts
    -------
    - The indices of the subdataset are not None

    Returns
    -------
    The SubDataset instance that was created
    """
    Project(name=self.aux_project_name, path=self.path_to_test_dir)

    dataset = Dataset.read_file(path=self.path_to_dataset)
    dataset.save()

    subdataset = SubDataset(dataset=dataset, method=method, by=by)
    self.assertIsNotNone(subdataset.indices)
    return subdataset
def test_create_dataset(self):
    """
    Read the dataset file into a new Dataset (``first_line_heading=False``).

    Asserts
    -------
    - The created object is an instance of Dataset
    - Its ``datasource`` attribute is an instance of FileDatasource

    Returns
    -------
    The Dataset instance read from the dataset file
    """
    Project(path=self.path_to_test_dir, name=self.aux_project_name)

    dataset = Dataset.read_file(
        path=self.path_to_dataset, first_line_heading=False
    )
    self.assertIsInstance(dataset, Dataset)
    self.assertIsInstance(dataset.datasource, FileDatasource)
    return dataset
def test_get_latest_subdataset(self):
    """
    Save two subdatasets and ask the project for the last one.

    Asserts
    -------
    - The id returned through ``get_last_subdataset`` equals the id of
      the subdataset that was saved second
    """
    project = self.test_load_project()

    dataset = Dataset.read_file(
        path=testenv.MOCK_DATASET, first_line_heading=False
    )
    dataset.save()

    split_method = "k_fold"
    folds = 5

    first = SubDataset(dataset, method=split_method, by=folds)
    first.save()
    second = SubDataset(dataset, method=split_method, by=folds * 2)
    second.save()

    latest = project.get_last_subdataset()
    self.assertEqual(latest.id, second.id)
def test_load_dataset(self):
    """
    Save a Dataset, load it back by id and compare both instances.

    ``get_info`` is called on the loaded instance before the comparison.

    Asserts
    -------
    - The saved and the loaded Dataset compare equal

    Returns
    -------
    The loaded Dataset instance
    """
    saved = self.test_save_dataset()

    loaded = Dataset.load(saved.id)
    loaded.get_info()

    self.assertEqual(saved, loaded)
    return loaded
def test_save_dataset(self):
    """
    Create a Dataset through ``test_create_dataset`` and save it.

    Asserts
    -------
    - Loading the dataset by its id does not return None

    Returns
    -------
    The Dataset instance that was saved
    """
    dataset = self.test_create_dataset()
    dataset.save()

    reloaded = Dataset.load(dataset.id)
    self.assertIsNotNone(reloaded)
    return dataset
def setUp(self):
    """
    Build the test fixture: project, dataset, subdataset and approach.

    The dataset is read from the mock dataset file with
    ``first_line_heading=False``; the subdataset uses ``k_fold`` with
    ``by=5``; the "logistic_regression" approach receives a copy of the
    example approach script before it is saved.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Project
    self.p = Project(name=testenv.MOCK_PROJECT_NAME, path=testenv.TEST_PATH)

    # Dataset
    self.ds = Dataset.read_file(
        path=testenv.MOCK_DATASET, first_line_heading=False
    )
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach: the example script is copied to the approach's script path
    approach_dir = str(Path(testenv.TEST_PATH, "lr"))
    self.approach = Approach(
        self.p, "logistic_regression", self.sbds, path=approach_dir
    )
    script_destination = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_destination)
    self.approach.save()
def setUp(self):
    """
    Build the test fixture: project, dataset, subdataset and approach.

    Sets the project path, creates a project in the test directory, saves
    a dataset read from the mock dataset file and a ``k_fold`` (``by=5``)
    subdataset of it, and saves a "test_approach" approach whose script
    is copied from the example approach.
    """
    set_project_path(testenv.MOCK_PROJECT_PATH)

    # Names and locations kept on the instance for the test methods
    self.path_to_dataset = testenv.MOCK_DATASET
    self.path_to_test_dir = testenv.TEST_PATH
    self.aux_project_name = testenv.MOCK_PROJECT_NAME
    self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
    self.project_default_name = testenv.DEFAULT_PROJECT_NAME

    # Project
    self.p = Project(name=self.aux_project_name, path=self.path_to_test_dir)

    # Dataset
    self.ds = Dataset.read_file(path=self.path_to_dataset)
    self.ds.save()

    # Subdataset
    self.sbds = SubDataset(self.ds, method="k_fold", by=5)
    self.sbds.save()

    # Approach: the example script is copied to the approach's script path
    self.approach = Approach(self.p, "test_approach", self.sbds)
    script_destination = str(self.approach.script_path)
    shutil.copyfile(testenv.APPROACH_EXAMPLE, script_destination)
    self.approach.save()
# Project: created as "test_project" under the current working directory
path_to_project = str(Path(".").absolute())
project_name = "test_project"
project_path = Path(path_to_project, project_name)

# When the project directory already exists, the existing project is loaded
# and passed to clean() before a new one is created.
if project_path.is_dir():
    clean(Project.load(project_path))
proj = Project(name="test_project", path=path_to_project)

# Dataset: read from the test resources and saved under the project's path
path_to_dataset = str(Path(r"./test/resources/test_dataset.csv").absolute())
ds = Dataset.read_file(path_to_dataset)
ds.set_project_path(proj.path)
ds.save()

# Subdataset: k_fold with by=5
sbds = SubDataset(ds, method="k_fold", by=5)
sbds.save()

# Approach: the example script and parameter file are copied to the paths
# exposed by the approach before it is saved
example_approach_path = r"./test/resources/approach_example.py"
param_path = r"./test/resources/parameters_example.yml"
a = Approach(proj, "approach_example", sbds)
shutil.copyfile(example_approach_path, str(a.script_path))
shutil.copyfile(param_path, str(a.params_path))
a.save()
def test_automatically_detect_regression(self):
    """
    Read the housing dataset labelled by ``median_house_value``.

    Asserts
    -------
    - The dataset's ``problem_type`` is "regression"
    """
    Project(path=self.path_to_test_dir, name=self.aux_project_name)

    dataset = Dataset.read_file(
        "test/resources/housing.csv", label="median_house_value"
    )
    self.assertEqual(dataset.problem_type, "regression")
def test_automatically_detect_binary_clf(self):
    """
    Read the titanic dataset labelled by ``Survived``.

    Asserts
    -------
    - The dataset's ``problem_type`` is "binary_clf"
    """
    Project(path=self.path_to_test_dir, name=self.aux_project_name)

    dataset = Dataset.read_file("test/resources/titanic.csv", label="Survived")
    self.assertEqual(dataset.problem_type, "binary_clf")
def test_automatically_detect_clf(self):
    """
    Read the Iris dataset without passing a label.

    Asserts
    -------
    - The dataset's ``problem_type`` is "clf"
    """
    Project(path=self.path_to_test_dir, name=self.aux_project_name)

    dataset = Dataset.read_file("test/resources/Iris.csv")
    self.assertEqual(dataset.problem_type, "clf")