def evaluate(approach_id, metric):
    if not _is_running_in_project():
        print("You must use the driftai CLI inside a driftai project directory")
        return

    if not Approach.collection().exists(approach_id):
        print("Approach with id {} does not exist.".format(approach_id))
        return

    approach = Approach.load(approach_id)
    r = ResultReport(approach=approach,
                     metrics=[str_to_metric_fn[m] for m in metric])
    r.as_dataframe() \
        .to_csv(approach_id + "_evaluation.csv", index=False)
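# For reference, a minimal sketch of the str_to_metric_fn lookup that
# evaluate() relies on: it maps metric names passed on the command line to
# the metric functions consumed by ResultReport. The entries below are an
# assumption for illustration (based on the metrics exercised in the tests);
# the actual table is defined elsewhere in driftai and may contain more.
from driftai.result_report import f1, precision, recall

str_to_metric_fn = {
    "recall": recall,
    "precision": precision,
    "f1": f1,
}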
class ResultReportTest(unittest.TestCase):
    def setUp(self):
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.path_to_dataset = testenv.MOCK_DATASET
        self.path_to_test_dir = testenv.TEST_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME

        # Generate a project
        self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

        # Add a dataset
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Generate a subdataset
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Set up the approach
        self.approach = Approach(self.p, "logistic_regression", self.sbds,
                                 path=str(Path(testenv.TEST_PATH, "lr")))
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

        # Generate the runs
        import_from("test.lr.logistic_regression", "LogisticRegressionApproach")().run()

    def tearDown(self):
        testenv.delete_mock_projects()

    def test_create_result_report(self):
        metrics = ["recall", "precision", "f1"]
        r = ResultReport(approach=Approach.load(self.approach.id),
                         metrics=[recall, precision, f1])
        df = r.as_dataframe()
        self.assertTrue(all(m in df.columns for m in metrics))

    def test_using_sklearn_metrics(self):
        from sklearn.metrics import classification_report
        r = ResultReport(approach=Approach.load(self.approach.id),
                         metrics=[classification_report])
        df = r.as_dataframe()
        self.assertIsNotNone(df.classification_report[0])
class MulticlassResultReportTest(unittest.TestCase):
    def setUp(self):
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.path_to_dataset = testenv.IRIS_DATASET
        self.path_to_test_dir = testenv.TEST_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME

        # Generate a project
        self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)

        # Add a dataset
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Generate a subdataset
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Set up the approach
        self.approach = Approach(self.p, "decision_tree", self.sbds,
                                 path=str(Path(testenv.TEST_PATH, "dt")))
        shutil.copyfile(testenv.IRIS_APPROACH, str(self.approach.script_path))
        self.approach.save()

        # Generate the runs
        import_from("test.dt.decision_tree", "DecisionTreeApproach")().run()

    def tearDown(self):
        testenv.delete_mock_projects()

    def test_create_result_report(self):
        metrics = [multiclass_recall, multiclass_precision, multiclass_f1]
        r = ResultReport(approach=Approach.load(self.approach.id),
                         metrics=metrics)
        df = r.as_dataframe()
        for m in [f.__name__ for f in metrics]:
            self.assertTrue(m in df.columns)
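# The multiclass_* metrics above are plain callables applied by ResultReport;
# the sklearn test in ResultReportTest (classification_report is passed
# directly) suggests a (y_true, y_pred) signature. A hypothetical sketch of
# such a metric, not necessarily driftai's actual definition:
from sklearn.metrics import recall_score

def multiclass_recall(y_true, y_pred):
    # Macro-averaged recall across all classes
    return recall_score(y_true, y_pred, average="macro")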
def status(approach_id):
    if not _is_running_in_project():
        print("You must use the driftai CLI inside a driftai project directory")
        return

    print("Loading approach data...")
    stat = Approach.load(approach_id).status
    if not stat["done"]:
        print("Approach {} is still running".format(approach_id))
        print(stat["progress_bar"]
              + " Done runs: " + str(stat["done_runs"])
              + " Total runs: " + str(stat["total_runs"]))
    else:
        print("There are no runs left for Approach {}!".format(approach_id))
class RunGeneratorTest(unittest.TestCase):
    def setUp(self):
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.p = Project(path=testenv.TEST_PATH, name=testenv.MOCK_PROJECT_NAME)
        self.ds = Dataset.read_file(path=testenv.MOCK_DATASET, first_line_heading=False)
        self.ds.save()
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()
        self.approach = Approach(self.p, "logistic_regression", self.sbds,
                                 path=str(Path(testenv.TEST_PATH, "lr")))
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

    def tearDown(self):
        testenv.delete_mock_projects()

    def test_generate_runs_from_subdataset(self):
        # Trick to load the runnable approach class generated from the copied script
        LogisticRegressionApproach = import_from("test.lr.logistic_regression",
                                                 "LogisticRegressionApproach")
        ra = LogisticRegressionApproach()

        # Generate the runs
        run_gens = RunGenerator.from_runnable_approach(ra)

        # Write the runs to the database
        ra.approach.runs = run_gens
        ra.approach.update()

        # Reload the approach to test that the runs were correctly stored
        approach = Approach.load(ra.approach.id)
        self.assertEqual(len(approach.runs), len(run_gens))
class ApproachTest(unittest.TestCase):
    def setUp(self):
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.p = Project(path=testenv.TEST_PATH, name=testenv.MOCK_PROJECT_NAME)
        self.ds = Dataset.read_file(path=testenv.MOCK_DATASET, first_line_heading=False)
        self.ds.save()
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()
        self.approach = Approach(self.p, "logistic_regression", self.sbds,
                                 path=str(Path(testenv.TEST_PATH, "lr")))
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

    def tearDown(self):
        testenv.delete_mock_projects()

    def test_get_subdataset_runs(self):
        runnable = import_from("test.lr.logistic_regression", "LogisticRegressionApproach")
        runs = RunGenerator.from_runnable_approach(runnable())
        for run in runs:
            run.save()

        runs = Approach.load(self.approach.id).runs
        self.assertTrue(len(runs) > 0)
        for run in runs:
            self.assertIsInstance(run, Run)
        return runs
def run(approach_id, resume):
    if not _is_running_in_project():
        print("You must use the driftai CLI inside a driftai project directory")
        return

    if not Approach.collection().exists(approach_id):
        print("Approach with id {} does not exist.".format(approach_id))
        return

    sys.path.append(Project.load().path)
    namespace = "approaches." + approach_id
    cls_name = to_camel_case(approach_id) + "Approach"
    approach_cls = import_from(namespace, cls_name)
    approach_cls().run(resume=resume)
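# run() resolves the approach class by naming convention: an approach id such
# as "logistic_regression" maps to a LogisticRegressionApproach class inside
# the approaches.logistic_regression module. A hypothetical to_camel_case
# matching that behavior (the real helper lives in driftai's utilities and
# may differ), assuming snake_case approach ids:
def to_camel_case(snake_str):
    # "logistic_regression" -> "LogisticRegression"
    return "".join(part.title() for part in snake_str.split("_"))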
from pathlib import Path
import shutil

from driftai.data import Dataset, SubDataset
from driftai.run import RunGenerator
from driftai.result_report import ResultReport, recall, precision
from driftai import Approach, Project

path_to_project = Path(r"..").absolute()
project_name = "driftai"
project_path = Path(path_to_project, project_name)
if not project_path.is_dir():
    exit(-1)

proj = Project.load(str(project_path))

# Reuse the latest subdataset
sbds = proj.get_subdataset(how="latest")

# Set up the approach
example_approach_path = r"./test/resources/approach_example.py"
a = Approach(proj, "example_approach", sbds)

# Run the experiment
a.run(kind="single")

rr = ResultReport(results_path=str(Path(sbds.path, "results")),
                  metrics=[recall, precision])
print(rr.as_dataframe())
proj = Project(name="test_project", path=path_to_project) # add a datasource path_to_dataset = str(Path(r"./test/resources/test_dataset.csv").absolute()) ds = Dataset.read_file(path_to_dataset) ds.set_project_path(proj.path) ds.save() # create subdataset sbds = SubDataset(ds, method="k_fold", by=5) sbds.save() # set apporach example_approach_path = r"./test/resources/approach_example.py" param_path = r"./test/resources/parameters_example.yml" a = Approach(proj, "approach_example", sbds) shutil.copyfile(example_approach_path, str(a.script_path)) shutil.copyfile(param_path, str(a.params_path)) a.save() # generate runs rg = RunGenerator.from_approach(a) # run experiment a.run(kind="single") rr = ResultReport(results_path=str(Path(a.path, "results")), metrics = [recall, precision]) print(rr.as_dataframe())
class RunTest(unittest.TestCase):
    def setUp(self):
        set_project_path(testenv.MOCK_PROJECT_PATH)
        self.path_to_dataset = testenv.MOCK_DATASET
        self.path_to_test_dir = testenv.TEST_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME

        self.p = Project(path=self.path_to_test_dir, name=self.aux_project_name)
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()
        self.sbds = SubDataset(self.ds, method="k_fold", by=5)
        self.sbds.save()
        self.approach = Approach(self.p, "test_approach", self.sbds)
        shutil.copyfile(testenv.APPROACH_EXAMPLE, str(self.approach.script_path))
        self.approach.save()

    def tearDown(self):
        testenv.delete_mock_projects()

    def test_create_run(self):
        Run(
            approach_id=self.approach.id,
            subdataset=self.sbds,
            subdataset_set="A",
            run_parameters={"param1": 1, "param2": 2},
        )

    def test_create_run_and_save(self):
        run = Run(
            approach_id=self.approach.id,
            subdataset=self.sbds,
            subdataset_set="A",
            run_parameters={"param1": 1, "param2": 2},
        )
        run.save()
        data = Run.load(self.approach.id, run.id)
        self.assertIsNotNone(data)
        return run

    def test_load_run(self):
        run1 = self.test_create_run_and_save()
        run2 = Run.load(self.approach.id, run1.id)
        self.assertEqual(run1.id, run2.id)

    def test_create_runpool(self):
        # Force a reload of the runs from the database
        self.approach = Approach.load(self.approach.id)
        runpool = RunPool(self.approach.runs)
        for run in runpool.iteruns():
            self.assertTrue(isinstance(run, Run))

    def test_iterate_all_runs_runpool(self):
        self.approach = Approach.load(self.approach.id)
        runpool = RunPool(self.approach.runs)
        i = 0
        for run in runpool.iteruns():
            self.assertTrue(isinstance(run, Run))
            run.status = "finished"
            i += 1
        self.assertEqual(i, runpool.iter)

    def test_iterate_all_runs_runpool_twice(self):
        self.approach = Approach.load(self.approach.id)
        runpool = RunPool(self.approach.runs)
        i = 0
        for run in runpool.iteruns():
            self.assertTrue(isinstance(run, Run))
            run.status = "finished"
            i += 1
        self.assertEqual(i, runpool.iter)

        # A second pass should yield nothing: every run is already finished
        i = 0
        no_iterations = True
        for run in runpool.iteruns():
            no_iterations = False
        self.assertTrue(no_iterations)
def generate_approach(identifier, subdataset_id):
    a = Approach(
        project=Project.load(),
        name=identifier,
        subdataset=SubDataset.load(subdataset_id))
    a.save()