示例#1
0
    def setUp(self):
        """
        Prepare the fixture shared by the tests of this case.

        Sets the project path, then creates a project, a saved dataset, a
        saved "k_fold" subdataset (by=5) and a saved "logistic_regression"
        approach whose script is copied from the example approach. Finally
        the approach is imported and run so that runs exist.
        """
        set_project_path(testenv.MOCK_PROJECT_PATH)

        # Names and locations taken from the test environment
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_test_dir = testenv.TEST_PATH
        self.path_to_dataset = testenv.MOCK_DATASET

        # Project
        self.p = Project(name=self.aux_project_name,
                         path=self.path_to_test_dir)

        # Dataset, saved right after being read
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Subdataset built from the saved dataset
        self.sbds = SubDataset(dataset=self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Approach located in the "lr" folder of the test path
        approach_dir = Path(testenv.TEST_PATH, "lr")
        self.approach = Approach(self.p,
                                 "logistic_regression",
                                 self.sbds,
                                 path=str(approach_dir))
        script_destination = str(self.approach.script_path)
        shutil.copyfile(testenv.APPROACH_EXAMPLE, script_destination)
        self.approach.save()

        # Generate runs by instantiating the approach class and running it
        approach_class = import_from("test.lr.logistic_regression",
                                     "LogisticRegressionApproach")
        approach_class().run()
示例#2
0
    def setUp(self):
        """
        Prepare the fixture shared by the tests of this case.

        Sets the project path, then creates a project, a saved dataset read
        from the iris dataset file, a saved "k_fold" subdataset (by=5) and a
        saved "decision_tree" approach whose script is copied from the iris
        approach. Finally the approach is imported and run so that runs
        exist.
        """
        set_project_path(testenv.MOCK_PROJECT_PATH)

        # Names and locations taken from the test environment
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_test_dir = testenv.TEST_PATH
        self.path_to_dataset = testenv.IRIS_DATASET

        # Project
        self.p = Project(name=self.aux_project_name,
                         path=self.path_to_test_dir)

        # Dataset, saved right after being read
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Subdataset built from the saved dataset
        self.sbds = SubDataset(dataset=self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Approach located in the "dt" folder of the test path
        approach_dir = Path(testenv.TEST_PATH, "dt")
        self.approach = Approach(self.p,
                                 "decision_tree",
                                 self.sbds,
                                 path=str(approach_dir))
        script_destination = str(self.approach.script_path)
        shutil.copyfile(testenv.IRIS_APPROACH, script_destination)
        self.approach.save()

        # Generate runs by instantiating the approach class and running it
        approach_class = import_from("test.dt.decision_tree",
                                     "DecisionTreeApproach")
        approach_class().run()
示例#3
0
文件: cli.py 项目: mfarre/DriftAI
def generate_subdataset(dataset, method, by):
    """
    Create a SubDataset from a stored dataset, save it and print its id.

    Parameters
    ----------
    dataset
        Value handed to ``Dataset.load`` to obtain the source dataset.
    method
        Partition method. With "k_fold" the ``by`` value is converted with
        ``int``; with any other method it is converted with ``float``.
    by
        Partition parameter, converted as described above before use.
    """
    # "k_fold" takes a whole number, every other method a float
    by = int(by) if method == "k_fold" else float(by)

    source = Dataset.load(dataset)
    sbds = SubDataset(dataset=source, method=method, by=by)
    sbds.save()
    print("Subdataset with id {} created".format(sbds.id))
示例#4
0
    def test_generate_subdataset(self, method="train_test", by=0.8):
        """
        Creates a Project, reads and saves a Dataset, and builds a
        SubDataset from that Dataset using the given method and by
        parameters.

        Asserts
        -------
            - The indices of the SubDataset are not None

        Returns
        -------
        The SubDataset instance that was built (it is not saved here)
        """
        Project(name=self.aux_project_name, path=self.path_to_test_dir)

        dataset = Dataset.read_file(path=self.path_to_dataset)
        dataset.save()

        subdataset = SubDataset(dataset=dataset, method=method, by=by)
        self.assertIsNotNone(subdataset.indices)
        return subdataset
示例#5
0
 def test_create_dataset(self):
     """
     Creates a Project and reads the dataset file into a new Dataset,
     with first_line_heading set to False.

     Asserts
     -------
         - The object read is an instance of Dataset
         - Its datasource attribute is an instance of FileDatasource

     Returns
     -------
     The Dataset instance read from the dataset file
     """
     Project(path=self.path_to_test_dir, name=self.aux_project_name)

     dataset = Dataset.read_file(first_line_heading=False,
                                 path=self.path_to_dataset)

     self.assertIsInstance(dataset, Dataset)
     self.assertIsInstance(dataset.datasource, FileDatasource)
     return dataset
示例#6
0
    def test_get_latest_subdataset(self):
        """
        Saves two "k_fold" subdatasets of the same dataset, one after the
        other, and asks the loaded project for its last subdataset.

        Asserts
        -------
            - The id of the subdataset returned by get_last_subdataset
              equals the id of the subdataset saved second
        """
        project = self.test_load_project()

        dataset = Dataset.read_file(first_line_heading=False,
                                    path=testenv.MOCK_DATASET)
        dataset.save()

        folds = 5

        first = SubDataset(dataset=dataset, method="k_fold", by=folds)
        first.save()
        second = SubDataset(dataset=dataset, method="k_fold", by=folds * 2)
        second.save()

        latest = project.get_last_subdataset()
        self.assertEqual(latest.id, second.id)
示例#7
0
    def test_load_dataset(self):
        """
        Obtains a saved Dataset from test_save_dataset and loads it again
        by id as a second Dataset instance.

        Asserts
        -------
            - The saved and the loaded Dataset compare equal

        Returns
        -------
        The loaded Dataset instance
        """
        saved = self.test_save_dataset()
        loaded = Dataset.load(saved.id)

        # Called between loading and comparing; its result is not used
        loaded.get_info()

        self.assertEqual(saved, loaded)
        return loaded
示例#8
0
    def test_save_dataset(self):
        """
        Obtains a Dataset from test_create_dataset (which also creates the
        Project) and saves it.

        Asserts
        -------
            - Loading the Dataset by its id does not return None

        Returns
        -------
        The saved Dataset instance
        """
        dataset = self.test_create_dataset()
        dataset.save()

        reloaded = Dataset.load(dataset.id)
        self.assertIsNotNone(reloaded)
        return dataset
示例#9
0
    def setUp(self):
        """
        Prepare the fixture shared by the tests of this case.

        Sets the project path, then creates a project, a saved dataset
        (read with first_line_heading=False), a saved "k_fold" subdataset
        (by=5) and a saved "logistic_regression" approach whose script is
        copied from the example approach.
        """
        set_project_path(testenv.MOCK_PROJECT_PATH)

        # Project
        self.p = Project(name=testenv.MOCK_PROJECT_NAME,
                         path=testenv.TEST_PATH)

        # Dataset, saved right after being read
        self.ds = Dataset.read_file(first_line_heading=False,
                                    path=testenv.MOCK_DATASET)
        self.ds.save()

        # Subdataset built from the saved dataset
        self.sbds = SubDataset(dataset=self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Approach located in the "lr" folder of the test path
        approach_dir = Path(testenv.TEST_PATH, "lr")
        self.approach = Approach(self.p,
                                 "logistic_regression",
                                 self.sbds,
                                 path=str(approach_dir))
        script_destination = str(self.approach.script_path)
        shutil.copyfile(testenv.APPROACH_EXAMPLE, script_destination)
        self.approach.save()
示例#10
0
    def setUp(self):
        """
        Prepare the fixture shared by the tests of this case.

        Sets the project path, then creates a project, a saved dataset, a
        saved "k_fold" subdataset (by=5) and a saved "test_approach"
        approach whose script is copied from the example approach. No
        explicit path is given to the approach.
        """
        set_project_path(testenv.MOCK_PROJECT_PATH)

        # Names and locations taken from the test environment
        self.project_default_name = testenv.DEFAULT_PROJECT_NAME
        self.path_to_auxproj = testenv.MOCK_PROJECT_PATH
        self.aux_project_name = testenv.MOCK_PROJECT_NAME
        self.path_to_test_dir = testenv.TEST_PATH
        self.path_to_dataset = testenv.MOCK_DATASET

        # Project
        self.p = Project(name=self.aux_project_name,
                         path=self.path_to_test_dir)

        # Dataset, saved right after being read
        self.ds = Dataset.read_file(path=self.path_to_dataset)
        self.ds.save()

        # Subdataset built from the saved dataset
        self.sbds = SubDataset(dataset=self.ds, method="k_fold", by=5)
        self.sbds.save()

        # Approach with its script taken from the example approach
        self.approach = Approach(self.p, "test_approach", self.sbds)
        script_destination = str(self.approach.script_path)
        shutil.copyfile(testenv.APPROACH_EXAMPLE, script_destination)
        self.approach.save()
示例#11
0
# Name and location of the project to (re)create: a "test_project" folder
# inside the current directory
project_name = "test_project"
path_to_project = str(Path(".").absolute())
project_path = Path(path_to_project) / project_name

# A project folder left from a previous run is loaded and cleaned first
if project_path.is_dir():
    clean(Project.load(project_path))

proj = Project(name=project_name, path=path_to_project)

# Add a datasource: read the test dataset, attach it to the project, save it
path_to_dataset = str(Path(r"./test/resources/test_dataset.csv").absolute())
ds = Dataset.read_file(path_to_dataset)
ds.set_project_path(proj.path)
ds.save()

# Create and save a subdataset
sbds = SubDataset(dataset=ds, method="k_fold", by=5)
sbds.save()

# Set the approach: its script and its parameters file are copied from the
# example resources before the approach is saved
example_approach_path = r"./test/resources/approach_example.py"
param_path = r"./test/resources/parameters_example.yml"
a = Approach(proj, "approach_example", sbds)
for source, destination in ((example_approach_path, a.script_path),
                            (param_path, a.params_path)):
    shutil.copyfile(source, str(destination))
a.save()
示例#12
0
 def test_automatically_detect_regression(self):
     """
     Reads the housing dataset with "median_house_value" as label.

     Asserts
     -------
         - The problem_type of the Dataset is "regression"
     """
     Project(path=self.path_to_test_dir, name=self.aux_project_name)

     housing = Dataset.read_file("test/resources/housing.csv",
                                 label="median_house_value")

     self.assertEqual(housing.problem_type, "regression")
示例#13
0
 def test_automatically_detect_binary_clf(self):
     """
     Reads the titanic dataset with "Survived" as label.

     Asserts
     -------
         - The problem_type of the Dataset is "binary_clf"
     """
     Project(path=self.path_to_test_dir, name=self.aux_project_name)

     titanic = Dataset.read_file("test/resources/titanic.csv",
                                 label="Survived")

     self.assertEqual(titanic.problem_type, "binary_clf")
示例#14
0
 def test_automatically_detect_clf(self):
     """
     Reads the Iris dataset without passing a label explicitly.

     Asserts
     -------
         - The problem_type of the Dataset is "clf"
     """
     Project(path=self.path_to_test_dir, name=self.aux_project_name)

     iris = Dataset.read_file("test/resources/Iris.csv")

     self.assertEqual(iris.problem_type, "clf")