Exemplo n.º 1
0
 def test_arboreal_set_params(self):
     """Verify that ``set_params`` stores every supplied value on the estimator.

     The estimator must expose exactly as many public attributes as there
     are entries in ``self.nondefault_vals``, and each of those attributes
     must equal the value that was passed in.
     """
     tree = ArborealTree()
     tree.set_params(**self.nondefault_vals)

     # Public attributes are those without a leading or a trailing
     # underscore.
     public_names = [
         name
         for name in vars(tree)
         if not name.startswith("_") and not name.endswith("_")
     ]
     self.assertEqual(len(public_names), len(self.nondefault_vals))

     # Every non-default value must be readable back under its own name.
     for name, expected in self.nondefault_vals.items():
         self.assertEqual(getattr(tree, name), expected)
Exemplo n.º 2
0
 def test_arboreal_constructor(self):
     """Verify that a no-argument constructor sets every default value.

     The estimator must expose exactly as many public attributes as there
     are entries in ``self.default_vals``, and each of those attributes
     must equal the expected default.
     """
     tree = ArborealTree()

     # Public attributes are those without a leading or a trailing
     # underscore.
     public_names = [
         name
         for name in vars(tree)
         if not name.startswith("_") and not name.endswith("_")
     ]
     self.assertEqual(len(public_names), len(self.default_vals))

     # Every default must be readable back under its own name.
     for name, expected in self.default_vals.items():
         self.assertEqual(getattr(tree, name), expected)
Exemplo n.º 3
0
    def test_grid_search_interface_for_arboreal_tree(self):
        """Verify that ``ArborealTree`` can be tuned with ``GridSearchCV``.

        Runs a 3-fold grid search over a small hyper-parameter grid on the
        training split, then checks that the refitted search scores an
        accuracy of 0.90 (to two decimal places) on the held-out test split.
        """
        param_grid = {
            "bootstrap_criterion_fraction_threshold": [0.8, 0.999_999_999],
            "maximum_bootstrap_branching_factor": [1, 2, 3],
            "feature_subset_fraction": [None, 0.4],
            "min_samples_split": [2, 3],
        }
        arb = ArborealTree(random_seed=self.random_seed)
        # `iid` is deliberately not passed: scikit-learn deprecated the
        # parameter in 0.22 and removed it in 0.24, so supplying it raises
        # TypeError on current releases.
        gs = GridSearchCV(
            arb,
            param_grid,
            cv=3,
            scoring="accuracy",
            verbose=0,
            n_jobs=self.n_jobs,
        )  # verbose = 2 for helpful info during the search

        gs.fit(self.X_train, self.y_train)
        gs_accuracy = gs.score(self.X_test, self.y_test)

        expected_accuracy = 0.90
        self.assertAlmostEqual(gs_accuracy, expected_accuracy, places=2)
Exemplo n.º 4
0
 def test_arboreal_set_then_get_params(self):
     """Verify that ``get_params`` returns what ``set_params`` was given."""
     tree = ArborealTree()
     tree.set_params(**self.nondefault_vals)
     self.assertEqual(tree.get_params(), self.nondefault_vals)
Exemplo n.º 5
0
 def test_arboreal_get_params(self):
     """Verify that a fresh estimator reports the defaults via ``get_params``."""
     tree = ArborealTree()
     self.assertEqual(tree.get_params(), self.default_vals)
Exemplo n.º 6
0
    def test_arboreal_constructor_and_set_params_are_equal(self):
        """Verify that constructor kwargs and ``set_params`` configure alike.

        For both the default and the non-default value sets, an estimator
        built with the values as constructor arguments must report the same
        ``get_params()`` as one built bare and configured via ``set_params``.
        """

        def check_equivalent(params):
            # Build one estimator per configuration route and compare them.
            via_constructor = ArborealTree(**params)
            via_setter = ArborealTree()
            via_setter.set_params(**params)
            self.assertEqual(
                via_constructor.get_params(), via_setter.get_params()
            )

        check_equivalent(self.default_vals)
        check_equivalent(self.nondefault_vals)
Exemplo n.º 7
0
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
]
# Mark "target" as a categorical field and as the field to predict.
# NOTE(review): `m` is created above this excerpt — presumably the dataset
# metadata object, since it is passed as `metadata=` below; confirm.
m.categoricals = ["target"]
m.target = "target"

# Create an Arboreal Dataset for train and test (both share the metadata `m`)
train_dataset = Dataset(metadata=m, datapoints=train_datapoints)
# NOTE(review): validate_target=False presumably relaxes target checks for
# the test datapoints — confirm against the Dataset documentation.
test_dataset = Dataset(
    metadata=m, datapoints=test_datapoints, validate_target=False
)

# Fit an ArborealTree on the train set
tree = ArborealTree()  # or DecisionTree() or RandomForest()
tree.fit(train_dataset)

# Predict data points in the test set; the evaluation loop below looks up
# each result by the datapoint's "identifier".
predictions = tree.transform(test_dataset)

# Evaluate our performance on the test set
results = []
prediction_datatypes = set()
for dp in test_datapoints_for_eval:
    target = dp["target"]
    prediction = predictions[dp["identifier"]]
    predicted_value = prediction[0]
    prediction_datatype = prediction[1]
    prediction_datatypes.add(prediction_datatype)
    assert (