def test__extract_arfftrace(self):
        """Check the trace extracted from a randomized search over an MLP.

        Fits a ``RandomizedSearchCV`` wrapping an ``MLPClassifier`` on the
        training indices of repeat 0 / fold 0 of OpenML task 20, then
        verifies that:

        * the best estimator's ``hidden_layer_sizes`` is one of the grid
          values;
        * the attribute list holds five entries plus one per searched
          hyperparameter;
        * the trace holds one line per search iteration;
        * every ``parameter_*`` column JSON-decodes to a value from the grid;
        * every other column matches its declared type (a member of the
          listed values, an ``int`` for repeat/fold/iteration, otherwise a
          ``float``);
        * the set of ``parameter_*`` columns equals the grid's keys.
        """
        param_grid = {
            "hidden_layer_sizes": [[5, 5], [10, 10], [20, 20]],
            "activation": ['identity', 'logistic', 'tanh', 'relu'],
            "learning_rate_init": [0.1, 0.01, 0.001, 0.0001],
            "max_iter": [10, 20, 40, 80]
        }
        num_iters = 10
        task = openml.tasks.get_task(20)
        # n_iter is passed by keyword: current scikit-learn releases accept
        # it as a keyword-only argument, and older ones accept the keyword
        # form as well.
        clf = RandomizedSearchCV(MLPClassifier(), param_grid,
                                 n_iter=num_iters)
        # just run the task
        train, _ = task.get_train_test_split_indices(0, 0)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        # check num layers of MLP
        self.assertIn(clf.best_estimator_.hidden_layer_sizes,
                      param_grid['hidden_layer_sizes'])

        trace_attribute_list = _extract_arfftrace_attributes(clf)
        trace_list = _extract_arfftrace(clf, 0, 0)
        self.assertIsInstance(trace_attribute_list, list)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(len(trace_attribute_list), 5 + len(param_grid))
        self.assertIsInstance(trace_list, list)
        self.assertEqual(len(trace_list), num_iters)

        # names of the hyperparameters found in the trace attributes
        optimized_params = set()

        for att_idx, attribute in enumerate(trace_attribute_list):
            att_name = attribute[0]
            att_type = attribute[1]
            if att_name.startswith("parameter_"):
                # add this to the found parameters
                param_name = att_name[len("parameter_"):]
                optimized_params.add(param_name)

                legal_values = param_grid[param_name]
                for trace_line in trace_list:
                    # parameter values are stored JSON-encoded in the trace
                    val = json.loads(trace_line[att_idx])
                    self.assertIn(val, legal_values)
            else:
                # non-parameter columns: check against the declared type
                for trace_line in trace_list:
                    val = trace_line[att_idx]
                    if isinstance(att_type, list):
                        # type given as a list of allowed values
                        self.assertIn(val, att_type)
                    elif att_name in ['repeat', 'fold', 'iteration']:
                        self.assertIsInstance(val, int)
                    else:  # att_type = real
                        self.assertIsInstance(val, float)

        self.assertEqual(set(param_grid.keys()), optimized_params)
    def test__extract_arfftrace(self):
        """Check the trace extracted from a randomized search over a forest.

        Fits a ``RandomizedSearchCV`` wrapping a ``RandomForestClassifier``
        on the training indices of repeat 0 / fold 0 of OpenML task 20, then
        verifies that:

        * the attribute list holds five entries plus one per searched
          hyperparameter;
        * the trace holds one line per search iteration;
        * every ``parameter_*`` column JSON-decodes to a value from the grid;
        * every other column matches its declared type (a member of the
          listed values, an ``int`` for repeat/fold/iteration, otherwise a
          ``float``);
        * the set of ``parameter_*`` columns equals the grid's keys.
        """
        param_grid = {
            "max_depth": [3, None],
            "max_features": [1, 2, 3, 4],
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"]
        }
        num_iters = 10
        task = openml.tasks.get_task(20)
        # n_iter is passed by keyword: current scikit-learn releases accept
        # it as a keyword-only argument, and older ones accept the keyword
        # form as well.
        clf = RandomizedSearchCV(RandomForestClassifier(), param_grid,
                                 n_iter=num_iters)
        # just run the task
        train, _ = task.get_train_test_split_indices(0, 0)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        trace_attribute_list = _extract_arfftrace_attributes(clf)
        trace_list = _extract_arfftrace(clf, 0, 0)
        self.assertIsInstance(trace_attribute_list, list)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(len(trace_attribute_list), 5 + len(param_grid))
        self.assertIsInstance(trace_list, list)
        self.assertEqual(len(trace_list), num_iters)

        # names of the hyperparameters found in the trace attributes
        optimized_params = set()

        for att_idx, attribute in enumerate(trace_attribute_list):
            att_name = attribute[0]
            att_type = attribute[1]
            if att_name.startswith("parameter_"):
                # add this to the found parameters
                param_name = att_name[len("parameter_"):]
                optimized_params.add(param_name)

                legal_values = param_grid[param_name]
                for trace_line in trace_list:
                    # parameter values are stored JSON-encoded in the trace
                    val = json.loads(trace_line[att_idx])
                    self.assertIn(val, legal_values)
            else:
                # non-parameter columns: check against the declared type
                for trace_line in trace_list:
                    val = trace_line[att_idx]
                    if isinstance(att_type, list):
                        # type given as a list of allowed values
                        self.assertIn(val, att_type)
                    elif att_name in ['repeat', 'fold', 'iteration']:
                        self.assertIsInstance(val, int)
                    else:  # att_type = real
                        self.assertIsInstance(val, float)

        self.assertEqual(set(param_grid.keys()), optimized_params)
Пример #3
0
    def test__extract_arfftrace(self):
        """Validate trace extraction for a random-forest randomized search.

        A ``RandomizedSearchCV`` around ``RandomForestClassifier`` is fitted
        on the training split (repeat 0, fold 0) of OpenML task 20. The test
        then asserts that the attribute list has ``5 + len(param_grid)``
        entries, that the trace has ``num_iters`` lines, that each
        ``parameter_*`` value JSON-decodes to a grid value, that each
        remaining value agrees with its attribute type, and that exactly the
        grid's hyperparameters appear as ``parameter_*`` attributes.
        """
        param_grid = {"max_depth": [3, None],
                      "max_features": [1, 2, 3, 4],
                      "bootstrap": [True, False],
                      "criterion": ["gini", "entropy"]}
        num_iters = 10
        task = openml.tasks.get_task(20)
        # Pass n_iter by keyword; scikit-learn now treats it as keyword-only
        # and the keyword form also works on older releases.
        clf = RandomizedSearchCV(RandomForestClassifier(), param_grid,
                                 n_iter=num_iters)
        # just run the task
        train, _ = task.get_train_test_split_indices(0, 0)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        trace_attribute_list = _extract_arfftrace_attributes(clf)
        trace_list = _extract_arfftrace(clf, 0, 0)
        self.assertIsInstance(trace_attribute_list, list)
        # assertEquals is a deprecated alias that Python 3.12 removed.
        self.assertEqual(len(trace_attribute_list), 5 + len(param_grid))
        self.assertIsInstance(trace_list, list)
        self.assertEqual(len(trace_list), num_iters)

        # hyperparameter names discovered among the trace attributes
        optimized_params = set()
        prefix = "parameter_"

        for att_idx, attribute in enumerate(trace_attribute_list):
            att_name = attribute[0]
            att_type = attribute[1]
            if att_name.startswith(prefix):
                # record the hyperparameter and check each sampled value
                param_name = att_name[len(prefix):]
                optimized_params.add(param_name)

                legal_values = param_grid[param_name]
                for trace_line in trace_list:
                    # values in parameter columns are JSON-encoded
                    val = json.loads(trace_line[att_idx])
                    self.assertIn(val, legal_values)
            else:
                # remaining columns are checked against their declared type
                for trace_line in trace_list:
                    val = trace_line[att_idx]
                    if isinstance(att_type, list):
                        # type given as a list of allowed values
                        self.assertIn(val, att_type)
                    elif att_name in ['repeat', 'fold', 'iteration']:
                        self.assertIsInstance(val, int)
                    else:  # att_type = real
                        self.assertIsInstance(val, float)

        self.assertEqual(set(param_grid.keys()), optimized_params)