Example #1
    def test4_glm_random_grid_search_metric(self, metric_name, bigger_is_better):
        """
        This function will test the last stopping condition using metrics.

        :param metric_name: metric we want to use to test the last stopping condition
        :param bigger_is_better: higher metric value indicates better model performance

        :return: None
        """
        print("*******************************************************************************************")
        print("test4_glm_random_grid_search_metric using " + metric_name + " for family " + self.family)
        h2o.cluster_info()

        search_criteria = {
            "strategy": "RandomDiscrete",
            "stopping_metric": metric_name,
            "stopping_tolerance": random.uniform(1e-8, self.max_tolerance),
            "stopping_rounds": random.randint(1, self.max_stopping_rounds),
            "seed": round(time.time())
        }
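        # With the RandomDiscrete strategy, the search stops early once
        # stopping_rounds consecutive models fail to improve the best
        # stopping_metric value by more than stopping_tolerance.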

        print("GLM Gaussian grid search_criteria: {0}".format(search_criteria))

        # add max_runtime_secs back into the hyper-parameters to limit model runtime.
        self.hyper_params["max_runtime_secs"] = [0.3]  # arbitrarily set to 0.3 seconds

        # fire off random grid-search
        grid_model = H2OGridSearch(
            H2OGeneralizedLinearEstimator(family=self.family, nfolds=self.nfolds),
            hyper_params=self.hyper_params,
            search_criteria=search_criteria,
        )
        grid_model.train(x=self.x_indices,
                         y=self.y_index,
                         training_frame=self.training1_data)

        # True if the randomized grid search evaluated the early-stopping condition correctly
        stopped_correctly = pyunit_utils.evaluate_metrics_stopping(
            grid_model.models, metric_name, bigger_is_better, search_criteria, self.possible_number_models
        )

        if stopped_correctly:
            print("test4_glm_random_grid_search_metric " + metric_name + ": passed.")
        else:
            self.test_failed += 1
            self.test_failed_array[self.test_num] = 1
            print("test4_glm_random_grid_search_metric " + metric_name + ": failed.")

        self.test_num += 1
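
For context, here is a minimal standalone sketch of the same RandomDiscrete metric-stopping pattern outside the pyunit harness. The dataset URL, the AGE target column, the alpha/lambda grid, and the MSE stopping metric below are illustrative assumptions, not taken from the original test.

import time

import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.grid.grid_search import H2OGridSearch

h2o.init()

# hypothetical demo data: H2O's public prostate.csv test file
frame = h2o.import_file("https://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv")
y = "AGE"                                  # illustrative regression target
x = [c for c in frame.names if c not in (y, "ID")]

search_criteria = {
    "strategy": "RandomDiscrete",
    "stopping_metric": "MSE",              # for MSE, lower is better (bigger_is_better=False)
    "stopping_tolerance": 1e-3,
    "stopping_rounds": 5,
    "seed": int(round(time.time())),
}
hyper_params = {
    "alpha": [0.0, 0.25, 0.5, 0.75, 1.0],  # elastic-net mixing values to sample
    "lambda": [1e-3, 1e-2, 1e-1, 1.0],     # regularization strengths to sample
}

grid = H2OGridSearch(
    H2OGeneralizedLinearEstimator(family="gaussian", nfolds=3),
    hyper_params=hyper_params,
    search_criteria=search_criteria,
)
grid.train(x=x, y=y, training_frame=frame)

# Early stopping may build fewer than the full 5 * 4 = 20 combinations.
print("models built:", len(grid.models))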