Example #1
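Example #1 is a unit-test method: the allocation function ignores its arguments and always returns a 2-CPU bundle, and the test asserts that the trial ends up reporting 2 head CPUs even though it started with 1. It assumes a unittest.TestCase context and roughly these imports (a sketch of the Ray 1.x Tune API, not verbatim from the source file):

from ray import tune
from ray.tune.schedulers import ResourceChangingScheduler
from ray.tune.utils.placement_groups import PlacementGroupFactory
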
    def testNewResources(self):
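        # The allocation function receives (trial_runner, trial, result,
        # scheduler); this one ignores all four and always requests 2 CPUs.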
        sched = ResourceChangingScheduler(resources_allocation_function=(
            lambda a, b, c, d: PlacementGroupFactory([{
                "CPU": 2
            }])))

        def train(config, checkpoint_dir=None):
            tune.report(metric=1, resources=tune.get_trial_resources())

        analysis = tune.run(train,
                            scheduler=sched,
                            stop={"training_iteration": 2},
                            resources_per_trial=PlacementGroupFactory([{
                                "CPU": 1
                            }]),
                            num_samples=1)

        results_list = list(analysis.results.values())
        assert results_list[0]["resources"].head_cpus == 2.0
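
For reference, ``PlacementGroupFactory`` exposes the bundles it was built from; a minimal sketch of the two accessors used in these examples (assuming the Ray 1.x Tune API):

from ray.tune.utils.placement_groups import PlacementGroupFactory

pgf = PlacementGroupFactory([{"CPU": 2, "GPU": 0}])
print(pgf.head_cpus)           # CPUs in the first (head) bundle -> 2.0
print(pgf.required_resources)  # dict of resources summed across bundles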
Example #2
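Example #2 tunes XGBoost with ASHA for early stopping while a ``ResourceChangingScheduler`` grows each trial's CPU allotment as other trials finish. The snippet assumes roughly these imports (a sketch of the Ray 1.x API; ``BreastCancerTrainable`` and ``train_breast_cancer`` are defined elsewhere in the full example file):

from typing import Any, Dict, Union

from ray import tune
from ray.tune import trial_runner
from ray.tune.resources import Resources
from ray.tune.schedulers import ASHAScheduler, ResourceChangingScheduler
from ray.tune.trial import Trial
from ray.tune.utils.placement_groups import PlacementGroupFactory
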
def tune_xgboost(use_class_trainable=True):
    search_space = {
        # You can mix constants with search space objects.
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 9,
        "learning_rate": 1,
        "min_child_weight": tune.grid_search([2, 3]),
        "subsample": tune.grid_search([0.8, 0.9]),
        "colsample_bynode": tune.grid_search([0.8, 0.9]),
        "random_state": 1,
        "num_parallel_tree": 2000,
    }
    # This will enable aggressive early stopping of bad trials.
    base_scheduler = ASHAScheduler(
        max_t=16,  # 16 training iterations
        grace_period=1,
        reduction_factor=2,
    )

    def example_resources_allocation_function(
        trial_runner: "trial_runner.TrialRunner",
        trial: Trial,
        result: Dict[str, Any],
        scheduler: "ResourceChangingScheduler",
    ) -> Union[None, PlacementGroupFactory, Resources]:
        """This is a basic example of a resource allocating function.

        The function naively balances available CPUs over live trials.

        This function returns a new ``PlacementGroupFactory`` with updated
        resource requirements, or None. If the returned
        ``PlacementGroupFactory`` is equal by value to the trial's current
        one, the scheduler skips the update internally (the same applies
        when None is returned).

        See :class:`DistributeResources` for a more complex,
        robust approach.

        Args:
            trial_runner (TrialRunner): Trial runner for this Tune run.
                Can be used to obtain information about other trials.
            trial (Trial): The trial to allocate new resources to.
            result (Dict[str, Any]): The latest results of the trial.
            scheduler (ResourceChangingScheduler): The scheduler calling
                the function.
        """

        # Get base trial resources as defined in
        # ``tune.run(resources_per_trial)``
        base_trial_resource = scheduler._base_trial_resources

        # Don't bother reallocating before the first training iteration
        if result["training_iteration"] < 1:
            return None

        # default values if resources_per_trial is unspecified
        if base_trial_resource is None:
            base_trial_resource = PlacementGroupFactory([{"CPU": 1, "GPU": 0}])

        # Assume that the number of CPUs cannot go below what was
        # specified in tune.run
        min_cpu = base_trial_resource.required_resources.get("CPU", 0)

        # Get the number of CPUs available in total (not just free)
        total_available_cpus = trial_runner.trial_executor._avail_resources.cpu

        # Split the total CPUs evenly among all live trials
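        # e.g. with 8 total CPUs and 2 live trials:
        # cpu_to_use = max(min_cpu, 8 // 2) = 4, assuming min_cpu <= 4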
        cpu_to_use = max(
            min_cpu,
            total_available_cpus // len(trial_runner.get_live_trials()))

        # Assign new CPUs to the trial in a PlacementGroupFactory
        return PlacementGroupFactory([{"CPU": cpu_to_use, "GPU": 0}])

    # You can either define your own resources_allocation_function or
    # use the default, DistributeResources:

    # from ray.tune.schedulers.resource_changing_scheduler import \
    #     DistributeResources

    scheduler = ResourceChangingScheduler(
        base_scheduler=base_scheduler,
        resources_allocation_function=example_resources_allocation_function
        # resources_allocation_function=DistributeResources()  # default
    )

    # BreastCancerTrainable (class API) and train_breast_cancer (function
    # API) are defined elsewhere in the full example file.
    if use_class_trainable:
        fn = BreastCancerTrainable
    else:
        fn = train_breast_cancer

    analysis = tune.run(
        fn,
        metric="eval-logloss",
        mode="min",
        resources_per_trial=PlacementGroupFactory([{
            "CPU": 1,
            "GPU": 0
        }]),
        config=search_space,
        num_samples=1,
        scheduler=scheduler,
        checkpoint_at_end=use_class_trainable,
    )

    if use_class_trainable:
        # The class trainable resizes xgboost's nthread as its allocated
        # resources change, so its peak should exceed the initial 1 CPU.
        assert analysis.results_df["nthread"].max() > 1

    return analysis
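
A minimal driver sketch (hypothetical code, not part of the original example; assumes Ray is installed and the definitions above are importable):

if __name__ == "__main__":
    import ray
    ray.init(num_cpus=8)  # hypothetical local cluster size
    analysis = tune_xgboost(use_class_trainable=True)
    print("Best config:", analysis.best_config)
    print("Best eval-logloss:", analysis.best_result["eval-logloss"])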