Пример #1
0
def run_experiments(experiments,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=True):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, HyperBand, or HyperOpt.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
    """

    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(
        scheduler,
        launch_web_server=with_server,
        server_port=server_port,
        verbose=verbose)
    exp_list = experiments
    if isinstance(experiments, Experiment):
        exp_list = [experiments]
    elif type(experiments) is dict:
        exp_list = [
            Experiment.from_json(name, spec)
            for name, spec in experiments.items()
        ]

    if (type(exp_list) is list
            and all(isinstance(exp, Experiment) for exp in exp_list)):
        for experiment in exp_list:
            scheduler.add_experiment(experiment, runner)
    else:
        raise TuneError("Invalid argument: {}".format(experiments))

    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    for trial in runner.get_trials():
        # TODO(rliaw): What about errored?
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    wait_for_log_sync()
    return runner.get_trials()
Пример #2
0
    def __init__(self,
                 time_attr='training_iteration',
                 reward_attr='episode_reward_mean',
                 max_t=100,
                 grace_period=10,
                 reduction_factor=3,
                 brackets=3):
        assert max_t > 0, "Max (time_attr) not valid!"
        assert max_t >= grace_period, "grace_period must be <= max_t!"
        assert grace_period > 0, "grace_period must be positive!"
        assert reduction_factor > 1, "Reduction Factor not valid!"
        assert brackets > 0, "brackets must be positive!"
        FIFOScheduler.__init__(self)
        self._reduction_factor = reduction_factor
        self._max_t = max_t

        self._trial_info = {}  # Stores Trial -> Bracket

        # Tracks state for new trial add
        self._brackets = [
            _Bracket(grace_period, max_t, reduction_factor, s)
            for s in range(brackets)
        ]
        self._counter = 0  # for
        self._num_stopped = 0
        self._reward_attr = reward_attr
        self._time_attr = time_attr
Пример #3
0
 def __init__(
         self, time_attr="time_total_s", reward_attr="episode_reward_mean",
         grace_period=60.0, min_samples_required=3, hard_stop=True):
     FIFOScheduler.__init__(self)
     self._stopped_trials = set()
     self._completed_trials = set()
     self._results = collections.defaultdict(list)
     self._grace_period = grace_period
     self._min_samples_required = min_samples_required
     self._reward_attr = reward_attr
     self._time_attr = time_attr
     self._hard_stop = hard_stop
Пример #4
0
 def __init__(
         self, time_attr='time_total_s', reward_attr='episode_reward_mean',
         grace_period=60.0, min_samples_required=3, hard_stop=True):
     FIFOScheduler.__init__(self)
     self._stopped_trials = set()
     self._completed_trials = set()
     self._results = collections.defaultdict(list)
     self._grace_period = grace_period
     self._min_samples_required = min_samples_required
     self._reward_attr = reward_attr
     self._time_attr = time_attr
     self._hard_stop = hard_stop
Пример #5
0
    def __init__(self,
                 scheduler=None,
                 launch_web_server=False,
                 server_port=TuneServer.DEFAULT_PORT):
        """Initializes a new TrialRunner.

        Args:
            scheduler (TrialScheduler): Defaults to FIFOScheduler.
            launch_web_server (bool): Flag for starting TuneServer
            server_port (int): Port number for launching TuneServer"""

        self._scheduler_alg = scheduler or FIFOScheduler()
        self._trials = []
        self._running = {}
        self._avail_resources = Resources(cpu=0, gpu=0)
        self._committed_resources = Resources(cpu=0, gpu=0)
        self._resources_initialized = False

        # For debugging, it may be useful to halt trials after some time has
        # elapsed. TODO(ekl) consider exposing this in the API.
        self._global_time_limit = float(
            os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
        self._total_time = 0
        self._server = None
        if launch_web_server:
            self._server = TuneServer(self, server_port)
        self._stop_queue = []
Пример #6
0
    def __init__(
            self, time_attr='training_iteration',
            reward_attr='episode_reward_mean',
            grace_period=10.0, perturbation_interval=6.0,
            hyperparameter_mutations=None):
        FIFOScheduler.__init__(self)
        self._completed_trials = set()
        self._results = collections.defaultdict(list)
        self._last_perturbation_time = {}
        self._grace_period = grace_period
        self._reward_attr = reward_attr
        self._time_attr = time_attr

        self._hyperparameter_mutations = hyperparameter_mutations
        self._perturbation_interval = perturbation_interval
        self._checkpoint_paths = {}
Пример #7
0
def run_experiments(experiments,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT):

    # Make sure rllib agents are registered
    from ray import rllib  # noqa # pylint: disable=unused-import

    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(scheduler,
                         launch_web_server=with_server,
                         server_port=server_port)

    for name, spec in experiments.items():
        for trial in generate_trials(spec, name):
            runner.add_trial(trial)
    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    for trial in runner.get_trials():
        # TODO(rliaw): What about errored?
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    return runner.get_trials()
Пример #8
0
    def __init__(self, scheduler=None):
        """Initializes a new TrialRunner."""

        self._scheduler_alg = scheduler or FIFOScheduler()
        self._trials = []
        self._running = {}
        self._avail_resources = Resources(cpu=0, gpu=0)
        self._committed_resources = Resources(cpu=0, gpu=0)
Пример #9
0
    def __init__(
            self, time_attr="time_total_s", reward_attr="episode_reward_mean",
            perturbation_interval=60.0, hyperparam_mutations={},
            resample_probability=0.25, custom_explore_fn=None):
        if not hyperparam_mutations and not custom_explore_fn:
            raise TuneError(
                "You must specify at least one of `hyperparam_mutations` or "
                "`custom_explore_fn` to use PBT.")
        FIFOScheduler.__init__(self)
        self._reward_attr = reward_attr
        self._time_attr = time_attr
        self._perturbation_interval = perturbation_interval
        self._hyperparam_mutations = hyperparam_mutations
        self._resample_probability = resample_probability
        self._trial_state = {}
        self._custom_explore_fn = custom_explore_fn

        # Metrics
        self._num_checkpoints = 0
        self._num_perturbations = 0
Пример #10
0
    def __init__(self,
                 time_attr='training_iteration',
                 reward_attr='episode_reward_mean',
                 max_t=81):
        assert max_t > 0, "Max (time_attr) not valid!"
        FIFOScheduler.__init__(self)
        self._eta = 3
        self._s_max_1 = 5
        # bracket max trials
        self._get_n0 = lambda s: int(
            np.ceil(self._s_max_1 / (s + 1) * self._eta**s))
        # bracket initial iterations
        self._get_r0 = lambda s: int((max_t * self._eta**(-s)))
        self._hyperbands = [[]]  # list of hyperband iterations
        self._trial_info = {}  # Stores Trial -> Bracket, Band Iteration

        # Tracks state for new trial add
        self._state = {"bracket": None, "band_idx": 0}
        self._num_stopped = 0
        self._reward_attr = reward_attr
        self._time_attr = time_attr
Пример #11
0
    def __init__(
            self, time_attr='training_iteration',
            reward_attr='episode_reward_mean', max_t=100,
            grace_period=10, reduction_factor=3, brackets=3):
        assert max_t > 0, "Max (time_attr) not valid!"
        assert max_t >= grace_period, "grace_period must be <= max_t!"
        assert grace_period > 0, "grace_period must be positive!"
        assert reduction_factor > 1, "Reduction Factor not valid!"
        assert brackets > 0, "brackets must be positive!"
        FIFOScheduler.__init__(self)
        self._reduction_factor = reduction_factor
        self._max_t = max_t

        self._trial_info = {}  # Stores Trial -> Bracket

        # Tracks state for new trial add
        self._brackets = [_Bracket(
            grace_period, max_t, reduction_factor, s) for s in range(brackets)]
        self._counter = 0  # for
        self._num_stopped = 0
        self._reward_attr = reward_attr
        self._time_attr = time_attr
Пример #12
0
    def __init__(
            self, time_attr='training_iteration',
            reward_attr='episode_reward_mean', max_t=81):
        assert max_t > 0, "Max (time_attr) not valid!"
        FIFOScheduler.__init__(self)
        self._eta = 3
        self._s_max_1 = 5
        self._max_t_attr = max_t
        # bracket max trials
        self._get_n0 = lambda s: int(
            np.ceil(self._s_max_1/(s+1) * self._eta**s))
        # bracket initial iterations
        self._get_r0 = lambda s: int((max_t*self._eta**(-s)))
        self._hyperbands = [[]]  # list of hyperband iterations
        self._trial_info = {}  # Stores Trial -> Bracket, Band Iteration

        # Tracks state for new trial add
        self._state = {"bracket": None,
                       "band_idx": 0}
        self._num_stopped = 0
        self._reward_attr = reward_attr
        self._time_attr = time_attr
Пример #13
0
    def __init__(self,
                 time_attr="time_total_s",
                 reward_attr="episode_reward_mean",
                 perturbation_interval=60.0,
                 hyperparam_mutations={},
                 resample_probability=0.25,
                 custom_explore_fn=None):
        if not hyperparam_mutations and not custom_explore_fn:
            raise TuneError(
                "You must specify at least one of `hyperparam_mutations` or "
                "`custom_explore_fn` to use PBT.")
        FIFOScheduler.__init__(self)
        self._reward_attr = reward_attr
        self._time_attr = time_attr
        self._perturbation_interval = perturbation_interval
        self._hyperparam_mutations = hyperparam_mutations
        self._resample_probability = resample_probability
        self._trial_state = {}
        self._custom_explore_fn = custom_explore_fn

        # Metrics
        self._num_checkpoints = 0
        self._num_perturbations = 0
Пример #14
0
    def __init__(self, scheduler=None):
        """Initializes a new TrialRunner."""

        self._scheduler_alg = scheduler or FIFOScheduler()
        self._trials = []
        self._running = {}
        self._avail_resources = Resources(cpu=0, gpu=0)
        self._committed_resources = Resources(cpu=0, gpu=0)
        self._resources_initialized = False

        # For debugging, it may be useful to halt trials after some time has
        # elapsed. TODO(ekl) consider exposing this in the API.
        self._global_time_limit = float(
            os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
        self._total_time = 0
Пример #15
0
    def __init__(self, max_iter=200, eta=3):
        """
        args:
            max_iter (int): maximum iterations per configuration
            eta (int): # defines downsampling rate (default=3)
        """
        assert max_iter > 0, "Max Iterations not valid!"
        assert eta > 1, "Downsampling rate (eta) not valid!"

        FIFOScheduler.__init__(self)
        self._eta = eta
        self._s_max_1 = s_max_1 = calculate_bracket_count(max_iter, eta)
        # total number of iterations per execution of Succesive Halving (n,r)
        B = s_max_1 * max_iter
        # bracket trial count total
        self._get_n0 = lambda s: int(np.ceil(B / max_iter / (s + 1) * eta**s))
        # bracket initial iterations
        self._get_r0 = lambda s: int(max_iter * eta**(-s))
        self._hyperbands = [[]]  # list of hyperband iterations
        self._trial_info = {}  # Stores Trial -> Bracket, Band Iteration

        # Tracks state for new trial add
        self._state = {"bracket": None, "band_idx": 0}
        self._num_stopped = 0
Пример #16
0
    def __init__(self,
                 search_alg,
                 scheduler=None,
                 launch_web_server=False,
                 server_port=TuneServer.DEFAULT_PORT,
                 verbose=True,
                 queue_trials=False):
        """Initializes a new TrialRunner.

        Args:
            search_alg (SearchAlgorithm): SearchAlgorithm for generating
                Trial objects.
            scheduler (TrialScheduler): Defaults to FIFOScheduler.
            launch_web_server (bool): Flag for starting TuneServer
            server_port (int): Port number for launching TuneServer
            verbose (bool): Flag for verbosity. If False, trial results
                will not be output.
            queue_trials (bool): Whether to queue trials when the cluster does
                not currently have enough resources to launch one. This should
                be set to True when running on an autoscaling cluster to enable
                automatic scale-up.
        """
        self._search_alg = search_alg
        self._scheduler_alg = scheduler or FIFOScheduler()
        self._trials = []
        self._running = {}
        self._avail_resources = Resources(cpu=0, gpu=0)
        self._committed_resources = Resources(cpu=0, gpu=0)
        self._resources_initialized = False

        # For debugging, it may be useful to halt trials after some time has
        # elapsed. TODO(ekl) consider exposing this in the API.
        self._global_time_limit = float(
            os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
        self._total_time = 0
        self._server = None
        if launch_web_server:
            self._server = TuneServer(self, server_port)
        self._stop_queue = []
        self._verbose = verbose
        self._queue_trials = queue_trials
Пример #17
0
def run_experiments(experiments, scheduler=None, **ray_args):
    if scheduler is None:
        scheduler = FIFOScheduler()
    runner = TrialRunner(scheduler)

    for name, spec in experiments.items():
        for trial in generate_trials(spec, name):
            runner.add_trial(trial)
    print(runner.debug_string())

    ray.init(**ray_args)

    while not runner.is_finished():
        runner.step()
        print(runner.debug_string())

    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    return runner.get_trials()
Пример #18
0
 def choose_trial_to_run(self, trial_runner):
     self._add_new_trials_if_needed(trial_runner)
     return FIFOScheduler.choose_trial_to_run(self, trial_runner)
Пример #19
0
def run_experiments(experiments=None,
                    search_alg=None,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=True,
                    queue_trials=False):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        search_alg (SearchAlgorithm): Search Algorithm. Defaults to
            BasicVariantGenerator.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, and HyperBand.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to enable
            automatic scale-up.

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    if scheduler is None:
        scheduler = FIFOScheduler()

    if search_alg is None:
        assert experiments is not None, "Experiments need to be specified" \
            "if search_alg is not provided."
        search_alg = BasicVariantGenerator(experiments)

    runner = TrialRunner(search_alg,
                         scheduler=scheduler,
                         launch_web_server=with_server,
                         server_port=server_port,
                         verbose=verbose,
                         queue_trials=queue_trials)

    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    errored_trials = []
    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            errored_trials += [trial]

    if errored_trials:
        raise TuneError("Trials did not complete", errored_trials)

    wait_for_log_sync()
    return runner.get_trials()
Пример #20
0
def run_experiments(experiments,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=True):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, or HyperBand.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
    """

    # Make sure rllib agents are registered
    from ray import rllib  # noqa # pylint: disable=unused-import

    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(scheduler,
                         launch_web_server=with_server,
                         server_port=server_port)

    if type(experiments) is dict:
        for name, spec in experiments.items():
            for trial in generate_trials(spec, name):
                trial.set_verbose(verbose)
                runner.add_trial(trial)
    elif (type(experiments) is list
          and all(isinstance(exp, Experiment) for exp in experiments)):
        for experiment in experiments:
            for trial in experiment.trials():
                trial.set_verbose(verbose)
                runner.add_trial(trial)
    elif isinstance(experiments, Experiment):
        for trial in experiments.trials():
            trial.set_verbose(verbose)
            runner.add_trial(trial)

    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    for trial in runner.get_trials():
        # TODO(rliaw): What about errored?
        if trial.status != Trial.TERMINATED:
            raise TuneError("Trial did not complete", trial)

    wait_for_log_sync()
    return runner.get_trials()
Пример #21
0
def run_experiments(experiments,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=True,
                    queue_trials=False):
    """Tunes experiments.

    Args:
        experiments (Experiment | list | dict): Experiments to run.
        scheduler (TrialScheduler): Scheduler for executing
            the experiment. Choose among FIFO (default), MedianStopping,
            AsyncHyperBand, HyperBand, or HyperOpt.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to enable
            automatic scale-up.

    Returns:
        List of Trial objects, holding data for each executed trial.
    """

    if scheduler is None:
        scheduler = FIFOScheduler()

    runner = TrialRunner(
        scheduler,
        launch_web_server=with_server,
        server_port=server_port,
        verbose=verbose,
        queue_trials=queue_trials)
    exp_list = experiments
    if isinstance(experiments, Experiment):
        exp_list = [experiments]
    elif type(experiments) is dict:
        exp_list = [
            Experiment.from_json(name, spec)
            for name, spec in experiments.items()
        ]

    if (type(exp_list) is list
            and all(isinstance(exp, Experiment) for exp in exp_list)):
        for experiment in exp_list:
            scheduler.add_experiment(experiment, runner)
    else:
        raise TuneError("Invalid argument: {}".format(experiments))

    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    errored_trials = []
    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            errored_trials += [trial]

    if errored_trials:
        raise TuneError("Trials did not complete", errored_trials)

    wait_for_log_sync()
    return runner.get_trials()