Example #1
def basicSetup(self, resample_prob=0.0, explore=None):
    # Build a PBT scheduler whose mutations cover both spec types:
    # a categorical list plus float- and int-returning lambdas.
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=10,
        resample_probability=resample_prob,
        hyperparam_mutations={
            "id_factor": [100],
            "float_factor": lambda: 100.0,
            "int_factor": lambda: 10,
        },
        custom_explore_fn=explore)
    runner = _MockTrialRunner(pbt)
    # Register five running trials whose scores (50 * i) give the
    # population a clear best and worst member.
    for i in range(5):
        trial = _MockTrial(
            i,
            {
                "id_factor": i,
                "float_factor": 2.0,
                "const_factor": 3,
                "int_factor": 10,
            })
        runner.add_trial(trial)
        trial.status = Trial.RUNNING
        self.assertEqual(
            pbt.on_trial_result(runner, trial, result(10, 50 * i)),
            TrialScheduler.CONTINUE)
    pbt.reset_stats()
    return pbt, runner
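These mutation specs cover PBT's two flavors of search space: a list is treated as a set of categorical choices, and a callable as a distribution to resample from. Below is a simplified sketch of the explore step this fixture exercises; it illustrates the documented behavior and is not Ray's actual implementation.

import random

def explore_sketch(config, mutations, resample_probability, custom_explore_fn=None):
    # Illustrative only: each mutated key is either resampled from its
    # backing distribution or perturbed relative to its current value.
    new_config = dict(config)
    for key, spec in mutations.items():
        if random.random() < resample_probability:
            # Resample a fresh value from the list or the callable.
            new_config[key] = random.choice(spec) if isinstance(spec, list) else spec()
        elif isinstance(spec, list):
            # Perturb a categorical value by stepping to a neighboring index.
            idx = spec.index(config[key]) if config[key] in spec else 0
            idx = max(0, min(len(spec) - 1, idx + random.choice([-1, 1])))
            new_config[key] = spec[idx]
        else:
            # Perturb a continuous value by scaling it up or down by 20%.
            new_config[key] = config[key] * random.choice([0.8, 1.2])
    if custom_explore_fn:
        new_config = custom_explore_fn(new_config)
    return new_config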
Example #2
def schedulerSetup(self, num_trials):
    # Build a default PBT scheduler and launch the requested number of
    # identically configured trials, distinguished only by experiment_tag.
    sched = PopulationBasedTraining()
    runner = _MockTrialRunnerPBT()
    for i in range(num_trials):
        t = _MockTrialPBT("__parameter_tuning")
        t.config = {"test": 1, "test1": 1, "env": "test"}
        t.experiment_tag = str(i)
        runner._launch_trial(t)
    return sched, runner
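Unlike the other fixtures, this one leaves every PopulationBasedTraining constructor argument at its default; the launched trials share a single config and differ only in their experiment_tag.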
Example #3
def explore(config):
    # Ensure we collect enough timesteps to do SGD.
    if config["timesteps_per_batch"] < config["sgd_batchsize"] * 2:
        config["timesteps_per_batch"] = config["sgd_batchsize"] * 2
    # Ensure we run at least one SGD iteration.
    if config["num_sgd_iter"] < 1:
        config["num_sgd_iter"] = 1
    return config


pbt = PopulationBasedTraining(
    time_attr="time_total_s",
    reward_attr="episode_reward_mean",
    perturbation_interval=120,
    resample_probability=0.25,
    # Specifies the mutations of these hyperparams.
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "sgd_stepsize": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "num_sgd_iter": lambda: random.randint(1, 30),
        "sgd_batchsize": lambda: random.randint(128, 16384),
        "timesteps_per_batch": lambda: random.randint(2000, 160000),
    },
    custom_explore_fn=explore)

ray.init()
run_experiments(
    {
        "pbt_humanoid_test": {
            "run": "PPO",
            "env": "Humanoid-v1",
            "repeat": 8,
Example #4
        },
        "stop": {
            "mean_accuracy": 0.80,
            "timesteps_total": 300,
        },
        "config": {
            "epochs": 1,
            "batch_size": 64,
            "lr": grid_search([10**-4, 10**-5]),
            "decay": lambda spec: spec.config.lr / 100.0,
            "dropout": grid_search([0.25, 0.5]),
        },
        "repeat": 4,
    }

    if args.smoke_test:
        train_spec["config"]["lr"] = 10**-4
        train_spec["config"]["dropout"] = 0.5

    ray.init()

    pbt = PopulationBasedTraining(time_attr="timesteps_total",
                                  reward_attr="mean_accuracy",
                                  perturbation_interval=10,
                                  hyperparam_mutations={
                                      "dropout":
                                      lambda _: np.random.uniform(0, 1),
                                  })

    run_experiments({"pbt_cifar10": train_spec}, scheduler=pbt)
Example #5
register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test",
                        action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=10,
        hyperparam_mutations={
            # Allow for scaling-based perturbations, with a uniform backing
            # distribution for resampling.
            "factor_1": lambda: random.uniform(0.0, 20.0),
            # Allow perturbations within this set of categorical values.
            "factor_2": [1, 2],
        })

    # Try to find the best factor 1 and factor 2
    run_experiments(
        {
            "pbt_test": {
                "run": "my_class",
                "stop": {
                    "training_iteration": 2 if args.smoke_test else 99999
                },
                "repeat": 10,
Example #6
    if config["num_sgd_iter"] < 1:
        config["num_sgd_iter"] = 1
    return config


pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="episode_reward_mean",
    perturbation_interval=75,
    resample_probability=0.25,
    # Specifies the mutations of these hyperparams.
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "gamma": lambda: random.uniform(0.99, 0.999),
        "horizon": lambda: random.randint(256, 2048),
        "clip_param": lambda: random.uniform(0.01, 0.4),
        "sgd_stepsize": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "num_sgd_iter": lambda: random.randint(2, 10),
        # "sgd_batchsize": lambda: random.randint(128, 16384),
        "timesteps_per_batch": lambda: random.randint(16, 256),
        "vf_loss_coeff": lambda: random.uniform(0.3, 1),
        "entropy_coeff": lambda: random.uniform(0.0, 0.01),
        # "kl_coeff": [0.0, 0.2, 1.0],
        "kl_target": lambda: random.uniform(0.003, 0.03),
    },
    # custom_explore_fn=explore  (defined above but left disabled here)
)

ray.init()

run_experiments(
    {
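The source cuts off inside this final call. A hypothetical completion in the shape of the earlier examples follows; every literal below is a placeholder rather than text recovered from the original.

# Hypothetical completion of the truncated call above; every literal here
# is a placeholder, not recovered from the original source.
run_experiments(
    {
        "pbt_ppo_test": {              # placeholder experiment name
            "run": "PPO",
            "env": "CartPole-v0",      # placeholder environment
            "repeat": 8,               # placeholder population size
            "config": {
                "num_workers": 4,      # placeholder resource setting
            },
        },
    },
    scheduler=pbt)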