Example #1
    def test_old_configs(self):
        """Tests creating various Trainers (Algorithms) using 1.10 config dicts."""
        from ray.rllib.tests.backward_compat.old_ppo import DEFAULT_CONFIG
        from ray.rllib.agents.ppo import PPOTrainer

        config = DEFAULT_CONFIG.copy()
        trainer = PPOTrainer(config=config, env="CartPole-v0")
        trainer.train()
        trainer.stop()
Example #2
def Hunter_trainer(config, reporter):
    multi_hunter_trainer = PPOTrainer(env=MultiHunterEnv, config=config)
    for _ in range(100):
        environment.simulate()  # `environment` comes from the surrounding module (not shown in this snippet)
        result = multi_hunter_trainer.train()
        result["phase"] = 1
        reporter(**result)
        phase1_time = result["timesteps_total"]
    state = multi_hunter_trainer.save()
    multi_hunter_trainer.stop()
Example #3
def main():
    ray.init()

    # PPO hyperparameters are not well tuned; most of them follow https://github.com/xtma/pytorch_car_caring/blob/master/train.py
    trainer = PPOTrainer(env=MyEnv,
                         config={
                             "use_pytorch": True,
                             "model": {
                                 "custom_model": "mymodel",
                                 "custom_options": {
                                     'encoder_path': args.encoder_path,
                                     'train_encoder': args.train_encoder
                                 },
                                 "custom_action_dist": "mydist",
                             },
                             "env_config": {
                                 'game': 'CarRacing'
                             },
                             "num_workers": args.num_workers,
                             "num_envs_per_worker": args.num_envs_per_worker,
                             "num_gpus": args.num_gpus,
                             "use_gae": args.use_gae,
                             "batch_mode": args.batch_mode,
                             "vf_loss_coeff": args.vf_loss_coeff,
                             "vf_clip_param": args.vf_clip_param,
                             "lr": args.lr,
                             "kl_coeff": args.kl_coeff,
                             "num_sgd_iter": args.num_sgd_iter,
                             "grad_clip": args.grad_clip,
                             "clip_param": args.clip_param,
                             "rollout_fragment_length":
                             args.rollout_fragment_length,
                             "train_batch_size": args.train_batch_size,
                             "sgd_minibatch_size": args.sgd_minibatch_size
                         })

    for i in range(args.train_epochs):
        trainer.train()
        print("%d Train Done" % (i), "Save Freq: %d" % (args.model_save_freq))
        if (i + 1) % args.model_save_freq == 0:
            print("%d Episodes Done" % (i))
            weights = trainer.get_policy().get_weights()
            torch.save(weights, args.model_save_path + "%d-mode.pt" % (i + 1))
    trainer.save(args.trainer_save_path)
    print("Done All!")
    trainer.stop()
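
The policy weights written with torch.save above can later be pushed back into a freshly built trainer. A minimal sketch, assuming the same model config and the MyEnv class from the surrounding module; the checkpoint filename is a placeholder:

import torch
from ray.rllib.agents.ppo import PPOTrainer

# Rebuild a trainer whose model matches the one the weights were saved from
# (the full config from main() above is omitted here for brevity).
restored_trainer = PPOTrainer(env=MyEnv, config={"use_pytorch": True})

# Load the torch-saved weight dict and push it into the local policy.
weights = torch.load("checkpoints/100-mode.pt")
restored_trainer.get_policy().set_weights(weights)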
Example #4
def my_train_fn(config, reporter):
    # Train for n iterations with high LR
    agent1 = PPOTrainer(env="CartPole-v0", config=config)
    for _ in range(10):
        result = agent1.train()
        result["phase"] = 1
        reporter(**result)
        phase1_time = result["timesteps_total"]
    state = agent1.save()
    agent1.stop()

    # Train for n iterations with low LR
    config["lr"] = 0.0001
    agent2 = PPOTrainer(env="CartPole-v0", config=config)
    agent2.restore(state)
    for _ in range(10):
        result = agent2.train()
        result["phase"] = 2
        result["timesteps_total"] += phase1_time  # keep time moving forward
        reporter(**result)
    agent2.stop()
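
A function trainable with the (config, reporter) signature, such as my_train_fn, is normally handed to Tune directly. A minimal launch sketch; the resource request and hyperparameter values here are illustrative assumptions, not taken from the original:

import ray
from ray import tune

ray.init()
# Run the custom training function as a single Tune trial.
tune.run(
    my_train_fn,
    config={"lr": 0.01, "num_workers": 0},
    resources_per_trial={"cpu": 1},
)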
Example #5
    def test_ppo_sample_waste(self):
        # Check we at least collect the initial wave of samples
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "rollout_fragment_length": 200,
                             "train_batch_size": 128,
                             "num_workers": 3,
                         })
        result = ppo.train()
        self.assertEqual(result["info"]["num_steps_sampled"], 600)
        ppo.stop()

        # Check we collect at least the specified amount of samples
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "rollout_fragment_length": 200,
                             "train_batch_size": 900,
                             "num_workers": 3,
                         })
        result = ppo.train()
        self.assertEqual(result["info"]["num_steps_sampled"], 1200)
        ppo.stop()

        # Check in vectorized mode
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "rollout_fragment_length": 200,
                             "num_envs_per_worker": 2,
                             "train_batch_size": 900,
                             "num_workers": 3,
                         })
        result = ppo.train()
        self.assertEqual(result["info"]["num_steps_sampled"], 1200)
        ppo.stop()
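
The asserted sample counts follow from simple worker arithmetic. A standalone sketch of the reasoning behind the first two assertions, using the numbers from the configs above:

# Each sampling wave returns one fragment per worker.
num_workers, rollout_fragment_length = 3, 200
steps_per_wave = num_workers * rollout_fragment_length
assert steps_per_wave == 600  # the "initial wave" checked first

# A 900-step train batch is not covered by a single 600-step wave,
# so a second wave is collected, giving 1200 sampled steps in total.
waves_needed = -(-900 // steps_per_wave)  # ceiling division
assert waves_needed * steps_per_wave == 1200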
Example #6
    def testPPOSampleWaste(self):
        ray.init(num_cpus=4)

        # Check we at least collect the initial wave of samples
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "sample_batch_size": 200,
                             "train_batch_size": 128,
                             "num_workers": 3,
                         })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 600)
        ppo.stop()

        # Check we collect at least the specified amount of samples
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "sample_batch_size": 200,
                             "train_batch_size": 900,
                             "num_workers": 3,
                         })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 1000)
        ppo.stop()

        # Check in vectorized mode
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "sample_batch_size": 200,
                             "num_envs_per_worker": 2,
                             "train_batch_size": 900,
                             "num_workers": 3,
                         })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 1200)
        ppo.stop()

        # Check legacy mode
        ppo = PPOTrainer(env="CartPole-v0",
                         config={
                             "sample_batch_size": 200,
                             "train_batch_size": 128,
                             "num_workers": 3,
                             "straggler_mitigation": True,
                         })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 200)
        ppo.stop()
Example #7
    def test_ppo_sample_waste(self):
        # Check we at least collect the initial wave of samples
        ppo = PPOTrainer(
            env="CartPole-v0",
            config={
                "sample_batch_size": 200,
                "train_batch_size": 128,
                "num_workers": 3,
            })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 600)
        ppo.stop()

        # Check we collect at least the specified amount of samples
        ppo = PPOTrainer(
            env="CartPole-v0",
            config={
                "sample_batch_size": 200,
                "train_batch_size": 900,
                "num_workers": 3,
            })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 1000)
        ppo.stop()

        # Check in vectorized mode
        ppo = PPOTrainer(
            env="CartPole-v0",
            config={
                "sample_batch_size": 200,
                "num_envs_per_worker": 2,
                "train_batch_size": 900,
                "num_workers": 3,
            })
        ppo.train()
        self.assertEqual(ppo.optimizer.num_steps_sampled, 1200)
        ppo.stop()
Example #8
agent_cfg["shuffle_sequences"] = True  # Whether to shuffle sequences in the batch when training
agent_cfg["grad_clip"] = None  # Clamp the norm of the gradient during optimization (None to disable)

# ====================== Run the optimization ======================

agent_cfg["lr"] = 1.0e-4
agent_cfg["lr_schedule"] = None

train_agent = Trainer(agent_cfg, "env", logger_creator)
checkpoint_path = train(train_agent, max_timesteps=100000)

# ===================== Enjoy the trained agent ======================

test_agent = Trainer(agent_cfg, "env", logger_creator)
test_agent.restore(checkpoint_path)
test(test_agent, explore=False)

# =================== Terminate Ray backend ====================

train_agent.stop()
test_agent.stop()
ray.shutdown()
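
The train and test helpers used above are project-specific. A hypothetical minimal version of train that matches how it is called here (iterate until a timestep budget, then return a checkpoint path) could look like:

def train(agent, max_timesteps):
    """Run training iterations until max_timesteps, then checkpoint."""
    timesteps = 0
    while timesteps < max_timesteps:
        result = agent.train()
        timesteps = result["timesteps_total"]
    return agent.save()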

Example #9
    def test_local(self):
        cf = DEFAULT_CONFIG.copy()
        for _ in framework_iterator(cf):
            agent = PPOTrainer(cf, "CartPole-v0")
            print(agent.train())
            agent.stop()
Example #10
    # Create a new dummy Trainer to "fix" our checkpoint.
    new_trainer = PPOTrainer(config=config)
    # Get untrained weights for all policies.
    untrained_weights = new_trainer.get_weights()
    # Restore all policies from checkpoint.
    new_trainer.restore(best_checkpoint)
    # Set back all weights (except for 1st agent) to original
    # untrained weights.
    new_trainer.set_weights(
        {pid: w
         for pid, w in untrained_weights.items() if pid != "policy_0"})
    # Create the checkpoint from which tune can pick up the
    # experiment.
    new_checkpoint = new_trainer.save()
    new_trainer.stop()
    print(".. checkpoint to restore from (all policies reset, "
          f"except policy_0): {new_checkpoint}")

    print("Starting new tune.run")

    # Start our actual experiment.
    stop = {
        "episode_reward_mean": args.stop_reward,
        "timesteps_total": args.stop_timesteps,
        "training_iteration": args.stop_iters,
    }

    # Make sure, the non-1st policies are not updated anymore.
    config["multiagent"]["policies_to_train"] = [
        pid for pid in policy_ids if pid != "policy_0"
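
The patched config and the stop dict then feed the follow-up experiment. A sketch of the tune.run call this snippet leads into, using only standard tune.run parameters and the variables defined above:

from ray import tune

# Resume the experiment from the prepared checkpoint.
results = tune.run(
    "PPO",
    config=config,
    stop=stop,
    restore=new_checkpoint,
    verbose=1,
)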
Example #11
    #                     else random.choice(["fb_1", "fb_2"])
    #         },
    #     },
    # )

    trainer = PPOTrainer(
        env="rcrs_env",
        config={
            "env": "rcrs_env",
            "num_workers": 1,
            "multiagent": {
                "policies": {
                    "fb_1": (None, obs_space, act_space, {}),
                    "fb_2": (None, obs_space, act_space, {}),
                },
                "policy_mapping_fn":
                lambda agent_id: "fb_1" if agent_id.startswith("fb_1_") else
                random.choice(["fb_1", "fb_2"])
            },
        },
    )

    for i in range(2):
        result = trainer.train()
        print(pretty_print(result))
        if i % 1 == 0:
            checkpoint = trainer.save()
            print("checkpoint saved at", checkpoint)
    state = trainer.save()
    trainer.stop()
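
The checkpoint returned by trainer.save() can be loaded back into an identically configured trainer via Trainer.restore. A brief sketch, assuming the inline multi-agent config above has been bound to a config variable:

# Rebuild the trainer with the same config, then restore the saved state.
restored_trainer = PPOTrainer(env="rcrs_env", config=config)
restored_trainer.restore(checkpoint)
print(pretty_print(restored_trainer.train()))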