Example #1
import os
import shutil

from stable_baselines import PPO2
from stable_baselines.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from stable_baselines.common.cmd_util import make_vec_env

# LOG_FOLDER is a module-level constant in the original test file;
# any writable directory works
LOG_FOLDER = '/tmp/stable_baselines_log/'


def test_recurrent_eval_callback():
    env_id = 'Pendulum-v0'

    # Create envs
    env = make_vec_env(env_id, n_envs=4)
    eval_env = make_vec_env(env_id, n_envs=1)

    # Create RL model
    model = PPO2('MlpLstmPolicy', env)

    # Stop training if the performance is good enough
    callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-1200,
                                                     verbose=1)

    eval_callback = EvalCallback(eval_env,
                                 callback_on_new_best=callback_on_best,
                                 best_model_save_path=LOG_FOLDER,
                                 log_path=LOG_FOLDER,
                                 eval_freq=100)

    model.learn(300, callback=eval_callback)

    # Cleanup
    if os.path.exists(LOG_FOLDER):
        shutil.rmtree(LOG_FOLDER)
Example #2
import os
import shutil

from stable_baselines import ACER, DQN, PPO1, TRPO
from stable_baselines.common.callbacks import (CallbackList, CheckpointCallback,
                                               EvalCallback, EveryNTimesteps,
                                               StopTrainingOnRewardThreshold)

# LOG_FOLDER is a module-level constant in the original test file
LOG_FOLDER = '/tmp/stable_baselines_log/'


# `model_class` is supplied by a pytest parametrize decorator in the original
# test module; `CustomCallback` (an event-recording callback) is also defined
# there -- a sketch is given after this example.
def test_callbacks(model_class):

    env_id = 'Pendulum-v0'
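    # ACER and DQN only support discrete action spaces,
    # so they get CartPole instead of Pendulum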
    if model_class in [ACER, DQN]:
        env_id = 'CartPole-v1'

    allowed_failures = []
    # The training run is too short for these algorithms to reach
    # `rollout_end`; training longer would take too long or would
    # require custom parameters per algorithm
    if model_class in [PPO1, DQN, TRPO]:
        allowed_failures = ['rollout_end']

    # Create RL model
    model = model_class('MlpPolicy', env_id)

    checkpoint_callback = CheckpointCallback(save_freq=500,
                                             save_path=LOG_FOLDER)

    # For testing: use the same training env
    eval_env = model.get_env()
    # Stop training if the performance is good enough
    callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-1200,
                                                     verbose=1)

    eval_callback = EvalCallback(eval_env,
                                 callback_on_new_best=callback_on_best,
                                 best_model_save_path=LOG_FOLDER,
                                 log_path=LOG_FOLDER,
                                 eval_freq=100)

    # Equivalent to the `checkpoint_callback`
    # but here in an event-driven manner
    checkpoint_on_event = CheckpointCallback(save_freq=1,
                                             save_path=LOG_FOLDER,
                                             name_prefix='event')
    event_callback = EveryNTimesteps(n_steps=500, callback=checkpoint_on_event)

    callback = CallbackList(
        [checkpoint_callback, eval_callback, event_callback])

    model.learn(500, callback=callback)
    model.learn(200, callback=None)
    custom_callback = CustomCallback()
    model.learn(200, callback=custom_callback)
    # Check that every callback event was executed
    custom_callback.validate(allowed_failures=allowed_failures)
    # Transform callback into a callback list automatically
    custom_callback = CustomCallback()
    model.learn(500,
                callback=[checkpoint_callback, eval_callback, custom_callback])
    # Check that every callback event was executed
    custom_callback.validate(allowed_failures=allowed_failures)

    # Automatic wrapping of an old-style callable callback
    model.learn(200, callback=lambda _locals, _globals: True)

    # Cleanup
    if os.path.exists(LOG_FOLDER):
        shutil.rmtree(LOG_FOLDER)
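
`CustomCallback` above lives in the original test module. A minimal sketch of such an event-recording callback, assuming the standard stable-baselines `BaseCallback` hooks, could look like this:

from stable_baselines.common.callbacks import BaseCallback

class CustomCallback(BaseCallback):
    """Records which callback events fired so the test can assert on them."""

    def __init__(self, verbose=0):
        super(CustomCallback, self).__init__(verbose)
        self.events = set()

    def _on_training_start(self):
        self.events.add('training_start')

    def _on_rollout_start(self):
        self.events.add('rollout_start')

    def _on_step(self):
        self.events.add('step')
        return True

    def _on_rollout_end(self):
        self.events.add('rollout_end')

    def _on_training_end(self):
        self.events.add('training_end')

    def validate(self, allowed_failures=()):
        expected = {'training_start', 'rollout_start', 'step',
                    'rollout_end', 'training_end'}
        missing = expected - self.events - set(allowed_failures)
        assert not missing, "Events never fired: {}".format(missing)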
Example #3
    def train(self, tensorboard_log: str) -> None:

        # Resume from a saved model if one exists; otherwise start fresh
        try:
            self.load_model(tensorboard_log=tensorboard_log)
        except Exception:
            self.create_model(tensorboard_log=tensorboard_log)

        # Stop training if reward gets close to zero
        callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-0.1,
                                                         verbose=1)
        eval_callback = EvalCallback(self.env,
                                     callback_on_new_best=callback_on_best,
                                     verbose=1)

        # Save model at regular time intervals
        checkpoint_callback = CheckpointCallback(
            save_freq=1000, save_path='./model_checkpoints/')

        # Chain callbacks together
        callback = CallbackList([eval_callback, checkpoint_callback])

        # Train until the eval callback stops training
        # (the timestep budget is effectively unbounded)
        self.model.learn(total_timesteps=int(1e10),
                         callback=callback,
                         tb_log_name="run")

        # Periodic checkpoints were already saved by checkpoint_callback
        print("Training is finished!")
Example #4
 # `make_env`, `num_cpu`, `parallel`, `terminate_early`, and `verbose` are
 # assumed to be defined in the enclosing scope of the original code
 def init_env(env_id):
     if parallel:
         env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
         reward_env = SubprocVecEnv([make_env(env_id, i) for i in range(1)])
     else:
         env = DummyVecEnv([make_env(env_id, i) for i in range(num_cpu)])
         reward_env = DummyVecEnv([make_env(env_id, i) for i in range(1)])
     if terminate_early:
         callback_on_best = StopTrainingOnRewardThreshold(
             reward_threshold=0.85, verbose=verbose)
         eval_callback = EvalCallback(reward_env,
                                      callback_on_new_best=callback_on_best,
                                      eval_freq=10_000,
                                      verbose=verbose)
         return env, reward_env, eval_callback
     else:
         return env, reward_env, None
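
A minimal usage sketch for the helper above; it assumes `make_env`, `num_cpu`, `parallel`, `terminate_early`, and `verbose` already exist in the enclosing scope, and PPO2 is only an illustrative choice of algorithm:

from stable_baselines import PPO2

env, reward_env, eval_callback = init_env('CartPole-v1')
model = PPO2('MlpPolicy', env, verbose=verbose)
# learn() accepts callback=None, so the terminate_early=False path
# needs no special casing
model.learn(total_timesteps=500_000, callback=eval_callback)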
Example #5
    def train(self):

        # Load latest model if available
        try:
            path = os.getcwd()
            os.chdir(os.getcwd() + '/model_checkpoints')
            files = [x for x in os.listdir() if x.endswith(".zip")]
            num = []
            for file in files:
                num.append([int(x) for x in file.split('_') if x.isdigit()][0])
            filename = "rl_model_" + str(max(num)) + "_steps.zip"
            print("Tentative: " + filename)
            self.model = PPO2.load(load_path=filename, env=DummyVecEnv([lambda: self.env]), tensorboard_log='./a2c_rasp_tensorboard/')
            print("Successfully loaded the previous model: " + filename)
            os.chdir(path)
        except Exception:
            # Vector-encode our new environment
            env = DummyVecEnv([lambda: self.env])
            # Create new model
            self.model = PPO2('MlpPolicy', env, verbose=1, tensorboard_log='./a2c_rasp_tensorboard/')
            print("Successfully created new model")

        # Stop training if the reward gets close to zero
        callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-1e-2, verbose=1)
        eval_callback = EvalCallback(self.env, callback_on_new_best=callback_on_best, verbose=1)

        # Save model at regular time intervals
        checkpoint_callback = CheckpointCallback(save_freq=2000, save_path='./model_checkpoints/')

        # Chain callbacks together
        callback = CallbackList([eval_callback, checkpoint_callback])

        # Train model
        episode = 1
        while episode < 10:
            # Update location of red dot
            _ = self.env.square
            if self.env.trainable:
                print("Beginning episode number {}".format(episode))
                self.model.learn(total_timesteps=int(1e10), callback=callback, tb_log_name="run")
                episode += 1

        # Save trained model
        self.model.save("raspberry_agent")
Example #6
 def build_eval_callback(
     self, eval_freq=10000, reward_threshold=900, log_path=None, eval_episodes=10, eval_env=None,
 ):
     callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=reward_threshold, verbose=1)
     eval_callback = EvalCallback(
         eval_env=eval_env,
         best_model_save_path=log_path,
         log_path=log_path,
         eval_freq=eval_freq,
         deterministic=True,
         render=False,
         n_eval_episodes=eval_episodes,
         callback_on_new_best=callback_on_best,
         verbose=1,
     )
     self.logger.debug(
         "Eval callback called every {} timesteps: stop training when mean reward is above {} in {} episodes".format(
             eval_freq, reward_threshold, eval_episodes
         )
     )
     return eval_callback
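
A hypothetical call site for the builder above (the `agent` object, environment id, and parameter values are illustrative, not from the original code):

import gym

eval_env = gym.make('Pendulum-v0')
callback = agent.build_eval_callback(eval_freq=5_000, reward_threshold=-200,
                                     log_path='./eval_logs/', eval_env=eval_env)
agent.model.learn(total_timesteps=1_000_000, callback=callback)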
Example #7
from stable_baselines import PPO2
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from BeautifulBlueSquare.BlueGymEnv import simpleAvoidance

# Separate evaluation env
eval_env = simpleAvoidance()

# Stop training when the model reaches the reward threshold, 800 * .9 = 720
callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=720,
                                                 verbose=1)

# Create a callback that evaluates the model, saves the best one,
# and stops training once the reward threshold (720) is reached
eval_callback = EvalCallback(eval_env,
                             n_eval_episodes=20,
                             eval_freq=int(800 * 50),
                             callback_on_new_best=callback_on_best,
                             best_model_save_path="model",
                             log_path="model",
                             verbose=1)

env = simpleAvoidance()

model = PPO2(CnnPolicy, env, gamma=.99, n_steps=256)

# Generous timestep budget, but training will stop early
# as soon as the reward threshold is reached
model.learn(total_timesteps=int(20e6), callback=eval_callback)
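
Once training stops, the best checkpoint written by EvalCallback can be reloaded; EvalCallback stores it as best_model.zip under best_model_save_path:

best_model = PPO2.load("model/best_model")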
Example #8
import gym

from stable_baselines import DQN
from stable_baselines.common.callbacks import (CheckpointCallback, EvalCallback,
                                               StopTrainingOnRewardThreshold)
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.deepq.policies import MlpPolicy

from Config import Config
from Result import Result

# Initialize the Battleship gym environments and the config
config = Config(5, [3, 2, 2], True, False, False)
env2 = gym.make('Battleships-v0', config=config)
env3 = gym.make('Battleships-v0', config=config)
env = DummyVecEnv([lambda: env2])
env4 = DummyVecEnv([lambda: env3])
check_env(env2, warn=True)

# Define callbacks
# Stop training once the mean reward reaches the environment's computed maximum
callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=env2.calculate_threshold(), verbose=1)
# Save the currently best model whenever evaluation finds a new best mean reward
eval_callback = EvalCallback(env4, callback_on_new_best=callback_on_best, verbose=1, best_model_save_path='./DQN_Models/best/')
checkpoint_callback = CheckpointCallback(save_freq=10_000, save_path='./model_checkpoints/')

# Uncomment to train a fresh model; otherwise the already trained model below is loaded and fine-tuned
#model = DQN(MlpPolicy, env, verbose=2, tensorboard_log="./logs/progress_tensorboard/")

# Load current best model
model = DQN.load("DQN_Models/dqn_5x5_3_SingleShot.zip", verbose=2, env=env, tensorboard_log="./logs/progress_tensorboard/")

# Train model
model.learn(total_timesteps=1000000, callback=[checkpoint_callback, eval_callback])

# Delete the current model and reload the best one saved by eval_callback
del model
model = DQN.load("./DQN_Models/best/best_model", env=env)