import time
# from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy


if __name__ == '__main__':
    # Script entry point: build the continuous stock-exchange environment,
    # train a DDPG agent on it, save and reload the agent, then prepare the
    # bookkeeping containers used while watching the trained agent.

    # `Counter` and `defaultdict` are needed further down but are not imported
    # in the module header, so bring them into scope here.
    from collections import Counter, defaultdict

    # Create and wrap the environment
    # NOTE(review): `gym` is not imported in the visible module header, and the
    # custom environment id must be registered by the package that provides
    # it -- confirm both imports exist before running this script.
    # env = gym.make('game-stock-exchange-v0')
    raw_env = gym.make('game-stock-exchange-continuous-v0')
    # The factory returns the already-built environment. It is kept under its
    # own name so the lambda does not close over `env`, which is rebound on
    # this very line.
    env = DummyVecEnv([lambda: raw_env])

    model = DDPG(MlpPolicy, env, verbose=1)
    # model = A2C.load("a2c_gym_exchange_continuous", env=env)
    # NOTE(review): stable-baselines DDPG appears to take its step sizes from
    # the `actor_lr` / `critic_lr` constructor arguments; this assignment may
    # only create an unused attribute -- confirm it has the intended effect.
    model.learning_rate = 1e-7

    # Train the agent
    model.learn(total_timesteps=100000)  # default=1000000

    # Save the agent
    model.save("ddpg_gym_exchange_continuous")
    # del model  # delete trained model to demonstrate loading

    # Load the trained agent (round-trips through the file written above)
    model = DDPG.load("ddpg_gym_exchange_continuous", env=env)

    # Enjoy trained agent
    obs = env.reset()
    actions = Counter()        # starts empty; filled by the evaluation code
    pnl = defaultdict(float)   # missing keys read as 0.0
Exemplo n.º 2
0
        str([
            my_signal_rate, my_signal_repetitions, my_step_limit, lr_start,
            lr_end, timesteps
        ]))
    f.close()
except:
    print("envparameters couldn't be saved. They are:" + str([
        my_signal_rate, my_signal_repetitions, my_step_limit, lr_start, lr_end,
        timesteps
    ]))

# Training is split into short chunks so the learning rate can be re-set
# before each chunk and the model can be checkpointed periodically.
# Both intervals are passed to / compared against step counts below.
lr_update_interval = 500
model_save_interval = 20000
# Number of learning-rate updates between two checkpoints. Floor division
# keeps this an int, so the checkpoint suffix below is a whole number
# (0, 1, 2, ...) instead of a fraction such as 0.975.
modulo_number = model_save_interval // lr_update_interval

# NOTE(review): the bound is inclusive, so one more chunk than
# timesteps / lr_update_interval is trained -- confirm this is intended.
i = 0
while i <= (timesteps / lr_update_interval):
    # linear: model.learning_rate = lr_start-(lr_stepsize*(i+pretraining_iterations))
    # log:
    # Exponential decay: the exponent grows by 1 every timesteps / 10 steps,
    # so the rate halves ten times and is lr_start * 0.5**10 after
    # `timesteps` steps.
    # NOTE(review): assumes the model re-reads `learning_rate` at the start of
    # each `learn()` call -- confirm for the algorithm in use.
    model.learning_rate = lr_start * 0.5**((i * lr_update_interval) *
                                           (10 / timesteps))
    # static: model.learning_rate = static_learning_rate
    # reset_num_timesteps=False is passed so successive chunks continue the
    # same run rather than starting a new one.
    model.learn(total_timesteps=lr_update_interval,
                tb_log_name=name,
                log_interval=10,
                reset_num_timesteps=False)
    # Checkpoint on the last chunk of every block of `modulo_number` chunks;
    # the suffix is the zero-based index of that block.
    if i % modulo_number == modulo_number - 1:
        model.save("/media/ryuga/TOSHIBA EXT/BA/Models/" + name + "_" +
                   str(i // modulo_number))
    i += 1