#!/usr/bin/env python3
"""Train an actor-critic agent with eligibility traces on LunarLander-v2.

Runs a single trial of 100 episodes and writes results under results/acet-21/.
"""
import gym
import sys

# spyrl is resolved from a sibling checkout, so the path must be inserted
# before any spyrl import below.
sys.path.insert(0, "../../../spyrl")
from spyrl.activity.learning import Learning
from spyrl.activity.activity_config import ActivityConfig
from spyrl.agent_builder.impl.actor_critic_traces_agent_builder import ActorCriticTracesAgentBuilder
from spyrl.listener.impl.basic_functions import BasicFunctions
from spyrl.listener.impl.file_log_listener import RewardType
from example.lunarlander.helper.lunarlander_discretiser import LunarLanderDiscretiser24576

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    num_actions = env.action_space.n
    config = ActivityConfig(start_trial=1, num_trials=1, num_episodes=100,
                            out_path='results/acet-21/')
    agent_builder = ActorCriticTracesAgentBuilder(
        num_actions, discretiser=LunarLanderDiscretiser24576())
    # NOTE(review): every milestone exceeds num_episodes (100), so no
    # milestone will actually be reached with this config — confirm intent.
    milestone_episodes = [1000, 2000, 4000, 5000, 8000]
    listener = BasicFunctions(render=False, draw=False,
                              reward_type=RewardType.TOTAL,
                              milestone_episodes=milestone_episodes)
    learning = Learning(listener=listener)
    learning.learn(env, agent_builder, config)
def create_agent(self, seed, initial_policy_path=None):
    """Build a DQN-with-target-network agent for one trial.

    NOTE(review): this reads like a method of the DQNAgentBuilder used in
    the __main__ block below (it uses self.num_actions / self.normaliser);
    the class header is not visible here. `initial_policy_path` is accepted
    but unused.
    """
    num_inputs = 8                 # LunarLander-v2 observation vector size
    memory_size = 50_000           # replay-buffer capacity
    batch_size = 64
    dqn_dims = [num_inputs, 128, self.num_actions]
    # Renamed from `c` — presumably the target-network sync period; verify.
    target_sync_period = 100
    return DQNWithTargetNetworkAgent(memory_size, batch_size, dqn_dims,
                                     self.normaliser, target_sync_period,
                                     seed)


if __name__ == '__main__':
    # An optional command-line argument selects the first trial number.
    start_trial = int(sys.argv[1]) if len(sys.argv) > 1 else 1
    env = gym.make('LunarLander-v2')
    config = ActivityConfig(start_trial=start_trial, num_trials=10,
                            num_episodes=10000,
                            out_path='result/lunarlander/dqnwtn-03/')
    agent_builder = DQNAgentBuilder(env.action_space.n)
    milestone_episodes = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000]
    basic_functions = BasicFunctions(render=False, draw=False,
                                     milestone_episodes=milestone_episodes,
                                     reward_type=RewardType.TOTAL)
    learning = Learning(listener=basic_functions)
    learning.add_listener(Gmailer("DQNwTN-03"))
    learning.learn(env, agent_builder, config)
class DoubleD2DSQLAgentBuilder(AgentBuilder):
    """Builds DoubleD2DSQLAgent2 instances, one per trial.

    Relies on the module-level names `normalised_training_set_parent_path`,
    `target_loss` and `get_project_dir` being in scope; the first two are
    assigned in the __main__ block below.
    """

    def create_agent(self, seed, initial_policy_path=None):
        # The seed doubles as the trial number used to locate the
        # pre-normalised training set. `initial_policy_path` is unused.
        num_inputs = 8
        memory_size = 50_000
        batch_size = 64
        dqn_dims = [num_inputs, 128, self.num_actions]
        trial = seed
        normalised_training_set_path = os.path.join(
            get_project_dir(),
            normalised_training_set_parent_path +
            '/d2dspl-normalised_training_set-' + str(trial).zfill(2) +
            '-00005000.txt')
        return DoubleD2DSQLAgent2(normalised_training_set_path, target_loss,
                                  memory_size, batch_size, dqn_dims,
                                  self.normaliser, seed)


if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    num_actions = env.action_space.n
    max_num_samples_for_classifier = 500
    num_episodes = 10000
    session_id = '22'
    target_loss = 0.01  # 0.001
    normalised_training_set_parent_path = 'result/lunarlander/d2dspl-acet-' + str(num_episodes) + '-' + session_id
    # NOTE(review): this description mentions ...-0x-00050000.txt while
    # create_agent loads ...-00005000.txt — one of the two is likely stale.
    description = 'dd2dsql-' + session_id + '. mem_size=50,000, batch_size=64, hidden dims=128, DoubleD2DSQLAgent2 (fixed epsilon)' + \
        'training set from ' + normalised_training_set_parent_path + \
        '/d2dspl-normalised_training_set-0x-00050000.txt\n' + 'target_loss: ' + str(target_loss)
    milestone_episodes = [1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000]
    config = ActivityConfig(start_trial=1, num_trials=10,
                            num_episodes=num_episodes,
                            out_path='result/lunarlander/dd2dsql-' + session_id + '/')
    agent_builder = DoubleD2DSQLAgentBuilder(num_actions)
    listeners = [
        BasicFunctions(render=False, draw=False, reward_type=RewardType.TOTAL,
                       milestone_episodes=milestone_episodes),
        Gmailer("DD2DSQL-22"),
        SessionLogger(description),
    ]
    learning = Learning(listeners=listeners)
    learning.learn(env, agent_builder, config)
from spyrl.listener.impl.basic_functions import BasicFunctions

__author__ = "Budi Kurniawan"
__copyright__ = "Copyright 2021, Budi Kurniawan"
__license__ = "GPL"
__version__ = "0.1.0"


class PPOAgentBuilder(AgentBuilder):
    """Builds the PPO agent used by the quick CartPole run below."""

    def create_agent(self, seed, initial_policy_path=None):
        """Return a PPOAgent; `initial_policy_path` is accepted but unused.

        NOTE(review): the incoming `seed` is overwritten with 1, so every
        trial runs with the same seed — confirm this fixed-seed behaviour
        is intended for this test script (kept unchanged here).
        """
        normaliser = None
        seed = 1
        # `nn_dims` is the module-level tuple built in the __main__ block.
        return PPOAgent(nn_dims, normaliser, seed, local_steps_per_epoch=7)


if __name__ == '__main__':
    # Renamed from `id`, which shadowed the builtin of the same name.
    env_id = 'CartPole-v2'
    # Re-register CartPole with a much higher step cap than the stock env.
    gym.envs.register(id=env_id,
                      entry_point='gym.envs.classic_control:CartPoleEnv',
                      max_episode_steps=100_000)
    env = gym.make(env_id)
    num_actions = env.action_space.n
    num_states = env.observation_space.shape[0]
    nn_dims = (num_states, 64, 64, num_actions)
    out_path = os.path.join(get_project_dir(), 'result/cartpole/ppo-test/')
    config = ActivityConfig(num_episodes=2, out_path=out_path)
    agent_builder = PPOAgentBuilder(num_actions)
    learning = Learning(listener=BasicFunctions(render=False))
    learning.learn(env, agent_builder, config)
from spyrl.listener.impl.file_log_listener import RewardType
from example.lunarlander.helper.lunarlander_discretiser import LunarLanderDiscretiser,\
    LunarLanderDiscretiser24576

__author__ = "Budi Kurniawan"
__copyright__ = "Copyright 2021, Budi Kurniawan"
__license__ = "GPL"
__version__ = "0.1.0"

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    num_actions = env.action_space.n
    # Cap on the samples collected for the D2DSPL classifier stage.
    max_num_samples_for_classifier = 500
    num_episodes = 10000
    session_id = '24'
    milestone_episodes = [5000, 10000]
    out_path = 'result/lunarlander/d2dspl-acet-' + str(num_episodes) + '-' + session_id + '/'
    config = ActivityConfig(start_trial=1, num_trials=10,
                            num_episodes=num_episodes, out_path=out_path)
    agent_builder = D2DSPLActorCriticTracesAgentBuilder(
        num_actions, LunarLanderDiscretiser24576(),
        max_num_samples_for_classifier, None, [80, 80])
    basic_functions = BasicFunctions(render=False, draw=False,
                                     reward_type=RewardType.TOTAL,
                                     milestone_episodes=milestone_episodes)
    learning = Learning(listener=basic_functions)
    learning.add_listener(Gmailer("D2DSPL-ACET-10000-24"))
    learning.learn(env, agent_builder, config)