# Example #1
#!/usr/bin/env python3
import gym
import sys
sys.path.insert(0, "../../../spyrl")
from spyrl.activity.learning import Learning
from spyrl.activity.activity_config import ActivityConfig
from spyrl.agent_builder.impl.actor_critic_traces_agent_builder import ActorCriticTracesAgentBuilder
from spyrl.listener.impl.basic_functions import BasicFunctions
from spyrl.listener.impl.file_log_listener import RewardType
from example.lunarlander.helper.lunarlander_discretiser import LunarLanderDiscretiser24576

if __name__ == '__main__':
    # Short actor-critic-with-eligibility-traces run on LunarLander-v2
    # using the 24576-state discretiser; output goes to results/acet-21/.
    environment = gym.make('LunarLander-v2')
    action_count = environment.action_space.n
    run_config = ActivityConfig(start_trial=1,
                                num_trials=1,
                                num_episodes=100,
                                out_path='results/acet-21/')
    builder = ActorCriticTracesAgentBuilder(
        action_count, discretiser=LunarLanderDiscretiser24576())
    # Episodes at which intermediate policies are snapshotted.
    checkpoints = [1000, 2000, 4000, 5000, 8000]
    functions = BasicFunctions(render=False,
                               draw=False,
                               reward_type=RewardType.TOTAL,
                               milestone_episodes=checkpoints)
    session = Learning(listener=functions)
    session.learn(environment, builder, run_config)
    def create_agent(self, seed, initial_policy_path=None):
        # Build a DQN-with-target-network agent: 8 network inputs, one
        # hidden layer of 128 units, replay memory of 50k transitions,
        # minibatches of 64 and a target-network sync interval of 100.
        # `initial_policy_path` is accepted for interface compatibility
        # but not used here.
        layer_dims = [8, 128, self.num_actions]
        replay_capacity = 50_000
        minibatch_size = 64
        sync_interval = 100
        return DQNWithTargetNetworkAgent(replay_capacity, minibatch_size,
                                         layer_dims, self.normaliser,
                                         sync_interval, seed)


if __name__ == '__main__':
    # An optional command-line argument selects the first trial number,
    # so an interrupted multi-trial run can be resumed (defaults to 1).
    first_trial = int(sys.argv[1]) if len(sys.argv) > 1 else 1

    environment = gym.make('LunarLander-v2')
    run_config = ActivityConfig(start_trial=first_trial,
                                num_trials=10,
                                num_episodes=10000,
                                out_path='result/lunarlander/dqnwtn-03/')
    builder = DQNAgentBuilder(environment.action_space.n)
    # Snapshot the policy every 1000 episodes up to 9000.
    checkpoints = list(range(1000, 10000, 1000))
    session = Learning(
        listener=BasicFunctions(render=False,
                                draw=False,
                                milestone_episodes=checkpoints,
                                reward_type=RewardType.TOTAL))
    # E-mail a notification when the run finishes.
    session.add_listener(Gmailer("DQNwTN-03"))

    session.learn(environment, builder, run_config)
class DoubleD2DSQLAgentBuilder(AgentBuilder):
    """Builds DoubleD2DSQLAgent2 instances bootstrapped from a
    pre-generated normalised training set.

    NOTE(review): this reads the module-level names
    `normalised_training_set_parent_path` and `target_loss`, which are
    only assigned inside the __main__ block below — the class is not
    importable on its own; confirm before reusing it elsewhere.
    """

    def create_agent(self, seed, initial_policy_path=None):
        # The trial number doubles as the random seed; the training-set
        # file name embeds it as a zero-padded two-digit index.
        trial_tag = str(seed).zfill(2)
        file_name = ('d2dspl-normalised_training_set-' + trial_tag +
                     '-00005000.txt')
        training_set_path = os.path.join(
            get_project_dir(),
            normalised_training_set_parent_path + '/' + file_name)
        # 8 inputs, one hidden layer of 128 units, 50k replay memory,
        # minibatches of 64.
        layer_dims = [8, 128, self.num_actions]
        return DoubleD2DSQLAgent2(training_set_path, target_loss, 50_000,
                                  64, layer_dims, self.normaliser, seed)

if __name__ == '__main__':
    environment = gym.make('LunarLander-v2')
    action_count = environment.action_space.n
    max_num_samples_for_classifier = 500
    num_episodes = 10000
    session_id = '22'
    target_loss = 0.01  # 0.001 was also tried

    # Parent directory of the normalised training sets produced by the
    # earlier D2DSPL-ACET run; read by DoubleD2DSQLAgentBuilder above.
    normalised_training_set_parent_path = (
        'result/lunarlander/d2dspl-acet-' + str(num_episodes) + '-' + session_id)
    description = ('dd2dsql-' + session_id +
                   '. mem_size=50,000, batch_size=64, hidden dims=128, '
                   'DoubleD2DSQLAgent2 (fixed epsilon)' +
                   'training set from ' + normalised_training_set_parent_path +
                   '/d2dspl-normalised_training_set-0x-00050000.txt\n' +
                   'target_loss: ' + str(target_loss))

    # Snapshot the policy every 1000 episodes.
    checkpoints = [i * 1000 for i in range(1, 11)]
    run_config = ActivityConfig(start_trial=1, num_trials=10,
                                num_episodes=num_episodes,
                                out_path='result/lunarlander/dd2dsql-' + session_id + '/')
    builder = DoubleD2DSQLAgentBuilder(action_count)
    all_listeners = [
        BasicFunctions(render=False, draw=False,
                       reward_type=RewardType.TOTAL,
                       milestone_episodes=checkpoints),
        Gmailer("DD2DSQL-22"),
        SessionLogger(description),
    ]
    session = Learning(listeners=all_listeners)
    session.learn(environment, builder, run_config)
# Example #4
from spyrl.listener.impl.basic_functions import BasicFunctions

__author__ = "Budi Kurniawan"
__copyright__ = "Copyright 2021, Budi Kurniawan"
__license__ = "GPL"
__version__ = "0.1.0"


class PPOAgentBuilder(AgentBuilder):
    """Agent builder that creates PPO agents for the CartPole test run.

    NOTE(review): reads the module-level name `nn_dims`, which is only
    assigned in the __main__ block below — the class is not importable
    on its own; confirm before reusing it elsewhere.
    """

    def create_agent(self, seed, initial_policy_path=None):
        """Create a PPOAgent seeded with the caller-supplied seed.

        Bug fix: the original body shadowed the `seed` parameter with a
        hard-coded `seed = 1`, so every trial trained with an identical
        seed; the parameter is now honoured.  `initial_policy_path` is
        accepted for interface compatibility but not used.
        """
        normaliser = None  # raw observations, no state normalisation
        return PPOAgent(nn_dims, normaliser, seed, local_steps_per_epoch=7)


if __name__ == '__main__':
    # Register a long-horizon CartPole variant (100k steps per episode)
    # and run a two-episode PPO smoke test.
    env_id = 'CartPole-v2'  # avoid shadowing the builtin `id`
    gym.envs.register(id=env_id,
                      entry_point='gym.envs.classic_control:CartPoleEnv',
                      max_episode_steps=100_000)
    environment = gym.make(env_id)
    action_count = environment.action_space.n
    state_count = environment.observation_space.shape[0]
    # Read by PPOAgentBuilder.create_agent as a module-level name.
    nn_dims = (state_count, 64, 64, action_count)

    result_dir = os.path.join(get_project_dir(), 'result/cartpole/ppo-test/')
    run_config = ActivityConfig(num_episodes=2, out_path=result_dir)

    builder = PPOAgentBuilder(action_count)
    session = Learning(listener=BasicFunctions(render=False))
    session.learn(environment, builder, run_config)
from spyrl.listener.impl.file_log_listener import RewardType
from example.lunarlander.helper.lunarlander_discretiser import LunarLanderDiscretiser,\
    LunarLanderDiscretiser24576

__author__ = "Budi Kurniawan"
__copyright__ = "Copyright 2021, Budi Kurniawan"
__license__ = "GPL"
__version__ = "0.1.0"

if __name__ == '__main__':
    # D2DSPL-ACET training run: actor-critic with traces plus a
    # supervised classifier distilled from the best samples.
    environment = gym.make('LunarLander-v2')
    action_count = environment.action_space.n
    classifier_sample_cap = 500
    episode_count = 10000
    session_id = '24'
    checkpoints = [5000, 10000]
    run_config = ActivityConfig(start_trial=1,
                                num_trials=10,
                                num_episodes=episode_count,
                                out_path='result/lunarlander/d2dspl-acet-' +
                                str(episode_count) + '-' + session_id + '/')
    builder = D2DSPLActorCriticTracesAgentBuilder(
        action_count, LunarLanderDiscretiser24576(),
        classifier_sample_cap, None, [80, 80])
    session = Learning(
        listener=BasicFunctions(render=False,
                                draw=False,
                                reward_type=RewardType.TOTAL,
                                milestone_episodes=checkpoints))
    # E-mail a notification when the run finishes.
    session.add_listener(Gmailer("D2DSPL-ACET-10000-24"))
    session.learn(environment, builder, run_config)