def exp_double_duel():
    """Compare a plain DQN, a dueling DQN and double dueling DQNs with different
    target-update intervals on CartPole-v0, plotting the per-episode rewards."""
    import matplotlib.pyplot as plt
    eps = 1000
    env = gym.make('CartPole-v0')
    env.seed(19)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    log_dir = './logs/prova_pole'

    # Baseline: plain dense DQN
    net = models.DenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                          layer_size=(24, 24), lr=0.001)
    a = Agent(game=env, net=net, log_dir=log_dir,
              pol=policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                        attr='eps', value_max=1.0, value_min=0.1,
                                        value_test=0.5, nb_steps=500))
    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='DQN')

    # Dueling DQN with a (4, 4) value stream
    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                              layer_size=(24, 24), lr=0.001, layer_size_val=(4, 4))
    env.seed(19)
    a = Agent(game=env, net=net, log_dir=log_dir,
              pol=policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                        attr='eps', value_max=1.0, value_min=0.1,
                                        value_test=0.5, nb_steps=500))
    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='Duel DQN 4 4')

    # Double dueling DQN for several target-update intervals
    for i in [50, 100, 200, 300, 500, 750, 1000, 2000, 3000]:
        net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                                  layer_size=(24, 24), lr=0.001, layer_size_val=(4, 4))
        n = models.DoubleDQNWrapper(network=net, update_time=i)
        a = Agent(game=env, net=n, log_dir=log_dir,
                  pol=policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                            attr='eps', value_max=1.0, value_min=0.1,
                                            value_test=0.5, nb_steps=500))
        r = a.learn(eps, False, 10, verbose=False)
        plt.plot(range(eps), r, label='Double Duel DQN 4 4 ' + str(i))

    plt.legend()
    plt.savefig('exp_double_duel.png')
def exp_ddqn():
    """Compare a plain DQN against double DQNs with different target-update
    intervals on CartPole-v0, plotting the per-episode rewards."""
    import matplotlib.pyplot as plt
    eps = 1000
    env = gym.make('CartPole-v0')
    env.seed(19)

    # Baseline: plain dense DQN
    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
    log_dir = './logs/prova_pole' + pol.name
    n = models.DenseDQN(log_dir=log_dir, action_size=env.action_space.n,
                        state_size=env.observation_space.shape[0], layer_size=(24, 24), lr=0.001)
    a = Agent(game=env, net=n, log_dir=log_dir, pol=pol)
    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='DQN')

    # Double DQN for several target-update intervals
    for i in [50, 100, 200, 300, 500, 750, 1000, 2000, 3000]:
        pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                    attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
        log_dir = './logs/prova_pole' + pol.name
        n = models.DoubleDQNWrapper(network=models.DenseDQN(log_dir=log_dir,
                                                            action_size=env.action_space.n,
                                                            state_size=env.observation_space.shape[0],
                                                            layer_size=(24, 24), lr=0.001),
                                    update_time=i)
        a = Agent(game=env, net=n, log_dir=log_dir, pol=pol)
        r = a.learn(eps, False, 10, verbose=False)
        plt.plot(range(eps), r, label='Update time: {}'.format(i))

    plt.legend()
    plt.savefig('exp_ddqn.png')
def __init__(self, config=None):
    if config is None:
        config = {}
    self.env = wrap_dqn(gym.make(config.get('game', 'PongNoFrameskip-v4')))
    self.action_size = self.env.action_space.n
    self.to_vis = config.get('visualize', False)
    self.verbose = config.get('verbose', True)
    self.backup = config.get('backup', 25)
    self.episodes = config.get('episodes', 300)
    self.depth = config.get('depth', 4)
    self.state_size = config.get('space', (84, 84))
    self.model = None
    self._target_model = None

    # Replay memory: prioritized or uniform sampling
    self.prioritized = config.get('prioritized', False)
    if self.prioritized:
        self.memory = PrioritizedMemory(max_len=config.get('mem_size', 100000))
    else:
        self.memory = SimpleMemory(max_len=config.get('mem_size', 100000))

    # Online network: dueling or plain convolutional head
    if config.get('duel', False):
        self.model = self._duel_conv()
    else:
        self.model = self._conv()
    self.model.compile(Adam(lr=config.get('lr', 1e-4)), loss=huber_loss)

    # Optional target network, initialised as a copy of the online network
    if config.get('target', True):
        self._target_model = clone_model(self.model)
        self._target_model.set_weights(self.model.get_weights())
    self._time = 0
    self.update_time = config.get('target_update', 1000)

    self.env._max_episode_steps = None
    self.batch_size = config.get('batch', 32 * 3)
    self.to_observe = config.get('to_observe', 10000)

    self.log_dir = config['log_dir']
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)
    plot_model(self.model, to_file=os.path.join(self.log_dir, 'model.png'), show_shapes=True)

    attr = {'batch size': self.batch_size,
            'to observe': self.to_observe,
            'depth': self.depth}
    self.results = {'info': attr}

    load_prev = config.get('load', False)
    self.gamma = None

    # Exploration policy, either from the config or the annealed default below
    pol = None
    if 'pol' in config:
        if config['pol'] == 'random':
            pol = policy.RandomPolicy()
        elif config['pol'] == 'eps':
            pol = policy.EpsPolicy(config.get('pol_eps', 0.1))
    self.pol = pol

    # Resume from the most recent checkpoint directory, if requested
    if load_prev:
        path = sorted([int(x) for x in os.listdir(self.log_dir)
                       if os.path.isdir(os.path.join(self.log_dir, x))])
        if len(path) != 0:
            load_prev = self.load(os.path.join(self.log_dir, str(path[-1])))

    if self.pol is None:
        self.pol = policy.AnnealedPolicy(
            inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
            attr='eps', value_max=1.0, value_min=config.get('ex_min', 0.02),
            value_test=0.5, nb_steps=config.get('ex_steps', 100000))
    if self.gamma is None:
        self.gamma = policy.EpsPolicy(float(config.get('gamma', 0.99))).get_value
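# A minimal sketch of how the config-driven constructor above could be used.
# The class name ImageAgent and the way it is imported are assumptions; the keys
# are the ones read by __init__ above, and the values are only illustrative.
example_config = {
    'game': 'PongNoFrameskip-v4',
    'duel': True,               # use the dueling convolutional head
    'prioritized': False,       # SimpleMemory instead of PrioritizedMemory
    'mem_size': 100000,
    'lr': 1e-4,
    'target': True,             # keep a separate target network
    'target_update': 1000,
    'batch': 32 * 3,
    'to_observe': 10000,
    'gamma': 0.99,
    'log_dir': './logs/pong_example',   # required: __init__ has no default for it
    'load': False,
}
# agent = ImageAgent(config=example_config)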
import sys
sys.path.append('..')

import policy
from dqn.agent_with_depth_less_memory import ImageAgent as ia_less
from dqn.models_with_depth import DenseDQN, DoubleDQNWrapper, ConvDQM, ConvDDQN

n = ConvDQM(action_size=6, state_size=(84, 84), depth=4, lr=1e-4)
n = DoubleDQNWrapper(n, 10000)
# n = DenseDQN(action_size=3, state_size=6, depth=4, lr=0.001, layer_size=(64, 64))

pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                            attr='eps', value_max=1.0, value_min=0.02, value_test=0.5,
                            nb_steps=100000)

agent = ia_less(pol=pol, network=n, to_observe=10000, max_len_memory=100000,
                log_dir='../pong/good_wrappers_DDQN_32x3-8/', load_prev=True, gamma=0.99)
# agent = ram_less(pol=pol, network=n, to_observe=50000, max_len_memory=1000000,
#                  log_dir='../logs/pong/ram/depth2_huber_DQN/', load_prev=False)
def exp_duel():
    """Compare a plain DQN against dueling DQNs with different value-stream
    sizes on CartPole-v0, printing the final reward and plotting the curves."""
    import matplotlib.pyplot as plt
    eps = 1000
    env = gym.make('CartPole-v0')
    env.seed(19)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    # Baseline: plain dense DQN
    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
    log_dir = './logs/prova_pole' + pol.name
    net = models.DenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                          layer_size=(24, 24), lr=0.001)
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)
    r = a.learn(eps, False, 10, verbose=False)
    print(r[-1])
    plt.plot(range(eps), r, label='DQN')

    # Dueling DQN, value stream (12, 12)
    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                              layer_size=(24, 24), lr=0.001, layer_size_val=(12, 12))
    env.seed(19)
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)
    r = a.learn(eps, False, 10, verbose=False)
    print(r[-1])
    plt.plot(range(eps), r, label='Duel DQN 12 12')

    # Dueling DQN, value stream (8, 8)
    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                              layer_size=(24, 24), lr=0.001, layer_size_val=(8, 8))
    env.seed(19)
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)
    r = a.learn(eps, False, 10, verbose=False)
    print(r[-1])
    plt.plot(range(eps), r, label='Duel DQN 8 8')

    # Dueling DQN, value stream (4, 4)
    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                              layer_size=(24, 24), lr=0.001, layer_size_val=(4, 4))
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)
    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='Duel DQN 4 4')
    print(r[-1])

    # Dueling DQN, value stream (24, 24)
    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                attr='eps', value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)
    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size,
                              layer_size=(24, 24), lr=0.001, layer_size_val=(24, 24))
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)
    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='Duel DQN 24 24')
    print(r[-1])

    plt.legend()
    plt.savefig('exp_duel.png')
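# A possible entry point for the CartPole experiments defined above; that the
# three functions are meant to be run from one script is an assumption.
if __name__ == '__main__':
    exp_ddqn()
    exp_duel()
    exp_double_duel()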