"EXPERIENCE_BUFFER_SIZE": 10000, "GRADIENT_CLIPPING": False, "DOUBLE_QLEARNING": True, "SOFT_UPDATE": True, "SOFT_UPDATE_TAU": 5e-3, "NUM_AGENTS": 2, "SHARING": True, "FOCUSED_SHARING": True, "SHARE_BATCH_SIZE": 100, # 128 "FOCUSED_SHARING_THRESHOLD": 10 } sharings = [True] for sharing in sharings: params["FOCUSED_SHARING"] = sharing exp = MultiAgentExperiment(params) result = exp.run() print("Sharing: {}".format(sharing)) print("Method {} took an average of {:.2f} episodes".format(params["METHOD"], result)) # Method DQN took an average of 263.90 episodes # DQN without regular experience sharing took: # Method DQN took an average of 263.27 episodes for agent 0 # Method DQN took an average of 254.87 episodes for agent 1 # DQN with regular experience sharing took: # Method DQN took an average of 179.00 episodes for agent 0 # Method DQN took an average of 208.33 episodes for agent 1 # DQN with focused experience sharing took:
# Build the remaining experiment configurations on top of the experience-
# sharing baseline. Each variant starts from its own (shallow) copy of
# ``dqn_sharing`` so the overrides below do not modify the baseline mapping.
# ``dqn``, ``dqn_prio``, ``dqn_sharing``, ``focus``, ``prio`` and ``exp_group``
# are expected to be defined earlier in this script.
dqn_focus_sharing = dqn_sharing.copy()
dqn_focus_sharing.update(focus)

# others
dqn_prio_sharing = dqn_sharing.copy()
dqn_prio_sharing.update(prio)

dqn_prio_focus_sharing = dqn_sharing.copy()
dqn_prio_focus_sharing.update(prio)
# ``focus`` is applied last, so it wins for any key it shares with ``prio``.
dqn_prio_focus_sharing.update(focus)

## prepare the experiment
# Maps the experiment name to its parameter set. The name is also what
# decides which experiment class is used in the loop below.
experiments = {
    'dqn': dqn,
    'dqn_prio': dqn_prio,
    'dqn_sharing': dqn_sharing,
    'dqn_prio_sharing': dqn_prio_sharing,
    'dqn_focus_sharing': dqn_focus_sharing,
    'dqn_prio_focus_sharing': dqn_prio_focus_sharing,
}

# Run the experiments in reverse definition order (last entry first). The
# items are materialised into a list so that they can be reversed.
for exp_name, params in reversed(list(experiments.items())):
    print(exp_name, params)
    # Any configuration whose name contains 'sharing' is run with
    # MultiAgentExperiment; every other one with UntilWinExperiment.
    # Both are constructed with the same three arguments.
    experiment_cls = (
        MultiAgentExperiment if 'sharing' in exp_name else UntilWinExperiment
    )
    exp = experiment_cls(params, exp_name, exp_group)
    exp.run()
"SOFT_UPDATE_TAU": 5e-3, "PRIORITIZED_REPLAY": True, "PRIO_REPLAY_ALPHA": 0.6, "PRIO_REPLAY_BETA_START": 0.4, "PRIO_REPLAY_BETA_FRAMES": 10000, } exp = UntilWinExperiment(params) # add sharing new_params = { "NUM_AGENTS": 2, "SHARE_BATCH_SIZE": 128, "SHARING": True, "FOCUSED_SHARING": True, "FOCUSED_SHARING_THRESHOLD": 3, } params.update(new_params) exp = MultiAgentExperiment(params) result = exp.run() print("Method {} took an average of {:.2f} episodes".format( params["METHOD"], result)) # Method DQN took an average of 263.90 episodes # Method DQN took an average of 145.40 episodes # with a much larger buffer and prioritized replay # Meth