示例#1
0
    "EXPERIENCE_BUFFER_SIZE": 10000,
    "GRADIENT_CLIPPING": False,
    "DOUBLE_QLEARNING": True,
    "SOFT_UPDATE": True,
    "SOFT_UPDATE_TAU": 5e-3,
    "NUM_AGENTS": 2,
    "SHARING": True,
    "FOCUSED_SHARING": True,
    "SHARE_BATCH_SIZE": 100, # 128
    "FOCUSED_SHARING_THRESHOLD": 10
}

# Values of the FOCUSED_SHARING flag to evaluate; one experiment is built and
# run per value, and its result is reported as an average episode count.
sharings = [True]
for sharing in sharings:
    # Override the flag in the params dict before constructing the experiment.
    params.update({"FOCUSED_SHARING": sharing})
    exp = MultiAgentExperiment(params)
    result = exp.run()
    print("Sharing: {}".format(sharing))
    print("Method {} took an average of {:.2f} episodes".format(
        params["METHOD"], result))

# Method DQN took an average of 263.90 episodes

# DQN without regular experience sharing took:
# Method DQN took an average of 263.27 episodes for agent 0
# Method DQN took an average of 254.87 episodes for agent 1

# DQN with regular experience sharing took:
# Method DQN took an average of 179.00 episodes for agent 0
# Method DQN took an average of 208.33 episodes for agent 1

# DQN with focused experience sharing took:
示例#2
0
# Each variant is a fresh dict: the `dqn_sharing` settings first, then the
# override dict(s) layered on top (later entries win on duplicate keys).
# NOTE(review): assumes dqn_sharing, focus and prio are plain dicts — confirm.
dqn_focus_sharing = {**dqn_sharing, **focus}

# others
dqn_prio_sharing = {**dqn_sharing, **prio}

# prio is applied before focus, so focus wins where both set the same key.
dqn_prio_focus_sharing = {**dqn_sharing, **prio, **focus}

## prepare the experiment
# NOTE: `exp_group` must already be defined at this point; it is passed to
# every experiment constructed in the loop below.
#
# Registry of experiment name -> parameter dict. The loop that follows
# iterates it in reverse insertion order and treats any name containing
# 'sharing' as a MultiAgentExperiment, everything else as an
# UntilWinExperiment, so the key names are significant.
experiments = {
    'dqn': dqn,
    'dqn_prio': dqn_prio,
    'dqn_sharing': dqn_sharing,
    'dqn_prio_sharing': dqn_prio_sharing,
    'dqn_focus_sharing': dqn_focus_sharing,
    'dqn_prio_focus_sharing': dqn_prio_focus_sharing,
}

# Run every registered experiment, last-registered first.
for exp_name, params in reversed(list(experiments.items())):
    print(exp_name, params)
    # Names containing 'sharing' use the multi-agent experiment class; all
    # other names use UntilWinExperiment.
    experiment_cls = (
        MultiAgentExperiment if 'sharing' in exp_name else UntilWinExperiment
    )
    exp = experiment_cls(params, exp_name, exp_group)
    exp.run()
示例#3
0
    "SOFT_UPDATE_TAU": 5e-3,
    "PRIORITIZED_REPLAY": True,
    "PRIO_REPLAY_ALPHA": 0.6,
    "PRIO_REPLAY_BETA_START": 0.4,
    "PRIO_REPLAY_BETA_FRAMES": 10000,
}

# NOTE(review): this UntilWinExperiment instance is rebound to a
# MultiAgentExperiment below before it is ever run — confirm whether the
# construction is still needed.
exp = UntilWinExperiment(params)

# add sharing
new_params = dict(
    NUM_AGENTS=2,
    SHARE_BATCH_SIZE=128,
    SHARING=True,
    FOCUSED_SHARING=True,
    FOCUSED_SHARING_THRESHOLD=3,
)
# Merge the sharing settings into the existing params in place, then build
# the experiment that is actually run.
params.update(new_params)
exp = MultiAgentExperiment(params)

result = exp.run()
print("Method {} took an average of {:.2f} episodes".format(params["METHOD"], result))

# Method DQN took an average of 263.90 episodes

# Method DQN took an average of 145.40 episodes
# with a much larger buffer and prioritized replay

# Meth