示例#1
0
    def __init__(self,
                 env,
                 kind="diff",
                 gamma=0.8,
                 weight=0.9,
                 advantage=False):
        """Assemble a ``QBrain`` for ``env`` and pass it to ``MultiDQNAgent``.

        Args:
            env: Environment object; ``observation_space.shape[0]`` and
                ``action_space.n`` are read and handed to the model factory.
            kind: Forwarded to ``QBrain`` as ``kind``. With ``"qv"`` the brain
                receives the two-element tuple unpacked from
                ``create_qv_models``; any other value uses the single result
                of ``create_model``.
            gamma: Forwarded to ``QBrain`` as ``gamma``.
            weight: Forwarded to ``QBrain`` as ``diff_qnet_weight``.
            advantage: Forwarded to ``QBrain`` as ``advantage``.
        """
        n_obs = env.observation_space.shape[0]
        n_act = env.action_space.n

        if kind != "qv":
            model = self.create_model(n_obs, n_act)
        else:
            # Unpack explicitly so exactly two models are required.
            q_net, v_net = self.create_qv_models(n_obs, n_act)
            model = (q_net, v_net)

        brain_options = dict(
            kind=kind,
            advantage=advantage,
            gamma=gamma,
            v_selectivity=False,
            qnet_soft_update=0.01,
            diff_qnet_weight=weight,
        )
        brain = QBrain(model, **brain_options)

        # NOTE(review): ["mse"] is presumably the list of loss names expected
        # by QBrain.compile -- confirm against QBrain.
        optimizer = Adam(lr=1e-3)
        brain.compile(optimizer, ["mse"])

        MultiDQNAgent.__init__(
            self, env, brain, train_sample_size=1000, train_batch_size=50)
示例#2
0
 def __init__(self, env, kind="diff", gamma=0.99, diff_qnet_weight=0.7):
     """Create a model and ``QBrain`` for ``env``, then run ``MultiDQNAgent.__init__``.

     Args:
         env: Environment object; ``observation_space.shape[0]`` and
             ``action_space.n`` are passed to ``create_model``.
         kind: Forwarded to ``QBrain`` as ``kind``.
         gamma: Forwarded to ``QBrain`` as ``gamma``.
         diff_qnet_weight: Forwarded to ``QBrain`` under the same name.
     """
     n_obs = env.observation_space.shape[0]
     n_act = env.action_space.n
     model = self.create_model(n_obs, n_act)

     brain_options = dict(
         kind=kind,
         gamma=gamma,
         v_selectivity=False,
         qnet_soft_update=0.01,
         diff_qnet_weight=diff_qnet_weight,
     )
     brain = QBrain(model, **brain_options)

     # NOTE(review): ["mse"] is presumably the list of loss names expected
     # by QBrain.compile -- confirm against QBrain.
     optimizer = Adam(lr=1e-3)
     brain.compile(optimizer, ["mse"])

     MultiDQNAgent.__init__(
         self, env, brain, train_sample_size=1000, train_batch_size=50)
示例#3
0

# One environment and one replay memory object are created up front; the same
# ``memory`` instance is passed to every brain built in the loop below.
env = TankTargetEnv()
memory = ReplayMemory(100000, v_selectivity=True)
tanks = []

# ``range`` replaces the Python 2-only ``xrange`` so the script also runs on
# Python 3; iterating three values behaves the same on both versions.
for i in range(3):

    model = create_model(env.observation_space.shape[-1],
                         env.action_space.shape[-1])
    brain = QBrain(model,
                   typ="diff",
                   memory=memory,
                   soft_update=0.01,
                   gamma=0.99)
    # NOTE(review): ["mse"] is presumably the list of loss names expected by
    # QBrain.compile -- confirm against QBrain.
    brain.compile(Adam(lr=1e-3), ["mse"])
    if i > 0:
        brain.transfer(tanks[0].Brain)  # make all brains the same initially

    tanks.append(TankAgent(env, brain, train_sample_size=1000))

# A single controller receives the environment and all three agents.
controller = SynchronousMultiAgentController(env,
                                             tanks,
                                             rounds_between_train=10000,
                                             episodes_between_train=1)

# Not consumed within the visible lines; presumably used by code that follows.
taus = [0.01, 0.1, 1.0, 2.0]
ntaus = len(taus)
t = 0

test_policy = BoltzmannQPolicy(0.005)