Example #1
# Imports assumed for this snippet: keras-rl (rl.*) on top of Keras, plus the
# debugging envs that shipped with older gym releases (the import path may
# differ depending on the gym version).
import random

import numpy as np
from numpy.testing import assert_allclose
from gym.envs.debugging import TwoRoundDeterministicRewardEnv
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy


def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-1,
                   policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.  # act greedily during evaluation
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
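
The enable_double_dqn=True flag is what separates this test from plain DQN: the online network picks the greedy next action and the slowly-updated target network evaluates it. Below is a minimal numpy sketch of that target computation; the helper name and signature are made up for illustration and are not keras-rl's actual internals.

import numpy as np

def double_dqn_targets(rewards, q_next_online, q_next_target, terminal, gamma=0.99):
    # Action selection uses the online network ...
    best_actions = np.argmax(q_next_online, axis=1)
    # ... while the bootstrap value comes from the target network.
    bootstrap = q_next_target[np.arange(len(rewards)), best_actions]
    return rewards + gamma * (1.0 - terminal) * bootstrap

# Toy batch of two transitions with two actions each.
r = np.array([0.0, 3.0])
q_online = np.array([[1.0, 2.0], [0.5, 0.2]])
q_target = np.array([[1.5, 1.0], [0.4, 0.6]])
done = np.array([0.0, 1.0])
print(double_dqn_targets(r, q_online, q_target, done))   # -> [0.99, 3.0]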
Example #2
# Same imports as Example #1, with SARSAAgent imported from rl.agents.
def test_sarsa():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    sarsa = SARSAAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=50, policy=policy)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)
    policy.eps = 0.
    h = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
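
SARSAAgent is the on-policy counterpart: it bootstraps from the action the behaviour policy actually takes next, which is why no replay memory or target network is passed above. A hypothetical tabular sketch of the underlying update rule (illustration only, not keras-rl code):

import numpy as np

def sarsa_update(Q, s, a, r, s_next, a_next, alpha=0.1, gamma=0.99, done=False):
    # On-policy TD(0): bootstrap from the action actually taken in s_next.
    target = r if done else r + gamma * Q[s_next, a_next]
    Q[s, a] += alpha * (target - Q[s, a])
    return Q

Q = np.zeros((3, 2))  # toy table: 3 states, 2 actions
Q = sarsa_update(Q, s=0, a=0, r=0.0, s_next=1, a_next=1)
Q = sarsa_update(Q, s=1, a=1, r=3.0, s_next=2, a_next=0, done=True)
print(Q)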
Example #3
def test_sarsa():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    # SarsaAgent is keras-rl's alias for SARSAAgent; otherwise this test matches Example #2.
    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=50, policy=policy)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)
    policy.eps = 0.
    h = sarsa.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
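
Both SARSA tests, like the DQN tests above, assert a mean episode reward of 3.0, which they treat as the optimal return of TwoRoundDeterministicRewardEnv. For readers without the old gym debugging envs installed, here is a purely illustrative stand-in with the same two-round shape; the payoff table is invented except for the optimum of 3 that the tests assert.

class ToyTwoRoundEnv:
    # Hypothetical payoff for (first_action, second_action); only the maximum
    # of 3 is taken from the tests above, the other entries are made up.
    PAYOFF = {(0, 0): 0, (0, 1): 3, (1, 0): 1, (1, 1): 2}

    def reset(self):
        self.first_action = None
        return 0                                # observation: which round we are in

    def step(self, action):
        if self.first_action is None:           # round 1: remember the action, no reward yet
            self.first_action = action
            return 1, 0, False, {}
        reward = self.PAYOFF[(self.first_action, action)]
        return 2, reward, True, {}              # round 2: pay out and terminate

env = ToyTwoRoundEnv()
env.reset()
print(env.step(0))   # (1, 0, False, {})
print(env.step(1))   # (2, 3, True, {})  -- the best action pair earns 3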
Example #4
# Same imports as Example #1.
def test_duel_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,
                   target_model_update=1e-1, policy=policy, enable_double_dqn=False, enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
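
With enable_dueling_network=True, DQNAgent rebuilds the head of the model so that Q-values are assembled from a state-value stream and an advantage stream. A small numpy sketch of the usual "average" aggregation follows; it illustrates the idea and is not keras-rl's exact layer code.

import numpy as np

def dueling_q_values(value, advantages):
    # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); subtracting the mean
    # advantage keeps the value/advantage decomposition identifiable.
    value = np.asarray(value, dtype=float).reshape(-1, 1)      # (batch, 1)
    advantages = np.asarray(advantages, dtype=float)           # (batch, nb_actions)
    return value + advantages - advantages.mean(axis=1, keepdims=True)

# One state, two actions: V = 2.0, A = [0.5, -0.5]  ->  Q = [2.5, 1.5]
print(dueling_q_values([2.0], [[0.5, -0.5]]))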
Example #5
               policy=policy,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=400,
               target_model_update=1e-1,
               enable_double_dqn=True,
               enable_dueling_network=True)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# CSV log: one row of train/validation metrics per outer iteration.
outputFile = open("2014.csv", "w+")
outputFile.write(
    "iteration,trainAccuracy,trainCoverage,trainReward,validationAccuracy,validationCoverage,validationReward\n"
)
iteration = 0

policy.eps = 0.1
for i in range(100):
    # Alternate a training phase on trainEnv with a validation pass, logging metrics each cycle.
    dqn.fit(trainEnv,
            nb_steps=3000,
            visualize=False,
            callbacks=[trainer],
            verbose=0)
    (episodes, trainCoverage, trainAccuracy, trainReward) = trainer.getInfo()
    dqn.test(validationEnv,
             nb_episodes=300,
             verbose=0,
             callbacks=[validator],
             visualize=False)
    (episodes, validCoverage, validAccuracy, validReward) = validator.getInfo()
    outputFile.write(
        str(iteration) + "," + str(trainAccuracy) + "," + str(trainCoverage) +
Example #6
               enable_double_dqn=False,
               memory=memory,
               policy=policy)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

#dqn.load_weights("Q.weights")

bot.send_message(chat_id=telegramChatID,
                 text="Experiment started - " +
                 datetime.datetime.now().strftime("%H:%M"))

percIncrement = 100 / epochs
perc = 0
for i in range(epochs):
    policy.eps = 1  # reset to fully random exploration at the start of each epoch
    dqn.fit(trainEnv, nb_steps=10000, visualize=False, verbose=0)
    dqn.test(testEnv, nb_episodes=20, verbose=0, visualize=False)
    perc += percIncrement
    try:
        # Report progress via Telegram; fall back to the console if the API call fails.
        bot.send_message(chat_id=telegramChatID,
                         text=str(perc) + " % - " +
                         datetime.datetime.now().strftime("%H:%M"))
    except Exception:
        print(str(perc) + " % - " + datetime.datetime.now().strftime("%H:%M"))
    # Point the custom environments at the next epoch's walk CSV files.
    trainEnv.changeOutput("walks/train/walk" + str(i + 1) + ".csv")
    testEnv.changeOutput("walks/test/walk" + str(i + 1) + ".csv")

dqn.save_weights("Q.weights", overwrite=True)

bot.send_message(chat_id=telegramChatID,