Exemplo n.º 1
0
def main():
    """Evaluate a stored Sarsa Q-table with ``noisy_lander`` and save the curve.

    The Q-table is read from ``results/sarsa_data/sarsa_Q_5X4Ys.json``. The
    sequence returned by ``noisy_lander`` is plotted against an evenly spaced
    axis from 0 to the iteration count, written to a PNG, and dumped as text.

    Setup note from the original author -- in lunar_lander.py, add the
    following at line 280, before ``self.world.Step(1.0/FPS, 6*30, 2*30)``:

    self.lander.ApplyForceToCenter((
            np.random.normal(loc=SIDE_ENGINE_POWER, scale=SIDE_ENGINE_POWER / 3),  # side
            np.random.normal(loc=MAIN_ENGINE_POWER, scale=MAIN_ENGINE_POWER / 3)  # main

            #np.random.normal(loc=SIDE_ENGINE_POWER / 6, scale=SIDE_ENGINE_POWER / 3), #side
            #np.random.normal(loc=MAIN_ENGINE_POWER / 6, scale=MAIN_ENGINE_POWER / 3) #main

            #self.np_random.uniform(-INITIAL_RANDOM/32, INITIAL_RANDOM/32),
            #self.np_random.uniform(-INITIAL_RANDOM/32, INITIAL_RANDOM/32)
        ), True)
    """
    total_iterations = 1000

    # Load the previously trained Q-table before building the environment.
    with open('results/sarsa_data/sarsa_Q_5X4Ys.json') as q_file:
        q_table = json.load(q_file)

    rewards = np.array(
        noisy_lander(
            lander.LunarLander(),
            q_table,
            render=False,
            num_iter=total_iterations,
            seg=10,
        )
    )
    # One x position per recorded value, spread over the whole run.
    iteration_axis = np.linspace(0, total_iterations, rewards.shape[0])

    plt.plot(iteration_axis, rewards, label='Sarsa Agent reward (Force)')
    plt.savefig("results/sarsa_Force_agent_slightbb.png")
    np.savetxt("results/sarsa_Force_agent_slightbb.txt", rewards)
Exemplo n.º 2
0
def main():
    """Run ``sarsa_lander`` with rendering on and save its curve as a PNG.

    Only the second element returned by ``sarsa_lander`` is used; it is
    plotted against an evenly spaced axis from 0 to the iteration count.
    """
    total_iterations = 100000

    # The first returned element (the Q-table) is not needed in this example.
    _, reward_history = sarsa_lander(
        lander.LunarLander(),
        render=True,
        num_iter=total_iterations,
        seg=50,
    )

    rewards = np.array(reward_history)
    iteration_axis = np.linspace(0, total_iterations, rewards.shape[0])

    plt.plot(iteration_axis, rewards, label='Sarsa reward')
    plt.savefig("sarsa_reward.png")
Exemplo n.º 3
0
def main():
    """Run ``sarsa_lander`` and store its curve as both a PNG and a text file.

    The second element returned by ``sarsa_lander`` is plotted with the
    label 'Noisy Sarsa reward' and written under ``results/``.
    """
    total_iterations = 10000

    # The first returned element (the Q-table) is not needed in this example.
    _, reward_history = sarsa_lander(
        lander.LunarLander(),
        render=True,
        num_iter=total_iterations,
        seg=100,
    )

    rewards = np.array(reward_history)
    iteration_axis = np.linspace(0, total_iterations, rewards.shape[0])

    plt.plot(iteration_axis, rewards, label='Noisy Sarsa reward')
    plt.savefig("results/noisy_sarsa_reward.png")

    np.savetxt("results/noisy_sarsa_reward.txt", rewards)
Exemplo n.º 4
0
def main():
    """Run ``random_lander`` without rendering and save the resulting curve.

    The sequence returned by ``random_lander`` is plotted against an evenly
    spaced axis from 0 to the iteration count, then written to a PNG and a
    text file under ``results/``.
    """
    total_iterations = 10000

    reward_history = random_lander(
        lander.LunarLander(),
        render=False,
        num_iter=total_iterations,
        seg=100,
    )

    rewards = np.array(reward_history)
    iteration_axis = np.linspace(0, total_iterations, rewards.shape[0])

    plt.plot(iteration_axis, rewards, label='Random reward')
    plt.savefig("results/random_reward.png")

    np.savetxt("results/random_reward.txt", rewards)
Exemplo n.º 5
0
def main():
    """Run ``linear_approximation_lander`` and save the weights it returns.

    The initial weights are a 4x9 matrix of standard-normal samples divided
    by 100. The value returned by ``linear_approximation_lander`` is printed
    and written to ``weights/linear_approximation_theta.txt``.
    """
    learning_rate = 1e-2
    initial_weights = np.random.randn(4, 9) / 100.0

    environment = lander.LunarLander()
    final_weights = linear_approximation_lander(
        initial_weights, learning_rate, environment, render=True, num_iter=100
    )

    print("Final theta: ", final_weights)
    np.savetxt("weights/linear_approximation_theta.txt", final_weights)
Exemplo n.º 6
0
def main():
    """Evaluate the stored ``sarsa_Q_3XY`` Q-table with ``noisy_lander``.

    The Q-table is loaded from JSON; the sequence returned by
    ``noisy_lander`` is plotted and saved as a PNG and a text file under
    ``results/``.
    """
    total_iterations = 1000

    # Load the previously trained Q-table before building the environment.
    with open('results/sarsa_data/sarsa_Q_3XY.json') as q_file:
        q_table = json.load(q_file)

    reward_history = noisy_lander(
        lander.LunarLander(),
        q_table,
        render=False,
        num_iter=total_iterations,
        seg=10,
    )

    rewards = np.array(reward_history)
    iteration_axis = np.linspace(0, total_iterations, rewards.shape[0])

    plt.plot(iteration_axis, rewards, label='Noisy Agent reward')
    plt.savefig("results/noisy_agent.png")

    np.savetxt("results/noisy_agent.txt", rewards)
Exemplo n.º 7
0
def main():
    """Run ``sarsa_lander``, then save its curve and Q-table under ``results/``.

    The second element returned by ``sarsa_lander`` is plotted and written
    to a PNG and a text file. The first element (the Q-table) is serialized
    as indented JSON to ``results/sarsa_Q.json``.
    """
    num_iter = 10000

    env = lander.LunarLander()
    Q, r_seq = sarsa_lander(env, render=True, num_iter=num_iter, seg=100)

    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])

    plt.plot(x, y, label='Sarsa reward')
    plt.savefig("results/sarsa_reward.png")

    np.savetxt("results/sarsa_reward.txt", y)

    # Serialize before opening the file so a failed dump cannot truncate an
    # existing Q-table on disk.
    q = json.dumps(Q, indent=4)
    # The context manager closes the handle even if the write raises.
    with open("results/sarsa_Q.json", "w") as f:
        f.write(q)
Exemplo n.º 8
0
        finishes = np.array(finishes)
        states = np.squeeze(states)
        next_states = np.squeeze(next_states)
        q_vals_next_state = model.predict_on_batch(next_states)
        q_vals_target = model.predict_on_batch(states)
        max_q_values_next_state = np.amax(q_vals_next_state, axis=1)
        q_vals_target[np.arange(batch_size), actions] = rewards + gamma * (
            max_q_values_next_state) * (1 - finishes)
        model.fit(states, q_vals_target, verbose=0)
        global epsilon
        if epsilon > min_eps:
            epsilon *= 0.996


if __name__ == '__main__':
    env = lander.LunarLander()
    # env.seed(0)
    num_episodes = 400
    np.random.seed(0)
    scores = []
    for i in range(num_episodes + 1):
        score = 0
        state = env.reset()
        finished = False
        if i != 0 and i % 50 == 0:
            model.save(".\saved_models\model_" + str(i) + "_episodes.h5")
        for j in range(3000):
            state = np.reshape(state, (1, 8))
            if np.random.random() <= epsilon:
                action = np.random.choice(4)
            else:
Exemplo n.º 9
0
def main():
    """Run ``baseline_lander`` on a fresh LunarLander with rendering enabled."""
    environment = lander.LunarLander()
    baseline_lander(environment, render=True)
Exemplo n.º 10
0
def main():
    """Run ``random_lander`` on a fresh LunarLander with rendering enabled."""
    environment = lander.LunarLander()
    random_lander(environment, render=True)
def main():
    """Run ``heuristic_lander`` on a fresh LunarLander with rendering enabled."""
    environment = lander.LunarLander()
    heuristic_lander(environment, render=True)