def main():
    """Evaluate a pre-trained Sarsa Q-table on a force-perturbed LunarLander.

    Requires a manual patch to lunar_lander.py (line 280, just before
    ``self.world.Step(1.0/FPS, 6*30, 2*30)``)::

        self.lander.ApplyForceToCenter((
            np.random.normal(loc=SIDE_ENGINE_POWER, scale=SIDE_ENGINE_POWER / 3),  # side
            np.random.normal(loc=MAIN_ENGINE_POWER, scale=MAIN_ENGINE_POWER / 3)   # main
        ), True)

    (Earlier experiments used POWER/6 means or uniform noise in
    +-INITIAL_RANDOM/32 instead — see version history.)
    """
    num_iter = 1000
    # Pre-trained Q-table; "5X4Y" presumably names the state discretization
    # grid — confirm against the training script.
    with open('results/sarsa_data/sarsa_Q_5X4Ys.json') as json_file:
        Q = json.load(json_file)
    env = lander.LunarLander()
    # seg=10: reward sequence appears segmented every 10 episodes (presumed
    # from the x-axis construction below — confirm in noisy_lander).
    r_seq = noisy_lander(env, Q, render=False, num_iter=num_iter, seg=10)
    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])
    plt.plot(x, y, label='Sarsa Agent reward (Force)')
    plt.legend()  # fix: the label is never rendered without a legend
    plt.savefig("results/sarsa_Force_agent_slightbb.png")
    np.savetxt("results/sarsa_Force_agent_slightbb.txt", y)
def main():
    """Train a Sarsa agent for 100000 episodes (rendered) and save the
    reward curve as a PNG.

    NOTE(review): the learned Q-table returned by sarsa_lander is discarded
    here — compare the variant that dumps it to results/sarsa_Q.json.
    Confirm whether that is intentional for this run.
    """
    num_iter = 100000
    env = lander.LunarLander()
    # seg=50: one reward point per 50-episode segment (presumed — confirm).
    Q, r_seq = sarsa_lander(env, render=True, num_iter=num_iter, seg=50)
    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])
    plt.plot(x, y, label='Sarsa reward')
    plt.legend()  # fix: the label is never rendered without a legend
    plt.savefig("sarsa_reward.png")
def main():
    """Train a Sarsa agent for 10000 episodes (rendered) and save the reward
    curve as both a plot and a text file.

    The plot label says "Noisy", so this run presumably uses a perturbed
    environment — confirm which lander build is on the path.
    """
    num_iter = 10000
    env = lander.LunarLander()
    # seg=100: one reward point per 100-episode segment (presumed — confirm).
    Q, r_seq = sarsa_lander(env, render=True, num_iter=num_iter, seg=100)
    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])
    plt.plot(x, y, label='Noisy Sarsa reward')
    plt.legend()  # fix: the label is never rendered without a legend
    plt.savefig("results/noisy_sarsa_reward.png")
    np.savetxt("results/noisy_sarsa_reward.txt", y)
def main():
    """Run a uniformly random policy for 10000 episodes and save its reward
    curve — a baseline for comparison with the learned agents."""
    num_iter = 10000
    env = lander.LunarLander()
    # seg=100: one reward point per 100-episode segment (presumed — confirm).
    r_seq = random_lander(env, render=False, num_iter=num_iter, seg=100)
    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])
    plt.plot(x, y, label='Random reward')
    plt.legend()  # fix: the label is never rendered without a legend
    plt.savefig("results/random_reward.png")
    np.savetxt("results/random_reward.txt", y)
def main():
    """Train a linear function approximator on LunarLander and persist the
    learned weight matrix to disk."""
    learning_rate = 1e-2
    env = lander.LunarLander()
    # Small random initial weights: 4 actions x 9 features.
    theta = np.random.randn(4, 9) / 100.0
    theta = linear_approximation_lander(
        theta, learning_rate, env, render=True, num_iter=100
    )
    print("Final theta: ", theta)
    np.savetxt("weights/linear_approximation_theta.txt", theta)
def main():
    """Evaluate a pre-trained Sarsa Q-table in the noisy environment for
    1000 episodes and save the reward curve.

    "3XY" in the Q-table filename presumably names the state discretization
    grid — confirm against the training script.
    """
    num_iter = 1000
    with open('results/sarsa_data/sarsa_Q_3XY.json') as json_file:
        Q = json.load(json_file)
    env = lander.LunarLander()
    # seg=10: one reward point per 10-episode segment (presumed — confirm).
    r_seq = noisy_lander(env, Q, render=False, num_iter=num_iter, seg=10)
    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])
    plt.plot(x, y, label='Noisy Agent reward')
    plt.legend()  # fix: the label is never rendered without a legend
    plt.savefig("results/noisy_agent.png")
    np.savetxt("results/noisy_agent.txt", y)
def main():
    """Train a Sarsa agent for 10000 episodes; save the reward curve
    (PNG + text) and the learned Q-table (JSON)."""
    num_iter = 10000
    env = lander.LunarLander()
    # seg=100: one reward point per 100-episode segment (presumed — confirm).
    Q, r_seq = sarsa_lander(env, render=True, num_iter=num_iter, seg=100)
    y = np.array(r_seq)
    x = np.linspace(0, num_iter, y.shape[0])
    plt.plot(x, y, label='Sarsa reward')
    plt.legend()  # fix: the label is never rendered without a legend
    plt.savefig("results/sarsa_reward.png")
    np.savetxt("results/sarsa_reward.txt", y)
    # fix: context manager guarantees the file is closed even on error
    # (original used open/write/close with no try/finally).
    with open("results/sarsa_Q.json", "w") as f:
        json.dump(Q, f, indent=4)
# --- Fragment: tail of a DQN experience-replay training step. The enclosing
# --- function's header (and the locals states/actions/rewards/next_states/
# --- finishes it receives) is outside this view; code kept as-is.
    finishes = np.array(finishes)
    states = np.squeeze(states)
    next_states = np.squeeze(next_states)
    # Network predictions for the Bellman target and the values to update.
    q_vals_next_state = model.predict_on_batch(next_states)
    q_vals_target = model.predict_on_batch(states)
    max_q_values_next_state = np.amax(q_vals_next_state, axis=1)
    # Bellman update r + gamma * max_a' Q(s', a') for the taken actions only;
    # (1 - finishes) zeroes the bootstrap term on terminal transitions.
    q_vals_target[np.arange(batch_size), actions] = rewards + gamma * (
        max_q_values_next_state) * (1 - finishes)
    model.fit(states, q_vals_target, verbose=0)
    # Decay exploration after each training step, floored at min_eps.
    global epsilon
    if epsilon > min_eps:
        epsilon *= 0.996


if __name__ == '__main__':
    env = lander.LunarLander()
    # env.seed(0)
    num_episodes = 400
    np.random.seed(0)
    scores = []
    for i in range(num_episodes + 1):
        score = 0
        state = env.reset()
        finished = False
        # Checkpoint the model every 50 episodes.
        # NOTE(review): backslashes here are literal path separators (".\s"
        # and ".\m" are not recognized escapes) — Windows-only path; confirm.
        if i != 0 and i % 50 == 0:
            model.save(".\saved_models\model_" + str(i) + "_episodes.h5")
        # Inner episode loop, capped at 3000 steps (presumed — loop body is
        # truncated at the end of this chunk).
        for j in range(3000):
            state = np.reshape(state, (1, 8))
            # Epsilon-greedy action selection over the 4 discrete actions.
            if np.random.random() <= epsilon:
                action = np.random.choice(4)
            else:
                # (chunk ends here — greedy branch continues beyond this view)
def main():
    """Run the baseline agent on a fresh LunarLander environment, rendered."""
    env = lander.LunarLander()
    baseline_lander(env, render=True)
def main():
    """Run the random agent on a fresh LunarLander environment, rendered."""
    env = lander.LunarLander()
    random_lander(env, render=True)
def main():
    """Run the heuristic agent on a fresh LunarLander environment, rendered."""
    env = lander.LunarLander()
    heuristic_lander(env, render=True)