# --- DQN training loop ----------------------------------------------------
# NOTE(review): the source was whitespace-mangled onto one line; the loop
# structure below (learn() once per environment step, load_dict() target
# sync once per episode) is the most plausible reconstruction — confirm
# against the original repository.

agent = Agent(**cfg)
agent.load_dict()  # initial sync of the target network with the policy net

for episode in range(300):
    s0 = env.reset()
    # Starts at 1, apparently to offset the forced -1 terminal reward below
    # so a full-length episode still reports a non-negative total.
    total_reward = 1
    while True:
        a0 = agent.act(s0)
        s1, r1, done, _ = env.step(a0)
        if done:
            r1 = -1  # penalize the terminal transition before storing it
        agent.put(s0, a0, r1, s1)  # push transition into the replay buffer
        if done:
            # Terminal reward is intentionally not added to total_reward.
            break
        total_reward += r1
        s0 = s1
        agent.learn()  # one optimization step per environment step
    agent.load_dict()  # refresh target network after each episode
    print("episode", episode, "total_reward", total_reward)

# Persist the trained policy network.
path = os.path.realpath(args.ckpt_path)
# exist_ok=True avoids the check-then-create race of exists()+makedirs().
os.makedirs(path, exist_ok=True)
ckpt_name = os.path.join(path, "dqn.ckpt")  # portable join, not "+"
save_checkpoint(agent.policy_net, ckpt_name)
position_bounds, # Position bounds velocity_bounds # Velocity bounds ) # Instanced Agent agent = Agent( policy, # NeuralNetwork class model, actions, # Actions array (after discretization) episodes, # Max number of episodes epoches, # Max number of epoches per episode greed_factor # Greed factor ) # Getting the result array of len(episodes) length results = agent.learn() # Success episodes success_results = [x for x in results if x["state"][0] >= position_bounds[1]] # Writing the log file log_file = open("model.log", "w+") log_file.write("Episodes: {0}\n".format(episodes)) log_file.write("Epoches: {0}\n".format(epoches)) log_file.write("Epsilon-Greedy: {0}\n".format(greed_factor)) log_file.write("\n------- {0} successful episodes -------\n\n".format( len(success_results))) for r in success_results: log_file.write(str(r) + "\n")