# TODO: load DQN agent
# ...
# Evaluation script: build the online and target Q-networks, restore a trained
# DQN agent from disk, run a fixed number of test episodes, and write the
# per-episode rewards plus their mean/std to a timestamped JSON file.
state_dim = 4
num_actions = 2
Q = MLP(state_dim, num_actions)
Q_target = MLP(state_dim, num_actions)
agent = DQNAgent(Q, Q_target, num_actions)
# NOTE(review): machine-specific absolute Windows path to the checkpoint;
# the script only works where this exact file exists.
agent.load("C:\\Users\\Monish\\Desktop\\workspace\\exercise3_R\\reinforcement_learning\\models_cartpole\\dqn_agent_199.pt")

n_test_episodes = 15
episode_rewards = []
for i in range(n_test_episodes):
    # Evaluation only: deterministic action selection, no training updates.
    stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
    episode_rewards.append(stats.episode_reward)

# save results in a dictionary and write them into a .json file
results = dict()
results["episode_rewards"] = episode_rewards
results["mean"] = np.array(episode_rewards).mean()
results["std"] = np.array(episode_rewards).std()

# exist_ok=True avoids the check-then-create race of exists() + mkdir().
os.makedirs("./results", exist_ok=True)

fname = "./results/cartpole_results_dqn-%s.json" % datetime.now().strftime("%Y%m%d-%H%M%S")
# The context manager guarantees the file is flushed and closed even if
# json.dump raises; the original left the handle open.
with open(fname, "w") as fh:
    json.dump(results, fh)
num_actions = 2 Q_network = MLP(state_dim, num_actions) Q_target_network = MLP(state_dim, num_actions) agent = DQNAgent(Q=Q_network, Q_target=Q_target_network, num_actions=num_actions) agent.load(args.model) n_test_episodes = args.episodes episode_rewards = [] for i in range(n_test_episodes): stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True, max_timesteps=250) episode_rewards.append(stats.episode_reward) print('Episode %d (reward: %d)' % (i, stats.episode_reward)) # save results in a dictionary and write them into a .json file results = dict() results["episode_rewards"] = episode_rewards results["mean"] = np.array(episode_rewards).mean() results["std"] = np.array(episode_rewards).std() if not os.path.exists("./results"): os.mkdir("./results") fname = "./results/cartpole_results_dqn-%s.json" % datetime.now().strftime(