import numpy as np
import torch
import matplotlib.pyplot as plt


def run_evaluation(config, path, episodes=50):
    # NECAgent and the wrapped env come from the project; `path` points to a saved nec_net checkpoint
    env = config["env"]
    agent = NECAgent(config)
    agent.nec_net.load_state_dict(torch.load(path))
    env.eval()
    agent.eval()

    rewards = []
    for ep in range(1, episodes + 1):
        obs, reward_sum = env.reset(), 0
        while True:
            env.render(mode='rgb_array')
            obs = torch.from_numpy(np.float32(obs))
            action = agent.step(obs)
            next_obs, reward, done, info = env.step(action)
            reward_sum += reward
            obs = next_obs
            if done:
                # for CartPole, do not count the reward from the terminal transition
                if config['env_name'].startswith('CartPole'):
                    reward_sum -= reward
                rewards.append(reward_sum)
                break

    plt.plot(range(1, episodes + 1), rewards)
    plt.savefig(f"eval_{config['exp_name']}.png")
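# Example invocation (a sketch, not from the original code): the config keys shown are
# the ones run_evaluation reads (env, env_name, exp_name); CartPoleWrapper is the
# project's wrapper used elsewhere in this file, the checkpoint path is hypothetical,
# and NECAgent(config) will typically require additional keys not shown here.
import gym

eval_config = {
    "env": CartPoleWrapper(gym.make("CartPole-v1")),
    "env_name": "CartPole-v1",
    "exp_name": "cartpole_nec",
    # ... plus whatever NECAgent(config) expects (device, embedding size, DND capacity, ...)
}
run_evaluation(eval_config, path="trained_agents/nec_cartpole_nec_1.pth", episodes=50)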
from itertools import count

from tensorboard_logger import configure, log_value


def main(env_id, embedding_size):
    env = wrap_deepmind(make_atari(env_id), scale=True)
    embedding_model = DQN(embedding_size)
    agent = NECAgent(env, embedding_model)

    # subprocess.Popen(["tensorboard", "--logdir", "runs"])
    configure("runs/pong-run")

    for t in count():
        # the first iteration runs the agent's warmup routine; later ones run full episodes
        if t == 0:
            reward = agent.warmup()
        else:
            reward = agent.episode()
        print("Episode {}\nTotal Reward: {}".format(t, reward))
        log_value('score', reward, t)
import os

from tqdm import tqdm


def run_training(config, return_agent=False):
    env = config["env"]
    env.train()
    agent = NECAgent(config)
    done = True
    epsilon = 1  # assumes config["initial_epsilon"] == 1

    for t in tqdm(range(1, config["max_steps"] + 1)):
        if done:
            obs, done = env.reset(), False
            agent.new_episode()

        # linear epsilon annealing between the configured start and end steps
        if config["epsilon_anneal_start"] < t <= config["epsilon_anneal_end"]:
            epsilon -= (config["initial_epsilon"] - config["final_epsilon"]) / (
                config["epsilon_anneal_end"] - config["epsilon_anneal_start"]
            )
            agent.set_epsilon(epsilon)

        # env.render()
        if type(obs) is np.ndarray:
            obs = torch.from_numpy(np.float32(obs))

        action = agent.step(obs.to(config['device']))
        next_obs, reward, done, info = env.step(action)
        solved = agent.update((reward, done))
        if solved:
            return
        obs = next_obs

        if t >= config["start_learning_step"]:
            if t % config["replay_frequency"] == 0:
                agent.optimize()

            if t % config["eval_frequency"] == 0:
                # agent.eval()
                # evaluate agent here
                path = f'{os.getcwd()}/pong/trained_agents/nec_{agent.exp_name}_{t // config["eval_frequency"]}.pth'
                torch.save(agent.nec_net.state_dict(), path)
                # agent.train()

    if return_agent:
        return agent
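# Illustrative training config (a sketch, not from the original project): the keys listed
# are the ones run_training reads directly; the numeric values are placeholders, and
# NECAgent(config) will typically expect further keys (e.g. env_name and exp_name used
# by run_evaluation above).
train_config = {
    "env": CartPoleWrapper(gym.make("CartPole-v1")),
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "max_steps": 100_000,
    "initial_epsilon": 1.0,
    "final_epsilon": 0.01,
    "epsilon_anneal_start": 5_000,
    "epsilon_anneal_end": 25_000,
    "start_learning_step": 1_000,
    "replay_frequency": 4,
    "eval_frequency": 10_000,
}
trained_agent = run_training(train_config, return_agent=True)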
def main():
    env = PongWrapper(gym.make('Pong-v0'))
    embedding_model = AtariDQN(5)
    agent = NECAgent(env, embedding_model)
    agent.train()
def main():
    env = CartPoleWrapper(gym.make('CartPole-v0'))  # CartPole env, so use the CartPole wrapper
    embedding_model = CartPoleDQN(5)
    agent = NECAgent(env, embedding_model)
    agent.train()
def main():
    env = CartPoleWrapper(gym.make('CartPole-v1'))
    embedding_model = DQN(5)
    agent = NECAgent(env, embedding_model, test_period=25)
    agent.train()
        # tail of Pseudo_env.step(): advance to the next row while it belongs to the
        # same patient (same EMPI); otherwise the trajectory is finished
        if self.df.loc[self.cur, 'EMPI'] == self.df.loc[self.cur + 1, 'EMPI']:
            next_state = self.df.loc[self.cur + 1, feature_fields].values
            reward = reward + intermediate_reward(self.cur_state, next_state)
            done = 0
            self.cur += 1
        else:
            # trajectory is finished
            next_state = np.zeros(len(self.cur_state))
            done = 1
        self.cur_state = next_state
        return next_state, reward, done, action


env = Pseudo_env(df)
embedding_model = Embed(len(feature_fields), 32)
agent = NECAgent(env, embedding_model, batch_size=32, sgd_lr=1e-5)

for t in count():
    # the first 100 iterations run warmup passes; later ones run full episodes
    if t < 100:
        reward = agent.warmup()
    else:
        reward = agent.episode()
    print("Episode {}\nTotal Reward: {}".format(t, reward))

# held-out test set: count rows and unique patients (EMPI)
test_df = pd.read_csv('HFpEF data/aim3data_test_set.csv')
a = test_df.copy()
num = np.size(a, 0)
patient_num = np.size(pd.unique(a['EMPI']))

from torch import Tensor
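# Hypothetical skeleton (an assumption, not the original class) showing where the step()
# fragment above would sit; only the EMPI-based transition logic comes from the code above.
# reset(), the base reward, and the use of feature_fields in __init__ are guesses.
class Pseudo_env:
    def __init__(self, df):
        self.df = df.reset_index(drop=True)
        self.cur = 0
        self.cur_state = self.df.loc[self.cur, feature_fields].values

    def reset(self):
        # assumption: restart from the current row's state
        self.cur_state = self.df.loc[self.cur, feature_fields].values
        return self.cur_state

    def step(self, action):
        reward = 0  # assumption: base reward before the intermediate shaping term
        # ... followed by the EMPI / intermediate_reward transition logic shown above ...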