import torch
import matplotlib.pyplot as plt
import agent
from unityagents import UnityEnvironment
from configs import get_dqn_cfg_defaults

plt.ion()
cfgs = get_dqn_cfg_defaults().HYPER_PARAMETER


def show_results(env, brain_name, agent):
    """Replay a saved DQN checkpoint for three evaluation episodes.

    The weights stored in ``./results/checkpoint_dqn_vec.pth`` are loaded
    into ``agent.qnetwork_local``; the agent then drives ``env`` with
    ``train_mode=False`` until each episode reports completion.

    Params
    ======
        env: environment exposing ``reset(train_mode=...)`` and
            ``step(action)``, each returning a mapping indexed by brain name
        brain_name: key selecting the brain's info in those mappings
        agent: object providing ``qnetwork_local`` and ``act(state)``
            (this parameter shadows the module-level ``agent`` import)
    """
    # Resolve the loader before reading the checkpoint; tensors are kept on
    # the storage they are deserialized to (identity map_location).
    restore = agent.qnetwork_local.load_state_dict
    restore(
        torch.load('./results/checkpoint_dqn_vec.pth',
                   map_location=lambda storage, loc: storage))

    # Evaluation only: no gradients are tracked while acting.
    with torch.no_grad():
        for _ in range(3):
            score = 0
            info = env.reset(train_mode=False)[brain_name]
            state = info.vector_observations[0]
            done = False
            while not done:
                info = env.step(agent.act(state))[brain_name]
                state = info.vector_observations[0]  # observation after the step
                score += info.rewards[0]             # running episode return
                done = info.local_done[0]            # episode-finished flag
import torch
import torch.nn as nn
import torch.nn.functional as F
from configs import get_dqn_cfg_defaults

# Hidden-layer widths (H1, H2, H3) are read from the project's DQN config.
cfgs_model = get_dqn_cfg_defaults().MODEL_PARAMETER


class QNetwork(nn.Module):
    """Fully connected Q-network built from four linear layers (fc1-fc4).

    Layer widths run state_size -> H1 -> H2 -> H3 -> action_size, where the
    hidden sizes come from ``cfgs_model``.
    """

    def __init__(self, state_size, action_size, seed):
        """Initialize parameters and build model.

        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            seed (int): Random seed passed to ``torch.manual_seed``
        """
        super(QNetwork, self).__init__()
        # Seed torch's RNG before the layers are created; the value returned
        # by torch.manual_seed is kept on the instance.
        self.seed = torch.manual_seed(seed)
        self.state_size = state_size
        self.action_size = action_size
        self.fc1 = nn.Linear(self.state_size, cfgs_model.H1)
        self.fc2 = nn.Linear(cfgs_model.H1, cfgs_model.H2)
        self.fc3 = nn.Linear(cfgs_model.H2, cfgs_model.H3)
        self.fc4 = nn.Linear(cfgs_model.H3, self.action_size)

    def forward(self, state):
        """Build a network that maps state -> action values.

        Only the first two ReLU-activated layers are visible in this view.
        """
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        # NOTE(review): the visible source ends here -- fc3/fc4 are not
        # applied and nothing is returned in this view. Presumably the
        # remainder of forward() was cut off; confirm against the full file.