示例#1
0
import torch
import matplotlib.pyplot as plt
import agent
from unityagents import UnityEnvironment
from configs import get_dqn_cfg_defaults

# Turn on matplotlib's interactive mode for the rest of the module.
plt.ion()
# Keep the HYPER_PARAMETER node of the default DQN configuration at module
# level so it is read only once.
cfgs = get_dqn_cfg_defaults().HYPER_PARAMETER


def show_results(env, brain_name, agent, n_episodes=3,
                 checkpoint_path='./results/checkpoint_dqn_vec.pth'):
    """Replay a trained agent for a few episodes and return the scores.

    The saved weights are loaded into ``agent.qnetwork_local`` and the agent
    is then run with gradient tracking disabled until each episode reports
    that it is done.

    Params
    ======
        env: environment whose ``reset(train_mode=False)`` and
            ``step(action)`` results can be indexed by ``brain_name``; the
            indexed value must expose ``vector_observations``, ``rewards``
            and ``local_done`` sequences (only element 0 of each is used)
        brain_name: key used to index the results of ``env.reset`` and
            ``env.step``
        agent: object providing ``act(state)`` and a ``qnetwork_local``
            module that accepts ``load_state_dict``
        n_episodes (int): number of episodes to replay
        checkpoint_path (str): file holding the saved state dict

    Returns
    =======
        list: the accumulated reward of each replayed episode, in order
    """
    # map_location keeps every storage where torch.load deserialized it,
    # so no tensor is moved to the device it was saved from.
    state_dict = torch.load(checkpoint_path,
                            map_location=lambda storage, loc: storage)
    agent.qnetwork_local.load_state_dict(state_dict)

    scores = []
    with torch.no_grad():
        for _ in range(n_episodes):
            score = 0
            env_info = env.reset(train_mode=False)[brain_name]
            state = env_info.vector_observations[0]
            done = False
            while not done:
                action = agent.act(state)
                env_info = env.step(action)[brain_name]
                state = env_info.vector_observations[0]  # next state
                score += env_info.rewards[0]
                done = env_info.local_done[0]  # episode finished?
            scores.append(score)
    return scores
示例#2
0
import torch
import torch.nn as nn
import torch.nn.functional as F
from configs import get_dqn_cfg_defaults

# MODEL_PARAMETER node of the default DQN configuration; QNetwork reads its
# hidden-layer widths (H1, H2, H3) from here.
cfgs_model = get_dqn_cfg_defaults().MODEL_PARAMETER


class QNetwork(nn.Module):
    """Actor (Policy) Model."""
    def __init__(self, state_size, action_size, seed):
        """Seed the RNG and create the four fully connected layers.
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            seed (int): Random seed
        """
        super().__init__()
        # Seed before any layer is created so weight initialisation is
        # reproducible for a given seed.
        self.seed = torch.manual_seed(seed)
        self.state_size = state_size
        self.action_size = action_size

        # Hidden-layer widths come from the module-level model config.
        h1, h2, h3 = cfgs_model.H1, cfgs_model.H2, cfgs_model.H3
        self.fc1 = nn.Linear(state_size, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, action_size)

    def forward(self, state):
        """Build a network that maps state -> action values."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))