Example #1
import torch

# Env, Agent, Frstack, DDQN and the ALL_CAPS constants (NUM_FRAMES, USE_GPU,
# POLICY_NET_PATH, NUM_TEST_EPISODES) come from the surrounding project.
mod_action_space = [2, 3, 4, 5]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = Env(device)
# the epsilon arguments are placeholder values (dum_val); exploration is
# switched off below for evaluation
agent = Agent(eps=dum_val,
              eps_min=dum_val,
              eps_max=dum_val,
              eps_decay=dum_val,
              num_actions=len(mod_action_space),
              device=device)
agent.turn_eps_off()
stack = Frstack(initial_frame=env.state)

# create policy net and load saved weights
policy_net = DDQN(NUM_FRAMES, len(mod_action_space))
if USE_GPU:
    policy_net.cuda()


def test():
    policy_net.load_state_dict(torch.load(POLICY_NET_PATH))
    policy_net.eval()

    print("testing...")
    all_rewards = []
    all_images = []

    for episode in range(NUM_TEST_EPISODES):
        env.reset()
        episode_reward = 0
        stack.push(env.state, True)
        curr_state = stack.get_stack()
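
The first snippet ends right after the frame stack is primed; the evaluation loop itself is not shown. Purely as a point of reference, below is a minimal, self-contained sketch of the greedy (epsilon-off) action-selection step such a test loop performs with a Q-network. QNet, the 84x84 frame size, and the tensor shapes are illustrative assumptions, not part of the example above.

import torch
import torch.nn as nn

# Hypothetical stand-in for the project's DDQN network.
class QNet(nn.Module):
    def __init__(self, num_frames, num_actions):
        super().__init__()
        self.fc = nn.Linear(num_frames * 84 * 84, num_actions)

    def forward(self, x):
        return self.fc(x.flatten(start_dim=1))

net = QNet(num_frames=4, num_actions=4).eval()
state = torch.rand(1, 4, 84, 84)            # one stacked-frame observation
with torch.no_grad():                       # no gradients needed at test time
    q_values = net(state)                   # shape: (1, num_actions)
    action = q_values.argmax(dim=1).item()  # greedy action index
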
Example #2
import numpy as np
import torch
import torch.optim as optim

# Env, Agent, PriorityReplayBuffer, Frstack, DDQN, mod_action_space, the
# ALL_CAPS constants and the dtype/dlongtype tensor types come from the
# surrounding project.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = Env(device)
agent = Agent(eps=EPS_MAX,
              eps_min=EPS_MIN,
              eps_max=EPS_MAX,
              eps_decay=EPS_DECAY,
              num_actions=len(mod_action_space),
              device=device)
memory = PriorityReplayBuffer(MEMORY_SIZE)
stack = Frstack(initial_frame=env.state)

# initialize policy and target network
policy_net = DDQN(NUM_FRAMES, len(mod_action_space))
target_net = DDQN(NUM_FRAMES, len(mod_action_space))
if USE_GPU:
    policy_net.cuda()
    target_net.cuda()
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()
# TODO: consider RMSProp vs Adam - DeepMind paper uses RMSProp
optimizer = optim.Adam(params=policy_net.parameters(), lr=ALPHA)
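# A hypothetical RMSProp variant, per the TODO above, could look like
#   optimizer = optim.RMSprop(params=policy_net.parameters(), lr=ALPHA)
# (torch.optim.RMSprop accepts the same params/lr arguments as Adam).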


def experience_replay():
    # experience tuple - (state, action, next_state, reward, done)
    batch, idxs, is_weights = memory.sample(BATCH_SIZE)
    batch = list(zip(*batch))

    # convert experiences from numpy to CUDA (if available) tensors
    state_tensors = torch.from_numpy(np.stack(batch[0])).type(dtype)
    action_tensors = torch.from_numpy(np.stack(batch[1])).type(dlongtype)
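
The second snippet is cut off inside experience_replay, before the actual update. Purely as a hedged illustration of the technique it leads into (not the project's code), the self-contained sketch below computes a Double DQN target and an importance-weighted loss from already-batched tensors; all names, shapes, and values here are assumptions.

import torch
import torch.nn as nn

# Hypothetical stand-ins for the project's policy and target DDQN networks.
policy_net = nn.Linear(8, 4)
target_net = nn.Linear(8, 4)
target_net.load_state_dict(policy_net.state_dict())

gamma = 0.99
batch_size = 32
states      = torch.rand(batch_size, 8)
actions     = torch.randint(0, 4, (batch_size,))
rewards     = torch.rand(batch_size)
next_states = torch.rand(batch_size, 8)
dones       = torch.zeros(batch_size)
is_weights  = torch.ones(batch_size)        # importance-sampling weights from the buffer

# Q(s, a) for the actions that were actually taken
q_sa = policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

with torch.no_grad():
    # Double DQN: the online network chooses the action, the target network evaluates it
    next_a = policy_net(next_states).argmax(dim=1, keepdim=True)
    next_q = target_net(next_states).gather(1, next_a).squeeze(1)
    target = rewards + gamma * next_q * (1.0 - dones)

td_error = target - q_sa
loss = (is_weights * td_error.pow(2)).mean()   # weighted MSE; |td_error| would refresh priorities
loss.backward()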