Exemplo n.º 1
0
if __name__ == "__main__":
    # Script entry point: builds the environment and a DeepQNetwork agent,
    # then runs EPS episodes of at most STEP steps each, recording every
    # (state, action, reward, next state) transition in the agent.
    # maze game
    env = Env()
    # The agent is sized from the environment's action and feature counts;
    # the remaining keyword arguments are fixed hyperparameters.
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      learning_rate=0.0001,
                      reward_decay=0.9,
                      e_greedy=0.75,
                      replace_target_iter=2000,
                      memory_size=MEMORYCAPACITY,
                      batch_size=64
                      # output_graph=True
                      )
    # NOTE(review): presumably reloads previously saved network weights so
    # training resumes from a checkpoint -- confirm in DeepQNetwork.
    RL.restore_model()
    for episode in range(EPS):
        # NOTE(review): presumably (re)generates the map for a fresh
        # episode -- confirm in Env.build_map.
        env.build_map()
        # Running sum of the rewards collected during this episode.
        value = 0

        for step in range(STEP):
            # Snapshot the state before acting. A copy is taken, presumably
            # because env.step updates env.state in place -- TODO confirm.
            state = env.state.copy()
            # `action` is an index; the environment receives the matching
            # entry of action_space rather than the index itself.
            action = RL.choose_action(state)
            env.step(action_space[action])
            # Snapshot of the state after the action has been applied.
            state_ = env.state.copy()
            # compute_reward yields the step reward and a distance value
            # (NOTE(review): presumably distance to the goal -- confirm).
            reward, dist = compute_reward(state, state_)

            # The stored transition uses the action index, not the
            # action_space entry that was sent to the environment.
            RL.store_transition(state, action, reward, state_)
            value += reward
            # End the episode early once the distance falls below DIST.
            if dist < DIST:
                break
            if RL.memory_counter > MEMORYCAPACITY: