Example #1
def eval_q_net():
    """
    连跑训练地图各5000次
    日志输出单步成功率,回合成功率
    :return:
    """
    brain = BrainDQN()
    brain.epsilon = 0  # act greedily: no exploration during evaluation

    for index in range(len(_MAP_LIST)):
        maze = env.Maze(_MAP_LIST,
                        is_show=True,
                        is_loop=False,
                        map_index=index)
        maze.effective_epsilon = 1
        for _ in range(1001):  # episodes per map
            observation, key_observation = maze.reset()
            while True:
                maze.render(False)

                action, is_random = brain.getAction(observation,
                                                    key_observation)

                next_observation, next_key_observation, r, done = maze.step(
                    action, is_random)

                observation = next_observation.copy()

                key_observation = next_key_observation.copy()

                if done:
                    break
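
The docstring above promises per-step and per-episode success-rate logging,
while the loop itself delegates that to the environment. A minimal sketch of
keeping those counters in the loop instead (eval_with_stats, the counter
names, and the "positive reward means success" test are assumptions, not
part of the original code):

import logging


def eval_with_stats(brain, maze, episodes=1001):
    """Hypothetical helper tallying the rates eval_q_net() says it logs."""
    step_success = step_total = episode_success = 0
    for _ in range(episodes):
        observation, key_observation = maze.reset()
        while True:
            action, is_random = brain.getAction(observation, key_observation)
            observation, key_observation, r, done = maze.step(action,
                                                              is_random)
            step_total += 1
            step_success += int(r > 0)  # assumed: positive reward == success
            if done:
                episode_success += int(r > 0)
                break
    logging.info("step success rate: %.3f, episode success rate: %.3f",
                 step_success / step_total, episode_success / episodes)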
Example #2
def main(is_debug):
    brain = BrainDQN()
    if is_debug:
        brain.epsilon = 0  # debug runs act greedily, without exploration

    maze = env.Maze(_MAP_LIST, is_show=True)

    while True:
        observation, key_observation = maze.reset()
        while True:
            maze.render(is_debug)

            action, is_random = brain.getAction(observation, key_observation)

            next_observation, next_key_observation, r, done = maze.step(
                action, is_random)

            # Pass the full transition to the agent so it can learn from it.
            brain.setPerception(observation, key_observation, next_observation,
                                next_key_observation, action, r, done)

            observation = next_observation.copy()

            key_observation = next_key_observation.copy()

            if done:
                break
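
Both main loops rely on BrainDQN.getAction returning an (action, is_random)
pair and zero out brain.epsilon whenever exploration is unwanted. The class
itself is not shown; a plausible epsilon-greedy reading of that interface
(get_action_sketch, q_values, and n_actions are assumed names) is:

import random

import numpy as np


def get_action_sketch(q_values, epsilon, n_actions):
    # Explore with probability epsilon, otherwise act greedily on the
    # Q-values; the flag mirrors the (action, is_random) pair above.
    if random.random() < epsilon:
        return random.randrange(n_actions), True
    return int(np.argmax(q_values)), False

With epsilon set to 0, as in the evaluation and debug paths, this always
picks the greedy action.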
Example #3
import logging

import matplotlib.pyplot as plt
import numpy as np


def eval_q_net():
    maze = env.Maze(_MAP1, is_show=True)

    memory = ReplayMemory()
    memory.load("eval_data")

    brain = BrainDQN()

    # No exploration during evaluation
    brain.epsilon = 0
    r_list = []

    for data in memory.memory:
        # transition = (s, action, r, s_, done); only s and the recorded
        # reward are used, since the action is re-chosen by the network.
        s, _, r, _, _ = data

        action, _ = brain.getAction(s)

        maze.set_observation(s)

        _, r_eval, _ = maze.step(action, False)

        # Gap between the reward the trained policy earns and the
        # reward recorded with the sample.
        r_list.append(r_eval - r)

    logging.info(sum(r_list) / len(r_list))
    plt.plot(np.arange(len(r_list)), r_list)
    plt.ylabel('reward loss')
    plt.xlabel('sample')
    plt.show()
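
Examples #3 and #5 drive a ReplayMemory through push_back, load, dump, and
its iterable .memory attribute. The class is not shown here; a minimal
pickle-backed sketch consistent with that usage (the capacity default and
the missing-file handling are assumptions) is:

import os
import pickle
from collections import deque


class ReplayMemory:
    def __init__(self, capacity=100000):
        self.memory = deque(maxlen=capacity)

    def push_back(self, transition):
        # transition = (s, action, r, s_, done), as stored in Example #5
        self.memory.append(transition)

    def dump(self, path):
        with open(path, "wb") as f:
            pickle.dump(list(self.memory), f)

    def load(self, path):
        # Tolerate a missing file so a fresh run can start empty.
        if os.path.exists(path):
            with open(path, "rb") as f:
                self.memory.extend(pickle.load(f))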
Example #4
def main():
    brain = BrainDQN()

    maze = env.Maze(_MAP1, is_show=True)

    while True:
        observation = maze.reset()
        while True:
            maze.render()
            action, is_random = brain.getAction(observation)
            next_observation, r, done = maze.step(action, is_random)
            # Feed the transition back so the agent can store it and learn.
            brain.setPerception(observation, next_observation, action, r, done)
            observation = next_observation.copy()

            if done:
                break
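
setPerception is the only learning hook in both training loops, and its
body is not shown. In DQN code of this shape it typically appends the
transition to replay memory and trains once enough experience exists; a
sketch under exactly those assumptions (replay_memory, batch_size, and
train_step are hypothetical attributes, not confirmed BrainDQN API):

def set_perception_sketch(brain, s, s_next, action, r, done):
    # Hypothetical: store the transition, then run one gradient step
    # as soon as a full minibatch of experience is available.
    brain.replay_memory.push_back((s, action, r, s_next, done))
    if len(brain.replay_memory.memory) >= brain.batch_size:
        brain.train_step()  # sample a minibatch and update the Q-network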
Example #5
def gen_eval_data_manually():
    maze = env.Maze(_MAP1, is_show=True)

    # Keys follow the numeric-keypad layout (5 = stay in place).
    key_map = {
        "1": env.LEFT_DOWN,
        "2": env.DOWN,
        "3": env.RIGHT_DOWN,
        "4": env.LEFT,
        "5": env.STOP,
        "6": env.RIGHT,
        "7": env.LEFT_UP,
        "8": env.UP,
        "9": env.RIGHT_UP
    }

    memory = ReplayMemory()
    memory.load("eval_data")  # extend any previously collected transitions

    done = True  # force a reset on the first pass through the loop
    while True:
        if done:
            observation = maze.reset()
        s = input('input your path. Enter "exit" to close\n')
        if s == "exit":
            break
        if s not in key_map:
            continue  # ignore anything that is not a keypad direction

        next_observation, r, done = maze.step(key_map[s], True)

        memory.push_back(
            (observation.copy(), key_map[s], r, next_observation.copy(), done))

        observation = next_observation.copy()

        maze.render(is_sleep=True)

    memory.dump("eval_data")
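
The key_map mirrors the numeric keypad: 1/2/3 form the bottom row
(left-down, down, right-down), 4/5/6 the middle row with 5 as STOP, and
7/8/9 the top row, so directions can be typed by feel. Because the memory
is loaded from "eval_data" at the start and dumped back to the same file at
the end, each manual session appends to the previously collected evaluation
set instead of replacing it.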