Exemplo n.º 1
0
def show_gen(idx):
    """Run and render a single episode driven by ``GenAction(idx)``.

    A ``SnakeEnv`` is created with ``use_simple=True``, then the policy
    object is called with the latest observation to choose each action
    until the environment reports that the episode is done.

    Args:
        idx: Value forwarded unchanged to ``GenAction``.
            NOTE(review): presumably an index selecting a saved
            generation/individual -- confirm against ``GenAction``.
    """
    genaction = GenAction(idx)
    env = SnakeEnv(use_simple=True)
    try:
        obs = env.reset()
        done = False
        while not done:
            action = genaction(obs)
            # Calling the env performs one step; only the observation and
            # the done flag of the returned 4-tuple are used here.
            _, obs, done, _ = env(action)
            env.render()
    finally:
        # Release the environment even if the policy or a step raises.
        env.close()
Exemplo n.º 2
0
from stable_baselines import PPO2

from snake_env import SnakeEnv

# Command-line entry point: train a PPO2 agent on the snake environment or
# replay a previously saved policy.
parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='test')
args = parser.parse_args()

# PPO2 works on vectorized environments, so wrap the single SnakeEnv.
env = SnakeEnv((20, 20), 'standard')
env = DummyVecEnv([lambda: env])

if args.mode == 'train':

    model = PPO2(CnnPolicy, env, verbose=1)
    model.learn(total_timesteps=20000)
    model.save('policy_baseline_snake')

elif args.mode == 'test':

    # ``load`` is a classmethod that RETURNS the restored model; calling it
    # on an existing instance and discarding the result would leave the
    # untrained weights in use.
    model = PPO2.load('policy_baseline_snake', env=env)
    obs = env.reset()

    for i in range(1000):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            # Keep the observation in sync with the freshly reset env so the
            # next prediction is not made from a stale frame.
            obs = env.reset()

env.close()
Exemplo n.º 3
0
def play(record=0, no_render=False):
    """Play one snake episode by following BFS paths, optionally recording it.

    On every step the cell equal to ``2`` in the observation is used as the
    BFS start and the cell equal to ``-1`` as the BFS destination
    (presumably the snake head and the food -- confirm against
    ``SnakeEnv``). The second node of the returned path decides the move.
    When ``bfs`` finds no usable path, a uniformly random action in
    ``[0, 4)`` is taken instead.

    The function waits for a line on stdin (``input()``) before starting.

    Args:
        record: When truthy, collect ``(observation, action)`` pairs and
            return as soon as this many have been gathered. Per-step
            rendering is skipped while recording.
        no_render: When true, the environment is created with
            ``need_render=False`` and per-step rendering is skipped.

    Returns:
        ``(x, y)`` -- the list of observations and the list of actions --
        once ``record`` samples were collected. ``None`` when not recording,
        or when the episode ends before ``record`` samples were reached.
    """
    # The ``alg`` label is Chinese for "shortest path".
    env = SnakeEnv(need_render=not no_render, alg='最短路径')
    try:
        obs = env.reset()
        env.render()
        input()
        x, y = [], []

        # Keyed by (current - next) coordinate difference along the path.
        directions = {
            (-1, 0): env.right,
            (1, 0): env.left,
            (0, -1): env.down,
            (0, 1): env.up,
        }

        need_record = bool(record)
        show_frames = not record and not no_render

        while True:
            if show_frames:
                env.render()

            # NOTE(review): ``src`` is built as (axis-1, axis-0) while
            # ``dst`` is (axis-0, axis-1). Kept exactly as in the original;
            # verify against what ``bfs`` expects.
            src = np.where(obs == 2)
            src = int(src[1]), int(src[0])
            dst = np.where(obs == -1)
            dst = int(dst[0]), int(dst[1])

            paths = bfs(obs, start=src, dst=dst)

            if paths is None or len(paths) < 2:
                # No route (or a degenerate one with no next cell): fall
                # back to a random move rather than failing on paths[1].
                action = np.random.randint(0, 4)
            else:
                next_cell = paths[1]
                dire = src[0] - next_cell[0], src[1] - next_cell[1]
                action = directions[dire]

            if need_record:
                x.append(obs)
                y.append(action)
                if len(y) >= record:
                    return x, y

                # Progress indicator for long recordings.
                if len(y) % 1000 == 0:
                    print(len(y))

            _, obs, done, _ = env(action)

            if done:
                print(env.status.score)
                sleep(1.5)
                break

        if show_frames:
            env.render()
    finally:
        # Always release the environment, including on the early return
        # taken when enough samples have been recorded.
        env.close()