def show_gen(idx):
    """Play and render one episode driven by ``GenAction(idx)``.

    A ``SnakeEnv`` is created with ``use_simple=True``, reset, and then
    stepped with whatever action the ``GenAction`` instance returns for the
    current observation. Every step is rendered. The function returns once
    the environment reports ``done``.

    Args:
        idx: Forwarded unchanged to ``GenAction``.
    """
    genaction = GenAction(idx)
    env = SnakeEnv(use_simple=True)
    try:
        obs = env.reset()
        while True:
            action = genaction(obs)
            # Calling the env advances it one step; only the second item
            # (next observation) and third item (done flag) are used here.
            _, obs, done, _ = env(action)
            env.render()
            if done:
                break
    finally:
        # Release the environment even if stepping or rendering raised.
        env.close()
from stable_baselines import PPO2
from snake_env import SnakeEnv

# Train a PPO2 agent on the snake environment, or replay a saved policy.
#
# NOTE(review): argparse, DummyVecEnv and CnnPolicy are used below but are
# not imported in this section -- confirm they are imported earlier in the
# file.

parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='test')
args = parser.parse_args()

# Keep the raw environment under its own name so the factory lambda does not
# close over a name that is rebound to the vectorized wrapper one line later.
base_env = SnakeEnv((20, 20), 'standard')
env = DummyVecEnv([lambda: base_env])

if args.mode == 'train':
    model = PPO2(CnnPolicy, env, verbose=1)
    model.learn(total_timesteps=20000)
    model.save('policy_baseline_snake')
elif args.mode == 'test':
    # ``load`` is a classmethod that returns a new model; calling it on an
    # existing instance and dropping the result leaves that instance with its
    # untrained weights, so the returned model must be the one that is used.
    model = PPO2.load('policy_baseline_snake', env=env)
    obs = env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            # Keep the observation fed to the next predict() in sync with the
            # environment state produced by this reset.
            obs = env.reset()

env.close()
def play(record=0, no_render=False):
    """Play one snake episode by following BFS paths, optionally recording it.

    Each step locates the start cell (value ``2``) and the target cell
    (value ``-1``) in the observation, asks ``bfs`` for a path between them
    and takes the action that leads to the second node of that path. When
    ``bfs`` returns ``None`` a random action from ``np.random.randint(0, 4)``
    is taken instead.

    The function always renders once and then waits for Enter on stdin
    before the episode starts, regardless of the arguments.

    Args:
        record: Number of ``(observation, action)`` samples to collect. A
            falsy value (the default ``0``) disables recording.
        no_render: When true the environment is created with
            ``need_render=False`` and the per-step rendering is skipped.

    Returns:
        ``(x, y)`` -- the list of observations and the list of matching
        actions -- as soon as ``record`` samples have been collected.
        ``None`` when the episode finishes first, or when not recording.
    """
    env = SnakeEnv(need_render=not no_render, alg='最短路径')
    try:
        obs = env.reset()
        env.render()
        input()  # Wait for Enter before starting the episode.
        x, y = [], []

        # Keys are (start - next_node) coordinate differences.
        directions = {
            (-1, 0): env.right,
            (1, 0): env.left,
            (0, -1): env.down,
            (0, 1): env.up,
        }

        need_record = bool(record)
        # Per-step rendering only happens for a visible, non-recording run.
        show_frames = not record and not no_render

        while True:
            if show_frames:
                env.render()

            # np.where returns one index array per axis. ``.item()`` extracts
            # the single match without relying on the deprecated implicit
            # conversion of a 1-D array to a scalar.
            # NOTE: the start is built as (axis 1, axis 0) while the target
            # is (axis 0, axis 1); this asymmetry is kept exactly as it was.
            head = np.where(obs == 2)
            src = head[1].item(), head[0].item()
            target = np.where(obs == -1)
            dst = target[0].item(), target[1].item()

            paths = bfs(obs, start=src, dst=dst)

            if paths is None:
                # No path found: fall back to a random action.
                # (An earlier experiment that retried towards a random
                # intermediate target was disabled and has been removed.)
                action = np.random.randint(0, 4)
            else:
                next_node = paths[1]
                dire = src[0] - next_node[0], src[1] - next_node[1]
                action = directions[dire]

            if need_record:
                x.append(obs)
                y.append(action)
                if len(y) >= record:
                    return x, y
                if len(y) % 1000 == 0:
                    print(len(y))  # Progress report every 1000 samples.

            # Calling the env advances it one step; only the second item
            # (next observation) and third item (done flag) are used here.
            _, obs, done, _ = env(action)

            if done:
                print(env.status.score)
                sleep(1.5)
                break

            if show_frames:
                env.render()
    finally:
        # Close the environment on every exit path, including the early
        # return taken when enough samples have been recorded.
        env.close()