def main():
    """Train a DQN agent on GameEnv via PLE, periodically evaluating and
    checkpointing the best-scoring model.

    Relies on module-level names: GameEnv, PLE, ReplayMemory, Model, DQN,
    Agent, evaluate, run_episode, logger and the hyperparameter constants
    MEMORY_SIZE, MEMORY_WARMUP_SIZE, GAMMA, LEARNING_RATE.
    """
    render_bool = True
    if not render_bool:
        # Headless SDL backend so pygame can run without a display.
        os.environ["SDL_VIDEODRIVER"] = "dummy"
    # else:
    #     pygame.display.set_mode((800, 600 + 60))

    # Create the environment.
    game = GameEnv()
    p = PLE(game, display_screen=render_bool, fps=60, force_fps=False)
    p.init()

    # Build the agent stack (model -> algorithm -> agent, PARL-style).
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    width, height = p.getScreenDims()
    rpm = ReplayMemory(MEMORY_SIZE)  # DQN experience replay pool
    obs_dim = 1, width, height
    model = Model(act_dim=act_dim)
    alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    # e_greed: probability of acting randomly (exploration), decayed per step.
    agent = Agent(alg,
                  obs_dim=obs_dim,
                  act_dim=act_dim,
                  e_greed=0.5,
                  e_greed_decrement=0.00001)

    # Restore a previous checkpoint if present and score it as the baseline.
    best_eval_reward = -1000
    if os.path.exists('./model_dqn.ckpt'):
        print("loaded model:", './model_dqn.ckpt')
        agent.restore('./model_dqn.ckpt')
        best_eval_reward = evaluate(p, agent, render=render_bool)

    # Warm up the replay buffer so early training batches are diverse enough.
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(p, agent, rpm)

    max_episode = 200000

    # Training loop; evaluation episodes are not counted toward `episode`.
    episode = 0
    while episode < max_episode:
        # train part
        for i in range(0, 5):
            total_reward = run_episode(p, agent, rpm)
            episode += 1

        # test part
        eval_reward = evaluate(p, agent, render=render_bool)
        logger.info('episode:{} e_greed:{} test_reward:{}'.format(
            episode, agent.e_greed, eval_reward))

        # Periodic checkpoint.  BUG FIX: the original formatted the filename
        # with `rate_num`, which is not defined anywhere in this scope and
        # would raise NameError; tag checkpoints with the episode count
        # instead.  (If `rate_num` was a module-level global elsewhere,
        # restore it -- TODO confirm.)
        agent.save('./model_dqn_%d.ckpt' % episode)
        if best_eval_reward < eval_reward:
            best_eval_reward = eval_reward
            agent.save('./model_dqn.ckpt')
class MyEnv(Environment):
    """PLE-backed adapter for the deer-style `Environment` interface that
    exposes a downscaled grayscale screen as the observation."""

    # Mode flag used by reset() to separate validation from training stats.
    VALIDATION_MODE = 0
    # Number of stacked frames reported by inputDimensions().
    memSize = 4
    # original size is 288x512 so dividing
    dividing_factor = 8
    width = 288 // dividing_factor
    height = 512 // dividing_factor

    def __init__(self, rng, game=None, frame_skip=4, ple_options={
            "display_screen": True,
            "force_fps": True,
            "fps": 30
    }):
        # NOTE(review): mutable default argument `ple_options` is shared
        # across calls; callers should pass a fresh dict.
        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0
        # Guard against non-positive frame skips.
        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        if game is None:
            raise ValueError("Game must be provided")
        self._ple = PLE(game, **ple_options)
        self._ple.init()
        w, h = self._ple.getScreenDims()
        # NOTE(review): allocated (w, h) while PLE grayscale frames are
        # typically (h, w); both buffers are overwritten in act(), so the
        # initial shape only matters if observe() runs before act() --
        # confirm against the caller.
        self._screen = np.empty((w, h), dtype=np.uint8)
        self._reduced_screen = np.empty((self.width, self.height),
                                        dtype=np.uint8)
        self._actions = self._ple.getActionSet()

    def reset(self, mode):
        """Start a new episode; `mode` selects validation vs. training
        bookkeeping.  Returns a zeroed observation stack."""
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1
        print("Dead at score {}".format(self._ple.game.getScore()))
        self._ple.reset_game()
        # for _ in range(self._random_state.randint(15)):
        #     self._ple.act(self._ple.NOOP)
        # self._screen = self._ple.getScreenGrayscale()
        # cv2.resize(self._screen, (48, 48),
        #            self._reduced_screen,
        #            interpolation=cv2.INTER_NEAREST)
        return [self.memSize * [self.width * [self.height * [0]]]]

    def act(self, action):
        """Apply the indexed action for `_frame_skip` frames; returns the
        sign of the accumulated raw reward (reward clipping)."""
        action = self._actions[action]
        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
        self._screen = self._ple.getScreenGrayscale()
        # cv2.resize takes dsize as (width, height); passing
        # (self.height, self.width) presumably compensates for the frame
        # being transposed -- TODO confirm frame orientation.
        self._reduced_screen = cv2.resize(self._screen,
                                          (self.height, self.width),
                                          interpolation=cv2.INTER_NEAREST)
        # Live debug view of the downscaled observation.
        cv2.imshow("debug", self._reduced_screen.T)
        cv2.waitKey(1)
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        """Print the mean score per episode for the current mode."""
        # Include the still-running episode in the count, if any.
        if self.inTerminalState() == False:
            self._mode_episode_count += 1
        mean = (self._mode_score / self._mode_episode_count
                if self._mode_episode_count else "N/A")
        print("== Mean score per episode is {} over {} episodes ==".format(
            mean, self._mode_episode_count))

    def inputDimensions(self):
        # One input: memSize stacked frames of width x height pixels.
        return [(self.memSize, self.width, self.height)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        # Scale pixel values into [0, 1) as float observations.
        return [np.array(self._reduced_screen) / 256.]

    def inTerminalState(self):
        return self._ple.game_over()
class PLEWaterWorldEnv(gym.Env):
    """Gym wrapper around the PLE WaterWorld game with selectable
    observation and reward types."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='WaterWorld', display_screen=True,
                 ple_game=True, obs_type="Image", reward_type=1):
        '''
        For WaterWorld:
            getGameState() returns [player x position, player y position,
            player x velocity, player y velocity,
            player distance to each creep].
            player distance to each creep is a dict with
            "GOOD" : [], "BAD" : []
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (48, 48, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of
                    dis = sum(distance_from_good) - sum(distance_from_bad)
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)
        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game, fps=30, display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None
        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type
        # every reward type's max-abs value (used for reward scaling below)
        self.rewards_ths = [10.0, 5.0]
        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=self.img_shape,
                                                dtype=np.uint8)
        else:
            # Unlike the FlappyBird wrapper, RAM observations are rejected.
            print("Water world only supports image observation!")
            sys.exit(0)
        ############################################

    #############################################
    # Add state processer
    def process_state(self, state):
        # Wrap the state dict's values view in a 1-element array; consumers
        # unpack it with list(ram[0]) (see get_reward()).
        return np.array([state.values()])
    #############################################

    def _step(self, a, gamma=0.99):
        """One environment step; returns (image, reward, terminal, {})."""
        #############################################
        # old observation (game state before acting)
        old_ram = self.game_state.getGameState()
        #############################################
        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        terminal = self.game_state.game_over()
        #############################################
        # new observation (game state after acting)
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        # reward 2: shaped distance-based reward replaces the raw one
        if self.reward_type == 2:
            reward = self.get_reward(old_ram, ram, terminal, 2, gamma)
        # reward 0: vector of [raw reward, shaped reward]
        if self.reward_type == 0:
            reward1 = reward
            reward2 = self.get_reward(old_ram, ram, terminal, 2, gamma)
            reward = np.array([reward1, reward2])
        ##############################################

        ############################################################
        # reward scaling by each type's max-abs threshold
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        ############################################################
        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, old_ram, ram, done, reward_type, gamma=0.99):
        '''
        @Params:
            old_ram, ram : numpy.array,
                [dict_values([x, y, z, w, {"GOOD" : [], "BAD" : []}])]
            reward_type : 2 , change of distance from good - bad
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])
        reward = 0.0
        if not done:
            if reward_type == 2:
                # Index 4 holds the per-creep distance dict (see docstring).
                old_goods = np.array(old_ram[4]["GOOD"])
                old_bads = np.array(old_ram[4]["BAD"])
                goods = np.array(ram[4]["GOOD"])
                bads = np.array(ram[4]["BAD"])
                # Empty creep lists contribute 0 to the means.
                mean_old_goods = np.mean(
                    old_goods) if len(old_goods) > 0 else 0.0
                mean_old_bads = np.mean(old_bads) if len(old_bads) > 0 else 0.0
                mean_goods = np.mean(goods) if len(goods) > 0 else 0.0
                mean_bads = np.mean(bads) if len(bads) > 0 else 0.0
                old_sum_dis = mean_old_goods - mean_old_bads
                sum_dis = mean_goods - mean_bads
                # Potential-style shaping: positive when moving toward GOOD
                # creeps / away from BAD ones; clipped to [-5, 5].
                reward = old_sum_dis - gamma * sum_dis
                if reward > 5.0:
                    reward = 5.0
                elif reward < -5.0:
                    reward = -5.0
        return reward
    #############################################

    #############################################
    def _get_image(self):
        # Hack to fix the rotated image returned by ple
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(), 3))
        ##########################################
        # resize image to the 84x84 observation size
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.ANTIALIAS)
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        # Share one RandomState between PLE and the game, then re-init.
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class CustomGameMetaEnv(gym.Env):
    """
    Basically runs the same configuration for 2 episodes and then terminates
    """

    def __init__(self, task={}):
        # NOTE(review): mutable default `task={}` is shared across calls.
        self._task = task
        os.environ['SDL_VIDEODRIVER'] = 'dummy'  # headless pygame
        import importlib
        game_module = importlib.import_module('ple.games.customgame')
        game = getattr(game_module, 'customgame')()
        self.game_state = PLE(game, fps=30, display_screen=False)
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.num_actions = len(self._action_set)
        self.viewer = None
        # env tracking variables
        self.done_counter = 0  # inner episodes finished on the current task
        self.curr_task = None
        self.t = 0             # step counter within the current inner episode
        self.reward_mult = 1.0

    def seed(self, seed=None):
        """Seed PLE and the game with one shared RandomState; returns [seed]."""
        if not seed:
            seed = np.random.randint(2**31 - 1)
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
        return [seed]

    def reset_task(self, task):
        # Intentionally a no-op: the task is sampled by the game in reset().
        pass

    def render(self, mode='human'):
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def reset(self):
        """Reset the game and pin the task the game just sampled so the
        next inner episode replays the same configuration."""
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        self.curr_task = self.game_state.game.get_task()
        self.t = 0
        self.game_state.game.set_task(self.curr_task)
        return state

    def _get_image(self):
        # Hack to fix the rotated image returned by ple
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(), 3))
        return image_rotated

    def step(self, action):
        """One step; each inner episode ends on game over or after 400
        steps.  After 2 inner episodes on the same task the meta-episode
        terminates and the task is cleared."""
        reward = self.game_state.act(
            self._action_set[action]) * self.reward_mult
        state = self._get_image()
        terminal = self.game_state.game_over()
        self.t += 1
        prev_done_counter = int(self.done_counter)
        if terminal or self.t == 400:
            self.done_counter += 1
            if self.done_counter == 2:
                self.done_counter = 0
                self.game_state.game.set_task(None)
                terminal = True
            else:
                # First inner episode ended: auto-reset onto the same task
                # and report non-terminal to the caller.
                state = self.reset()
                terminal = False
        # info['done'] is 1.0 exactly at an inner-episode boundary.
        return state, reward, terminal, {
            'done': float(abs(self.done_counter - prev_done_counter))}
class MyEnv(Environment):
    """PLE adapter (deer-style `Environment`) using a fixed 48x48 uint8
    grayscale observation with a 4-frame history.

    NOTE(review): this file defines another class named `MyEnv` earlier;
    if both live in one module, the later definition shadows the earlier.
    """

    # Mode flag used by reset() to separate validation from training stats.
    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4,
                 ple_options={"display_screen": True,
                              "force_fps": True,
                              "fps": 30}):
        # NOTE(review): mutable default `ple_options` is shared across calls.
        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0
        # Guard against non-positive frame skips.
        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        if game is None:
            raise ValueError("Game must be provided")
        self._ple = PLE(game, **ple_options)
        self._ple.init()
        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        # Preallocated destination buffer for cv2.resize below.
        self._reducedScreen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()

    def reset(self, mode):
        """Reset the game; a random number of NOOPs randomizes the start
        state.  Returns a zeroed 4x48x48 observation stack."""
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1
        self._ple.reset_game()
        for _ in range(self._random_state.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)
        return [4 * [48 * [48 * [0]]]]

    def act(self, action):
        """Repeat the indexed action for `_frameSkip` frames; returns the
        sign of the accumulated raw reward (reward clipping)."""
        action = self._actions[action]
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
        self._screen = self._ple.getScreenGrayscale()
        # Resize in place into the preallocated destination buffer.
        cv2.resize(self._screen, (48, 48), self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        """Print the mean score per episode for the current mode."""
        # Include the still-running episode in the count, if any.
        if self.inTerminalState() == False:
            self._mode_episode_count += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._mode_score / self._mode_episode_count,
            self._mode_episode_count))

    def inputDimensions(self):
        # One input: 4 stacked 48x48 frames.
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ple.game_over()
class OriginalGameEnv(gym.Env):
    """Gym wrapper for the PLE `originalgame` module, exposing the raw
    (de-rotated) RGB frame as the observation."""

    def __init__(self, task={}):
        self._task = task
        # Run pygame without a window.
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        import importlib
        module = importlib.import_module('ple.games.originalgame')
        self.game_state = PLE(getattr(module, 'originalGame')(),
                              fps=30, display_screen=False)
        actions = self.game_state.getActionSet()
        self._action_set = actions
        self.action_space = spaces.Discrete(len(actions))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(
            low=0, high=255,
            shape=(self.screen_width, self.screen_height, 3))
        self.num_actions = len(actions)
        self.viewer = None

    def seed(self, seed=None):
        """Seed PLE and its game with a shared RandomState; returns [seed]."""
        seed = seed or np.random.randint(2**31 - 1)
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
        return [seed]

    def reset_task(self, task):
        # No per-task state to reconfigure.
        pass

    def render(self, mode='human'):
        frame = self._get_image()
        if mode == 'rgb_array':
            return frame
        if mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(frame)

    def reset(self):
        """Restart the game and return the first frame."""
        self.observation_space = spaces.Box(
            low=0, high=255,
            shape=(self.screen_width, self.screen_height, 3))
        self.game_state.reset_game()
        return self._get_image()

    def _get_image(self):
        # PLE returns a rotated frame; undo it (hack kept from upstream).
        raw = self.game_state.getScreenRGB()
        return np.fliplr(np.rot90(raw, 3))

    def step(self, action):
        """Apply one action; returns (frame, reward, done, {})."""
        reward = self.game_state.act(self._action_set[action])
        observation = self._get_image()
        done = self.game_state.game_over()
        return observation, reward, done, {}
class MonsterKongEnv(gym.Env):
    """Gym wrapper around the PLE MonsterKong game, configured by a
    `map_config` dict with optional keys: fps, frame_skip, force_fps,
    display_screen, episode_length, episode_end_sleep."""

    metadata = {'render.modes': ['human']}

    def __init__(self, map_config):
        self.map_config = map_config
        self.game = MonsterKong(self.map_config)

        # Defaults, overridden below by map_config entries when present.
        self.fps = 30
        self.frame_skip = 1
        self.num_steps = 1
        self.force_fps = True
        self.display_screen = True
        self.nb_frames = 500
        self.reward = 0.0
        self.episode_end_sleep = 0.2

        # BUG FIX: dict.has_key() was removed in Python 3; the `in`
        # operator has identical semantics and also works on Python 2.
        if 'fps' in map_config:
            self.fps = map_config['fps']
        if 'frame_skip' in map_config:
            self.frame_skip = map_config['frame_skip']
        if 'force_fps' in map_config:
            self.force_fps = map_config['force_fps']
        if 'display_screen' in map_config:
            self.display_screen = map_config['display_screen']
        if 'episode_length' in map_config:
            self.nb_frames = map_config['episode_length']
        if 'episode_end_sleep' in map_config:
            self.episode_end_sleep = map_config['episode_end_sleep']

        self.current_step = 0
        self._seed()  # sets self.rng before it is handed to PLE below
        self.p = PLE(self.game,
                     fps=self.fps,
                     frame_skip=self.frame_skip,
                     num_steps=self.num_steps,
                     force_fps=self.force_fps,
                     display_screen=self.display_screen,
                     rng=self.rng)
        self.p.init()
        # NOTE(review): the first action is deliberately dropped ([1:]);
        # confirm this matches the game's action-set layout.
        self._action_set = self.p.getActionSet()[1:]
        self.action_space = spaces.Discrete(len(self._action_set))
        (screen_width, screen_height) = self.p.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(screen_height,
                                                   screen_width, 3))

    def _seed(self, seed=24):
        # Stored as a plain int; PLE accepts an int seed for `rng`.
        self.rng = seed

    def _step(self, action_taken):
        """Apply the chosen action; the episode ends on game over or after
        `nb_frames` steps.  Returns (rgb_frame, reward, done, info)."""
        reward = 0.0
        action = self._action_set[action_taken]
        reward += self.p.act(action)
        obs = self.p.getScreenRGB()
        done = self.p.game_over()
        info = {'PLE': self.p}
        self.current_step += 1
        if self.current_step >= self.nb_frames:
            done = True
        return obs, reward, done, info

    def _reset(self):
        """Reset, retrying until an episode does not start terminal."""
        self.current_step = 0
        # Noop and reset if done
        start_done = True
        while start_done:
            self.p.reset_game()
            _, _, start_done, _ = self._step(4)
        #self.p.init()
        if self.p.display_screen:
            self._render()
        if self.episode_end_sleep > 0:
            time.sleep(self.episode_end_sleep)
        return self.p.getScreenRGB()

    def _render(self, mode='human', close=False):
        if close:
            return  # TODO: implement close
        # Temporarily force drawing even when display_screen is off.
        original = self.p.display_screen
        self.p.display_screen = True
        self.p._draw_frame()
        self.p.display_screen = original
class PLEFlappyBirdEnv(gym.Env):
    """Gym wrapper around the PLE FlappyBird game with selectable
    observation (image / RAM) and reward types."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True,
                 ple_game=True, obs_type="Image", reward_type=1):
        '''
        For FlappyBird:
            getGameState() returns [player y position, player velocity,
            next pipe distance to player, next pipe top y position,
            next pipe bottom y position, next next pipe distance,
            next next pipe top y, next next pipe bottom y]
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (512, 288, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of y-axis distance from the middle of next
                    top pipe and bottom pipe
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)
        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game, fps=30, display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None
        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type
        # every reward type's max-abs value (used for reward scaling below)
        self.rewards_ths = [5.0, 10.0]
        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=self.img_shape,
                                                dtype=np.uint8)
        elif self.obs_type == "RAM":
            # 8 state variables, see docstring above.
            self.observation_space = spaces.Box(low=-100.0, high=100.0,
                                                shape=(8, ),
                                                dtype=np.float32)
        ############################################

    #############################################
    # Add state processer
    def process_state(self, state):
        # Wrap the state dict's values view in a 1-element array; consumers
        # unpack it with list(ram[0]).
        return np.array([state.values()])
    #############################################

    def _step(self, a, gamma=0.99):
        """One environment step; returns (obs, reward, terminal, {})."""
        #############################################
        if isinstance(a, np.ndarray):
            a = a[0]
        # old observation (game state before acting)
        old_ram = self.game_state.getGameState()
        #############################################
        reward = self.game_state.act(self._action_set[a])
        #############################################
        #state = self._get_image()
        if self.obs_type == "Image":
            state = self._get_image()
        #############################################
        terminal = self.game_state.game_over()
        #############################################
        # new observation (game state after acting)
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        pass_pipe = False
        # pass one pipe (raw FlappyBird reward for clearing a pipe is 1.0)
        if reward == 1.0:
            pass_pipe = True
        if self.reward_type == 1:
            reward = reward / self.rewards_ths[0]
        # reward 2: distance-shaped reward replaces the raw one
        if self.reward_type == 2:
            reward = self.get_reward(reward, old_ram, ram, terminal, 2,
                                     pass_pipe, gamma)
        # reward 0: vector of [scaled raw reward, shaped reward]
        if self.reward_type == 0:
            reward1 = reward / self.rewards_ths[0]
            reward2 = self.get_reward(reward, old_ram, ram, terminal, 2,
                                      pass_pipe, gamma)
            reward = np.array([reward1, reward2])
        '''
        if reward1 > 0.0:
            print("Pass one pipe:", reward)
            print("Old ram:", list(old_ram[0]))
            print("Ram:", list(ram[0]))
        '''
        ##############################################

        ############################################################
        # reward scaling
        '''
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        '''
        ############################################################

        ##############################################
        # obs
        if self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        ##############################################
        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, src_reward, old_ram, ram, done, reward_type,
                   pass_pipe, gamma=0.99):
        '''
        @Params:
            old_ram, ram : numpy.array, [dict_values([x1, x2, ..., x8])]
            reward_type : 2 , change of y-axis distance from the middle
                line of the next top and bottom pipe
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])
        reward = src_reward
        # No shaping on terminal steps or when a pipe was just passed
        # (the pipe indices change at that moment).
        if not (done or pass_pipe):
            if reward_type == 2:
                # distance to middle of two pipes
                old_py, old_top_y, old_bottom_y = old_ram[0], old_ram[
                    3], old_ram[4]
                py, top_y, bottom_y = ram[0], ram[3], ram[4]
                old_dis = abs(old_py - (old_top_y + old_bottom_y) / 2.0)
                dis = abs(py - (top_y + bottom_y) / 2.0)
                # Scaled raw reward plus a potential-style shaping term.
                reward = (src_reward / self.rewards_ths[0]
                          ) + (old_dis - gamma * dis) / self.rewards_ths[1]
                '''
                # if pipes changed, reward = 0.0
                old_next_pipe_distance = old_ram[2]
                next_pipe_distance = ram[2]
                print(old_ram, ram)
                print(old_next_pipe_distance, next_pipe_distance,
                      old_dis, dis, old_top_y, old_bottom_y,
                      top_y, bottom_y, reward)
                '''
        return reward
    #############################################

    #############################################
    def _get_image(self):
        # Hack to fix the rotated image returned by ple
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(), 3))
        '''
        try:
            self.cnt += 1
        except Exception:
            self.cnt = 0
        if self.cnt <= 10000:
            img = Image.fromarray(image_rotated)
            img.save("/home/lxcnju/workspace/flappy_bird_images/fb_{}.jpg".format(self.cnt))
        '''
        ##########################################
        # resize image to the 84x84 observation size
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.ANTIALIAS)
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        #######################################
        if self.obs_type == "Image":
            state = self._get_image()
        elif self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        #######################################
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        # Share one RandomState between PLE and the game, then re-init.
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class ple_wrapper:
    """Minimal PLE wrapper exposing reset/step over transposed grayscale
    frames."""

    # game name -> (module path, class name); consulted by __init__.
    _GAMES = {
        'catcher': ('ple.games.catcher', 'Catcher'),
        'monsterkong': ('ple.games.monsterkong', 'MonsterKong'),
        'flappybird': ('ple.games.flappybird', 'FlappyBird'),
        'pixelcopter': ('ple.games.pixelcopter', 'Pixelcopter'),
        'pong': ('ple.games.pong', 'Pong'),
        'puckworld': ('ple.games.puckworld', 'PuckWorld'),
        'raycastmaze': ('ple.games.raycastmaze', 'RaycastMaze'),
        'snake': ('ple.games.snake', 'Snake'),
        'waterworld': ('ple.games.waterworld', 'WaterWorld'),
    }

    def __init__(self, game, display_screen=False):
        from ple import PLE
        import importlib
        assert game in self._GAMES
        module_path, cls_name = self._GAMES[game]
        env = getattr(importlib.import_module(module_path), cls_name)()
        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()

    def gray_scale(self, frame):
        """Collapse an RGB frame to uint8 grayscale via the standard
        0.299/0.587/0.114 luminance weights."""
        weights = np.array([.299, .587, .114])
        gray = np.dot(frame, weights).astype(np.uint8)
        assert gray.shape == frame.shape[:-1]
        return gray

    def get_screen(self):
        """Current frame as a transposed grayscale array."""
        rgb = self.p.getScreenRGB()
        return self.gray_scale(rgb).T

    def reset(self):
        """Restart the game and return the frame after one throwaway step
        (the last action in the action set)."""
        self.p.reset_game()
        state, _, done = self.step(-1)
        assert done == False
        return state

    def step(self, action):
        """Act by index into the action set; returns (frame, reward, done)."""
        reward = self.p.act(self.action_set[action])
        observation = self.get_screen()
        finished = self.p.game_over()
        return observation, reward, finished
# --- tflearn / PLE experiment script (module top level) ---
# NOTE(review): several imported names (dropout, Counter, Catcher, ...)
# are unused in the visible portion of this script.
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.conv import conv_2d, max_pool_2d, highway_conv_2d
from tflearn.layers.normalization import local_response_normalization, batch_normalization
from statistics import mean, median
from collections import Counter
from ple.games.flappybird import FlappyBird
from ple.games.catcher import Catcher
from ple import PLE

game = FlappyBird()
env = PLE(game, fps=30, display_screen=True)  # environment interface to game
env.init()
print(env.getActionSet())
# 97 and 100
# 119 for flappy
print(env.getScreenDims())

LR = 1e-3  # learning rate

#env = gym.make('SpaceInvaders-v0')
#print(env.observation_space)
#print(env.action_space)
#env.reset()

goal_steps = 10000       # max steps per rollout
score_requirement = 30   # minimum score for an episode to be kept
initial_games = 1000     # number of random games to sample


def initial_population():
    # Accumulators for sampled games.
    # NOTE(review): the body appears truncated at this chunk boundary --
    # the game-collection loop is not visible here.
    training_data = []
    scores = []
    accepted_scores = []
class PLEEnv(gym.Env):
    """Generic gym wrapper for a named PLE game using the non-visual game
    state (dict values) as the observation and a survival-based reward."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        #* converts non-visual state representation to numpy array
        def process_state(state):
            return np.array([state.values()])

        self.game_state = PLE(game, fps=30, display_screen=display_screen,
                              state_preprocessor=process_state)  #* added state_preprocessor
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        #self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        #self.observation_space = spaces.Box(self.low, self.high)
        self.viewer = None

    def _step(self, a):
        """Apply the indexed action; the raw PLE reward is overridden to
        -1000 on death and +1 per surviving step."""
        reward = self.game_state.act(self._action_set[a])
        state = self.game_state.getGameState()
        terminal = self.game_state.game_over()
        '''
        reward system: did you die? -1000 else +1
        '''
        if terminal == True:
            reward = -1000
        else:
            reward = 1
        return state, reward, terminal, {}

    def _get_image(self):
        # Hack to fix the rotated image returned by ple
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(), 3))
        return image_rotated

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        #self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        self.game_state.reset_game()
        state = self.game_state.getGameState()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        # Share one RandomState between PLE and the game, then re-init.
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class SnakeQNetwork:
    """Q-learning agent for PLE's Snake using a small Keras MLP over raw
    grayscale screen pixels, with an experience-replay history and
    epsilon-greedy exploration."""

    def __init__(self, food_reward=10, dead_reward=-10, alive_reward=2,
                 discount_factor=0.95, batch_size=10, train_epochs=100,
                 history_size=1000, history_sample_size=50):
        self.food_reward = food_reward
        self.dead_reward = dead_reward
        self.alive_reward = alive_reward
        self.discount_factor = discount_factor
        self.batch_size = batch_size
        self.train_epochs = train_epochs
        self.history_size = history_size
        self.history_sample_size = history_sample_size
        self.q_learning_history = QLearningHistory(history_size)
        # Epsilon for epsilon-greedy exploration; decayed in run().
        self.exploration_factor = 0.2
        self.next_move_prediction = None  # lazily-built Keras model
        self.is_neural_network_initialized = False
        pygame.init()
        self.game = Snake(width=64, height=64)
        self.env = PLE(self.game, display_screen=True)
        self.env.init()
        self.LOG = gym.logger

    def run(self, maximum_number_of_iterations=10000, learning_rate=0.5,
            training=False):
        """Play (and optionally train on) up to `maximum_number_of_iterations`
        episodes; each inner loop runs one episode to game over."""
        for iteration in range(0, maximum_number_of_iterations):
            if not self.is_neural_network_initialized:
                self.___initialize_neural_newtork()
                self.is_neural_network_initialized = True
            observation = self.env.getScreenGrayscale()
            observation_width = self.env.getScreenDims()[0]
            observation_height = self.env.getScreenDims()[1]
            self.game.init()
            # exit the while loop only if it's GAME OVER
            while True:
                q_values = self.next_move_prediction.predict(
                    x=observation.reshape(
                        1, observation_width * observation_height),
                    batch_size=1)
                best_snake_action = np.argmax(q_values)
                reward = self.__take_snake_action(best_snake_action)
                previous_observation = copy.deepcopy(observation)
                observation = self.env.getScreenGrayscale()
                is_game_over = self.env.game_over()
                self.LOG.info(
                    "Current action reward: {r}. Is game over: {d}".format(
                        r=reward, d=is_game_over))
                if training:
                    reward = self.__get_custom_reward(reward)
                    self.q_learning_history.record_event(
                        state=previous_observation,
                        action=best_snake_action,
                        reward=reward,
                        new_state=observation)
                    last_event = self.q_learning_history.get_last_event()
                    self.LOG.info(
                        "Added event #{n} to history. Action: {a}; Reward: {r}"
                        .format(a=last_event[1],
                                r=reward,
                                n=self.q_learning_history.size))
                    if self.q_learning_history.is_full():
                        # Replay: fit the network on a random minibatch of
                        # past transitions.
                        history_batch = random.sample(
                            self.q_learning_history.get_events(),
                            self.history_sample_size)
                        self.LOG.info(
                            "Sampling {n} events from history.".format(
                                n=self.history_sample_size))
                        training_batch_data = []
                        training_batch_labels = []
                        for history_event in history_batch:
                            old_state, action, reward, new_state = history_event
                            q_values_before_action = self.next_move_prediction.predict(
                                x=old_state.reshape(
                                    1,
                                    observation_width * observation_height),
                                batch_size=1)
                            q_values_after_action = self.next_move_prediction.predict(
                                x=new_state.reshape(
                                    1,
                                    observation_width * observation_height),
                                batch_size=1)
                            # NOTE(review): np.argmax returns the *index* of
                            # the best Q-value, not its value; a standard
                            # Q-update would use np.max here — confirm intent.
                            best_q_value_after_action = np.argmax(
                                q_values_after_action)
                            training_q_values = np.zeros((1, 4))
                            # NOTE(review): len() of a (1, 4) array is 1, so
                            # this copies only row 0, which the next two lines
                            # immediately overwrite anyway.
                            for value_idx in range(
                                    0, len(q_values_before_action)):
                                training_q_values[
                                    value_idx] = q_values_before_action[
                                        value_idx]
                            output_update = learning_rate * (
                                reward + (self.discount_factor *
                                          best_q_value_after_action))
                            # Train only on the taken action's output slot.
                            training_q_values[0][:] = 0
                            training_q_values[0][action] = output_update
                            training_batch_data.append(
                                old_state.reshape(
                                    observation_width * observation_height, ))
                            training_batch_labels.append(
                                training_q_values.reshape(4, ))
                        training_batch_data = np.array(training_batch_data)
                        training_batch_labels = np.array(training_batch_labels)
                        self.next_move_prediction.fit(
                            x=training_batch_data,
                            y=training_batch_labels,
                            epochs=self.train_epochs,
                            batch_size=self.batch_size)
                if is_game_over:
                    break
            # Linear epsilon decay down to a floor of 0.1.
            if self.exploration_factor > 0.1:
                self.exploration_factor -= (1.0 /
                                            maximum_number_of_iterations)
                self.LOG.info(
                    "Exploration factor updated! New value: {v}".format(
                        v=self.exploration_factor))

    def ___initialize_neural_newtork(self):
        # (sic) method name typo preserved to match existing callers.
        # Single-hidden-layer MLP: pixels -> 100 sigmoid -> 4 linear Q-values.
        input_layer_size = self.env.getScreenDims(
        )[0] * self.env.getScreenDims()[1]
        hidden_layer_size = 100
        output_layer_size = 4
        input_layer = Dense(kernel_initializer='lecun_uniform',
                            units=hidden_layer_size,
                            input_shape=(input_layer_size, ),
                            activation='sigmoid')
        hidden_layer = Dense(kernel_initializer='lecun_uniform',
                             units=output_layer_size,
                             activation='linear')
        self.next_move_prediction = Sequential()
        self.next_move_prediction.add(input_layer)
        self.next_move_prediction.add(hidden_layer)
        self.next_move_prediction.compile(optimizer='rmsprop',
                                          loss='mean_squared_error')

    def __take_snake_action(self, snake_action):
        """Execute `snake_action` (0..3) epsilon-greedily; random moves are
        forced until the replay history fills. Returns the PLE reward."""
        random_number = np.random.random_sample()
        if not self.q_learning_history.is_full():
            snake_action = random.choice(self.env.getActionSet())
            self.LOG.info("Snake chose to do a random move - add to qHistory!")
            return self.env.act(snake_action)
        elif random_number < self.exploration_factor:
            snake_action = random.choice(self.env.getActionSet())
            self.LOG.info(
                "Random number is smaller than exploration factor, {r} < {ef}! Snake chose random move!"
                .format(r=random_number, ef=self.exploration_factor))
            return self.env.act(snake_action)
        # Pygame key codes: 119='w', 97='a', 115='s', 100='d'.
        # NOTE(review): "up" sends 115 ('s') and "down" sends 119 ('w') —
        # looks swapped relative to WASD convention; confirm against the game.
        elif snake_action == 0:
            self.LOG.info("Snake chose to go up")
            return self.env.act(115)
        elif snake_action == 1:
            self.LOG.info("Snake chose to go left")
            return self.env.act(97)
        elif snake_action == 2:
            self.LOG.info("Snake chose to go down")
            return self.env.act(119)
        elif snake_action == 3:
            self.LOG.info("Snake chose to go right")
            return self.env.act(100)

    def __get_custom_reward(self, reward):
        # Map the raw PLE reward onto the configured shaping values.
        if reward >= 1:
            self.LOG.info(
                "Has eaten food! Reward is {r}".format(r=self.food_reward))
            return self.food_reward
        elif reward >= 0:
            self.LOG.info(
                "Stayed alive! Reward is {r}".format(r=self.alive_reward))
            return self.alive_reward
        else:
            self.LOG.info("Crashed! Reward is {r}".format(r=self.dead_reward))
            return self.dead_reward
class PLEEnv(gym.Env):
    """Gym wrapper around a PLE game that exposes either the raw game state
    (`observe_state=True`) or a PIL-resized RGB frame as observations.

    Screens larger than 500 total pixels along (w+h) are downscaled 4x.
    NOTE(review): legacy gym API (`_step`/`_reset`/`_render`/`_seed`).
    """
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True,
                 observe_state=False):
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        # NOTE(review): `state_preprocessor` must be a module-level helper;
        # it is defined outside this chunk.
        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=state_preprocessor)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        # Downscale large screens to keep image observations small.
        if self.screen_height + self.screen_width > 500:
            img_scale = 0.25
        else:
            img_scale = 1.0
        self.screen_width = int(self.screen_width * img_scale)
        self.screen_height = int(self.screen_height * img_scale)
        self.observe_state = observe_state
        if self.observe_state:
            # the bounds are typically not infinity
            self.observation_space = spaces.Box(
                low=-float('inf'),
                high=float('inf'),
                shape=self.game_state.state_dim)
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.screen_height,
                                                       self.screen_width, 3))
        self.viewer = None

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        if self.observe_state:
            state = self.game_state.getGameState()
        else:
            state = self._get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _resize_frame(self, frame):
        # Resize via PIL; ANTIALIAS is deprecated in newer Pillow releases.
        pil_image = Image.fromarray(frame)
        pil_image = pil_image.resize((self.screen_width, self.screen_height),
                                     Image.ANTIALIAS)
        return np.array(pil_image)

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return self._resize_frame(image_rotated)

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self, **kwargs):
        # NOTE(review): unconditionally resets observation_space to the image
        # Box even when observe_state is True — confirm this is intended.
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_height,
                                                   self.screen_width, 3))
        self.game_state.reset_game(**kwargs)
        if self.observe_state:
            state = self.game_state.getGameState()
        else:
            state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        # Seed both the PLE wrapper and the underlying game, then re-init.
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class PLECatcherEnv(gym.Env):
    """Gym wrapper for PLE Catcher with selectable observation type
    ("Image" 84x84x3 or "RAM" 4-vector) and reward shaping modes."""
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='Catcher', display_screen=True,
                 ple_game=True, obs_type="Image", reward_type=1):
        '''
        For Catcher:
            getGameState() returns [player x position, player velocity,
            fruits x position, fruits y position]
        @Params:
            obs_type :
                "RAM"   : getGameState()
                "Image" : (84, 84, 3) resized RGB frame
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of x-axis distance from fruit
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)
        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None
        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type
        # every reward type's max-abs value
        self.rewards_ths = [1.0, 2.0]
        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.img_shape,
                                                dtype=np.uint8)
        elif self.obs_type == "RAM":
            self.observation_space = spaces.Box(low=-100.0,
                                                high=100.0,
                                                shape=(4, ),
                                                dtype=np.float32)
        ############################################

    #############################################
    # Add state processer
    # NOTE(review): wraps dict_values directly; `list(state.values())` is
    # presumably intended — the array is unpacked with list(state[0]) later.
    def process_state(self, state):
        return np.array([state.values()])
    #############################################

    def _step(self, a, gamma=0.99):
        #############################################
        if isinstance(a, np.ndarray):
            a = a[0]
        # old observation
        old_ram = self.game_state.getGameState()
        #############################################
        reward = self.game_state.act(self._action_set[a])
        #############################################
        #state = self._get_image()
        if self.obs_type == "Image":
            state = self._get_image()
        #############################################
        terminal = self.game_state.game_over()
        #############################################
        # new observation
        ram = self.game_state.getGameState()
        #############################################
        #############################################
        if self.reward_type == 1:
            reward = reward / self.rewards_ths[0]
        # reward 2
        if self.reward_type == 2:
            reward = self.get_reward(reward, old_ram, ram, terminal, 2, gamma)
        # reward 0
        if self.reward_type == 0:
            reward1 = reward / self.rewards_ths[0]
            reward2 = self.get_reward(reward, old_ram, ram, terminal, 2, gamma)
            reward = np.array([reward1, reward2])
        ##############################################
        ############################################################
        '''
        # reward scaling
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        '''
        ############################################################
        ##############################################
        # obs
        if self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        ##############################################
        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, src_reward, old_ram, ram, done, reward_type, gamma):
        '''
        Shaped reward: discounted decrease of |player_x - fruit_x|,
        clamped to [-2, 2] and blended with the scaled raw reward.
        @Params:
            old_ram, ram : numpy.array, [dict_values([x, y, z, w])]
            reward_type : 2 , distance of x-axis change
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])
        reward = src_reward
        if not done:
            if reward_type == 2:
                old_px, old_fx = old_ram[0], old_ram[2]
                px, fx = ram[0], ram[2]
                old_dis = abs(old_px - old_fx)
                dis = abs(px - fx)
                reward = old_dis - gamma * dis
                # a new epoch: fruit y jumped back up, so the distance
                # delta is meaningless for this transition.
                old_fy, fy = old_ram[3], ram[3]
                if old_fy > fy:
                    reward = 0.0
                reward = min(reward, 2.0)
                reward = max(reward, -2.0)
                reward = src_reward / self.rewards_ths[0] + reward / self.rewards_ths[1]
        return reward
    #############################################

    #############################################
    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        ##########################################
        # resize image
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.ANTIALIAS)
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        #######################################
        if self.obs_type == "Image":
            state = self._get_image()
        elif self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        #######################################
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class PLEEnv(gym.Env):
    """Minimal Gym adapter for a PyGame Learning Environment game.

    Observations are the raw RGB frame (orientation-corrected); actions
    index into the game's native action set. Uses the modern gym method
    names (step/reset/render/seed/close).
    """

    metadata = {"render.modes": ["human", "rgb_array"]}

    def __init__(self, game_name="FlappyBird", display_screen=True, ple_game=True, **kwargs):
        # Headless SDL so the emulator runs without a display.
        os.environ["SDL_VIDEODRIVER"] = "dummy"
        # Locate and instantiate the game class by name.
        import importlib
        module_path = ("ple.games.%s" % game_name) if ple_game else game_name
        game_module = importlib.import_module(module_path.lower())
        game = getattr(game_module, game_name)(**kwargs)
        self.game_state = PLE(game, fps=30, display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None

    def step(self, a):
        """Apply action index `a`; return (frame, reward, done, info)."""
        reward = self.game_state.act(self._action_set[a])
        return self.get_image(), reward, self.game_state.game_over(), {}

    def get_image(self):
        # Hack to fix the rotated image returned by ple.
        frame = np.rot90(self.game_state.getScreenRGB(), 3)
        return np.fliplr(frame)

    @property
    def n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        return self.get_image()

    def render(self, mode="human", close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        frame = self.get_image()
        if mode == "rgb_array":
            return frame
        if mode == "human":
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(frame)

    def seed(self, seed):
        # Seed the PLE wrapper and its game with the same RandomState.
        random_state = np.random.RandomState(seed)
        self.game_state.rng = random_state
        self.game_state.game.rng = self.game_state.rng

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
def main(): global render_bool render_bool = True # parl.connect('localhost:8037') if dummy_mode: render_bool = False if not render_bool: os.environ["SDL_VIDEODRIVER"] = "dummy" # else: # pygame.display.set_mode((800, 600 + 60)) # 创建环境 game = GameEnv() p = PLE(game, display_screen=render_bool, fps=30, force_fps=True ) # , fps=30, display_screen=render_bool, force_fps=True) p.init() # 根据parl框架构建agent print(p.getActionSet()) act_dim = len(p.getActionSet()) width, height = p.getScreenDims() rpm = ReplayMemory(MEMORY_SIZE) # DQN的经验回放池 obs_dim = get_env_obs(p).shape model = Model(act_dim=act_dim) if MODE == "DDPG": alg = RL_Alg(model, gamma=GAMMA, tau=0.001, actor_lr=LEARNING_RATE, critic_lr=LEARNING_RATE) if MODE == "DQN": alg = RL_Alg(model, gamma=GAMMA, lr=LEARNING_RATE, act_dim=act_dim) agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim) # e_greed有一定概率随机选取动作,探索 # 加载模型 best_eval_reward = -1000 cache_fn = './model_pixelcopter_%s.ckpt' % MODE if os.path.exists(cache_fn): print("loaded model:", cache_fn) agent.restore(cache_fn) best_eval_reward = evaluate(p, agent, render=render_bool) # run_episode(env, agent, train_or_test='test', render=True) # exit() # 先往经验池里存一些数据,避免最开始训练的时候样本丰富度不够 while len(rpm) < MEMORY_WARMUP_SIZE: run_episode(p, agent, rpm) max_episode = 200000 # 开始训练 episode = 0 while episode < max_episode: # 训练max_episode个回合,test部分不计算入episode数量 # train part for i in range(0, 5): total_reward = run_episode(p, agent, rpm) episode += 1 # test part eval_reward = evaluate(p, agent, render=render_bool) # render=True 查看显示效果 logger.info('episode:{} e_greed:{} test_reward:{}'.format( episode, e_greed, eval_reward)) # 保存模型到文件 ./model.ckpt agent.save(cache_fn + "." + str(rate_num)) if best_eval_reward < eval_reward: best_eval_reward = eval_reward agent.save(cache_fn)
class AngryBirdEnv(gym.Env):
    """Gym wrapper for the custom AngryBird PLE game.

    `step` treats the K_r ("replay") action specially: it replays the
    player's stored plan frame by frame before applying the final action.
    """

    def __init__(self, display_screen=True):
        self.game_state = PLE(AngryBird(render=display_screen),
                              fps=30,
                              display_screen=display_screen)
        #self.game_state.init()
        self.display_screen = display_screen
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255, \
            shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        self.viewer = None

    def step(self, a):
        states = []
        if self._action_set[a] == K_r:
            # Replay the planned shot frame by frame, collecting frames.
            while len(self.game_state.game.player.plan) > 1:
                _ = self.game_state.act(list(self._action_set)[a])
                state = self._get_image()
                if self.display_screen:
                    self.render()
                states.append(state)
        reward = self.game_state.act(list(self._action_set)[a])
        if self.display_screen:
            self.render()
        # NOTE(review): overwrites the env reward with `score and not died`,
        # which yields a truthy score / False / 0 rather than a plain 0/1 —
        # confirm against the assert below.
        reward = self.game_state.game.getScore(
        ) and not self.game_state.game.player.died
        terminal = (self.game_state.game_over()
                    or self.game_state.game.player.died)
        if self._action_set[a] != K_r or len(states) == 0:
            # Non-replay step (or empty plan): return the current frame only.
            states = self._get_image()
            reward = 0
            #states.append(self._get_image())
            pass
        else:
            states = states[0]  #temporary
            self.reset()
        assert reward in [0, 1
                          ], 'Reward is not what it should be: ' + str(reward)
        return states, reward, terminal, {}

    def _get_image(self):
        # Hack to fix the rotated image returned by ple.
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(), 3))
        return image_rotated

    @property
    def _n_actions(self):
        return len(self._action_set)

    def reset(self):
        ''' Performs the reset of the gym env '''
        #if self.display_screen:
        #    time.sleep(1)
        self.observation_space = spaces.Box(low=0, high=255, \
            shape=(self.screen_width, self.screen_height, 3), dtype=np.uint8)
        self.game_state.game.reset()
        state = self._get_image()
        if self.display_screen:
            self.render()
        return state

    def render(self, mode='rgb_array', close=False):
        ''' Performs the rendering for the gym env '''
        # No-op when display_screen is False.
        if self.display_screen:
            if close:
                if self.viewer is not None:
                    self.viewer.close()
                    self.viewer = None
                return
            img = self._get_image()
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)

    def _seed(self, _):
        # NOTE(review): the seed value is ignored; this only re-inits PLE.
        self.game_state.init()

    def reset_hard(self):
        # Tear down the viewer (best-effort) and rebuild the env in place.
        try:
            self.close()
            self.viewer.close()
            self.viewer = None
        except:
            pass
        self.__init__()
class PLEEnv(gym.Env):
    """Gym wrapper for PLE games with configurable reward shaping
    (`reward_type` forwarded to the game) and either game-state or a fixed
    64x64 RGB observation resized with OpenCV."""
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True,
                 ple_game=True, root_game_name=None, reward_type='sparse',
                 obs_type=None, **kwargs):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        os.environ['SDL_AUDIODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            # Custom games live in "<root_game_name>.envs".
            game_module_name = F"{root_game_name.lower()}.envs"
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)(**kwargs)
        self.ple_wrapper = PLE(game, fps=30, display_screen=display_screen)
        self.ple_wrapper.init()
        game.reward_type = reward_type
        self._action_set = self.ple_wrapper.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.ple_wrapper.getScreenDims(
        )
        # Assume observation space to be (64, 64, 3) due to procgen
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(64, 64, 3),
                                            dtype=np.uint8)
        self.viewer = None
        assert obs_type is not None, obs_type
        self.obs_type = obs_type  # 'state' or 'image'
        self.reward_range = game.rewards['win']

    def step(self, a):
        reward = self.ple_wrapper.act(self._action_set[a])
        if self.obs_type == 'state':
            state = self.ple_wrapper.game.get_state()
        elif self.obs_type == 'image':
            state = self._get_image()
        terminal = self.ple_wrapper.game_over()
        return state, reward, terminal, {}

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.ple_wrapper.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return cv2.resize(image_rotated, (64, 64),
                          interpolation=cv2.INTER_AREA)

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def reset(self):
        self.ple_wrapper.reset_game()
        if self.obs_type == 'state':
            state = self.ple_wrapper.game.get_state()
        elif self.obs_type == 'image':
            state = self._get_image()
        return state

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def seed(self, seed):
        rng = np.random.RandomState(seed)
        self.ple_wrapper.rng = rng
        self.ple_wrapper.game.rng = self.ple_wrapper.rng
        self.ple_wrapper.init()

    def get_keys_to_action(self):
        # Keyboard-combo -> discrete action index mapping.
        # Pygame key codes: 32=space, 119='w', 100='d', 97='a', 115='s'.
        return {
            (): 0,
            (32, ): 1,
            (119, ): 2,
            (100, ): 3,
            (97, ): 4,
            (115, ): 5,
            (100, 119): 6,
            (97, 119): 7,
            (100, 115): 8,
            (97, 115): 9,
            (32, 119): 10,
            (32, 100): 11,
            (32, 97): 12,
            (32, 115): 13,
            (32, 100, 119): 14,
            (32, 97, 119): 15,
            (32, 100, 115): 16,
            (32, 97, 115): 17
        }
# shooting agent agent = ShootAgent(p.getActionSet()) # init agent and game. p.init() # lets do a random number of NOOP's for i in range(np.random.randint(0, max_noops)): reward = p.act(p.NOOP) # start our training loop for f in range(nb_frames): # if the game is over if p.game_over(): p.reset_game() print('game over') (screen_width, screen_height) = p.getScreenDims() print(screen_width, screen_height) print(p.getGameStateDims()) obs = p.getScreenRGB() from PIL import Image img = Image.fromarray(obs) img.show() # state = p.getGameState() break # print(state) # action = agent.pickAction(reward, obs) # reward = p.act(action) # print('score: {}'.format(reward))
class PLEEnv(gym.Env):
    """Gym wrapper for a PLE game returning raw RGB frames, with
    frame_skip=2 applied inside PLE itself."""
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name, display_screen=True):
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game_state = PLE(game,
                              fps=30,
                              frame_skip=2,
                              display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.viewer = None
        self.count = 0  # frame counter, only used by the commented-out dump

    def step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        #import scipy.misc
        #scipy.misc.imsave('outfile'+str(self.count)+'.jpg', state)
        #self.count = self.count+1
        terminal = self.game_state.game_over()
        #print(randomAction)
        #print(a,self._action_set[a])
        return state, reward, terminal, {}

    def _get_image(self):
        #image_rotated = self.game_state.getScreenRGB()
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    @property
    def n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def render(self, mode='human', close=False):
        #print('HERE')
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def seed(self, seed):
        # Seed both the PLE wrapper and the underlying game, then re-init.
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class PLEEnv(gym.Env):
    """Gym wrapper supporting both RGB and preprocessed-state observations,
    plus optional train/test-subset and state-weighting hooks that are
    delegated to the underlying game object when it provides them."""
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, prespecified_game=True, game_name='MyCatcher',
                 display_screen=True, rgb_state=False):
        # open up a game state to communicate with emulator
        import importlib
        if prespecified_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = ('domains.ple.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        self.game = getattr(game_module, game_name)()
        self.rgb_state = rgb_state
        if self.rgb_state:
            self.game_state = PLE(self.game,
                                  fps=30,
                                  display_screen=display_screen)
        else:
            # NOTE(review): process_state_prespecified / process_state must
            # be module-level helpers; they are defined outside this chunk.
            if prespecified_game:
                self.game_state = PLE(
                    self.game,
                    fps=30,
                    display_screen=display_screen,
                    state_preprocessor=process_state_prespecified)
            else:
                self.game_state = PLE(self.game,
                                      fps=30,
                                      display_screen=display_screen,
                                      state_preprocessor=process_state)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        if self.rgb_state:
            self.state_width, self.state_height = self.game_state.getScreenDims(
            )
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.state_dim = self.game_state.getGameStateDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.viewer = None
        self.feature_bins = []  # discretization bins, if the game defines any
        if hasattr(self.game, 'feature_bins'):
            self.feature_bins = self.game.feature_bins

    def get_source_state(self, state):
        # Returns None when the game does not define get_source_state.
        if hasattr(self.game, 'get_source_state'):
            return self.game.get_source_state(state)
        return None

    def get_uniform_state_weights(self):
        # Fall back to uniform 1/n weights over the game's state list.
        if hasattr(self.game, 'get_uniform_state_weights'):
            return self.game.get_uniform_state_weights()
        else:
            states = self.get_states()
            weights = np.ones(len(states))
            weights = [float(i) / sum(weights) for i in weights]
            return states, weights

    def generate_training_subset(self, percent_sim_data):
        if hasattr(self.game, 'generate_training_subset'):
            return self.game.generate_training_subset(percent_sim_data)

    def set_to_training_set(self):
        if hasattr(self.game, 'set_to_training_set'):
            return self.game.set_to_training_set()

    def set_to_testing_set(self):
        if hasattr(self.game, 'set_to_testing_set'):
            return self.game.set_to_testing_set()

    def get_states(self):
        # Implicitly returns None when the game exposes no `states`.
        if hasattr(self.game, 'states'):
            return self.game.states

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_state()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self, game_state):
        image_rotated = np.fliplr(
            np.rot90(game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    def _get_state(self):
        # Observation dispatcher: RGB frame or preprocessed game state.
        if self.rgb_state:
            return self._get_image(self.game_state)
        else:
            return self.game_state.getGameState()

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        if self.rgb_state:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.game_state.reset_game()
        state = self._get_state()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image(self.game_state)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
class MyEnv(Environment):
    """Environment wrapper around a PLE game: 48x48 grayscale observations
    with a 4-frame input memory and sign-clipped rewards."""

    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4,
                 ple_options={
                     "display_screen": True,
                     "force_fps": True,
                     "fps": 30
                 }):
        # NOTE: `ple_options` is a mutable default argument shared across
        # calls; safe here only because it is never mutated.
        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0
        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        if game is None:
            raise ValueError("Game must be provided")
        self._ple = PLE(game, **ple_options)
        self._ple.init()
        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reduced_screen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()

    def reset(self, mode):
        # Track validation-mode bookkeeping separately from training mode.
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1
        self._ple.reset_game()
        # Random number of no-ops so episodes don't all start identically.
        for _ in range(self._random_state.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48),
                   self._reduced_screen,
                   interpolation=cv2.INTER_NEAREST)
        # Initial input history: 4 blank 48x48 frames.
        return [4 * [48 * [48 * [0]]]]

    def act(self, action):
        """Apply `action` for `frame_skip` frames; return sign-clipped reward."""
        action = self._actions[action]
        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48),
                   self._reduced_screen,
                   interpolation=cv2.INTER_NEAREST)
        self._mode_score += reward
        # Reward clipping to {-1, 0, 1}.
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        # Count an unfinished episode so the mean below never divides by 0
        # after at least one reset.
        if self.inTerminalState() == False:
            self._mode_episode_count += 1
        print("== Mean score per episode is {} over {} episodes ==".format(
            self._mode_score / self._mode_episode_count,
            self._mode_episode_count))

    def inputDimensions(self):
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        # Scale grayscale pixels into [0, 1) floats.
        return [np.array(self._reduced_screen) / 256.]

    def inTerminalState(self):
        return self._ple.game_over()
class PygameLearningEnvironment(Environment):
    """TensorForce Environment adapter for PyGame Learning Environment games.

    Selects and instantiates one of the supported PLE games by name, wraps it
    in a PLE instance, and exposes the TensorForce Environment interface
    (reset/execute/states/actions). The state is either the raw RGB screen or
    the game's state dictionary flattened to a float32 vector.
    """

    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            game_name: name of the PLE game to load (e.g. 'catcher', 'snake').
            rewards: reward_values dict forwarded to PLE.
            state_as_image: if True states are RGB screens, else the game's
                state dict preprocessed into a float32 numpy vector.
            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            visualize: If set True, the program will visualize the trainings,
                will slow down training
            width, height: dimensions of the game screen (ignored by
                monsterkong; flappybird is forced to 144x256 below).
            lives: number of lives in game. Game resets on game over
                (ie lives = 0). only in Catcher and Pong (score)
        """
        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps #30 # frames per second
        self.force_fps = force_fps #True # False for slower speeds
        self.frame_skip = frame_skip # frames to skip
        self.ple_num_steps = hold_action # frames to continue action for
        #self.isRGB = isRGB #always returns color, lets tensorforce due the processing
        self.visualize = visualize
        self.width = width
        self.height = height

        # Diagnostic counters (used by the commented-out test code below).
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        # Instantiate the requested game; each game takes slightly different
        # constructor arguments.
        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256) # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environement.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                # NOTE(review): dict ordering determines the feature order —
                # presumably stable per game; verify for the games used.
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor = process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()

        # setup gamescreen object: pre-allocated buffer defining the state
        # shape reported by the `states` property.
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        # (sorted with None last so indices are deterministic).
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))

    def __str__(self):
        return 'PygameLearningEnvironment({})'.format(self.env_name)

    def close(self):
        """Shut down pygame and drop the PLE instance."""
        pygame.quit()
        self.env = None

    def reset(self):
        """Clear the state buffer, restart the game, return the first state."""
        # if isinstance(self.gym, gym.wrappers.Monitor):
        #     self.gym.stats_recorder.done = True
        #env.act(env.NOOP)  # need to take an action or screen is black
        # clear gamescreen
        if self.state_as_image:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.float32)
        self.env.reset_game()
        return self.current_state

    def execute(self, actions):
        """Apply the action index and step PLE once.

        Returns:
            (state, terminal, reward) — the TensorForce Environment ordering.
        """
        #print("lives check in ple {}".format(self.env.lives()))
        #self.env.saveScreen("test_screen_capture_before_{}.png".format(self.total_time_steps))
        #lives_check = self.env.lives()  #testing code
        ple_actions = self.action_list[actions]
        reward = self.env.act(ple_actions)
        state = self.current_state

        # testing code
        # self.env.saveScreen("test_screen_capture_after_{}.png".format(self.total_time_steps))
        # self.episode_time_steps += 1
        # self.episode_reward += reward
        # self.total_time_steps += 1
        # print("reward is {}".format(reward))
        # #if self.env.lives() != lives_check:
        # #    print('lives are different is game over? {}'.format(self.env.game_over()))
        # print('lives {}, game over {}, old lives {}'.format(self.env.lives(),self.env.game_over(),lives_check))

        if self.env.game_over():
            terminal = True
            # testing code
            self.reached_terminal += 1
            # print("GAME OVER reached terminal {}".format(self.reached_terminal))
            # print("episode time steps {}, episode reward {}".format(self.episode_time_steps,self.episode_reward))
            # self.episode_reward = 0
            # self.episode_time_steps = 0
            # print("total timesteps {}".format(self.total_time_steps))
        else:
            terminal = False

        return state, terminal, reward

    @property
    def actions(self):
        """TensorForce action spec: a single discrete action."""
        return dict(type='int', num_actions=len(self.action_list), names=self.action_list)

    # @property
    # def actions(self):
    #     return OpenAIGym.action_from_space(space=self.gym.action_space)

    #ALE implementation
    # @property
    # def actions(self):
    #     return dict(type='int', num_actions=len(self.action_inds), names=self.action_names)

    @property
    def states(self):
        """TensorForce state spec, shaped like the pre-allocated buffer."""
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def current_state(self):
        """Copy of the current state (RGB screen or preprocessed state vector)."""
        #returned state can either be an image or an np array of key components
        if self.state_as_image:
            self.gamescreen = self.env.getScreenRGB()
            # if isRGB:
            #     self.gamescreen = self.env.getScreenRGB()
            # else:
            #     self.gamescreen = self.env.getScreenGrayscale()
        else:
            self.gamescreen = self.env.getGameState()
        return np.copy(self.gamescreen)

    #ALE implementation
    # @property
    # def states(self):
    #     return dict(shape=self.gamescreen.shape, type=float)
    # @property
    # def current_state(self):
    #     self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
    #     return np.copy(self.gamescreen)
    # @property
    # def is_terminal(self):
    #     if self.loss_of_life_termination and self.life_lost:
    #         return True
    #     else:
    #         return self.ale.game_over()
class PLEEnv(gym.Env):
    """OpenAI Gym wrapper around a PyGame Learning Environment game.

    The game class is looked up dynamically in ``ple.games`` by name, so any
    PLE game whose class name matches ``game_name`` can be wrapped.
    """

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True):
        # Resolve and instantiate the game class from its module to talk to
        # the emulator.
        import importlib
        module_path = ('ple.games.%s' % game_name).lower()
        game_cls = getattr(importlib.import_module(module_path), game_name)
        self.game_state = PLE(game_cls(), fps=30, display_screen=display_screen)
        self.game_state.init()

        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.screen_width, self.screen_height, 3))
        self.viewer = None

    def _step(self, a):
        """Apply action index ``a``; return (obs, reward, done, info)."""
        reward = self.game_state.act(self._action_set[a])
        return self._get_image(), reward, self.game_state.game_over(), {}

    def _get_image(self):
        # PLE hands back a rotated frame; rotate 270 degrees and flip
        # horizontally to restore the expected orientation.
        return np.fliplr(np.rot90(self.game_state.getScreenRGB(), 3))

    @property
    def _n_actions(self):
        """Number of legal actions exposed by PLE."""
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        """Rebuild the observation space, restart the game, return the first frame."""
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.screen_width, self.screen_height, 3))
        self.game_state.reset_game()
        return self._get_image()

    def _render(self, mode='human', close=False):
        """Render the frame; 'human' opens a viewer, 'rgb_array' returns it."""
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        frame = self._get_image()
        if mode == 'rgb_array':
            return frame
        if mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(frame)

    def _seed(self, seed):
        """Seed PLE and the underlying game, then reinitialize."""
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
def discounted_rewards(rewards, gamma=0.99):
    """Return the discounted return G_t = r_t + gamma * G_{t+1} per step.

    Args:
        rewards: sequence of per-step rewards, oldest first.
        gamma: discount factor.

    Returns:
        A list of the same length where entry t is the discounted sum of
        rewards from step t to the end of the episode.
    """
    # Accumulate right-to-left with append + reverse: O(n) instead of the
    # original O(n^2) list.insert(0, ...) pattern, with identical results.
    returns = []
    running = 0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.append(running)
    returns.reverse()
    return returns


def train(env, agent, max_episodes=None):
    """REINFORCE-style training loop.

    Args:
        env: an initialized PLE environment.
        agent: torch module exposing parameters() and device, used by
            play_episode() to produce (p, r).
        max_episodes: stop after this many episodes; None (the default,
            preserving the original behavior) trains forever.

    NOTE(review): assumes play_episode() returns per-step log-probabilities
    in p and raw rewards in r — confirm against its definition.
    """
    optimizer = torch.optim.Adam(agent.parameters())
    episode = 0
    while max_episodes is None or episode < max_episodes:
        agent.zero_grad()
        p, r = play_episode(env, agent)
        r = torch.tensor(discounted_rewards(r), device=agent.device)
        # Policy-gradient loss: maximize expected discounted return.
        loss = (-r * p).mean()
        loss.backward()
        optimizer.step()
        episode += 1


if __name__ == '__main__':
    env = PLE(Snake(), fps=30, display_screen=True)
    env.init()
    agent = Agent(env.getScreenDims(), 16, env.getActionSet())
    train(env, agent)