def get_charts(name):
    charts = []
    print('###############################' + name + '###############################')
    const_best_score = -100000
    chart_const_best = ([0], [0])
    env = AtariEnv(game=name, obs_type='image', frameskip=(2, 5),
                   repeat_action_probability=0.25)  # gym.make(name)
    env.reset()
    actionN = env.action_space.n
    policy = []
    max_steps = 30000
    n_episode = 50
    rew = [[0] for _ in range(n_episode)]
    p = lambda na: random.randint(0, na - 1)
    cnt = 0
    for i_episode in range(n_episode):
        ob = env.reset()
        while cnt < max_steps:
            cnt += 1
            action = p(actionN)  # env.action_space.sample()
            S_prime, r, done, info = env.step(action)
            rew[i_episode].append(r)
            if done:
                break
        cnt = 0
        rew[i_episode] = np.array(rew[i_episode])
    env.close()
    return rew
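# Usage sketch (not part of the original file): get_charts returns one
# per-step reward array per episode, so episode returns are just the sums.
# Assumes `random`, `numpy as np` and `AtariEnv` are imported as elsewhere
# in this collection.
rewards_per_episode = get_charts('pong')
episode_returns = [r.sum() for r in rewards_per_episode]
print('mean random-policy return over %d episodes: %.2f'
      % (len(episode_returns), np.mean(episode_returns)))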
def __init__(self, game, use_greyscale=True, hist_len=4, action_repeat=4,
             pool_len=2, noop_max=30, rescale_height=84, rescale_width=84,
             life_episode=True, render_scale=3, render_hist=False):
    AtariEnv.__init__(self, game, obs_type='image', frameskip=1,
                      repeat_action_probability=0.)
    self.use_greyscale = use_greyscale
    self.action_repeat = action_repeat
    self.noop_max = noop_max
    self.life_episode = life_episode
    self.render_scale = render_scale
    self.render_hist = render_hist
    (self.orig_screen_width, self.orig_screen_height) = self.ale.getScreenDims()
    self.screen_width = rescale_width if rescale_width > 0 else self.orig_screen_width
    self.screen_height = rescale_height if rescale_height > 0 else self.orig_screen_height
    self.use_rescale = rescale_width > 0 or rescale_height > 0
    self.img_dims = 1 if self.use_greyscale else 3
    pool_shape = (pool_len, self.orig_screen_height, self.orig_screen_width, 3)
    hist_shape = (hist_len, self.screen_height, self.screen_width, self.img_dims)
    self.observation_space = gym.spaces.Box(low=0, high=255, shape=hist_shape)
    self.pool_fbuff = np.zeros(pool_shape, dtype=np.uint8)
    self.hist_fbuff = np.zeros(hist_shape, dtype=np.uint8)
    self.hist_fbuff_readonly = self.hist_fbuff.view()
    self.hist_fbuff_readonly.setflags(write=False)
    self._setup_image_pipeline()
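# Shape check (illustrative only, not part of the original class): with the
# default arguments the frame-history buffer, and hence observation_space,
# is (hist_len, 84, 84, 1) for greyscale frames, while the pooling buffer
# keeps raw RGB frames at the native ALE resolution. The 210x160 screen size
# below is the usual ALE value; the real code reads it from ale.getScreenDims().
import numpy as np

hist_len, pool_len = 4, 2
orig_h, orig_w = 210, 160
pool_shape = (pool_len, orig_h, orig_w, 3)
hist_shape = (hist_len, 84, 84, 1)
print(pool_shape, hist_shape)  # (2, 210, 160, 3) (4, 84, 84, 1)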
def create_gym_env_wrapper_atari(config):
    from gym.envs.atari import AtariEnv
    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper
    ae = AtariEnv(**config["AtariEnv"])
    # IMP Had initially thought to put this config in config["GymEnvWrapper"]
    # but because of code below which converts var_env_configs to env_config,
    # it's best to leave those configs as top level configs in the dict!
    gew = GymEnvWrapper(ae, **config)
    return gew
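# Usage sketch (hypothetical config, not from the original repo). The key
# layout mirrors the test configs further below: a nested "AtariEnv" dict for
# the raw env plus top-level keys consumed by GymEnvWrapper. Adjust the keys
# to whatever your GymEnvWrapper version actually accepts.
config = {
    "AtariEnv": {"game": "beam_rider", "obs_type": "image", "frameskip": 1},
    "atari_preprocessing": True,
    "frame_skip": 4,
    "grayscale_obs": False,
    "state_space_type": "discrete",
    "action_space_type": "discrete",
    "seed": 0,
}
env = create_gym_env_wrapper_atari(config)
obs = env.reset()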
class GameEnv(object):
    def __init__(self, game, obs_type, frame_skip):
        self.gym_env = AtariEnv(game=game, obs_type=obs_type, frameskip=frame_skip,
                                repeat_action_probability=0.05)
        self.game_name = game
        self.step_count = 0
        self.gym_env.reset()
        self.lives = self.gym_env.ale.lives()

    def step(self, action):
        self.step_count += 1
        observation, reward, done, _ = self.gym_env.step(action)
        score = reward
        new_lives = self.gym_env.ale.lives()
        reward = max(NEGATIVE_REWARD, min(POSITIVE_REWARD, reward))
        if self.lives > new_lives:
            reward = NEGATIVE_REWARD
            if self.game_name == 'breakout':
                self.gym_env.ale.act(1)  # press FIRE so the ball is relaunched after a lost life
        self.lives = new_lives
        return observation, reward, done, new_lives, score

    def render(self):
        return self.gym_env.render()

    def random_action(self):
        return self.gym_env.action_space.sample()

    def action_num(self):
        return self.gym_env.action_space.n

    def reset(self, skip_begin_frame=3):
        assert skip_begin_frame > 0
        self.gym_env.reset()
        obs = None
        for _ in range(skip_begin_frame):
            obs, _, _, _ = self.gym_env.step(self.gym_env.action_space.sample())
        self.lives = self.gym_env.ale.lives()
        self.step_count = 0
        return obs

    def close(self):
        self.gym_env.close()
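# Usage sketch (not part of the original file): run one random-play episode.
# NEGATIVE_REWARD / POSITIVE_REWARD are assumed to be module-level clipping
# constants (e.g. -1 and 1) defined elsewhere in the original code.
env = GameEnv(game='breakout', obs_type='image', frame_skip=4)
obs = env.reset()
done = False
episode_score = 0
while not done:
    obs, reward, done, lives, score = env.step(env.random_action())
    episode_score += score
env.close()
print('episode score:', episode_score)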
def test_r_delay(self):
    """ """
    print("\033[32;1;4mTEST_REWARD_DELAY\033[0m")
    config = {
        "AtariEnv": {
            "game": "beam_rider",  # "breakout",
            "obs_type": "image",
            "frameskip": 1,
        },
        "delay": 1,
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0, #seed
    }
    # config["log_filename"] = log_filename

    from gym.envs.atari import AtariEnv
    ae = AtariEnv(**{
        "game": "beam_rider",
        "obs_type": "image",
        "frameskip": 1
    })
    aew = GymEnvWrapper(ae, **config)
    ob = aew.reset()
    print("observation_space.shape:", ob.shape)
    # print(ob)
    total_reward = 0.0
    for i in range(200):
        act = aew.action_space.sample()
        next_state, reward, done, info = aew.step(act)
        print("step, reward, done, act:", i, reward, done, act)
        if i == 154 or i == 159:
            assert reward == 44.0, ("1-step delayed reward in step: " + str(i)
                                    + " should have been 44.0.")
        total_reward += reward
    print("total_reward:", total_reward)
    aew.reset()
def test_discrete_irr_features(self):
    """ """
    print("\033[32;1;4mTEST_DISC_IRR_FEATURES\033[0m")
    config = {
        "AtariEnv": {
            "game": "beam_rider",  # "breakout",
            "obs_type": "image",
            "frameskip": 1,
        },
        "delay": 1,
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        "irrelevant_features": {
            "state_space_type": "discrete",
            "action_space_type": "discrete",
            "state_space_size": 8,
            "action_space_size": 8,
            "completely_connected": True,
            "repeats_in_sequences": False,
            "generate_random_mdp": True,
            # TODO currently RLToyEnv needs to have at least 1 terminal state, allow it to have 0 in future.
            "terminal_state_density": 0.2,
        }
        # },
        # 'seed': 0, #seed
    }
    # config["log_filename"] = log_filename

    from gym.envs.atari import AtariEnv
    ae = AtariEnv(**{
        "game": "beam_rider",
        "obs_type": "image",
        "frameskip": 1
    })
    aew = GymEnvWrapper(ae, **config)
    ob = aew.reset()
    print("type(observation_space):", type(ob))
    # print(ob)
    total_reward = 0.0
    for i in range(200):
        act = aew.action_space.sample()
        next_state, reward, done, info = aew.step(act)
        print(
            "step, reward, done, act, next_state[1]:",
            i,
            reward,
            done,
            act,
            next_state[1],
        )
        if i == 154 or i == 159:
            assert reward == 44.0, ("1-step delayed reward in step: " + str(i)
                                    + " should have been 44.0.")
        total_reward += reward
    print("total_reward:", total_reward)
    aew.reset()
def test_r_delay_p_noise_r_noise(self):
    """
    P noise is currently only for discrete env #TODO
    """
    print("\033[32;1;4mTEST_MULTIPLE\033[0m")
    config = {
        "AtariEnv": {
            "game": "beam_rider",  # "breakout",
            "obs_type": "image",
            "frameskip": 1,
        },
        "delay": 1,
        "reward_noise": lambda a: a.normal(0, 0.1),
        "transition_noise": 0.1,
        # "GymEnvWrapper": {
        "atari_preprocessing": True,
        "frame_skip": 4,
        "grayscale_obs": False,
        "state_space_type": "discrete",
        "action_space_type": "discrete",
        "seed": 0,
        # },
        # 'seed': 0, #seed
    }
    # config["log_filename"] = log_filename

    from gym.envs.atari import AtariEnv
    ae = AtariEnv(**{
        "game": "beam_rider",
        "obs_type": "image",
        "frameskip": 1
    })
    aew = GymEnvWrapper(ae, **config)
    ob = aew.reset()
    print("observation_space.shape:", ob.shape)
    # print(ob)
    total_reward = 0.0
    for i in range(200):
        act = aew.action_space.sample()
        next_state, reward, done, info = aew.step(act)
        print("step, reward, done, act:", i, reward, done, act)
        # Testing hardcoded values at these timesteps implicitly tests that there
        # were 21 noisy transitions in total and noise inserted in rewards.
        if i == 154:
            np.testing.assert_allclose(
                reward,
                44.12183457980473,
                rtol=1e-05,
                err_msg="1-step delayed noisy reward in step: " + str(i)
                + " should have been ~44.12.",
            )
        if i == 199:
            np.testing.assert_allclose(
                reward,
                0.07467690634910334,
                rtol=1e-05,
                err_msg="Noisy reward in step: " + str(i)
                + " should have been ~0.0747.",
            )
        total_reward += reward
    print("total_reward:", total_reward)
    aew.reset()
import tensorflow as tf  # needed for tf.global_variables_initializer() and tf.Session()
from gym.envs.atari import AtariEnv
import numpy as np
from utils import *  # provides RecentStateBuffer, preprocess_image, preprocess_images
from dqn import DQN

model = DQN()
images, result = model.build()
init = tf.global_variables_initializer()
env = AtariEnv(game='pong', obs_type='image', frameskip=4)
action_space = 6
episodes = 1
done = False
reward = 0
states = RecentStateBuffer(4, preprocess=preprocess_image)

with tf.Session() as sess:
    sess.run(init)
    for i in range(episodes):
        states.reset()
        state = env.reset()
        states.add(state)
        last_states = states.get()
        action = 0
        # Take random actions until the state buffer holds 4 frames.
        while len(last_states) < 4:
            action = np.random.randint(action_space)
            state, reward, done, _ = env.step(action)
            states.add(state)
            new_states = states.get()
            # DQN.add_transition(last_states, action, reward, new_states, done)
            last_states = new_states
        state_images = preprocess_images(last_states)
        print(state_images)
n_learning = 0
learning_frame_interval = 64
update_step_interval = 4
n_frames = 0
n_learning_steps = 0
gamma = 0.99

# exploration policy
policy = EpsPolicy()
base_eps = 0.5
min_eps = 0.05
max_eps = 0.5

# Optimizer
opt = Adam(learning_rate=0.1)

# gym env
env = AtariEnv(game='ms_pacman', obs_type='image')
obs = env.reset()

for i in range(n_epochs):
    obs = env.reset()
    cur_ep_n_frames = 0
    s = obs_to_state(obs)
    cur_eps = min(max_eps, max(min_eps, base_eps / math.log(i + 2)))
    cur_score = 0
    while True:
        env.render()
        n_frames += 1
        cur_ep_n_frames += 1
        a = policy.get_action(s, q, n_acts, cur_eps)
        obs1, r, done, _ = env.step(a)
        cur_score += r
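# Illustration (not part of the original script): the epsilon schedule above
# clips base_eps / log(i + 2) into the [min_eps, max_eps] band, so exploration
# starts at max_eps and decays slowly towards min_eps over epochs.
import math

base_eps, min_eps, max_eps = 0.5, 0.05, 0.5
for i in (0, 10, 100, 1000, 10000):
    eps = min(max_eps, max(min_eps, base_eps / math.log(i + 2)))
    print(i, round(eps, 3))
# 0 -> 0.5 (clipped), 10 -> 0.201, 100 -> 0.108, 1000 -> 0.072, 10000 -> 0.054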
from .DDQN import DQN
from .wrappers import normalize_obs, reward_wrapper
import numpy as np
import random
import gym
import sys
import time
import os
from gym.envs.atari import AtariEnv
from gym.wrappers import Monitor

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

RAM_BALL_X_POS = 49

if __name__ == '__main__':
    env = AtariEnv(frameskip=1)
    # env = Monitor(env, '.', force=True)
    # env = reward_wrapper(env)
    env = normalize_obs(env)
    dqn = DQN(env.observation_space.shape[0], gym.spaces.Discrete(4))
    dir_ = os.path.dirname(os.path.abspath(__file__))
    model_file = os.path.join(dir_, 'model2')
    print(model_file)
    dqn.model.load_weights(model_file)
    dqn.epsilon = 0.0
    ball_near_player = False
    for i in range(1):
        scores = [0, 0]
        prev_bounce = 0
from gym.envs.atari import AtariEnv
import cv2
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
from memory import MemoryReplay
from CONST import *
from tqdm import tqdm

makeEnv = lambda game: AtariEnv(
    game, obs_type='image',
    frameskip=4 if game != 'space_invaders' else 3)

# TYPES
FRAMETYPE = np.uint8


class ActorDQN():
    """
    Actor to host and execute a gym environment for Atari games.

    Parameters
    ----------
    game: str
        Name of the game to execute.
    gameActions: int
        Number of actions that the agent can execute. If in doubt, check
        the actions in CONST.ACTION_MEANING.
    policy: torch policy
        Object that hosts the policy network to process the environment's
        observations.