Example #1
    def __init__(self, game, obs_type, frame_skip):
        self.gym_env = AtariEnv(game=game,
                                obs_type=obs_type,
                                frameskip=frame_skip,
                                repeat_action_probability=0.05)
        self.game_name = game
        self.step_count = 0
        self.gym_env.reset()
        self.lives = self.gym_env.ale.lives()
Example #2
from gym.envs.atari import AtariEnv
import numpy as np
import random


def get_charts(name):
    charts = []
    print('###############################' + name +
          '###############################')
    const_best_score = -100000
    chart_const_best = ([0], [0])
    env = AtariEnv(game=name,
                   obs_type='image',
                   frameskip=(2, 5),
                   repeat_action_probability=0.25)  #gym.make(name)
    env.reset()
    actionN = env.action_space.n
    policy = []
    max_steps = 30000
    n_episode = 50
    rew = [[0] for _ in range(n_episode)]
    p = lambda na: random.randint(0, na - 1)
    cnt = 0
    for i_episode in range(n_episode):
        ob = env.reset()
        while cnt < max_steps:
            cnt += 1
            action = p(actionN)  #env.action_space.sample()
            S_prime, r, done, info = env.step(action)
            rew[i_episode].append(r)
            if done:
                break
        cnt = 0
        rew[i_episode] = np.array(rew[i_episode])
    env.close()
    return rew
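A minimal usage sketch for the helper above (the game name and the aggregation are illustrative and not part of the original source); each entry of the returned list is a NumPy array of per-step rewards for one episode:

rewards = get_charts('breakout')
mean_return = np.mean([episode.sum() for episode in rewards])
print('episodes:', len(rewards), 'mean episode return:', mean_return)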
Example #3
    def __init__(self,
                 game,
                 use_greyscale=True,
                 hist_len=4,
                 action_repeat=4,
                 pool_len=2,
                 noop_max=30,
                 rescale_height=84,
                 rescale_width=84,
                 life_episode=True,
                 render_scale=3,
                 render_hist=False):
        AtariEnv.__init__(self,
                          game,
                          obs_type='image',
                          frameskip=1,
                          repeat_action_probability=0.)

        self.use_greyscale = use_greyscale
        self.action_repeat = action_repeat
        self.noop_max = noop_max
        self.life_episode = life_episode
        self.render_scale = render_scale
        self.render_hist = render_hist

        (self.orig_screen_width,
         self.orig_screen_height) = self.ale.getScreenDims()
        self.screen_width = rescale_width if rescale_width > 0 else self.orig_screen_width
        self.screen_height = rescale_height if rescale_height > 0 else self.orig_screen_height

        self.use_rescale = rescale_width > 0 or rescale_height > 0

        self.img_dims = 1 if self.use_greyscale else 3
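        # Two frame buffers: pool_fbuff stores the last pool_len raw RGB frames
        # (presumably for max-pooling consecutive frames, as in standard Atari
        # preprocessing), while hist_fbuff stores the last hist_len preprocessed
        # frames that together form one observation.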
        pool_shape = (pool_len, self.orig_screen_height,
                      self.orig_screen_width, 3)
        hist_shape = (hist_len, self.screen_height, self.screen_width,
                      self.img_dims)
        self.observation_space = gym.spaces.Box(low=0,
                                                high=255,
                                                shape=hist_shape)

        self.pool_fbuff = np.zeros(pool_shape, dtype=np.uint8)
        self.hist_fbuff = np.zeros(hist_shape, dtype=np.uint8)
        self.hist_fbuff_readonly = self.hist_fbuff.view()
        self.hist_fbuff_readonly.setflags(write=False)
        self._setup_image_pipeline()
Example #4
def create_gym_env_wrapper_atari(config):
    from gym.envs.atari import AtariEnv
    from mdp_playground.envs.gym_env_wrapper import GymEnvWrapper

    ae = AtariEnv(**config["AtariEnv"])
    # IMP: had initially thought of putting this config inside config["GymEnvWrapper"],
    # but because of code below that converts var_env_configs to env_config,
    # it's best to leave those configs as top-level keys in the dict!
    gew = GymEnvWrapper(ae, **config)
    return gew
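A sketch of the kind of config dict this factory expects, assembled from the test configs in the examples further below (the exact keys accepted by GymEnvWrapper depend on the mdp_playground version):

config = {
    "AtariEnv": {"game": "beam_rider", "obs_type": "image", "frameskip": 1},
    "atari_preprocessing": True,
    "frame_skip": 4,
    "grayscale_obs": False,
    "state_space_type": "discrete",
    "action_space_type": "discrete",
    "seed": 0,
}
env = create_gym_env_wrapper_atari(config)
obs = env.reset()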
Example #5
class GameEnv(object):
    def __init__(self, game, obs_type, frame_skip):
        self.gym_env = AtariEnv(game=game,
                                obs_type=obs_type,
                                frameskip=frame_skip,
                                repeat_action_probability=0.05)
        self.game_name = game
        self.step_count = 0
        self.gym_env.reset()
        self.lives = self.gym_env.ale.lives()

    def step(self, action):
        self.step_count += 1
        observation, reward, done, _ = self.gym_env.step(action)
        score = reward
        new_lives = self.gym_env.ale.lives()
        reward = max(NEGATIVE_REWARD, min(POSITIVE_REWARD, reward))

        if self.lives > new_lives:
            reward = NEGATIVE_REWARD
            if self.game_name == 'breakout':
                self.gym_env.ale.act(1)
        self.lives = new_lives
        return observation, reward, done, new_lives, score

    def render(self):
        return self.gym_env.render()

    def random_action(self):
        return self.gym_env.action_space.sample()

    def action_num(self):
        return self.gym_env.action_space.n

    def reset(self, skip_begin_frame=3):
        assert skip_begin_frame > 0
        self.gym_env.reset()
        obs = None
        for _ in range(skip_begin_frame):
            obs, _, _, _ = self.gym_env.step(
                self.gym_env.action_space.sample())
        self.lives = self.gym_env.ale.lives()
        self.step_count = 0
        return obs

    def close(self):
        self.gym_env.close()
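A short usage sketch for the GameEnv wrapper above; NEGATIVE_REWARD and POSITIVE_REWARD are module-level constants assumed to be defined elsewhere in the original file (for example -1 and 1):

env = GameEnv(game='breakout', obs_type='image', frame_skip=4)
obs = env.reset()
done = False
while not done:
    obs, reward, done, lives, score = env.step(env.random_action())
env.close()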
Example #6
    def test_r_delay(self):
        """ """
        print("\033[32;1;4mTEST_REWARD_DELAY\033[0m")
        config = {
            "AtariEnv": {
                "game": "beam_rider",  # "breakout",
                "obs_type": "image",
                "frameskip": 1,
            },
            "delay": 1,
            # "GymEnvWrapper": {
            "atari_preprocessing": True,
            "frame_skip": 4,
            "grayscale_obs": False,
            "state_space_type": "discrete",
            "action_space_type": "discrete",
            "seed": 0,
            # },
            # 'seed': 0, #seed
        }

        # config["log_filename"] = log_filename

        from gym.envs.atari import AtariEnv

        ae = AtariEnv(**{
            "game": "beam_rider",
            "obs_type": "image",
            "frameskip": 1
        })
        aew = GymEnvWrapper(ae, **config)
        ob = aew.reset()
        print("observation_space.shape:", ob.shape)
        # print(ob)
        total_reward = 0.0
        for i in range(200):
            act = aew.action_space.sample()
            next_state, reward, done, info = aew.step(act)
            print("step, reward, done, act:", i, reward, done, act)
            if i == 154 or i == 159:
                assert reward == 44.0, ("1-step delayed reward in step: " +
                                        str(i) + " should have been 44.0.")
            total_reward += reward
        print("total_reward:", total_reward)
        aew.reset()
Example #7
    def test_discrete_irr_features(self):
        """ """
        print("\033[32;1;4mTEST_DISC_IRR_FEATURES\033[0m")
        config = {
            "AtariEnv": {
                "game": "beam_rider",  # "breakout",
                "obs_type": "image",
                "frameskip": 1,
            },
            "delay": 1,
            # "GymEnvWrapper": {
            "atari_preprocessing": True,
            "frame_skip": 4,
            "grayscale_obs": False,
            "state_space_type": "discrete",
            "action_space_type": "discrete",
            "seed": 0,
            "irrelevant_features": {
                "state_space_type": "discrete",
                "action_space_type": "discrete",
                "state_space_size": 8,
                "action_space_size": 8,
                "completely_connected": True,
                "repeats_in_sequences": False,
                "generate_random_mdp": True,
                # TODO currently RLToyEnv needs to have at least 1 terminal state, allow it to have 0 in future.
                "terminal_state_density": 0.2,
            }
            # },
            # 'seed': 0, #seed
        }

        # config["log_filename"] = log_filename

        from gym.envs.atari import AtariEnv

        ae = AtariEnv(**{
            "game": "beam_rider",
            "obs_type": "image",
            "frameskip": 1
        })
        aew = GymEnvWrapper(ae, **config)
        ob = aew.reset()
        print("type(observation_space):", type(ob))
        # print(ob)
        total_reward = 0.0
        for i in range(200):
            act = aew.action_space.sample()
            next_state, reward, done, info = aew.step(act)
            print(
                "step, reward, done, act, next_state[1]:",
                i,
                reward,
                done,
                act,
                next_state[1],
            )
            if i == 154 or i == 159:
                assert reward == 44.0, ("1-step delayed reward in step: " +
                                        str(i) + " should have been 44.0.")
            total_reward += reward
        print("total_reward:", total_reward)
        aew.reset()
Example #8
    def test_r_delay_p_noise_r_noise(self):
        """
        P noise is currently only for discrete env #TODO
        """
        print("\033[32;1;4mTEST_MULTIPLE\033[0m")
        config = {
            "AtariEnv": {
                "game": "beam_rider",  # "breakout",
                "obs_type": "image",
                "frameskip": 1,
            },
            "delay": 1,
            "reward_noise": lambda a: a.normal(0, 0.1),
            "transition_noise": 0.1,
            # "GymEnvWrapper": {
            "atari_preprocessing": True,
            "frame_skip": 4,
            "grayscale_obs": False,
            "state_space_type": "discrete",
            "action_space_type": "discrete",
            "seed": 0,
            # },
            # 'seed': 0, #seed
        }

        # config["log_filename"] = log_filename

        from gym.envs.atari import AtariEnv

        ae = AtariEnv(**{
            "game": "beam_rider",
            "obs_type": "image",
            "frameskip": 1
        })
        aew = GymEnvWrapper(ae, **config)
        ob = aew.reset()
        print("observation_space.shape:", ob.shape)
        # print(ob)
        total_reward = 0.0
        for i in range(200):
            act = aew.action_space.sample()
            next_state, reward, done, info = aew.step(act)
            print("step, reward, done, act:", i, reward, done, act)
            # Testing hardcoded values at these timesteps implicitly tests that there
            # were 21 noisy transitions in total and noise inserted in rewards.
            if i == 154:
                np.testing.assert_allclose(
                    reward,
                    44.12183457980473,
                    rtol=1e-05,
                    err_msg="1-step delayed reward in step: " + str(i) +
                    " should have been 44.12183457980473.",
                )
            if i == 199:
                np.testing.assert_allclose(
                    reward,
                    0.07467690634910334,
                    rtol=1e-05,
                    err_msg="1-step delayed reward in step: " + str(i) +
                    " should have been 0.07467690634910334.",
                )
            total_reward += reward
        print("total_reward:", total_reward)
        aew.reset()
Example #9
from gym.envs.atari import AtariEnv
import numpy as np
import tensorflow as tf  # needed for tf.Session / tf.global_variables_initializer below
from utils import *
from dqn import DQN

model = DQN()
images, result = model.build()
init = tf.global_variables_initializer()
env = AtariEnv(game='pong', obs_type='image', frameskip=4)
action_space = 6
episodes = 1
done = False
reward = 0
states = RecentStateBuffer(4, preprocess=preprocess_image)
with tf.Session() as sess:
    sess.run(init)
    for i in range(episodes):
        states.reset()
        state = env.reset()
        states.add(state)
        last_states = states.get()
        action = 0
        while len(last_states) < 4:
            action = np.random.randint(action_space)
            state, reward, done, _ = env.step(action)
            states.add(state)
            new_states = states.get()
            # DQN.add_transition(last_states, action, reward, new_states, done)
            last_states = new_states
        state_images = preprocess_images(last_states)
        print(state_images)
Example #10
    n_learning = 0
    learning_frame_interval = 64
    update_step_interval = 4
    n_frames = 0
    n_learning_steps = 0
    gamma = 0.99
    # exploration policy
    policy = EpsPolicy()
    base_eps = 0.5
    min_eps = 0.05
    max_eps = 0.5
    # Optimizer
    opt = Adam(learning_rate=0.1)

    # gym env
    env = AtariEnv(game='ms_pacman', obs_type='image')
    obs = env.reset()

    for i in range(n_epochs):
        obs = env.reset()
        cur_ep_n_frames = 0
        s = obs_to_state(obs)
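        # Anneal exploration: epsilon decays roughly as base_eps / log(episode index + 2),
        # clipped to the [min_eps, max_eps] range.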
        cur_eps = min(max_eps, max(min_eps, base_eps / math.log(i + 2)))
        cur_score = 0
        while True:
            env.render()
            n_frames += 1
            cur_ep_n_frames += 1
            a = policy.get_action(s, q, n_acts, cur_eps)
            obs1, r, done, _ = env.step(a)
            cur_score += r
Example #11
from .DDQN import DQN
from .wrappers import normalize_obs, reward_wrapper
import numpy as np
import random
import gym
import sys
import time
import os
from gym.envs.atari import AtariEnv
from gym.wrappers import Monitor
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

RAM_BALL_X_POS = 49

if __name__ == '__main__':
    env = AtariEnv(frameskip=1)
    # env = Monitor(env, '.', force=True)
    # env = reward_wrapper(env)
    env = normalize_obs(env)
    dqn = DQN(env.observation_space.shape[0], gym.spaces.Discrete(4))
    dir_ = os.path.dirname(os.path.abspath(__file__))
    model_file = os.path.join(dir_, 'model2')
    print(model_file)
    dqn.model.load_weights(model_file)
    dqn.epsilon = 0.0
    ball_near_player = False

    for i in range(1):
        scores = [0, 0]
        prev_bounce = 0
Example #12
from gym.envs.atari import AtariEnv
import cv2
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
from memory import MemoryReplay
from CONST import *
from tqdm import tqdm

makeEnv = lambda game: AtariEnv(
    game, obs_type='image', frameskip=4 if game != 'space_invaders' else 3)
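# Note: a frame skip of 3 is traditionally used for space_invaders (instead of 4)
# because with a skip of 4 the blinking lasers can fall entirely on skipped frames,
# as observed in the original DQN paper.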

# TYPES
FRAMETYPE = np.uint8


class ActorDQN():
    """
    Actor to host and execute a gym environment for Atari games.

    Parameters
    -----------
    game: str
        Name of the game to execute.
    gameActions: int
        Number of actions that the agent can execute.
        If in doubt, check the actions in CONST.ACTION_MEANING.
    policy: torch policy
        Object that hosts the policy network to process the
        environment's observations.