Example #1
def test_fixed_keys_step(tmpdir):
    env = MazeExplorer(random_key_positions=False, mazes_path=tmpdir.strpath)
    env.reset()
    for _ in range(1000):
        action = env.action_space.sample()
        observation, *_ = env.step(action)
        assert observation.shape == (42, 42, 3)
Example #2
def test_step_record(tmpdir):
    env = MazeExplorer(random_textures=True, mazes_path=tmpdir.strpath)
    env.reset()
    for _ in range(3):
        action = env.action_space.sample()
        observation, *_ = env.step_record(action, tmpdir)
        assert observation.shape == (42, 42, 3)
    assert len(list(Path(tmpdir).glob("*.png"))) == 6
Example #3
def test_generate_with_seed(tmpdir):
    env = MazeExplorer(10, seed=5, mazes_path=tmpdir.mkdir("maze_1").strpath)
    assert len(set(env.mazes)) == 10

    same_env = MazeExplorer(10,
                            seed=5,
                            mazes_path=tmpdir.mkdir("maze_2").strpath)
    assert set(env.mazes) == set(same_env.mazes)

    different_env = MazeExplorer(10,
                                 seed=42,
                                 mazes_path=tmpdir.mkdir("maze_3").strpath)
    assert len(set(env.mazes) - set(different_env.mazes)) == 10
Example #4
def test_reward_signal(tmpdir):
    env = MazeExplorer(seed=42, mazes_path=tmpdir.strpath)
    rewards = []
    for _ in range(1000):
        _, reward, done, _ = env.step(env.action_space.sample())
        rewards.append(reward)
        if done:
            env.reset()

    assert sum(rewards) > 1
Example #5
def test_create_mazes(tmpdir):
    env = MazeExplorer(mazes_path=tmpdir.strpath)

    files = os.listdir(env.mazes_path)

    required_files = ["10x10.cfg", "10x10.wad", "10x10_MAP01.txt", "outputs"]

    assert set(files) == set(required_files)

    env.reset()
    for _ in range(10):
        action = env.action_space.sample()
        observation, *_ = env.step(action)
        assert observation.shape == (42, 42, 3)
Example #6
def test_generate_multiple_mazes(tmpdir):
    """
    This function should test whether new episodes sample from the selection of map levels in the wad.
    The assertion could be that every map is used at least once but given the stochasticity, it becomes
    a trade off between how many episodes to sample and how certain one can be that the test will pass.
    For now this test is implemented with the weaker condition that more than one map is sampled.
    This ensures at least that the map level is not being fixed.
    """
    for number_mazes in [1, 5, 10]:
        maps = set()
        env = MazeExplorer(number_maps=number_mazes, mazes_path=tmpdir.strpath)
        for _ in range(5000):
            *_, done, _ = env.step(env.action_space.sample())
            if done:
                env.reset()
                map_id = int(env.env.get_game_variable(GameVariable.USER4))
                maps.add(map_id)
        if number_mazes == 1:
            assert len(maps) == 1
        else:
            assert len(maps) > 1
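The trade-off described in the docstring can be made concrete. Below is a minimal sketch, assuming each episode draws its map uniformly at random; the helper name is illustrative and not part of the repository. It computes, by inclusion-exclusion, the probability that every map appears at least once in a given number of episodes.

from math import comb

def prob_all_maps_seen(number_maps, episodes):
    """Probability that `episodes` uniform draws cover all `number_maps` maps."""
    return sum((-1) ** k * comb(number_maps, k) * ((number_maps - k) / number_maps) ** episodes
               for k in range(number_maps + 1))

# With 10 maps, 60 episodes give only about 98% coverage probability, so the
# stricter assertion would still fail roughly one run in fifty:
# prob_all_maps_seen(10, 60) ≈ 0.98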
Example #7
def test_generate_video(tmpdir):
    record_path = tmpdir.mkdir("record")

    env = MazeExplorer(mazes_path=tmpdir.strpath)
    env.reset()
    for _ in range(10):
        action = env.action_space.sample()
        env.step_record(action, record_path=record_path)

    video_destination = os.path.join(record_path, "movie.avi")
    MazeExplorer.generate_video(record_path, video_destination)

    assert os.path.isfile(video_destination)
    assert os.path.getsize(video_destination) > 0
Example #8
def load_stable_baselines_env(cfg_path, vector_length, mp, n_stack, number_maps, action_frame_repeat,
                              scaled_resolution):
    env_fn = lambda: MazeExplorer.load_vizdoom_env(cfg_path, number_maps, action_frame_repeat, scaled_resolution)

    if mp:
        env = SubprocVecEnv([env_fn for _ in range(vector_length)])
    else:
        env = DummyVecEnv([env_fn for _ in range(vector_length)])

    if n_stack > 0:
        env = VecFrameStack(env, n_stack=n_stack)

    return env
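A usage sketch for the helper above; the cfg path and parameter values are illustrative. The returned object follows the stable-baselines VecEnv API, so reset() yields a batch of observations, one per vectorised copy.

env = load_stable_baselines_env(cfg_path="mazes/10x10.cfg",  # hypothetical cfg generated by MazeExplorer
                                vector_length=4,             # four parallel copies of the env
                                mp=False,                    # DummyVecEnv, single process
                                n_stack=4,                   # stack the last four frames
                                number_maps=1,
                                action_frame_repeat=4,
                                scaled_resolution=(42, 42))
obs = env.reset()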
Example #9
def test_save_load(tmpdir):
    env = MazeExplorer(mazes_path=tmpdir.mkdir("maze").strpath)

    saved_mazes_destination = os.path.join(tmpdir, "test_mazes")

    env.save(saved_mazes_destination)

    required_files = ["10x10.cfg", "10x10.wad", "10x10_MAP01.txt", "outputs"]

    assert set(required_files) == set(os.listdir(saved_mazes_destination))

    env = MazeExplorer.load_vizdoom_env(saved_mazes_destination, 1)

    for _ in range(10):
        action = env.action_space.sample()
        observation, *_ = env.step(action)
        assert observation.shape == (42, 42, 3)
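The workflow this test exercises is: generate mazes once, persist them, and reload them in later runs without regeneration. A sketch under the same assumptions as the test, with illustrative paths; per Example #8, the second argument to load_vizdoom_env is the number of maps in the saved directory.

# Generate a set of mazes and persist it.
env = MazeExplorer(number_maps=10, seed=42)
env.save("saved_mazes")

# In a later run, skip generation and load the saved mazes directly.
env = MazeExplorer.load_vizdoom_env("saved_mazes", 10)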
Example #10
def test_generate_with_seed_step(tmpdir):
    env = MazeExplorer(10, seed=5, mazes_path=tmpdir.mkdir("maze_1").strpath)
    env.reset()
    for _ in range(5):
        env.step(env.action_space.sample())
Example #11
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License

from mazeexplorer import MazeExplorer

env_train = MazeExplorer(number_maps=10,
                         keys=9,
                         size=(10, 10),
                         random_spawn=True,
                         random_textures=True,
                         seed=42)
env_test = MazeExplorer(number_maps=10,
                        keys=9,
                        size=(10, 10),
                        random_spawn=True,
                        random_textures=True,
                        seed=43)
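A minimal evaluation sketch for the train/test split above, assuming the standard gym step API and that the environment's default episode timeout eventually sets done:

for name, env in [("train", env_train), ("test", env_test)]:
    obs = env.reset()
    episode_reward, done = 0.0, False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        episode_reward += reward
    print(name, episode_reward)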
Example #12
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License

from mazeexplorer import MazeExplorer
import cv2

env_train = MazeExplorer(
    unique_maps=True,
    number_maps=10,
    keys=6,
    size=(20, 20),
    random_spawn=True,
    random_textures=True,
    random_key_positions=True,
    action_frame_repeat=4,
    actions="MOVE_FORWARD TURN_LEFT TURN_RIGHT MOVE_LEFT MOVE_RIGHT",
    scaled_resolution=(64, 64),
    data_augmentation=False)

obs = env_train.reset()
for i in range(1000):
    obs, reward, done, info = env_train.step(env_train.action_space.sample())
    cv2.imshow('hi', obs[:, :, [2, 1, 0]])  # MazeExplorer returns RGB; OpenCV expects BGR
    cv2.waitKey(0)  # wait for a keypress before advancing to the next frame
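On a headless machine the imshow loop above has no display to draw to. A sketch that writes the frames to disk instead, assuming uint8 RGB observations (as the imshow call implies); the output directory is illustrative.

import os

os.makedirs("frames", exist_ok=True)
obs = env_train.reset()
for i in range(100):
    obs, reward, done, info = env_train.step(env_train.action_space.sample())
    # MazeExplorer returns RGB; OpenCV expects BGR, hence the channel reorder.
    cv2.imwrite(os.path.join("frames", "%05d.png" % i), obs[:, :, [2, 1, 0]])
    if done:
        obs = env_train.reset()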
Example #13
def stable_baseline_training(algorithm, steps, number_maps, random_spawn,
                             random_textures, lstm, random_keys, keys,
                             dimensions, num_cpus, n_stack, clip, complexity,
                             density, mp, eval_occurrence, eval_episodes, eval,
                             experiment_name, env_seed, alg_seed,
                             episode_timeout):
    """
    Runs OpenAI stable baselines implementation of specified algorithm on specified environment with specified training configurations.
    Note: For scenarios not using MazeExplorer but using the vizdoom wrapper, the .T transpose on the array being fed into the process image method needs to be removed.
    Note: Ensure relevant maps are in specified paths under mazes folder for simpler and manual scenarios.

    :param algorithm: which algorithm to run (currently support for PPO and A2C)
    :param steps: number of steps to run training
    :param number_maps: number of maps to generate and train on
    :param random_spawn: whether or not to randomise the spawn position of the agent
    :param random_textures: whether or not to randomise textures in generated maps
    :param lstm: whether or not to add an lstm to the network
    :param random_keys: whether or not to randomise key placement upon each new training episode in a given map
    :param keys: number of keys to place in generated maps
    :param dimensions: x, y dimensions of maps to be generated
    :param num_cpus: number of environments in which to train
    :param n_stack: number of frames to stack to feed as a state to the agent
    :param clip: whether or not to clip rewards from the environment
    :param complexity: float between 0 and 1 describing the complexity of the generated mazes
    :param density: float between 0 and 1 describing the density of the generated mazes
    :param mp: whether or not to use multiprocessing for workers
    :param eval_occurrence: number of training steps between evaluation runs
    :param eval_episodes: number of episode rollouts to perform during each evaluation
    :param eval: whether or not to use evaluation during training
    :param experiment_name: name of experiment for use in logging and file saving
    :param env_seed: seed to be used for environment generation
    :param alg_seed: seed to be used for stable-baseline algorithms
    :param episode_timeout: number of steps after which to terminate episode
    """

    timestamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H-%M-%S')

    # generate a file in log directory containing training configuration information.
    experiment_name = experiment_name + "/" if experiment_name else ""
    OUTPUT_PATH = os.path.join(DIR_PATH, 'results', experiment_name, timestamp)
    os.makedirs(OUTPUT_PATH, exist_ok=True)
    with open(os.path.join(OUTPUT_PATH, 'params.txt'), 'w+') as f:
        f.write(
            str({
                'algorithm': algorithm,
                'number_maps': number_maps,
                'random_spawn': random_spawn,
                'random_textures': random_textures,
                'lstm': lstm,
                'random_keys': random_keys,
                'keys': keys,
                'dimensions': dimensions,
                'num_cpus': num_cpus,
                'clip': clip,
                'mp': mp,
                'n_stack': n_stack,
                'env_seed': env_seed,
                'alg_seed': alg_seed,
                'experiment_name': experiment_name,
                "eval_occurrence": eval_occurrence,
                "eval_episodes": eval_episodes,
                "episode_timeout": episode_timeout
            }))

    if clip:
        clip_range = (-1, 1)
    else:
        clip_range = False

    mazeexplorer_env = MazeExplorer(number_maps=number_maps,
                                    random_spawn=random_spawn,
                                    random_textures=random_textures,
                                    random_key_positions=random_keys,
                                    keys=keys,
                                    size=dimensions,
                                    clip=clip_range,
                                    seed=env_seed,
                                    complexity=complexity,
                                    density=density)

    if mp:
        env = SubprocVecEnv(
            [mazeexplorer_env.create_env() for _ in range(num_cpus)])
    else:
        env = DummyVecEnv([
            mazeexplorer_env.create_env() for _ in range(num_cpus)
        ])  # vectorise env

    if n_stack > 0:
        env = VecFrameStack(env, n_stack=n_stack)

    if algorithm == 'ppo':
        algo = PPO2
    elif algorithm == 'a2c':
        algo = A2C
    else:
        raise NotImplementedError("Only supports PPO and A2C")

    if lstm:
        model = algo(CnnLstmPolicy,
                     env,
                     verbose=1,
                     tensorboard_log=OUTPUT_PATH)
    else:
        model = algo(CnnPolicy, env, verbose=1, tensorboard_log=OUTPUT_PATH)

    if eval:
        evaluator = Evaluator(os.path.join(DIR_PATH, "eval_maps"), OUTPUT_PATH,
                              num_cpus, mp, n_stack)

        steps_taken = 0

        print("Training started...")

        while steps_taken < steps:
            print("Training...")
            model.learn(total_timesteps=min(eval_occurrence,
                                            (steps - steps_taken)),
                        reset_num_timesteps=False,
                        seed=alg_seed)

            steps_taken += eval_occurrence

            print("Evaluating...")

            evaluator.evaluate(model, steps_taken, eval_episodes,
                               save=True)  # do 100 rollouts and save scores

        print("Training completed.")

    else:
        model.learn(total_timesteps=steps, seed=alg_seed)
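A sketch of a call with illustrative hyperparameters (none of these values are repository defaults):

stable_baseline_training(algorithm='ppo', steps=1_000_000, number_maps=10,
                         random_spawn=True, random_textures=True, lstm=False,
                         random_keys=True, keys=9, dimensions=(10, 10),
                         num_cpus=4, n_stack=4, clip=True, complexity=0.7,
                         density=0.7, mp=False, eval_occurrence=100_000,
                         eval_episodes=10, eval=True, experiment_name='ppo_10maps',
                         env_seed=42, alg_seed=0, episode_timeout=2100)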
Example #14
import os
from collections import OrderedDict

import cv2
import gym
import numpy as np
from vizdoom import AutomapMode

from mazeexplorer import MazeExplorer


class MazeExplorerEnv(gym.Env):
    metadata = {'render.modes': ['rgb_array', 'human']}
    def __init__(self, cfg, seed=0, **kwargs):
        super(MazeExplorerEnv, self).__init__(**kwargs)
        self._max_step = cfg.training.max_step
        os.makedirs(os.path.join('maps', cfg.saving.version), exist_ok=True)

        self.env_cfg = cfg.task.explorer
        map_size = self.env_cfg.map_size
        self.env = MazeExplorer(unique_maps=False, number_maps=self.env_cfg.num_maps, keys=self.env_cfg.num_keys,
                                size=(map_size, map_size), random_spawn=True, random_textures=True,
                                random_key_positions=True, action_frame_repeat=4,
                                actions="MOVE_FORWARD TURN_LEFT TURN_RIGHT MOVE_LEFT MOVE_RIGHT",
                                scaled_resolution=(64, 64), data_augmentation=True, seed=seed,
                                episode_timeout=self._max_step * 4, complexity=.3, density=.3,
                                mazes_path='envs/maps/map_v1_%d' % seed)


        self.game = self.env.env
        self.game.close()
        self.game.set_automap_buffer_enabled(True)
        self.game.set_depth_buffer_enabled(True)
        self.game.set_automap_mode(AutomapMode.OBJECTS_WITH_SIZE)
        self.game.init()
        #self.game.add_game_args("+am_followplayer 1")
        self.ACTION_LIST = np.eye(5).astype(bool)  # one-hot encodings for the five actions (np.bool was removed from NumPy)
        self.action_space = gym.spaces.Discrete(len(self.ACTION_LIST))
        self.action_dim = self.action_space.n
        self.observation_space = OrderedDict({'image': gym.spaces.Box(0, 255, (4, 64, 64), dtype=np.uint8),
                                              'pose': gym.spaces.Box(-np.inf, np.inf, (4,), dtype=np.float32),
                                              'prev_action': gym.spaces.Box(0, 1, (self.action_dim,), dtype=np.float32),
                                              'episode': gym.spaces.Box(0, 1, (1,), dtype=np.float32)})
        self._last_observation = None
        self._last_action = None
        self.time_t = -1
        self.episode_id = -1
        self.prev_pose = None  # step() only computes progress once a previous pose exists
        self.stuck_flag = 0
        self.success_num = 0
        self.total_reward = 0.0

    def step(self, action):
        if isinstance(action, dict): action = action['action']
        rgb, reward, done, info = self.env.step(action)
        if reward > 0.5: self.success_num += 1  # treat a large (key-pickup) reward as a success
        self.time_t += 1
        if self.time_t >= self._max_step - 1: done = True
        state = self.env.env.get_state()
        obs = None if done else state
        self._last_observation = obs if obs is not None else self._last_observation
        image = np.concatenate([self.process_image(self._last_observation.screen_buffer),
                                self.process_image(self._last_observation.depth_buffer)],2)
        agent_pose = self._last_observation.game_variables
        pose_x, pose_y = agent_pose[0]/2000, agent_pose[1]/2000
        pose_yaw = agent_pose[-1]/360
        #print(agent_pose, self.stuck_flag, self.time_t, self.game.is_episode_finished())
        self.total_reward += reward
        if self.prev_pose is not None:
            progress = np.sqrt(((pose_x - self.prev_pose[0])**2 + (pose_y - self.prev_pose[1])**2))
        else: progress = 0.0
        #print(pose_x, pose_y, pose_yaw, progress)
        if progress < 0.01:
            self.stuck_flag += 1
        else:
            self.stuck_flag = 0
        if self.stuck_flag > 20:  # agent has not moved for 20 consecutive steps: end the episode
            done = True
            self.stuck_flag = 0
        self.prev_pose = [pose_x, pose_y]
        self._last_action = action
        obs = {'image': image.transpose(2, 1, 0),
               'pose': np.array([pose_x, pose_y, pose_yaw, self.time_t + 1]),
               'prev_action': np.eye(self.action_dim)[self._last_action]}
        # for debug
        obs['episode'] = self.episode_id * 6 + self.env.seed
        return obs, reward, done, {'episode_id': self.episode_id, 'step_id':self.time_t, 'success': self.success_num}

    def process_image(self, image, resize=True, ch3=False):
        """Convert a ViZDoom buffer to HWC, optionally resized to 64x64 and tiled to 3 channels."""
        if len(image.shape) > 2:
            image = image.transpose(1, 2, 0)  # CHW -> HWC
        if resize:
            image = cv2.resize(image, dsize=(64, 64))
        if len(image.shape) == 2:
            image = np.expand_dims(image, 2)
        if image.shape[2] == 1 and ch3:
            image = np.concatenate([image] * 3, 2)
        return image


    def reset(self):
        _ = self.env.reset()
        if not self.game.is_automap_buffer_enabled():
            self.game.close()
            self.game.set_automap_buffer_enabled(True)
            self.game.set_depth_buffer_enabled(True)
            self.game.set_automap_mode(AutomapMode.OBJECTS_WITH_SIZE)
            self.game.init()
        state = self.game.get_state()
        self._last_observation = state
        self.time_t = -1
        image = np.concatenate([self.process_image(state.screen_buffer), self.process_image(state.depth_buffer)],2)
        agent_pose = state.game_variables[[0,1,-1]]
        pose_x, pose_y = agent_pose[0]/2000, agent_pose[1]/2000
        pose_yaw = agent_pose[-1]/360
        self._last_action = None
        obs = {'image': image.transpose(2, 1, 0),
               'pose': np.array([pose_x, pose_y, pose_yaw, self.time_t + 1]),
               'prev_action': np.zeros(self.action_dim)}
        self.episode_id += 1
        self.prev_pose = None
        self.stuck_flag = 0
        self.total_reward = 0.0
        self.success_num = 0
        obs['episode'] = self.episode_id * 6 + self.env.seed
        return obs

    def set_seed(self, seed=None):
        self.seed = seed
        self.env.seed(seed)

    def close(self):
        self.env.close()

    def render(self, mode='rgb_array', close=False):
        state = self._last_observation
        if mode in ('rgb_array', 'human'):
            obs = self.process_image(self.env._rgb_array, resize=False)
            automap = state.automap_buffer.transpose(1, 2, 0)  # CHW -> HWC
            view_img = np.concatenate([obs[:, :, :3], automap], 1).astype(np.uint8)
            view_img = np.ascontiguousarray(view_img)
            cv2.putText(view_img, 'reward: %.3f' % self.total_reward, (200, 80),
                        cv2.FONT_HERSHEY_PLAIN, 0.5, (255, 255, 255), 1)
            if mode == 'rgb_array':
                return view_img
            # pop up a window and render (RGB -> BGR for OpenCV)
            cv2.imshow('render', view_img[:, :, [2, 1, 0]])
            cv2.waitKey(1)
        else:
            super(MazeExplorerEnv, self).render(mode=mode)  # raises for unsupported modes
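A minimal construction sketch, assuming only the cfg fields the class actually reads (cfg.training.max_step, cfg.saving.version, and cfg.task.explorer's map_size/num_maps/num_keys); all values are illustrative.

from types import SimpleNamespace

cfg = SimpleNamespace(
    training=SimpleNamespace(max_step=500),
    saving=SimpleNamespace(version='v1'),
    task=SimpleNamespace(explorer=SimpleNamespace(map_size=20, num_maps=10, num_keys=6)))

env = MazeExplorerEnv(cfg, seed=0)
obs = env.reset()
print(obs['image'].shape)  # (4, 64, 64): RGB + depth stacked, channels first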