Example #1
# Assumed imports for this example (not shown in the original snippet):
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
from PIL import Image
from vizdoom import DoomGame

class DoomEnvironment:
    def __init__(self, scenario, path_to_config="doom/config"):
        self.game = DoomGame()
        self.game.load_config(path_to_config + "/" + scenario + ".cfg")
        self.game.set_doom_scenario_path(path_to_config + "/" + scenario +
                                         ".wad")
        self.game.set_window_visible(False)
        self.game.init()
        self.num_actions = len(self.game.get_available_buttons())

    def reset(self):
        self.game.new_episode()
        game_state = self.game.get_state()
        obs = game_state.screen_buffer
        self.h, self.w = obs.shape[1:3]
        self.current_obs = self.preprocess_obs(obs)
        if self.game.get_available_game_variables_size() == 2:
            self.ammo, self.health = game_state.game_variables
        return self.get_obs()

    def get_obs(self):
        return self.current_obs[:, :, None]

    def get_obs_rgb(self):
        img = self.game.get_state().screen_buffer
        img = np.rollaxis(img, 0, 3)
        img = np.reshape(img, [self.h, self.w, 3])
        return img.astype(np.uint8)

    def preprocess_obs(self, obs):
        img = np.rollaxis(obs, 0, 3)
        img = np.reshape(img, [self.h, self.w, 3]).astype(np.float32)
        img = (img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 +
               img[:, :, 2] * 0.114)  # ITU-R BT.601 grayscale weights
        img = Image.fromarray(img)
        img = img.resize((84, 84), Image.BILINEAR)
        img = np.array(img)
        return img.astype(np.uint8)

    def action_to_doom(self, a):
        action = [0 for i in range(self.num_actions)]
        action[int(a)] = 1
        return action

    def step(self, a):
        action = self.action_to_doom(a)
        reward = self.game.make_action(action)

        done = self.game.is_episode_finished()

        if done:
            new_obs = np.zeros_like(self.current_obs, dtype=np.uint8)
        else:
            game_state = self.game.get_state()
            new_obs = game_state.screen_buffer
            new_obs = self.preprocess_obs(new_obs)

        self.current_obs = new_obs

        return self.get_obs(), reward, done

    def watch_random_play(self, max_ep_length=1000, frame_skip=4):
        self.reset()
        for i in range(max_ep_length):
            a = np.random.randint(self.num_actions)
            obs, reward, done = self.step(a)
            if done:
                break

            img = self.get_obs_rgb()
            if i % frame_skip == 0:
                plt.imshow(img)
                display.clear_output(wait=True)
                display.display(plt.gcf())
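
A minimal usage sketch for the class above; "basic" is an illustrative scenario name and assumes a matching .cfg/.wad pair under the constructor's default doom/config directory:

# Hypothetical usage; the scenario name is a placeholder.
env = DoomEnvironment("basic")
obs = env.reset()                       # (84, 84, 1) uint8 grayscale frame
for _ in range(100):
    a = np.random.randint(env.num_actions)
    obs, reward, done = env.step(a)     # note: a 3-tuple, not the gym 4-tuple
    if done:
        obs = env.reset()
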
Example #2
# Assumed imports for this example; OPENCV_AVAILABLE / PILLOW_AVAILABLE are
# module-level flags expected to be set by guarded cv2 / PIL imports:
import os
import random
import logging
import datetime
import numpy as np
import gym
import imageio
from vizdoom import DoomGame, Button

class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self,
                 cfg_path,
                 number_maps,
                 scaled_resolution=(42, 42),
                 action_frame_repeat=4,
                 clip=(-1, 1),
                 seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation to be returned with each step
        :param action_frame_repeat: how many game tics should an action be active
        :param clip: how much the reward returned on each step should be clipped to
        :param seed: seed for random, used to determine the order in which the doom maps are shown.
        :param data_augmentation: bool to determine whether or not to use data augmentation
            (adding randomly colored, randomly sized boxes to observation)
        """

        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)

        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        if seed is not None:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game and initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(available_actions) -
                    set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        rows = scaled_resolution[0]
        columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(0.0,
                                                255.0,
                                                shape=(rows, columns, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the vizdoom environment observation numpy array into the desired resolution and shape
        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape
        # PIL resize has indexing opposite to numpy array
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y
        where N is randomly sampled between 0 and 5
        and x and y are randomly sampled between 0.1 and 0.35
        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.

        :param img: image to resize
        :param shape: desired shape in the format (rows, columns)
        :return: resized image
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'
                             ' Install either '
                             'OpenCV or Pillow to support image processing.')

        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)

        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))

        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in the format (rows, columns, channels)
        """
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def step(self, action):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            if env_state:
                self._rgb_array = env_state.screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment. For example as used by Stable Baselines

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat)
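
A usage sketch for the wrapper above; the cfg path is a placeholder, and the commented lines illustrate how create_env() pairs with a vectoriser such as Stable Baselines' DummyVecEnv (an assumption based on the docstring, not part of the original):

# Hypothetical usage; "scenarios/my_mission.cfg" is a placeholder path.
env = VizDoom("scenarios/my_mission.cfg", number_maps=1, seed=42)
obs = env.reset()                                   # (rows, columns, 3) array
obs, reward, done, info = env.step(env.action_space.sample())

# create_env() returns a zero-argument factory, the shape vectorisers expect:
# from stable_baselines.common.vec_env import DummyVecEnv
# vec_env = DummyVecEnv([env.create_env() for _ in range(4)])
env.close()
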
Example #3
# Assumed imports for this example; Environment is the framework's base class
# (e.g. from tensorforce.environments import Environment):
import numpy as np
from vizdoom import DoomGame

class ViZDoom(Environment):
    """
    ViZDoom environment (https://github.com/mwydmuch/ViZDoom).
    """
    def __init__(self, config_file):
        """
        Initialize ViZDoom environment.

        Args:
            config_file: .cfg file path, which defines how a world works and look like (maps)
        """
        self.game = DoomGame()

        # load configurations from file
        self.game.load_config(config_file)
        self.game.init()

        self.state_shape = self.featurize(self.game.get_state()).shape
        self.num_actions = len(self.game.get_available_buttons())

    def __str__(self):
        return 'ViZDoom'

    def states(self):
        return dict(type='float', shape=self.state_shape)

    def actions(self):
        return dict(type='int', shape=(), num_values=self.num_actions)

    def close(self):
        self.game.close()

    def reset(self):
        self.game.new_episode()
        return self.featurize(self.game.get_state())

    def seed(self, seed):
        self.game.set_seed(seed)
        return seed

    def featurize(self, state):
        # assumes screen_buffer is indexed (rows, columns[, channels])
        H = state.screen_buffer.shape[0]
        W = state.screen_buffer.shape[1]
        _vars = state.game_variables.reshape(-1).astype(np.float32)
        _screen_buf = state.screen_buffer.reshape(-1).astype(np.float32)

        if state.depth_buffer is None:
            _depth_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _depth_buf = state.depth_buffer.reshape(-1).astype(np.float32)

        if state.labels_buffer is None:
            _labels_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _labels_buf = state.labels_buffer.reshape(-1).astype(np.float32)

        if state.automap_buffer is None:
            _automap_buf = np.zeros(H * W * 1, dtype=np.float32)
        else:
            _automap_buf = state.automap_buffer.reshape(-1).astype(np.float32)

        return np.concatenate(
            (_vars, _screen_buf, _depth_buf, _labels_buf, _automap_buf))

    def execute(self, action):
        one_hot_enc = [0] * self.num_actions
        one_hot_enc[action] = 1
        reward = self.game.make_action(one_hot_enc)
        terminal = self.game.is_episode_finished()
        # get_state() returns None once the episode has finished
        if terminal:
            states = np.zeros(self.state_shape, dtype=np.float32)
        else:
            states = self.featurize(self.game.get_state())
        return states, terminal, reward
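
Because featurize flattens the game variables and all four buffers into a single float32 vector, the returned state plugs straight into a dense network. A short interaction sketch (the cfg path is illustrative):

env = ViZDoom("scenarios/basic.cfg")     # placeholder path
states = env.reset()                     # flat float32 feature vector
assert states.shape == env.state_shape
states, terminal, reward = env.execute(action=0)
env.close()
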
Example #4
# Assumed imports for this example; game_variables and default_reward_values
# are dicts defined elsewhere in the source module:
import logging
import numpy as np
import gym
from PIL import Image
from vizdoom import DoomGame, ScreenFormat, Button

class VizDoomGym(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self):
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        :param mission_file: name of the mission (.cfg) to run,
        :param scaled_resolution: resolution (height, width) of the video frames
                                  to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)",
                          self.mission_file)

        self.deathmatch = True
        # minimum distance the agent must travel per time-step, otherwise it is penalised
        self.distance_threshold = 15

        self.prev_properties = None
        self.properties = None

        self.cum_kills = np.array([0])

        # Create an instance of the VizDoom game and initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")

        self.env.set_doom_skill(4)
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        # print(available_actions)
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        self.metadata['render_modes'] = ['rgb_array']

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=1.0,
                                                shape=(self.rows, self.columns,
                                                       3),
                                                dtype=np.float32)

        self._rgb_array = None
        self.steps = 0
        self.global_steps = 0
        self.reset()

    def _process_image(self, img):
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """
        Check and update game variables.
        """
        # read game variables
        new_v = {
            k: self.env.get_game_variable(v)
            for k, v in game_variables.items()
        }
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']

        # check game variables
        assert 0 <= health <= 200 or (health < 0 and self.env.is_player_dead())
        assert 0 <= armor <= 200, (health, armor)

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """
        Update reward.
        """

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        reward = 0

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']

        # death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            reward += default_reward_values['SUICIDE']

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                reward += default_reward_values['MEDIKIT']
            else:
                reward += default_reward_values['INJURED']

        # found armor (losing armor is not penalised)
        d = self.properties['armor'] - self.prev_properties['armor']
        if d > 0:
            reward += default_reward_values['ARMOR']

        # found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if d != 0:
            if d > 0:
                reward += default_reward_values['AMMO']
            else:
                reward += default_reward_values['USE_AMMO']

        # distance
        # turn_left = (Button.TURN_LEFT == self.available_actions[action])
        # turn_right = (Button.TURN_RIGHT == self.available_actions[action])
        # if not (turn_left or turn_right):
        diff_x = (self.properties['position_x'] -
                  self.prev_properties['position_x'])
        diff_y = (self.properties['position_y'] -
                  self.prev_properties['position_y'])
        distance = np.sqrt(diff_x**2 + diff_y**2)
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']

        # living
        reward += default_reward_values['LIVING']

        return reward

    # def increase_difficulty(self):
    #     self.curr_skill += 1
    #     self.env.close()
    #     self.env.set_doom_skill(self.curr_skill)
    #     self.env.init()
    #     print('changing skill to', self.curr_skill)

    # def update_map(self):
    #     self.map_level += 1
    #     map_str = 'map0' + str(self.map_level)
    #     # go with initial wad file if there's still maps on it
    #     self.env.close()
    #     self.env.set_doom_map(map_str)
    #     self.env.init()

    def sub_reset(self):
        """Reset environment"""
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """Take step"""
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())

        _ = self.env.make_action(list(one_hot_action),
                                 self._action_frame_repeat)

        self.update_game_variables()

        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation, reward, done

    def step(self, action):
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """Seed"""
        if seed is not None:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError
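
Since __init__ raises NotImplementedError, VizDoomGym is meant to be specialised, with a subclass forwarding its configuration to _init. A minimal sketch (mission path and resolution are illustrative):

class MyDoomEnv(VizDoomGym):
    def __init__(self):
        # Skip the abstract __init__ and call the real initialiser directly;
        # the mission file below is a placeholder.
        self._init(mission_file="scenarios/deathmatch.cfg",
                   scaled_resolution=[84, 84])

env = MyDoomEnv()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()
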
Example #5
# Assumed imports for this example; FRAME_SKIP, ATTACK_FRAME_SKIP,
# MONSTER_LIST, ITEMS_IN_INTEREST, PLAYER_NAME and the *_DICT tables are
# module-level constants defined elsewhere in the source package:
import math
import numpy as np
from vizdoom import DoomGame, ScreenResolution

class Vizdoom_env(object):
    def __init__(self, config='vizdoom_env/asset/default.cfg', verbose=False,
                 perception_type='more_simple'):
        self.verbose = verbose
        self.game = DoomGame()
        self.game.load_config(config)
        if self.verbose:
            self.game.set_window_visible(True)
            self.game.set_screen_resolution(ScreenResolution.RES_1280X960)

        self.game_variables = self.game.get_available_game_variables()
        self.buttons = self.game.get_available_buttons()
        self.action_strings = [str(b).replace('Button.', '')
                               for b in self.buttons]
        self.game_variable_strings = [str(v).replace('GameVariable.', '')
                                      for v in self.game_variables]
        self.perception_type = perception_type
        if perception_type == 'clear':
            self.distance_dict = CLEAR_DISTANCE_DICT
            self.horizontal_dict = CLEAR_HORIZONTAL_DICT
        elif perception_type == 'simple':
            # 'simple' and 'more_simple' perception do not use
            # distance/horizontal bins
            pass
        elif perception_type == 'more_simple':
            pass
        else:
            self.distance_dict = DISTANCE_DICT
            self.horizontal_dict = HORIZONTAL_DICT

    def init_game(self):
        self.game.init()
        self.new_episode()

    def new_episode(self, init_state=None):
        self.game.new_episode()
        if init_state is not None:
            self.initialize_state(init_state)
        self.take_action('NONE')
        state = self.game.get_state()
        if state is None:
            raise RuntimeError('Cannot get initial states')
        img_arr = np.transpose(state.screen_buffer.copy(), [1, 2, 0])
        self.x_size = img_arr.shape[1]
        self.y_size = img_arr.shape[0]
        self.channel = img_arr.shape[2]
        self.get_state()
        if self.verbose:
            self.call_all_perception_primitives()
        p_v = self.get_perception_vector()
        self.s_h = [img_arr.copy()]
        self.a_h = []
        self.p_v_h = [p_v.copy()]  # perception vector

    def end_game(self):
        self.game.close()

    def state_transition(self, action_string):
        if action_string == 'NONE' or action_string in self.action_strings:
            self.take_action(action_string)
            self.a_h.append(action_string)
            if self.verbose:
                self.print_state()
            if FRAME_SKIP[action_string][2] == 0:
                self.get_state()
                self.s_h.append(self.screen.copy())
                p_v = self.get_perception_vector()
                self.p_v_h.append(p_v.copy())  # perception vector
            self.post_none(action_string)
            if FRAME_SKIP[action_string][2] == 1:
                self.get_state()
                self.s_h.append(self.screen.copy())
                p_v = self.get_perception_vector()
                self.p_v_h.append(p_v.copy())  # perception vector
            if self.verbose:
                self.call_all_perception_primitives()
        else:
            raise ValueError('Unknown action')

    def call_all_perception_primitives(self):
        for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
            self.in_target(actor)
            for dist in self.distance_dict.keys():
                for horz in self.horizontal_dict.keys():
                    self.exist_actor_in_distance_horizontal(actor, dist, horz)
        for weapon_slot in range(1, 10):
            self.have_weapon(weapon_slot)
            self.have_ammo(weapon_slot)
            self.selected_weapon(weapon_slot)
        for actor in MONSTER_LIST:
            self.is_there(actor)
        self.no_selected_weapon_ammo()

    def take_action(self, action):
        action_vector = [a == action for a in self.action_strings]
        frame_skip = FRAME_SKIP[action][0]
        if action == 'ATTACK':
            state = self.game.get_state()
            gv_values = dict(zip(self.game_variable_strings,
                                 state.game_variables))
            weapon_num = int(gv_values['SELECTED_WEAPON'])
            frame_skip = ATTACK_FRAME_SKIP[weapon_num]
        self.game.make_action(action_vector, frame_skip)

    def post_none(self, action):
        none_vector = [a == 'NONE' for a in self.action_strings]
        self.game.make_action(none_vector, FRAME_SKIP[action][1])

    def get_action_list(self):
        return self.action_strings

    def init_actors(self):
        self.actors = {}

    def check_and_add_to_actors(self, actor_name, label):
        if actor_name not in self.actors:
            self.actors[actor_name] = []
        self.actors[actor_name].append(label)

    def get_actor_by_name(self, actor_name):
        if actor_name not in self.actors:
            self.actors[actor_name] = []
        return self.actors[actor_name]

    def get_state(self):
        state = self.game.get_state()
        if state is None:
            self.game_variables = dict()
            self.player = None
            self.monsters = []
            self.ammo = []
            self.init_actors()
            return
        self.game_variable_values = dict(zip(self.game_variable_strings,
                                             state.game_variables))
        self.monsters = []
        self.ammo = []
        self.weapons = []
        self.actors = {}
        for l in state.labels:
            if l.object_name in PLAYER_NAME:
                self.player = l
            elif l.object_name in MONSTER_LIST:
                self.monsters.append(l)
                self.check_and_add_to_actors(l.object_name, l)
            else:
                self.check_and_add_to_actors(l.object_name, l)

        self.labels = state.labels
        self.screen = np.transpose(state.screen_buffer, [1, 2, 0]).copy()

    def get_perception_vector_cond(self):
        if self.perception_type == 'simple' or \
                self.perception_type == 'more_simple':
            return self.get_perception_vector_cond_simple()
        else:
            return self.get_perception_vector_cond_basic()

    def get_perception_vector_cond_basic(self):
        vec = []
        for dist in self.distance_dict.keys():
            for horz in self.horizontal_dict.keys():
                for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
                    vec.append('EXIST {} IN {} {}'.format(actor, dist, horz))
        for actor in MONSTER_LIST:
            vec.append('INTARGET {}'.format(actor))
        return vec

    def get_perception_vector_cond_simple(self):
        vec = []
        for actor in MONSTER_LIST:
            vec.append('ISTHERE {}'.format(actor))
        if self.perception_type == 'more_simple':
            return vec
        for actor in MONSTER_LIST:
            vec.append('INTARGET {}'.format(actor))
        return vec

    def get_perception_vector(self):
        if self.perception_type == 'simple' or \
                self.perception_type == 'more_simple':
            return self.get_perception_vector_simple()
        else:
            return self.get_perception_vector_basic()

    def get_perception_vector_basic(self):
        vec = []
        for dist in self.distance_dict.keys():
            for horz in self.horizontal_dict.keys():
                for actor in MONSTER_LIST + ITEMS_IN_INTEREST:
                    vec.append(self.exist_actor_in_distance_horizontal(actor, dist, horz))
        for actor in MONSTER_LIST:
            vec.append(self.in_target(actor))
        return np.array(vec)

    def get_perception_vector_simple(self):
        vec = []
        for actor in MONSTER_LIST:
            vec.append(self.is_there(actor))
        if self.perception_type == 'more_simple':
            return np.array(vec)
        for actor in MONSTER_LIST:
            vec.append(self.in_target(actor))
        return np.array(vec)

    def print_state(self):
        state = self.game.get_state()
        if state is None:
            print('No state')
            return
        game_variables = dict(zip(self.game_variable_strings, state.game_variables))
        game_variable_print = ''
        for key in sorted(game_variables.keys()):
            game_variable_print += '{}: {}, '.format(key, game_variables[key])
        game_variable_print += '\n'
        print(game_variable_print)
        for l in state.labels:
            print("id: {id}, name: {name}, position: [{pos_x},{pos_y},{pos_z}], "
                  "velocity: [{vel_x},{vel_y},{vel_z}], "
                  "angle: [{angle},{pitch},{roll}], "
                  "box: [{x},{y},{width},{height}]\n".format(
                      id=l.object_id, name=l.object_name,
                      pos_x=l.object_position_x, pos_y=l.object_position_y,
                      pos_z=l.object_position_z,
                      vel_x=l.object_velocity_x, vel_y=l.object_velocity_y,
                      vel_z=l.object_velocity_z,
                      angle=l.object_angle, pitch=l.object_pitch,
                      roll=l.object_roll,
                      x=l.x, y=l.y, width=l.width, height=l.height))

    def is_there(self, actor):
        if len(self.get_actor_by_name(actor)) > 0:
            if self.verbose:
                print('ISTHERE {}'.format(actor))
            return True
        return False

    def in_target(self, actor):
        center_x = self.x_size / 2
        center_y = self.y_size / 2
        for a in self.get_actor_by_name(actor):
            a_x_min, a_x_max = a.x, a.x + a.width
            a_y_min, a_y_max = a.y, a.y + a.height
            if a_x_min < center_x < a_x_max and \
                    a_y_min < center_y < a_y_max:
                if self.verbose:
                    print('INTARGET {}'.format(actor))
                return True
        return False

    def exist_actor_in_distance_horizontal(self, actor, dist, horz):
        cen_x = self.x_size / 2
        p = self.player
        for a in self.get_actor_by_name(actor):
            a_x_min, a_x_max = a.x, a.x + a.width
            d_x = a.object_position_x - p.object_position_x
            d_y = a.object_position_y - p.object_position_y
            d = math.sqrt(d_x**2 + d_y**2)
            if self.distance_dict[dist](d) and self.horizontal_dict[horz](a_x_min, a_x_max, cen_x):
                if self.verbose:
                    print('EXIST {} in {} {}'.format(actor, dist, horz))
                return True
        return False

    # Weapons
    # 1: fist / chainsaw, 2: pistol, 3: shotgun, 4: chaingun,
    # 5: rocket launcher, 6: plasma rifle
    # SELECT_WEAPON_1 switches between fist and chainsaw
    def have_weapon(self, weapon_slot):
        if self.game_variable_values['WEAPON{}'.format(weapon_slot)] > 0:
            if self.verbose:
                print('Have weapon {}'.format(weapon_slot))
            return True
        return False

    def have_ammo(self, weapon_slot):
        if weapon_slot == 1:  # Fist or Chainsaw
            if self.verbose:
                print('Have ammo {}'.format(weapon_slot))
            return True
        if self.game_variable_values['AMMO{}'.format(weapon_slot)] > 0:
            if self.verbose:
                print('Have ammo {}'.format(weapon_slot))
            return True
        return False

    def selected_weapon(self, weapon_slot):
        if self.game_variable_values['SELECTED_WEAPON'] == weapon_slot:
            if self.verbose:
                print('Weapon {} is selected'.format(weapon_slot))
            return True
        return False

    def no_selected_weapon_ammo(self):
        if self.game_variable_values['SELECTED_WEAPON_AMMO'] == 0:
            if self.verbose:
                print('no selected weapon ammo is left')
            return True
        return False

    def initialize_state(self, init_state):
        """ Takes random arguments and initialies the state

        Assumes that the max number of monster and ammo spawns is 5

        Params:
            init_state  [{"player_pos": [x, y], "monster_pos": [[x1, y1], [x2, y2]]}]
        """
        if 'player_pos' in init_state:
            x, y = init_state['player_pos']
            self.game.send_game_command('puke 20 {} {}'.format(x, y))
        if 'demon_pos' in init_state:
            for i, (x, y) in enumerate(init_state['demon_pos']):
                self.game.send_game_command(
                        'puke {} {} {}'.format(21 + i, x, y))
        if 'revenant_pos' in init_state:
            for i, (x, y) in enumerate(init_state['revenant_pos']):
                self.game.send_game_command(
                        'puke {} {} {}'.format(5 + i, x, y))
        if 'hellknight_pos' in init_state:
            for i, (x, y) in enumerate(init_state['hellknight_pos']):
                self.game.send_game_command(
                        'puke {} {} {}'.format(15 + i, x, y))
        if 'ammo_pos' in init_state:
            for i, (x, y) in enumerate(init_state['ammo_pos']):
                self.game.send_game_command(
                    'puke {} {} {}'.format(10 + i, x, y))
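
A short rollout sketch for the class above; it relies only on the constructor's default config and on the action names exposed by the scenario's button list:

env = Vizdoom_env(verbose=False)         # uses vizdoom_env/asset/default.cfg
env.init_game()                          # init() + new_episode()
for action in env.get_action_list()[:5]:
    env.state_transition(action)         # grows env.s_h, env.a_h, env.p_v_h
env.end_game()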