def reset(self, config):
  """Begin a new episode driven by `config`.

  Records the wall-clock start time, rebuilds the action set and the
  observation trace, lazily constructs the core engine, zeroes the
  per-episode counters, resets the engine and marks the game started.

  Args:
    config: episode configuration object understood by the core engine
      and by `football_action_set.get_action_set`.
  """
  self._episode_start = timeit.default_timer()
  self._config = config
  self._action_set = football_action_set.get_action_set(config)
  self._trace = observation_processor.ObservationProcessor(self._config)
  # The core engine is reused across episodes; create it on first use only.
  self._env = self._env or football_env_core.FootballEnvCore()
  self._cumulative_reward = 0
  self._step_count = 0
  self._env.reset(config, self._trace)
  self._env_state = 'game_started'
def __init__(self, config):
  """Construct the environment: players, core engine and action space.

  Args:
    config: configuration mapping; `config['players']` describes the
      player setup handed to `_construct_players`.
  """
  self._config = config
  self._cached_observation = None
  # There can be at most one agent at a time. We need to remember its
  # team and the index on the team to generate observations appropriately.
  self._agent = None
  self._agent_index = -1
  self._agent_left_position = -1
  self._agent_right_position = -1
  base_player_config = {'index': 0}
  self._players = self._construct_players(config['players'],
                                          base_player_config)
  self._env = football_env_core.FootballEnvCore(self._config)
  self._num_actions = len(football_action_set.get_action_set(self._config))
def __init__(self, config):
  """Hold `config`, build the core engine and mark the state initialized.

  Args:
    config: configuration object forwarded verbatim to
      `football_env_core.FootballEnvCore`.
  """
  self._env_state = 'initialized'
  self._config = config
  self._env = football_env_core.FootballEnvCore(config)