Example #1
    def reset(self):

        # Train our agent against the baseline
        # See https://github.com/Kaggle/kaggle-environments#Training
        self.trainer = self.env.train(self.agents)

        raw_obs = self.trainer.reset()

        # Raw observations (See https://github.com/google-research/football/blob/master/gfootball/doc/observation.md)
        raw_obs = raw_obs['players_raw'][0]

        if self.obs_representation == "smm":
            obs = observation_preprocessing.generate_smm([raw_obs])[0]
        elif self.obs_representation == "stacked_smm":
            obs = observation_preprocessing.generate_smm([raw_obs])[0]
            if not self.stacked_obs:
                self.stacked_obs.extend([obs] * 4)
            else:
                self.stacked_obs.append(obs)
            obs = np.concatenate(list(self.stacked_obs), axis=-1)
        elif self.obs_representation == "float115":
            obs = Simple115StateWrapper.convert_observation([raw_obs], True)[0]
        elif self.obs_representation == "pixels":
            pass
        elif self.obs_representation == "raw":
            obs, _ = OBSParser.parse(raw_obs)

        return obs
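
The stacked_smm branch above only works if self.stacked_obs is a bounded frame buffer created elsewhere. Below is a minimal sketch of the constructor this reset() appears to assume, with collections imported at module level; the argument names and defaults are guesses, only the attribute names come from the examples:

    def __init__(self, env, agents, obs_representation="stacked_smm", rewards=()):
        self.env = env
        self.agents = agents
        self.obs_representation = obs_representation
        self.rewards = list(rewards)               # e.g. ["ball_possession"], checked in step()

        # Bounded frame buffer: maxlen=4 matches the 4-frame stacking in reset()/step().
        self.stacked_obs = collections.deque([], maxlen=4)

        # -1 means nobody owns the ball in the raw observation encoding.
        self.ball_owned_team = -1

With maxlen=4, extend([obs] * 4) fills the buffer in one call and later appends silently drop the oldest frame, which is why reset() and step() can share the same branching. Note that reset() only refills the buffer when it is empty, so the deque may also need clearing there if frames should not carry over between episodes.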
Example #2
    def step(self, action):

        # Step through the environment

        raw_obs, reward, done, info = self.env.step([action])

        # Obtain raw observation
        raw_obs = raw_obs[0]

        # Extract metainfo from obs

        # Reward Shaping (If applicable)

        if "ball_possession" in self.rewards:

            # Reward winning ball possession and penalize losing ball possession
            prev_ball_owned_team = self.ball_owned_team
            cur_ball_owned_team = raw_obs['ball_owned_team']

            # Win ball possession
            if prev_ball_owned_team == 1 and cur_ball_owned_team == 0:
                reward += 0.1

            # Lose ball possession
            if prev_ball_owned_team == 0 and cur_ball_owned_team == 1:
                reward -= 0.1

            self.ball_owned_team = cur_ball_owned_team

        # Scale Rewards
        #reward = reward * 10

        if self.obs_representation == "smm":
            obs = observation_preprocessing.generate_smm([raw_obs])[0]
        elif self.obs_representation == "stacked_smm":
            obs = observation_preprocessing.generate_smm([raw_obs])[0]
            if not self.stacked_obs:
                self.stacked_obs.extend([obs] * 4)
            else:
                self.stacked_obs.append(obs)
            obs = np.concatenate(list(self.stacked_obs), axis=-1)
        elif self.obs_representation == "float115":
            obs = Simple115StateWrapper.convert_observation([raw_obs], True)[0]
        elif self.obs_representation == "pixels":
            pass
        elif self.obs_representation == "raw":
            obs, (l_score, r_score, custom_reward) = OBSParser.parse(raw_obs)

        # Extract MetaInfo like scoring from raw_obs
        __, (l_score, r_score, __) = OBSParser.parse(raw_obs)

        info['l_score'] = l_score
        info['r_score'] = r_score

        # Use goal difference as custom reward for now
        return obs, reward, done, info
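
For context, ball_owned_team in the raw observation is -1 when the ball is loose, 0 for the left team and 1 for the right team, so the shaping above implicitly assumes the agent controls the left side. The same check pulled out as a standalone helper (a sketch; the 0.1 bonus simply mirrors the value used above):

    def possession_reward(prev_team, cur_team, bonus=0.1):
        """Bonus for taking the ball off the opponent, penalty for losing it to them.

        Encoding: -1 = nobody, 0 = left team (ours), 1 = right team (opponent).
        Transitions through -1 (loose ball) earn nothing, exactly as in step() above.
        """
        if prev_team == 1 and cur_team == 0:    # won possession
            return bonus
        if prev_team == 0 and cur_team == 1:    # lost possession
            return -bonus
        return 0.0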
Example #3
    def _side_to_df(self, get_s115: bool = True,
                    get_smm: bool = True, get_raw: bool = True) -> Tuple[Union[None, np.ndarray],
                                                                         Union[None, np.ndarray],
                                                                         Union[None, np.ndarray],
                                                                         np.ndarray]:
        """Select the correct side/agent from the downloaded data and return as df."""
        raw_obs = []
        s115_obs = []
        smm_obs = []
        actions = []
        for step in np.arange(1, self._expected_steps + 1):
            players_raw = self.data['steps'][step][self.side.value]['observation']['players_raw']

            if get_s115:
                s115_obs.append(Simple115StateWrapper.convert_observation(players_raw, fixed_positions=True))

            if get_smm:
                smm_obs.append(observation_preprocessing.generate_smm([players_raw[0]]))

            if get_raw:
                raw_obs.append(RawObs.convert_observation(players_raw))

            actions.append(self.data['steps'][step][self.side.value]['action'][0])

        s115_obs = np.concatenate(s115_obs, axis=0) if get_s115 else None
        smm_obs = np.concatenate(smm_obs, axis=0) if get_smm else None
        raw_obs = np.concatenate(raw_obs, axis=0).astype(np.float32) if get_raw else None

        return s115_obs, smm_obs, raw_obs, np.expand_dims(np.array(actions, dtype=np.uint8), axis=1)
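
The three observation arrays line up row for row with the recorded actions. A rough usage sketch with the shapes one would normally expect, assuming a single controlled player per side (the episode variable is hypothetical; RawObs is project-specific, so its feature width is not asserted):

    s115, smm, raw, acts = episode._side_to_df()   # hypothetical instance
    n_steps = acts.shape[0]
    assert acts.shape == (n_steps, 1)              # one uint8 action id per step
    assert s115.shape == (n_steps, 115)            # Simple115: 115 floats per step
    assert smm.shape == (n_steps, 72, 96, 4)       # SMM: height x width x 4 planes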
Example #4
def agent(obs):
    global step_nr
    global previous_action
    global observations
    global state
    global policy
    # Get observations for the first (and only one) player we control.
    obs = obs['players_raw'][0]
    # Agent we trained uses Super Mini Map (SMM) representation.
    # See https://github.com/google-research/seed_rl/blob/master/football/env.py for details.
    obs = observation_preprocessing.generate_smm([obs])[0]
    print(obs.shape)
    if not observations:
        observations.extend([obs] * 4)
    else:
        observations.append(obs)

    # SEED packs observations to reduce transfer times.
    # See PackedBitsObservation in https://github.com/google-research/seed_rl/blob/master/football/observation.py
    obs = np.concatenate(list(observations), axis=-1)
    obs = np.packbits(obs, axis=-1)
    if obs.shape[-1] % 2 == 1:
        obs = np.pad(obs, [(0, 0)] * (obs.ndim - 1) + [(0, 1)], 'constant')
    obs = obs.view(np.uint16)

    # Execute our agent to obtain action to take.
    enc = lambda x: x
    dec = lambda x, s=None: x if s is None else tf.nest.pack_sequence_as(s, x)
    agent_output, state = policy.get_action(
        *dec(enc(prepare_agent_input(obs, previous_action, state))))
    previous_action = agent_output.action[0]
    return [int(previous_action)]
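
The bit-packing above binarizes the stacked minimap: np.packbits turns every non-zero byte into a single 1 bit, so the 16 stacked SMM planes collapse into 2 bytes per pixel before the optional pad and the uint16 view. A small round-trip sketch under the same (72, 96, 16) layout:

    import numpy as np

    stacked = np.zeros((72, 96, 16), dtype=np.uint8)   # 4 frames x 4 SMM planes
    packed = np.packbits(stacked, axis=-1)             # -> (72, 96, 2), one bit per plane
    if packed.shape[-1] % 2 == 1:                      # pad to an even byte count for uint16
        packed = np.pad(packed, [(0, 0)] * (packed.ndim - 1) + [(0, 1)], 'constant')
    packed16 = packed.view(np.uint16)                  # -> (72, 96, 1)

    # The receiving side can reverse this with a uint8 view plus np.unpackbits,
    # recovering 0/1 planes rather than the original 0..255 values.
    restored = np.unpackbits(packed16.view(np.uint8), axis=-1)   # -> (72, 96, 16)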
Example #5
    def process_obs(
        obs: Union[Dict[str, Any],
                   List[Any]]) -> Tuple[np.ndarray, np.ndarray]:
        """
        Obs can be from gym env or the version passed from Kaggle runner.

        We need to extract this dict to generate simple and SMM obs:
        dict_keys(['left_team_tired_factor', 'left_team_yellow_card', 'right_team_tired_factor', 'left_team',
                    'ball_owned_player', 'right_team_yellow_card', 'ball_rotation', 'ball_owned_team', 'ball',
                    'right_team_roles', 'right_team_active', 'steps_left', 'score', 'right_team', 'left_team_roles',
                    'ball_direction', 'left_team_active', 'left_team_direction', 'right_team_direction', 'game_mode',
                    'designated', 'active', 'sticky_actions'])

        Which is located in:
         - Kag obs: obs_kag_env['players_raw'][0].keys():
         - Gym obs: obs_gym_env[0].keys()
        """

        if isinstance(obs, dict):
            obs = obs['players_raw']

        # This can return multiple rows when env has:
        # number_of_left_players_agent_controls=1 and number_of_right_players_agent_controls=1
        simple_obs = Simple115StateWrapper.convert_observation(
            obs, fixed_positions=False).reshape(-1)
        smm_obs = observation_preprocessing.generate_smm([obs[0]])

        return smm_obs, simple_obs
Example #6
File: ppo2.py  Project: sazas/football
  def take_action(self, observation):
    assert len(observation) == 1, 'Multiple players control is not supported'

    observation = observation_preprocessing.generate_smm(observation)
    observation = self._stacker.get(observation)
    action = self._policy.step(observation)[0][0]
    actions = [action] #[football_action_set.action_set_dict[self._action_set][action]]
    return actions
Example #7
 def add_raw_observation(self, observation):
     if 'extracted' in self._observation_kind:
         observation = observation_preprocessing.generate_smm(observation)
     else:
         assert False, 'Unsupported observation kind!'
     if self._data:
         self._data = self._data + [observation]
         self._data = self._data[-self._stacked_size:]
     else:
         self._data = [observation] * self._stacked_size
Example #8
    def __getitem__(self, idx):
        """
        Return Stacked Spatial Minimap (SMM) and Float115_v2 Representation
        Reference: https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#Observation%20Wrappers
        """

        # For Frame Stack
        stacked_obs = collections.deque([], maxlen=self.stack_frames)

        frame_name = self.df.loc[idx, 'frame_name']
        frame_step = int(frame_name.split('_')[1])
        if frame_step >= 5 and idx >= 5:
            for frame_idx in list(range(idx + 1))[-self.stack_frames:]:
                frame_name = self.df.loc[frame_idx, 'frame_name']
                with open(join(obs_frames_path, frame_name), 'rb') as pkl_file:
                    raw_obs = pickle.load(pkl_file)
                    smm_obs = observation_preprocessing.generate_smm([raw_obs
                                                                      ])[0]
                    smm_obs = smm_obs / 255.0
                    stacked_obs.append(smm_obs)

        else:
            with open(join(obs_frames_path, frame_name), 'rb') as pkl_file:
                raw_obs = pickle.load(pkl_file)
                smm_obs = observation_preprocessing.generate_smm([raw_obs])[0]
                smm_obs = smm_obs / 255.0
                stacked_obs.extend([smm_obs] * self.stack_frames)

        smm_frame = np.concatenate(list(stacked_obs), axis=-1)

        # Float115 Obs
        float115_frame = Simple115StateWrapper.convert_observation([raw_obs],
                                                                   True)[0]

        # Retrieve action
        action = self.df.loc[idx, 'action']

        if self.train:
            return (smm_frame, float115_frame), int(action)
        else:
            return (smm_frame, float115_frame)
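
This reads like a PyTorch map-style dataset for behavioural cloning. A minimal usage sketch, assuming the class is named GFootballDataset and is built from the dataframe used above (both names are assumptions):

    from torch.utils.data import DataLoader

    dataset = GFootballDataset(df, train=True)          # hypothetical constructor
    loader = DataLoader(dataset, batch_size=64, shuffle=True)

    for (smm_batch, s115_batch), action_batch in loader:
        # smm_batch:    (64, 72, 96, 4 * stack_frames) stacked minimaps scaled to [0, 1]
        # s115_batch:   (64, 115) Simple115 features
        # action_batch: (64,) integer action ids
        break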
Example #9
def agent(obs):

    # Obs for first player (i.e. player we control)
    obs = obs['players_raw'][0]
    obs = observation_preprocessing.generate_smm([obs])[0]
    if not stacked_obs:
        stacked_obs.extend([obs] * 4)
    else:
        stacked_obs.append(obs)
    obs = np.concatenate(list(stacked_obs), axis=-1)

    action = policy.predict(obs)

    return [int(action)]
Example #10
  def run_agent(self, obs, config, reward, info):
    print('About to start the agent')

    # Simple115 observation
    simple115_obs = wrappers.Simple115StateWrapper.convert_observation(obs.players_raw, True)
    # Or minimap observation.
    minimap = observation_preprocessing.generate_smm(obs.players_raw) 

    ## TODO: this should not be a batch dimension.
    print("Calling the model")
    action = np.argmax(self._model(minimap))
    print("Done")
    # you have to cast it back to int (from numpy.int64)
    return [int(action)]
Example #11
    def take_action(self, observations):
        assert len(observations) == 1, 'Multiple players control is not supported'
        # print()
        # print('take_action')
        # print(observation)
        # assert 0, observation

        observations = observation_preprocessing.generate_smm(observations)
        # print(observation)
        observations = self._stacker.get(observations)
        # print(observation)
        action = self._policy.step(observations)[0][0]
        print(action)
        assert self._action_set == 'default', self._action_set
        actions = [football_action_set.ACTION_SET_DICT[self._action_set][action]]
        # print(actions)
        return actions
Example #12
    def run_agent(self, obs, config, reward, info):
        print('About to start the agent')
        # As we train a simple, single player agent we want to control a single player at a time, so we use
        # MultiAgentToSingleAgent wrapper to modify multi-agent scenario observations.
        single_obs = wrappers.MultiAgentToSingleAgent.get_observation(
            obs.players_raw)

        # Then we can apply additional wrappers to use different observation format for the agent.
        # For more details see https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
        # Simple115 observation:
        simple115_obs = wrappers.Simple115StateWrapper.convert_observation(
            obs.players_raw, True)
        # Minimap observation:
        minimap = observation_preprocessing.generate_smm(obs.players_raw)

        ## TODO: this should not be a batch dimension.
        print("Calling the model")
        action = np.argmax(self._model(minimap))
        print("Done")
        # you have to cast it back to int (from numpy.int64)
        return wrappers.MultiAgentToSingleAgent.get_action(
            action, obs.players_raw)
Example #13
def obs_convert(obs):
    return observation_preprocessing.generate_smm([obs['players_raw'][0]], channel_dimensions=(84, 84))[0]
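
channel_dimensions is given as (width, height); when it is omitted, as in most of the other examples here, generate_smm falls back to the library default of a 96x72 minimap. A quick shape comparison (a sketch, nothing project-specific):

    small_smm = obs_convert(obs)                          # (84, 84, 4)
    default_smm = observation_preprocessing.generate_smm(
        [obs['players_raw'][0]])[0]                       # (72, 96, 4) with the defaults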
Example #14
 def observation(self, obs):
     return observation_preprocessing.generate_smm(obs)
Example #15
def obs_convert(obs):
    return observation_preprocessing.generate_smm([obs['players_raw'][0]])[0]
Example #16
File: ppo2_v2.py  Project: sazas/football
 def take_action(self, observation):
   observation = observation_preprocessing.generate_smm(observation)
   observation = self._stacker.get(observation)
   action = self._sess.run("player_0/ppo2_model/ArgMax:0", feed_dict={"player_0/ppo2_model/Ob:0": observation})
   return [int(action[0])]
Example #17
def main(_):

    left_player = 'ppo2_cnn:left_players=1,policy=gfootball_impala_cnn,checkpoint=/Users/stephen/Documents/football/checkpoints/11_vs_11_easy_stochastic_v2'
    right_player = 'ppo2_cnn:right_players=1,policy=gfootball_impala_cnn,checkpoint=/Users/stephen/Documents/football/checkpoints/11_vs_11_easy_stochastic_v2'
    players = [left_player, right_player]

    env_config_values = {
        'dump_full_episodes': False,
        'dump_scores': False,
        'players': players,
        'level': '11_vs_11_easy_stochastic',
        'tracesdir': '/Users/stephen/Documents/football/logs',  # logdir
        'write_video': False
    }

    env_config = config.Config(env_config_values)
    env = football_env.FootballEnv(env_config)
    env.reset()

    player_config = {'index': 2}
    name, definition = config.parse_player_definition(left_player)
    config_name = 'player_{}'.format(name)
    if config_name in player_config:
        player_config[config_name] += 1
    else:
        player_config[config_name] = 0
    player_config.update(definition)
    player_config['stacked'] = True
    player = Player(player_config, env_config)
    stacker = ObservationStacker(4)

    n_timesteps = 30000  # 10 games
    game_i = 0
    observations = []
    actions = []

    for i in range(n_timesteps):
        obs, _, done, _ = env.step([])
        obs_processed = observation_preprocessing.generate_smm([obs])
        obs_processed = stacker.get(obs_processed)
        observations.append(obs_processed)
        act = player.take_action([obs])[0]
        actions.append(full_action_set.index(act))
        if done:
            env.reset()
            stacker.reset()
            observations = np.squeeze(np.vstack(
                observations))  # should now be shape (3000, 72, 96, 16)
            actions = np.array(actions)  # should be shape (n_samples,)
            with open(
                    f'/Users/stephen/Documents/football/data/observations{game_i}.pkl',
                    'wb') as f:
                pickle.dump(observations, f)
            with open(
                    f'/Users/stephen/Documents/football/data/actions{game_i}.pkl',
                    'wb') as f:
                pickle.dump(actions, f)
            game_i += 1
            observations = []
            actions = []

    print('Done :)')
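
The per-game pickles written above can later be reloaded to build a behavioural-cloning dataset. A minimal sketch of reading one game back, using the same paths as in main:

    import pickle

    game_i = 0
    with open(f'/Users/stephen/Documents/football/data/observations{game_i}.pkl', 'rb') as f:
        observations = pickle.load(f)   # roughly (3000, 72, 96, 16) stacked SMM frames
    with open(f'/Users/stephen/Documents/football/data/actions{game_i}.pkl', 'rb') as f:
        actions = pickle.load(f)        # roughly (3000,) action-set indices

    assert observations.shape[0] == actions.shape[0]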
Example #18
 def observation(self, obs):
     return observation_preprocessing.generate_smm(
         obs,
         channel_dimensions=self._channel_dimensions,
         config=self.env.unwrapped._config)
Example #19
    from kaggle_environments import make

    env = make("football",
               configuration={
                   "save_video": True,
                   "scenario_name": "11_vs_11_kaggle"
               })

    # This is the observation that is passed on agent function.
    obs_kag_env = env.state[0]['observation']

    print(obs_kag_env.keys())

    simple_obs_ = Simple115StateWrapper.convert_observation(
        obs_kag_env['players_raw'], fixed_positions=False)
    smm_obs_ = observation_preprocessing.generate_smm(
        [obs_kag_env['players_raw'][0]])

    base_env = gym.make("GFootball-11_vs_11_kaggle-SMM-v0").unwrapped
    obs_gym_env = base_env.reset()

    wrapped_env = SimpleAndSMMObsWrapper(base_env.unwrapped)
    wrapped_env.reset()

    SimpleAndSMMObsWrapper.process_obs(obs_kag_env)
    SimpleAndSMMObsWrapper.process_obs(obs_gym_env)

    buff_wrapped_env = SMMFrameProcessWrapper(wrapped_env)
    buff_obs = buff_wrapped_env.reset()
    buff_obs = buff_wrapped_env.step(1)

    buffed_smm = SMMFrameProcessWrapper(