def process_obs(
        obs: Union[Dict[str, Any], List[Any]]) -> Tuple[np.ndarray, np.ndarray]:
    """
    Build the SMM and the flattened simple115 observations from one raw obs.

    ``obs`` is either the dict handed to an agent by the Kaggle runner or the
    list returned by the gym env. The per-player raw dicts are found in:
        - Kag obs: obs_kag_env['players_raw']
        - Gym obs: obs_gym_env (the list itself)

    Each per-player dict (obs_kag_env['players_raw'][0] / obs_gym_env[0])
    exposes:
    dict_keys(['left_team_tired_factor', 'left_team_yellow_card',
               'right_team_tired_factor', 'left_team', 'ball_owned_player',
               'right_team_yellow_card', 'ball_rotation', 'ball_owned_team',
               'ball', 'right_team_roles', 'right_team_active', 'steps_left',
               'score', 'right_team', 'left_team_roles', 'ball_direction',
               'left_team_active', 'left_team_direction',
               'right_team_direction', 'game_mode', 'designated', 'active',
               'sticky_actions'])

    Returns:
        ``(smm_obs, simple_obs)``: the SMM generated from the first player
        only, and the simple115 conversion of all players flattened to 1-D.
    """
    # Kaggle wraps the per-player list in a dict; gym passes the list directly.
    players_raw = obs['players_raw'] if isinstance(obs, dict) else obs

    # The conversion can yield multiple rows when the env has:
    # number_of_left_players_agent_controls=1 and
    # number_of_right_players_agent_controls=1
    # so everything is flattened into a single vector.
    simple_rows = Simple115StateWrapper.convert_observation(
        players_raw, fixed_positions=False)
    flat_simple = simple_rows.reshape(-1)

    first_player_smm = observation_preprocessing.generate_smm([players_raw[0]])
    return first_player_smm, flat_simple
def _side_to_df(self, get_s115: bool = True, get_smm: bool = True, get_raw: bool = True) -> Tuple[Union[None, np.ndarray], Union[None, np.ndarray], Union[None, np.ndarray], np.ndarray]:
    """Collect this side's per-step observations and actions as arrays.

    Walks steps ``1 .. self._expected_steps`` of ``self.data['steps']`` and,
    for the agent selected by ``self.side.value``, converts ``players_raw``
    into each requested representation.

    Args:
        get_s115: include the Simple115 conversion (``fixed_positions=True``).
        get_smm: include the SMM generated from the first player's raw obs.
        get_raw: include the ``RawObs`` conversion (cast to float32).

    Returns:
        ``(s115_obs, smm_obs, raw_obs, actions)``. A representation that was
        not requested is ``None``; the others are concatenated along axis 0.
        ``actions`` is a uint8 column holding the first action of each step.
    """
    s115_chunks, smm_chunks, raw_chunks, taken_actions = [], [], [], []

    for step_no in range(1, self._expected_steps + 1):
        agent_step = self.data['steps'][step_no][self.side.value]
        players_raw = agent_step['observation']['players_raw']

        if get_s115:
            s115_chunks.append(
                Simple115StateWrapper.convert_observation(
                    players_raw, fixed_positions=True))
        if get_smm:
            smm_chunks.append(
                observation_preprocessing.generate_smm([players_raw[0]]))
        if get_raw:
            raw_chunks.append(RawObs.convert_observation(players_raw))

        taken_actions.append(agent_step['action'][0])

    def _joined(chunks, wanted):
        # Only concatenate what the caller asked for.
        return np.concatenate(chunks, axis=0) if wanted else None

    s115_out = _joined(s115_chunks, get_s115)
    smm_out = _joined(smm_chunks, get_smm)
    raw_out = _joined(raw_chunks, get_raw)
    if raw_out is not None:
        raw_out = raw_out.astype(np.float32)

    action_column = np.array(taken_actions, dtype=np.uint8)[:, np.newaxis]
    return s115_out, smm_out, raw_out, action_column
def take_action(self, observation):
    """Pick an action for the single controlled player.

    ``observation`` must contain exactly one player's observation; it is
    converted with ``Simple115StateWrapper.convert_observation`` and fed to
    ``self._policy.step``. The first element of the first policy output is
    returned as-is (it is not translated through an action set), wrapped in a
    one-element list.
    """
    assert len(observation) == 1, 'Multiple players control is not supported'
    features = Simple115StateWrapper.convert_observation(observation, True, True)
    policy_output = self._policy.step(features)
    chosen_action = policy_output[0][0]
    return [chosen_action]
def reset(self):
    """Start a new episode and return the first observation.

    Creates a fresh trainer from ``self.env.train(self.agents)``, resets it
    and converts the first controlled player's raw observation according to
    ``self.obs_representation``:

    * ``"smm"``         - SMM of the raw observation.
    * ``"stacked_smm"`` - SMMs from ``self.stacked_obs`` concatenated on the
      last axis (the stack is seeded with 4 copies when it is empty).
    * ``"float115"``    - Simple115 conversion.
    * ``"raw"``         - first element returned by ``OBSParser.parse``.

    Raises:
        NotImplementedError: for ``"pixels"``, which has no conversion here.
        ValueError: for any other, unknown representation.
    """
    # Train our agent against the baseline
    # See https://github.com/Kaggle/kaggle-environments#Training
    self.trainer = self.env.train(self.agents)
    raw_obs = self.trainer.reset()

    # Raw observations
    # (See https://github.com/google-research/football/blob/master/gfootball/doc/observation.md)
    raw_obs = raw_obs['players_raw'][0]

    representation = self.obs_representation
    if representation == "smm":
        obs = observation_preprocessing.generate_smm([raw_obs])[0]
    elif representation == "stacked_smm":
        obs = observation_preprocessing.generate_smm([raw_obs])[0]
        # NOTE(review): the stack is not cleared here, so frames appended
        # before this reset may still be part of the first stacked
        # observation - confirm whether that is intended.
        if not self.stacked_obs:
            self.stacked_obs.extend([obs] * 4)
        else:
            self.stacked_obs.append(obs)
        obs = np.concatenate(list(self.stacked_obs), axis=-1)
    elif representation == "float115":
        obs = Simple115StateWrapper.convert_observation([raw_obs], True)[0]
    elif representation == "raw":
        # Parse the raw observation itself; `obs` is not bound yet here.
        obs, _ = OBSParser.parse(raw_obs)
    elif representation == "pixels":
        # Previously fell through and failed with UnboundLocalError.
        raise NotImplementedError(
            "obs_representation 'pixels' is not implemented")
    else:
        raise ValueError(
            "Unknown obs_representation: {!r}".format(representation))
    return obs
def step(self, action):
    """Advance the environment by one action.

    Steps ``self.env`` with ``[action]``, optionally shapes the reward, and
    converts the first player's raw observation according to
    ``self.obs_representation`` (``"smm"``, ``"stacked_smm"``, ``"float115"``
    or ``"raw"``).

    Reward shaping: when ``"ball_possession"`` is in ``self.rewards``, the
    reward gets +0.1 when ``ball_owned_team`` goes from 1 to 0 (possession
    won) and -0.1 when it goes from 0 to 1 (possession lost), and
    ``self.ball_owned_team`` is updated to the current owner.

    Returns:
        ``(obs, reward, done, info)`` where ``info`` additionally carries
        ``'l_score'`` and ``'r_score'`` taken from ``OBSParser.parse``.

    Raises:
        NotImplementedError: for ``"pixels"``, which has no conversion here.
        ValueError: for any other, unknown representation.
    """
    # Step through the environment
    raw_obs, reward, done, info = self.env.step([action])
    # Obtain raw observation
    raw_obs = raw_obs[0]

    # Reward Shaping (If applicable)
    if "ball_possession" in self.rewards:
        # Reward winning ball possession and penalize losing ball possession.
        prev_ball_owned_team = self.ball_owned_team
        # Read the owner from the observation just returned by the env,
        # not from an attribute that this method never updates.
        cur_ball_owned_team = raw_obs['ball_owned_team']
        # Win ball possession
        if prev_ball_owned_team == 1 and cur_ball_owned_team == 0:
            reward += 0.1
        # Lose ball possession
        if prev_ball_owned_team == 0 and cur_ball_owned_team == 1:
            reward -= 0.1
        self.ball_owned_team = cur_ball_owned_team

    representation = self.obs_representation
    if representation == "raw":
        # A single parse yields both the observation and the score metainfo.
        obs, (l_score, r_score, _) = OBSParser.parse(raw_obs)
    else:
        if representation == "smm":
            obs = observation_preprocessing.generate_smm([raw_obs])[0]
        elif representation == "stacked_smm":
            obs = observation_preprocessing.generate_smm([raw_obs])[0]
            if not self.stacked_obs:
                self.stacked_obs.extend([obs] * 4)
            else:
                self.stacked_obs.append(obs)
            obs = np.concatenate(list(self.stacked_obs), axis=-1)
        elif representation == "float115":
            obs = Simple115StateWrapper.convert_observation([raw_obs], True)[0]
        elif representation == "pixels":
            # Previously fell through and failed with UnboundLocalError.
            raise NotImplementedError(
                "obs_representation 'pixels' is not implemented")
        else:
            raise ValueError(
                "Unknown obs_representation: {!r}".format(representation))
        # Extract MetaInfo like scoring from raw_obs
        _, (l_score, r_score, _) = OBSParser.parse(raw_obs)

    info['l_score'] = l_score
    info['r_score'] = r_score
    return obs, reward, done, info
def process_obs(obs: Union[Dict[str, Any], List[Any]], using: List[str] = None) -> np.ndarray:
    """Return the flattened simple115 obs, followed by the raw obs if any.

    ``obs`` may be a dict (its ``'players_raw'`` entry is used) or the list of
    per-player raw dicts itself. ``using`` is forwarded to ``RawObs``. When
    ``RawObs(...).process()`` yields ``None`` only the simple115 vector is
    returned.
    """
    players_raw = obs['players_raw'] if isinstance(obs, dict) else obs

    s115_vector = Simple115StateWrapper.convert_observation(
        players_raw, fixed_positions=False).reshape(-1)

    # Raw features are built from the first player's observation only.
    raw_features = RawObs(using=using).set_obs(players_raw[0]).process()
    if raw_features is None:
        return s115_vector
    return np.concatenate([s115_vector, raw_features.reshape(-1)])
def __getitem__(self, idx):
    """
    Load the frame at row ``idx`` and return its Float115_v2 representation.

    Returns ``(float115_frame, action)`` when ``self.train`` is set, otherwise
    just ``float115_frame``.

    Reference:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#Observation%20Wrappers
    """
    frame_path = join(obs_frames_path, self.df.loc[idx, 'frame_name'])
    with open(frame_path, 'rb') as pkl_file:
        raw_obs = pickle.load(pkl_file)

    converted = Simple115StateWrapper.convert_observation([raw_obs], True)
    float115_frame = converted[0]

    # The label is looked up for every sample, as in training and evaluation.
    action = self.df.loc[idx, 'action']
    if not self.train:
        return float115_frame
    return float115_frame, int(action)
def agent(obs):
    """Choose an action from the buffered history of observations.

    The new observation is pushed into the global ``buffer``; every buffered
    entry is turned into a simple115 array (all zeros of shape (1, 115) for
    empty slots, e.g. on the first step), the arrays are stacked along a new
    last axis, and the arg-max of ``mod.predict`` is returned as a
    one-element list of int.
    """
    global mod, buffer

    buffer.add(obs)

    per_frame = [
        # Empty slot (first step): stand in with a zero observation.
        np.zeros(shape=(1, 115)) if past is None
        else Simple115StateWrapper.convert_observation(
            past['players_raw'], fixed_positions=False)
        for past in buffer.get()
    ]

    with_time_axis = [np.array(frame)[..., None] for frame in per_frame]
    model_input = np.concatenate(with_time_axis, axis=2)

    best = mod.predict(model_input).argmax()
    return [int(best)]
def process_obs(obs: Union[Dict[str, Any], List[Any]]) -> np.ndarray:
    """Concatenate the flattened simple115 obs with the squeezed raw obs.

    Accepts the Kaggle observation dict or the gfootball env list; anything
    else raises ``ValueError``.
    """
    if isinstance(obs, dict):
        # From kaggle env
        s115_source = obs['players_raw']
        raw_source = obs['players_raw']
    elif isinstance(obs, list):
        # From gfootball Env
        # NOTE(review): here RawObs receives a single player dict, while the
        # kaggle branch passes the whole players list - confirm that
        # RawObs.convert_observation accepts both shapes.
        s115_source, raw_source = obs, obs[0]
    else:
        raise ValueError("Something unexpected about obs")

    s115_vector = Simple115StateWrapper.convert_observation(
        s115_source, fixed_positions=False).reshape(-1)
    raw_features = RawObs.convert_observation(raw_source)
    return np.concatenate([s115_vector, raw_features.squeeze()])
def __getitem__(self, idx):
    """
    Return Stacked Spatial Minimap (SMM) and Float115_v2 Representation

    The SMM part concatenates ``self.stack_frames`` normalised frames on the
    last axis. When both the frame's step number (parsed from its file name)
    and ``idx`` are at least 5, the stack is filled with the frames of the
    last ``self.stack_frames`` rows up to and including ``idx``; otherwise the
    current frame is repeated. The Float115 part always comes from the frame
    at ``idx``.

    Returns ``((smm_frame, float115_frame), action)`` when ``self.train`` is
    set, otherwise ``(smm_frame, float115_frame)``.

    Reference:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#Observation%20Wrappers
    """

    def load_frame(name):
        """Load one pickled raw obs and its SMM scaled to [0, 1]."""
        # NOTE: pickle.load can execute arbitrary code; frame files must come
        # from a trusted source.
        with open(join(obs_frames_path, name), 'rb') as pkl_file:
            frame_raw = pickle.load(pkl_file)
        frame_smm = observation_preprocessing.generate_smm([frame_raw])[0]
        return frame_raw, frame_smm / 255.0

    # For Frame Stack
    stacked_obs = collections.deque([], maxlen=self.stack_frames)
    frame_name = self.df.loc[idx, 'frame_name']
    frame_step = int(frame_name.split('_')[1])

    # NOTE(review): the threshold 5 is hard-coded rather than derived from
    # self.stack_frames, and the preceding rows are presumably frames of the
    # same episode - confirm against how the dataframe is built.
    if frame_step >= 5 and idx >= 5:
        # Slice the range directly instead of materialising every index
        # 0..idx as a list for each sample.
        for frame_idx in range(idx + 1)[-self.stack_frames:]:
            raw_obs, smm_obs = load_frame(self.df.loc[frame_idx, 'frame_name'])
            stacked_obs.append(smm_obs)
        # raw_obs now holds the frame at idx (the last one loaded).
    else:
        raw_obs, smm_obs = load_frame(frame_name)
        stacked_obs.extend([smm_obs] * self.stack_frames)

    smm_frame = np.concatenate(list(stacked_obs), axis=-1)

    # Float115 Obs
    float115_frame = Simple115StateWrapper.convert_observation([raw_obs], True)[0]

    # Retrieve action
    action = self.df.loc[idx, 'action']
    if self.train:
        return (smm_frame, float115_frame), int(action)
    else:
        return (smm_frame, float115_frame)
# Read dump files of football games and store them into trajectories # AUTHOR: Hongyang Xue # DATE: 2019-10-09 import six.moves.cPickle as Pickle from gfootball.env.football_action_set import action_set_dict from gfootball.env.wrappers import Simple115StateWrapper import gfootball.env as football_env import os env = football_env.create_environment("11_vs_11_easy_stochastic", representation='simple115', render=False) default_actions = action_set_dict['default'] simple115wrapper = Simple115StateWrapper(env) def process_single_dump(filename): traj = [] f = Pickle.load(open(filename, 'rb')) for frame in f: # frame is a dict with keys {debug, # observartion, reward, 'cumulative_reward'} act_frame = frame['debug']['action'][0] obs_frame = frame['observation'] if act_frame not in default_actions: act_frame = 0 else: act_frame = default_actions.index(act_frame) active = obs_frame['left_agent_controlled_player'] del obs_frame['left_agent_controlled_player'] obs_frame['active'] = active[0]
import gfootball # noqa from kaggle_environments import make env = make("football", configuration={ "save_video": True, "scenario_name": "11_vs_11_kaggle" }) # This is the observation that is passed on agent function. obs_kag_env = env.state[0]['observation'] print(obs_kag_env.keys()) simple_obs_ = Simple115StateWrapper.convert_observation( obs_kag_env['players_raw'], fixed_positions=False) smm_obs_ = observation_preprocessing.generate_smm( [obs_kag_env['players_raw'][0]]) base_env = gym.make("GFootball-11_vs_11_kaggle-SMM-v0").unwrapped obs_gym_env = base_env.reset() wrapped_env = SimpleAndSMMObsWrapper(base_env.unwrapped) wrapped_env.reset() SimpleAndSMMObsWrapper.process_obs(obs_kag_env) SimpleAndSMMObsWrapper.process_obs(obs_gym_env) buff_wrapped_env = SMMFrameProcessWrapper(wrapped_env) buff_obs = buff_wrapped_env.reset() buff_obs = buff_wrapped_env.step(1)