Example #1
    def __init__(self, args):
        super(GFootballEnv, self).__init__()

        self.representation = args.state.split('_')[0]
        self.stacked = 'stacked' in args.state

        # wrap the original environment
        # https://github.com/google-research/football/blob/master/gfootball/env/__init__.py
        # this is the same setup as the online evaluation environment
        self.raw_env = football_env.create_environment(
            env_name=args.level,
            stacked=False,  # set to False to align with online evaluation
            representation="raw",  # set to raw to align with online evaluation
            rewards=args.reward_experiment)
        self.obs_stack = deque([], maxlen=4)

        # this is the training environment
        self.real_env = football_env.create_environment(
            env_name=args.level,
            stacked=self.stacked,
            representation=self.representation,
            rewards=args.reward_experiment)

        self.action_space = self.real_env.action_space
        self.observation_space = self.real_env.observation_space
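A minimal sketch (not part of the original snippet) of how the obs_stack deque above could be filled: each raw observation is converted to an SMM frame with gfootball's observation_preprocessing.generate_smm, and the last four frames are concatenated along the channel axis. The helper name stack_raw_obs is made up for illustration.

import numpy as np
from gfootball.env import observation_preprocessing


def stack_raw_obs(obs_stack, raw_obs):
    """Append the SMM frame for raw_obs and return the stacked observation."""
    # generate_smm takes a list of raw observations and returns an array of
    # shape (N, 72, 96, 4); take the single frame we asked for
    smm_frame = observation_preprocessing.generate_smm([raw_obs])[0]
    if len(obs_stack) == 0:
        # first frame of an episode: repeat it so the stack is full immediately
        obs_stack.extend([smm_frame] * obs_stack.maxlen)
    else:
        obs_stack.append(smm_frame)
    return np.concatenate(list(obs_stack), axis=-1)  # e.g. (72, 96, 16)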
Example #2
def random():
    # render the environment
    display_train = False
    if display_train:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='pixels',
                                              render=True)
    else:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='simple115',
                                              render=True)

    total_rewards = 0.0
    total_steps = 0
    obs = env.reset()
    # start an episode.
    while True:
        # choose a random action
        action = env.action_space.sample()
        # obtain rewards
        obs, reward, done, _ = env.step(action)
        total_rewards += reward
        total_steps += 1
        if done:
            break
    print("episode done %d steps. total rewards %.2f" %
          (total_steps, total_rewards))
Example #3
    def __init__(self, args):
        self.num_agents = args.num_agents
        self.scenario_name = args.scenario_name

        # make env
        if not (args.use_render and args.save_videos):
            self.env = football_env.create_environment(
                env_name=args.scenario_name,
                stacked=args.use_stacked_frames,
                representation=args.representation,
                rewards=args.rewards,
                number_of_left_players_agent_controls=args.num_agents,
                number_of_right_players_agent_controls=0,
                channel_dimensions=(args.smm_width, args.smm_height),
                render=(args.use_render and args.save_gifs))
        else:
            # render env and save videos
            self.env = football_env.create_environment(
                env_name=args.scenario_name,
                stacked=args.use_stacked_frames,
                representation=args.representation,
                rewards=args.rewards,
                number_of_left_players_agent_controls=args.num_agents,
                number_of_right_players_agent_controls=0,
                channel_dimensions=(args.smm_width, args.smm_height),
                # video related params
                write_full_episode_dumps=True,
                render=True,
                write_video=True,
                dump_frequency=1,
                logdir=args.video_dir)

        self.max_steps = self.env.unwrapped.observation()[0]["steps_left"]
        self.remove_redundancy = args.remove_redundancy
        self.zero_feature = args.zero_feature
        self.share_reward = args.share_reward
        self.action_space = []
        self.observation_space = []
        self.share_observation_space = []

        if self.num_agents == 1:
            self.action_space.append(self.env.action_space)
            self.observation_space.append(self.env.observation_space)
            self.share_observation_space.append(self.env.observation_space)
        else:
            for idx in range(self.num_agents):
                self.action_space.append(
                    spaces.Discrete(n=self.env.action_space[idx].n))
                self.observation_space.append(
                    spaces.Box(low=self.env.observation_space.low[idx],
                               high=self.env.observation_space.high[idx],
                               shape=self.env.observation_space.shape[1:],
                               dtype=self.env.observation_space.dtype))
                self.share_observation_space.append(
                    spaces.Box(low=self.env.observation_space.low[idx],
                               high=self.env.observation_space.high[idx],
                               shape=self.env.observation_space.shape[1:],
                               dtype=self.env.observation_space.dtype))
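The step() method is not shown above; a minimal sketch of how the share_reward flag stored in this wrapper is typically applied (the helper name is hypothetical): every controlled player receives the team-average reward returned by the underlying gfootball environment.

import numpy as np


def share_team_reward(rewards):
    """Replace each agent's reward with the average over all controlled agents."""
    rewards = np.asarray(rewards, dtype=np.float32)
    return np.full_like(rewards, rewards.mean())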
Example #4
def main():
    display_train = False
    # render environment
    if display_train:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='pixels',
                                              render=True)
    else:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='simple115',
                                              render=True)

    print("action space=", env.action_space)
    print("observation space", env.observation_space)
Example #5
    def __init__(self,
                 env_name='academy_empty_goal_close',
                 representation='simple115',
                 rewards='scoring,checkpoints',
                 stacked=False,
                 dump_path=None,
                 solved_at=1,
                 **kwargs):
        if football_env is None:
            raise ImportError('Could not import gfootball! '
                              'HINT: Perform the setup instructions here: '
                              'https://github.com/google-research/football')

        self._solved_at = solved_at
        self._env = football_env.create_environment(
            env_name=env_name,
            representation=representation,
            rewards=rewards,
            stacked=stacked,
            write_full_episode_dumps=dump_path is not None,
            write_goal_dumps=False,
            logdir=dump_path or '',
            **kwargs)

        self.action_space = self._env.action_space
        self.observation_space = self._env.observation_space
Example #6
    def watch(self, *, env, matches, weights, record):
        
        environment = SubprocVecEnv([
        
            lambda: football.create_environment(
                env_name = "11_vs_11_easy_stochastic",
                representation = self.configs[0]["representation"],
                rewards = self.configs[0]["rewards"],
                enable_goal_videos = False,
                enable_full_episode_videos = True,
                render = True,
                write_video = record,
                dump_frequency = 1,
                logdir = "/home/charlie/Projects/Python/Football/videos/",
                extra_players = self.configs[0]["extra_players"],
                number_of_left_players_agent_controls = self.configs[0]["number_of_left_players_agent_controls"],
                number_of_right_players_agent_controls = self.configs[0]["number_of_right_players_agent_controls"],
                enable_sides_swap = self.configs[0]["enable_sides_swap"]
            ) for _ in range(1)
        
        ])
        
        # self.model.set_env(environment)
        
        watch = PPO2.load(weights, env = environment)
        
        for match in range(matches):

            watch.learn(total_timesteps = 3100)
Example #7
def create_gfootball_multiagent_env(save_dir, dump_freq, render):
    num_agents = 3  # for 'academy_3_vs_1_with_keeper' scenario
    env = football_env.create_environment(
        env_name='academy_3_vs_1_with_keeper',
        stacked=False,
        representation='simple115',
        rewards='scoring',
        logdir=str(save_dir/'football_dumps'),
        enable_goal_videos=False,
        enable_full_episode_videos=bool(dump_freq),
        render=render,
        write_video=True,
        dump_frequency=dump_freq,
        number_of_left_players_agent_controls=3,
        number_of_right_players_agent_controls=0,
        enable_sides_swap=False,
        channel_dimensions=(
            observation_preprocessing.SMM_WIDTH,
            observation_preprocessing.SMM_HEIGHT)
    )
    action_spaces = [gym.spaces.Discrete(env.action_space.nvec[1]) for _ in range(num_agents)]
    observation_spaces = [gym.spaces.Box(
        low=env.observation_space.low[0],
        high=env.observation_space.high[0],
        dtype=env.observation_space.dtype) for _ in range(num_agents)]

    return env, action_spaces, observation_spaces
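Hypothetical usage of the factory above (save_dir is assumed to be a pathlib.Path, since it is combined with the / operator):

from pathlib import Path

env, action_spaces, observation_spaces = create_gfootball_multiagent_env(
    save_dir=Path('/tmp/gfootball_dumps'), dump_freq=0, render=False)
obs = env.reset()  # one simple115 observation row per controlled left player
assert len(action_spaces) == 3 and len(observation_spaces) == 3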
Example #8
def get_env_info(env_name, state, reward_experiment, num_left_agents,
                 num_right_agents, representation, render, seed, num_agents):
    is_football = '11' in env_name or 'academy' in env_name
    if is_football:
        env = football_env.create_environment(
            representation=representation,
            env_name=env_name,
            stacked=('stacked' in state),
            rewards=reward_experiment,
            logdir=None,
            render=render and (seed == 0),
            dump_frequency=50 if render and seed == 0 else 0)
    else:
        env = create_atari_mjc_env(env_name)
    if num_agents == 1:
        from a2c_ppo_acktr.envs import ObsUnsqueezeWrapper
        env = ObsUnsqueezeWrapper(env)
    env.reset()
    num_left_player = (env.unwrapped._cached_observation[0]['left_team'].shape[0]
                       if is_football else 1)
    num_right_player = (env.unwrapped._cached_observation[0]['right_team'].shape[0]
                        if is_football else 0)
    observation_space = copy.deepcopy(env.observation_space)
    action_space = copy.deepcopy(env.action_space)
    env.close()
    return num_left_player, num_right_player, observation_space, action_space
Example #9
def _create_single_football_env(
    level,
    stacked,
    representation,
    reward_experiment,
    write_goal_dumps,
    write_full_episode_dumps,
    write_video,
    dump_frequency,
    render,
    process_number=0,
):
    """
  Creates gfootball environment.
  Meaning of all variables you can find in footbal.gfootball.examples.run_ppo2.py
  """
    env = create_environment(
        env_name=level,
        stacked=stacked,
        representation=representation,
        rewards=reward_experiment,
        logdir=logger.get_dir(),
        write_goal_dumps=write_goal_dumps and (process_number == 0),
        write_full_episode_dumps=write_full_episode_dumps
        and (process_number == 0),
        write_video=write_video,
        render=render and (process_number == 0),
        dump_frequency=dump_frequency if render and process_number == 0 else 0)
    env = monitor.Monitor(
        env,
        logger.get_dir()
        and os.path.join(logger.get_dir(), str(process_number)))
    return env
Example #10
def main(args):
    print(args.early_stop)
    print(args.disable_actions)
    set_seed(args.seed)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # initialize environment
    n_players = 3
    env = football_env.create_environment(env_name="academy_3_vs_1_with_keeper",
                                          representation="simple115",
                                          number_of_left_players_agent_controls=n_players,
                                          stacked=False,
                                          logdir="/tmp/football",
                                          write_goal_dumps=False,
                                          write_full_episode_dumps=False,
                                          render=False)

    # state and action space
    state_space_size = env.observation_space.shape[1]  # we are using simple115 representation
    action_space_size = env.action_space.nvec.tolist()[0]  # all three players share the same action space
    action_list = list(range(action_space_size))
    # state[98:100] represents the three controlled players
    if args.disable_actions:
        action_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        action_space_size = len(action_list)

    # model
    print("loading models")
    actors = [Actor(state_space_size=state_space_size, action_space_size=action_space_size) for _ in range(n_players)]
    critic = Critic(state_space_size=state_space_size)

    # maa2c
    maa2c = MAA2C(args=args, env=env, actors=actors, critic=critic, action_list=action_list, device=device)
    print("learn")
    maa2c.learn()
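When disable_actions restricts the action set as above, the indices sampled by the actors have to be mapped back to real gfootball action ids before env.step(); a minimal sketch (the helper name is made up):

def to_env_actions(sampled_indices, action_list):
    """Map per-player indices into the restricted action_list back to env action ids."""
    return [action_list[i] for i in sampled_indices]

# e.g. with action_list = [1, ..., 12], sampled indices [0, 3, 11] -> env actions [1, 4, 12]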
Example #11
def main(args):
    set_seed(args.seed)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # initialize environment
    n_players = 3
    env = football_env.create_environment(
        env_name="academy_3_vs_1_with_keeper",
        representation="simple115",
        number_of_left_players_agent_controls=n_players,
        stacked=False,
        logdir="/tmp/football",
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False)

    # state and action space
    # we are using the simple115 representation
    state_space_size = env.observation_space.shape[1]
    action_space_size = env.action_space.nvec.tolist()[0]  # all three players share the same action space
    # state[98:100] represents the three controlled players

    # model
    print("loading models")
    actors = [
        Actor(state_space_size=state_space_size,
              action_space_size=action_space_size) for _ in range(n_players)
    ]
    critics = [
        Critic(state_space_size=state_space_size,
               action_space_size=action_space_size,
               n_players=n_players) for _ in range(n_players)
    ]
    old_actors = [
        Actor(state_space_size=state_space_size,
              action_space_size=action_space_size) for _ in range(n_players)
    ]
    old_critics = [
        Critic(state_space_size=state_space_size,
               action_space_size=action_space_size,
               n_players=n_players) for _ in range(n_players)
    ]
    for old_actor, actor in zip(old_actors, actors):
        old_actor.load_state_dict(actor.state_dict())
    for old_critic, critic in zip(old_critics, critics):
        old_critic.load_state_dict(critic.state_dict())

    # maddpg
    maddpg = MADDPG(env=env,
                    action_list=list(range(action_space_size)),
                    actors=actors,
                    critics=critics,
                    old_actors=old_actors,
                    old_critics=old_critics,
                    args=args,
                    device=device)
    print("learn")
    maddpg.learn()
Example #12
    def __init__(self,
                 env_name="11_vs_11_easy_stochastic",
                 obs_representation="stacked_smm",
                 rewards="scoring,checkpoints",
                 logdir="/tmp/football",
                 env_id=0):

        super(FootballEnvWrapper, self).__init__()

        print("Env: " + env_name)

        self.env = football_env.create_environment(
            env_name=env_name,
            stacked=False,
            representation='raw',
            rewards=rewards,
            logdir=logdir,
            write_goal_dumps=False and (env_id == 0),
            write_full_episode_dumps=False and (env_id == 0),
            write_video=False and (env_id == 0),
            render=False,
            dump_frequency=30)

        self.obs_representation = obs_representation

        # For Frame Stack
        self.stacked_obs = collections.deque([], maxlen=4)

        # Define observation space and action space
        # They must be gym.spaces objects

        self.action_space = gym.spaces.Discrete(19)  # 19 actions

        if obs_representation == "smm":
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=(72, 96, 4),
                                                    dtype=np.uint8)
        elif obs_representation == "stacked_smm":
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=(72, 96, 16),
                                                    dtype=np.uint8)
        elif obs_representation == "float115":
            self.observation_space = gym.spaces.Box(low=-30.0,
                                                    high=30.0,
                                                    shape=(115, ),
                                                    dtype=np.float32)
        elif obs_representation == "pixels":
            pass
        elif obs_representation == "raw":
            # Use OBSParser
            self.observation_space = gym.spaces.Box(low=-30.0,
                                                    high=30.0,
                                                    shape=(207, ),
                                                    dtype=np.float32)

        self.ball_owned_team = -1
        self.rewards = rewards
Example #13
    def __init__(self, seed_dir, dump_freq, representation, render):
        self.nagents = 3  # only using 'academy_3_vs_1_with_keeper' level for the moment
        self.representation = representation

        if representation == "simple37":
            env_representation = "simple115"  # we convert obs from 115 to 37 observations ourselves
        else:
            env_representation = representation

        # Instantiate environment

        self.env = football_env.create_environment(
            env_name='academy_3_vs_1_with_keeper',
            stacked=False,
            representation=env_representation,
            rewards='scoring',
            logdir=str(seed_dir / 'football_dumps'),
            enable_goal_videos=False,
            enable_full_episode_videos=bool(dump_freq),
            render=render,
            write_video=True,
            dump_frequency=dump_freq,
            number_of_left_players_agent_controls=3,
            number_of_right_players_agent_controls=0,
            enable_sides_swap=False,
            channel_dimensions=(observation_preprocessing.SMM_WIDTH,
                                observation_preprocessing.SMM_HEIGHT))

        obs_space_low = self.env.observation_space.low[0]
        obs_space_high = self.env.observation_space.high[0]

        # Adapting obs_space properties if we transform simple115 to simple37

        if self.representation == "simple37":
            obs_space_low = self.convert_simple115_to_simple37(
                simple115_vectors=np.expand_dims(obs_space_low, axis=0))[0]
            obs_space_high = self.convert_simple115_to_simple37(
                simple115_vectors=np.expand_dims(obs_space_high, axis=0))[0]

        # Define some useful attributes

        self.action_space = [
            gym.spaces.Discrete(self.env.action_space.nvec[1])
            for _ in range(self.nagents)
        ]
        self.observation_space = [
            gym.spaces.Box(low=obs_space_low,
                           high=obs_space_high,
                           dtype=self.env.observation_space.dtype)
            for _ in range(self.nagents)
        ]
        self.agent_types = ['agent' for _ in range(self.nagents)]
        cm = plt.cm.get_cmap('tab20')
        self.agent_colors = [
            np.array(cm(float(i) / float(self.nagents))[:3])
            for i in range(self.nagents)
        ]
Example #14
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        elif 'academy' in env_id or '11' in env_id:  # gfootball environments
            #env = football_env.create_environment(
            #          env_name=env_id, stacked=('stacked' in state),
            #          with_checkpoints=('with_checkpoints' in reward_experiment),
            #          logdir=log_dir,
            #          enable_goal_videos=dump_scores and (seed == 0),
            #          enable_full_episode_videos=dump_full_episodes and (seed == 0),
            #          render=render and (seed == 0),
            #          dump_frequency=50 if render and seed == 0 else 0,
            #          representation='extracted')
            env = football_env.create_environment(
                env_name=env_id,
                stacked=('stacked' in state),
                rewards=reward_experiment,
                logdir=log_dir,
                render=render and (seed == 0),
                dump_frequency=50 if render and seed == 0 else 0)
            env = EpisodeRewardScoreWrapper(
                env,
                number_of_left_players_agent_controls=1,
                number_of_right_players_agent_controls=0)

        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env, frame_stack=True)
        elif (len(env.observation_space.shape) == 3
              and 'academy' not in env_id and '11' not in env_id):
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])
        return env
Example #15
 def __init__(self, version, envs, hours = 0, verbose = False, weights = None):
     
     self.version = version
     self.name = "football-ppo{}".format(version) + "-e{}"
     self.path = "models/football-ppo-{}/".format(version)
     
     self.defaults = {
         "env_name": "",
         "representation": "simple115",
         "rewards": "scoring",
         "render": False,
         "write_video": False,
         "dump_frequency": 1,
         "extra_players": None,
         "number_of_left_players_agent_controls": 1,
         "number_of_right_players_agent_controls": 0,
         "enable_sides_swap": False,
         "parallel": 1
     }
     
     self.configs = list(map(lambda b: dict(map(lambda a: (a[0], a[1] if a[0] not in b.keys() else b[a[0]]), self.defaults.items())), envs))
     
     self.training = SubprocVecEnv(reduce(lambda a, b: a + b, list(map(lambda config: [
     
         lambda: football.create_environment(
             env_name = config["env_name"],
             representation = config["representation"],
             rewards = config["rewards"],
             render = config["render"],
             write_video = config["write_video"],
             dump_frequency = config["dump_frequency"],
             extra_players = config["extra_players"],
             number_of_left_players_agent_controls = config["number_of_left_players_agent_controls"],
             number_of_right_players_agent_controls = config["number_of_right_players_agent_controls"],
             enable_sides_swap = config["enable_sides_swap"]
         ) for _ in range(config["parallel"])
     
     ], self.configs)), []))
     
     self.inputs = self.training.get_attr("observation_space")[0].shape[0]
     self.outputs = self.training.get_attr("action_space")[0].n
     
     self.verbose = verbose
     
     if not verbose:
         os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 
         deprecation._PRINT_DEPRECATION_WARNINGS = False
         logger = logging.getLogger()
         logger.setLevel(logging.ERROR)
     
     if weights is None:
         self.model = PPO2(policy = MlpPolicy, env = self.training, verbose = int(self.verbose))
     else:
         self.model = PPO2.load(weights, env = self.training, learning_rate = 0.002)
 
     self.experience = hours * 60
Example #16
def create_single_football_env(args):
    env = football_env.create_environment(
        env_name=args.env_name, stacked=True,
        logdir='/tmp/rllib_test',
        write_goal_dumps=False, write_full_episode_dumps=False, render=args.render,
        dump_frequency=0,
        number_of_left_players_agent_controls=args.left_agent,
        number_of_right_players_agent_controls=args.right_agent,
        channel_dimensions=(42, 42))
    return env
Example #17
File: MAAC.py  Project: leehe228/TIL
 def init_env():
     env = football_env.create_environment(
         env_name=config["academy_scenario"],
         rewards=config["scoring"],
         render=config["render_mode"],
         number_of_left_players_agent_controls=config["num_to_control"],
         representation='simple115v2')
     env.seed(seed + rank * 1000)
     np.random.seed(seed + rank * 1000)
     return env
Example #18
def create_single_football_env(iprocess):
  """Creates gfootball environment."""
  env = football_env.create_environment(
      env_name=FLAGS.level, stacked=('stacked' in FLAGS.state),
      rewards=FLAGS.reward_experiment,
      logdir=logger.get_dir(),
      write_goal_dumps=FLAGS.dump_scores and (iprocess == 0),
      write_full_episode_dumps=FLAGS.dump_full_episodes and (iprocess == 0),
      render=FLAGS.render and (iprocess == 0),
      dump_frequency=50 if FLAGS.render and iprocess == 0 else 0)
  env = monitor.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(iprocess)))
  return env
Example #19
  def __init__(self, num_agents):
    self.env = football_env.create_environment(
        env_name='11_vs_11_easy_stochastic', stacked=True,
        logdir='logs',
        write_goal_dumps=False, write_full_episode_dumps=False, render=False,
        dump_frequency=0,
        number_of_left_players_agent_controls=1,
        number_of_right_players_agent_controls=0,rewards='scoring,checkpoints')
 
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
    self.num_agents = 1
Example #20
 def get_next(self):
     scenario = self.training_plan.popleft()
     self.current_scenario_name = scenario
     env = football_env.create_environment(
         env_name=scenario,
         stacked=False,
         logdir='/tmp/football',
         representation=self.representation,
         write_goal_dumps=False,
         write_full_episode_dumps=False,
         render=False)
     return env
Example #21
    def __init__(self, config):
        self.mode = config["mode"]
        if self.mode == 'test':
            print(
                "\n++++++++++++++++++++RUNNING EVALUATION MODE++++++++++++++++++++++\n"
            )
            # Testing mode
            self.env = football_env.create_environment(
                env_name='11_vs_11_easy_stochastic',
                stacked=True,
                logdir='/tmp/rllib_test',
                write_goal_dumps=False,
                write_full_episode_dumps=False,
                render=False,
                dump_frequency=0,
                number_of_left_players_agent_controls=1,
                number_of_right_players_agent_controls=0,
                rewards='scoring,checkpoints')

            self.num_agents = 1
        else:
            # Training mode
            self.env = football_env.create_environment(
                env_name='11_vs_11_easy_stochastic',
                stacked=True,
                logdir='/tmp/rllib_test',
                write_goal_dumps=False,
                write_full_episode_dumps=False,
                render=False,
                dump_frequency=0,
                number_of_left_players_agent_controls=1,
                number_of_right_players_agent_controls=1,
                rewards='scoring,checkpoints')

            self.num_agents = 2
        self.observation_space = gym.spaces.Box(0,
                                                255, [72, 96, 16],
                                                dtype=np.float32)
        self.action_space = gym.spaces.Discrete(19)
Example #22
        def init_env():
            # (** EDITED **) Google Football Env
            # changed the Gym env setup function
            env = football_env.create_environment(
                env_name=config["academy_scenario"],
                rewards=config["scoring"],
                render=config["render_mode"],
                number_of_left_players_agent_controls=config["num_to_control"],
                representation='raw')

            env.seed(seed + rank * 1000)
            np.random.seed(seed + rank * 1000)
            return env
Example #23
def create_single_football_env(seed):
    """Creates gfootball environment."""
    env = football_env.create_environment(
        env_name=args.level,
        stacked=('stacked' in args.state),
        rewards=args.reward_experiment,
        #   logdir=logger.get_dir(),
        #   enable_goal_videos=args.dump_scores and (seed == 0),
        #   enable_full_episode_videos=args.dump_full_episodes and (seed == 0),
        render=args.render and (seed == 0),
        dump_frequency=50 if args.render and seed == 0 else 0)
    #   env = monitor.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(),
    #                                                                str(seed)))
    return env
Example #24
def run(config):
    env = football_env.create_environment(
        env_name=config["academy_scenario"],
        rewards=config["scoring"],
        render=config["render_mode"],
        number_of_left_players_agent_controls=config["num_to_control"],
        representation='raw')

    model = AttentionSAC.init_from_save(
        "./models/football/MAAC3/run2/model.pt", True)
    # (** EDITED **) Set Replay Buffer
    # set up the buffer by iterating over the shapes of env.action_space and env.observation_space

    for ep_i in range(0, config["n_episodes"], config["n_rollout_threads"]):
        obs = env.reset()
        obs = make_state(obs)
        model.prep_rollouts(device='cpu')

        for et_i in range(config["episode_length"]):
            print("episode : {} | step : {}".format(ep_i, et_i), end='\r')
            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(np.vstack(obs[:, i])),
                         requires_grad=False) for i in range(model.nagents)
            ]
            # get actions as torch Variables
            torch_agent_actions = model.step(torch_obs, explore=True)
            # convert actions to numpy arrays
            agent_actions = [ac.data.numpy() for ac in torch_agent_actions]
            # rearrange actions to be per environment
            actions = [[ac[i] for ac in agent_actions]
                       for i in range(config["n_rollout_threads"])]

            # Reformat the actions list to fit the Football env:
            # Google Football expects integer action ids, not one-hot encoded action lists
            actions_list = [[np.argmax(b) for b in a] for a in actions]

            # Step
            next_obs, rewards, dones, infos = env.step(actions_list)
            next_obs = make_state(next_obs)

            # Prevent divergence:
            # without this small offset, training diverges (NaN)
            rewards = rewards - 0.000001

            # Reformat the done-flag list
            # rebuild the done list to fit the replay buffer
            obs = next_obs

    env.close()
Example #25
 def __init__(self, num_agents):
   self.env = football_env.create_environment(
       env_name='test_example_multiagent', stacked=False,
       logdir=os.path.join(tempfile.gettempdir(), 'rllib_test'),
       write_goal_dumps=False, write_full_episode_dumps=False, render=True,
       dump_frequency=0,
       number_of_left_players_agent_controls=num_agents,
       channel_dimensions=(42, 42))
   self.action_space = gym.spaces.Discrete(self.env.action_space.nvec[1])
   self.observation_space = gym.spaces.Box(
       low=self.env.observation_space.low[0],
       high=self.env.observation_space.high[0],
       dtype=self.env.observation_space.dtype)
   self.num_agents = num_agents
Example #26
def create_single_football_env(seed, level='academy_empty_goal_close'):
    """Creates gfootball environment."""
    env = football_env.create_environment(
        env_name=level,
        stacked=('stacked' in args.state),
        rewards=args.reward_experiment,
        # logdir="/home/sahar/downloads",
        #   enable_goal_videos=args.dump_scores and (seed == 0),
        #   enable_full_episode_videos=args.dump_full_episodes and (seed == 0),
        render=args.render and (seed == 0),
        dump_frequency=50 if args.render and seed == 0 else 0)
    print("Creating env:{}".format(level))
    #   env = monitor.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(),
    #                                                                str(seed)))
    return env
Example #27
def create_single_football_env(seed, level):
    """Creates gfootball environment."""
    env = football_env.create_environment(env_name=level,
                                          stacked=('stacked'
                                                   in 'extracted_stacked'),
                                          rewards='scoring',
                                          logdir=logger.get_dir(),
                                          enable_goal_videos=False,
                                          enable_full_episode_videos=False,
                                          render=False,
                                          dump_frequency=0)
    env = monitor.Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(seed)))
    return env
Example #28
def create_single_football_env(seed):
    """Creates gfootball environment."""
    env = football_env.create_environment(
        env_name=FLAGS.level,
        stacked=('stacked' in FLAGS.state),
        with_checkpoints=('with_checkpoints' in FLAGS.reward_experiment),
        logdir=logger.get_dir(),
        enable_goal_videos=FLAGS.dump_scores and (seed == 0),
        enable_full_episode_videos=FLAGS.dump_full_episodes and (seed == 0),
        render=FLAGS.render and (seed == 0),
        dump_frequency=50 if FLAGS.render and seed == 0 else 0)
    env = monitor.Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(seed)))
    return env
Example #29
def make_football(env_id, max_episode_steps=None):
    'Modified make_atari to work with football env'
    env = football_env.create_environment(env_name=env_id,
                                          stacked=False,
                                          representation='extracted',
                                          write_goal_dumps=False,
                                          write_full_episode_dumps=False,
                                          rewards='scoring,checkpoints',
                                          channel_dimensions=(84, 84),
                                          render=False)
    #     env = NoopResetEnv(env, noop_max=30)
    #     env = MaxAndSkipEnv(env, skip=4) # TODO: does this make sense?

    assert max_episode_steps is None

    return env
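A hypothetical smoke test for the factory above (any valid gfootball scenario name works as env_id):

env = make_football('academy_empty_goal_close')
obs = env.reset()
print(obs.shape)  # extracted (SMM) frames at the 84x84 channel dimensions requested above
env.close()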
Example #30
 def __init__(self, num_agents):
   self.env = football_env.create_environment(
       env_name='test_example_multiagent', stacked=False,
       rewards='scoring',
       #logdir='/tmp/rllib_test',
       enable_goal_videos=False, enable_full_episode_videos=False, render=True,
       dump_frequency=0,
       number_of_left_players_agent_controls=num_agents,
       channel_dimensions=(42, 42))
   #self.actions_are_logits = env_config.get("actions_are_logits", False)
   self.action_space = gym.spaces.Discrete(self.env.action_space.nvec[1])
   self.observation_space = gym.spaces.Box(
       low=self.env.observation_space.low[0],
       high=self.env.observation_space.high[0],
       dtype=self.env.observation_space.dtype)
   self.num_agents = num_agents