Example #1
    def create_env_one_player(self,
                              player,
                              game_steps_per_episode=GAME_STEPS_PER_EPISODE,
                              step_mul=STEP_MUL,
                              version=VERSION,
                              map_name=MAP_NAME,
                              random_seed=RANDOM_SEED):

        player_aif = AgentInterfaceFormat(**AAIFP._asdict())
        agent_interface_format = [player_aif]

        # create env
        print('map name:', map_name)
        print('player.name:', player.name)
        print('player.race:', player.race)

        sc2_computer = Bot([Race.terran], Difficulty(DIFFICULTY),
                           [BotBuild.random])

        env = SC2Env(map_name=map_name,
                     players=[Agent(player.race, player.name), sc2_computer],
                     step_mul=step_mul,
                     game_steps_per_episode=game_steps_per_episode,
                     agent_interface_format=agent_interface_format,
                     version=version,
                     random_seed=random_seed)

        return env
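The environment returned above is then typically reset and stepped with one FunctionCall per agent player. A minimal usage sketch (hedged: runner and player are placeholders, and FUNCTIONS.no_op() stands in for whatever action the agent actually selects):

from pysc2.lib.actions import FUNCTIONS

env = runner.create_env_one_player(player)   # runner / player are placeholders
timesteps = env.reset()                       # one TimeStep per agent player
while not timesteps[0].last():
    # substitute the agent's chosen FunctionCall for no_op() here
    timesteps = env.step([FUNCTIONS.no_op()])
env.close()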
Example #2
    def create_env(
            self,
            player,
            opponent,
            game_steps_per_episode=GAME_STEPS_PER_EPISODE,
            step_mul=STEP_MUL,
            version=VERSION,
            # the map should be the same as in the expert replay
            map_name="AbyssalReef",
            random_seed=RANDOM_SEED):

        player_aif = AgentInterfaceFormat(**AAIFP._asdict())
        opponent_aif = AgentInterfaceFormat(**AAIFP._asdict())
        agent_interface_format = [player_aif, opponent_aif]

        # create env
        print('map name:', map_name)
        print('player.name:', player.name)
        print('opponent.name:', opponent.name)
        print('player.race:', player.race)
        print('opponent.race:', opponent.race)

        env = SC2Env(map_name=map_name,
                     players=[
                         Agent(player.race, player.name),
                         Agent(opponent.race, opponent.name)
                     ],
                     step_mul=step_mul,
                     game_steps_per_episode=game_steps_per_episode,
                     agent_interface_format=agent_interface_format,
                     version=version,
                     random_seed=random_seed)

        return env
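Because this variant registers two Agent players, step() expects a list of two FunctionCalls (in the same order as the players argument) and returns a pair of TimeSteps. A minimal, hedged usage sketch (league, player, and opponent are placeholders):

from pysc2.lib.actions import FUNCTIONS

env = league.create_env(player, opponent)     # placeholders for the caller's objects
player_ts, opponent_ts = env.reset()
while not player_ts.last():
    # one action per agent, same order as the players argument
    player_ts, opponent_ts = env.step([FUNCTIONS.no_op(), FUNCTIONS.no_op()])
env.close()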
Example #3
    def create_env(self,
                   player,
                   opponent,
                   game_steps_per_episode=GAME_STEPS_PER_EPISODE,
                   step_mul=STEP_MUL,
                   version=None,
                   map_name="Simple64",
                   random_seed=1):

        player_aif = AgentInterfaceFormat(**AAIFP._asdict())
        opponent_aif = AgentInterfaceFormat(**AAIFP._asdict())
        agent_interface_format = [player_aif, opponent_aif]

        # create env
        print('map name:', map_name)
        print('player.name:', player.name)
        print('opponent.name:', opponent.name)
        print('player.race:', player.race)
        print('opponent.race:', opponent.race)

        env = SC2Env(map_name=map_name,
                     players=[
                         Agent(player.race, player.name),
                         Agent(opponent.race, opponent.name)
                     ],
                     step_mul=step_mul,
                     game_steps_per_episode=game_steps_per_episode,
                     agent_interface_format=agent_interface_format,
                     version=version,
                     random_seed=random_seed)

        return env
Example #4
    def create_env_one_player(self, player, game_steps_per_episode=GAME_STEPS_PER_EPISODE, 
                              step_mul=STEP_MUL, version=VERSION, 
                              map_name=MAP_NAME, random_seed=RANDOM_SEED):

        player_aif = AgentInterfaceFormat(**AAIFP._asdict())
        agent_interface_format = [player_aif]

        # create env
        print('map name:', map_name) 
        print('player.name:', player.name)
        print('player.race:', player.race)

        # class BotBuild(enum.IntEnum):
        #   """Bot build strategies."""
        #   random = sc_pb.RandomBuild
        #   rush = sc_pb.Rush
        #   timing = sc_pb.Timing
        #   power = sc_pb.Power
        #   macro = sc_pb.Macro
        #   air = sc_pb.Air

        sc2_computer = Bot([Race.terran],
                           Difficulty(DIFFICULTY),
                           [BotBuild.random])

        env = SC2Env(map_name=map_name,
                     players=[Agent(player.race, player.name),
                              sc2_computer],
                     step_mul=step_mul,
                     game_steps_per_episode=game_steps_per_episode,
                     agent_interface_format=agent_interface_format,
                     version=version,
                     random_seed=random_seed)

        return env
Example #5
def main(nb_epochs, max_rollouts, agent_type_name, map_name, step_mul):
    dims = Dimensions(screen=(200, 200), minimap=(50, 50))
    interface_format = AgentInterfaceFormat(feature_dimensions=dims)
    game = SC2Env(
        map_name=map_name,
        players=[Agent(Race.protoss),
                 Bot(Race.terran, Difficulty.easy)],
        step_mul=step_mul,
        agent_interface_format=interface_format,
        visualize=False)

    # Set size of network by resetting the game to get observation space
    init_obs = game.reset()[0]
    obs_dimension = len(flatten_features(init_obs.observation))

    agent = get_agent_from_name(agent_type_name)

    # Setup agent
    obs_shape = (obs_dimension, )
    nb_actions = ACT_DIM
    agent.setup(obs_shape=obs_shape,
                nb_actions=nb_actions,
                action_spec=game.action_spec(),
                noise_type="adaptive-param_0.01,ou_0.01")

    # Run the training
    run_agent(agent, game, nb_epochs, max_rollouts)
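flatten_features is not shown in this example. A plausible minimal version (an assumption, not the original helper) simply concatenates every numeric field of the pysc2 observation into one flat vector, which is what the obs_dimension computation above relies on:

import numpy as np

def flatten_features(observation):
    # Hypothetical helper: flatten and concatenate all numeric arrays in the
    # observation dict so the result can feed a fully connected network.
    parts = []
    for key in sorted(observation):
        value = np.asarray(observation[key])
        if value.size and np.issubdtype(value.dtype, np.number):
            parts.append(value.astype(np.float32).ravel())
    return np.concatenate(parts)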
Example #6
 def __init__(self):
     self.pysc2_env = SC2Env(
         map_name='CollectMineralShards',
         agent_race='T',
         screen_size_px=(64, 64),
         minimap_size_px=(64, 64),
         visualize=False,
         step_mul=8,
         game_steps_per_episode=None  # use map default
     )
     super().__init__()
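This constructor uses the pre-2.0 pysc2 keyword arguments (agent_race, screen_size_px, minimap_size_px). A hedged sketch of the equivalent construction under the pysc2 2.x API, keeping the same settings:

from pysc2.env.sc2_env import SC2Env, Agent, Race
from pysc2.lib.features import AgentInterfaceFormat, Dimensions

env = SC2Env(
    map_name='CollectMineralShards',
    players=[Agent(Race.terran)],
    agent_interface_format=AgentInterfaceFormat(
        feature_dimensions=Dimensions(screen=(64, 64), minimap=(64, 64))),
    visualize=False,
    step_mul=8,
    game_steps_per_episode=None)  # use map default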
Example #7
 def __init__(self):
     self.pysc2_env = SC2Env(  # map_name='CollectMineralsAndGas',
         map_name='Simple64',
         agent_race='T',
         screen_size_px=(64, 64),
         minimap_size_px=(64, 64),
         visualize=True,
         step_mul=16,
         game_steps_per_episode=None  # use map default
     )
     self.obs_list = deque()
     self.action_list = deque()
     self.total_reward = 0
     self.last_army_count = 0
     self.step_reward = 0
     super().__init__()
Example #8
def run():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--timesteps', default=int(1e6))
    parser.add_argument('--num_steps', default=5)
    parser.add_argument('--discount_rate', default=0.99)

    parser.add_argument('--learning_rate', default=2e-4)
    parser.add_argument('--summary_frequency', default=20000)
    parser.add_argument('--performance_num_episodes', default=10)
    parser.add_argument('--summary_log_dir', default="a2c")
    args = parser.parse_args()

    dimensions = Dimensions(screen=(32, 32), minimap=(1, 1))
    interfaceFormat = AgentInterfaceFormat(
        feature_dimensions=dimensions,
        use_feature_units=True,
    )

    global_seed(0)

    env = SC2Env(map_name="MoveToBeacon",
                 agent_interface_format=interfaceFormat,
                 step_mul=8,
                 random_seed=1)

    env = EnvWrapper(env)

    model = Model(policy=PolicyFullyConnected,
                  observation_space=env.observation_space,
                  action_space=env.action_space,
                  learning_rate=args.learning_rate,
                  spatial_resolution=(5, 5))

    runner = Runner(env=env,
                    model=model,
                    batch_size=args.num_steps,
                    discount_rate=args.discount_rate,
                    summary_log_dir=args.summary_log_dir,
                    summary_frequency=args.summary_frequency,
                    performance_num_episodes=args.performance_num_episodes)

    for _ in range(0, (args.timesteps // args.num_steps) + 1):
        runner.run()
Example #9
def sc2_feature_env(env_id, seed, replay_dir, render):
    agent_interface_format = parse_agent_interface_format(
        feature_screen=84,
        feature_minimap=84,
        action_space='FEATURES'
    )
    env = SC2Env(
        map_name=env_id,
        step_mul=8,
        game_steps_per_episode=0,
        discount=0.99,
        agent_interface_format=agent_interface_format,
        random_seed=seed,
        save_replay_episodes=1 if replay_dir is not None else 0,
        replay_dir=replay_dir,
        visualize=render
    )
    env = AdeptSC2Env(env)
    return env
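A short usage sketch of the factory above (the argument values are illustrative only):

env = sc2_feature_env('MoveToBeacon', seed=0, replay_dir=None, render=False)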
Example #10
 def __init__(self,
              path_to_configuration=DEFAULT_CONFIGURATION,
              enable_visualisation=True,
              game_step_per_update=8):
     self.pysc2_env = SC2Env(  # map_name='CollectMineralsAndGas',
         map_name='Simple64',
         players=[Agent(Race.terran),
                  Bot(Race.random, Difficulty.very_easy)],
         agent_interface_format=[AgentInterfaceFormat(
             feature_dimensions=Dimensions(
                 screen=(SCREEN_RESOLUTION, SCREEN_RESOLUTION),
                 minimap=(MINIMAP_RESOLUTION, MINIMAP_RESOLUTION)),
             camera_width_world_units=TILES_VISIBLE_ON_SCREEN_WIDTH)],
         # the development (git) version of pysc2 can include the camera position in the observation if requested
         visualize=enable_visualisation,
         step_mul=game_step_per_update,
         game_steps_per_episode=None  # use map default
     )
     self.general = General(path_to_configuration)
     action_spec = self.pysc2_env.action_spec()
     observation_spec = self.pysc2_env.observation_spec()
     self.general.setup(observation_spec, action_spec)
     # self.observation_space = self.general.training_memory.observation_space
     super().__init__()
Example #11
 def _start_env(self):
     if isinstance(self.map_name, list) or isinstance(self.map_name, tuple):
         map_name = random.choice(self.map_name)
         self.max_reset_num = 0
     else:
         map_name = self.map_name
     self.env = SC2Env(
         map_name=map_name,
         players=self.players,
         step_mul=self.step_mul,
         agent_interface_format=self.agent_interface_format,
         game_steps_per_episode=self.max_steps_per_episode,
         disable_fog=self.disable_fog,
         visualize=self.visualize,
         random_seed=self.random_seed,
         score_index=self.score_index,
         score_multiplier=self.score_multiplier,
         save_replay_episodes=self.save_replay_episodes,
         replay_dir=self.replay_dir,
         version=self._version,
         use_pysc2_feature=self.use_pysc2_feature,
         update_game_info=self.update_game_info,
         **self.game_core_config,
     )
Example #12
    def __init__(self, config: SC2EnvironmentConfig):
        self._aif = parse_agent_interface_format(
            feature_screen=config.screen_size,
            feature_minimap=config.minimap_size)
        self._visualize = config.visualize

        if config.function_set == 'all':
            self._func_ids = [f.id for f in FUNCTIONS]
        elif config.function_set == 'minigames':
            self._func_ids = [
                0, 1, 2, 3, 4, 6, 7, 12, 13, 42, 44, 50, 91, 183, 234, 309,
                331, 332, 333, 334, 451, 452, 490
            ]
        else:
            raise ValueError

        sc2_features = Features(agent_interface_format=self._aif)
        sc2_action_spec = sc2_features.action_spec()
        sc2_obs_spec = sc2_features.observation_spec()

        fn_args_mask = np.zeros(
            (len(self._func_ids), len(sc2_action_spec.types) + 1),
            dtype=bool)
        fn_args_mask[:, 0] = 1
        for i, func_id in enumerate(self._func_ids):
            used_args = [a.id + 1 for a in FUNCTIONS[func_id].args]
            fn_args_mask[i, used_args] = 1
        action_spec = [('function_id',
                        ActionSpec(0, (len(self._func_ids), ), None,
                                   fn_args_mask))]
        for t in sc2_action_spec.types:
            if t.name == 'screen' or t.name == 'screen2':
                space = 'screen'
            elif t.name == 'minimap':
                space = 'minimap'
            else:
                space = None

            action_spec.append(
                (t.name, ActionSpec(len(action_spec), t.sizes, space, None)))
        action_spec = OrderedDict(action_spec)

        def feature_spec(features):
            return [
                FeatureSpec(f.index, f.scale,
                            f.type == FeatureType.CATEGORICAL)
                for f in features
            ]

        obs_spec = OrderedDict([
            ('screen',
             ObservationSpec(0, sc2_obs_spec['feature_screen'], True,
                             feature_spec(SCREEN_FEATURES))),
            ('minimap',
             ObservationSpec(1, sc2_obs_spec['feature_minimap'], True,
                             feature_spec(MINIMAP_FEATURES))),
            ('available_actions',
             ObservationSpec(2, (len(self._func_ids), ), False, None)),
            ('player', ObservationSpec(3, sc2_obs_spec['player'], False, None))
        ])

        self.spec = EnvironmentSpec(action_spec, obs_spec)

        from pysc2.env.sc2_env import SC2Env, Agent, Race

        if not flags.FLAGS.is_parsed():
            flags.FLAGS(sys.argv)

        num_retries = 3
        while True:
            try:
                self._env = SC2Env(map_name=config.map_name,
                                   agent_interface_format=self._aif,
                                   players=[Agent(Race.protoss)],
                                   visualize=self._visualize)

                break
            except RequestError:
                num_retries -= 1
                logging.error(
                    'SC2Env creation failed, {} retries remaining'.format(
                        num_retries))
                if num_retries <= 0:
                    raise

        atexit.register(self._env.close)
Example #13
def run():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--timesteps', default=int(1e6))
    parser.add_argument('--num_steps', default=128)
    parser.add_argument('--entropy_coefficient', default=0.01)
    parser.add_argument('--learning_rate', default=2e-4)
    parser.add_argument('--gae_gamma', default=0.99)
    parser.add_argument('--gae_lambda', default=0.95)
    parser.add_argument('--num_batches', default=4)
    parser.add_argument('--num_training_epochs', default=4)
    parser.add_argument('--clip_range', default=0.2)
    parser.add_argument('--summary_frequency', default=20000)
    parser.add_argument('--performance_num_episodes', default=10)
    parser.add_argument('--summary_log_dir', default="ppo_fc")
    args = parser.parse_args()

    dimensions = Dimensions(screen=(32, 32), minimap=(1, 1))
    interface_format = AgentInterfaceFormat(
        feature_dimensions=dimensions,
        use_feature_units=True,
    )

    global_seed(0)
    batch_size = args.num_steps // args.num_batches
    env = SC2Env(map_name="MoveToBeacon",
                 agent_interface_format=interface_format,
                 step_mul=8,
                 random_seed=1)

    env = EnvWrapper(env)

    model = Model(policy=PolicyFullyConnected,
                  observation_space=env.observation_space,
                  action_space=env.action_space,
                  learning_rate=args.learning_rate,
                  spatial_resolution=(5, 5),
                  clip_range=args.clip_range,
                  entropy_coefficient=args.entropy_coefficient)

    runner = Runner(env=env,
                    model=model,
                    num_steps=args.num_steps,
                    advantage_estimator_gamma=args.gae_gamma,
                    advantage_estimator_lambda=args.gae_lambda,
                    summary_frequency=args.summary_frequency,
                    performance_num_episodes=args.performance_num_episodes,
                    summary_log_dir=args.summary_log_dir)

    for _ in range(0, (args.timesteps // args.num_steps) + 1):
        assert args.num_steps % args.num_batches == 0
        step = runner.run()
        observations = np.asarray(step[0])
        actions = np.asarray(step[1])
        available_actions = np.asarray(step[2])
        actions_spatial = np.asarray(step[3])
        actions_spatial_mask = np.asarray(step[4])
        advantage_estimations = np.asarray(step[5])
        values = np.asarray(step[6])
        probs = np.asarray(step[7])
        probs_spatial = np.asarray(step[8])
        indexes = np.arange(args.num_steps)

        for _ in range(args.num_training_epochs):
            np.random.shuffle(indexes)

            for i in range(0, args.num_steps, batch_size):
                shuffled_indexes = indexes[i:i + batch_size]
                model.train(
                    observations=[
                        observations[0][shuffled_indexes],
                        observations[1][shuffled_indexes],
                        observations[2][shuffled_indexes]
                    ],
                    actions=actions[shuffled_indexes],
                    available_actions_mask=available_actions[shuffled_indexes],
                    actions_spatial=actions_spatial[shuffled_indexes],
                    actions_spatial_mask=actions_spatial_mask[
                        shuffled_indexes],
                    advantages=advantage_estimations[shuffled_indexes],
                    values=values[shuffled_indexes],
                    probs=probs[shuffled_indexes],
                    probs_spatial=probs_spatial[shuffled_indexes])
Example #14
def make_sc2env(id=0, **kwargs):
    env = SC2Env(**kwargs)
    return SC2AtariEnv(env, id=id, dim=FLAGS.resolution)
Example #15
def make_env(id=0, **kwargs):
    env = SC2Env(**kwargs)
    return env
Example #16

class Args(object):
    def __init__(self):
        self.mode = 'train'
        self.env_name = 'DefeatZerglingsAndBanelings'


if __name__ == '__main__':

    args = Args()

    # Get the environment and extract the number of actions.
    np.random.seed(123)
    nb_actions = Action.get_size()
    env = SC2Env(map_name=args.env_name, visualize=False)

    # Next, we build our model. We use the same model that was described by Mnih et al. (2015).
    input_shape = (WINDOW_LENGTH, number_of_channels(),) + INPUT_SHAPE
    model = Sequential()
    model.add(Permute((3, 4, 1, 2), input_shape=input_shape))
    model.add(Reshape(INPUT_SHAPE + (WINDOW_LENGTH * number_of_channels(),)))
    model.add(Convolution2D(32, 8, 8, subsample=(4, 4)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 4, 4, subsample=(2, 2)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
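The listing breaks off here. A minimal sketch of how such a Mnih-style network is usually finished in the keras-rl examples (an assumption, not the original code) adds a linear Q-value head sized to the action count:

model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())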