def create_env_one_player(self, player,
                          game_steps_per_episode=GAME_STEPS_PER_EPISODE,
                          step_mul=STEP_MUL,
                          version=VERSION,
                          map_name=MAP_NAME,
                          random_seed=RANDOM_SEED):
    player_aif = AgentInterfaceFormat(**AAIFP._asdict())
    agent_interface_format = [player_aif]

    # create env
    print('map name:', map_name)
    print('player.name:', player.name)
    print('player.race:', player.race)

    sc2_computer = Bot([Race.terran], Difficulty(DIFFICULTY), [BotBuild.random])

    env = SC2Env(map_name=map_name,
                 players=[Agent(player.race, player.name), sc2_computer],
                 step_mul=step_mul,
                 game_steps_per_episode=game_steps_per_episode,
                 agent_interface_format=agent_interface_format,
                 version=version,
                 random_seed=random_seed)

    return env

def create_env(self, player, opponent,
               game_steps_per_episode=GAME_STEPS_PER_EPISODE,
               step_mul=STEP_MUL,
               version=VERSION,
               # the map should be the same as in the expert replay
               map_name="AbyssalReef",
               random_seed=RANDOM_SEED):
    player_aif = AgentInterfaceFormat(**AAIFP._asdict())
    opponent_aif = AgentInterfaceFormat(**AAIFP._asdict())
    agent_interface_format = [player_aif, opponent_aif]

    # create env
    print('map name:', map_name)
    print('player.name:', player.name)
    print('opponent.name:', opponent.name)
    print('player.race:', player.race)
    print('opponent.race:', opponent.race)

    env = SC2Env(map_name=map_name,
                 players=[Agent(player.race, player.name),
                          Agent(opponent.race, opponent.name)],
                 step_mul=step_mul,
                 game_steps_per_episode=game_steps_per_episode,
                 agent_interface_format=agent_interface_format,
                 version=version,
                 random_seed=random_seed)

    return env

def create_env(self, player, opponent, game_steps_per_episode=GAME_STEPS_PER_EPISODE, step_mul=STEP_MUL, version=None, map_name="Simple64", random_seed=1): player_aif = AgentInterfaceFormat(**AAIFP._asdict()) opponent_aif = AgentInterfaceFormat(**AAIFP._asdict()) agent_interface_format = [player_aif, opponent_aif] # create env print('map name:', map_name) print('player.name:', player.name) print('opponent.name:', opponent.name) print('player.race:', player.race) print('opponent.race:', opponent.race) env = SC2Env(map_name=map_name, players=[ Agent(player.race, player.name), Agent(opponent.race, opponent.name) ], step_mul=step_mul, game_steps_per_episode=game_steps_per_episode, agent_interface_format=agent_interface_format, version=version, random_seed=random_seed) return env
def create_env_one_player(self, player,
                          game_steps_per_episode=GAME_STEPS_PER_EPISODE,
                          step_mul=STEP_MUL,
                          version=VERSION,
                          map_name=MAP_NAME,
                          random_seed=RANDOM_SEED):
    player_aif = AgentInterfaceFormat(**AAIFP._asdict())
    agent_interface_format = [player_aif]

    # create env
    print('map name:', map_name)
    print('player.name:', player.name)
    print('player.race:', player.race)

    # class BotBuild(enum.IntEnum):
    #     """Bot build strategies."""
    #     random = sc_pb.RandomBuild
    #     rush = sc_pb.Rush
    #     timing = sc_pb.Timing
    #     power = sc_pb.Power
    #     macro = sc_pb.Macro
    #     air = sc_pb.Air
    sc2_computer = Bot([Race.terran], Difficulty(DIFFICULTY), [BotBuild.random])

    env = SC2Env(map_name=map_name,
                 players=[Agent(player.race, player.name), sc2_computer],
                 step_mul=step_mul,
                 game_steps_per_episode=game_steps_per_episode,
                 agent_interface_format=agent_interface_format,
                 version=version,
                 random_seed=random_seed)

    return env

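# A minimal usage sketch (not part of the original code): drive the environment
# returned by create_env_one_player with no-op actions, assuming the standard
# pysc2 TimeStep/step API. The helper name run_noop_episode and the `player`
# argument are illustrative placeholders.
def run_noop_episode(self, player):
    from pysc2.lib import actions  # provides FUNCTIONS.no_op()

    env = self.create_env_one_player(player)
    try:
        timesteps = env.reset()  # a list with one TimeStep for the single agent
        while not timesteps[0].last():
            # SC2Env.step expects one action per agent
            timesteps = env.step([actions.FUNCTIONS.no_op()])
    finally:
        env.close()
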
def main(nb_epochs, max_rollouts, agent_type_name, map_name, step_mul):
    dims = Dimensions(screen=(200, 200), minimap=(50, 50))
    format = AgentInterfaceFormat(feature_dimensions=dims)
    game = SC2Env(
        map_name=map_name,
        players=[Agent(Race.protoss), Bot(Race.terran, Difficulty.easy)],
        step_mul=step_mul,
        agent_interface_format=format,
        visualize=False)

    # Set size of network by resetting the game to get observation space
    init_obs = game.reset()[0]
    obs_dimension = len(flatten_features(init_obs.observation))

    agent = get_agent_from_name(agent_type_name)

    # Setup agent
    obs_shape = (obs_dimension,)
    nb_actions = ACT_DIM
    agent.setup(obs_shape=obs_shape,
                nb_actions=nb_actions,
                action_spec=game.action_spec(),
                noise_type="adaptive-param_0.01,ou_0.01")

    # Run the training
    run_agent(agent, game, nb_epochs, max_rollouts)

def __init__(self):
    self.pysc2_env = SC2Env(
        map_name='CollectMineralShards',
        agent_race='T',
        screen_size_px=(64, 64),
        minimap_size_px=(64, 64),
        visualize=False,
        step_mul=8,
        game_steps_per_episode=None  # use map default
    )
    super().__init__()

def __init__(self):
    self.pysc2_env = SC2Env(
        # map_name='CollectMineralsAndGas',
        map_name='Simple64',
        agent_race='T',
        screen_size_px=(64, 64),
        minimap_size_px=(64, 64),
        visualize=True,
        step_mul=16,
        game_steps_per_episode=None  # use map default
    )
    self.obs_list = deque()
    self.action_list = deque()
    self.total_reward = 0
    self.last_army_count = 0
    self.step_reward = 0
    super().__init__()

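# The two constructors above use the pre-2.0 pysc2 keyword arguments (agent_race,
# screen_size_px, minimap_size_px). A rough equivalent with the current SC2Env
# signature is sketched below; the class it would live in and the choice of a
# very-easy random-race bot opponent are assumptions, not part of the original.
def __init__(self):
    from pysc2.env.sc2_env import (SC2Env, Agent, Bot, Race, Difficulty,
                                   AgentInterfaceFormat, Dimensions)

    self.pysc2_env = SC2Env(
        map_name='Simple64',
        players=[Agent(Race.terran), Bot(Race.random, Difficulty.very_easy)],
        agent_interface_format=AgentInterfaceFormat(
            feature_dimensions=Dimensions(screen=(64, 64), minimap=(64, 64))),
        visualize=True,
        step_mul=16,
        game_steps_per_episode=None  # use map default
    )
    super().__init__()
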
def run():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--timesteps', default=int(1e6))
    parser.add_argument('--num_steps', default=5)
    parser.add_argument('--discount_rate', default=0.99)
    parser.add_argument('--learning_rate', default=2e-4)
    parser.add_argument('--summary_frequency', default=20000)
    parser.add_argument('--performance_num_episodes', default=10)
    parser.add_argument('--summary_log_dir', default="a2c")
    args = parser.parse_args()

    dimensions = Dimensions(screen=(32, 32), minimap=(1, 1))
    interfaceFormat = AgentInterfaceFormat(
        feature_dimensions=dimensions,
        use_feature_units=True,
    )
    global_seed(0)

    env = SC2Env(map_name="MoveToBeacon",
                 agent_interface_format=interfaceFormat,
                 step_mul=8,
                 random_seed=1)
    env = EnvWrapper(env)

    model = Model(policy=PolicyFullyConnected,
                  observation_space=env.observation_space,
                  action_space=env.action_space,
                  learning_rate=args.learning_rate,
                  spatial_resolution=(5, 5))

    runner = Runner(env=env,
                    model=model,
                    batch_size=args.num_steps,
                    discount_rate=args.discount_rate,
                    summary_log_dir=args.summary_log_dir,
                    summary_frequency=args.summary_frequency,
                    performance_num_episodes=args.performance_num_episodes)

    for _ in range(0, (args.timesteps // args.num_steps) + 1):
        runner.run()

def sc2_feature_env(env_id, seed, replay_dir, render):
    agent_interface_format = parse_agent_interface_format(
        feature_screen=84,
        feature_minimap=84,
        action_space='FEATURES'
    )
    env = SC2Env(
        map_name=env_id,
        step_mul=8,
        game_steps_per_episode=0,
        discount=0.99,
        agent_interface_format=agent_interface_format,
        random_seed=seed,
        save_replay_episodes=1 if replay_dir is not None else 0,
        replay_dir=replay_dir,
        visualize=render
    )
    env = AdeptSC2Env(env)
    return env

def __init__(self, path_to_configuration=DEFAULT_CONFIGURATION,
             enable_visualisation=True, game_step_per_update=8):
    self.pysc2_env = SC2Env(
        # map_name='CollectMineralsAndGas',
        map_name='Simple64',
        players=[Agent(Race.terran), Bot(Race.random, Difficulty.very_easy)],
        agent_interface_format=[AgentInterfaceFormat(
            feature_dimensions=Dimensions(
                screen=(SCREEN_RESOLUTION, SCREEN_RESOLUTION),
                minimap=(MINIMAP_RESOLUTION, MINIMAP_RESOLUTION)),
            camera_width_world_units=TILES_VISIBLE_ON_SCREEN_WIDTH)],
        # the git version of pysc2 adds the camera position to the observation if asked
        visualize=enable_visualisation,
        step_mul=game_step_per_update,
        game_steps_per_episode=None  # use map default
    )
    self.general = General(path_to_configuration)

    action_spec = self.pysc2_env.action_spec()
    observation_spec = self.pysc2_env.observation_spec()
    self.general.setup(observation_spec, action_spec)
    # self.observation_space = self.general.training_memory.observation_space
    super().__init__()

def _start_env(self):
    if isinstance(self.map_name, (list, tuple)):
        map_name = random.choice(self.map_name)
        self.max_reset_num = 0
    else:
        map_name = self.map_name

    self.env = SC2Env(
        map_name=map_name,
        players=self.players,
        step_mul=self.step_mul,
        agent_interface_format=self.agent_interface_format,
        game_steps_per_episode=self.max_steps_per_episode,
        disable_fog=self.disable_fog,
        visualize=self.visualize,
        random_seed=self.random_seed,
        score_index=self.score_index,
        score_multiplier=self.score_multiplier,
        save_replay_episodes=self.save_replay_episodes,
        replay_dir=self.replay_dir,
        version=self._version,
        use_pysc2_feature=self.use_pysc2_feature,
        update_game_info=self.update_game_info,
        **self.game_core_config,
    )

def __init__(self, config: SC2EnvironmentConfig):
    self._aif = parse_agent_interface_format(
        feature_screen=config.screen_size,
        feature_minimap=config.minimap_size)
    self._visualize = config.visualize

    if config.function_set == 'all':
        self._func_ids = [f.id for f in FUNCTIONS]
    elif config.function_set == 'minigames':
        self._func_ids = [
            0, 1, 2, 3, 4, 6, 7, 12, 13, 42, 44, 50, 91, 183, 234, 309, 331,
            332, 333, 334, 451, 452, 490
        ]
    else:
        raise ValueError(config.function_set)

    sc2_features = Features(agent_interface_format=self._aif)
    sc2_action_spec = sc2_features.action_spec()
    sc2_obs_spec = sc2_features.observation_spec()

    # Mask of which argument types each selected function uses (column 0 is the function id itself).
    fn_args_mask = np.zeros(
        (len(self._func_ids), len(sc2_action_spec.types) + 1), dtype=bool)
    fn_args_mask[:, 0] = 1
    for i, func_id in enumerate(self._func_ids):
        used_args = [a.id + 1 for a in FUNCTIONS[func_id].args]
        fn_args_mask[i, used_args] = 1

    action_spec = [('function_id',
                    ActionSpec(0, (len(self._func_ids),), None, fn_args_mask))]
    for t in sc2_action_spec.types:
        if t.name == 'screen' or t.name == 'screen2':
            space = 'screen'
        elif t.name == 'minimap':
            space = 'minimap'
        else:
            space = None
        action_spec.append(
            (t.name, ActionSpec(len(action_spec), t.sizes, space, None)))
    action_spec = OrderedDict(action_spec)

    def feature_spec(features):
        return [
            FeatureSpec(f.index, f.scale, f.type == FeatureType.CATEGORICAL)
            for f in features
        ]

    obs_spec = OrderedDict([
        ('screen', ObservationSpec(0, sc2_obs_spec['feature_screen'], True,
                                   feature_spec(SCREEN_FEATURES))),
        ('minimap', ObservationSpec(1, sc2_obs_spec['feature_minimap'], True,
                                    feature_spec(MINIMAP_FEATURES))),
        ('available_actions', ObservationSpec(2, (len(self._func_ids),), False, None)),
        ('player', ObservationSpec(3, sc2_obs_spec['player'], False, None))
    ])

    self.spec = EnvironmentSpec(action_spec, obs_spec)

    from pysc2.env.sc2_env import SC2Env, Agent, Race

    if not flags.FLAGS.is_parsed():
        flags.FLAGS(sys.argv)

    num_retries = 3
    while True:
        try:
            self._env = SC2Env(map_name=config.map_name,
                               agent_interface_format=self._aif,
                               players=[Agent(Race.protoss)],
                               visualize=self._visualize)
            break
        except RequestError:
            num_retries -= 1
            logging.error(
                'SC2Env creation failed, {} retries remaining'.format(num_retries))
            if num_retries <= 0:
                raise

    atexit.register(self._env.close)

def run():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--timesteps', default=int(1e6))
    parser.add_argument('--num_steps', default=128)
    parser.add_argument('--entropy_coefficient', default=0.01)
    parser.add_argument('--learning_rate', default=2e-4)
    parser.add_argument('--gae_gamma', default=0.99)
    parser.add_argument('--gae_lambda', default=0.95)
    parser.add_argument('--num_batches', default=4)
    parser.add_argument('--num_training_epochs', default=4)
    parser.add_argument('--clip_range', default=0.2)
    parser.add_argument('--summary_frequency', default=20000)
    parser.add_argument('--performance_num_episodes', default=10)
    parser.add_argument('--summary_log_dir', default="ppo_fc")
    args = parser.parse_args()

    dimensions = Dimensions(screen=(32, 32), minimap=(1, 1))
    interface_format = AgentInterfaceFormat(
        feature_dimensions=dimensions,
        use_feature_units=True,
    )
    global_seed(0)
    batch_size = args.num_steps // args.num_batches

    env = SC2Env(map_name="MoveToBeacon",
                 agent_interface_format=interface_format,
                 step_mul=8,
                 random_seed=1)
    env = EnvWrapper(env)

    model = Model(policy=PolicyFullyConnected,
                  observation_space=env.observation_space,
                  action_space=env.action_space,
                  learning_rate=args.learning_rate,
                  spatial_resolution=(5, 5),
                  clip_range=args.clip_range,
                  entropy_coefficient=args.entropy_coefficient)

    runner = Runner(env=env,
                    model=model,
                    num_steps=args.num_steps,
                    advantage_estimator_gamma=args.gae_gamma,
                    advantage_estimator_lambda=args.gae_lambda,
                    summary_frequency=args.summary_frequency,
                    performance_num_episodes=args.performance_num_episodes,
                    summary_log_dir=args.summary_log_dir)

    for _ in range(0, (args.timesteps // args.num_steps) + 1):
        assert args.num_steps % args.num_batches == 0

        step = runner.run()
        observations = np.asarray(step[0])
        actions = np.asarray(step[1])
        available_actions = np.asarray(step[2])
        actions_spatial = np.asarray(step[3])
        actions_spatial_mask = np.asarray(step[4])
        advantage_estimations = np.asarray(step[5])
        values = np.asarray(step[6])
        probs = np.asarray(step[7])
        probs_spatial = np.asarray(step[8])

        indexes = np.arange(args.num_steps)
        for _ in range(args.num_training_epochs):
            np.random.shuffle(indexes)
            for i in range(0, args.num_steps, batch_size):
                shuffled_indexes = indexes[i:i + batch_size]
                model.train(
                    observations=[
                        observations[0][shuffled_indexes],
                        observations[1][shuffled_indexes],
                        observations[2][shuffled_indexes]
                    ],
                    actions=actions[shuffled_indexes],
                    available_actions_mask=available_actions[shuffled_indexes],
                    actions_spatial=actions_spatial[shuffled_indexes],
                    actions_spatial_mask=actions_spatial_mask[shuffled_indexes],
                    advantages=advantage_estimations[shuffled_indexes],
                    values=values[shuffled_indexes],
                    probs=probs[shuffled_indexes],
                    probs_spatial=probs_spatial[shuffled_indexes])

def make_sc2env(id=0, **kwargs):
    env = SC2Env(**kwargs)
    return SC2AtariEnv(env, id=id, dim=FLAGS.resolution)

def make_env(id=0, **kwargs):
    env = SC2Env(**kwargs)
    return env

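# A hypothetical call site for the make_sc2env helper above (not in the original
# code): the keyword arguments are forwarded verbatim to SC2Env, so any valid
# SC2Env constructor arguments can be supplied. The specific values below are
# illustrative only.
env_kwargs = dict(
    map_name='MoveToBeacon',
    players=[Agent(Race.terran)],
    agent_interface_format=AgentInterfaceFormat(
        feature_dimensions=Dimensions(screen=(32, 32), minimap=(32, 32))),
    step_mul=8,
    visualize=False,
)
env = make_sc2env(id=0, **env_kwargs)
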
class Args(object):
    def __init__(self):
        self.mode = 'train'
        self.env_name = 'DefeatZerglingsAndBanelings'


if __name__ == '__main__':
    args = Args()

    # Get the environment and extract the number of actions.
    np.random.seed(123)
    nb_actions = Action.get_size()
    env = SC2Env(map_name=args.env_name, visualize=False)

    # Next, we build our model. We use the same model that was described by Mnih et al. (2015).
    input_shape = (WINDOW_LENGTH, number_of_channels(),) + INPUT_SHAPE
    model = Sequential()
    model.add(Permute((3, 4, 1, 2), input_shape=input_shape))
    model.add(Reshape(INPUT_SHAPE + (WINDOW_LENGTH * number_of_channels(),)))
    model.add(Convolution2D(32, 8, 8, subsample=(4, 4)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 4, 4, subsample=(2, 2)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))