class TestMultiplayerEnv(parameterized.TestCase, utils.TestCase):

    @parameterized.named_parameters(
        ("features", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("rgb", sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("features_and_rgb", [
            sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64)),
            sc2_env.AgentInterfaceFormat(
                rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=32))
        ]),
    )
    def test_multi_player_env(self, agent_interface_format):
        steps = 100
        step_mul = 16
        players = 2
        with sc2_env.SC2Env(
                map_name="Simple64",
                players=[sc2_env.Agent(sc2_env.Race.random),
                         sc2_env.Agent(sc2_env.Race.random)],
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 2,
                agent_interface_format=agent_interface_format) as env:
            agents = [random_agent.RandomAgent() for _ in range(players)]
            run_loop.run_loop(agents, env, steps)
class TestRandomAgent(parameterized.TestCase, utils.TestCase):

    @parameterized.named_parameters(
        ("features", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("rgb", sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))),
        ("all", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64),
            rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64),
            action_space=sc2_env.ActionSpace.FEATURES,
            use_unit_counts=True,
            use_feature_units=True)),
    )
    def test_random_agent(self, agent_interface_format):
        steps = 250
        step_mul = 8
        with sc2_env.SC2Env(
                map_name=["Simple64", "Simple96"],
                players=[sc2_env.Agent([sc2_env.Race.random, sc2_env.Race.terran]),
                         sc2_env.Bot([sc2_env.Race.zerg, sc2_env.Race.protoss],
                                     sc2_env.Difficulty.easy,
                                     [sc2_env.BotBuild.rush, sc2_env.BotBuild.timing])],
                agent_interface_format=agent_interface_format,
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 3) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
def make_sc2env(num_players, render=False):
    if num_players == 1:
        players = [sc2_env.Agent(sc2_env.Race.terran)]
    else:
        players = [sc2_env.Agent(sc2_env.Race.terran),
                   sc2_env.Agent(sc2_env.Race.terran)]

    if render:
        interface = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(MAP_SIZE, MAP_SIZE),
                minimap=(MAP_SIZE, MAP_SIZE)),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(RGB_SCREEN_WIDTH, RGB_SCREEN_HEIGHT),
                minimap=(RGB_SCREEN_WIDTH, RGB_SCREEN_HEIGHT)),
            action_space=actions.ActionSpace.FEATURES)
    else:
        interface = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(MAP_SIZE, MAP_SIZE),
                minimap=(MAP_SIZE, MAP_SIZE)),
            action_space=actions.ActionSpace.FEATURES)

    env_args = {
        'agent_interface_format': interface,
        'map_name': MAP_NAME,
        'step_mul': FIVE_SECONDS,  # 17 is ~1 action per second
        'players': players,
    }
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, env_args['map_name'], players=num_players)
    return sc2_env.SC2Env(**env_args)
def make_sc2env(map_name, render=False, screen_size=RGB_SCREEN_SIZE, map_size=MAP_SIZE):
    rgb_dimensions = False
    if render:
        rgb_dimensions = sc2_env.Dimensions(
            screen=(screen_size, screen_size),
            minimap=(screen_size, screen_size))

    env_args = {
        'agent_interface_format': sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(map_size, map_size),
                minimap=(map_size, map_size)),
            rgb_dimensions=rgb_dimensions,
            action_space=actions.ActionSpace.FEATURES,
        ),
        'map_name': map_name,
        'step_mul': SIMULATION_STEP_MUL,
    }
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, env_args['map_name'])
    return sc2_env.SC2Env(**env_args)
def main(unused_argv):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    players = []
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))
    players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

    # Screen width is rounded down to a multiple of 4.
    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    tf.Session(config=config).__enter__()

    # flags.DEFINE_float('param_tstep', 100000, 'the parameter total step')
    param_lam = FLAGS.param_lam
    param_gamma = FLAGS.param_gamma
    param_concurrent = FLAGS.param_concurrent
    param_lr = FLAGS.param_lr
    param_cr = FLAGS.param_cr
    param_tstep = FLAGS.param_tstep
    print('params, lam={} gamma={} concurrent={} lr={} tstep={}'.format(
        param_lam, param_gamma, param_concurrent, param_lr, param_tstep))

    env = make_sc2_dis_env(num_env=param_concurrent,
                           seed=rs,
                           players=players,
                           agent_interface_format=agent_interface_format)
    ppo2.learn(policy=CnnPolicy,
               env=env,
               nsteps=128,
               nminibatches=1,
               lam=param_lam,
               gamma=param_gamma,
               noptepochs=4,
               log_interval=1,
               ent_coef=0.01,
               lr=lambda f: f * param_lr,
               cliprange=lambda f: f * param_cr,
               total_timesteps=param_tstep,
               save_interval=10)
@classmethod
def setUpClass(cls):
    # super(TestCompareEnvironments, cls).setUpClass()
    super().setUpClass()
    players = [
        sc2_env.Agent(race=sc2_env.Race.terran),
        sc2_env.Agent(race=sc2_env.Race.protoss),
    ]
    kwargs = {
        'map_name': 'Flat64',
        'players': players,
        'agent_interface_format': [
            sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=(32, 64), minimap=(8, 16)),
                rgb_dimensions=sc2_env.Dimensions(screen=(31, 63), minimap=(7, 15)),
                action_space=sc2_env.ActionSpace.FEATURES),
            sc2_env.AgentInterfaceFormat(
                rgb_dimensions=sc2_env.Dimensions(screen=64, minimap=32))
        ]
    }
    cls._env = sc2_env.SC2Env(**kwargs)
    cls._mock_env = mock_sc2_env.SC2TestEnv(**kwargs)
class TestRandomAgent(parameterized.TestCase, utils.TestCase):

    @parameterized.named_parameters(
        ("features", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("rgb", sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))),
        ("all", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64),
            rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64),
            action_space=sc2_env.ActionSpace.FEATURES,
            use_unit_counts=True,
            use_feature_units=True)),
    )
    def test_random_agent(self, agent_interface_format):
        steps = 250
        step_mul = 8
        with sc2_env.SC2Env(
                map_name="Simple64",
                agent_interface_format=agent_interface_format,
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 2) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
def run_thread(players, agents, map_name, visualize):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)
    print("Random seed: {}.".format(rs))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            random_seed=rs,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            agent_interface_format=agent_interface_format,
            score_index=-1,  # score_index of -1 uses the game outcome as the reward
            disable_fog=FLAGS.disable_fog,
            visualize=visualize) as env:
        run_loop(agents, env, max_episodes=FLAGS.max_agent_episodes)
        if FLAGS.save_replay:
            env.save_replay("%s vs. %s" % (FLAGS.agent1, FLAGS.agent2))
def main(unused_argv):
    # env = gym.make("SC2GYMENV-v0")
    # env.settings['map_name'] = 'ScoutSimple64'
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    players = []
    players.append(sc2_env.Bot(races[FLAGS.bot_race],
                               difficulties[FLAGS.difficulty]))
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    env = ZergScoutEnv(
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=rs,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # score_index of -1 uses the game outcome as the reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)
    env = make(FLAGS.wrapper, env)

    network = model(FLAGS.wrapper)  # deepq.models.mlp([64, 32])
    print('params, lr={} bf={} ef={} ef_eps={}'.format(
        FLAGS.param_lr, FLAGS.param_bf, FLAGS.param_ef, FLAGS.param_efps))
    act = deepq.learn(
        env,
        q_func=network,
        lr=FLAGS.param_lr,
        max_timesteps=100000,
        buffer_size=FLAGS.param_bf,
        exploration_fraction=FLAGS.param_ef,
        exploration_final_eps=FLAGS.param_efps,
        checkpoint_path=FLAGS.checkpoint_path,
        checkpoint_freq=FLAGS.checkpoint_freq,
        print_freq=10,
        callback=callback)
def test_heterogeneous_observations(self):
    with sc2_env.SC2Env(
            map_name="Simple64",
            players=[sc2_env.Agent(sc2_env.Race.random),
                     sc2_env.Agent(sc2_env.Race.random)],
            agent_interface_format=[
                sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(
                        screen=(84, 87), minimap=(64, 67))),
                sc2_env.AgentInterfaceFormat(
                    rgb_dimensions=sc2_env.Dimensions(
                        screen=128, minimap=64))
            ]) as env:

        obs_specs = env.observation_spec()
        self.assertIsInstance(obs_specs, tuple)
        self.assertLen(obs_specs, 2)

        actions_specs = env.action_spec()
        self.assertIsInstance(actions_specs, tuple)
        self.assertLen(actions_specs, 2)

        agents = []
        for obs_spec, action_spec in zip(obs_specs, actions_specs):
            agent = random_agent.RandomAgent()
            agent.setup(obs_spec, action_spec)
            agent.reset()
            agents.append(agent)

        time_steps = env.reset()
        for _ in range(100):
            self.assertIsInstance(time_steps, tuple)
            self.assertLen(time_steps, 2)

            actions = []
            for i, agent in enumerate(agents):
                time_step = time_steps[i]
                obs = time_step.observation
                self.check_observation_matches_spec(obs, obs_specs[i])
                actions.append(agent.step(time_step))

            time_steps = env.step(actions)
def _thunk():
    params['agent_interface_format'] = [
        sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(sz, sz), minimap=(sz, sz)))
    ]
    env = sc2_env.SC2Env(**params)
    return env
def main(): FLAGS(sys.argv) with sc2_env.SC2Env( map_name="CollectMineralShards", step_mul=step_mul, players=[sc2_env.Agent(sc2_env.Race.terran)], agent_interface_format=sc2_env.AgentInterfaceFormat( feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64)), visualize=True) as env: model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], hiddens=[256], dueling=True) act = deepq_mineral_shards.learn(env, q_func=model, num_actions=4, lr=1e-5, max_timesteps=2000000, buffer_size=100000, exploration_fraction=0.5, exploration_final_eps=0.01, train_freq=4, learning_starts=100000, target_network_update_freq=1000, gamma=0.99, prioritized_replay=True) act.save("mineral_shards.pkl")
def main():
    FLAGS(sys.argv)
    with sc2_env.SC2Env(
            map_name="DefeatZerglingsAndBanelings",
            step_mul=step_mul,
            visualize=True,
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64)),
            game_steps_per_episode=steps * step_mul) as env:
        model = deepq.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=True)
        demo_replay = []
        act = dqfd.learn(
            env,
            q_func=model,
            num_actions=3,
            lr=1e-4,
            max_timesteps=10000000,
            buffer_size=100000,
            exploration_fraction=0.5,
            exploration_final_eps=0.01,
            train_freq=2,
            learning_starts=100000,
            target_network_update_freq=1000,
            gamma=0.99,
            prioritized_replay=True,
            demo_replay=demo_replay)
        act.save("defeat_zerglings.pkl")
def default_macro_env_maker(kwargs):
    '''
    :param kwargs: map_name, players, ... almost the same as SC2Env
    :return: env_maker
    '''
    assert kwargs.get('map_name') is not None
    screen_sz = kwargs.pop('screen_size', 64)
    minimap_sz = kwargs.pop('minimap_size', 64)
    difficulty = kwargs.pop('difficulty', sc2_env.Difficulty.very_easy)
    assert screen_sz == minimap_sz

    if 'agent_interface_format' not in kwargs:
        kwargs['agent_interface_format'] = sc2_env.AgentInterfaceFormat(
            use_feature_units=True,
            use_raw_units=True,
            use_unit_counts=True,
            feature_dimensions=sc2_env.Dimensions(
                screen=(screen_sz, screen_sz),
                minimap=(minimap_sz, minimap_sz)))
    if 'players' not in kwargs:
        kwargs['players'] = [
            sc2_env.Agent(sc2_env.Race.protoss),
            sc2_env.Bot(sc2_env.Race.terran, difficulty)
        ]
    if 'game_steps_per_episode' not in kwargs:
        kwargs['game_steps_per_episode'] = 16000
    if 'visualize' not in kwargs:
        kwargs['visualize'] = False
    if 'step_mul' not in kwargs:
        kwargs['step_mul'] = 4
    return MacroEnv(**kwargs)
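# A minimal usage sketch for the factory above. The map name and sizes are
# illustrative assumptions, not values taken from the surrounding code; the
# only hard requirements the function enforces are that 'map_name' is present
# and that the screen and minimap sizes match.
macro_env = default_macro_env_maker({
    'map_name': 'Simple64',  # assumed map; any registered map name works
    'screen_size': 64,
    'minimap_size': 64,
    'difficulty': sc2_env.Difficulty.very_easy,
})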
def initialize_environment(visualize=False):
    interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=FLAGS.screen_resolution,
            minimap=FLAGS.minimap_resolution))
    return sc2_env.SC2Env(map_name=FLAGS.map_name,
                          agent_interface_format=interface_format,
                          visualize=visualize)
def __init__(self, map, agent_type, max_timesteps, max_episodes,
             step_mul=None, visualize=False):
    """
    :param map[string]: map to use
    :param agent_type[agent object type]: agent to use
    :param max_timesteps[int]: maximum number of timesteps to run per episode
    :param max_episodes[int]: number of episodes to run
    :param step_mul: How many game steps per agent step (action/observation).
        None means use the map default.
    :param visualize: Whether to visualize the episodes
    """
    self.aif = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(screen=64, minimap=24),
        use_raw_units=True,
        camera_width_world_units=24)
    self.map = map
    self.max_timesteps = max_timesteps
    self.max_episodes = max_episodes
    self.step_mul = step_mul or 1
    self.visualize = visualize
    self.agent_type = agent_type
    self.agents = []
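# Hedged usage sketch for the constructor above. The enclosing class name
# (Runner) and the agent class are assumptions made purely for illustration;
# the keyword arguments mirror the parameters documented in the docstring.
#
# runner = Runner(map="MoveToBeacon",
#                 agent_type=scripted_agent.MoveToBeaconAgent,
#                 max_timesteps=1000,
#                 max_episodes=10,
#                 step_mul=8,
#                 visualize=False)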
def run_thread(agent, sess, display, players, numSteps):
    """Runs an agent thread."""
    with sess.as_default(), sess.graph.as_default():
        while RUN:
            try:
                agent_interface_format = sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(
                        screen=SCREEN_SIZE, minimap=MINIMAP_SIZE))
                with sc2_env.SC2Env(
                        map_name=flags.FLAGS.map,
                        players=players,
                        game_steps_per_episode=numSteps,
                        agent_interface_format=agent_interface_format,
                        visualize=display) as env:
                    run_loop.run_loop([agent], env)
            except Exception as e:
                print(e)
                print(traceback.format_exc())
                logging.error(traceback.format_exc())
                # remove crashed terminal history
                # agent.RemoveNonTerminalHistory()
                global NUM_CRASHES
                NUM_CRASHES += 1
def main(args):
    with tf.Session() as sess:
        with sc2_env.SC2Env(
                map_name=args['map_name'],
                agent_interface_format=sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(
                        screen=args['screen_size'],
                        minimap=args['minimap_size'])),
                step_mul=args['step_mul'],
                game_steps_per_episode=args['max_episode_step'],
                visualize=False) as env:
            action_bound = int(args['screen_size']) / int(2)

            # sess, screen_size, action_dim, learning_rate, action_bound,
            # minibatch_size, tau
            actor = actorNetwork(sess, args['screen_size'], args['action_dim'],
                                 action_bound, args['tau'],
                                 args['minibatch_size'], args['actor_lr'])
            # sess, screen_size, action_dim, learning_rate, tau, gamma,
            # num_actor_vars, minibatch_size
            critic = criticNetwork(sess, args['screen_size'],
                                   actor.get_trainable_params_num(),
                                   args['tau'], args['critic_lr'],
                                   args['gamma'], args['action_dim'])

            replay_buffer = ReplayBuffer(buffer_size=args['buffer_size'])
            train(sess, env, actor, critic, args, replay_buffer)
def test_observation_matches_obs_spec(self):
    with sc2_env.SC2Env(
            map_name="Simple64",
            players=[sc2_env.Agent(sc2_env.Race.random),
                     sc2_env.Bot(sc2_env.Race.random,
                                 sc2_env.Difficulty.easy)],
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(
                    screen=(84, 87), minimap=(64, 67)))) as env:

        multiplayer_obs_spec = env.observation_spec()
        self.assertIsInstance(multiplayer_obs_spec, tuple)
        self.assertLen(multiplayer_obs_spec, 1)
        obs_spec = multiplayer_obs_spec[0]

        multiplayer_action_spec = env.action_spec()
        self.assertIsInstance(multiplayer_action_spec, tuple)
        self.assertLen(multiplayer_action_spec, 1)
        action_spec = multiplayer_action_spec[0]

        agent = random_agent.RandomAgent()
        agent.setup(obs_spec, action_spec)

        multiplayer_obs = env.reset()
        agent.reset()
        for _ in range(100):
            self.assertIsInstance(multiplayer_obs, tuple)
            self.assertLen(multiplayer_obs, 1)
            raw_obs = multiplayer_obs[0]
            obs = raw_obs.observation
            self.check_observation_matches_spec(obs, obs_spec)

            act = agent.step(raw_obs)
            multiplayer_act = (act,)
            multiplayer_obs = env.step(multiplayer_act)
def run_thread(agent, visualize):
    with sc2_env.SC2Env(
            map_name=FLAGS.map_name,
            agent_interface_format=[
                sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(
                        screen=(FLAGS.screen_resolution, FLAGS.screen_resolution),
                        minimap=(FLAGS.minimap_resolution, FLAGS.minimap_resolution)))
            ],
            step_mul=FLAGS.step_mul,
            visualize=visualize) as env:
        replay_buffer = []
        for trajectory, done in rollout([agent], env, FLAGS.max_steps):
            if FLAGS.train:
                replay_buffer.append(trajectory)
                if done:
                    step = 0
                    with LOCK:
                        global STEP
                        STEP += 1
                        step = STEP
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * step / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.gamma, learning_rate, step)
                    replay_buffer = []
                    if (step + 1) % FLAGS.save_interval == 0:
                        agent.save_model(FLAGS.logdir, step)
                    if step >= FLAGS.max_steps:
                        break
def main(unused_argv):
    """Run an agent."""
    step_mul = FLAGS.step_mul
    players = 2

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    agent_interface_format = None
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    players = [
        sc2_env.Agent(sc2_env.Race.zerg),
        sc2_env.Agent(sc2_env.Race.zerg)
    ]
    agents = []
    bot_difficulty = get_difficulty(FLAGS.difficulty)
    if FLAGS.player1 == 'Bot':
        players[0] = sc2_env.Bot(sc2_env.Race.zerg, bot_difficulty)
    else:
        agents.append(get_agent(FLAGS.player1))
    if FLAGS.player2 == 'Bot':
        players[1] = sc2_env.Bot(sc2_env.Race.zerg, bot_difficulty)
    else:
        agents.append(get_agent(FLAGS.player2))

    with sc2_env.SC2Env(
            map_name=FLAGS.map,
            visualize=FLAGS.visualize,
            players=players,
            step_mul=step_mul,
            game_steps_per_episode=FLAGS.max_steps_per_episode * step_mul,
            agent_interface_format=agent_interface_format,
            disable_fog=FLAGS.disable_fog) as env:
        run_loop(agents, env,
                 max_frames=0,
                 max_episodes=FLAGS.episodes,
                 sleep_time_per_step=FLAGS.sleep_time_per_step,
                 merge_units_info=FLAGS.merge_units_info)
def __init__(self, **kwargs):
    super().__init__(
        map_name='MoveToBeacon',
        players=[sc2_env.Agent(sc2_env.Race.terran)],
        agent_interface_format=sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64),
        ),
        **kwargs)
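# Hedged usage sketch: assuming the class above subclasses sc2_env.SC2Env (its
# __init__ forwards the remaining keyword arguments to the parent), it could be
# constructed as follows. The class name MoveToBeaconEnv is an assumption.
#
# env = MoveToBeaconEnv(step_mul=8, visualize=False)
# timesteps = env.reset()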
def make_envs(args, num_player, bot_level):
    aif = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=args.sz, minimap=args.sz),
        use_feature_units=True)
    env_args = dict(map_name=args.map,
                    num_player=num_player,
                    bot_level=bot_level,
                    agent_interface_format=[aif, aif],
                    step_mul=8,
                    game_steps_per_episode=0)
    # return EnvPool([make_env(args.sz, **dict(env_args, visualize=i < args.render))
    #                 for i in range(args.envs)])
    return make_env(**dict(env_args, visualize=True))
def test_hivemind(self):
    with sc2_env.SC2Env(
            map_name="CyberForest",
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64)),
            step_mul=self.step_mul,
            game_steps_per_episode=self.steps * self.step_mul) as env:
        agent = HiveMind()
        run_loop.run_loop([agent], env, self.steps)
def main(unused_argv):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    players = []
    players.append(sc2_env.Bot(races[FLAGS.bot_race],
                               difficulties[FLAGS.difficulty]))
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    env = ZergScoutEnv(
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=rs,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # score_index of -1 uses the game outcome as the reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)
    env = make(FLAGS.wrapper, env)

    agent = RandomAgent(env.unwrapped.action_space)
    run_loop(agent, env,
             max_episodes=FLAGS.max_agent_episodes,
             max_step=FLAGS.max_step)

    if FLAGS.save_replay:
        env.unwrapped.save_replay('save')
    if FLAGS.profile:
        print(stopwatch.sw)
def make_sc2env(**kwargs):
    agent_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE),
            minimap=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE),
        ))
    kwargs['agent_interface_format'] = [agent_format]
    env = sc2_env.SC2Env(**kwargs)
    return env
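# Hedged usage sketch for the helper above: any remaining SC2Env keyword
# arguments are forwarded unchanged, so a typical call might look like the
# following. The map name, player list, and step_mul here are illustrative
# assumptions rather than values taken from the surrounding code.
env = make_sc2env(map_name='MoveToBeacon',
                  players=[sc2_env.Agent(sc2_env.Race.terran)],
                  step_mul=8,
                  visualize=False)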
def main():
    FLAGS(sys.argv)

    logdir = "tensorboard"
    if FLAGS.algorithm == "deepq":
        logdir = "tensorboard/zergling/%s/%s_%s_prio%s_duel%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction,
            FLAGS.prioritized, FLAGS.dueling, FLAGS.lr, start_time)
    elif FLAGS.algorithm == "acktr":
        logdir = "tensorboard/zergling/%s/%s_num%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.num_cpu, FLAGS.lr, start_time)

    if FLAGS.log == "tensorboard":
        Logger.DEFAULT = Logger.CURRENT = Logger(
            dir=None, output_formats=[TensorBoardOutputFormat(logdir)])
    elif FLAGS.log == "stdout":
        Logger.DEFAULT = Logger.CURRENT = Logger(
            dir=None, output_formats=[HumanOutputFormat(sys.stdout)])

    with sc2_env.SC2Env(
            map_name="DefeatZerglingsAndBanelings",
            step_mul=step_mul,
            visualize=True,
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=32, minimap=32)),
            game_steps_per_episode=steps * step_mul) as env:

        obs = env.reset()
        # print(obs[0].observation)

        model = deepq.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=True)
        demo_replay = []
        act = dqfd.learn(
            env,
            q_func=model,
            num_actions=3,
            lr=1e-4,
            max_timesteps=10000000,
            buffer_size=100000,
            exploration_fraction=0.5,
            exploration_final_eps=0.01,
            train_freq=2,
            learning_starts=100000,
            target_network_update_freq=1000,
            gamma=0.99,
            prioritized_replay=True,
            callback=deepq_callback)
        act.save("defeat_zerglings.pkl")
def __init__(self, env_interface, load_model=False):
    mineral_env_config = {
        "map_name": "MoveToBeacon",
        "visualize": False,
        "step_mul": 64,
        'game_steps_per_episode': None,
        "agent_interface_format": sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=84),
            action_space=actions.ActionSpace.FEATURES,
            use_feature_units=True)
    }
    self.env_interface = env_interface
    self.agent = ConvAgent(self.env_interface)
    self.weights_dir = './weights'
    self.weights_path = os.path.join(self.weights_dir, 'model.ckpt')
    self.env = SCEnvironmentWrapper(self.env_interface,
                                    env_kwargs=mineral_env_config)
    self.curr_iteration = 0
    self.epoch = 0
    self.discount_factor = 0.7
    self.td_lambda = 0.9

    with self.agent.graph.as_default():
        self.session = self.agent.session
        self.session.run(tf.global_variables_initializer())
        self.rewards_input = tf.placeholder(tf.float32, [None],
                                            name="rewards")  # T
        self.behavior_log_probs_input = tf.placeholder(
            tf.float32, [None, None], name="behavior_log_probs")  # T
        self.saver = tf.train.Saver()
        self.loss = self._ac_loss()
        self.train_op = tf.train.AdamOptimizer(0.0003).minimize(self.loss)

        if load_model:
            try:
                self._load_model()
            except ValueError:
                print("Could not load model")

        self.variable_names = [v.name for v in tf.trainable_variables()]
        self.assign_placeholders = {
            t.name: tf.placeholder(t.dtype, t.shape)
            for t in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        }
        self.assigns = [
            tf.assign(tensor, self.assign_placeholders[tensor.name])
            for tensor in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        ]
        self.session.run(tf.global_variables_initializer())
def make_sc2env(**kwargs):
    agent_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE),
            minimap=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE)))
    kwargs['agent_interface_format'] = agent_format
    env = sc2_env.SC2Env(**kwargs)
    # env = available_actions_printer.AvailableActionsPrinter(env)
    return env
def __init__(self, **kwargs):
    self._action_space = None
    self._episode = 0
    self._episode_reward = 0
    self._num_step = 0
    self._observation_space = None
    self._total_reward = 0

    self._kwargs = kwargs
    self._kwargs['agent_interface_format'] = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64))
    self._env = sc2_env.SC2Env(**self._kwargs)