Example #1
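A parameterized test that runs two random agents against each other on Simple64, covering feature-only, RGB-only, and mixed per-agent interface formats.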
class TestMultiplayerEnv(parameterized.TestCase, utils.TestCase):
    @parameterized.named_parameters(
        ("features",
         sc2_env.AgentInterfaceFormat(
             feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("rgb",
         sc2_env.AgentInterfaceFormat(
             rgb_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("features_and_rgb", [
            sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64)),
            sc2_env.AgentInterfaceFormat(
                rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=32))
        ]),
    )
    def test_multi_player_env(self, agent_interface_format):
        steps = 100
        step_mul = 16
        players = 2
        with sc2_env.SC2Env(
                map_name="Simple64",
                players=[
                    sc2_env.Agent(sc2_env.Race.random),
                    sc2_env.Agent(sc2_env.Race.random)
                ],
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 2,
                agent_interface_format=agent_interface_format) as env:
            agents = [random_agent.RandomAgent() for _ in range(players)]
            run_loop.run_loop(agents, env, steps)
Example #2
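An agent-vs-bot variant of the random-agent test; note that map_name, the races, and the bot's build are given as lists, from which the environment makes a choice for each episode.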
class TestRandomAgent(parameterized.TestCase, utils.TestCase):

    @parameterized.named_parameters(
        ("features", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("rgb", sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))),
        ("all", sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64),
            rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64),
            action_space=sc2_env.ActionSpace.FEATURES,
            use_unit_counts=True,
            use_feature_units=True)),
    )
    def test_random_agent(self, agent_interface_format):
        steps = 250
        step_mul = 8
        with sc2_env.SC2Env(
                map_name=["Simple64", "Simple96"],
                players=[sc2_env.Agent([sc2_env.Race.random, sc2_env.Race.terran]),
                         sc2_env.Bot([sc2_env.Race.zerg, sc2_env.Race.protoss],
                                     sc2_env.Difficulty.easy,
                                     [sc2_env.BotBuild.rush, sc2_env.BotBuild.timing])],
                agent_interface_format=agent_interface_format,
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 3) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
Example #3
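An environment factory that registers a custom map and requests RGB layers in addition to feature layers only when rendering is enabled.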
def make_sc2env(num_players, render=False):
    if num_players == 1:
        players = [sc2_env.Agent(sc2_env.Race.terran)]
    else:
        players = [sc2_env.Agent(sc2_env.Race.terran), sc2_env.Agent(sc2_env.Race.terran)]

    if render:
        interface = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(MAP_SIZE, MAP_SIZE),
                minimap=(MAP_SIZE, MAP_SIZE)
            ),
            rgb_dimensions=sc2_env.Dimensions(
                screen=(RGB_SCREEN_WIDTH, RGB_SCREEN_HEIGHT),
                minimap=(RGB_SCREEN_WIDTH, RGB_SCREEN_HEIGHT),
            ),
            action_space=actions.ActionSpace.FEATURES)
    else:
        interface = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(MAP_SIZE, MAP_SIZE),
                minimap=(MAP_SIZE, MAP_SIZE)
            ), action_space=actions.ActionSpace.FEATURES)

    env_args = {
        'agent_interface_format': interface,
        'map_name': MAP_NAME,
        'step_mul': FIVE_SECONDS,  # 17 is ~1 action per second
        'players': players,
    }
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, env_args['map_name'], players=num_players)
    return sc2_env.SC2Env(**env_args)
Example #4
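A similar factory keyed by map name; rgb_dimensions stays unset unless render is requested.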
def make_sc2env(map_name,
                render=False,
                screen_size=RGB_SCREEN_SIZE,
                map_size=MAP_SIZE):
    rgb_dimensions = None  # None rather than False: the parameter expects a Dimensions or None
    if render:
        rgb_dimensions = sc2_env.Dimensions(screen=(screen_size, screen_size),
                                            minimap=(screen_size, screen_size))
    env_args = {
        'agent_interface_format': sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=(map_size, map_size),
                                                  minimap=(map_size, map_size)),
            rgb_dimensions=rgb_dimensions,
            action_space=actions.ActionSpace.FEATURES,
        ),
        'map_name': map_name,
        'step_mul': SIMULATION_STEP_MUL,
    }
    maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
    register_map(maps_dir, env_args['map_name'])
    return sc2_env.SC2Env(**env_args)
Example #5
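A PPO training entry point: the interface format is chosen from a flag, and a CnnPolicy is trained over several concurrent two-agent environments.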
def main(unused_argv):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    players = []
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))
    players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    tf.Session(config=config).__enter__()

    #flags.DEFINE_float('param_tstep', 100000, 'the parameter total step')
    param_lam = FLAGS.param_lam
    param_gamma = FLAGS.param_gamma
    param_concurrent = FLAGS.param_concurrent
    param_lr = FLAGS.param_lr
    param_cr = FLAGS.param_cr
    param_tstep = FLAGS.param_tstep
    print('params, lam={} gamma={} concurrent={} lr={} cr={} tstep={}'.format(
        param_lam, param_gamma, param_concurrent, param_lr, param_cr,
        param_tstep))

    env = make_sc2_dis_env(num_env=param_concurrent,
                           seed=rs,
                           players=players,
                           agent_interface_format=agent_interface_format)

    ppo2.learn(policy=CnnPolicy,
               env=env,
               nsteps=128,
               nminibatches=1,
               lam=param_lam,
               gamma=param_gamma,
               noptepochs=4,
               log_interval=1,
               ent_coef=0.01,
               lr=lambda f: f * param_lr,
               cliprange=lambda f: f * param_cr,
               total_timesteps=param_tstep,
               save_interval=10)
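Example #6
Test fixture that builds a real SC2Env and a mock SC2TestEnv from identical kwargs, giving the two agents deliberately different feature and RGB resolutions.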
 def setUpClass(cls):
     # super(TestCompareEnvironments, cls).setUpClass()
     super().setUpClass()
     players = [
         sc2_env.Agent(race=sc2_env.Race.terran),
         sc2_env.Agent(race=sc2_env.Race.protoss),
     ]
     kwargs = {
         'map_name': 'Flat64',
         'players': players,
         'agent_interface_format': [
             sc2_env.AgentInterfaceFormat(
                 feature_dimensions=sc2_env.Dimensions(screen=(32, 64),
                                                       minimap=(8, 16)),
                 rgb_dimensions=sc2_env.Dimensions(screen=(31, 63),
                                                   minimap=(7, 15)),
                 action_space=sc2_env.ActionSpace.FEATURES),
             sc2_env.AgentInterfaceFormat(
                 rgb_dimensions=sc2_env.Dimensions(screen=64, minimap=32))
         ]
     }
     cls._env = sc2_env.SC2Env(**kwargs)
     cls._mock_env = mock_sc2_env.SC2TestEnv(**kwargs)
Example #7
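A single-map version of the random-agent test, again parameterized over feature, RGB, and combined interface formats.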
class TestRandomAgent(parameterized.TestCase, utils.TestCase):
    @parameterized.named_parameters(
        ("features",
         sc2_env.AgentInterfaceFormat(
             feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64))),
        ("rgb",
         sc2_env.AgentInterfaceFormat(
             rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))),
        ("all",
         sc2_env.AgentInterfaceFormat(
             feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64),
             rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64),
             action_space=sc2_env.ActionSpace.FEATURES,
             use_unit_counts=True,
             use_feature_units=True)),
    )
    def test_random_agent(self, agent_interface_format):
        steps = 250
        step_mul = 8
        with sc2_env.SC2Env(map_name="Simple64",
                            agent_interface_format=agent_interface_format,
                            step_mul=step_mul,
                            game_steps_per_episode=steps * step_mul //
                            2) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
Example #8
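A worker thread that builds its interface format from flags, runs the agents for a fixed number of episodes, and optionally saves a replay named after the two agents.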
def run_thread(players, agents, map_name, visualize):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)
    print("Random seed: {}.".format(rs))
    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            random_seed=rs,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            agent_interface_format=agent_interface_format,
            score_index=-1,  # this indicates the outcome is reward
            disable_fog=FLAGS.disable_fog,
            visualize=visualize) as env:

        run_loop(agents, env, max_episodes=FLAGS.max_agent_episodes)
        if FLAGS.save_replay:
            env.save_replay("%s vs. %s" % (FLAGS.agent1, FLAGS.agent2))
Example #9
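A DQN training entry point: a ZergScoutEnv against a built-in bot, wrapped via make(FLAGS.wrapper, ...) and passed to deepq.learn.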
def main(unused_argv):
    #env = gym.make("SC2GYMENV-v0")
    #env.settings['map_name'] = 'ScoutSimple64'

    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    players = []
    players.append(
        sc2_env.Bot(races[FLAGS.bot_race], difficulties[FLAGS.difficulty]))
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    env = ZergScoutEnv(
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=rs,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # this indicates the outcome is reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)

    env = make(FLAGS.wrapper, env)

    network = model(FLAGS.wrapper)  #deepq.models.mlp([64, 32])

    print('params, lr={} bf={} ef={} ef_eps={}'.format(FLAGS.param_lr,
                                                       FLAGS.param_bf,
                                                       FLAGS.param_ef,
                                                       FLAGS.param_efps))

    act = deepq.learn(env,
                      q_func=network,
                      lr=FLAGS.param_lr,
                      max_timesteps=100000,
                      buffer_size=FLAGS.param_bf,
                      exploration_fraction=FLAGS.param_ef,
                      exploration_final_eps=FLAGS.param_efps,
                      checkpoint_path=FLAGS.checkpoint_path,
                      checkpoint_freq=FLAGS.checkpoint_freq,
                      print_freq=10,
                      callback=callback)
Example #10
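Verifies that a two-agent environment with different per-agent interface formats exposes one observation and action spec per agent, and that every observation matches its spec while stepping.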
  def test_heterogeneous_observations(self):
    with sc2_env.SC2Env(
        map_name="Simple64",
        players=[
            sc2_env.Agent(sc2_env.Race.random),
            sc2_env.Agent(sc2_env.Race.random)
        ],
        agent_interface_format=[
            sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(
                    screen=(84, 87),
                    minimap=(64, 67)
                )
            ),
            sc2_env.AgentInterfaceFormat(
                rgb_dimensions=sc2_env.Dimensions(
                    screen=128,
                    minimap=64
                )
            )
        ]) as env:

      obs_specs = env.observation_spec()
      self.assertIsInstance(obs_specs, tuple)
      self.assertLen(obs_specs, 2)

      actions_specs = env.action_spec()
      self.assertIsInstance(actions_specs, tuple)
      self.assertLen(actions_specs, 2)

      agents = []
      for obs_spec, action_spec in zip(obs_specs, actions_specs):
        agent = random_agent.RandomAgent()
        agent.setup(obs_spec, action_spec)
        agent.reset()
        agents.append(agent)

      time_steps = env.reset()
      for _ in range(100):
        self.assertIsInstance(time_steps, tuple)
        self.assertLen(time_steps, 2)

        actions = []
        for i, agent in enumerate(agents):
          time_step = time_steps[i]
          obs = time_step.observation
          self.check_observation_matches_spec(obs, obs_specs[i])
          actions.append(agent.step(time_step))

        time_steps = env.step(actions)
Example #11
File: env.py Project: MCCCSunny/PMES
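A thunk-style constructor for vectorized training; note that agent_interface_format is passed as a single-element list.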
 def _thunk():
     params['agent_interface_format'] = [
         sc2_env.AgentInterfaceFormat(feature_dimensions=sc2_env.Dimensions(
             screen=(sz, sz), minimap=(sz, sz)))
     ]
     env = sc2_env.SC2Env(**params)
     return env
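Example #12
Trains a dueling DQN (cnn_to_mlp) with prioritized replay on the CollectMineralShards minigame, then saves the policy.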
def main():
    FLAGS(sys.argv)
    with sc2_env.SC2Env(
            map_name="CollectMineralShards",
            step_mul=step_mul,
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64)),
            visualize=True) as env:

        model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2),
                                               (64, 3, 1)],
                                        hiddens=[256],
                                        dueling=True)

        act = deepq_mineral_shards.learn(env,
                                         q_func=model,
                                         num_actions=4,
                                         lr=1e-5,
                                         max_timesteps=2000000,
                                         buffer_size=100000,
                                         exploration_fraction=0.5,
                                         exploration_final_eps=0.01,
                                         train_freq=4,
                                         learning_starts=100000,
                                         target_network_update_freq=1000,
                                         gamma=0.99,
                                         prioritized_replay=True)
        act.save("mineral_shards.pkl")
Example #13
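DQfD training on DefeatZerglingsAndBanelings, here started with an empty demonstration replay buffer.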
def main():
    FLAGS(sys.argv)
    with sc2_env.SC2Env(
            map_name="DefeatZerglingsAndBanelings",
            step_mul=step_mul,
            visualize=True,
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64)),
            game_steps_per_episode=steps * step_mul) as env:

        model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2),
                                               (64, 3, 1)],
                                        hiddens=[256],
                                        dueling=True)
        demo_replay = []
        act = dqfd.learn(env,
                         q_func=model,
                         num_actions=3,
                         lr=1e-4,
                         max_timesteps=10000000,
                         buffer_size=100000,
                         exploration_fraction=0.5,
                         exploration_final_eps=0.01,
                         train_freq=2,
                         learning_starts=100000,
                         target_network_update_freq=1000,
                         gamma=0.99,
                         prioritized_replay=True,
                         demo_replay=demo_replay)
        act.save("defeat_zerglings.pkl")
Example #14
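An environment maker with defaults filled in: feature units, raw units, and unit counts enabled, a Protoss agent against a Terran bot, and a 16000-step episode cap.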
def default_macro_env_maker(kwargs):
    '''
    :param kwargs: map_name, players, ... almost same as SC2Env
    :return: env_maker
    '''
    assert kwargs.get('map_name') is not None

    screen_sz = kwargs.pop('screen_size', 64)
    minimap_sz = kwargs.pop('minimap_size', 64)
    difficulty = kwargs.pop('difficulty', sc2_env.Difficulty.very_easy)
    assert screen_sz == minimap_sz
    if 'agent_interface_format' not in kwargs:
        kwargs['agent_interface_format'] = sc2_env.AgentInterfaceFormat(
            use_feature_units=True,
            use_raw_units=True,
            use_unit_counts=True,
            feature_dimensions=sc2_env.Dimensions(
                screen=(screen_sz, screen_sz),
                minimap=(minimap_sz, minimap_sz)))

    if 'players' not in kwargs:
        kwargs['players'] = [
            sc2_env.Agent(sc2_env.Race.protoss),
            sc2_env.Bot(sc2_env.Race.terran, difficulty)
        ]
    if 'game_steps_per_episode' not in kwargs:
        kwargs['game_steps_per_episode'] = 16000
    if 'visualize' not in kwargs:
        kwargs['visualize'] = False
    if 'step_mul' not in kwargs:
        kwargs['step_mul'] = 4
    return MacroEnv(**kwargs)
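Example #15
A minimal helper that builds an environment from flag-defined screen and minimap resolutions.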
def initialize_environment(visualize=False):
    interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=FLAGS.screen_resolution, minimap=FLAGS.minimap_resolution))
    return sc2_env.SC2Env(map_name=FLAGS.map_name,
                          agent_interface_format=interface_format,
                          visualize=visualize)
Example #16
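A harness whose interface format enables raw units and narrows the camera to 24 world units.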
 def __init__(self,
              map,
              agent_type,
              max_timesteps,
              max_episodes,
              step_mul=None,
              visualize=False):
     """
     :param map[string]: map to use
     :param agent_type[agent object type]: agent to use
     :param max_timesteps[int]: maximum number of timesteps to run per episode
     :param max_episodes[int]: number of episodes to run
     :param step_mul: How many game steps per agent step (action/observation). None means use the map default.
     :param visualize: Whether to visualize the episodes
     """
     self.aif = sc2_env.AgentInterfaceFormat(
         feature_dimensions=sc2_env.Dimensions(screen=64, minimap=24),
         use_raw_units=True,
         camera_width_world_units=24)
     self.map = map
     self.max_timesteps = max_timesteps
     self.max_episodes = max_episodes
     self.step_mul = step_mul or 1
     self.visualize = visualize
     self.agent_type = agent_type
     self.agents = []
Example #17
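A crash-tolerant agent thread: the environment is rebuilt inside a retry loop and crashes are counted in a global.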
def run_thread(agent, sess, display, players, numSteps):
    """Runs an agent thread."""

    with sess.as_default(), sess.graph.as_default():

        while RUN:
            try:

                agent_interface_format = sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(
                        screen=SCREEN_SIZE, minimap=MINIMAP_SIZE))

                with sc2_env.SC2Env(
                        map_name=flags.FLAGS.map,
                        players=players,
                        game_steps_per_episode=numSteps,
                        agent_interface_format=agent_interface_format,
                        visualize=display) as env:
                    run_loop.run_loop([agent], env)

            except Exception as e:
                print(e)
                print(traceback.format_exc())
                logging.error(traceback.format_exc())

            # remove crashed terminal history
            # agent.RemoveNonTerminalHistory()
            global NUM_CRASHES
            NUM_CRASHES += 1
Example #18
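DDPG-style setup: actor and critic networks are sized from the screen resolution and trained with a replay buffer inside the environment context.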
def main(args):
    with tf.Session() as sess:

        with sc2_env.SC2Env(
                map_name=args['map_name'],
                agent_interface_format=sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(
                        screen=args['screen_size'],
                        minimap=args['minimap_size'])),
                step_mul=args['step_mul'],
                game_steps_per_episode=args['max_episode_step'],
                visualize=False) as env:
            action_bound = int(args['screen_size']) / 2
            # sess, screen_size, action_dim, learning_rate, action_bound, minibatch_size, tau
            actor = actorNetwork(sess, args['screen_size'], args['action_dim'],
                                 action_bound, args['tau'],
                                 args['minibatch_size'], args['actor_lr'])

            # sess, screen_size, action_dim, learning_rate, tau, gamma, num_actor_vars, minibatch_size
            critic = criticNetwork(sess, args['screen_size'],
                                   actor.get_trainable_params_num(),
                                   args['tau'], args['critic_lr'],
                                   args['gamma'], args['action_dim'])

            replay_buffer = ReplayBuffer(buffer_size=args['buffer_size'])

            train(sess, env, actor, critic, args, replay_buffer)
Example #19
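Verifies that a single agent's observations match observation_spec(), using non-square screen (84x87) and minimap (64x67) sizes.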
  def test_observation_matches_obs_spec(self):
    with sc2_env.SC2Env(
        map_name="Simple64",
        players=[sc2_env.Agent(sc2_env.Race.random),
                 sc2_env.Bot(sc2_env.Race.random, sc2_env.Difficulty.easy)],
        agent_interface_format=sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(84, 87),
                minimap=(64, 67)))) as env:

      multiplayer_obs_spec = env.observation_spec()
      self.assertIsInstance(multiplayer_obs_spec, tuple)
      self.assertLen(multiplayer_obs_spec, 1)
      obs_spec = multiplayer_obs_spec[0]

      multiplayer_action_spec = env.action_spec()
      self.assertIsInstance(multiplayer_action_spec, tuple)
      self.assertLen(multiplayer_action_spec, 1)
      action_spec = multiplayer_action_spec[0]

      agent = random_agent.RandomAgent()
      agent.setup(obs_spec, action_spec)

      multiplayer_obs = env.reset()
      agent.reset()
      for _ in range(100):
        self.assertIsInstance(multiplayer_obs, tuple)
        self.assertLen(multiplayer_obs, 1)
        raw_obs = multiplayer_obs[0]
        obs = raw_obs.observation
        self.check_observation_matches_spec(obs, obs_spec)

        act = agent.step(raw_obs)
        multiplayer_act = (act,)
        multiplayer_obs = env.step(multiplayer_act)
Example #20
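A training worker: collects rollouts, updates the agent with a linearly decaying learning rate, and checkpoints every save_interval steps.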
def run_thread(agent, visualize):
    with sc2_env.SC2Env(map_name=FLAGS.map_name,
                        agent_interface_format=[
                            sc2_env.AgentInterfaceFormat(
                                feature_dimensions=sc2_env.Dimensions(
                                    screen=(FLAGS.screen_resolution,
                                            FLAGS.screen_resolution),
                                    minimap=(FLAGS.minimap_resolution,
                                             FLAGS.minimap_resolution)))
                        ],
                        step_mul=FLAGS.step_mul,
                        visualize=visualize) as env:

        replay_buffer = []
        for trajectory, done in rollout([agent], env, FLAGS.max_steps):
            if FLAGS.train:
                replay_buffer.append(trajectory)
                if done:
                    step = 0
                    with LOCK:
                        global STEP
                        STEP += 1
                        step = STEP
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * step / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.gamma, learning_rate,
                                 step)
                    replay_buffer = []
                    if (step + 1) % FLAGS.save_interval == 0:
                        agent.save_model(FLAGS.logdir, step)
                    if step >= FLAGS.max_steps:
                        break
Example #21
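A flexible runner where either player can be a scripted agent or a built-in bot at a flag-selected difficulty.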
def main(unused_argv):
    """Run an agent."""
    step_mul = FLAGS.step_mul

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    agent_interface_format = None
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError
    players = [
        sc2_env.Agent(sc2_env.Race.zerg),
        sc2_env.Agent(sc2_env.Race.zerg)
    ]
    agents = []
    bot_difficulty = get_difficulty(FLAGS.difficulty)
    if FLAGS.player1 == 'Bot':
        players[0] = sc2_env.Bot(sc2_env.Race.zerg, bot_difficulty)
    else:
        agents.append(get_agent(FLAGS.player1))
    if FLAGS.player2 == 'Bot':
        players[1] = sc2_env.Bot(sc2_env.Race.zerg, bot_difficulty)
    else:
        agents.append(get_agent(FLAGS.player2))
    with sc2_env.SC2Env(map_name=FLAGS.map,
                        visualize=FLAGS.visualize,
                        players=players,
                        step_mul=step_mul,
                        game_steps_per_episode=FLAGS.max_steps_per_episode * step_mul,
                        agent_interface_format=agent_interface_format,
                        disable_fog=FLAGS.disable_fog) as env:
        run_loop(agents,
                 env,
                 max_frames=0,
                 max_episodes=FLAGS.episodes,
                 sleep_time_per_step=FLAGS.sleep_time_per_step,
                 merge_units_info=FLAGS.merge_units_info)
Example #22
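A thin SC2Env subclass pinned to MoveToBeacon with an 84x64 feature-layer interface.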
 def __init__(self, **kwargs):
     super().__init__(
         map_name='MoveToBeacon',
         players=[sc2_env.Agent(sc2_env.Race.terran)],
         agent_interface_format=sc2_env.AgentInterfaceFormat(
             feature_dimensions=sc2_env.Dimensions(screen=84, minimap=64)),
         **kwargs)
Example #23
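Builds a pair of identical interface formats for a two-player environment; the pooled multi-env variant is left commented out.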
def make_envs(args, num_player, bot_level):
    aif = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(screen=args.sz, minimap=args.sz),
        use_feature_units=True)
    env_args = dict(map_name=args.map, num_player=num_player, bot_level=bot_level,
                    agent_interface_format=[aif, aif], step_mul=8, game_steps_per_episode=0)
    # return EnvPool([make_env(args.sz, **dict(env_args, visualize=i < args.render)) for i in range(args.envs)])
    return make_env(**dict(env_args, visualize=True))
Example #24
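A smoke test that runs a HiveMind agent on CyberForest for a fixed number of steps.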
 def test_hivemind(self):
     with sc2_env.SC2Env(
             map_name="CyberForest",
             agent_interface_format=sc2_env.AgentInterfaceFormat(
                 feature_dimensions=sc2_env.Dimensions(screen=84,
                                                       minimap=64)),
             step_mul=self.step_mul,
             game_steps_per_episode=self.steps * self.step_mul) as env:
         agent = HiveMind()
         run_loop.run_loop([agent], env, self.steps)
Example #25
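The ZergScoutEnv setup again, driven by a RandomAgent rather than a learner, with optional replay saving and profiling.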
def main(unused_argv):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    players = []
    players.append(sc2_env.Bot(races[FLAGS.bot_race], difficulties[FLAGS.difficulty]))
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    env = ZergScoutEnv(
            map_name=FLAGS.map,
            players=players,
            step_mul=FLAGS.step_mul,
            random_seed=rs,
            game_steps_per_episode=FLAGS.max_step,
            agent_interface_format=agent_interface_format,
            score_index=-1,  # this indicates the outcome is reward
            disable_fog=FLAGS.disable_fog,
            visualize=FLAGS.render
        )

    env = make(FLAGS.wrapper, env)

    agent = RandomAgent(env.unwrapped.action_space)
    run_loop(agent, env, max_episodes=FLAGS.max_agent_episodes, max_step=FLAGS.max_step)
    if FLAGS.save_replay:
        env.unwrapped.save_replay('save')

    if FLAGS.profile:
        print(stopwatch.sw)
Example #26
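An environment factory with square feature dimensions taken from arglist; the interface format is passed as a one-element list.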
def make_sc2env(**kwargs):
    agent_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE),
            minimap=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE),
        ))

    kwargs['agent_interface_format'] = [agent_format]
    env = sc2_env.SC2Env(**kwargs)
    return env
Example #27
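Another DQfD run on DefeatZerglingsAndBanelings, with logging switchable between TensorBoard and stdout.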
def main():
    FLAGS(sys.argv)

    logdir = "tensorboard"
    if (FLAGS.algorithm == "deepq"):
        logdir = "tensorboard/zergling/%s/%s_%s_prio%s_duel%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction,
            FLAGS.prioritized, FLAGS.dueling, FLAGS.lr, start_time)
    elif (FLAGS.algorithm == "acktr"):
        logdir = "tensorboard/zergling/%s/%s_num%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.num_cpu, FLAGS.lr,
            start_time)

    if (FLAGS.log == "tensorboard"):
        Logger.DEFAULT \
          = Logger.CURRENT \
          = Logger(dir=None,
                   output_formats=[TensorBoardOutputFormat(logdir)])

    elif (FLAGS.log == "stdout"):
        Logger.DEFAULT \
          = Logger.CURRENT \
          = Logger(dir=None,
                   output_formats=[HumanOutputFormat(sys.stdout)])

    with sc2_env.SC2Env(
            map_name="DefeatZerglingsAndBanelings",
            step_mul=step_mul,
            visualize=True,
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=32, minimap=32)),
            game_steps_per_episode=steps * step_mul) as env:
        obs = env.reset()
        #print(obs[0].observation)
        model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2),
                                               (64, 3, 1)],
                                        hiddens=[256],
                                        dueling=True)
        demo_replay = []
        act = dqfd.learn(env,
                         q_func=model,
                         num_actions=3,
                         lr=1e-4,
                         max_timesteps=10000000,
                         buffer_size=100000,
                         exploration_fraction=0.5,
                         exploration_final_eps=0.01,
                         train_freq=2,
                         learning_starts=100000,
                         target_network_update_freq=1000,
                         gamma=0.99,
                         prioritized_replay=True,
                         callback=deepq_callback)
        act.save("defeat_zerglings.pkl")
Example #28
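Trainer setup: wraps MoveToBeacon in an SCEnvironmentWrapper and builds a TF1 actor-critic loss, optionally restoring saved weights.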
    def __init__(self, env_interface, load_model=False):
        mineral_env_config = {
            "map_name": "MoveToBeacon",
            "visualize": False,
            "step_mul": 64,
            "game_steps_per_episode": None,
            "agent_interface_format": sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(screen=84, minimap=84),
                action_space=actions.ActionSpace.FEATURES,
                use_feature_units=True)
        }
        self.env_interface = env_interface
        self.agent = ConvAgent(self.env_interface)
        self.weights_dir = './weights'
        self.weights_path = os.path.join(self.weights_dir, 'model.ckpt')
        self.env = SCEnvironmentWrapper(self.env_interface,
                                        env_kwargs=mineral_env_config)
        self.curr_iteration = 0
        self.epoch = 0
        self.discount_factor = 0.7
        self.td_lambda = 0.9

        with self.agent.graph.as_default():
            self.session = self.agent.session
            self.session.run(tf.global_variables_initializer())
            self.rewards_input = tf.placeholder(tf.float32, [None],
                                                name="rewards")  # T
            self.behavior_log_probs_input = tf.placeholder(
                tf.float32, [None, None], name="behavior_log_probs")  # T
            self.saver = tf.train.Saver()
            self.loss = self._ac_loss()
            self.train_op = tf.train.AdamOptimizer(0.0003).minimize(self.loss)
            if load_model:
                try:
                    self._load_model()
                except ValueError:
                    print("Could not load model")

            self.variable_names = [v.name for v in tf.trainable_variables()]
            self.assign_placeholders = {
                t.name: tf.placeholder(t.dtype, t.shape)
                for t in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            }
            self.assigns = [
                tf.assign(tensor, self.assign_placeholders[tensor.name]) for
                tensor in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            ]
            self.session.run(tf.global_variables_initializer())
Example #29
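Like Example #26, except the interface format is passed directly rather than wrapped in a list.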
def make_sc2env(**kwargs):
    agent_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE),
            minimap=(arglist.FEAT2DSIZE, arglist.FEAT2DSIZE)))

    kwargs['agent_interface_format'] = agent_format

    env = sc2_env.SC2Env(**kwargs)
    # env = available_actions_printer.AvailableActionsPrinter(env)
    return env
Example #30
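A gym-style wrapper whose constructor injects a 64x64 feature-layer interface into the SC2Env kwargs.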
 def __init__(self, **kwargs):
     self._action_space = None
     self._episode = 0
     self._episode_reward = 0
     self._num_step = 0
     self._observation_space = None
     self._total_reward = 0
     self._kwargs = kwargs
     self._kwargs['agent_interface_format'] = sc2_env.AgentInterfaceFormat(
         feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64))
     self._env = sc2_env.SC2Env(**self._kwargs)