Example #1
    def testScreenSmallerThanMinimapRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=84, minimap=100)
Example #2
    def testScreenWidthHeightWithoutMinimapRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=(84, 80))
Example #3
    def testNoneNoneRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=None, minimap=None)
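The tests above show constructions that features.Dimensions rejects. For contrast, a minimal sketch of accepted constructions (a square surface as a single int, a rectangular one as a (width, height) tuple), assuming pysc2 is installed:

from pysc2.lib import features

# square surfaces: one int per surface
square_dims = features.Dimensions(screen=84, minimap=64)
# rectangular surfaces: (width, height) tuples, as in the constants at the end of this section
rect_dims = features.Dimensions(screen=(84, 80), minimap=(64, 67))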
Example #4
    def __init__(self,
                 reward_types,
                 map_name=None,
                 unit_type=[],
                 generate_xai_replay=False,
                 xai_replay_dimension=256,
                 verbose=False):
        if map_name is None:
            map_name = MAP_NAME
        maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
        register_map(maps_dir, map_name)

        if generate_xai_replay:
            aif = features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=SCREEN_SIZE,
                                                       minimap=SCREEN_SIZE),
                rgb_dimensions=sc2_env.Dimensions(
                    screen=(xai_replay_dimension, xai_replay_dimension),
                    minimap=(64, 64),
                ),
                action_space=actions.ActionSpace.FEATURES,
                camera_width_world_units=28,
                #use_camera_position = True,
            )
            step_mul_value = 4
        else:
            aif = features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=SCREEN_SIZE,
                                                       minimap=SCREEN_SIZE),
                action_space=actions.ActionSpace.FEATURES,
                camera_width_world_units=50,
            )
            step_mul_value = 16

        self.sc2_env = sc2_env.SC2Env(
            map_name=map_name,
            agent_interface_format=aif,
            step_mul=step_mul_value,
            game_steps_per_episode=0,
            score_index=0,
            visualize=True,
        )

        self.current_obs = None
        self.actions_taken = 0
        self.decomposed_rewards = []
        self.verbose = verbose

        self.signal_of_end = False
        self.end_state = None
        self.get_income_signal = 2

        self.reward_types = reward_types
        self.last_decomposed_reward_dict = {}
        self.decomposed_reward_dict = {}
        for rt in reward_types:
            self.decomposed_reward_dict[rt] = 0
            self.last_decomposed_reward_dict[rt] = 0

        unit_type = [
            UNIT_TYPES['Marine'], UNIT_TYPES['Viking'], UNIT_TYPES['Colossus']
        ]
        self.input_screen_features = {
            "PLAYER_RELATIVE": [1, 4],
            "UNIT_TYPE": unit_type,
            'HIT_POINT': 0,
            'HIT_POINT_RATIO': 0,
            'SHIELD': 0,
            'SHIELD_RATIO': 0,
            'UNIT_DENSITY': 0
        }
Example #5
    def testScreenSizeWithoutMinimapRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=84)
Example #6
def run_one_env(config, run_num=0, run_variables=None, rename_if_duplicate=False, output_file=None):
    # save a copy of the configuration file being used for a run in the run's folder (first time only)
    if run_variables is None:
        run_variables = {}
    restore = True
    if not os.path.exists(config['model_dir']):
        restore = False
    elif rename_if_duplicate:
        restore = False
        time = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        config['model_dir'] = config['model_dir'] + '_' + time

    if not restore and not config['inference_only']:
        os.makedirs(config['model_dir'], exist_ok=True)
        with open(config['model_dir'] + '/config.json', 'w+') as fp:
            fp.write(json.dumps(config, indent=4))

    # if continuing from another model (say for transfer learning), we are restoring
    if config['copy_model_from'] != "":
        restore = True

    # variables for episode stats
    max_ep_score = None
    all_ep_scores = []
    last_n_ep_score = []
    all_ep_wins = []
    last_n_ep_wins = []
    win_count = 0

    # action use stats
    actions_used = {}

    if output_file is not None and not os.path.isfile(output_file):
        write_summary_file_header(output_file, run_variables)

    with sc2_env.SC2Env(
        map_name=config['env']['map_name'],
        players=[sc2_env.Agent(sc2_env.Race['random'], None)],
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(config['env']['screen_size'], config['env']['minimap_size']),
            action_space=actions.ActionSpace.FEATURES,
            use_feature_units=True,
            use_raw_units=True
        ),
        visualize=config['env']['visualize'],
        step_mul=config['env']['step_mul'],
        realtime=config['inference_only'] and config['inference_only_realtime']
    ) as env:
        tf.reset_default_graph()
        tf_config = tf.ConfigProto()
        # tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            if 'use_scripted_bot' in config:
                if config['use_scripted_bot'] == 'noop':
                    rl_agent = scripted_bots.NoopBot()
                elif config['use_scripted_bot'] == 'random':
                    rl_agent = scripted_bots.RandomBot(config)
                elif config['use_scripted_bot'] == 'attack_weakest':
                    rl_agent = scripted_bots.AttackWeakestBot(config)
                elif config['use_scripted_bot'] == 'attack_weakest_nearest':
                    rl_agent = scripted_bots.AttackWeakestNearestBot(config)
                else:
                    rl_agent = DQNAgent(sess, config, restore)
            else:
                rl_agent = DQNAgent(sess, config, restore)
            # observations from the env are tuples of 1 Timestep per player
            obs = env.reset()[0]
            step = 0
            episode = 1
            episode_reward = 0

            # if we are using evaluation episodes, this will be true during those episodes
            eval_episode = False

            # Rewards from the map have to be integers,
            # and some maps calculate normalized float rewards and then multiply them by some factor.
            match = re.search(r"factor_(\d+)", config['env']['map_name'])
            factor = float(match.group(1)) if match else 1.0

            # For combat micro maps we may have a shaped reward or not, but we independently want to record win/loss
            match = re.match(r"^combat", config['env']['map_name'])
            win_loss = bool(match)

            new_episode = True
            while (config['max_steps'] == 0 or step <= config['max_steps']) and (config['max_episodes'] == 0 or episode <= config['max_episodes']):
                state = preprocess_state(obs, config)
                available_actions = obs.observation['available_actions']

                step_reward = obs.reward / factor
                if 'step_penalty' in config:
                    step_reward -= config['step_penalty']
                episode_reward += step_reward
                win = 0

                terminal = False
                # handle episode end
                if obs.step_type is StepType.LAST:
                    terminal = True
                    # if this map type uses this win/loss calc
                    if win_loss:
                        win = get_win_loss(obs)
                        win_count += win
                        if 'episode_extra_win_reward' in config:
                            step_reward += config['episode_extra_win_reward'] * win
                            episode_reward += config['episode_extra_win_reward'] * win

                    if eval_episode:
                        print("Eval Episode", episode, "finished. Steps:", step, "Win:", win, "Score:", episode_reward)
                    else:
                        print("Episode", episode, "finished. Steps:", step, "Win:", win, "Score:", episode_reward)

                    # don't add to run stats if doing an eval episode and not training
                    if not eval_episode or config['train_on_eval_episodes']:
                        if len(last_n_ep_score) == num_eps_summary_last:
                            last_n_ep_score.pop(0)
                            last_n_ep_wins.pop(0)
                        last_n_ep_score.append(episode_reward)
                        last_n_ep_wins.append(win)
                        all_ep_scores.append(episode_reward)
                        all_ep_wins.append(win)
                        if max_ep_score is None or episode_reward > max_ep_score:
                            max_ep_score = episode_reward

                    episode_reward = 0
                    episode += 1

                    # check for eval episode. can't have two eval eps in a row. repeat episode num after eval ep
                    if eval_episode and not config['train_on_eval_episodes']:
                        eval_episode = False
                        episode -= 1
                    else:
                        eval_episode = config['do_eval_episodes'] and episode % config['one_eval_episode_per'] == 0

                # we don't take an action (from the perspective of the agent) on a terminal state, so no step++
                if not terminal and (not eval_episode or config['train_on_eval_episodes']):
                    step += 1

                # observe the reward if this state is not the first of a new episode
                if not new_episode:
                    rl_agent.observe(terminal=terminal, reward=step_reward, win=win, eval_episode=eval_episode)

                if not terminal:
                    new_episode = False
                    action = rl_agent.act(state, available_actions, eval_episode=eval_episode)
                    action_for_sc = get_action_function(obs, action, config)

                    if not config['inference_only'] and (not eval_episode or config['train_on_eval_episodes']):
                        action_name = actions.FUNCTIONS[action_for_sc.function].name
                        if action_name not in actions_used:
                            actions_used[action_name] = [0] * (episode - 1)
                            actions_used[action_name].append(1)
                        else:
                            # this action may not have been used for some episode(s)
                            actions_used[action_name] += [0] * (episode - len(actions_used[action_name]))
                            # increment count for this episode
                            actions_used[action_name][-1] += 1
                else:
                    # take dummy no_op action if this is a terminal state
                    action_for_sc = actions.FunctionCall(0, [])
                    # if this was a terminal state, the next state is going to be the beginning of an episode
                    new_episode = True
                # actions passed into env.step() are in a list with one action per player
                obs = env.step([action_for_sc])[0]

    # write out run stats to output file if doing a batch
    if output_file is not None:
        write_summary_file_line(
            output_file,
            last_n_ep_score,
            all_ep_scores,
            last_n_ep_wins,
            all_ep_wins,
            config,
            run_num,
            step,
            episode,
            max_ep_score,
            win_count,
            run_variables
        )

    if experiments_summary_file is not None:
        write_summary_file_line(
            experiments_summary_file,
            last_n_ep_score,
            all_ep_scores,
            last_n_ep_wins,
            all_ep_wins,
            config,
            run_num,
            step,
            episode,
            max_ep_score,
            win_count
        )

    # write out the stats of which actions were used to a file if training
    if not config['inference_only']:
        with open(config['model_dir'] + '/action_stats.csv', 'a+') as f:
            headers = []
            for key in actions_used:
                headers.append(key)
                # add 0s to end if needed
                actions_used[key] += [0] * (episode - len(actions_used[key]))
            f.write(','.join(headers) + '\n')
            # grab any key; all action lists have the same length after padding
            sample_key = next(iter(actions_used), "")
            for i in range(len(actions_used[sample_key])):
                episode_actions = []
                for key in actions_used:
                    episode_actions.append(actions_used[key][i])
                f.write(','.join(str(val) for val in episode_actions) + '\n')

    # print out some results of the run if we are doing inference only not in realtime
    if config['inference_only'] and not config['inference_only_realtime']:
        print('Inference_only summary for', config['model_dir'] + ':')
        print('Num episodes:', episode - 1)
        print('Win rate:', win_count / (episode - 1))
        print('Average score:', sum(all_ep_scores) / (episode - 1))
        print('Max score:', max_ep_score)
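The factor_(\d+) parsing in run_one_env above rescales integer map rewards back to floats. A small standalone sketch of that logic (the map name below is hypothetical, used only for illustration):

import re

def reward_factor(map_name):
    # maps that encode a scaling factor in their name report integer rewards
    # that must be divided by that factor; all other maps use a factor of 1
    match = re.search(r"factor_(\d+)", map_name)
    return float(match.group(1)) if match else 1.0

assert reward_factor("collect_factor_100") == 100.0  # hypothetical map name
assert reward_factor("MoveToBeacon") == 1.0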
Example #7
def run_game_with_agent(agent, mapname, iterations):
    ### dqn parameters
    frame_num = 4
    state_size = [84, 84, 7 * frame_num]
    action_size = 2
    learning_rate = 0.001

    eps_f = 0.05
    eps_s = 1.00

    # Q learning hyperparameters
    gamma = 0.95  # Discounting rate

    ### TRAINING HYPERPARAMETERS
    total_episodes = 10  #prev 2000    # Total episodes for training
    batch_size = 2  #prev 100
    iter_num = 10  #prev 20

    ### Experience HYPERPARAMETERS
    print("pre training!")
    pretrain_length = batch_size  # Number of experiences stored in the Memory when initialized for the first time
    experience_size = 3 * batch_size  #prev 800
    dqn_sc2 = DQN(state_size, action_size, learning_rate)
    game_data = []
    exp = experience(experience_size)
    decay_rate = 0.0005  #prev 0.0005
    with sc2_env.SC2Env(
            map_name=mapname,
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True),
            step_mul=100,  #if too low, nothing really happens
            visualize=True,
            game_steps_per_episode=0) as env:

        agent.setup(env.observation_spec(), env.action_spec())
        #pretrain
        state = State(4)
        for i in range(pretrain_length):
            # print("Playing game {}".format(i+1))
            timesteps = env.reset()
            agent.reset()
            state.reinitialize(transform_state(timesteps))  #initialize state

            while True:
                # print("new")
                state_old = state
                [step_actions, step_actions_array] = random_step(timesteps[0])
                # print("random action: ", step_actions)
                done = False
                if timesteps[0].last():
                    done = True
                reward = get_reward(timesteps[0], done)
                timesteps = env.step([step_actions])
                state.add(transform_state(timesteps))
                # for layer in state[0]:
                #     # print("layer: ", layer.shape)
                #     print ("Is zero?: ", (np.zeros(layer.shape) == layer).all())
                exp.add([
                    state_old.get(), step_actions_array, reward,
                    state.get(), done
                ])

                if timesteps[0].last():
                    break

                # print("timesteps [0] : ", type(timesteps[0]))

        #train
        agent.setup(env.observation_spec(), env.action_spec())
        saver = tf.train.Saver()
        # total_learning_episodes =10
        agent.reset()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            time = 0  #initialize time
            for i in range(iter_num):
                train_iteration(env, sess, agent, exp, saver, total_episodes,
                                dqn_sc2, eps_s, eps_f, decay_rate, time,
                                batch_size, gamma)
                test(env, agent, saver, dqn_sc2, exp)
    return []
Example #8
def training_game():
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = _SIZE * _SIZE  # one discrete action per screen coordinate

    model = neural_network_model(input_shape, nb_actions)
    # memory : how many subsequent observations should be provided to the network?
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    ### Policy
    # The agent's behaviour function: how the agent picks actions.
    # LinearAnnealedPolicy is a wrapper that linearly anneals an attribute of the
    # inner policy (here eps) from value_max down to value_min over nb_steps.
    # EpsGreedyQPolicy picks a uniformly random action with probability eps and the
    # greedy (highest Q-value) action otherwise.
    # BoltzmannQPolicy samples actions from a Boltzmann (softmax) distribution over
    # the Q-values, so higher-valued actions are chosen more often.

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.7,
                                  value_test=.0,
                                  nb_steps=1e6)
    # policy = BoltzmannQPolicy(tau=1., clip=(-500, 500))  # Q-values clipped to [-500, 500]

    ### Agent
    # DQNAgent: deep Q-learning, i.e. Q-learning (the Bellman equation) with a deep
    # neural network as the Q-function approximator.

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=500,
                   target_model_update=1e-2,
                   policy=policy,
                   batch_size=150,
                   processor=processor)

    dqn.compile(Adam(lr=.001), metrics=["mae"])

    ## Save the weights and load them back when needed

    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)
    callbacks = [ModelIntervalCheckpoint(check_w_file, interval=1000)]
    callbacks += [FileLogger(log_file, interval=100)]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=1e7,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
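For reference, the linear annealing that LinearAnnealedPolicy applies to eps in training_game can be sketched as follows; this is an illustrative approximation of keras-rl's behaviour, not its actual source:

def annealed_eps(step, value_max=1.0, value_min=0.7, nb_steps=1e6):
    # eps decreases linearly from value_max to value_min over nb_steps, then stays
    # at value_min; value_test is used only when the agent is evaluated
    frac = min(step / nb_steps, 1.0)
    return value_max - (value_max - value_min) * frac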
Example #9
def main(unused_argv):
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name="MoveToBeacon",
                    players=[sc2_env.Agent(sc2_env.Race.terran)],
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(screen=84,
                                                               minimap=64),
                        # default size of feature screen and feature minimap
                        use_feature_units=True),
                    step_mul=64,  # 16 gives roughly 150 APM (8 would give 300 APM);
                                  # a larger value makes the game run faster
                    game_steps_per_episode=0,
                    visualize=True) as env:
                # create a keras-rl env
                keras_env = PySC2ToKerasRL_env(env)
                obs = keras_env.reset()

                # create an agent that can interact with the keras-rl environment

                # Test Agent (makes marine run in circle)
                # keras_agent = MoveToBeacon_KerasRL()
                # keras_agent.reset()
                # while True: #play the game
                #
                #     step_actions = keras_agent.step(obs)
                #     obs, reward, done, info = keras_env.step(step_actions)

                # Replace simple agent with a learning one
                # A simple model (taken from Keras-RL cartpole dqn)
                nb_actions = keras_env.action_space.n
                model = Sequential()
                model.add(
                    Flatten(input_shape=(1, ) +
                            keras_env.observation_space.shape))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(nb_actions))
                model.add(Activation('linear'))
                print(model.summary())
                output_filename = "DQN_Rewards_smallerObs_smallerActions.csv"

                #some other model
                # model = Sequential()
                # model.add(Flatten(input_shape=(1,) + keras_env.observation_space.shape))
                # model.add(Dense(16))
                # model.add(Activation('relu'))
                # model.add(Dense(16))
                # model.add(Activation('relu'))
                # model.add(Dense(16))
                # model.add(Activation('relu'))
                # model.add(Dense(nb_actions))
                # model.add(Activation('linear'))
                # print(model.summary())
                #output_filename = "DQN Rewards.csv"

                # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
                # even the metrics!
                memory = SequentialMemory(limit=50000, window_length=1)
                policy = BoltzmannQPolicy()
                dqn = DQNAgent(model=model,
                               nb_actions=nb_actions,
                               memory=memory,
                               nb_steps_warmup=15,
                               target_model_update=1e-2,
                               policy=policy)
                dqn.compile(Adam(lr=1e-3), metrics=['mae'])

                # Okay, now it's time to learn something! (hopefully)

                hist = dqn.fit(keras_env,
                               nb_steps=50000,
                               visualize=False,
                               verbose=2)

                with open(output_filename, 'w+',
                          newline='') as csvfile:  #save the rewards over time
                    writer = csv.writer(csvfile)
                    writer.writerow(hist.history.get('episode_reward'))
                break  #kill the env

    except KeyboardInterrupt:
        pass
Example #10
    def testNegativeMinimapTupleRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=64, minimap=(-32, -32))
Example #11
    def __init__(self,
                 map_name='MoveToBeacon',
                 visualize=False,
                 screen_dim=32,
                 minimap_dim=32,
                 mock=False):
        super().__init__()

        self.settings = {
            'map_name':
            map_name,
            'players':
            [sc2_env.Agent(sc2_env.Race.terran)],  # true for all mini-games
            'agent_interface_format':
            features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=screen_dim,
                                                       minimap=minimap_dim),
                use_feature_units=True),
            'step_mul':
            8,  # how many game steps pass between actions; default is 8, which is 300APM, 16 means 150APM
            'game_steps_per_episode':
            0,  # the fixed length of a game, if 0: as long as needed
            'visualize':
            visualize,  # whether to draw the game
        }

        # see https://github.com/deepmind/pysc2/blob/master/docs/mini_games.md
        if map_name == "MoveToBeacon":
            # Fog of War disabled
            # No camera movement required (single-screen)
            action_set = ActionSet.Select_Army_Move_2D
        elif map_name == "CollectMineralShards":
            # Fog of War disabled
            # No camera movement required (single-screen)
            # action_set = ActionSet.Select_Army_Move_2D
            action_set = ActionSet.Select_Multi_Move_2D
        elif map_name == "FindAndDefeatZerglings":
            # Fog of War enabled
            # Camera movement required (map is larger than single-screen)
            # action_set = ActionSet.Select_Army_Attack_2D
            action_set = ActionSet.Attack_2D_Move_Camera
        elif map_name == "DefeatRoaches":
            # Fog of War disabled
            # No camera movement required (single-screen)
            # action_set = ActionSet.Select_Army_Attack_2D
            action_set = ActionSet.Select_Multi_Move_Attack_2D
        elif map_name == "DefeatZerglingsAndBanelings":
            # Fog of War disabled
            # No camera movement required (single-screen)
            # action_set = ActionSet.Select_Army_Attack_2D
            action_set = ActionSet.Select_Multi_Move_Attack_2D
        elif map_name == "CollectMineralsAndGas":
            # Fog of War disabled
            # No camera movement required (single-screen)
            action_set = ActionSet.Build_SCVs
        elif map_name == "BuildMarines":
            # Fog of War disabled
            # No camera movement required (single-screen)
            action_set = ActionSet.Build_Marines
        else:
            raise ValueError("map is not supported")

        self.mock = mock
        self._wrapped_env = self._init_env()

        self._space_converter = SpaceConverter(
            action_set=action_set,
            action_spec=self._wrapped_env.action_spec(),
            observation_spec=self._wrapped_env.observation_spec(),
            env_settings=self.settings,
            screen_dim=screen_dim)

        self.action_space = self._space_converter.get_action_space()
        self.observation_space = self._space_converter.get_observation_space()

        self._cur_timestep = None
Example #12
    def testNegativeScreenTupleRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=(-64, -64), minimap=32)
Example #13
    def testThreeTupleMinimapRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=64, minimap=(1, 2, 3))
Example #14
    def testTwoZeroesRaises(self):
        with self.assertRaises(ValueError):
            features.Dimensions(screen=(0, 0), minimap=(0, 0))
Example #15
class Env:
    metadata = {'render.modes': ['human']}
    default_settings = {
        'map_name':
        "CollectMineralShards",
        'players': [sc2_env.Agent(sc2_env.Race.terran)],
        'agent_interface_format':
        features.AgentInterfaceFormat(feature_dimensions=features.Dimensions(
            screen=64, minimap=64),
                                      action_space=actions.ActionSpace.RAW,
                                      use_raw_units=True,
                                      raw_resolution=64),
        'step_mul':
        2,
        'game_steps_per_episode':
        0,
        'visualize':
        True,
        'realtime':
        False
    }

    def __init__(self, **kwargs):
        super().__init__()
        self.kwargs = kwargs
        self.env = None
        self.marine1 = None
        self.marine2 = None
        self.marine1_ID = None
        self.marine2_ID = None
        self.action_counter = 0
        self.state = np.zeros([3, 64, 64])

    def reset(self):
        if self.env is None:
            self.init_env()
        self.marine1 = None
        self.marine2 = None
        self.action_counter = 0
        self.state = np.zeros([3, 64, 64])

        raw_obs = self.env.reset()[0]
        return self.get_state_from_obs(raw_obs, True)

    def init_env(self):
        args = {**self.default_settings, **self.kwargs}
        self.env = sc2_env.SC2Env(**args)

    def get_state_from_obs(self, raw_obs, reset):
        marines = self.get_units_by_type(raw_obs, units.Terran.Marine)
        if reset:
            self.marine1_ID = marines[0].tag
            self.marine2_ID = marines[1].tag
            self.marine1 = marines[0]
            self.marine2 = marines[1]
        else:
            if self.marine1_ID == marines[0].tag:
                self.marine1 = marines[0]
                self.marine2 = marines[1]
            elif self.marine1_ID == marines[1].tag:
                self.marine1 = marines[1]
                self.marine2 = marines[0]
            else:
                assert False
        shard_matrix = np.array(
            raw_obs.observation.feature_minimap.player_relative)
        shard_matrix[shard_matrix < 2] = 0

        marine1_matrix = np.zeros([64, 64])
        marine1_matrix[self.marine1.x, int(self.marine1.y)] = 1

        marine2_matrix = np.zeros([64, 64])
        marine2_matrix[self.marine2.x, int(self.marine2.y)] = 2

        self.state = np.stack([shard_matrix, marine1_matrix, marine2_matrix],
                              axis=0)
        return self.state

    def step(self, action):
        raw_obs = self.take_action(action)
        new_state = self.get_state_from_obs(raw_obs, False)
        return new_state, int(raw_obs.reward), raw_obs.last()

    def take_action(self, action):
        if action < 4096:
            x = action % 64
            y = int(action / 64)
            mapped_action = actions.RAW_FUNCTIONS.Move_pt(
                "now", self.marine1.tag, [x, y])
        else:
            action = action - 4096
            x = action % 64
            y = int(action / 64)
            mapped_action = actions.RAW_FUNCTIONS.Move_pt(
                "now", self.marine2.tag, [x, y])

        raw_obs = self.env.step([mapped_action])[0]

        return raw_obs

    def get_units_by_type(self, obs, unit_type):
        unit_list = []
        for unit in obs.observation.raw_units:
            if unit.unit_type == unit_type:
                unit_list.append(unit)
        return unit_list

    def close(self):
        if self.env is not None:
            self.env.close()
        super().close()
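In take_action above, a single integer in [0, 8192) encodes both which marine moves and the target coordinate. A short sketch of the matching encoder (a hypothetical helper, not part of the class):

def encode_move(marine_index, x, y, screen=64):
    # marine_index 0 -> marine1, 1 -> marine2; actions 0..4095 move marine1 and
    # 4096..8191 move marine2, with x = action % screen and y = action // screen
    return marine_index * screen * screen + y * screen + x

assert encode_move(0, x=10, y=3) == 3 * 64 + 10
assert encode_move(1, x=0, y=0) == 4096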
Example #16
def run_game_with_agent(agent, mapname, iterations):
    ### dqn parameters
    frame_num = 4
    state_size = [84, 84, 7 * frame_num]
    action_size = 32 + 40 + 1
    learning_rate = 0.00025

    eps_f = 0.1
    eps_s = 1.00

    # Q learning hyperparameters
    gamma = 0.95  # Discounting rate

    ### TRAINING HYPERPARAMETERS
    total_episodes = 200  #prev 2000    # Total episodes for training
    batch_size = 10  #prev 100
    iter_num = 10  #prev 20

    ### Experience HYPERPARAMETERS
    print("pre training!")
    pretrain_length = batch_size  # Number of experiences stored in the Memory when initialized for the first time
    experience_size = 3 * batch_size  #prev 800
    dqn_sc2 = DQN(state_size, action_size, learning_rate, name="dqn_sc2")
    dqn_target = DQN(state_size, action_size, learning_rate, name="dqn_target")
    game_data = []
    exp = experience(experience_size)
    decay_rate = 0.0000009  #prev 0.0005
    with sc2_env.SC2Env(
            map_name=mapname,
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True),
            step_mul=20,  #if too low, nothing really happens
            visualize=True,
            game_steps_per_episode=0) as env:

        agent.setup(env.observation_spec(), env.action_spec())
        #pretrain
        state = State(4)
        for i in range(pretrain_length):
            # print("Playing game {}".format(i+1))
            timesteps = env.reset()
            agent.reset()
            state.reinitialize(transform_state(timesteps))  #initialize state

            while True:
                # print("new")
                state_old = state
                [step_actions, step_actions_array] = random_step(timesteps[0])
                # print("random action: ", step_actions)
                done = False
                if timesteps[0].last():
                    done = True
                reward = get_reward(timesteps[0], done)
                timesteps = env.step([step_actions])
                state.add(transform_state(timesteps))
                # for layer in state[0]:
                #     # print("layer: ", layer.shape)
                #     print ("Is zero?: ", (np.zeros(layer.shape) == layer).all())
                exp.add([
                    state_old.get(), step_actions_array, reward,
                    state.get(), done
                ])

                if timesteps[0].last():
                    break

                # print("timesteps [0] : ", type(timesteps[0]))

        #train
        agent.setup(env.observation_spec(), env.action_spec())
        saver = tf.train.Saver()
        # total_learning_episodes =10
        agent.reset()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            time = 0  #initialize time
            for i in range(iter_num):
                time = train_iteration(env, sess, agent, exp, saver,
                                       total_episodes, dqn_sc2, dqn_target,
                                       eps_s, eps_f, decay_rate, time,
                                       batch_size, gamma)

    # #execute with what we have
    #     print("testing!")
    #     with tf.Session() as sess:
    #         agent.setup(env.observation_spec(), env.action_spec())

    #         # Load the model
    #         saver.restore(sess, "./models/dqn_split.ckpt")
    #         state = State(4)
    #         for i in range(10):
    #             agent.reset()
    #             timesteps = env.reset()

    #             state.reinitialize(transform_state(timesteps) )
    #             totalScore = 0
    #             done=False
    #             while not done:
    #                 # Take the biggest Q value (= the best action)

    #                 Qs = sess.run(dqn_sc2.Yhat, feed_dict = {dqn_sc2.inputs_: state.get().reshape((1,*(state.get().shape) ))}) #run NN

    #                 [step_actions , step_actions_array] =argmax_action(np.argmax(Qs),timesteps[0] )
    #                 state_old = state
    #                 timesteps = env.step([step_actions])
    #                 state.add(transform_state(timesteps))

    #                 if timesteps[0].last():
    #                     done=True
    #                 reward = get_reward(timesteps[0],done)
    #                 exp.add([state_old.get(),step_actions_array,  reward, state.get(), done])

    #     #             print("Score: ", reward)
    #                 totalScore += reward
    #             print("TOTAL_SCORE", totalScore)

    return []
Example #17
def main(unused_argv):
    agent = terranAgent()
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name="HellionZerglings",
                    players=[sc2_env.Agent(sc2_env.Race.terran)],
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(screen=84,
                                                               minimap=64),
                        use_feature_units=True),
                    step_mul=16,
                    game_steps_per_episode=0,
                    visualize=True) as env:

                agent.setup(env.observation_spec(), env.action_spec())

                timesteps = env.reset()
                agent.reset()

                agent.terminal = False
                agent.s, agent.readout, agent.h_fc1 = agent.createNetwork()

                # define the cost function
                agent.a = tf.placeholder("float", [None, ACTIONS])
                agent.y = tf.placeholder("float", [None])
                agent.readout_action = tf.reduce_sum(tf.multiply(
                    agent.readout, agent.a),
                                                     reduction_indices=1)
                agent.cost = tf.reduce_mean(
                    tf.square(agent.y - agent.readout_action))
                agent.train_step = tf.train.AdamOptimizer(1e-6).minimize(
                    agent.cost)

                # store the previous observations in replay memory
                agent.D = deque()

                # printing
                agent.a_file = open("logs_" + GAME + "/readout.txt", 'w')
                agent.h_file = open("logs_" + GAME + "/hidden.txt", 'w')

                agent.x_t = agent.grab_screen(timesteps[0])
                print("SHAPE: {}".format(agent.x_t.shape))
                agent.r_t = -0.1
                #agent.ret, agent.x_t = cv2.threshold(agent.x_t, 1, 255, cv2.THRESH_BINARY)
                agent.s_t = np.stack(
                    (agent.x_t, agent.x_t, agent.x_t, agent.x_t), axis=2)
                agent.a_t = np.zeros([ACTIONS])
                agent.a_t[0] = 1
                agent.action_index = 0
                agent.total_reward = 0
                agent.readout_t = 0
                agent.sess = tf.InteractiveSession()
                # saving and loading networks
                agent.saver = tf.train.Saver()
                agent.sess.run(tf.initialize_all_variables())
                agent.checkpoint = tf.train.get_checkpoint_state(
                    "saved_networks")
                # if agent.checkpoint and agent.checkpoint.model_checkpoint_path:
                #     agent.saver.restore(agent.sess, agent.checkpoint.model_checkpoint_path)
                #     print("Successfully loaded:", agent.checkpoint.model_checkpoint_path)
                # else:
                #     print("Could not find old network weights")

                agent.epsilon = INITIAL_EPSILON
                agent.t = 0

                while True:
                    step_actions = [agent.step(timesteps[0])]
                    timesteps = env.step(step_actions)

    except KeyboardInterrupt:
        pass
Example #18
    def __init__(self, map_name = None, unit_type = [], generate_xai_replay = False, xai_replay_dimension = 256, verbose = False):
        if map_name is None:
            map_name = MAP_NAME
        maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
        print("map director: " + str(maps_dir))
        register_map(maps_dir, map_name)
        
        if generate_xai_replay:
            aif=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=SCREEN_SIZE, minimap=SCREEN_SIZE),
                rgb_dimensions=sc2_env.Dimensions(
                screen=(1.5*xai_replay_dimension, xai_replay_dimension),
                minimap=(64, 64),
                ),
                action_space=actions.ActionSpace.FEATURES,
                camera_width_world_units = 28,
                #use_camera_position = True,
            )

            step_mul_value = 4
            # step_mul_value = 16

        else:
            aif=features.AgentInterfaceFormat(
              feature_dimensions = features.Dimensions(screen = SCREEN_SIZE, minimap = SCREEN_SIZE),
              action_space = actions.ActionSpace.FEATURES,
              camera_width_world_units = 100,
              )
            step_mul_value = 16
        np.set_printoptions(threshold=sys.maxsize,linewidth=sys.maxsize, precision = 2)
        
        self.sc2_env = sc2_env.SC2Env(
          map_name = map_name,
          agent_interface_format = aif,

          step_mul = step_mul_value,
          game_steps_per_episode = 0,
          score_index = 0,
          visualize = True,)

        
        self.current_obs = None
        self.decomposed_rewards = []
        self.verbose = verbose
        
        
        self.miner_index = 0
        self.reset_steps = -1
        self.mineral_limiation = 1500
        self.norm_vector = np.array([700, 50, 40, 20, 50, 40, 20, 3,
                                    50, 40, 20, 50, 40, 20, 3,
                                    50, 40, 20, 50, 40, 20, 
                                    50, 40, 20, 50, 40, 20,
                                    2000, 2000, 2000, 2000, 40])
        
        self.decision_point = 1
        self.signal_of_end = False
        self.end_state = None
        self.maker_cost_np = np.zeros(len(maker_cost))
        # The combine function has to change if this value changes
        self.pylon_cost = 300
        self.pylon_index = 7
        for i, mc in enumerate(maker_cost.values()):
            self.maker_cost_np[i] = mc

        self.last_decomposed_reward_dict = {}
        self.decomposed_reward_dict = {}
        self.num_waves = 0
        
        maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
        action_dict_path = os.path.join(os.path.dirname(__file__), 'action_1500_tow_2L.pt')
        print("actions path:" + action_dict_path)
        self.a_dict = torch.load(action_dict_path)
        self.action_space = self.a_dict['actions']
        self.action_space_dict = self.a_dict['mineral']
        # print(self.a_dict.keys())
        # At the end of the reward type name:
        #   1 means positive for player 1, negative for player 2
        #   2 means positive for player 2, negative for player 1
        self.reward_types = list(reward_dict.values())
        # print(self.reward_types)
        for rt in self.reward_types:
            self.decomposed_reward_dict[rt] = 0
            self.last_decomposed_reward_dict[rt] = 0

        unit_type = [UNIT_TYPES['Marine'], UNIT_TYPES['Baneling'], UNIT_TYPES['Immortal']]
Example #19
def main(unused_argv):
    agent = AiBot()
    epsilon = 0
    epsilon_min = 0
    eps_reduction_factor = 0.99
    save_game = False
    episode = 0
    path = ""
    save_buffer = False

    last_100 = deque(maxlen=100)
    iter = 1

    agent.actor_critic_agent = ActorCriticAgent(15, [
        "no_op", "build_scv", "build_supply_depot", "build_marine",
        "build_barracks", "return_scv", "attack"
    ], epsilon)

    game_results = []
    latest_result = 0
    all_rewards = []

    # Plot setup
    fig, ax = plt.subplots(num=1)
    ax.plot()
    ax.set_title("Score for each game over time")

    fig2, ax2 = plt.subplots(num=3)
    ax2.plot()
    ax2.set_title("win%")

    fig3, ax3 = plt.subplots(num=4)
    ax3.plot()
    ax3.set_title("totalreward")

    try:
        with sc2_env.SC2Env(
                map_name="AbyssalReef",
                players=[
                    sc2_env.Agent(sc2_env.Race.terran),
                    sc2_env.Bot(sc2_env.Race.terran, sc2_env.Difficulty.medium)
                ],
                agent_interface_format=features.AgentInterfaceFormat(
                    feature_dimensions=features.Dimensions(screen=84,
                                                           minimap=64),
                    use_feature_units=True,
                    use_raw_units=True,
                    use_camera_position=True),
                step_mul=8,
                game_steps_per_episode=30000,
                visualize=False,
                disable_fog=True) as env:
            while True:
                agent.setup(env.observation_spec(), env.action_spec())

                timesteps = env.reset()
                agent.actor_critic_agent.build_index = 0
                if os.path.isfile('good_buffer.data'):
                    filehandler1 = open("good_buffer.data", 'rb')
                    agent.actor_critic_agent.good_buffer = pickle.load(
                        filehandler1)
                agent.reset()
                if episode > 0:
                    all_rewards = all_rewards + [
                        agent.actor_critic_agent.total_reward
                    ]
                    with open('all_rewards.txt', mode='w') as filehandle:
                        for i in all_rewards:
                            filehandle.write("%s\n" % i)
                    game_results = game_results + [latest_result]
                    with open('game_results.txt', mode='w') as filehandle:
                        for i in game_results:
                            filehandle.write("%s\n" % i)
                episode += 1
                if agent.actor_critic_agent.epsilon > epsilon_min:
                    agent.actor_critic_agent.epsilon *= eps_reduction_factor
                if agent.actor_critic_agent.buffer_epsilon > agent.actor_critic_agent.buffer_epsilon_min:
                    agent.actor_critic_agent.buffer_epsilon *= agent.actor_critic_agent.buffer_epsilon_decay
                if agent.actor_critic_agent.actor.IMITATION_WEIGHT > 0.0001:
                    agent.actor_critic_agent.actor.IMITATION_WEIGHT = imitation_factor(
                        episode)
                else:
                    agent.actor_critic_agent.actor.IMITATION_WEIGHT = 0.0001
                print("Imitation weight: ",
                      agent.actor_critic_agent.actor.IMITATION_WEIGHT)

                # For determining win/loss/tie
                agent.reward = 0
                # For plotting total reward
                agent.actor_critic_agent.total_reward = 0

                while True:
                    step_actions = [agent.step(timesteps[0], epsilon, episode)]

                    if timesteps[0].last():
                        state, oldscore, map = agent.game_state.get_state_now(
                            timesteps[0])

                        # If it won
                        if agent.reward == 1:
                            last_100.append(1)
                            latest_result = 1
                            end_reward = 100
                        # If it lost
                        elif agent.reward == -1:
                            last_100.append(0)
                            latest_result = 0
                            end_reward = -100
                        # If time's up
                        else:
                            last_100.append(0)
                            latest_result = 0
                            end_reward = -5

                        agent.actor_critic_agent.total_reward += end_reward

                        if agent.actor_critic_agent.GOOD_GAME:
                            agent.actor_critic_agent.good_buffer.append([
                                agent.actor_critic_agent.prev_state[0],
                                agent.actor_critic_agent.prev_actions,
                                end_reward, state[0], True
                            ])
                        else:
                            agent.actor_critic_agent.buffer.append([
                                agent.actor_critic_agent.prev_state[0],
                                agent.actor_critic_agent.prev_actions,
                                end_reward, state[0], True
                            ])

                        if save_buffer and agent.reward == 1:
                            filehandler = open("good_buffer.data", 'wb')
                            pickle.dump(agent.actor_critic_agent.good_buffer,
                                        filehandler)
                            print(len(agent.actor_critic_agent.good_buffer))
                        if save_game:
                            agent.save_game(path, episode)
                        print("Score: ",
                              timesteps[0].observation.score_cumulative.score)
                        ax.scatter(
                            episode,
                            timesteps[0].observation.score_cumulative.score,
                            s=3,
                            c='blue')
                        fig.savefig("score.png")

                        if len(last_100) == last_100.maxlen:
                            percent = sum(last_100) / 100
                            ax2.scatter(iter, percent, s=3, c='blue')
                            iter += 1
                            fig2.savefig("winpercent.png")
                        ax3.scatter(episode,
                                    agent.actor_critic_agent.total_reward,
                                    s=3,
                                    c='blue')
                        fig3.savefig("total_reward.png")
                        break
                    timesteps = env.step(step_actions)

    except KeyboardInterrupt:
        pass
Example #20
def eval_genomes(genomes, config):
    global global_stats
    if params.parallel:
        """
        MULTITHREADING - currently creating threads equal to number of agents and divides genomes between them
        speed increase caps out at 4 to 5 threads
        """
        # TODO environment timeout handling in rare case of environment failing
        thread_list = []
        queue_lock = threading.Lock()

        # create queues
        agent_queue = queue.Queue(maxsize=params.num_of_agents)
        env_queue = queue.Queue(maxsize=params.num_of_agents)
        results = {}
        queue_lock.acquire()
        for agent in agents:
            agent_queue.put(agent)
        for env in envs:
            env_queue.put(env)
        queue_lock.release()

        # create threads and divide genomes between them for evaluation
        thread_id = 1
        total_genomes = len(genomes)
        per_thread = int(total_genomes / params.num_of_agents)
        for i in range(params.num_of_agents):
            thread = WorkerThread(
                thread_id=thread_id,
                genomes=genomes[i * per_thread:i * per_thread + per_thread],
                agent_queue=agent_queue,
                env_queue=env_queue,
                lock=queue_lock,
                config=config,
                results=results)
            thread_list.append(thread)
            thread_id += 1  # give each worker thread a distinct id

        # if need to send genome to each thread
        # for genome_id, genome in genomes:
        #     thread = WorkerThread(thread_id=thread_id, genome=genome, agent_queue=agent_queue, env_queue=env_queue,
        #                           lock=queue_lock, config=config, results=results)
        #     thread_list.append(thread)
        #     thread_id += 1

        # start and join threads for one generation of genomes
        for thread in thread_list:
            thread.start()

        for thread in thread_list:
            thread.join()

        # collect and assign results
        for genome_id, genome in genomes:
            # assuming genome_id is the same as genome.key
            genome.fitness = results[genome_id]
    else:
        # serialised evaluation - env timeouts are handled
        agent, env = retrieve_agent_and_environment()
        for genome_id, genome in genomes:
            while True:
                try:
                    genome.fitness = eval_single_genome(
                        genome, config, agent, env)
                    break
                except KeyboardInterrupt:
                    exit()
                except Exception as e:
                    print(e)
                    print("Exception during eval_single_genome")
                    if isinstance(e, protocol.ConnectionError):
                        print("timeout issues")
                        print("plotting graphs and pickling results")
                        print("attempting to restart env")

                    pickle_results([], good_genomes, [])
                    plot_graphs(config,
                                global_stats,
                                display=False,
                                winner=None)

                    # close env
                    env.close()
                    # start new env
                    env = sc2_env.SC2Env(
                        map_name=params.map,
                        players=[sc2_env.Agent(sc2_env.Race.terran)],
                        agent_interface_format=features.AgentInterfaceFormat(
                            feature_dimensions=features.Dimensions(
                                screen=params.dimensions,
                                minimap=params.dimensions),
                            use_feature_units=True),
                        step_mul=params.step_mul,
                        game_steps_per_episode=params.game_steps,
                        visualize=params.visualize,
                        random_seed=1)
                    # reinitialise agent
                    agent.reinitialize()
                    # eval single genome should run again at top of while loop

            print("Genome ID {}, Genome fitness {}".format(
                genome_id, genome.fitness))

            save_good_genome(genome, agent.genome_threshold)
        return_agent_and_environment(agent, env)
Example #21
def run_thread(agent, map_name):
    while True:
        try:
            print("\nStarting episode %s for agent %s ..." %
                  (agent.episode, agent.id))
            clean_sc2_temp_folder(tmp_maps_path, 8, 90)
            agent.rollouts_manager.empty_dict_rollouts()
            agent.episode_values = []
            agent.episode_cumulated_reward = 0
            agent.episode_step_count = 0
            agent.current_episode_actions = []
            agent.current_episode_rewards = []
            agent.current_episode_values = []

            L_players = [sc2_env.Agent(sc2_env.Race.terran)]
            with sc2_env.SC2Env(
                    map_name=map_name,
                    players=L_players,
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(
                            screen=params['resolution'],
                            minimap=params['resolution']),
                        use_feature_units=True),
                    step_mul=params['step_mul'],
                    game_steps_per_episode=0,
                    visualize=False,
                    disable_fog=True) as env:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()
                global start_time
                start_time = time.time()
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    if timesteps[0].last():
                        break
                    timesteps = env.step(step_actions)

            print("\nEpisode over for agent %s ..." % agent.id)

            #Summary parameters :
            available_actions_ratio = len(
                agent.current_episode_unique_actions) / len(
                    agent.current_episode_available_actions)
            summary = tf.Summary()
            summary.value.add(tag='Perf/1_Reward',
                              simple_value=float(
                                  agent.episode_cumulated_reward))
            summary.value.add(tag='Perf/2_Distinct actions',
                              simple_value=float(
                                  len(agent.current_episode_unique_actions)))
            summary.value.add(tag='Perf/3_Average advantage',
                              simple_value=float(np.mean(agent.advantages)))
            summary.value.add(tag='Perf/4_Previous actions ratio',
                              simple_value=float(agent.previous_actions_ratio))
            summary.value.add(tag='Perf/5_Average value',
                              simple_value=float(agent.average_value))
            summary.value.add(tag='Perf/6_Available actions ratio',
                              simple_value=float(available_actions_ratio))
            summary.value.add(tag='Perf/7_Average agent return',
                              simple_value=float(np.mean(agent.agent_return)))
            summary.value.add(tag='Perf/8_Random policy',
                              simple_value=float(agent.random_policy))
            summary.value.add(tag='Perf/9_Episode length',
                              simple_value=float(
                                  agent.current_episode_step_count))
            summary.value.add(tag='Losses/1_Value loss',
                              simple_value=float(agent.value_loss))
            summary.value.add(tag='Losses/2_Policy loss',
                              simple_value=float(agent.global_policy_loss))
            summary.value.add(tag='Losses/3_Entropy loss',
                              simple_value=float(agent.entropy))
            summary.value.add(tag='Losses/4_Network loss',
                              simple_value=float(agent.network_loss))
            #summary.value.add(tag='Losses/5_Grad norm', simple_value=float(agent.grad_norms))
            #summary.value.add(tag='Losses/6_Var norm', simple_value=float(agent.var_norms))

            for label in agent.dict_policy.keys():
                policy = agent.dict_policy[label][0]
                policy_len = len(policy)
                indexed_label = agent.index_label(
                    label) + ' | (%s)' % policy_len
                summary.value.add(tag=indexed_label,
                                  histo=build_histo_summary(
                                      policy, policy_len))
            agent.summary_writer.add_summary(summary, agent.episode)
            agent.summary_writer.flush()

            if agent.episode > 0 and agent.episode % 20 == 0:
                session_path = training_path + "sessions\\model_episode_%s.cptk" % (
                    str(agent.episode))
                build_path(session_path)
                saver.save(sess, session_path)
                print("\nModel saved")
            agent.episode += 1

        except KeyboardInterrupt:
            break

        except pysc2.lib.remote_controller.RequestError:
            print(
                "\n\npysc2.lib.remote_controller.RequestError for worker %s\n\n"
                % agent.name)
            env.close()
            print("\n\nenvironment closed for worker %s\n\n" % agent.name)
            time.sleep(2)
            pass
        except pysc2.lib.remote_controller.ConnectError:
            print()
        except pysc2.lib.protocol.ConnectionError:
            print("\n\npysc2.lib.protocol.ConnectionError for worker %s\n\n" %
                  agent.name)
            #Picked from "https://github.com/inoryy/reaver-pysc2/blob/master/reaver/envs/sc2.py#L57-L69"
            # hacky fix from websocket timeout issue...
            # this results in faulty reward signals, but I guess it beats completely crashing...
            env.close()
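
The build_histo_summary helper used above is project-specific and not shown in the example; a minimal sketch, assuming it only packs a 1-D policy vector into a TF1 tf.HistogramProto for summary.value.add(tag=..., histo=...) (the signature and binning are assumptions, not the original code):

import numpy as np
import tensorflow as tf

def build_histo_summary(values, num_bins):
    """Hypothetical reconstruction: bucket a 1-D array into a TF1-style
    tf.HistogramProto so it can be attached to a summary value as histo=..."""
    values = np.asarray(values, dtype=np.float64)
    counts, bin_edges = np.histogram(values, bins=num_bins)
    hist = tf.HistogramProto()
    hist.min = float(values.min())
    hist.max = float(values.max())
    hist.num = int(values.size)
    hist.sum = float(values.sum())
    hist.sum_squares = float(np.square(values).sum())
    # TensorBoard wants the right edge of each bucket plus its count.
    hist.bucket_limit.extend(float(edge) for edge in bin_edges[1:])
    hist.bucket.extend(int(count) for count in counts)
    return hist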
Exemplo n.º 22
0
MAPNAME = 'Simple64'
APM = 300
APM = int(APM / 18.75)
UNLIMIT = 0
VISUALIZE = True
REALTIME = True

SCREEN_SIZE = 84
MINIMAP_SIZE = 64

players = [sc2_env.Agent(sc2_env.Race.terran),
           sc2_env.Bot(sc2_env.Race.zerg,
                       sc2_env.Difficulty.very_easy)]

interface = features.AgentInterfaceFormat(
    feature_dimensions=features.Dimensions(screen=SCREEN_SIZE,
                                           minimap=MINIMAP_SIZE),
    use_feature_units=True)


class Agent(base_agent.BaseAgent):
    def step(self, obs):
        super(Agent, self).step(obs)
        return actions.FUNCTIONS.no_op()


def main(args):
    agent = Agent()
    try:
        with sc2_env.SC2Env(map_name=MAPNAME, players=players,
                            agent_interface_format=interface,
                            step_mul=APM, game_steps_per_episode=UNLIMIT,
                            visualize=VISUALIZE, realtime=REALTIME) as env:
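            # The snippet is truncated here. What follows is a sketch of the
            # assumed continuation: the standard pysc2 agent loop (mirroring the
            # previous example) plus an absl entry point; not the original code.
            agent.setup(env.observation_spec(), env.action_spec())
            timesteps = env.reset()
            agent.reset()
            while True:
                step_actions = [agent.step(timesteps[0])]
                if timesteps[0].last():
                    break
                timesteps = env.step(step_actions)
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    app.run(main)  # assumes "from absl import app" at the top of the script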
Exemplo n.º 23
0
observation_text_proto = """
player_common {
  player_id: 1
  minerals: 0
  vespene: 0
  food_cap: 10
  food_used: 0
  food_army: 0
  food_workers: 0
  idle_worker_count: 0
  army_count: 0
  warp_gate_count: 0
  larva_count: 0
}
game_loop: 20
"""

RECTANGULAR_DIMENSIONS = features.Dimensions(screen=(84, 80), minimap=(64, 67))
SQUARE_DIMENSIONS = features.Dimensions(screen=84, minimap=64)


class AvailableActionsTest(absltest.TestCase):
    always_expected = {
        "no_op", "move_camera", "select_point", "select_rect",
        "select_control_group"
    }

    def setUp(self):
        super(AvailableActionsTest, self).setUp()
        self.obs = text_format.Parse(observation_text_proto,
                                     sc_pb.Observation())
        self.hideSpecificActions(True)
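
For reference, the truncated observation_text_proto above is parsed with the standard protobuf text format; a minimal sketch (assuming the usual pysc2 test imports) of inspecting the parsed message:

from google.protobuf import text_format
from s2clientprotocol import sc2api_pb2 as sc_pb

obs = text_format.Parse(observation_text_proto, sc_pb.Observation())
print(obs.player_common.food_cap)  # 10, per the proto text above
print(obs.game_loop)               # 20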
Exemplo n.º 24
0
    def __init__(self,
                 reward_types,
                 map_name=None,
                 unit_type=[],
                 generate_xai_replay=False,
                 xai_replay_dimension=256,
                 verbose=False):
        if map_name is None:
            map_name = MAP_NAME
        maps_dir = os.path.join(os.path.dirname(__file__), '..', 'maps')
        print("map director: " + str(maps_dir))
        register_map(maps_dir, map_name)
        
        if generate_xai_replay:
            aif = features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=SCREEN_SIZE,
                                                       minimap=SCREEN_SIZE),
                rgb_dimensions=sc2_env.Dimensions(
                    screen=(xai_replay_dimension, xai_replay_dimension),
                    minimap=(64, 64),
                ),
                action_space=actions.ActionSpace.FEATURES,
                camera_width_world_units=28,
                #use_camera_position = True,
            )
            step_mul_value = 4
        else:
            aif = features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=SCREEN_SIZE,
                                                       minimap=SCREEN_SIZE),
                action_space=actions.ActionSpace.FEATURES,
                camera_width_world_units=100,
            )
            step_mul_value = 16  # larger step multiple when no xai replay is recorded
        np.set_printoptions(threshold=sys.maxsize, linewidth=sys.maxsize, precision=1)
        self.sc2_env = sc2_env.SC2Env(
            map_name=map_name,
            agent_interface_format=aif,
            step_mul=step_mul_value,
            game_steps_per_episode=0,
            score_index=0,
            visualize=True,
        )

        self.current_obs = None
        self.actions_taken = 0
        self.decomposed_rewards = []
        self.verbose = verbose
        self.decision_point = 1
        self.miner_index = 12
        self.reset_steps = -1
        self.norm_vector = np.array([1, 1, 1, 1, 50, 50, 1, 1, 1, 1, 50, 50, 100])

        self.signal_of_end = False
        self.end_state = None
        self.maker_cost_np = np.zeros(len(maker_cost))
        for i, mc in enumerate(maker_cost.values()):
            self.maker_cost_np[i] = mc

        self.reward_types = reward_types
        self.last_decomposed_reward_dict = {}
        self.decomposed_reward_dict = {}
        for rt in reward_types:
            self.decomposed_reward_dict[rt] = 0
            self.last_decomposed_reward_dict[rt] = 0

        unit_type = [UNIT_TYPES['Marine'], UNIT_TYPES['Viking'], UNIT_TYPES['Colossus']]
        self.input_screen_features = {
            "PLAYER_RELATIVE":[1, 4],
            "UNIT_TYPE": unit_type,
            'HIT_POINT': 0,
            'HIT_POINT_RATIO': 0,
            'SHIELD': 0,
            'SHIELD_RATIO': 0,
            'UNIT_DENSITY': 0
        }
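
The input_screen_features mapping above is only a specification; a minimal sketch, assuming it is meant to select feature-screen layers from a pysc2 observation (the helper name and masking scheme are assumptions, not part of the original class):

import numpy as np

def extract_screen_channels(obs, spec):
    """Hypothetical helper: build one binary mask per PLAYER_RELATIVE value and
    per UNIT_TYPE id listed in a spec shaped like input_screen_features."""
    screen = obs.observation.feature_screen
    channels = []
    for value in spec["PLAYER_RELATIVE"]:  # e.g. 1 = self, 4 = enemy
        channels.append((np.asarray(screen.player_relative) == value).astype(np.float32))
    for unit_id in spec["UNIT_TYPE"]:  # Marine / Viking / Colossus ids
        channels.append((np.asarray(screen.unit_type) == unit_id).astype(np.float32))
    return np.stack(channels, axis=0)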
Exemplo n.º 25
0
 def testScreenWidthWithoutHeightRaises(self):
     with self.assertRaises(ValueError):
         features.Dimensions(screen=(84, 0), minimap=64)
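
For contrast with the failing case above, a short sketch of constructions that features.Dimensions does accept (assuming pysc2 is installed):

from pysc2.lib import features

# A single integer gives a square surface; a (width, height) tuple is rectangular.
square = features.Dimensions(screen=84, minimap=64)
rect = features.Dimensions(screen=(84, 80), minimap=(64, 67))

# Both screen and minimap must be supplied and every dimension must be positive;
# otherwise ValueError is raised, as the tests in this listing check.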
Exemplo n.º 26
0
import sc2agents.learning.deep.keras.models as keras_models

DEFAULT_MINIMAP_SIZE = 64
DEFAULT_SCREEN_SIZE = 84
DEFAULT_STEP_MUL = 32
DEFAULT_DIFFICULTY = sc2_env.Difficulty.very_easy

DEFAULT_GAME_SPECIFICATION = {
    'map_name': "Eastwatch",
    'players': [
        sc2_env.Agent(sc2_env.Race.terran),
        sc2_env.Bot(sc2_env.Race.zerg, DEFAULT_DIFFICULTY)
    ],
    'agent_interface_format': features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=DEFAULT_SCREEN_SIZE,
                                               minimap=DEFAULT_MINIMAP_SIZE),
        use_feature_units=True),
    'step_mul': DEFAULT_STEP_MUL,
    'game_steps_per_episode': 0,
    'visualize': False
    # 'ensure_available_actions': False  # TODO: enable after pysc2 release
}


def run_game(agent, game_specification=None):
    if game_specification is None:
        game_specification = DEFAULT_GAME_SPECIFICATION
    with sc2_env.SC2Env(**game_specification) as env:
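        # The snippet is truncated here; a sketch of an assumed continuation using
        # pysc2's stock run loop (the original may well use a hand-written loop):
        from pysc2.env import run_loop
        run_loop.run_loop([agent], env)  # runs episodes until interrupted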
Exemplo n.º 27
0
 def testMinimapWidthAndHeightWithoutScreenRaises(self):
     with self.assertRaises(ValueError):
         features.Dimensions(minimap=(64, 67))
Exemplo n.º 28
0
def training_game():
    env = Environment(
        map_name="ForceField",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr="eps",
                                  value_max=1,
                                  value_min=0.2,
                                  value_test=.0,
                                  nb_steps=1e2)

    # Agent

    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        enable_double_dqn=True,
        enable_dueling_network=True,
        # 2019-07-12 GU Zhan (Sam)
        #                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
        nb_steps_warmup=2000,
        target_model_update=1e-2,
        policy=policy,
        batch_size=150,
        processor=processor,
        delta_clip=1)

    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback

    timestamp = f"{datetime.datetime.now():%Y-%m-%d %I:%M%p}"
    # 2019-07-12 GU Zhan (Sam) folder name for Lunux:
    #    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph/'+ timestamp, histogram_freq=0,
    #                                write_graph=True, write_images=False)

    # 2019-07-12 GU Zhan (Sam) folder name for Windows:
    callbacks = keras.callbacks.TensorBoard(log_dir=r'.\Graph\issgz',
                                            histogram_freq=0,
                                            write_graph=True,
                                            write_images=False)

    # Save the parameters and upload them when needed

    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"

    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"

    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    #    dqn.fit(env, callbacks=[callbacks,s,logs], nb_steps=600, action_repetition=2,
    dqn.fit(env,
            callbacks=[callbacks, s, logs],
            nb_steps=10000,
            action_repetition=2,
            log_interval=1e4,
            verbose=2)

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
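
pysc2 expects its absl flags to be parsed before an environment is created, so a typical entry point for training_game (an assumption; the original entry point is not shown) looks like:

import sys
from absl import flags

if __name__ == "__main__":
    flags.FLAGS(sys.argv)  # parse pysc2's absl flags before building the env
    training_game()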
Exemplo n.º 29
0
 def testSingularZeroesRaises(self):
     with self.assertRaises(ValueError):
         features.Dimensions(screen=0, minimap=0)
Exemplo n.º 30
0
 def testEquality(self):
     self.assertEqual(features.Dimensions(screen=64, minimap=64),
                      features.Dimensions(screen=64, minimap=64))
     self.assertNotEqual(features.Dimensions(screen=64, minimap=64),
                         features.Dimensions(screen=64, minimap=32))
     self.assertNotEqual(features.Dimensions(screen=64, minimap=64), None)