Exemplo n.º 1
0
    async def observation(self, game_loop=None):
        """Fetch an observation and cache the game result once it is reported.

        Args:
            game_loop: Optional value for the request's ``game_loop`` field.
                When ``None`` the request is sent without setting it.

        Returns:
            The response produced by ``self._execute``; it is asserted to
            carry an ``observation`` field.
        """
        if game_loop is None:
            request = sc_pb.RequestObservation()
        else:
            request = sc_pb.RequestObservation(game_loop=game_loop)
        result = await self._execute(observation=request)
        assert result.HasField("observation")

        if not self.in_game or result.observation.player_result:
            if not result.observation.player_result:
                # Sometimes the game ends one step before the results are
                # available, so ask once more (without a target game loop).
                result = await self._execute(
                    observation=sc_pb.RequestObservation())
                assert result.observation.player_result

            self._game_result = {
                entry.player_id: Result(entry.result)
                for entry in result.observation.player_result
            }

        # Render data is only present when RGB rendering was requested.
        if self._renderer and result.observation.observation.HasField(
                "render_data"):
            await self._renderer.render(result.observation)

        return result
Exemplo n.º 2
0
    async def observation(self):
        """Request an observation and store the game result when present.

        The result mapping (player id -> ``Result``) is written to
        ``self._game_result`` when the game is no longer in progress or the
        response already lists player results.

        Returns:
            The (possibly re-requested) response from ``self._execute``.
        """
        result = await self._execute(observation=sc_pb.RequestObservation())
        if (not self.in_game) or result.observation.player_result:
            if not result.observation.player_result:
                # Sometimes the game ends one step before results are
                # available; a second request is expected to contain them.
                result = await self._execute(observation=sc_pb.RequestObservation())
                assert len(result.observation.player_result) > 0

            self._game_result = {
                entry.player_id: Result(entry.result)
                for entry in result.observation.player_result
            }
        return result
Exemplo n.º 3
0
    def observe(self, disable_fog=False, target_game_loop=0):
        """Get a current observation.

        A response whose ``observation.game_loop`` equals ``2**32 - 1`` is
        treated as a stub: the previously stored observation is deep-copied
        and returned with the stub's actions and player results merged in.

        Args:
            disable_fog: Forwarded to ``RequestObservation.disable_fog``.
            target_game_loop: Forwarded to ``RequestObservation.game_loop``.

        Returns:
            The observation response, or the patched copy of the last one
            when a stub was received.

        Raises:
            ValueError: A stub observation carries no player result.
            RuntimeError: A stub arrived but no previous observation is stored.
        """
        request = sc_pb.RequestObservation(
            game_loop=target_game_loop, disable_fog=disable_fog)
        obs = self._client.send(observation=request)

        if obs.observation.game_loop != 2**32 - 1:
            self._last_obs = obs
        else:
            logging.info("Received stub observation.")

            if not obs.player_result:
                raise ValueError(
                    "Expect a player result in a stub observation")
            if self._last_obs is None:
                raise RuntimeError(
                    "Received stub observation with no previous obs")

            # Instead of handling an empty observation everywhere, replay the
            # last real one with the stub's actions and player result.
            replayed = copy.deepcopy(self._last_obs)
            del replayed.actions[:]
            replayed.actions.extend(obs.actions)
            replayed.player_result.extend(obs.player_result)
            obs = replayed
            self._last_obs = None

        if FLAGS.sc2_log_actions and obs.actions:
            stream = sys.stderr
            stream.write(" Executed actions ".center(60, "<") + "\n")
            for executed in obs.actions:
                stream.write(str(executed))
            stream.flush()

        return obs
Exemplo n.º 4
0
    def reset(self):
        """Start a new episode and return the custom states of both players.

        Every call steps the environment with a camera move; once
        ``self.reset_steps`` has reached 10 the underlying environment is
        additionally reset and the counter restarts.

        Returns:
            Tuple ``(state_1, state_2)`` from ``get_custom_state`` for
            players 1 and 2.
        """
        self.decomposed_rewards = []

        # Moving the camera (in any direction) runs the ResetEpisode trigger
        # built into the map.
        camera_nudge = actions.FUNCTIONS.move_camera([0, 0])
        self.current_obs = self.sc2_env.step([camera_nudge])[0]

        if self.reset_steps >= 10:
            self.sc2_env.reset()
            self.reset_steps = 0
        self.reset_steps += 1

        self.end_state = None
        self.decision_point = 1
        self.num_waves = 0

        client = self.sc2_env._controllers[0]._client
        response = client.send(observation=sc_pb.RequestObservation())
        # The reply to the empty action request is not used.
        client.send(action=sc_pb.RequestAction())

        units = response.observation.raw_data.units
        self.getRewards(units)

        state_1 = self.get_custom_state(units, 1)
        state_2 = self.get_custom_state(units, 2)

        # Zero both the current and the previous decomposed reward tables.
        for reward_type in self.reward_types:
            self.decomposed_reward_dict[reward_type] = 0
            self.last_decomposed_reward_dict[reward_type] = 0
        return state_1, state_2
Exemplo n.º 5
0
    def step(self, action, skip=False):
        """Apply ``action``, advance the environment one no-op step and read rewards.

        Args:
            action: Action index. Values below 4 trigger the matching custom
                ability, 4 does nothing extra, values above 4 only print a
                warning.
            skip: When true, ``self.decomposed_rewards`` and the "last"
                reward table are left untouched.

        Returns:
            Tuple ``(state, end, get_income)`` where ``state`` comes from
            ``get_custom_state`` and the two flags from ``getRewards``.
        """
        if action < 4:
            self.use_custom_ability(action_to_ability_id[action])
        elif action > 4:
            print("Invalid action: check final layer of network")

        self.current_obs = self.sc2_env.step([actions.FUNCTIONS.no_op()])[0]

        # Rewards and state are derived from the raw unit list.
        response = self.sc2_env._controllers[0]._client.send(
            observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units
        end, get_income = self.getRewards(units)
        state = self.get_custom_state(units)

        if not skip:
            # Per-type reward gained since the previous non-skipped step.
            self.decomposed_rewards = [
                self.decomposed_reward_dict[reward_type]
                - self.last_decomposed_reward_dict[reward_type]
                for reward_type in self.reward_types
            ]
            for reward_type in self.reward_types:
                current = self.decomposed_reward_dict[reward_type]
                self.last_decomposed_reward_dict[reward_type] = current

        if end:
            self.end_state = state
        return state, end, get_income
Exemplo n.º 6
0
 async def observation(self):
     """Request an observation; store player results in ``_game_result`` if any.

     Returns:
         The response from ``self._execute``.
     """
     result = await self._execute(observation=sc_pb.RequestObservation())
     outcomes = result.observation.player_result
     if outcomes:
         self._game_result = {
             entry.player_id: Result(entry.result) for entry in outcomes
         }
     return result
Exemplo n.º 7
0
    def step(self, action, player):
        """Issue ``player``'s build actions, or advance the game when there are none.

        Args:
            action: Sequence of per-ability counts; entry ``i`` is the number
                of times ability ``i`` is triggered. An empty sequence means
                "just advance and evaluate rewards".
            player: Player number; ``1`` selects ``fifo_player_1``, anything
                else ``fifo_player_2``.

        Returns:
            ``(state_1, state_2, done, dp)``. Both states are ``None`` unless
            ``action`` was empty and ``getRewards`` reported ``done`` or
            ``dp``.
        """
        done = False
        dp = False

        # The unit list is read before the environment is stepped; it is the
        # data later passed to getRewards / get_custom_state.
        response = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units

        if len(action) > 0:
            queue = self.fifo_player_1 if player == 1 else self.fifo_player_2

            # Make sure the map is controlling the requested player.
            if self.get_current_player(units) != player:
                self.use_custom_ability(action_to_ability_id['switch_player'])

            for ability_index, count in enumerate(action):
                for _ in range(int(count)):
                    self.use_custom_ability(action_to_ability_id[ability_index])
                    queue.append(ability_index)
                    # Keep only the most recent `building_limiation` entries.
                    if len(queue) > self.building_limiation:
                        del queue[0]

            self.current_obs = self.sc2_env.step([actions.FUNCTIONS.no_op()])[0]
            return None, None, done, dp

        self.current_obs = self.sc2_env.step([actions.FUNCTIONS.no_op()])[0]
        done, dp = self.getRewards(units)
        if not (dp or done):
            return None, None, done, dp

        state_1 = self.get_custom_state(units, 1)
        state_2 = self.get_custom_state(units, 2)
        if done:
            self.end_state_1 = state_1
            self.end_state_2 = state_2

        # Reward gained per type since the previous evaluation.
        self.decomposed_rewards = [
            self.decomposed_reward_dict[reward_type]
            - self.last_decomposed_reward_dict[reward_type]
            for reward_type in self.reward_types
        ]
        for reward_type in self.reward_types:
            self.last_decomposed_reward_dict[reward_type] = self.decomposed_reward_dict[reward_type]

        return state_1, state_2, done, dp
Exemplo n.º 8
0
 async def get_result(self):
     """Best-effort poll for the game result; stores it in ``self.result``.

     The controller is pinged first; an observation is only requested when
     the status is in_game, in_replay or ended. Any exception is logged and
     swallowed.
     """
     try:
         pong = await self.controller.ping()
         if pong.status not in {Status.in_game, Status.in_replay, Status.ended}:
             return

         response = await self.controller._execute(
             observation=sc_pb.RequestObservation())
         if not response.HasField("observation"):
             return

         outcomes = response.observation.player_result
         if outcomes:
             self.result = {
                 entry.player_id: Result(entry.result) for entry in outcomes
             }
     except Exception as e:
         tb = traceback.format_exc()
         logger.error(f"Obs-check: {e}, traceback: {tb}")
Exemplo n.º 9
0
    def step(self, action):
        """Trigger the requested abilities, advance one no-op step, read rewards.

        Args:
            action: Sequence of per-ability counts; entry ``i`` is how many
                times ability ``i`` is triggered. Nothing is triggered unless
                the counts sum to more than zero.

        Returns:
            ``(state, big_a, end, dp)``. ``big_a`` is
            ``get_big_A(state[miner_index] * 100)`` when ``dp`` or ``end`` is
            set, otherwise ``None``.
        """
        if sum(action) > 0:
            for ability_index, repeat in enumerate(action):
                for _ in range(repeat):
                    self.use_custom_ability(action_to_ability_id[ability_index])

        self.current_obs = self.sc2_env.step([actions.FUNCTIONS.no_op()])[0]

        # Rewards and state are derived from the raw unit list.
        response = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units
        end, dp = self.getRewards(units)
        state = self.get_custom_state(units)

        # The latest state is always remembered as the end state.
        self.end_state = state

        if not (dp or end):
            return state, None, end, dp

        # Reward gained per type since the previous decision point.
        self.decomposed_rewards = [
            self.decomposed_reward_dict[reward_type]
            - self.last_decomposed_reward_dict[reward_type]
            for reward_type in self.reward_types
        ]
        for reward_type in self.reward_types:
            self.last_decomposed_reward_dict[reward_type] = self.decomposed_reward_dict[reward_type]

        return state, self.get_big_A(state[self.miner_index] * 100), end, dp
Exemplo n.º 10
0
    def reset(self):
        """Restart the episode bookkeeping and return the initial encoded state.

        Returns:
            The one-hot encoded ``feature_screen`` reshaped to a single row
            (``np.reshape(..., (1, -1))``).
        """
        self.decomposed_rewards_all = []
        self.decomposed_rewards = []
        self.decomposed_rewards_mark = 0

        # Moving the camera (in any direction) runs the ResetEpisode trigger
        # built into the map.
        camera_nudge = actions.FUNCTIONS.move_camera([0, 0])
        self.last_timestep = self.sc2_env.step([camera_nudge])[0]
        observation = self.unpack_timestep(self.last_timestep)
        self.current_obs = observation
        self.actions_taken = 0

        screen = observation[3]['feature_screen']
        # Cells where layer 6 equals 73 or layer 12 equals 1 are relabelled
        # as 3 in layer 5 (the original code calls it "player_relative").
        relabelled = np.array(screen[5])
        relabelled[np.array(screen[6]) == 73] = 3
        relabelled[np.array(screen[12]) == 1] = 3
        screen[5] = relabelled.tolist()
        state = np.reshape(
            getOneHotState(screen, self.input_screen_features), (1, -1))

        self.end_state = None

        client = self.sc2_env._controllers[0]._client
        response = client.send(observation=sc_pb.RequestObservation())
        # The reply to the empty action request is not used.
        client.send(action=sc_pb.RequestAction())

        units = response.observation.raw_data.units
        _, finished_signal = self.getRewards(units)
        self.signal_of_finished = finished_signal

        # Start the episode with every decomposed reward at zero.
        for reward_key in self.decomposed_reward_dict:
            self.decomposed_reward_dict[reward_key] = 0

        return state
Exemplo n.º 11
0
    def reset(self):
        """Restart the episode and return the initial custom state.

        Returns:
            The value of ``get_custom_state`` for the freshly observed units.
        """
        self.decomposed_rewards = []
        self.actions_taken = 0

        # Moving the camera (in any direction) runs the ResetEpisode trigger
        # built into the map.
        camera_nudge = actions.FUNCTIONS.move_camera([0, 0])
        self.current_obs = self.sc2_env.step([camera_nudge])[0]

        self.end_state = None
        self.get_income_signal = 2

        client = self.sc2_env._controllers[0]._client
        response = client.send(observation=sc_pb.RequestObservation())
        # The reply to the empty action request is not used.
        client.send(action=sc_pb.RequestAction())

        units = response.observation.raw_data.units
        self.getRewards(units)
        state = self.get_custom_state(units)

        # Zero both the current and the previous decomposed reward tables.
        for reward_type in self.reward_types:
            self.decomposed_reward_dict[reward_type] = 0
            self.last_decomposed_reward_dict[reward_type] = 0
        return state
Exemplo n.º 12
0
    def step(self, action):
        """Execute one agent action and report the new state.

        Args:
            action: Discrete action index. 0-3 attack the screen corners
                ``[0,0]``, ``[39,0]``, ``[0,39]``, ``[39,39]``; 4 is a no-op;
                anything else prints a warning and is treated as a no-op.
                The action is only honoured when ``self.actions_taken == 0``
                and ``check_action(self.current_obs, 12)`` is truthy.

        Returns:
            ``(state, done, dead)``. ``done`` is set when the finished signal
            returned by ``getRewards`` changed since the previous call;
            ``dead`` is set when that new signal equals 1.
        """
        done = False
        dead = False

        ### ACTION TAKING ###
        command = None
        if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
            corner_targets = ([0,0], [39,0], [0,39], [39,39])
            for index, target in enumerate(corner_targets):
                if action == index:
                    command = actions.FUNCTIONS.Attack_screen("now", target)
                    break
            else:
                if not (action == 4):
                    print("Invalid action: check final layer of network")
        if command is None:
            command = actions.FUNCTIONS.no_op()

        ### STATE PREPARATION ###
        self.last_timestep = self.sc2_env.step([command])[0]
        observation = self.unpack_timestep(self.last_timestep)
        self.current_obs = observation

        def relabel_layer_five(screen):
            # Cells where layer 6 equals 73 or layer 12 equals 1 become 3 in
            # layer 5 (the original code calls it "player_relative").
            relabelled = np.array(screen[5])
            relabelled[np.array(screen[6]) == 73] = 3
            relabelled[np.array(screen[12]) == 1] = 3
            screen[5] = relabelled.tolist()

        def encode(screen):
            one_hot = getOneHotState(screen, self.input_screen_features)
            return np.reshape(one_hot, (1, -1))

        ### REWARD PREPARATION AND TERMINATION ###
        response = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units
        _, sof = self.getRewards(units)

        screen = observation[3]['feature_screen']
        relabel_layer_five(screen)
        state = encode(screen)

        # Record a snapshot of every decomposed reward for this step.
        snapshot = []
        self.decomposed_rewards_all.append(snapshot)
        for reward_key in self.decomposed_reward_dict:
            snapshot.append(self.decomposed_reward_dict[reward_key])

        if self.signal_of_finished != sof:
            done = True
            dead = bool(sof == 1)

            history = self.decomposed_rewards_all
            count = len(history)
            # On death the previous snapshot is used instead of the latest.
            final_index = count - 2 if dead else count - 1
            mark = self.decomposed_rewards_mark

            episode_rewards = []
            self.decomposed_rewards.append(episode_rewards)
            for i in range(len(self.reward_types)):
                episode_rewards.append(history[final_index][i] - history[mark][i])
            self.decomposed_rewards_mark = count - 1

        self.signal_of_finished = sof

        if dead:
            # Rebuild the state with every cell that holds value 83 in
            # layer 6 zeroed in all layers ("agent_units_position" in the
            # original code).
            screen = observation[3]['feature_screen']
            relabel_layer_five(screen)

            agent_cells = np.array(screen[6]) == 83
            for layer_index, layer in enumerate(screen):
                cleared = np.array(layer)
                cleared[agent_cells] = 0
                screen[layer_index] = cleared.tolist()

            state = encode(screen)
            self.end_state = state

        return state, done, dead
Exemplo n.º 13
0
unit_command = raw_pb.ActionRawUnitCommand()
unit_command.ability_id = 16 # Move Ability
unit_command.target_unit_tag = unit_tag_list[0]
unit_command.unit_tags.append(unit_tag_list[1])
action_raw = raw_pb.ActionRaw(unit_command = unit_command)

action = sc_pb.RequestAction()
action.actions.add(action_raw = action_raw)
test_client.comm.send(action=action)

"""

"""Move Units"""
# Tags of the units that will receive the move command.
unit_tag_list=[]

# Ask the game for a fresh observation.
observation = sc_pb.RequestObservation()
t=test_client.comm.send(observation=observation)

# Collect the tag of every unit whose type id is 84.
for unit in t.observation.observation.raw_data.units:
    if unit.unit_type == 84: # Probe unit_type_tag
        unit_tag_list.append(unit.tag)

# Build a raw unit command targeting the world position (30, 30).
unit_command = raw_pb.ActionRawUnitCommand()
unit_command.ability_id = 16 # Move Ability
unit_command.target_world_space_pos.x = 30
unit_command.target_world_space_pos.y = 30
# NOTE(review): indexes the first 12 collected tags; this raises IndexError
# when fewer than 12 matching units were observed.
for i in range(0,12):
    unit_command.unit_tags.append(unit_tag_list[i])
action_raw = raw_pb.ActionRaw(unit_command = unit_command)

# Empty action request; the snippet ends before the command is attached.
action = sc_pb.RequestAction()
Exemplo n.º 14
0
 def get_observation(self):
     """Return the observation response of the first controller's client."""
     client = self.sc2_env._controllers[0]._client
     return client.send(observation=sc_pb.RequestObservation())
Exemplo n.º 15
0
 async def observation(self):
     """Send an empty ``RequestObservation`` and return the response."""
     return await self._execute(observation=sc_pb.RequestObservation())
Exemplo n.º 16
0
 def observe(self):
     """Get a current observation."""
     request = sc_pb.RequestObservation()
     response = self._client.send(observation=request)
     return response
Exemplo n.º 17
0
    def step(self, action):
        """Execute one agent action and derive state and rewards from raw units.

        Units with a type id in 1922-1935 act as reward markers whose
        ``health`` carries a value (1923/1925/1927/1929/1931/1933 are the
        "damage to" Zealot/Zergling/Roach/Stalker/Marine/Hydralisk markers,
        1934 is wins, 1935 is losses). Every other unit with a type id below
        1922, except type 51, contributes ``(unit_type, x, y)`` to the state.

        Args:
            action: Discrete action index. 0-3 attack the screen corners
                ``[0,0]``, ``[83,0]``, ``[0,83]``, ``[83,83]``; 4 is a no-op;
                anything else prints a warning and is treated as a no-op.
                The action is only honoured when ``self.actions_taken == 0``
                and ``check_action(self.current_obs, 12)`` is truthy.

        Returns:
            ``(state, reward, done, dead, info)``. ``state`` is the flat unit
            list, zero-padded to at least 36 entries; ``reward`` and ``info``
            are passed through from ``unpack_timestep``. ``done`` is set when
            ``wins + losses`` changed since the previous call and ``dead``
            when, in that case, the loss count increased.
        """
        from s2clientprotocol import sc2api_pb2 as sc_pb

        done = False
        dead = False

        ### ACTION TAKING ###
        command = None
        if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
            corner_targets = ([0,0], [83,0], [0,83], [83,83])
            for index, target in enumerate(corner_targets):
                if action == index:
                    command = actions.FUNCTIONS.Attack_screen("now", target)
                    break
            else:
                if not (action == 4):
                    print("Invalid action: check final layer of network")
        if command is None:
            command = actions.FUNCTIONS.no_op()

        ### STATE PREPARATION ###
        self.last_timestep = self.sc2_env.step([command])[0]
        observation, grid, reward, _, info = self.unpack_timestep(self.last_timestep)
        self.current_obs = observation

        # NOTE(review): the one-hot encoding was always discarded in favour of
        # the unit-based state built below; the call is kept only in case the
        # helper has side effects. Remove it once that is confirmed.
        self.int_map_to_onehot(grid)

        ### REWARD PREPARATION AND TERMINATION ###
        response = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units

        state = []
        marker_health = {}  # marker unit type -> health of the last such unit
        for unit in units:
            unit_type = unit.unit_type
            if unit_type < 1922:
                if unit_type != 51:
                    state.extend((unit_type, unit.pos.x, unit.pos.y))
            elif unit_type <= 1935:
                marker_health[unit_type] = unit.health

        wins = marker_health.get(1934, 0)
        losses = marker_health.get(1935, 0)
        self.reward = wins + losses
        self.losses = losses

        if self.last_reward != self.reward:
            done = True
            dead = bool(self.last_losses < self.losses)

        self.last_reward = self.reward
        self.last_losses = self.losses

        # "Damage to" markers in the order Zealot, Zergling, Roach, Stalker,
        # Marine, Hydralisk; each value is offset by -2 as in the original.
        damage_to_markers = (1923, 1925, 1927, 1929, 1931, 1933)
        self.decomposed_rewards.append(
            [marker_health.get(marker, 0) - 2 for marker in damage_to_markers])

        # Pad with zeros so the state has at least 36 entries.
        state.extend([0.0] * (36 - len(state)))
        return state, reward, done, dead, info
 def observe(self, disable_fog=False, target_game_loop=0):
   """Get a current observation.

   Both arguments are forwarded to ``RequestObservation`` as
   ``disable_fog`` and ``game_loop`` respectively.
   """
   request = sc_pb.RequestObservation(
     game_loop=target_game_loop,
     disable_fog=disable_fog)
   return self._client.send(observation=request)
Exemplo n.º 19
0
    def reset(self):
        """Reset the environment and return the initial unit-based state.

        Returns:
            A flat list with ``unit_type, x, y`` for every observed unit whose
            type id is below 1922 and not 51. No padding is applied here.
        """
        from s2clientprotocol import sc2api_pb2 as sc_pb

        self.decomposed_rewards = []
        self.rewards = []
        self.last_timestep = self.sc2_env.reset()

        # Moving the camera (in any direction) runs the ResetEpisode trigger
        # built into the map.
        camera_nudge = actions.FUNCTIONS.move_camera([0, 0])
        self.last_timestep = self.sc2_env.step([camera_nudge])[0]
        observation, grid, _, _, _ = self.unpack_timestep(self.last_timestep)
        self.current_obs = observation

        # NOTE(review): the one-hot encoding was always discarded in favour of
        # the unit-based state built below; the call is kept only in case the
        # helper has side effects. Remove it once that is confirmed.
        self.int_map_to_onehot(grid)
        self.actions_taken = 0

        client = self.sc2_env._controllers[0]._client
        response = client.send(observation=sc_pb.RequestObservation())
        # The reply to the empty action request is not used.
        client.send(action=sc_pb.RequestAction())

        # The reward-marker units (type ids 1922-1935) were read here before
        # but their values were never used, so only the state is collected.
        state = []
        for unit in response.observation.raw_data.units:
            if unit.unit_type < 1922 and unit.unit_type != 51:
                state.extend((unit.unit_type, unit.pos.x, unit.pos.y))
        return state
Exemplo n.º 20
0
    def step(self, action):
        """Execute one agent action and compute the reward from marker units.

        Units with type ids 1922-1927 act as reward markers whose ``health``
        carries a value: 1922 roach reward, 1923 zergling reward, 1924/1925
        damage by roach/zergling, 1926/1927 damage to roach/zergling. A marker
        that is absent from the observation counts as 0.

        Args:
            action: Discrete action index. 0-3 attack the screen corners
                ``[0,0]``, ``[39,0]``, ``[0,39]``, ``[39,39]``; 4 is a no-op;
                anything else prints a warning and is treated as a no-op.
                The action is only honoured when ``self.actions_taken == 0``
                and ``check_action(self.current_obs, 12)`` is truthy.

        Returns:
            ``(state, reward, done, dead, info)``. ``state`` is the one-hot
            encoded ``feature_screen`` flattened to one row, ``reward`` is
            ``roach_reward + zergling_reward - 4`` and ``info`` comes from
            ``unpack_timestep``. ``done`` is set when the reward differs from
            the previous call and ``dead`` when, in that case, it decreased.
        """
        from s2clientprotocol import sc2api_pb2 as sc_pb

        done = False
        dead = False

        ### ACTION TAKING ###
        command = None
        if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
            corner_targets = ([0,0], [39,0], [0,39], [39,39])
            for index, target in enumerate(corner_targets):
                if action == index:
                    command = actions.FUNCTIONS.Attack_screen("now", target)
                    break
            else:
                if not (action == 4):
                    print("Invalid action: check final layer of network")
        if command is None:
            command = actions.FUNCTIONS.no_op()

        ### STATE PREPARATION ###
        self.last_timestep = self.sc2_env.step([command])[0]
        observation, _, _, _, info = self.unpack_timestep(self.last_timestep)
        self.current_obs = observation

        input_screen_feature = {'PLAYER_RELATIVE': [1,3,16], 'UNIT_TYPE':SIMPLE_SC2_UNITS, 'HIT_POINT':0, 'HIT_POINT_RATIO': 0, 'UNIT_DENSITY':0}
        state = getOneHotState(observation[3]['feature_screen'], input_screen_feature)
        # Flatten to a single row (the original notes (20, 40, 40) -> (1, 32000)).
        state = np.array(np.reshape(state, (1, -1)))

        ### REWARD PREPARATION AND TERMINATION ###
        response = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units

        marker_health = {}  # marker unit type -> health of the last such unit
        for unit in units:
            if 1922 <= unit.unit_type <= 1927:
                marker_health[unit.unit_type] = unit.health

        # The two reward markers used to be left unbound when missing, which
        # raised UnboundLocalError below; they now default to 0 like the
        # damage counters.
        roach_reward = marker_health.get(1922, 0)
        zergling_reward = marker_health.get(1923, 0)
        damageByRoach = marker_health.get(1924, 0)
        damageByZergling = marker_health.get(1925, 0)
        damageToRoach = marker_health.get(1926, 0)
        damageToZergling = marker_health.get(1927, 0)

        reward = roach_reward + zergling_reward - 4
        self.reward = reward
        self.rewards.append(reward)

        if self.last_reward != self.reward:
            done = True
            dead = bool(self.last_reward > self.reward)

        self.last_reward = self.reward

        # Every component is offset by -2, as in the original.
        self.decomposed_rewards.append([
            roach_reward - 2,
            zergling_reward - 2,
            damageByRoach - 2,
            damageByZergling - 2,
            damageToRoach - 2,
            damageToZergling - 2,
        ])

        return state, reward, done, dead, info
Exemplo n.º 21
0
    def step(self, action):
        """Advance the environment one step using a discrete action index.

        Args:
            action: Discrete action index. Values 0-3 issue an
                ``Attack_screen`` command at the screen coordinates
                ``[0, 0]``, ``[83, 0]``, ``[0, 83]`` and ``[83, 83]``
                respectively; 4 is an explicit no-op. Any other value prints
                a warning and falls back to a no-op. An attack is only issued
                when ``self.actions_taken == 0`` and
                ``self.check_action(self.current_obs, 12)`` is truthy;
                otherwise a no-op is sent regardless of ``action``.

        Returns:
            A tuple ``(state, reward, done, dead, info)`` where ``state`` is
            the result of ``self.int_map_to_onehot`` wrapped in ``np.array``,
            ``reward`` is ``roach_reward + zergling_reward - 4``, ``done`` is
            True when the reward differs from the previous step's reward,
            ``dead`` is True when the reward decreased, and ``info`` is
            whatever ``self.unpack_timestep`` returned.

        Side effects:
            Updates ``self.last_timestep``, ``self.current_obs``,
            ``self.reward`` and ``self.last_reward``, and appends to
            ``self.rewards`` and ``self.decomposed_rewards``.
        """
        done = False
        dead = False

        ### ACTION TAKING ###

        # NOTE(review): 12 is presumably the pysc2 function id of
        # Attack_screen - confirm against check_action.
        if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
            # Kept as an equality chain (not a dict lookup) so that action
            # values that compare equal to ints but hash differently still
            # dispatch the same way.
            if action == 0:
                action = actions.FUNCTIONS.Attack_screen("now", [0, 0])
            elif action == 1:
                action = actions.FUNCTIONS.Attack_screen("now", [83, 0])
            elif action == 2:
                action = actions.FUNCTIONS.Attack_screen("now", [0, 83])
            elif action == 3:
                action = actions.FUNCTIONS.Attack_screen("now", [83, 83])
            elif action == 4:
                action = actions.FUNCTIONS.no_op()
            else:
                print("Invalid action: check final layer of network")
                action = actions.FUNCTIONS.no_op()
        else:
            action = actions.FUNCTIONS.no_op()

        ####################

        ### STATE PREPARATION ###

        self.last_timestep = self.sc2_env.step([action])[0]

        # The reward and done flag reported by the timestep are ignored; both
        # are recomputed below from the raw unit data.
        observation, state, _reward, _done, info = self.unpack_timestep(
            self.last_timestep)
        self.current_obs = observation

        state = self.int_map_to_onehot(state)
        state = np.array(state)

        #########################

        ### REWARD PREPARATION AND TERMINATION ###

        from s2clientprotocol import sc2api_pb2 as sc_pb
        response = self.sc2_env._controllers[0]._client.send(
            observation=sc_pb.RequestObservation())
        units = response.observation.raw_data.units

        # Every tracker defaults to 0 so that a missing unit of that type
        # cannot leave a name unbound. Previously roach_reward and
        # zergling_reward were only assigned inside the loop, which raised
        # UnboundLocalError when no unit of type 1922 / 1923 was present.
        roach_reward = 0
        zergling_reward = 0
        damageByRoach = 0
        damageByZergling = 0
        damageToRoach = 0
        damageToZergling = 0

        # Unit types 1922-1927 carry the reward components in their health.
        # NOTE(review): presumably custom marker units defined by the map -
        # confirm against the map's triggers.
        for unit in units:
            if unit.unit_type == 1922:
                roach_reward = unit.health
            elif unit.unit_type == 1923:
                zergling_reward = unit.health
            elif unit.unit_type == 1924:
                damageByRoach = unit.health
            elif unit.unit_type == 1925:
                damageByZergling = unit.health
            elif unit.unit_type == 1926:
                damageToRoach = unit.health
            elif unit.unit_type == 1927:
                damageToZergling = unit.health

        reward = roach_reward + zergling_reward - 4
        self.reward = reward
        self.rewards.append(reward)

        # Any change in reward ends the episode; a drop marks it as a death.
        if self.last_reward != self.reward:
            done = True
            dead = bool(self.last_reward > self.reward)

        self.last_reward = self.reward

        self.decomposed_rewards.append([
            roach_reward - 2,
            zergling_reward - 2,
            damageByRoach - 2,
            damageByZergling - 2,
            damageToRoach - 2,
            damageToZergling - 2,
        ])

        ###########################################

        return state, reward, done, dead, info
Exemplo n.º 22
0
    def reset(self):
        """Reset the SC2 environment and return the initial network input.

        Clears ``self.rewards`` and ``self.decomposed_rewards``, resets the
        wrapped environment, issues a ``move_camera`` action, stores the
        resulting observation in ``self.current_obs`` and sets
        ``self.actions_taken`` to 0.

        Returns:
            The ``feature_screen`` entry of ``observation[3]`` passed through
            ``getOneHotState`` and reshaped to ``(1, -1)``.
        """
        self.decomposed_rewards = []
        self.rewards = []
        self.last_timestep = self.sc2_env.reset()

        # Move the camera in any direction
        # This runs the ResetEpisode trigger built into the map
        action = actions.FUNCTIONS.move_camera([0, 0])
        self.last_timestep = self.sc2_env.step([action])[0]

        # Only the observation is needed here; the state is rebuilt from the
        # feature screen below.
        observation, _state, _reward, _done, _info = self.unpack_timestep(
            self.last_timestep)
        self.current_obs = observation
        self.actions_taken = 0

        from s2clientprotocol import sc2api_pb2 as sc_pb

        # The responses are not used: the per-unit bookkeeping that used to
        # consume the raw unit list only filled locals that were never read.
        # The two requests are still sent in the original order.
        # NOTE(review): they may matter to the client's request/response
        # state - confirm before removing them.
        self.sc2_env._controllers[0]._client.send(
            observation=sc_pb.RequestObservation())
        self.sc2_env._controllers[0]._client.send(action=sc_pb.RequestAction())

        input_screen_feature = {
            'PLAYER_RELATIVE': [1, 3, 16],
            'UNIT_TYPE': SIMPLE_SC2_UNITS,
            'HIT_POINT': 0,
            'HIT_POINT_RATIO': 0,
            'UNIT_DENSITY': 0
        }
        state = getOneHotState(observation[3]['feature_screen'],
                               input_screen_feature)
        # Flatten to a single row for the network.
        state = np.reshape(state, (1, -1))
        return state