def tron_client(env: TronGridClientEnvironment, username: str):
    """Connect to the game server and play interactively until the game ends.

    Joins the server as ``username``, spawns a keyboard-listener thread that
    updates a shared ``Action`` object, and then loops: render the latest
    observation, wait out the remainder of the frame budget, and submit the
    player's current action to the server.

    Parameters
    ----------
    env : TronGridClientEnvironment
        Client-side environment used to communicate with the game server.
    username : str
        Name under which to join the game.
    """
    logger.debug("Connecting to game server and waiting for game to start")
    player_num = env.connect(username)
    logger.debug("Player number: {}".format(player_num))
    logger.debug("First observation: {}".format(env.wait_for_turn()))
    logger.info("Game started...")

    # Shared mutable action object; the control thread writes the latest
    # keypress into it and we read/reset it once per frame.
    current_action = Action()
    control_thread = ControlThread(current_action)
    control_thread.start()

    frame_start_time = time()
    board_size = env.server_environment.N
    num_players = env.server_environment.num_players
    renderer = TronRender(board_size, num_players)

    while True:
        renderer.render_observation(env.observation)

        # Sleep only for the time remaining in this frame. time.sleep()
        # raises ValueError on a negative argument, so clamp at zero when
        # rendering overran the frame budget.
        frame_delta = time() - frame_start_time
        sleep(max(0.0, (FRAME_MILLISECONDS / 1000) - frame_delta))

        new_obs, reward, terminal, winners = env.step(current_action())
        frame_start_time = time()
        current_action.reset()

        if terminal:
            logger.info("Game is over. Players {} won".format(winners))
            logger.info("Final observation: {}".format(new_obs))
            renderer.close()
            break
class TronRaySinglePlayerEnvironment(gym.Env):
    """Single-agent gym wrapper around ``TronGridEnvironment``.

    The learning/human player is always player 0 and is controlled through
    :meth:`step`; every other player is driven by ``agent``.

    Action encoding: ``0 = forward``, ``1 = right``, ``2 = left``.
    """

    def __init__(self, board_size=15, num_players=4, spawn_offset=2, agent=None):
        """Create the wrapped environment.

        Parameters
        ----------
        board_size : int
            Side length of the square board.
        num_players : int
            Total number of players (player 0 is the controlled one).
        spawn_offset : int
            Spawn offset forwarded to ``TronGridEnvironment.new_state``.
        agent : callable or None
            Policy used for the non-human players. ``None`` (the default)
            creates a fresh ``SimpleAvoidAgent`` per instance — this replaces
            the original mutable default argument ``agent=SimpleAvoidAgent()``,
            which shared one agent object across all instances.
        """
        self.env = TronGridEnvironment.create(board_size=board_size,
                                              num_players=num_players)
        self.state = None
        self.players = None
        self.human_player = None
        self.spawn_offset = spawn_offset
        self.agent = agent if agent is not None else SimpleAvoidAgent()
        self.renderer = TronRender(board_size, num_players, winner_player=0)

        self.action_space = Discrete(3)
        # np.inf replaces np.infty, which was removed in NumPy 2.0.
        self.observation_space = Dict({
            'board': Box(0, num_players, shape=(board_size, board_size)),
            'heads': Box(0, np.inf, shape=(num_players,)),
            'directions': Box(0, 4, shape=(num_players,)),
            'deaths': Box(0, num_players, shape=(num_players,)),
        })

    def reset(self):
        """Start a new game and return the human player's observation."""
        self.state, self.players = self.env.new_state(
            spawn_offset=self.spawn_offset)
        self.human_player = self.players[0]
        return self._get_observation(self.human_player)

    def _get_observation(self, player):
        """Return the observation of the current state from ``player``'s view."""
        return self.env.state_to_observation(self.state, player)

    def step(self, action: int):
        """Advance the game one tick.

        The human player's ``action`` (0/1/2) is combined with the opponent
        agent's choices for every other living player.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)`` in the gym convention;
            ``done`` is True when the human player dies or the game ends.
        """
        human_player = self.human_player
        action_to_string = {0: 'forward', 1: 'right', 2: 'left'}

        actions = []
        for player in self.players:
            if player == human_player:
                actions.append(action_to_string[action])
            else:
                actions.append(
                    self.agent(self.env, self._get_observation(player)))

        self.state, self.players, rewards, terminal, winners = self.env.next_state(
            self.state, self.players, actions)

        observation = self._get_observation(human_player)
        reward = rewards[human_player]
        done = (human_player not in self.players) or terminal
        return observation, reward, done, {}

    def render(self, mode='human'):
        """Render the current state; returns None before the first reset."""
        if self.state is None:
            return None
        return self.renderer.render(self.state, mode)

    def close(self):
        """Release the renderer's resources."""
        self.renderer.close()

    def test(self, trainer, frame_time=0.1):
        """Play one full rendered episode using ``trainer`` to pick actions.

        Parameters
        ----------
        trainer
            RLlib-style trainer exposing ``compute_action``.
        frame_time : float
            Seconds to sleep between rendered frames.

        Returns
        -------
        The cumulative reward collected by the human player.
        """
        self.close()
        state = self.reset()
        done = False
        action = None
        reward = None
        cumulative_reward = 0

        while not done:
            # NOTE(review): ``extractor`` is assumed to be a module-level
            # feature extractor — confirm it is in scope at call time.
            # Bug fix: the original passed an undefined name ``obs`` here;
            # the current observation is held in ``state``.
            action = trainer.compute_action(extractor.transform(state),
                                            prev_action=action,
                                            prev_reward=reward)
            state, reward, done, results = self.step(action)
            cumulative_reward += reward
            self.render()
            sleep(frame_time)

        self.render()
        return cumulative_reward
class TronRayEnvironment(MultiAgentEnv):
    """RLlib multi-agent wrapper around ``TronGridEnvironment``.

    Agent ids are the stringified player indices (``"0"`` … ``"N-1"``).
    Action encoding: ``0 = forward``, ``1 = right``, ``2 = left``.
    """

    action_space = Discrete(3)

    def __init__(self, board_size=15, num_players=4):
        """Create the wrapped environment.

        Parameters
        ----------
        board_size : int
            Side length of the square board.
        num_players : int
            Total number of players.
        """
        self.env = TronGridEnvironment.create(board_size=board_size,
                                              num_players=num_players)
        self.state = None
        self.players = None
        self.renderer = TronRender(board_size, num_players)

        # np.inf replaces np.infty, which was removed in NumPy 2.0.
        self.observation_space = Dict({
            'board': Box(0, num_players, shape=(board_size, board_size)),
            'heads': Box(0, np.inf, shape=(num_players,)),
            'directions': Box(0, 4, shape=(num_players,)),
            'deaths': Box(0, num_players, shape=(num_players,)),
        })

    def reset(self):
        """Start a new game; return per-agent observations keyed by agent id."""
        self.state, self.players = self.env.new_state()
        return {
            str(i): self.env.state_to_observation(self.state, i)
            for i in range(self.env.num_players)
        }

    def step(self, action_dict):
        """Advance the game one tick for all agents in ``action_dict``.

        Parameters
        ----------
        action_dict : dict
            Maps agent id (str) to an int action; absent agents default
            to ``0`` (forward).

        Returns
        -------
        tuple
            ``(observations, rewards, dones, infos)`` keyed by agent id,
            plus the RLlib-mandated ``dones['__all__']`` flag.
        """
        action_to_string = {0: 'forward', 1: 'right', 2: 'left'}

        actions = []
        for player in self.players:
            action = action_dict.get(str(player), 0)
            actions.append(action_to_string[action])

        self.state, self.players, rewards, terminal, winners = self.env.next_state(
            self.state, self.players, actions)

        num_players = self.env.num_players
        alive_players = set(self.players)
        acting_players = [int(k) for k in action_dict.keys()]

        observations = {
            str(i): self.env.state_to_observation(self.state, i)
            for i in acting_players
        }
        rewards = {str(i): rewards[i] for i in acting_players}
        dones = {str(i): i not in alive_players for i in acting_players}
        dones['__all__'] = terminal

        # If the learning player (0) dies, end the episode for everyone.
        # Bug fix: the original hard-coded dones['1'], ['2'], ['3'], which
        # breaks for num_players != 4 and raised KeyError on dones['0']
        # when player 0 was absent from action_dict.
        if dones.get('0', False):
            for i in range(num_players):
                dones[str(i)] = True
            dones['__all__'] = True

        return observations, rewards, dones, {}

    def render(self, mode='human'):
        """Render the current state; returns None before the first reset."""
        if self.state is None:
            return None
        return self.renderer.render(self.state, mode)

    def close(self):
        """Release the renderer's resources."""
        self.renderer.close()

    def test(self, trainer, frame_time=0.1):
        """Play one full rendered episode with ``trainer`` driving all agents.

        Player "0" uses the ``"trainer"`` policy; all players (including a
        throwaway value for "0", immediately overwritten) are first computed
        with the ``"opponent"`` policy, matching the original behavior.

        Parameters
        ----------
        trainer
            RLlib-style trainer exposing ``compute_action(..., policy_id=...)``.
        frame_time : float
            Seconds to sleep between rendered frames.

        Returns
        -------
        Sum of all agents' rewards over the episode.
        """
        num_players = self.env.num_players
        self.close()
        state = self.reset()
        done = {"__all__": False}
        action = {str(i): None for i in range(num_players)}
        reward = {str(i): None for i in range(num_players)}
        cumulative_reward = 0

        while not done['__all__']:
            action = {
                i: trainer.compute_action(state[i],
                                          prev_action=action[i],
                                          prev_reward=reward[i],
                                          policy_id="opponent")
                for i in map(str, range(num_players))
            }
            action['0'] = trainer.compute_action(state['0'],
                                                 prev_action=action['0'],
                                                 prev_reward=reward['0'],
                                                 policy_id="trainer")
            state, reward, done, results = self.step(action)
            cumulative_reward += sum(reward.values())
            if done.get('0', False):
                print("Player Died")
            self.render()
            sleep(frame_time)

        self.render()
        return cumulative_reward