def step(self, action_single):
    """Apply one agent action and advance the simulator by up to ``step_len`` ticks.

    The action is wrapped in ``playeraction.Action`` and handed to the
    simulator once. The simulator is then ticked ``self.step_len`` times,
    stopping early as soon as ``self.goalScored()`` reports a non-zero value.

    Args:
        action_single: Raw action passed to ``playeraction.Action``. The
            original comment states it must be an integer in ``[0, 18)``.

    Returns:
        A list ``[observation, reward, done, info]``. ``info`` is always an
        empty dict. ``reward`` is ``0.0`` while the episode is still running
        and ``self.game_sim.getSingeplayerReward()`` once it has ended.
    """
    self.steps_since_reset += 1
    self.game_sim.giveCommands([playeraction.Action(action_single)])

    # NOTE(review): the goal check is assumed to run after every simulator
    # tick (so a goal ends the step immediately) -- confirm against the
    # original indentation.
    for _ in range(self.step_len):
        self.game_sim.step()
        if self.goalScored() != 0:
            return [self.getState(), self.game_sim.getSingeplayerReward(), True, {}]

    # Running out of steps without a goal also ends the episode (a tie).
    timed_out = self.steps_since_reset >= self.max_steps
    if not timed_out:
        return [self.getState(), 0.0, False, {}]
    return [self.getState(), self.game_sim.getSingeplayerReward(), True, {}]
def getAction(self, frame):
    """Choose an action for ``frame`` from the network's output.

    The frame is converted with ``frame.posToNp`` and fed to ``self.network``.
    The last network output is the value estimate; every output before it is
    an action head. Each head contributes one positional argument to
    ``playeraction.Action``:

    * ``method == "random"``: sample from the head. A single-element head is
      treated as the probability of ``True``.
    * ``method == "max"``: take the most likely outcome. A single-element
      head yields ``True`` only when its probability exceeds 0.5. Previously
      ``argmax`` over the one-element array always produced 0.

    Args:
        frame: Object exposing ``posToNp(team, 0, accepts_normalised)``.

    Returns:
        The selected ``playeraction.Action``, flipped for the blue team.

    Raises:
        ValueError: If ``self.method`` or ``self.team`` is not recognised, or
            (only when a debug surface is attached) if the number of action
            heads is neither 1 nor 2.
    """
    frame_tensor = torch.FloatTensor(
        frame.posToNp(self.team, 0, self.accepts_normalised)
    )
    output = self.network(frame_tensor)

    win_prob = output[-1]
    action_pred_data = output[0:-1]
    if not self.value_is_prob:
        # The value is not already a probability: rescale it via (v + 1) / 2.
        win_prob = (win_prob + 1.0) / 2

    if self.method not in ("random", "max"):
        raise ValueError(f"Unknown action selection method: {self.method!r}")
    sample = self.method == "random"

    action_data = []
    for head in action_pred_data:
        probs = head.detach().numpy()
        if len(probs) == 1:
            # Single-element head: probability that the component is True.
            p = probs[0]
            if sample:
                choice = np.random.choice([False, True], p=[1 - p, p])
            else:
                # Equivalent to argmax over [1 - p, p] (ties resolve to False).
                choice = bool(p > 0.5)
        elif sample:
            choice = np.random.choice(len(probs), p=probs)
        else:
            choice = np.argmax(probs)
        action_data.append(choice)

    action = playeraction.Action(*action_data)

    if self.team == "blue":
        action = action.flipped()
    elif self.team != "red":
        raise ValueError(f"Unknown team: {self.team!r}")

    if self.debug_surf:
        if len(action_pred_data) == 1:
            # One combined head: average each consecutive pair (2k, 2k + 1)
            # into a single entry. A trailing unpaired entry is kept as-is.
            combined = action_pred_data[0].detach().numpy()
            odd_entries = combined[1::2]
            move_probs = np.array(combined[0::2])
            paired = len(odd_entries)
            move_probs[:paired] = (move_probs[:paired] + odd_entries) / 2
        elif len(action_pred_data) == 2:
            move_probs = action_pred_data[0].detach().numpy()
        else:
            raise ValueError(
                f"Expected 1 or 2 action heads, got {len(action_pred_data)}"
            )

        if self.team == "blue":
            # NOTE(review): presumably remaps direction indices to match the
            # flipped action for the blue side -- confirm against
            # playeraction.
            move_probs = move_probs[[0, 5, 6, 7, 8, 1, 2, 3, 4]]

        self.debug_surf.drawMove(
            move_probs, action.dir_idx, self.team, float(win_prob)
        )

    return action
def getAction(self, frame=None):
    """Build an action from the keys currently pressed in the GUI.

    Every key in ``self.movement_keys`` and the ``self.kick`` key is queried
    through ``self.gui.isKeyPressed``. The first and third movement states
    are swapped before being passed, together with the kick state, to
    ``playeraction.binaryToRaw``.

    Args:
        frame: Unused.

    Returns:
        A ``playeraction.Action`` built from the raw action values.
    """
    del frame  # unused

    pressed = [self.gui.isKeyPressed(key) for key in self.movement_keys]
    # Swap the first and third movement states before conversion.
    pressed[0], pressed[2] = pressed[2], pressed[0]
    kick_pressed = self.gui.isKeyPressed(self.kick)

    return playeraction.Action(*playeraction.binaryToRaw(*pressed, kick_pressed))
def reset(self, reset_type):
    """Reset the player's position, velocity, kick counter and current action.

    Args:
        reset_type: ``"random"`` places the player at a uniformly random
            offset from the pitch corner; ``"default"`` restores
            ``self.default_position``.

    Raises:
        ValueError: If ``reset_type`` is neither ``"random"`` nor
            ``"default"``.
    """
    if reset_type == "random":
        # NOTE(review): 580 and 200 are presumably the width and height of
        # the spawn area -- confirm against gameparams.
        self.pos = np.array([
            gameparams.pitchcornerx + np.random.random_sample() * 580,
            gameparams.pitchcornery + np.random.random_sample() * 200,
        ]).astype(float)
    elif reset_type == "default":
        # Copy rather than alias: otherwise any in-place update of self.pos
        # would silently overwrite the stored default position.
        self.pos = np.array(self.default_position, dtype=float)
    else:
        raise ValueError("Passed a wrong reset type to a player")

    self.vel = np.zeros(2)
    self.kick_count = 0

    # Set the action to the default action state.
    self.current_action = playeraction.Action()
def __init__(self, team, initial_position,
             initial_velocity=None,
             initial_acceleration=None,
             ):
    """Create a player entity for ``team``.

    Args:
        team: Team identifier stored on the player.
        initial_position: Starting position; also remembered as the
            position used by a non-random reset.
        initial_velocity: Starting velocity. Defaults to a fresh
            ``np.zeros(2)`` for each instance.
        initial_acceleration: Starting acceleration. Defaults to a fresh
            ``np.zeros(2)`` for each instance.
    """
    # Build the defaults per call: a default evaluated in the signature is
    # created once and would be shared (and mutated) across all players.
    if initial_velocity is None:
        initial_velocity = np.zeros(2)
    if initial_acceleration is None:
        initial_acceleration = np.zeros(2)

    # Initialise positional parameters and basic properties of the object.
    Entity.__init__(self, initial_position, initial_velocity,
                    initial_acceleration, gameparams.playerradius,
                    gameparams.playerbouncing)

    # Non-random reset position. Stored as an independent copy so it cannot
    # change if the array passed in is later modified in place.
    self.default_position = np.array(initial_position, dtype=float)

    # Current action, plus can_kick, which prevents kick-spamming.
    self.current_action = playeraction.Action()
    self.can_kick = True

    # Kick counter, used for reward shaping.
    self.kick_count = 0

    # Player properties.
    self.team = team
    self.mass = 1 / gameparams.playerinvmass
def getAction(self, frame=None):
    """Return a constant action, regardless of the game state.

    Args:
        frame: Unused.

    Returns:
        ``playeraction.Action(0)`` on every call.
    """
    del frame  # unused
    return playeraction.Action(0)