def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return (-rewards.min_delay_unscaled(self) +
                rewards.penalize_standstill(self, gain=0.2))
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    r = 0
    if rl_actions is not None:
        # penalize phase switches (actions >= 0.5) to discourage excessive toggling
        r = -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=2)
    if self.env_params.evaluate:
        r += -rewards.min_delay_unscaled(self)
        # print(f"Reward computed: {r}, rl_actions: {rl_actions}")
    else:
        r += (-rewards.min_delay_unscaled(self) +
              rewards.penalize_standstill(self, gain=0.2))
        print(f"Reward computed: {r}, rl_actions: {rl_actions}")
    return r
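# --- Illustrative sketch (not part of the environment code above) ---
# A minimal numpy example of the action-penalty term in isolation, assuming
# flow.core.rewards.boolean_action_penalty sums the True entries of the boolean
# array and scales them by `gain`. The action values below are hypothetical.
import numpy as np

rl_actions = np.array([0.7, 0.2, 0.9, 0.4])  # hypothetical per-intersection outputs
switches = rl_actions >= 0.5                 # boolean "switch the phase" decisions
penalty = 2 * np.sum(switches)               # gain=2, as in the snippet above
# penalty == 4, so this term contributes -4 to the reward before the delay terms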
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        # debugging output for the individual reward terms:
        # print('delay penalty:', -rewards.min_delay_unscaled(self))
        # print('standstill penalty:', rewards.penalize_standstill(self, gain=0.2))
        # print('action penalty:', -rewards.boolean_action_penalty(rl_actions >= 0.5, gain=0.2))

        # previous formulation:
        # return (-rewards.min_delay_unscaled(self) +
        #         rewards.penalize_standstill(self, gain=0.2))
        return (-rewards.min_delay_unscaled(self)
                - rewards.boolean_action_penalty(rl_actions >= 0.5, gain=0.01)
                - rewards.waiting_penalty(self, gain=0.01))
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.test:
        return 0
    return -rewards.min_delay_unscaled(self) \
        - rewards.boolean_action_penalty(rl_actions >= 0.5, gain=1.0)
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if rl_actions is None:
        return {}

    if self.env_params.evaluate:
        rew = -rewards.min_delay_unscaled(self)
    else:
        rew = -rewards.min_delay_unscaled(self) \
            + rewards.penalize_standstill(self, gain=0.2)

    # each agent receives reward normalized by number of lights
    rew /= self.num_traffic_lights

    return {rl_id: rew for rl_id in rl_actions.keys()}
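# --- Illustrative sketch (not part of the environment code above) ---
# The multi-agent variant returns one scalar per RL agent id, the format RLlib's
# MultiAgentEnv API expects from step(). The ids and numbers below are made up
# purely to show the shape of the result; self.num_traffic_lights equals the
# number of agents in this toy case.
rl_actions = {"center0": 0.8, "center1": 0.1}
rew = -0.30 + 0.05          # e.g. delay term plus standstill term (toy values)
rew /= len(rl_actions)      # mirrors the division by self.num_traffic_lights
rews = {rl_id: rew for rl_id in rl_actions}
# rews == {"center0": -0.125, "center1": -0.125}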
def compute_reward(self, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return rewards.desired_velocity(self, fail=kwargs["fail"])
def compute_reward(self, state, rl_actions, **kwargs):
    """See class definition."""
    if self.env_params.evaluate:
        return -rewards.min_delay_unscaled(self)
    else:
        return rewards.desired_velocity(self, fail=kwargs["fail"])
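# --- Illustrative sketch (not part of the environment code above) ---
# How the `fail` keyword consumed by rewards.desired_velocity is typically
# supplied: Flow's base environment checks for collisions during step() and
# forwards that flag to compute_reward. Simplified paraphrase, not the exact source.
def step_reward(env, rl_actions):
    crash = env.k.simulation.check_collision()
    return env.compute_reward(rl_actions, fail=crash)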