def control(self, steer, gas):
    # Lazily build the nested optimizer the first time control is called.
    if self.optimizer is None:
        reward_h, reward_r = self.rewards
        reward_h = self.traj_h.reward(reward_h)
        reward_r = self.traj.reward(reward_r)
        # Inner problem: the human's controls traj_h.u maximize reward_h;
        # outer problem: the robot's controls traj.u maximize reward_r.
        self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                               reward_r, self.traj.u)
    self.traj_h.x0.set_value(self.human.x)
    self.optimizer.maximize(bounds=self.bounds)
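# --- Illustrative sketch (not from the original code) ------------------------
# The NestedMaximizer call above has a Stackelberg structure: the human's
# controls (traj_h.u) are optimized as a best response to the robot's plan,
# and the robot's controls (traj.u) are optimized while anticipating that
# response.  The toy example below reproduces the same pattern on scalar
# decision variables with scipy; the quadratic rewards and all names here are
# invented purely for illustration, not taken from utils.NestedMaximizer.
from scipy.optimize import minimize_scalar

def human_best_response(u_r):
    # Inner problem: maximize reward_h(u_h; u_r) = -(u_h - 0.5*u_r)**2
    # by minimizing its negation.
    res = minimize_scalar(lambda u_h: (u_h - 0.5 * u_r) ** 2,
                          bounds=(-1.0, 1.0), method='bounded')
    return res.x

def negative_robot_reward(u_r):
    # Outer problem: evaluate the robot's reward at the human's best response.
    u_h = human_best_response(u_r)
    return (u_r - 1.0) ** 2 + 0.1 * u_h ** 2

res = minimize_scalar(negative_robot_reward, bounds=(-1.0, 1.0), method='bounded')
print 'robot control:', res.x, 'human response:', human_best_response(res.x)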
def control(self, steer, gas):
    if self.optimizer is None:
        reward_h, reward_r = self.rewards
        # Same nested setup as the first variant, but the rewards are
        # accumulated over the whole trajectory via total() rather than reward().
        reward_h = self.traj_h.total(reward_h)
        reward_r = self.traj.total(reward_r)
        self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                               reward_r, self.traj.u)
    self.traj_h.x0.set_value(self.human.x)
    self.optimizer.maximize(bounds=self.bounds)
def control(self, steer, gas):
    if self.optimizer is None:
        # Belief over candidate human rewards, kept as Theano shared
        # log-probabilities in self.log_ps.
        u = sum(log_p for log_p in self.log_ps) / len(self.log_ps)
        self.prenormalize = th.function(
            [], None, updates=[(log_p, log_p - u) for log_p in self.log_ps])
        s = tt.log(sum(tt.exp(log_p) for log_p in self.log_ps))
        self.normalize = th.function(
            [], None, updates=[(log_p, log_p - s) for log_p in self.log_ps])
        self.update_belief = th.function(
            [], None,
            updates=[(log_p, log_p + self.human.past.log_p(reward('past')))
                     for reward, log_p in zip(self.rewards, self.log_ps)])
        self.normalize()
        self.t = 0
        if self.dumb:
            # A "dumb" car just replays a fixed control sequence.
            self.useq = self.objective
            self.optimizer = True
        else:
            if hasattr(self.objective, '__call__'):
                # Expected robot objective under the current belief, with one
                # predicted human trajectory per reward hypothesis.
                obj_h = sum([traj_h.total(reward('traj'))
                             for traj_h, reward in zip(self.traj_hs, self.rewards)])
                var_h = sum([traj_h.u for traj_h in self.traj_hs], [])
                obj_r = sum(tt.exp(log_p) * self.objective(traj_h)
                            for traj_h, log_p in zip(self.traj_hs, self.log_ps))
                self.optimizer = utils.NestedMaximizer(obj_h, var_h,
                                                       obj_r, self.traj.u)
            else:
                obj_r = self.objective
                self.optimizer = utils.Maximizer(self.objective, self.traj.u)
    if self.t == self.T:
        # Every T steps, fold the likelihood of the human's past controls into
        # the belief.  (self.t is presumably advanced elsewhere.)
        self.update_belief()
        self.t = 0
    if self.dumb:
        self.u = self.useq[0]
        self.useq = self.useq[1:]
    if self.t == 0:
        self.prenormalize()
        self.normalize()
    for traj_h in self.traj_hs:
        traj_h.x0.set_value(self.human.x)
    if not self.dumb:
        self.optimizer.maximize(bounds=self.bounds)
    for log_p in self.log_ps:
        print '%.2f' % np.exp(log_p.get_value()),
    print
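# --- Illustrative sketch (not from the original code) ------------------------
# The Theano updates above maintain the belief in log space: prenormalize
# subtracts the mean log-probability for numerical stability, normalize
# subtracts the log-sum-exp so the exponentiated values sum to one, and
# update_belief adds each hypothesis' log-likelihood of the human's observed
# past controls.  The numpy version below mirrors those three steps; the
# likelihood values are invented for illustration.
import numpy as np

log_ps = np.log(np.array([0.5, 0.3, 0.2]))      # current belief, in log space

# prenormalize: shift by the mean so the magnitudes stay well scaled
log_ps = log_ps - log_ps.mean()

# normalize: subtract the log-sum-exp so exp(log_ps) is a proper distribution
log_ps = log_ps - np.log(np.sum(np.exp(log_ps)))

# update_belief: add the (hypothetical) log-likelihood of the observed past
# behaviour under each reward hypothesis, then renormalize
log_likelihoods = np.array([-1.0, -2.5, -0.3])
log_ps = log_ps + log_likelihoods
log_ps = log_ps - np.log(np.sum(np.exp(log_ps)))

print np.exp(log_ps)    # posterior probabilities, summing to one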
def control(self, steer, gas):
    if self.optimizer is None:
        reward_h, reward_r = self.rewards
        reward_r = self.traj.reward(reward_r)    # reward_r is for the leader
        reward_h = self.traj_h.reward(reward_h)  # reward_h is for the follower
        self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                               reward_r, self.traj.u)
    self.traj_h.x0.set_value(self.follower.x)
    self.traj_o.x0.set_value(self.obstacle.x)
    self.optimizer.maximize(bounds=self.bounds)
def control(self, steer, gas):
    if self.nested:
        if self.nested_optimizer is None:
            reward_h, reward_r = self._nested_rewards
            reward_h = self.traj_h.reward(reward_h)
            reward_r = self.traj.reward(reward_r)
            self.nested_optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                                          reward_r, self.traj.u)
        self.traj_h.x0.set_value(self.human.x)
        self.nested_optimizer.maximize(bounds=self.bounds)
    else:
        print len(self.cache)
        if self.index < len(self.cache):
            # Replay a control computed on an earlier pass.
            self.u = self.cache[self.index]
        else:
            if self.simple_optimizer is None:
                r = self.traj.reward(self._simple_reward)
                self.simple_optimizer = utils.Maximizer(r, self.traj.u)
            # TODO: make sure these bounds are correct, and that we shouldn't
            # add bounded control to the reward function.
            self.simple_optimizer.maximize(bounds=self.bounds)
            self.cache.append(self.u)
        self.index += 1
def control(self, steer, gas):
    if self.optimizer is None:
        reward_h, reward_r, reward_o = self.rewards
        self.t_temp = reward_o
        # reward_h = reward_h + reward_o
        reward_h = self.traj_h.reward(reward_h)
        reward_r = self.traj.reward(reward_r)
        reward_o = self.traj_o.reward(reward_o)
        self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                               reward_r, self.traj.u)
    self.traj_h.x0.set_value(self.leader.x)
    self.traj_o.x0.set_value(self.obstacle.x)
    self.optimizer.maximize(bounds=self.bounds)
    # self.traj.u[0].set_value(self.traj_h.u[0].get_value())
    # These need to be the same, but currently they aren't:
    print "What leader thinks: ", self.leader1.traj_h.u[0].get_value()
    print "What itself thinks: ", self.traj.u[0].get_value()
def initialize(self):
    if self.optimizer_follower is None:
        if self.maniac is not None:
            # Have a maniac: build both non-critical and critical rewards
            # for the following truck and for the human.
            reward_r_noncritical = reward.attentive_truck_reward(
                use_DSG=self.use_DSG, critical=False,
                truck_traj=self.truck.traj, human_traj=self.traj_h,
                maniac_traj=self.maniac.traj, predecessor=self.predecessor)
            reward_r_critical = reward.attentive_truck_reward(
                use_DSG=self.use_DSG, critical=True,
                truck_traj=self.truck.traj, human_traj=self.traj_h,
                maniac_traj=self.maniac.traj, predecessor=self.predecessor)
            reward_h_noncritical = reward.attentive_human_reward(
                use_DSG=self.use_DSG, critical=False,
                truck_traj=self.truck.traj, human_traj=self.traj_h,
                maniac_traj=self.maniac.traj, lanes=self.lanes,
                fences=self.fences, predecessor=self.predecessor,
                bounds=self.human_bounds)  # i.e. world.cars[0].bounds
            reward_h_critical = reward.attentive_human_reward(
                use_DSG=self.use_DSG, critical=True,
                truck_traj=self.truck.traj, human_traj=self.traj_h,
                maniac_traj=self.maniac.traj, lanes=self.lanes,
                fences=self.fences, predecessor=self.predecessor,
                bounds=self.human_bounds)  # i.e. world.cars[0].bounds
            reward_r_noncritical = self.truck.traj.reward(reward_r_noncritical)
            reward_r_critical = self.truck.traj.reward(reward_r_critical)
            reward_h_noncritical = self.traj_h.reward(reward_h_noncritical)
            reward_h_critical = self.traj_h.reward(reward_h_critical)
            self.optimizer_follower = [
                utils.NestedMaximizer(reward_h_noncritical, self.traj_h.u,
                                      reward_r_noncritical, self.truck.traj.u),
                utils.NestedMaximizer(reward_h_critical, self.traj_h.u,
                                      reward_r_critical, self.truck.traj.u),
            ]
        else:
            # No maniac: only the non-critical rewards are needed
            # for the following truck and for the human.
            reward_r_noncritical = reward.attentive_truck_reward(
                use_DSG=self.use_DSG, critical=False,
                truck_traj=self.truck.traj, human_traj=self.traj_h,
                maniac_traj=None, predecessor=self.predecessor)
            reward_h_noncritical = reward.attentive_human_reward(
                use_DSG=self.use_DSG, critical=False,
                truck_traj=self.truck.traj, human_traj=self.traj_h,
                maniac_traj=None, lanes=self.lanes, fences=self.fences,
                predecessor=self.predecessor,
                bounds=self.human_bounds)  # the original had bounds=None
            reward_r_noncritical = self.truck.traj.reward(reward_r_noncritical)
            reward_h_noncritical = self.traj_h.reward(reward_h_noncritical)
            self.optimizer_follower = [
                utils.NestedMaximizer(reward_h_noncritical, self.traj_h.u,
                                      reward_r_noncritical, self.truck.traj.u)
            ]