Example No. 1
 def control(self, steer, gas):
     if self.optimizer is None:
         # Build the nested optimizer once: reward_h is maximized over the human's
         # controls (self.traj_h.u) and reward_r over the robot's controls (self.traj.u).
         reward_h, reward_r = self.rewards
         reward_h = self.traj_h.reward(reward_h)
         reward_r = self.traj.reward(reward_r)
         self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u, reward_r, self.traj.u)
     # Re-anchor the human's predicted trajectory at the current human state, then optimize.
     self.traj_h.x0.set_value(self.human.x)
     self.optimizer.maximize(bounds=self.bounds)
Example No. 2
 def control(self, steer, gas):
     #import ipdb; ipdb.set_trace()
     if self.optimizer is None:
         reward_h, reward_r = self.rewards
         reward_h = self.traj_h.total(reward_h)
         reward_r = self.traj.total(reward_r)
         self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                                reward_r, self.traj.u)
     self.traj_h.x0.set_value(self.human.x)
     self.optimizer.maximize(bounds=self.bounds)
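
A note on the pattern shared by Examples No. 1 and No. 2: both build a symbolic reward for the human's trajectory and one for the robot's, pass them to utils.NestedMaximizer (which, judging from its arguments, maximizes reward_h over the human's controls and reward_r over the robot's controls in a nested fashion), then re-anchor the human trajectory at the observed human state and optimize at every control step. The sketch below illustrates that nested best-response structure with scipy.optimize on made-up quadratic rewards; all names and numbers are hypothetical stand-ins, not the project's implementation.

    # Minimal nested (best-response) optimization sketch; everything here is illustrative.
    import numpy as np
    from scipy.optimize import minimize

    def reward_h(u_h, u_r):
        # Hypothetical human reward: track the robot's control with little effort.
        return -np.sum((u_h - u_r) ** 2) - 0.1 * np.sum(u_h ** 2)

    def reward_r(u_r, u_h):
        # Hypothetical robot reward: reach a target control while the human's response stays small.
        target = np.array([1.0, 0.0])
        return -np.sum((u_r - target) ** 2) - 0.5 * np.sum(u_h ** 2)

    def best_response(u_r):
        # Inner maximization: the human's best response to a fixed robot plan.
        return minimize(lambda u_h: -reward_h(u_h, u_r), np.zeros(2)).x

    def nested_maximize(u_r0, bounds=None):
        # Outer maximization: robot reward evaluated at the human's best response.
        res = minimize(lambda u_r: -reward_r(u_r, best_response(u_r)), u_r0,
                       bounds=bounds)
        return res.x, best_response(res.x)

    u_r, u_h = nested_maximize(np.zeros(2), bounds=[(-1.0, 1.0)] * 2)
    print('robot control:', u_r, 'human best response:', u_h)
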
Example No. 3
 def control(self, steer, gas):
     if self.optimizer is None:
         # The belief over candidate human rewards is stored as log-probabilities;
         # prenormalize shifts them by their mean for numerical stability.
         u = sum(log_p for log_p in self.log_ps) / len(self.log_ps)
         self.prenormalize = th.function([],
                                         None,
                                         updates=[(log_p, log_p - u)
                                                  for log_p in self.log_ps])
         # normalize subtracts the log of the summed probabilities so they sum to one.
         s = tt.log(sum(tt.exp(log_p) for log_p in self.log_ps))
         self.normalize = th.function([],
                                      None,
                                      updates=[(log_p, log_p - s)
                                               for log_p in self.log_ps])
         # update_belief adds each hypothesis' log-likelihood of the human's past controls.
         self.update_belief = th.function(
             [],
             None,
             updates=[(log_p, log_p + self.human.past.log_p(reward('past')))
                      for reward, log_p in zip(self.rewards, self.log_ps)])
         self.normalize()
         self.t = 0
         if self.dumb:
             self.useq = self.objective
             self.optimizer = True
         else:
             if hasattr(self.objective, '__call__'):
                 # Human objective: each candidate reward accumulated over its own trajectory.
                 obj_h = sum([
                     traj_h.total(reward('traj'))
                     for traj_h, reward in zip(self.traj_hs, self.rewards)
                 ])
                 var_h = sum([traj_h.u for traj_h in self.traj_hs], [])
                 # Robot objective: self.objective weighted by the belief over candidate rewards.
                 obj_r = sum(
                     tt.exp(log_p) * self.objective(traj_h)
                     for traj_h, log_p in zip(self.traj_hs, self.log_ps))
                 self.optimizer = utils.NestedMaximizer(
                     obj_h, var_h, obj_r, self.traj.u)
             else:
                 obj_r = self.objective
                 self.optimizer = utils.Maximizer(self.objective,
                                                  self.traj.u)
     if self.t == self.T:
         self.update_belief()
         self.t = 0
     if self.dumb:
         self.u = self.useq[0]
         self.useq = self.useq[1:]
     if self.t == 0:
         self.prenormalize()
         self.normalize()
         for traj_h in self.traj_hs:
             traj_h.x0.set_value(self.human.x)
         if not self.dumb:
             self.optimizer.maximize(bounds=self.bounds)
     # Print the current belief over the candidate human rewards.
     for log_p in self.log_ps:
         print('%.2f' % np.exp(log_p.get_value()), end=' ')
     print()
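
Example No. 3 keeps a belief over several candidate human reward functions as log-probabilities: prenormalize subtracts the mean log-probability to keep the exponentials well scaled, normalize subtracts the log of the summed probabilities so the belief sums to one, and update_belief adds each hypothesis' log-likelihood of the human's observed past controls. Below is a plain-NumPy sketch of the same bookkeeping; the likelihood values are invented placeholders standing in for self.human.past.log_p(reward('past')).

    import numpy as np

    # Uniform prior over three hypothetical candidate rewards, kept in log space.
    log_ps = np.log(np.ones(3) / 3.0)

    # Placeholder log-likelihoods of the observed past under each hypothesis.
    past_log_liks = np.array([-0.2, -1.5, -0.7])

    def update_belief(log_ps, past_log_liks):
        # Bayesian update: add each hypothesis' log-likelihood of the observation.
        return log_ps + past_log_liks

    def prenormalize(log_ps):
        # Shift by the mean log-probability so exp() stays numerically well scaled.
        return log_ps - np.mean(log_ps)

    def normalize(log_ps):
        # Subtract log(sum of probabilities) so the belief sums to one.
        return log_ps - np.log(np.sum(np.exp(log_ps)))

    log_ps = normalize(prenormalize(update_belief(log_ps, past_log_liks)))
    print(' '.join('%.2f' % p for p in np.exp(log_ps)))  # posterior belief
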
Example No. 4
    def control(self, steer, gas):

        if self.optimizer is None:
            reward_h, reward_r = self.rewards

            reward_r = self.traj.reward(reward_r)
            reward_h = self.traj_h.reward(reward_h)

            # reward_r is the reward for the leader
            # reward_h is the reward for the follower
            self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                                   reward_r, self.traj.u)

        self.traj_h.x0.set_value(self.follower.x)
        self.traj_o.x0.set_value(self.obstacle.x)
        self.optimizer.maximize(bounds=self.bounds)
Example No. 5
 def control(self, steer, gas):
     if self.nested:
         if self.nested_optimizer is None:
             reward_h, reward_r = self._nested_rewards
             reward_h = self.traj_h.reward(reward_h)
             reward_r = self.traj.reward(reward_r)
             self.nested_optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u, reward_r, self.traj.u)
         self.traj_h.x0.set_value(self.human.x)
         self.nested_optimizer.maximize(bounds = self.bounds)
     else:
         print(len(self.cache))
         if self.index < len(self.cache):
             self.u = self.cache[self.index]
         else:
             if self.simple_optimizer is None:
                 r = self.traj.reward(self._simple_reward)
                 self.simple_optimizer = utils.Maximizer(r, self.traj.u)
             #TODO: make sure these bounds are correct, and that we shouldn't add bounded control to reward function
             self.simple_optimizer.maximize(bounds = self.bounds)
             self.cache.append(self.u)
         self.index += 1
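
The non-nested branch of Example No. 5 replays previously computed controls from a cache and only runs the optimizer when the index has moved past the end of the cache. Here is a stripped-down sketch of that cache-and-replay bookkeeping; compute_control is a made-up stand-in for the Maximizer call and not part of the original code.

    class CachedController(object):
        def __init__(self):
            self.cache = []   # controls computed so far, in order
            self.index = 0    # current position in the replayed sequence

        def compute_control(self):
            # Placeholder for self.simple_optimizer.maximize(bounds=self.bounds).
            return (0.0, 0.0)

        def control(self):
            if self.index < len(self.cache):
                u = self.cache[self.index]    # replay a control computed earlier
            else:
                u = self.compute_control()    # optimize once and remember the result
                self.cache.append(u)
            self.index += 1
            return u
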
Example No. 6
    def control(self, steer, gas):
        if self.optimizer is None:
            #if True:
            reward_h, reward_r, reward_o = self.rewards
            self.t_temp = reward_o
            #reward_h = reward_h + reward_o

            reward_h = self.traj_h.reward(reward_h)
            reward_r = self.traj.reward(reward_r)
            reward_o = self.traj_o.reward(reward_o)

            self.optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u,
                                                   reward_r, self.traj.u)

        self.traj_h.x0.set_value(self.leader.x)
        self.traj_o.x0.set_value(self.obstacle.x)
        self.optimizer.maximize(bounds=self.bounds)
        #self.traj.u[0].set_value(self.traj_h.u[0].get_value())

        # These should be the same, but they aren't.
        print("What leader thinks: ", self.leader1.traj_h.u[0].get_value())
        print("What itself thinks: ", self.traj.u[0].get_value())
Example No. 7
    def initialize(self):
        if self.optimizer_follower is None:

            if self.maniac is not None:  # Have a maniac.

                # For the following truck.
                reward_r_noncritical = reward.attentive_truck_reward(
                    use_DSG=self.use_DSG,
                    critical=False,
                    truck_traj=self.truck.traj,
                    human_traj=self.traj_h,
                    maniac_traj=self.maniac.traj,
                    predecessor=self.predecessor)
                reward_r_critical = reward.attentive_truck_reward(
                    use_DSG=self.use_DSG,
                    critical=True,
                    truck_traj=self.truck.traj,
                    human_traj=self.traj_h,
                    maniac_traj=self.maniac.traj,
                    predecessor=self.predecessor)
                reward_h_noncritical = reward.attentive_human_reward(
                    use_DSG=self.use_DSG,
                    critical=False,
                    truck_traj=self.truck.traj,
                    human_traj=self.traj_h,
                    maniac_traj=self.maniac.traj,
                    lanes=self.lanes,
                    fences=self.fences,
                    predecessor=self.predecessor,
                    bounds=self.human_bounds)  #world.cars[0].bounds)
                reward_h_critical = reward.attentive_human_reward(
                    use_DSG=self.use_DSG,
                    critical=True,
                    truck_traj=self.truck.traj,
                    human_traj=self.traj_h,
                    maniac_traj=self.maniac.traj,
                    lanes=self.lanes,
                    fences=self.fences,
                    predecessor=self.predecessor,
                    bounds=self.human_bounds)  #world.cars[0].bounds)

                reward_r_noncritical = self.truck.traj.reward(
                    reward_r_noncritical)
                reward_r_critical = self.truck.traj.reward(reward_r_critical)
                reward_h_noncritical = self.traj_h.reward(reward_h_noncritical)
                reward_h_critical = self.traj_h.reward(reward_h_critical)

                self.optimizer_follower = [
                    utils.NestedMaximizer(reward_h_noncritical, self.traj_h.u,
                                          reward_r_noncritical,
                                          self.truck.traj.u),
                    utils.NestedMaximizer(reward_h_critical, self.traj_h.u,
                                          reward_r_critical, self.truck.traj.u)
                ]

            else:  # No maniac.

                # For the following truck.
                reward_r_noncritical = reward.attentive_truck_reward(
                    use_DSG=self.use_DSG,
                    critical=False,
                    truck_traj=self.truck.traj,
                    human_traj=self.traj_h,
                    maniac_traj=None,
                    predecessor=self.predecessor)
                reward_h_noncritical = reward.attentive_human_reward(
                    use_DSG=self.use_DSG,
                    critical=False,
                    truck_traj=self.truck.traj,
                    human_traj=self.traj_h,
                    maniac_traj=None,
                    lanes=self.lanes,
                    fences=self.fences,
                    predecessor=self.predecessor,
                    bounds=self.human_bounds)  # the original used bounds=None here.

                reward_r_noncritical = self.truck.traj.reward(
                    reward_r_noncritical)
                reward_h_noncritical = self.traj_h.reward(reward_h_noncritical)

                self.optimizer_follower = [
                    utils.NestedMaximizer(reward_h_noncritical, self.traj_h.u,
                                          reward_r_noncritical,
                                          self.truck.traj.u)
                ]
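
Example No. 7 only constructs optimizer_follower: a list with one NestedMaximizer when there is no maniac and two (non-critical first, critical second) when there is. How the list is consumed is not shown in the snippet; the lines below are a hedged guess at the minimal selection logic, with is_critical and bounds as hypothetical stand-ins.

    def run_follower(optimizer_follower, is_critical, bounds):
        # Pick the critical optimizer only if one was built (i.e. a maniac exists).
        idx = 1 if is_critical and len(optimizer_follower) > 1 else 0
        optimizer_follower[idx].maximize(bounds=bounds)
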