Example #1
    def control(self, steer, gas):
        print len(self.cache)  # IMPORTANT: prints which time step we are at now
        self.cache.append(None)
        if len(self.cache) >= 30:
            print "Exiting"
            exit()
        if not self.movable:
            self.index += 1

            # just to test, but the simple optimizer seems to be able to find the other cars and get a reward that depends on them
            #print self.traj.reward(self.reward).eval()
            #IMPORTANT
            return

        if self.index < len(self.cache):
            self.u = self.cache[self.index]
        else:
            if self.optimizer is None:
                # passes the self.reward function to traj.reward
                # what actually gets passed is self._reward
                # this is handled in traj.reward to get the car's reward
                r = self.traj.reward(self.reward)
                # creates a Maximizer instance for the given reward and trajectory
                self.optimizer = utils.Maximizer(r,
                                                 self.traj.u)  #IMPORTANT: slow
            # maximizes the reward using the maximizer
            self.optimizer.maximize()
            # caches what happened
            self.cache.append(self.u)
            # updates the current time
            self.sync(self.cache)
        # advances one time step
        self.index += 1
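For orientation, here is a minimal, self-contained sketch of the lazy-initialization and cache/replay pattern this control method uses. The CachedController class and the scipy stand-in for the optimizer are illustrative assumptions made for the sketch; the original code builds a Theano-based utils.Maximizer instead.

# Illustrative sketch only: scipy stands in for the Theano-based utils.Maximizer.
import numpy as np
from scipy.optimize import minimize

class CachedController(object):
    def __init__(self, objective, u0):
        self.objective = objective     # callable mapping a control vector to a scalar reward
        self.u = np.asarray(u0, dtype=float)
        self.cache = []                # one optimized control per simulated time step
        self.index = 0                 # current time step
        self.optimizer_built = False   # stands in for the "self.optimizer is None" check above

    def control(self):
        if self.index < len(self.cache):
            # replay a control computed on an earlier pass over this time step
            self.u = self.cache[self.index]
        else:
            if not self.optimizer_built:
                # in the original code this is where utils.Maximizer(r, traj.u) is built
                self.optimizer_built = True
            # maximizing the reward = minimizing its negative
            self.u = minimize(lambda u: -self.objective(u), self.u).x
            self.cache.append(self.u)
        self.index += 1
        return self.u

ctrl = CachedController(lambda u: -np.sum((u - 1.0) ** 2), u0=[0.0, 0.0])
print(ctrl.control())   # optimizes and caches the control for step 0
ctrl.index = 0
print(ctrl.control())   # replays the cached control instead of re-optimizing

The point of the cache is that re-running the same scenario replays the previously optimized controls instead of re-optimizing at every time step.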
Example #2
def run_irl(world, car, reward, theta, data):
    def gen():
        for point in data:
            for c, x0, u in zip(world.cars, point['x0'], point['u']):
                c.traj.x0.set_value(x0)
                for cu, uu in zip(c.traj.u, u):
                    cu.set_value(uu)
            yield

    r = car.traj.reward(reward)
    g = utils.grad(r, car.traj.u)
    H = utils.hessian(r, car.traj.u)
    I = tt.eye(utils.shape(H)[0])
    reg = utils.vector(1)
    reg.set_value([1e-1])
    H = H - reg[0] * I
    L = tt.dot(g, tt.dot(tn.MatrixInverse()(H), g)) + tt.log(tn.Det()(-H))
    for _ in gen():
        pass
    optimizer = utils.Maximizer(L, [theta],
                                gen=gen,
                                method='gd',
                                eps=0.1,
                                debug=True,
                                iters=1000,
                                inf_ignore=10)
    optimizer.maximize()
    print theta.get_value()
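Read directly off the code, the objective here is L(theta) = g^T H^{-1} g + log det(-H), with g and H the gradient and regularized Hessian of the demonstrated car's reward with respect to its controls. Up to constant factors this has the form of a Laplace approximation of a maximum-entropy demonstration likelihood; that reading is an interpretation of the code rather than text from the source.

    L(\theta) = g^{\top} H^{-1} g + \log\det(-H),
    \qquad g = \nabla_{u} r_{\theta}(u),
    \quad H = \nabla_{u}^{2} r_{\theta}(u) - \epsilon I

The gen() generator swaps one recorded demonstration at a time into the shared Theano variables before the objective and its gradient with respect to theta are evaluated.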
Example #3
 def control(self, steer, gas):
     if self.index < len(self.cache):
         self.u = self.cache[self.index]
     else:
         if self.optimizer is None:
             r = self.traj.reward(self.reward)
             self.optimizer = utils.Maximizer(r, self.traj.u)
         self.optimizer.maximize()
         self.cache.append(self.u)
         self.sync(self.cache)
     self.index += 1
Example #4
 def control(self, steer, gas):
     if self.optimizer is None:
         u = sum(log_p for log_p in self.log_ps) / len(self.log_ps)
         self.prenormalize = th.function([],
                                         None,
                                         updates=[(log_p, log_p - u)
                                                  for log_p in self.log_ps])
         s = tt.log(sum(tt.exp(log_p) for log_p in self.log_ps))
         self.normalize = th.function([],
                                      None,
                                      updates=[(log_p, log_p - s)
                                               for log_p in self.log_ps])
         self.update_belief = th.function(
             [],
             None,
             updates=[(log_p, log_p + self.human.past.log_p(reward('past')))
                      for reward, log_p in zip(self.rewards, self.log_ps)])
         self.normalize()
         self.t = 0
         if self.dumb:
             self.useq = self.objective
             self.optimizer = True
         else:
             if hasattr(self.objective, '__call__'):
                 obj_h = sum([
                     traj_h.total(reward('traj'))
                     for traj_h, reward in zip(self.traj_hs, self.rewards)
                 ])
                 var_h = sum([traj_h.u for traj_h in self.traj_hs], [])
                 obj_r = sum(
                     tt.exp(log_p) * self.objective(traj_h)
                     for traj_h, log_p in zip(self.traj_hs, self.log_ps))
                 self.optimizer = utils.NestedMaximizer(
                     obj_h, var_h, obj_r, self.traj.u)
             else:
                 obj_r = self.objective
                 self.optimizer = utils.Maximizer(self.objective,
                                                  self.traj.u)
     if self.t == self.T:
         self.update_belief()
         self.t = 0
     if self.dumb:
         self.u = self.useq[0]
         self.useq = self.useq[1:]
     if self.t == 0:
         self.prenormalize()
         self.normalize()
         for traj_h in self.traj_hs:
             traj_h.x0.set_value(self.human.x)
         if not self.dumb:
             self.optimizer.maximize(bounds=self.bounds)
     for log_p in self.log_ps:
         print '%.2f' % np.exp(log_p.get_value()),
     print
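A small numpy sketch of the belief bookkeeping above, under the assumption that log_ps holds log-probabilities over candidate human reward functions; the function and variable names here are illustrative, not from the original module.

import numpy as np

def update_belief(log_ps, log_likelihoods):
    # Bayes rule in log space: log posterior = log prior + log likelihood (+ const)
    log_ps = log_ps + log_likelihoods
    # "prenormalize": shift by the mean so the exponentials stay well scaled
    log_ps = log_ps - np.mean(log_ps)
    # "normalize": subtract the log partition function so the probabilities sum to one
    log_ps = log_ps - np.log(np.sum(np.exp(log_ps)))
    return log_ps

log_ps = update_belief(np.log([0.5, 0.5]), np.array([-1.0, -3.0]))
print(['%.2f' % p for p in np.exp(log_ps)])   # e.g. ['0.88', '0.12']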
Example #5
    def control(self, _steer, _gas):
        if self.model is None:
            raise Exception("NeuralCar.model is None")

        if self.mu == 1.0:
            self.u = self.model.predict(np.array([self.x]))[0]
            return

        if self.optimizer is None:
            r = self.traj.total(self.reward)
            self.optimizer = utils.Maximizer(r, self.traj.u)

        self.optimizer.maximize()

        self.u = (1 - self.mu) * self.u + self.mu * self.model.predict(
            np.array([self.x]))[0]
Example #6
 def control(self, steer, gas):
     print len(self.cache)  # IMPORTANT: prints which time step we are at now
     if self.index < len(self.cache):
         self.u = self.cache[self.index]
     else:
         if self.optimizer is None:
             # passes the self.reward function to traj.reward
             # what actually gets passed is self._reward
             # this is handled in traj.reward to get the car's reward
             r = self.traj.reward(self.reward)
             # creates a Maximizer instance for the given reward and trajectory
             self.optimizer = utils.Maximizer(r,
                                              self.traj.u)  #IMPORTANT: slow
         # maximizes the reward using the maximizer
         self.optimizer.maximize()
         # caches what happened
         self.cache.append(self.u)
         # updates the current time
         self.sync(self.cache)
     # advances one time step
     self.index += 1
Example #7
 def control(self, steer, gas):
     if self.nested:
         if self.nested_optimizer is None:
             reward_h, reward_r = self._nested_rewards
             reward_h = self.traj_h.reward(reward_h)
             reward_r = self.traj.reward(reward_r)
             self.nested_optimizer = utils.NestedMaximizer(reward_h, self.traj_h.u, reward_r, self.traj.u)
         self.traj_h.x0.set_value(self.human.x)
         self.nested_optimizer.maximize(bounds=self.bounds)
     else:
         print len(self.cache)
         if self.index < len(self.cache):
             self.u = self.cache[self.index]
         else:
             if self.simple_optimizer is None:
                 r = self.traj.reward(self._simple_reward)
                 self.simple_optimizer = utils.Maximizer(r, self.traj.u)
             #TODO: make sure these bounds are correct, and that we shouldn't add bounded control to reward function
             self.simple_optimizer.maximize(bounds=self.bounds)
             self.cache.append(self.u)
         self.index += 1
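The NestedMaximizer branch appears to set up a nested (leader-follower) optimization: the human trajectory's controls are optimized as a best response to the robot's plan, and the robot's controls are optimized while anticipating that response. In symbols, as a reading of the code, with u_H and u_R the human's and robot's control sequences:

    u_H^{*}(u_R) = \arg\max_{u_H} r_H(u_R, u_H),
    \qquad
    u_R^{*} = \arg\max_{u_R} r_R\bigl(u_R, u_H^{*}(u_R)\bigr)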
Example #8
    def control(self, _steer, _gas):
        if self.copyx is not None:
            print(self.copyx.shape[0], " examples")
            dists = np.array([np.linalg.norm(self.x - x) for x in self.copyx])
            i = np.argmin(dists)
            self.social_u.set_value(self.copyu[i])

            print("SOCIAL U_i", i)
            if dists[i] > 1.0:
                print("ignoring social")
                self.l.set_value(0)
            else:
                self.l.set_value(self.l_default)
        else:
            self.l.set_value(0)

        if self.optimizer is None:
            r = self.traj.total(self.reward) - self.l * (self.traj.u[0] -
                                                         self.social_u).norm(2)
            self.optimizer = utils.Maximizer(r, self.traj.u)

        self.optimizer.maximize()
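Written out, the objective maximized here is the car's usual total reward minus a penalty pulling the first control toward the control recorded in the nearest stored state, with the penalty weight switched off when no stored state is within distance 1.0. This is a direct transcription of the code, with \lambda standing for self.l:

    r = \mathrm{total}(\text{reward}) - \lambda \,\lVert u_0 - u_{\text{social}} \rVert_2,
    \qquad
    \lambda = \begin{cases} l_{\text{default}} & \text{if } \min_i \lVert x - x_i \rVert_2 \le 1.0,\\ 0 & \text{otherwise.} \end{cases}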
Example #9
 def optimizer(self):
     if self._optimizer is None:
         self._optimizer = utils.Maximizer(self._objective, self._variables)
     return self._optimizer
Example #10
 def control(self, steer, gas):
     if self.optimizer is None:
         r = self.traj.total(self.reward)
         self.optimizer = utils.Maximizer(r, self.traj.u)
     self.optimizer.maximize()
                t = idx_u*self.step_per_u+idx
                r_list.append(reward(t, self.x[t], self.u[idx_u]))
        #r = [reward(t, self.x[t], self.u[t]) for t in range(self.T)]
        return sum(r_list)
        """
        g = [utils.grad(r[t], self.x[t]) for t in range(self.T)]
        for t in reversed(range(self.T-1)):
            g[t] = g[t]+tt.dot(g[t+1], utils.jacobian(self.x[t+1], self.x[t]))
        for t in range(self.T):
            g[t] = tt.dot(g[t], utils.jacobian(self.x[t], self.u[t]))+utils.grad(r[t], self.u[t], constants=[self.x[t]])
        return sum(r), {self.u[t]: g[t] for t in range(self.T)}
        """

if __name__ == '__main__':
    from dynamics import CarDynamics
    import math
    dyn = CarDynamics(0.1)
    traj = Trajectory(5, dyn)
    l = lane.StraightLane([0., -1.], [0., 1.], .1)
    reward = feature.speed() + l.feature()  # + feature.speed()
    r = traj.reward(reward)
    #traj.x0.value = np.asarray([0., 0., math.pi/2, 1.])
    traj.x0.set_value([0.1, 0., math.pi/2, 1.])
    optimizer = utils.Maximizer(r, traj.u)
    import time
    t = time.time()
    for i in range(1):
        optimizer.maximize(bounds=[(-1., 1.), (-2, 2.)])
    print (time.time()-t)/1.
    print [u.get_value() for u in traj.u]