Пример #1
0
 def get_state_reward(self, fw):
     """Assemble the complete state reward as one feature object.

     Terms are accumulated in this order: weighted lane gaussians, weighted
     fence terms (sigmoid when ``self.fence_sigmoid`` is truthy, gaussian
     otherwise), an optional weighted speed term, weighted gaussians around
     the other cars' trajectories (plus a ``not_behind`` term unless
     ``self.fine_behind`` is truthy) and weighted sigmoids for the other
     trucks' trajectories.

     Args:
         fw: handed through unchanged to every lane/fence/trajectory
             feature constructor (presumably the tensor framework to build
             with -- TODO confirm against callers).

     Returns:
         The accumulated feature, seeded with a constant-zero feature.
     """
     total = feature.feature(lambda t, x, u: 0.0)

     # Lanes: the gaussian's stdev constant is selected by self.is_human.
     for ln, weight in zip(self.world.lanes, self.w_lanes):
         stdev = (constants.LANE_REWARD_STDEV_h
                  if self.is_human else constants.LANE_REWARD_STDEV_r)
         total += weight * ln.gaussian(fw=fw, stdev=stdev)

     # Fences: sigmoid- or gaussian-shaped depending on configuration.
     for fence, weight in zip(self.world.fences, self.w_fences):
         shape = fence.sigmoid if self.fence_sigmoid else fence.gaussian
         total += weight * shape(fw=fw)

     # Speed term is optional.
     if self.speed is not None:
         total += self.w_speed * feature.speed(self.speed)

     # Other cars: the not_behind term is only added when fine_behind is
     # falsy.
     for traj, weight in zip(self.other_car_trajs, self.w_other_car_trajs):
         skip_not_behind = self.fine_behind
         term = weight * traj.gaussian(fw, length=.14, width=.03)
         if not skip_not_behind:
             term = term + traj.not_behind(fw, self.w_behind)
         total += term

     # Other trucks use a sigmoid-shaped term.
     for traj, weight in zip(self.other_truck_trajs,
                             self.w_other_truck_trajs):
         total += weight * traj.sigmoid(fw)

     return total
Пример #2
0
 def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None,
                   speed=1., speed_import=1.):
     """Build a hand-weighted reward from lanes, fences, roads, speed and
     other trajectories.

     Args:
         trajs: trajectories contributing the final (negative-weight) term.
             ``None`` uses ``c.linear`` of every car in ``self.cars``; a
             ``car.Car`` instance uses ``c.linear`` of every car that
             compares unequal to it; anything else is iterated as given.
         lanes: objects providing ``gaussian()``; ``None`` -> ``self.lanes``.
         roads: objects providing ``gaussian(10.)``; ``None`` ->
             ``self.roads``.
         fences: objects providing ``gaussian()``; ``None`` ->
             ``self.fences``.
         speed: argument for ``feature.speed``; ``None`` drops the term.
         speed_import: extra multiplier applied to the speed term.

     Returns:
         ``0.1 * feature.control()`` plus all weighted terms.
     """
     lanes = self.lanes if lanes is None else lanes
     roads = self.roads if roads is None else roads
     fences = self.fences if fences is None else fences
     if trajs is None:
         trajs = [c.linear for c in self.cars]
     elif isinstance(trajs, car.Car):
         excluded = trajs
         trajs = [c.linear for c in self.cars if c != excluded]

     # Simple model weights, in order: lane, fence, road, speed, trajectory.
     # Alternative values: [.959, -46.271, 9.015, 8.531, -57.604]
     w_lane, w_fence, w_road, w_speed, w_traj = 1., -50., 10., 10., -60.

     total = 0.1 * feature.control()
     for ln in lanes:
         total = total + w_lane * ln.gaussian()
     for fence in fences:
         total = total + w_fence * fence.gaussian()
     for road in roads:
         total = total + w_road * road.gaussian(10.)
     if speed is not None:
         total = total + speed_import * w_speed * feature.speed(speed)
     for traj in trajs:
         total = total + w_traj * traj.gaussian()
     return total
Пример #3
0
 def simple_reward(self,
                   trajs=None,
                   lanes=None,
                   roads=None,
                   fences=None,
                   speed=1.,
                   speed_import=1.):
     """Return a fixed-weight reward combining world features.

     Every ``None`` argument among ``lanes``, ``roads`` and ``fences`` is
     replaced by the attribute of the same name on ``self``. ``trajs`` may
     be ``None`` (use ``c.linear`` for all of ``self.cars``), a ``car.Car``
     (use ``c.linear`` for all cars comparing unequal to it) or an iterable
     of trajectory objects. ``speed=None`` omits the speed term;
     ``speed_import`` scales it.

     The result starts at ``0.1 * feature.control()`` and adds, in order,
     lane, fence, road, speed and trajectory terms.
     """
     if lanes is None:
         lanes = self.lanes
     if roads is None:
         roads = self.roads
     if fences is None:
         fences = self.fences

     if trajs is None:
         trajs = [other.linear for other in self.cars]
     elif isinstance(trajs, car.Car):
         skip = trajs
         trajs = [other.linear for other in self.cars if other != skip]

     # Simple model; an alternative weight set was
     # [.959, -46.271, 9.015, 8.531, -57.604].
     weights = {
         'lane': 1.,
         'fence': -50.,
         'road': 10.,
         'speed': 10.,
         'traj': -60.,
     }

     reward = 0.1 * feature.control()
     for ln in lanes:
         reward = reward + weights['lane'] * ln.gaussian()
     for fence in fences:
         reward = reward + weights['fence'] * fence.gaussian()
     for road in roads:
         reward = reward + weights['road'] * road.gaussian(10.)
     if speed is not None:
         reward = (reward +
                   speed_import * weights['speed'] * feature.speed(speed))
     for traj in trajs:
         reward = reward + weights['traj'] * traj.gaussian()
     return reward
Пример #4
0
 def feature_calc(t, x, u, target_car):
     """Evaluate the five feature groups at ``(t, x, u)``.

     Relies on ``self`` being available from the enclosing scope.

     Returns:
         A five-element list: summed lane gaussians, summed fence
         gaussians, summed road gaussians (built with ``gaussian(10.)``),
         the ``feature.speed(1.)`` value, and the summed trajectory
         gaussians of every car in ``self.cars`` that compares unequal to
         ``target_car``.
     """
     lane_total = sum(ln.gaussian()(t, x, u).eval() for ln in self.lanes)
     fence_total = sum(fence.gaussian()(t, x, u).eval()
                       for fence in self.fences)
     road_total = sum(road.gaussian(10.)(t, x, u).eval()
                      for road in self.roads)
     # NOTE(review): unlike the other entries the speed term is not
     # .eval()'d -- confirm this is intentional.
     speed_total = 0 + feature.speed(1.)(t, x, u)
     others_total = sum(other.traj.gaussian()(t, x, u).eval()
                        for other in self.cars if other != target_car)
     return [lane_total, fence_total, road_total, speed_total, others_total]
Пример #5
0
    def state_rewards(self, fw):
        """Compute the individual state rewards, keyed by description.

        Each entry is one weighted term; nothing is summed here. Keys are
        ``'<kind> <index>'`` where the index is the position in the
        corresponding list on ``self``/``self.world``, plus the single key
        ``'speed'`` when ``self.speed`` is not ``None``.

        Args:
            fw: handed through unchanged to every lane/fence/trajectory
                feature constructor (presumably the tensor framework to
                build with -- TODO confirm against callers).

        Returns:
            dict mapping description strings to weighted feature terms.
        """
        rewards = {}

        # Lanes: the gaussian's stdev constant is selected by self.is_human.
        for i, (lane, w_lane) in enumerate(
                zip(self.world.lanes, self.w_lanes)):
            if self.is_human:
                lane_gaussian_std = constants.LANE_REWARD_STDEV_h
            else:
                lane_gaussian_std = constants.LANE_REWARD_STDEV_r
            rewards['lane gaussian {}'.format(i)] = w_lane * lane.gaussian(
                fw=fw, stdev=lane_gaussian_std)

        # Fences: sigmoid- or gaussian-shaped depending on configuration.
        for i, (fence, w_fence) in enumerate(
                zip(self.world.fences, self.w_fences)):
            if self.fence_sigmoid:
                rewards['fence sigmoid {}'.format(i)] = (
                    w_fence * fence.sigmoid(fw=fw))
            else:
                rewards['fence gaussian {}'.format(i)] = (
                    w_fence * fence.gaussian(fw=fw))

        if self.speed is not None:
            rewards['speed'] = self.w_speed * feature.speed(self.speed)

        # Other cars. The weight is used as-is for both humans and robots.
        for i, (other_car_traj, w) in enumerate(
                zip(self.other_car_trajs, self.w_other_car_trajs)):
            if self.fine_behind:
                # NOTE(review): length=.1 here but .14 in the other branch
                # -- confirm the difference is intentional.
                rewards['other traj gaussian {}'.format(i)] = (
                    w * other_car_traj.gaussian(fw, length=.1, width=.03))
            else:
                rewards['other traj gaussian {}'.format(i)] = (
                    w * other_car_traj.gaussian(fw, length=.14, width=.03))
                rewards['other traj not behind {}'.format(i)] = (
                    other_car_traj.not_behind(fw, self.w_behind))

        # Other trucks use a sigmoid-shaped term.
        for i, (other_truck_traj, w_other_truck_traj) in enumerate(
                zip(self.other_truck_trajs, self.w_other_truck_trajs)):
            rewards['other truck sigmoid {}'.format(i)] = (
                w_other_truck_traj * other_truck_traj.sigmoid(fw))

        return rewards
Пример #6
0
 def simple_reward(self,
                   trajs=None,
                   lanes=None,
                   roads=None,
                   fences=None,
                   speed=1.,
                   speed_import=1.):
     """Create the simple hand-weighted reward for one car.

     Args:
         trajs: trajectories contributing the final (negative-weight) term.
             ``None`` uses ``c.linear`` of every car in ``self.cars``; a
             ``car.Car`` or ``static_obj.Car`` instance uses ``c.linear`` of
             every car that compares unequal to it; an iterable is used as
             given; a single non-iterable trajectory object is treated as a
             one-element list.
         lanes: objects providing ``gaussian()``; ``None`` -> ``self.lanes``.
         roads: objects providing ``gaussian(10.)``; ``None`` ->
             ``self.roads``. If that is ``None`` too, the road term is
             skipped.
         fences: objects providing ``gaussian()``; ``None`` ->
             ``self.fences``.
         speed: argument for ``feature.speed``; ``None`` drops the term.
         speed_import: extra multiplier applied to the speed term.

     Returns:
         ``0.1 * feature.control()`` plus all weighted terms.
     """
     if lanes is None:
         lanes = self.lanes
     if roads is None:
         roads = self.roads
     if fences is None:
         fences = self.fences
     if trajs is None:
         trajs = [c.linear for c in self.cars]
     elif isinstance(trajs, (car.Car, static_obj.Car)):
         trajs = [c.linear for c in self.cars if c != trajs]

     # A lone trajectory object (not a list) is accepted too. Only the
     # "not iterable" case is handled; errors raised while building the
     # individual terms propagate instead of being silently retried.
     try:
         trajs = list(trajs)
     except TypeError:
         trajs = [trajs]

     r = 0.1 * feature.control()
     theta = [1., -50., 10., 10., -60.]  # Simple model
     # theta = [.959, -46.271, 9.015, 8.531, -57.604]

     # Add the lane, fence, road, speed and trajectory terms for all cars.
     for lane in lanes:
         r = r + theta[0] * lane.gaussian()
     for fence in fences:
         # A much larger penalty (factor 1000000) was tried here to keep
         # the cars from leaving the road.
         r = r + theta[1] * fence.gaussian()
     if roads is not None:  # self.roads itself may be None
         for road in roads:
             r = r + theta[2] * road.gaussian(10.)
     if speed is not None:
         r = r + speed_import * theta[3] * feature.speed(speed)
     for traj in trajs:
         r = r + theta[4] * traj.gaussian()
     return r
Пример #7
0
 def simple_reward(self, trajs=None, lanes=None, roads=None, fences=None,
                   speed=1., speed_import=1.):
     """Combine world features into one reward using fixed weights.

     ``lanes``, ``roads`` and ``fences`` default to the attributes of the
     same name on ``self``. ``trajs`` defaults to ``c.linear`` of every car
     in ``self.cars``; when a ``car.Car`` is passed instead, ``c.linear`` of
     every car comparing unequal to it is used. ``speed=None`` drops the
     speed term and ``speed_import`` scales it.

     Returns ``0.1 * feature.control()`` plus the weighted terms.
     """
     lanes = self.lanes if lanes is None else lanes
     roads = self.roads if roads is None else roads
     fences = self.fences if fences is None else fences
     if trajs is None:
         trajs = [c.linear for c in self.cars]
     elif isinstance(trajs, car.Car):
         excluded = trajs
         trajs = [c.linear for c in self.cars if c != excluded]

     reward = 0.1 * feature.control()

     # theta holds one weight per feature group, as used below:
     #   theta[0] -> gaussian of each lane
     #   theta[1] -> gaussian of each fence (negative weight)
     #   theta[2] -> gaussian(10.) of each road
     #   theta[3] -> feature.speed(speed)
     #   theta[4] -> gaussian of each trajectory in trajs (negative weight)
     theta = [1., -50., 10., 10., -60.]  # Simple model
     # theta = [.959, -46.271, 9.015, 8.531, -57.604]
     lane_w, fence_w, road_w, speed_w, traj_w = theta

     for ln in lanes:
         reward = reward + lane_w * ln.gaussian()
     for fence in fences:
         reward = reward + fence_w * fence.gaussian()
     for road in roads:
         reward = reward + road_w * road.gaussian(10.)
     if speed is not None:
         reward = reward + speed_import * speed_w * feature.speed(speed)
     for traj in trajs:
         reward = reward + traj_w * traj.gaussian()
     return reward
Пример #8
0
    # Demo: a three-lane straight road with a user-controlled car and a
    # red optimizer car, shown with the optimizer's reward as a heat map.
    import lane
    dyn = dynamics.CarDynamics(0.1)
    vis = Visualizer(dyn.dt)
    # One straight lane plus copies shifted by +1 and -1.
    vis.lanes.append(lane.StraightLane([0., -1.], [0., 1.], 0.13))
    vis.lanes.append(vis.lanes[0].shifted(1))
    vis.lanes.append(vis.lanes[0].shifted(-1))
    vis.cars.append(car.UserControlledCar(dyn, [0., 0., math.pi / 2., .1]))
    vis.cars.append(
        car.SimpleOptimizerCar(dyn, [0., 0.5, math.pi / 2., 0.], color='red'))
    # Reward for the optimizer car, built term by term:
    # negative weight on the gaussian around the first car's linear
    # trajectory ...
    r = -60. * vis.cars[0].linear.gaussian()
    # ... unit weight on each of the three lanes ...
    r = r + vis.lanes[0].gaussian()
    r = r + vis.lanes[1].gaussian()
    r = r + vis.lanes[2].gaussian()
    # ... negative weight on the lanes shifted one further out on each side ...
    r = r - 30. * vis.lanes[1].shifted(1).gaussian()
    r = r - 30. * vis.lanes[2].shifted(-1).gaussian()
    # ... a speed term, a gaussian(10.) on the first lane, and a control term.
    r = r + 30. * feature.speed(0.5)
    r = r + 10. * vis.lanes[0].gaussian(10.)
    r = r + .1 * feature.control()
    vis.cars[1].reward = r
    vis.main_car = vis.cars[0]
    vis.paused = True
    vis.set_heat(r)
    # Alternative heat maps:
    #vis.set_heat(vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
    #vis.set_heat(-vis.cars[1].traj.gaussian()+vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
    vis.run()

if __name__ == '__main__' and len(sys.argv) == 1:
    import world as wrld
    import car
    world = wrld.world2()
    vis = Visualizer(0.1, name='replay')
Пример #9
0
        the_car = None
        for c in the_world.cars:
            if isinstance(c, car.UserControlledCar):
                the_car = c
    T = the_car.traj.T

    # Build the training set: every recording is cut into windows of T
    # steps, starting at t = T and leaving at least T steps at the end.
    train = []
    for fname in files:
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # load recordings from a trusted source.
        # NOTE(review): the file is opened in text mode, which only works
        # for Python 2 / ASCII pickles -- confirm before porting.
        with open(fname) as f:
            us, xs = pickle.load(f)
        for t in range(T, len(xs[0]) - T, T):
            train.append({
                # value at t - 1 of every sequence in xs
                'x0': [xseq[t - 1] for xseq in xs],
                # the next T entries of every sequence in us
                'u': [useq[t:t + T] for useq in us],
            })

    # Weights, initialised to the hand-tuned simple model.
    theta = utils.vector(5)
    theta.set_value(np.array([1., -50., 10., 10., -60.]))

    # Reward = control term + weighted features (theta holds the weights).
    r = 0.1 * feature.control()
    for lane in the_world.lanes:
        r = r + theta[0] * lane.gaussian()
    for fence in the_world.fences:
        # Use the fence's own gaussian; the previous code re-used the stale
        # `lane` loop variable here, so fences never entered the reward.
        r = r + theta[1] * fence.gaussian()
    for road in the_world.roads:
        r = r + theta[2] * road.gaussian(10.)
    r = r + theta[3] * feature.speed(1.)
    # Loop variable deliberately not named `car`, so the `car` module used
    # elsewhere in this script is not shadowed.
    for other_car in the_world.cars:
        if other_car != the_car:
            r = r + theta[4] * other_car.traj.gaussian()

    run_irl(the_world, the_car, r, theta, train)
Пример #10
0
if __name__ == '__main__' and False:
    # Disabled demo (the `and False` guard means it never runs): a
    # three-lane straight road with a user-controlled car and a red
    # optimizer car, shown with the optimizer's reward as a heat map.
    import lane
    dyn = dynamics.CarDynamics(0.1)
    vis = Visualizer(dyn.dt)

    # One straight lane plus copies shifted by +1 and -1.
    vis.lanes.append(lane.StraightLane([0., -1.], [0., 1.], 0.13))
    vis.lanes.append(vis.lanes[0].shifted(1))
    vis.lanes.append(vis.lanes[0].shifted(-1))

    vis.cars.append(
        car.UserControlledCar(dyn, [0., 0., math.pi / 2., .1]))
    vis.cars.append(
        car.SimpleOptimizerCar(dyn, [0., 0.5, math.pi / 2., 0.],
                               color='red'))

    # Reward of the optimizer car, as one left-to-right sum.
    r = (-60. * vis.cars[0].linear.gaussian()
         + vis.lanes[0].gaussian()
         + vis.lanes[1].gaussian()
         + vis.lanes[2].gaussian()
         - 30. * vis.lanes[1].shifted(1).gaussian()
         - 30. * vis.lanes[2].shifted(-1).gaussian()
         + 30. * feature.speed(0.5)
         + 10. * vis.lanes[0].gaussian(10.)
         + .1 * feature.control())

    vis.cars[1].reward = r
    vis.main_car = vis.cars[0]
    vis.paused = True
    vis.set_heat(r)
    # Alternative heat maps:
    #vis.set_heat(vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
    #vis.set_heat(-vis.cars[1].traj.gaussian()+vis.lanes[0].gaussian()+vis.lanes[1].gaussian()+vis.lanes[2].gaussian())
    vis.run()

if __name__ == '__main__' and len(sys.argv)==1:
    import world as wrld
    import car
    world = wrld.world2()
    vis = Visualizer(0.1, name='replay')
                t = idx_u*self.step_per_u+idx
                r_list.append(reward(t, self.x[t], self.u[idx_u]))
        #r = [reward(t, self.x[t], self.u[t]) for t in range(self.T)]
        return sum(r_list)
        """
        g = [utils.grad(r[t], self.x[t]) for t in range(self.T)]
        for t in reversed(range(self.T-1)):
            g[t] = g[t]+tt.dot(g[t+1], utils.jacobian(self.x[t+1], self.x[t]))
        for t in range(self.T):
            g[t] = tt.dot(g[t], utils.jacobian(self.x[t], self.u[t]))+utils.grad(r[t], self.u[t], constants=[self.x[t]])
        return sum(r), {self.u[t]: g[t] for t in range(self.T)}
        """

if __name__ == '__main__':
    # Smoke test: maximise a speed + lane reward over a 5-step trajectory
    # and print the elapsed time and the resulting controls.
    from dynamics import CarDynamics
    import math
    import time

    dyn = CarDynamics(0.1)
    traj = Trajectory(5, dyn)
    straight_lane = lane.StraightLane([0., -1.], [0., 1.], .1)
    reward = feature.speed() + straight_lane.feature()
    r = traj.reward(reward)
    #traj.x0.value = np.asarray([0., 0., math.pi/2, 1.])
    traj.x0.set_value([0.1, 0., math.pi / 2, 1.])
    optimizer = utils.Maximizer(r, traj.u)

    start = time.time()
    optimizer.maximize(bounds=[(-1., 1.), (-2, 2.)])
    # Single-argument parenthesised print is valid on Python 2 and 3.
    print(time.time() - start)
    print([u.get_value() for u in traj.u])
Пример #12
0
        return f
    def reward(self, reward):
        """Accumulate ``reward`` over every step of the trajectory.

        Args:
            reward: callable invoked as ``reward(t, self.x[t], self.u[t])``
                for each ``t`` in ``range(self.T)``.

        Returns:
            The sum of the per-step values, starting from ``0`` (so ``0``
            is returned when ``self.T`` is 0).
        """
        per_step = []
        for step in range(self.T):
            per_step.append(reward(step, self.x[step], self.u[step]))
        return sum(per_step)
        # Disabled manual gradient computation, kept for reference
        # (it refers to the per-step list as `r`):
        # g = [utils.grad(r[t], self.x[t]) for t in range(self.T)]
        # for t in reversed(range(self.T-1)):
        #     g[t] = g[t]+tt.dot(g[t+1], utils.jacobian(self.x[t+1], self.x[t]))
        # for t in range(self.T):
        #     g[t] = tt.dot(g[t], utils.jacobian(self.x[t], self.u[t]))+utils.grad(r[t], self.u[t], constants=[self.x[t]])
        # return sum(r), {self.u[t]: g[t] for t in range(self.T)}

if __name__ == '__main__':
    # Smoke test: maximise a speed + lane reward over a 5-step trajectory
    # and print the elapsed time and the resulting controls.
    from dynamics import CarDynamics
    import math
    import time

    dyn = CarDynamics(0.1)
    traj = Trajectory(5, dyn)
    straight_lane = lane.StraightLane([0., -1.], [0., 1.], .1)
    reward = feature.speed() + straight_lane.feature()
    r = traj.reward(reward)
    #traj.x0.value = np.asarray([0., 0., math.pi/2, 1.])
    traj.x0.set_value([0.1, 0., math.pi / 2, 1.])
    optimizer = utils.Maximizer(r, traj.u)

    start = time.time()
    optimizer.maximize(bounds=[(-1., 1.), (-2, 2.)])
    # Single-argument parenthesised print is valid on Python 2 and 3.
    print(time.time() - start)
    print([u.get_value() for u in traj.u])
Пример #13
0
        # theta = [1., -50., 10., 10., -60.] # Simple model
=======
        # theta = [1., -50., 10., 10., -60., 10.] # Simple model
        # theta = [2.05026991,-50.,9.99045658,0.14135938,-60.] # Learned model
        # theta = [ 5.97469800e+00, -40.0789372,  10.0000000,  .0168410493, -60.0000000]
        theta = [-118.675528, -49.9917950,  10.0000000, -.0158836823, -604.318363]
        # theta = [2.05026991,-50.,9.99045658,5,-60.]
        for lane in lanes:
            r = r+theta[0]*lane.gaussian()
        for fence in fences:
            r = r+theta[1]*fence.gaussian()
        for road in roads:
            r = r+theta[2]*road.gaussian(10.)
        if speed is not None:
            r = r+speed_import*theta[3]*feature.speed(speed)
        for traj in trajs:
            r = r+theta[4]*traj.gaussian()
        return r

def playground():
    """Create a small test world and return it.

    The world gets three lanes (a straight centre lane and copies shifted
    by +1 and -1), the centre lane as its only road, fences at shifts +2
    and -2, and a single white user-controlled car.
    """
    car_dynamics = dynamics.CarDynamics(0.1)
    world = World()

    centre = lane.StraightLane([0., -1.], [0., 1.], 0.17)
    world.lanes += [centre, centre.shifted(1), centre.shifted(-1)]
    world.roads += [centre]
    world.fences += [centre.shifted(2), centre.shifted(-2)]

    # Alternative start (orange car at the origin):
    #world.cars.append(car.UserControlledCar(dyn, [0., 0., math.pi/2., 0.], color='orange'))
    start_state = [-0.17, -0.17, math.pi / 2., 0.]
    user_car = car.UserControlledCar(car_dynamics, start_state,
                                     color='white')
    world.cars.append(user_car)
    return world
Пример #14
0
    else:
        the_car = None
        for c in the_world.cars:
            if isinstance(c, car.UserControlledCar):
                the_car = c
    T = the_car.traj.T

    # Build the training set: every recording is cut into windows of T
    # steps, starting at t = T and leaving at least T steps at the end.
    train = []
    for fname in files:
        # NOTE: pickle.load can execute arbitrary code from the file; only
        # load recordings from a trusted source.
        # NOTE(review): the file is opened in text mode, which only works
        # for Python 2 / ASCII pickles -- confirm before porting.
        with open(fname) as f:
            us, xs = pickle.load(f)
        for t in range(T, len(xs[0]) - T, T):
            train.append({
                # value at t - 1 of every sequence in xs
                'x0': [xseq[t - 1] for xseq in xs],
                # the next T entries of every sequence in us
                'u': [useq[t:t + T] for useq in us],
            })

    # Weights, initialised to the hand-tuned simple model.
    theta = utils.vector(5)
    theta.set_value(np.array([1., -50., 10., 10., -60.]))

    # Reward = control term + weighted features (theta holds the weights).
    r = 0.1 * feature.control()
    for lane in the_world.lanes:
        r = r + theta[0] * lane.gaussian()
    for fence in the_world.fences:
        # Use the fence's own gaussian; the previous code re-used the stale
        # `lane` loop variable here, so fences never entered the reward.
        r = r + theta[1] * fence.gaussian()
    for road in the_world.roads:
        r = r + theta[2] * road.gaussian(10.)
    r = r + theta[3] * feature.speed(1.)
    # Loop variable deliberately not named `car`, so the `car` module used
    # elsewhere in this script is not shadowed.
    for other_car in the_world.cars:
        if other_car != the_car:
            r = r + theta[4] * other_car.traj.gaussian()

    run_irl(the_world, the_car, r, theta, train)