def __init__(self, stateDim, inputDim, dt):
        self.namespace = ""
        # state vector indices:
        # 0: x, 1: y, 2: yaw, 3: roll, 4: vx, 5: vy, 6: yaw rate

        self.dt = dt
        self.img_name = ''
        self.img = None
        self.length = 15
        self.width = 8
        self.x0 = 0  #250
        self.y0 = 0  #150
        self.yaw0 = 0
        self.roll0 = 0
        self.vx0 = 1
        self.vy0 = 0
        self.yawd0 = 0
        self.states_init = [
            self.x0, self.y0, self.yaw0, self.roll0, self.vx0, self.vy0,
            self.yawd0
        ]
        self.stateDim = stateDim
        self.inputDim = inputDim
        self.dynamicsDim = 4
        self.states = np.zeros(self.stateDim)
        self.states_der = np.zeros(self.stateDim)
        self.state_hist = np.zeros([1, (self.stateDim)])
        self.state_der_hist = np.zeros([1, (self.stateDim)])
        self.data = np.zeros([1, 2 * self.dynamicsDim + self.inputDim])
        self.inputs = np.zeros(self.inputDim)
        self.inputs_der = np.zeros(self.inputDim)
        self.dynamics = dynamics.Dynamics(stateDim, inputDim, dt)

        self.toggle = False

        self.pose_pub = rospy.Publisher('simulation/pose',
                                        PoseStamped,
                                        queue_size=1)
        self.bodyOdom_pub = rospy.Publisher('simulation/bodyOdom',
                                            Odometry,
                                            queue_size=1)
        self.poseOdom_pub = rospy.Publisher('pose_estimate',
                                            Odometry,
                                            queue_size=1)
        self.input_pub = rospy.Publisher('simulation/inputs',
                                         Joy,
                                         queue_size=1)
        self.accel_pub = rospy.Publisher('acceleration/all',
                                         Accel,
                                         queue_size=1)
        self.br = tf2_ros.TransformBroadcaster()

        # pygame.init()
        # self.screen     = pygame.display.set_mode((480, 320), DOUBLEBUF)
        self.RED = (255, 0, 0)
        self.BLACK = (0, 0, 0)
        self.WHITE = (255, 255, 255)
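
As a companion to the publishers above, here is a minimal sketch of packing the planar state (x, y, yaw) into the PoseStamped those topics carry; the helper name and the "map" frame id are assumptions, not part of the original class.

import rospy
import tf_conversions
from geometry_msgs.msg import PoseStamped

def pack_pose(x, y, yaw, frame_id="map"):
    # Hypothetical helper: build a PoseStamped from the planar state.
    msg = PoseStamped()
    msg.header.stamp = rospy.Time.now()
    msg.header.frame_id = frame_id
    msg.pose.position.x = x
    msg.pose.position.y = y
    # yaw-only orientation expressed as a quaternion
    q = tf_conversions.transformations.quaternion_from_euler(0.0, 0.0, yaw)
    (msg.pose.orientation.x, msg.pose.orientation.y,
     msg.pose.orientation.z, msg.pose.orientation.w) = q
    return msg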
Example #2
 def read_world(self, file_world):
     maxx = 0
     maxy = 0
     if file_world != '':
         with open(file_world, "r") as f:
             f1 = f.readlines()
         for i in range(len(f1)):
             if i == 0:
                 # line 0: x limit of the grid
                 self.xlim = np.abs(int(f1[i]))
                 maxx = self.xlim
             elif i == 1:
                 # line 1: y limit of the grid
                 self.ylim = np.abs(int(f1[i]))
                 maxy = self.ylim
             else:
                 # remaining lines: ASCII grid, rows flipped so y grows upward
                 for j in range(len(f1[i])):
                     if f1[i][j] == '@':
                         # '@' marks the boat start / first waypoint
                         self.curr_x = self.wpt_x = j
                         self.curr_y = self.wpt_y = (maxy - i + 1)
                     if f1[i][j] == '#':
                         # '#' marks a static obstacle cell
                         self.static_obs.append((j, (maxy - i + 1)))
         self.boat_dynamics = dynamics.Dynamics(cw4.cw4, self.curr_x,
                                                self.curr_y)
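
To make the parser above concrete, here is a throwaway example of the grid format it appears to expect (inferred from the code, not from any documentation): line 0 is the x limit, line 1 the y limit, and the remaining rows form an ASCII grid where '@' is the start cell and '#' an obstacle, with rows flipped so y grows upward.

world_text = """10
5
..........
....#.....
..@.#.....
....#.....
..........
"""

lines = world_text.splitlines()
maxy = abs(int(lines[1]))
start, obstacles = None, []
for i in range(2, len(lines)):
    for j, cell in enumerate(lines[i]):
        if cell == '@':
            start = (j, maxy - i + 1)        # same row flip as read_world above
        elif cell == '#':
            obstacles.append((j, maxy - i + 1))

print(start)      # (2, 2)
print(obstacles)  # [(4, 3), (4, 2), (4, 1)]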
Example #3
    def __init__(self):
        mass = rospy.get_param("mass", 1.0)
        damping = rospy.get_param("damping", 0.0)

        self.lock = threading.Lock()
        self.enabled = False
        
        self.dynamics = dynamics.Dynamics()
        self.dynamics.mass = mass
        self.dynamics.damping = damping
        self.dynamics.reset()

        # Setup services
        self.node_enable_srv = rospy.Service(
                'dynamics_enable', 
                NodeEnable, 
                self.handle_node_enable
                ) 

        self.set_dynam_params_srv = rospy.Service(
                'set_dynam_params', 
                SetDynamParams, 
                self.handle_set_dynam_params
                )

        self.get_dynam_params_srv = rospy.Service(
                'get_dynam_params',
                GetDynamParams,
                self.handle_get_dynam_params
                )

        # Setup subscribers
        self.force_sub = rospy.Subscriber('force', ForceMsg, self.force_callback)

        # Setup setpt topic
        #self.setptMsg = None
        self.setptMsg = SetptMsg()
        self.setptPub = rospy.Publisher('setpt_rel', SetptMsg, queue_size=1)
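
The service handlers registered above are not shown. A minimal sketch of handle_node_enable, reusing the same lock and the dynamics.reset() call from this class, could look like the following; the request field `enable` and the NodeEnableResponse contents are assumptions about the NodeEnable service definition (the response class would be imported alongside NodeEnable).

    def handle_node_enable(self, req):
        # Sketch only: toggle the node and reset the dynamics when disabling.
        with self.lock:
            self.enabled = req.enable                       # assumed request field
            if not self.enabled:
                self.dynamics.reset()
        return NodeEnableResponse(enabled=self.enabled)     # assumed response field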
Example #4
    def __init__(self,
                 start_x=0.0,
                 start_y=0.0,
                 start_heading=0.0,
                 start_speed=0.0,
                 nobs=4,
                 xlim=1000.0,
                 ylim=1000.0,
                 plot_bool=False,
                 file_world='',
                 goal_location='',
                 environment=None,
                 geotiff=None,
                 boat_model=cw4.cw4,
                 dynamic_obs=''):
        self.debug = False
        self.throttle = 0.0
        self.rudder = 0.0
        # Create an instance of the asv_sim model
        # Set parameters for the model.
        self.environment = environment
        self.boat_model = boat_model
        self.boat_dynamics = dynamics.Dynamics(self.boat_model, start_x,
                                               start_y, start_heading,
                                               start_speed, self.environment)

        # Set the starting state
        self.start_lat = 43.0
        self.start_long = -70.1
        self.start_heading = start_heading
        self.start_speed = start_speed

        # Set target waypoint
        self.desired_lat = 42.997797  # Does it need to be updated from somewhere else?
        self.desired_long = -70.102602  # Does it need to be updated from somewhere else?
        self.wpt_x = start_x  # 0.0
        self.wpt_y = start_y  # 0.0

        # deg to rad
        self.desired_lat = np.radians(self.desired_lat)
        self.desired_long = np.radians(self.desired_long)

        # set target speed
        self.desired_speed = self.start_speed
        self.desired_heading = self.start_heading

        # current state:
        self.curr_lat = self.start_lat
        self.curr_long = self.start_long
        self.curr_x = start_x
        self.curr_y = start_y
        self.curr_heading = self.start_heading

        # Loop at some rate
        self.period = 0.05
        self.decreaserate = 0.0

        self.last_update_time = None

        self.xx = []
        self.yy = []

        # Define the socket for input data...
        self.soc = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)

        # Define the socket for output data - ASV current state
        self.soc_asv_out = socket.socket(family=socket.AF_INET,
                                         type=socket.SOCK_DGRAM)

        # Define the socket for output data - Obstacles current state
        self.soc_obs_out = socket.socket(family=socket.AF_INET,
                                         type=socket.SOCK_DGRAM)

        # Limits of the area
        self.xlim = np.abs(xlim)
        self.ylim = np.abs(ylim)

        # read the grid and update the boat model, limit, curr_x, curr_y, wpt_x, wpt_y
        self.static_obs = None
        self.static_draw = []
        self.goal_location = []
        self.factor = 1
        self.read_world(file_world, geotiff, goal_location)

        if dynamic_obs:
            self.read_dynamic(dynamic_obs)
        else:
            # Initializing obstacles:
            self.nobs = nobs  # number of obstacles

            self.xobs = np.random.uniform(-self.xlim, self.xlim, self.nobs)
            self.yobs = np.random.uniform(-self.ylim, self.ylim, self.nobs)
            # speed of obstacles [m/sec]
            self.vobs = np.random.uniform(2.57, 7.72, self.nobs)
            # heading of obstacles [rad]
            self.hobs = np.random.uniform(0, 2 * np.pi, self.nobs)
            self.xx_obs = []
            self.yy_obs = []
            self.obs_id = np.arange(self.nobs)
            #self.cmap = plt.cm.summer
            #rcParams['axes.prop_cycle'] = cycler(color=self.cmap(np.linspace(0, 1, self.nobs)))
            # self.obstacles = np.zeros(self.nobs, dtype=[('position',float,2),
            #											('id', float, 1),
            #											('color',float, 4)])

            # future path

        self.future_heading = []
        self.future_x = []
        self.future_y = []
        self.estimateStart = (0, 0, 0)

        # Plot
        self.plot_boolean = plot_bool
        # Verifying self.plot_boolean is of type Boolean and not str:
        if self.plot_boolean == 'True':
            self.plot_boolean = True
        if self.plot_boolean == 'False':
            self.plot_boolean = False

        if self.debug:
            print("Initial plot_boolean = ", self.plot_boolean)
        # plt.ion()
        # # Set up the figure
        # fig = plt.figure()
        # plt.xlabel("x [m]")
        # plt.ylabel("y [m]")
        self.plotaxes = None
        self.plot_draw = test.PLOT(self.static_obs, self.static_draw,
                                   self.xlim, self.ylim, self.factor)
        self.plotaxes_obs = list(range(self.nobs))
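
For orientation, a sketch of how the UDP sockets created above might be bound and used; the port numbers and the comma-separated payload format are placeholders, not taken from the original code.

import socket

# input socket: listen for throttle/rudder commands (placeholder port)
soc = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
soc.bind(("0.0.0.0", 9000))
soc.settimeout(0.05)            # matches the 0.05 s loop period above

# output socket: report the current ASV state (placeholder address)
soc_asv_out = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)

try:
    data, addr = soc.recvfrom(1024)
    throttle, rudder = (float(v) for v in data.decode().split(","))
except socket.timeout:
    throttle, rudder = 0.0, 0.0

state_msg = "{:.2f},{:.2f},{:.3f}".format(0.0, 0.0, 0.0)   # x, y, heading placeholders
soc_asv_out.sendto(state_msg.encode(), ("127.0.0.1", 9001))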
Example #5
    def read_world(self, file_world, geotiff, goal_location):
        maxx = 0
        maxy = 0
        if geotiff:
            geotiff = depth_map.BathyGrid(geotiff)
            x1, x2, y1, y2 = geotiff.getBound()
            maxx = np.abs(x2 - x1)
            maxy = np.abs(y2 - y1)
            geodata = geotiff.getGrid()
            by, bx = np.shape(geodata)
            previous = None
            self.xlim = max(bx, by)
            self.ylim = self.xlim
            self.factor = maxx / bx
            self.static_obs = [bitarray(by) for _ in range(bx)]
            for j in range(by):
                for i in range(bx):
                    if geodata[j, i] < 2:
                        # cells with a grid value below 2 are treated as obstacles
                        self.static_obs[i][by - 1 - j] = True
                        if previous is None:
                            previous = (i, by - 1 - j)
                    else:
                        self.static_obs[i][by - 1 - j] = False
                        if previous is not None:
                            x, y = previous
                            self.static_draw.append((previous, (i, y + 1)))
                            previous = None
                if previous is not None:
                    x, y = previous
                    self.static_draw.append((previous, (bx, y + 1)))
                    previous = None

            self.boat_dynamics = dynamics.Dynamics(self.boat_model,
                                                   self.curr_x, self.curr_y,
                                                   self.curr_heading,
                                                   self.start_speed,
                                                   self.environment)

        elif file_world != '':
            with open(file_world, "r") as f:
                f1 = f.readlines()
            previous = None

            for i in range(len(f1)):
                if i == 0:
                    # line 0: scale factor
                    self.factor = np.abs(int(f1[i]))
                elif i == 1:
                    # line 1: x limit
                    self.xlim = np.abs(int(f1[i]))
                    maxx = self.xlim
                elif i == 2:
                    # line 2: y limit; allocate the occupancy grid
                    self.ylim = np.abs(int(f1[i]))
                    maxy = self.ylim
                    self.static_obs = [bitarray(maxy) for _ in range(maxx)]
                    print(maxx, maxy)
                else:
                    # remaining lines: ASCII grid, '#' marks an obstacle cell
                    string = f1[i].strip()
                    for j in range(len(string)):
                        if string[j] == '#':
                            self.static_obs[j][maxy - i + 2] = True
                            if previous is None:
                                previous = (j, maxy - i + 2)
                        else:
                            self.static_obs[j][maxy - i + 2] = False
                            if previous is not None:
                                x, y = previous
                                self.static_draw.append((previous, (j, y + 1)))
                                previous = None
                    if previous is not None:
                        x, y = previous
                        self.static_draw.append((previous, (maxx, y + 1)))
                        previous = None
            self.boat_dynamics = dynamics.Dynamics(self.boat_model,
                                                   self.curr_x, self.curr_y,
                                                   self.curr_heading,
                                                   self.start_speed,
                                                   self.environment)
        if goal_location != '':
            with open(goal_location, "r") as f:
                f1 = f.readlines()
            numOfGoal = int(f1[0])  # declared goal count; the remaining lines are parsed regardless
            for i in range(1, len(f1)):
                s = f1[i].split(' ')
                self.goal_location.append((float(s[0]), float(s[1])))
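
Both branches above collapse consecutive obstacle cells in a row into (start, end) rectangles stored in static_draw; a standalone restatement of that run-length bookkeeping, with a tiny example:

def row_runs(row, y):
    # Scan one grid row left to right and collapse consecutive occupied cells
    # into ((x_start, y), (x_end, y + 1)) rectangles, as read_world does.
    runs, previous = [], None
    for j, occupied in enumerate(row):
        if occupied:
            if previous is None:
                previous = (j, y)
        else:
            if previous is not None:
                runs.append((previous, (j, y + 1)))
                previous = None
    if previous is not None:
        runs.append((previous, (len(row), y + 1)))
    return runs

print(row_runs([0, 1, 1, 0, 1], y=3))
# [((1, 3), (3, 4)), ((4, 3), (5, 4))]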
Example #6
    def run(self):

        # Cell edges for the pcolormesh below: 11 edges in a (10 columns)
        # and 46 edges in b (45 rows).
        xlist = np.linspace(0.0, 1.0, 11)
        ylist = np.linspace(5.0, 50.0, 46)
        vallist = np.zeros((45, 10))

        for a_iteration in range(10):
            a_param = 0.05 + a_iteration * 0.1
            for b_iteration in range(45):
                b_param = 5.5 + b_iteration * 1.0

                max_reward = -10000.0

                # (a,b)'s score
                rewards = 0

                theta = np.pi
                omega = 0.0
                state = np.array([[theta, omega]])

                for test_step in range(1000):

                    current_obs = torch.Tensor([[
                        np.sin(state[0, 0]),
                        np.cos(state[0, 0]), state[0, 1]
                    ]])  #state: numpy(1,2) -> torch.Tensor(1,3)
                    action = self.agent.get_action(current_obs,
                                                   None)  #Not input ounoise
                    action = action.detach().numpy()[
                        0]  #action: torch.Tensor(1,1) -> numpy(scaler)

                    next_state, reward, done = dynamics.Dynamics(
                        state, action, a_param,
                        b_param)  #next_state: numpy(1,2)

                    rewards += reward

                    state = next_state

                print("(a,b)=(" + str(a_param) + "," + str(b_param) +
                      ") : reward " + str(rewards))
                vallist[b_iteration, a_iteration] = rewards

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'

        fig, ax = plt.subplots()
        cs = ax.pcolormesh(xlist,
                           ylist,
                           vallist,
                           cmap="jet",
                           vmin=-4000.0,
                           vmax=-50.0)  #seismic,hot
        fig.colorbar(cs)
        ax.set_xlim(0.0, 1.0)
        ax.set_ylim(5.0, 50.0)
        ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
        ax.set_yticks(
            [5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0])
        ax.set_xlabel(r'$\xi_{1}$', fontsize=18)
        ax.set_ylabel(r'$\xi_{2}$', fontsize=18)

        fig.savefig('Score_of_mu_1.eps', pad_inches=0.05)
        fig.savefig('Score_of_mu_1.png', pad_inches=0.05)
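
Note that the sweep evaluates (a, b) at cell centers while xlist and ylist hold the cell edges passed to pcolormesh; a quick self-check of that correspondence, with the values reproduced from the loops above:

import numpy as np

x_edges = np.linspace(0.0, 1.0, 11)
y_edges = np.linspace(5.0, 50.0, 46)
a_centers = 0.5 * (x_edges[:-1] + x_edges[1:])   # 0.05, 0.15, ..., 0.95
b_centers = 0.5 * (y_edges[:-1] + y_edges[1:])   # 5.5, 6.5, ..., 49.5
assert np.allclose(a_centers, [0.05 + i * 0.1 for i in range(10)])
assert np.allclose(b_centers, [5.5 + j * 1.0 for j in range(45)])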
Example #7
 def __init__(self, stateDim, inputDim, dt):
     self.namespace = ""
     '''
     states : [x, y, yaw, vx]
     '''
     '''
     Initial conditions
     '''
     self.x0 = 0.
     self.y0 = 0.
     self.yaw0 = 0.
     self.vx0 = 0.
     '''
     Parameters (RC platform)
     '''
     self.dt = dt
     self.max_steer_angle = 17.75 * np.pi / 180  # rad
     self.max_vx = 30.0 / 3.6  # m/s
     self.length = 0.257
     '''
     Variable initialization
     '''
     self.stateDim = stateDim
     self.inputDim = inputDim
     self.dynamicsDim = 4
     self.states_init = [self.x0, self.y0, self.yaw0, self.vx0]
     self.states = self.states_init
     self.states_dot = np.zeros(self.stateDim)
     self.state_hist = np.zeros([1, (self.stateDim)])
     self.state_dot_hist = np.zeros([1, (self.stateDim)])
     self.data = np.zeros([1, 2 * self.dynamicsDim + self.inputDim])
     self.inputs = np.zeros(self.inputDim)
     self.toggle_switch = False
     '''
     Vehicle dynamics object
     '''
     self.dynamics = dynamics.Dynamics(stateDim, inputDim, dt)
     # self.dynamics.modelType = 1
     self.dynamics.max_steer = self.max_steer_angle
     '''
     ROS publishers
     '''
     self.pose_pub = rospy.Publisher('simulation/pose',
                                     PoseStamped,
                                     queue_size=1)
     self.bodyOdom_pub = rospy.Publisher(
         'simulation/bodyOdom', Odometry,
         queue_size=1)  # velocities in the body frame.
     self.poseOdom_pub = rospy.Publisher(
         'simulation/poseOdom', Odometry,
         queue_size=1)  # velocities in the map frame.
     self.input_pub = rospy.Publisher('simulation/inputs',
                                      Joy,
                                      queue_size=1)
     self.accel_pub = rospy.Publisher('simulation/acceleration',
                                      Accel,
                                      queue_size=1)
     self.br = tf2_ros.TransformBroadcaster()
     self.ego_model_marker_pub = rospy.Publisher('simulation/car_marker',
                                                 Marker,
                                                 queue_size=1)
     '''
     Variable definitions for autonomous driving
     '''
     self.auto_mode = True
     self.control_sub = rospy.Subscriber('control', AckermannDriveStamped,
                                         self.controlSubCallback)
     self.steering = 0
     self.throttle = 0
     self.steer_angle_to_norm = 1 / self.max_steer_angle  #1/0.25 #30/180*np.pi
     self.throttle_to_norm = 1
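
The controlSubCallback registered above is not shown; a plausible sketch that maps an AckermannDriveStamped command onto the normalized steering and throttle attributes is given below. The clamping of the steering command to [-1, 1] is an assumption.

    def controlSubCallback(self, msg):
        # steering_angle [rad] -> normalized steering via steer_angle_to_norm
        self.steering = max(-1.0, min(1.0, msg.drive.steering_angle * self.steer_angle_to_norm))
        # commanded speed -> throttle command (throttle_to_norm is 1 in this class)
        self.throttle = msg.drive.speed * self.throttle_to_norm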
Example #8
    def run(self):
        #Initialize Replay Memory (class)
        memory = MEMORY(self.args.replay_buffer_size)

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'
        params = {'legend.fontsize': 12, 'legend.handlelength': 3}
        plt.rcParams.update(params)

        fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(9, 6))
        plt.subplots_adjust(hspace=0.5)

        sum_of_rewards = 0

        a_param = self.args.a_param
        b_param = self.args.b_param

        #Learning Phase
        state = dynamics.Initialize()  # Get the initial state s_0 (numpy(1,2))
        print("Initial State is " + str(state))

        print('This episode mass:' + str(a_param))
        print('This episode length:' + str(b_param))

        MAX_STEP = 1000

        time_list = []
        a_list = []
        x_1_list = []
        x_2_list = []

        for learning_step in range(MAX_STEP):
            #gradually change
            if learning_step < 200:
                b_param += 45.0 / 200
            x_1_list.append(state[0, 0])
            x_2_list.append(state[0, 1])
            time_list.append(learning_step)

            current_obs = torch.Tensor(
                [[np.sin(state[0, 0]),
                  np.cos(state[0, 0]),
                  state[0, 1]]])  #state: numpy(1,2) -> torch.Tensor(1,3)
            action = self.agent.get_action(
                current_obs,
                None)  #exploration action by agent (torch.Tensor(1,1))
            action = action.detach().numpy()[
                0]  #action: torch.Tensor(1,1) -> numpy(1,)
            # exploration noise: only perturb while the state is away from the origin
            noise = 0.0
            if np.sqrt(state[0, 0]**2 + state[0, 1]**2) >= 0.05:
                noise = 0.1 * np.random.normal()
            action = action + noise
            a_list.append(action)

            next_state, reward, done = dynamics.Dynamics(
                state, action, a_param, b_param)  #next_state: numpy(1,2)
            sum_of_rewards += reward

            #Make Exploration
            action = torch.Tensor([action
                                   ])  #action: numpy(1,) -> torch.Tensor(1,1)
            mask = torch.Tensor(
                [not done])  #mask: bool(False) -> torch.Tensor(1)(True)
            next_obs = torch.Tensor([[
                np.sin(next_state[0, 0]),
                np.cos(next_state[0, 0]), next_state[0, 1]
            ]])  #next_state: numpy(1,2) -> torch.Tensor(1,3)
            reward = torch.Tensor(
                [reward])  #reward: numpy(scalar) -> torch.Tensor(1)

            # Skip experiences generated by overly large actions
            if abs(action[0]) <= 1.0:
                memory.push(current_obs, action, mask, next_obs,
                            reward)  # all torch.Tensor

            state = next_state

            #Update main DNN and target DNN
            if len(memory) > self.args.batch_size:
                transitions = memory.sample(
                    self.args.batch_size)  #Make exploration_batch
                batch = Transition(*zip(*transitions))

                self.agent.update_DNNs(batch)  #Update DNN
                self.agent.update_target_DNNs()  #Update Target DNN

            #if done:
            #break

        print("Sum of rewards is " + str(sum_of_rewards))

        axes[0].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[0].plot(time_list, a_list, linewidth=2)
        axes[0].set_xlim(0.0, MAX_STEP)
        axes[0].set_ylim(-1, 1)
        axes[0].set_xlabel('$k$', fontsize=16)
        axes[0].set_ylabel('$a[k]$', fontsize=16)
        axes[0].grid(True)

        axes[1].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[1].plot(time_list, x_1_list, linewidth=2)
        axes[1].set_xlim(0.0, MAX_STEP)
        axes[1].set_ylim(-np.pi, np.pi)
        axes[1].set_xlabel('$k$', fontsize=16)
        axes[1].set_ylabel('$x_1[k]$', fontsize=16)
        axes[1].grid(True)

        axes[2].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[2].plot(time_list, x_2_list, linewidth=2)
        axes[2].set_xlim(0.0, MAX_STEP)
        axes[2].set_ylim(-7, 7)
        axes[2].set_xlabel('$k$', fontsize=16)
        axes[2].set_ylabel('$x_2[k]$', fontsize=16)
        axes[2].grid(True)

        fig.savefig('standard_from5_to_50.eps',
                    bbox_inches="tight",
                    pad_inches=0.05)
        fig.savefig('standard_from5_to_50.png',
                    bbox_inches="tight",
                    pad_inches=0.05)
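
The MEMORY replay buffer and the Transition tuple used above are not defined in this example; a minimal version consistent with the push/sample/len calls and the Transition(*zip(*transitions)) unpacking might look like this (field order follows memory.push above):

import random
from collections import namedtuple, deque

Transition = namedtuple('Transition',
                        ('state', 'action', 'mask', 'next_state', 'reward'))

class MEMORY(object):
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)   # drop the oldest transitions first

    def push(self, *args):
        self.buffer.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)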
Example #9
    def run(self):
        #episode_final = False
        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'
        params = {'legend.fontsize': 12, 'legend.handlelength': 3}
        plt.rcParams.update(params)
        fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(9, 10))
        plt.subplots_adjust(hspace=0.8)
        #reward list
        sum_reward_list = []

        #======================Hyper Parameter=====================
        weight = np.array([[1 / 4, 1 / 4, 1 / 4, 1 / 4]])
        learn_alpha = 5e-5
        gamma = 0.99
        MAX_STEP = 1000
        #==========================================================

        max_reward = -10000.0

        a_param = self.args.a_param
        b_param = self.args.b_param

        weight_1_list = []
        weight_2_list = []
        weight_3_list = []
        weight_4_list = []

        time_list = []
        a_list = []
        x_1_list = []
        x_2_list = []

        td_error_list = []

        Discrete_time = 0

        state = np.array([[np.pi, 0.0]])

        for test_step in range(MAX_STEP):

            weight_1_list.append(weight[0, 0])  #store the initial parameter
            weight_2_list.append(weight[0, 1])
            weight_3_list.append(weight[0, 2])
            weight_4_list.append(weight[0, 3])

            time_list.append(test_step)
            x_1_list.append(state[0, 0])
            x_2_list.append(state[0, 1])

            current_obs = torch.Tensor([[state[0, 0], state[0, 1]]])
            action = self.agent.get_action(current_obs,
                                           weight)  #Not input ounoise
            action = action.detach().numpy()[
                0]  #action: torch.Tensor(1,1) -> numpy(scaler)

            #exploration noise###############################################
            noise = max(
                (400 - Discrete_time), 0.0) / 400 * 0.1 * np.random.normal()

            action = action + noise
            a_list.append(action)

            action = torch.Tensor([action])
            Q_vec = self.agent.get_Q_value(
                current_obs,
                action)  # Q(x[k],a[k]) as characteristic functions
            action = action.detach().numpy()[
                0]  #action: torch.Tensor(1,1) -> numpy(1,)

            next_state, reward, done = dynamics.Dynamics(
                state, action, a_param, b_param)  #next_state: numpy(1,2)
            next_obs = torch.Tensor([[next_state[0, 0], next_state[0, 1]]])

            #update of the parameters
            max_Q_next_vec = self.agent.get_next_value(next_obs, weight)

            param = np.array(
                [[weight[0, 0], weight[0, 1], weight[0, 2],
                  weight[0, 3]]])  #w=[w_{1},...,w_{N}]
            td_error = param @ Q_vec.T - (reward + gamma *
                                          (param @ max_Q_next_vec.T))
            td_error_list.append(abs(td_error[0, 0]))

            chara_vec = np.array(
                [[Q_vec[0, 0], Q_vec[0, 1], Q_vec[0, 2], Q_vec[0, 3]]])

            update_vec = td_error * chara_vec

            #Barrier
            eta = 1e-7
            epsilon_w = 1e-9
            barrier_vec = eta*np.array([[-1/(weight[0,0]+epsilon_w),\
                                        -1/(weight[0,1]+epsilon_w),\
                                        -1/(weight[0,2]+epsilon_w),\
                                        -1/(weight[0,3]+epsilon_w)]])

            update_vec = update_vec + barrier_vec

            pre_weight = weight  # memorize pre_weight
            weight = weight - learn_alpha * update_vec  # gradient step on the weights

            if np.any(weight < 0.0):  # some weight went negative: backtrack the step
                update_error_count = 1
                while True:
                    weight = pre_weight
                    weight = weight - (2**(-update_error_count)) * learn_alpha * update_vec
                    update_error_count += 1
                    if np.all(weight >= 0.0):
                        break
            # renormalize so the weights sum to 1
            weight = weight / np.sum(weight)

            state = next_state
            Discrete_time += 1

        axes[0].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[0].plot(time_list, a_list, linewidth=2)
        axes[0].set_xlim(0.0, MAX_STEP)
        axes[0].set_ylim(-1, 1)
        axes[0].set_xlabel('$k$', fontsize=16)
        axes[0].set_ylabel('$a[k]$', fontsize=16)
        axes[0].grid(True)

        axes[1].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[1].plot(time_list, x_1_list, linewidth=2)
        axes[1].set_xlim(0.0, MAX_STEP)
        axes[1].set_ylim(-np.pi, np.pi)
        axes[1].set_xlabel('$k$', fontsize=16)
        axes[1].set_ylabel('$x_1[k]$', fontsize=16)
        axes[1].grid(True)

        axes[2].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[2].plot(time_list, x_2_list, linewidth=2)
        axes[2].set_xlim(0.0, MAX_STEP)
        axes[2].set_ylim(-7, 7)
        axes[2].set_xlabel('$k$', fontsize=16)
        axes[2].set_ylabel('$x_2[k]$', fontsize=16)
        axes[2].grid(True)

        axes[3].plot(time_list, weight_1_list, linewidth=2, label="$w_1$")
        axes[3].plot(time_list, weight_2_list, linewidth=2, label="$w_2$")
        axes[3].plot(time_list, weight_3_list, linewidth=2, label="$w_3$")
        axes[3].plot(time_list, weight_4_list, linewidth=2, label="$w_4$")
        axes[3].set_xlim(0.0, MAX_STEP)
        axes[3].set_ylim(0, 1)
        axes[3].set_xlabel('$k$', fontsize=16)
        axes[3].set_ylabel('$w[k]$', fontsize=16)
        axes[3].grid(True)
        axes[3].legend(loc='upper left', ncol=4)

        axes[4].plot(time_list, td_error_list, linewidth=2)
        axes[4].set_xlim(0.0, MAX_STEP)
        # axes[4].set_ylim(0,0.5)
        axes[4].set_xlabel('$k$', fontsize=16)
        axes[4].set_ylabel(r'$|\delta[k]|$', fontsize=16)
        axes[4].grid(True)

        fig.savefig('TR_N4_case1.eps', bbox_inches="tight", pad_inches=0.05)
        fig.savefig('TR_N4_case1.png', bbox_inches="tight", pad_inches=0.05)
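
The per-index arithmetic above can be restated as one vectorized helper. This is the same update (TD error on the weighted Q vector, log-barrier gradient, step halving until all weights stay non-negative, renormalization), assuming weight, Q_vec and max_Q_next_vec are numpy arrays of shape (1, N):

import numpy as np

def update_weights(weight, Q_vec, max_Q_next_vec, reward,
                   gamma=0.99, learn_alpha=5e-5, eta=1e-7, epsilon_w=1e-9):
    td_error = weight @ Q_vec.T - (reward + gamma * (weight @ max_Q_next_vec.T))
    update_vec = td_error * Q_vec - eta / (weight + epsilon_w)   # TD term + barrier
    step = learn_alpha
    new_weight = weight - step * update_vec
    while np.any(new_weight < 0.0):          # halve the step until no weight is negative
        step *= 0.5
        new_weight = weight - step * update_vec
    return new_weight / np.sum(new_weight), abs(td_error[0, 0])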
Example #10
    def run(self):
        xlist = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
        ylist = np.array([5.0, 6.0, 7.0, 8.0, 9.0,\
                         10.0, 11.0, 12.0, 13.0, 14.0,\
                         15.0, 16.0, 17.0, 18.0, 19.0, \
                         20.0, 21.0, 22.0, 23.0, 24.0, \
                         25.0, 26.0, 27.0, 28.0, 29.0, \
                         30.0, 31.0, 32.0, 33.0, 34.0, \
                         35.0, 36.0, 37.0, 38.0, 39.0, \
                         40.0, 41.0, 42.0, 43.0, 44.0, \
                         45.0, 46.0, 47.0, 48.0, 49.0, 50.0])
        vallist = np.zeros((45,10))

        for a_iteration in range(10):
            a_param = 0.05 + a_iteration*0.1
            for b_iteration in range(45):
                b_param = 5.5 + b_iteration*1.0

                #======================Hyper Parameter=====================
                weight = np.array([[1/8,1/8,1/8,1/8,1/8,1/8,1/8,1/8]])
                learn_alpha = 5e-5
                gamma = 0.99
                MAX_STEP = 1000
                #==========================================================

                state = np.array([[np.pi, 0.0]])            
                rewards = 0 #sum of rewards for each system (a,b)

                for test_step in range(MAX_STEP):
                    current_obs = torch.Tensor([[state[0,0],state[0,1]]])
                    action = self.agent.get_action(current_obs, weight) 
                    action = action.detach().numpy()[0] #action: torch.Tensor(1,1) -> numpy(1,)

                    #exploration noise=========================
                    noise = max((400-test_step),0.0)/400*0.1*np.random.normal()
                    action = action + noise #numpy(1,)
                    #==========================================

                    action = torch.Tensor([action])
                    Q_vec = self.agent.get_Q_value(current_obs, action) # Q(x[k],a[k]) as characteristic functions
                    action = action.detach().numpy()[0] #action: torch.Tensor(1,1) -> numpy(1,)

                    next_state, reward, done = dynamics.Dynamics(state, action, a_param, b_param)  #next_state: numpy(1,2)
                    next_obs = torch.Tensor([[next_state[0,0], next_state[0,1]]])

                    #update of the parameters 
                    max_Q_next_vec = self.agent.get_next_value(next_obs, weight)

                    param = np.array([[weight[0,0], weight[0,1], weight[0,2], weight[0,3], weight[0,4], weight[0,5], weight[0,6], weight[0,7]]]) #w=[w_{1},...,w_{N}]
                    td_error = param @ Q_vec.T - (reward + gamma * (param @ max_Q_next_vec.T))

                    chara_vec = np.array([[Q_vec[0,0], Q_vec[0,1], Q_vec[0,2], Q_vec[0,3], Q_vec[0,4], Q_vec[0,5], Q_vec[0,6], Q_vec[0,7]]])

                    update_vec = td_error * chara_vec

                    #Barrier
                    eta = 1e-7
                    epsilon_w = 1e-9
                    barrier_vec = eta*np.array([[-1/(weight[0,0]+epsilon_w),\
                                                -1/(weight[0,1]+epsilon_w),\
                                                -1/(weight[0,2]+epsilon_w),\
                                                -1/(weight[0,3]+epsilon_w),\
                                                -1/(weight[0,4]+epsilon_w),\
                                                -1/(weight[0,5]+epsilon_w),\
                                                -1/(weight[0,6]+epsilon_w),\
                                                -1/(weight[0,7]+epsilon_w)]])

                    update_vec = update_vec + barrier_vec
                        
                    pre_weight = weight #memorize pre_weight
                    weight = weight - learn_alpha * (update_vec)
                        

                    if np.any(weight < 0.0):  # some weight went negative: backtrack the step
                        update_error_count = 1
                        while True:
                            weight = pre_weight
                            weight = weight - (2**(-update_error_count)) * learn_alpha * update_vec
                            update_error_count += 1
                            if np.all(weight >= 0.0):
                                break

                    # Normalize weight so the entries sum to 1
                    weight = weight / np.sum(weight)
                    
                    rewards += reward
                    state = next_state

                  
                print("##########################################################################################")
                print("(a,b)=("+str(a_param)+","+str(b_param)+") : reward "+str(rewards)+" Weight is "+str(weight))
                print("Last State is "+str(state))
                print("##########################################################################################")
                vallist[b_iteration,a_iteration] = rewards

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'

        fig, ax = plt.subplots()
        cs = ax.pcolormesh(xlist, ylist, vallist,  cmap="jet", vmin=-4000.0, vmax=-50.0)#seismic,hot
        fig.colorbar(cs)
        ax.set_xlim(0.0, 1.0)
        ax.set_ylim(5.0, 50.0)
        ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
        ax.set_yticks([5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0])
        ax.set_xlabel(r'$\xi_{1}$',fontsize=16)
        ax.set_ylabel(r'$\xi_{2}$',fontsize=16)
  
        fig.savefig('N8.eps', pad_inches=0.05) 
        fig.savefig('N8.png', pad_inches=0.05)
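
The eight-term expressions in this example are the N = 8 case of the same update; the barrier term and the sign checks can be written without enumerating indices. A vectorized restatement, assuming weight is a numpy array of shape (1, N):

import numpy as np

def barrier_gradient(weight, eta=1e-7, epsilon_w=1e-9):
    # elementwise -eta / (w_i + epsilon_w), independent of the number of weights
    return -eta / (weight + epsilon_w)

weight = np.full((1, 8), 1 / 8)
expected = 1e-7 * np.array([[-1.0 / (w + 1e-9) for w in weight[0]]])
assert np.allclose(barrier_gradient(weight), expected)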
Example #11
desired_long = -70.102602  # Does it need to be updated from somewhere else?

# deg to rad
desired_lat = np.radians(desired_lat)
desired_long = np.radians(desired_long)
wpt_x = 10
wpt_y = 10

# set target speed
desired_speed = 2.5  #get via UDP server

# Loop at some rate
dt = 0.01  # [sec]
t = 0.0

boat_dynamics = dynamics.Dynamics(boat)


def heading_to_rudder(heading):
    #heading_delta = data.heading.heading - current_heading
    heading_delta = desired_heading - curr_heading
    if heading_delta > np.pi:
        heading_delta -= np.pi * 2.0
    if heading_delta < -np.pi:
        heading_delta += np.pi * 2.0
    rudder = max(-1.0, min(1.0, heading_delta))
    return rudder


def speed_to_throttle(speed):
    #throttle = speed / dynamics.model["max_speed"]