Example #1
File: evaluate.py  Project: AMJM/RL_vessel
def replay_trajectory(episodes):
    view = Viewer()
    view.plot_boundary(buoys)
    view.plot_goal(goal, goal_factor)
    for episode in episodes:
        transitions_list = episode['transitions_list']
        for transition in transitions_list:
            state = transition[0]
            view.plot_position(state[0], state[1], state[2])
    view.freeze_screen()
Example #2
def plot_sequence(tuples):
    view = Viewer()
    view.plot_boundary(buoys)
    view.plot_goal(goal, goal_factor)
    for transition in tuples:
        state = transition[0]
        print('Position: ', state[0], state[1], state[2])
        print('Velocities: ', state[3], state[4], state[5])
        print('Action: ', transition[1])
        view.plot_position(state[0], state[1], state[2])
    view.freeze_screen()
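Both helpers above consume the (state, action, state_prime, reward) tuples that evaluate_agent in Example #5 pickles per trajectory. Below is a minimal loading sketch, under the assumption that such trajectory_* pickle files sit in the working directory; the glob pattern and the episode-dict wrapping are illustrative, not project code.

import glob
import pickle

# Rebuild the structures replay_trajectory and plot_sequence expect
# (assumption: the pickle files were written by evaluate_agent, Example #5).
episodes = []
for path in glob.glob('trajectory_*'):
    if path.endswith('.csv'):
        continue  # skip the CSV exports, keep only the pickled transition lists
    with open(path, 'rb') as infile:
        transitions = pickle.load(infile)  # list of (state, action, state_prime, reward)
    episodes.append({'transitions_list': transitions})

replay_trajectory(episodes)                     # replay every loaded episode
plot_sequence(episodes[0]['transitions_list'])  # or print/plot a single one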
Example #3
def collect_trajectories():
    with tf.device('/cpu:0'):
        reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
        viewer = Viewer()
        viewer.plot_boundary(buoys)
        viewer.plot_goal(goal, 1000)

        agents = ['agents/agent_20180727160449Sequential_r____disc_0.8it20.h5']
        starting_points = [[11000, 5320, -105.5, 3, 0, 0],
                           [11000, 5320, -104.5, 3, 0, 0],
                           [11000, 5320, -105.5, 3, 0, 0],
                           [11000, 5300, -103.5, 3, 0, 0],
                           [11000, 5300, -102.5, 3, 0, 0],
                           [11000, 5300, -101.5, 3, 0, 0]]

        for agent_obj in agents:
            agent = learner.Learner(load_saved_regression=agent_obj, nn_=True)
            ret_tuples = list()
            results = list()
            num_steps = list()
            env = environment.Environment(rw_mapper=reward_mapping)
            env.set_up()
            for start_pos in starting_points:
                final_flag = 0
                transitions_list = list()
                compact_state_list = list()
                total_steps = 0
                env.set_single_start_pos_mode(start_pos)
                env.move_to_next_start()
                steps_inside = 0
                for step in range(evaluation_steps):
                    state = env.get_state()
                    print('Value for yaw_p :', state[5])
                    viewer.plot_position(state[0], state[1], state[2])
                    state_r = utils.convert_to_simple_state(state, geom_helper)
                    compact_state_list.append(state_r)
                    print('Value for yaw_p :', state_r[3])
                    print('Value for vlon:', state_r[0])
                    action = agent.select_action(state_r)
                    state_prime, reward = env.step(action[0], action[1])
                    transition = (state, (action[0], action[1]), state_prime,
                                  reward)
                    if abs(state_r[2]) < 50:
                        steps_inside += 1
                    final_flag = env.is_final()
                    print("***Evaluation step " + str(step + 1) + " Completed")
                    transitions_list.append(transition)
                    ret_tuples += transitions_list
                    total_steps = step
                    if final_flag != 0:
                        break
                results.append(final_flag)
                num_steps.append(total_steps)
                with open(
                        agent_obj + '_' + str(start_pos[1]) + '_' +
                        str(start_pos[2]) + str(final_flag) + '.csv',
                        'wt') as out:
                    csv_out = csv.writer(out)
                    csv_out.writerow(
                        ['x', 'y', 'heading', 'rudder_lvl', 'balance'])
                    for tr, compact_state in zip(transitions_list,
                                                 compact_state_list):
                        pos = (tr[0][0], tr[0][1], tr[0][2], tr[1][0],
                               compact_state[2])
                        csv_out.writerow(pos)
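Each starting point above also produces a CSV with columns x, y, heading, rudder_lvl, balance. A short read-back sketch follows; the path is hypothetical, real names follow the agent_obj + '_' + start_y + '_' + start_heading + final_flag + '.csv' pattern used in the with open(...) call above.

import csv

# Read back one of the per-start CSVs written by collect_trajectories
# (csv_path is a made-up example name).
csv_path = 'some_agent.h5_5320_-105.51.csv'
with open(csv_path, 'rt') as f:
    for row in csv.DictReader(f):
        print(float(row['x']), float(row['y']), float(row['heading']),
              float(row['rudder_lvl']), float(row['balance']))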
Example #4
class ShipEnv(Env):
    def __init__(self, type='continuous', action_dim=1):
        self.type = type
        self.action_dim = action_dim
        self.observation_space = spaces.Box(
            low=np.array([0, -np.pi / 2, 0, -4, -0.2]),
            high=np.array([150, np.pi / 2, 5.0, 4.0, 0.2]))
        self.init_space = spaces.Box(
            low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]),
            high=np.array([30, np.pi / 15, 1.5, 0.3, 0.01]))
        self.ship_data = None
        self.last_pos = np.zeros(3)
        self.last_action = np.zeros(self.action_dim)
        self.simulator = Simulator()
        self.point_a = (0.0, 0.0)
        self.point_b = (2000, 0.0)
        self.max_x_episode = (5000, 0)
        self.guideline = LineString([self.point_a, self.max_x_episode])
        self.start_pos = np.zeros(1)
        self.number_loop = 0  # loops in the screen -> used to plot
        self.borders = [[0, 150], [2000, 150], [2000, -150], [0, -150]]
        self.viewer = None

    def step(self, action):
        side = np.sign(self.last_pos[1])
        angle_action = action[0] * side
        rot_action = 0.2
        state_prime = self.simulator.step(angle_level=angle_action,
                                          rot_level=rot_action)
        # convert simulator states into observable states
        obs = self.convert_state(state_prime)
        # print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = np.array([angle_action, rot_action])
        info = dict()
        return obs, rew, dn, info

    def convert_state(self, state):
        """
        This method generates the features used to build the reward function
        :param state: Global state of the ship
        """
        ship_point = Point((state[0], state[1]))
        side = np.sign(state[1] - self.point_a[1])
        d = ship_point.distance(self.guideline)  # meters
        theta = side * state[2]  # radians
        vx = state[3]  # m/s
        vy = side * state[4]  # m/s
        thetadot = side * state[5]  # degrees/min
        obs = np.array([d, theta, vx, vy, thetadot])
        return obs

    def calculate_reward(self, obs):
        d, theta, vx, vy, thetadot = obs[0], obs[1] * 180 / np.pi, obs[2], obs[
            3], obs[4] * 180 / np.pi
        if not self.observation_space.contains(obs):
            print(
                "\n Action: %f,  State[%f %f %f], Velocity [%f, %f], Theta: %f, Distance: %f thetadot: %f \n"
                % (self.last_action[0], self.last_pos[0], self.last_pos[1],
                   self.last_pos[2], vx, vy, theta, d, thetadot))
            return -1000
        else:
            return 1 - 8 * np.abs(theta / 90) - np.abs(
                thetadot / 20) - 5 * np.abs(d) / 150 - np.abs(
                    vy / 4) - np.abs(vx - 2) / 2

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or -1 > state_prime[
                0] or state_prime[0] > self.max_x_episode[
                    0] or 160 < state_prime[1] or state_prime[1] < -160:
            if not self.observation_space.contains(obs):
                print("\n Smashed")
            if self.viewer is not None:
                self.viewer.end_episode()
            if self.ship_data is not None:
                if self.ship_data.iterations > 0:
                    self.ship_data.save_experiment(self.name_experiment)
            return True
        else:
            return False

    def set_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        init_states = np.array([
            self.start_pos[0], init[0], init[1], init[2] * np.cos(init[1]),
            init[2] * np.sin(init[1]), 0
        ])
        self.simulator.reset_start_pos(init_states)
        self.last_pos = np.array([self.start_pos[0], init[0], init[1]])
        print('Resetting position')
        state = self.simulator.get_state()
        if self.viewer is not None:
            self.viewer.end_episode()
        return self.convert_state(state)

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = Viewer()
            self.viewer.plot_boundary(self.borders)
            self.viewer.plot_guidance_line(self.point_a, self.point_b)

        img_x_pos = self.last_pos[0] - self.point_b[0] * (self.last_pos[0] //
                                                          self.point_b[0])
        if self.last_pos[0] // self.point_b[0] > self.number_loop:
            self.viewer.end_episode()
            self.viewer.plot_position(img_x_pos, self.last_pos[1],
                                      self.last_pos[2],
                                      20 * self.last_action[0])
            self.viewer.restart_plot()
            self.number_loop += 1
        else:
            self.viewer.plot_position(img_x_pos, self.last_pos[1],
                                      self.last_pos[2],
                                      20 * self.last_action[0])

    def close(self):
        self.viewer.freeze_scream()
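This variant follows the usual Gym step/reset/render cycle; with action_dim=1 the action is a single rudder level that step() mirrors with the ship's side of the guideline. Below is a minimal random-policy rollout sketch, assuming numpy and the project's Simulator/Viewer backends are importable; a trained policy would replace the random rudder.

import numpy as np

# Random rollout against the continuous, single-action ShipEnv above.
env = ShipEnv(type='continuous', action_dim=1)
obs = env.reset()
done = False
while not done:
    action = np.array([np.random.uniform(-1.0, 1.0)])  # rudder level in [-1, 1]
    obs, rew, done, info = env.step(action)
    env.render()
env.close()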
Example #5
def evaluate_agent(ag_obj):
    import reward
    reward_mapping = reward.RewardMapper('quadratic', _g_helper=geom_helper)
    reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    agent = learner.Learner(load_saved_regression=ag_obj, nn_=True)
    env = environment.Environment(rw_mapper=reward_mapping)
    env.set_up()
    viewer = Viewer()
    viewer.plot_boundary(buoys)
    viewer.plot_goal(goal, 100)

    starting_points = [
        [11000, 5280, -103.5, 3, 0, 0],
        [11000, 5280, -104.5, 3, 0, 0],
        [11000, 5280, -105.5, 3, 0, 0],
        [11000, 5300, -104, 3, 0, 0],
        [11000, 5280, -103.5, 3, 0, 0],
        [11000, 5320, -103.5, 3, 0, 0],
        [11000, 5320, -103.5, 3, 0, 0]]

    ret_tuples = list()
    # env.set_single_start_pos_mode([11000, 5380.10098, -103, 3, 0, 0])
    # env.set_single_start_pos_mode([8000, 4600, -103.5, 3, 0, 0])
    # env.set_single_start_pos_mode([12000, 5500, -90, 3, 0, 0])
    # env.set_single_start_pos_mode([6600, 4200, -102, 3, 0, 0])
    # env.starts_from_file_mode('starting_points_global_coord')
    # env.move_to_next_start()
    results = list()
    num_steps = list()
    steps_inside_goal_region = list()
    for start_pos in starting_points:
        final_flag = 0
        transitions_list = list()
        total_steps = 0
        env.set_single_start_pos_mode(start_pos)
        env.move_to_next_start()
        steps_inside = 0
        for step in range(evaluation_steps):
            state = env.get_state()
            print('Value for yaw_p :', state[5])
            viewer.plot_position(state[0], state[1], state[2])
            state_r = utils.convert_to_simple_state(state, geom_helper)
            print('Value for yaw_p :', state_r[3])
            action = agent.select_action(state_r)
            state_prime, reward = env.step(action[0], action[1])
            transition = (state, (action[0], action[1]), state_prime, reward)
            if abs(state_r[2]) < 50:
                steps_inside += 1
            final_flag = env.is_final()
            print("***Evaluation step " + str(step + 1) + " Completed")
            transitions_list.append(transition)
            ret_tuples += transitions_list
            total_steps = step
            if final_flag != 0:
                break
        results.append(final_flag)
        num_steps.append(total_steps)
        steps_inside_goal_region.append(steps_inside)
        with open('trajectory_' + agent.learner.__class__.__name__ + 'it' + str(total_steps) + 'end' + str(final_flag),
                  'wb') as outfile:
            pickle.dump(transitions_list, outfile)
        with open('trajectory_' + agent.learner.__class__.__name__ + 'it' + str(total_steps) + 'end' + str(
                final_flag) + '.csv', 'wt') as out:
            csv_out = csv.writer(out)
            csv_out.writerow(['x', 'y', 'heading', 'rudder_lvl'])
            for tr in transitions_list:
                pos = (tr[0][0], tr[0][1], tr[0][2], tr[1][0])
                csv_out.writerow(pos)
    with open('results' + agent.learner.__class__.__name__ + datetime.datetime.now().strftime('%Y%m%d%H%M%S'),
              'wb') as outfile:
        pickle.dump(num_steps, outfile)
        pickle.dump(results, outfile)
        pickle.dump(steps_inside_goal_region, outfile)
    return ret_tuples
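evaluate_agent dumps three objects into its results file in a fixed order: num_steps, results, steps_inside_goal_region. Loading them back mirrors that order; the filename below is hypothetical (the real one is 'results' + the learner class name + a timestamp).

import pickle

# Load the summary pickle written at the end of evaluate_agent; the three
# pickle.load calls mirror the three pickle.dump calls above.
results_path = 'resultsSequential20180727160449'  # made-up example name
with open(results_path, 'rb') as infile:
    num_steps = pickle.load(infile)                 # steps taken per starting point
    results = pickle.load(infile)                   # final_flag per starting point
    steps_inside_goal_region = pickle.load(infile)  # steps with abs(state_r[2]) < 50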
Example #6
File: reward.py  Project: AMJM/RL_vessel
class RewardMapper(object):
    def __init__(self, plot_flag=True, r_mode_='cte'):
        self.boundary = None
        self.goal_rec = None
        self.ship_polygon = None
        self.ship = None
        self.plot_flag = plot_flag
        if self.plot_flag:
            self.view = Viewer()
        self.set_ship_geometry(((0, 0), (0, 10), (20, 0)))
        self.ship_vel = list()
        self.ship_last_vel = list()
        self.ship_pos = list()
        self.ship_last_pos = list()
        self.last_ship = None
        self.goal_point = None
        self.g_heading_n_cw = None
        self.g_vel_x = None
        self.g_vel_y = None
        self.reward_mode = r_mode_
        self.last_angle_selected = None
        self.last_rot_selected = None
        self.guid_line = None
        self.upper_shore = None
        self.lower_shore = None

    def is_inbound_coordinate(self, x, y):
        return self.boundary.buffer(-20).contains(Point(x, y))

    def generate_inner_positions(self):
        points_dict = dict()
        for line_x in range(int(self.goal_point[0] + 5000),
                            int(self.goal_point[0] + 6000), 500):
            line = LineString([(line_x, 0), (line_x, 15000)])
            intersect = self.boundary.intersection(line)
            if intersect.geom_type == 'LineString':
                middle_point = (line_x,
                                (intersect.bounds[1] + intersect.bounds[3]) /
                                2)
                upper_point = (line_x,
                               (middle_point[1] + intersect.bounds[3]) / 2)
                lower_point = (line_x,
                               (middle_point[1] + intersect.bounds[1]) / 2)
                dist = Point(self.goal_point).distance(Point(middle_point))
                points_dict[middle_point] = dist
                points_dict[upper_point] = dist
                points_dict[lower_point] = dist
        return points_dict

    def get_middle_y(self, x):
        line = LineString([(x, 0), (x, 15000)])
        intersect = self.boundary.intersection(line)
        return (intersect.bounds[1] + intersect.bounds[3]) / 2

    def get_guidance_line(self):
        y_temp_a = self.get_middle_y(8000)
        y_temp_b = self.get_middle_y(9000)
        x_a = 3600
        x_b = 14000
        m, b = np.polyfit([8000, 9000], [y_temp_a, y_temp_b], 1)
        y_a = m * x_a + b
        y_b = m * x_b + b
        self.guid_line = LineString([(x_a, y_a), (x_b, y_b)])
        return (x_a, y_a), (x_b, y_b)

    def set_boundary_points(self, points):
        self.boundary = Polygon(points)
        if self.plot_flag:
            self.view.plot_boundary(points)

    def set_shore_lines(self, upper_points, lower_points):
        self.upper_shore = LineString(upper_points)
        self.lower_shore = LineString(lower_points)

    def get_shore_balance(self, x, y):
        ship_point = Point((x, y))
        #upper means positive sign
        upper_dist = ship_point.distance(self.upper_shore)
        lower_dist = ship_point.distance(self.lower_shore)
        return (upper_dist - lower_dist)

    def set_ship_geometry(self, points):
        self.ship_polygon = Polygon(points)

    def set_goal(self, point, heading, vel_l):
        factor = 300
        self.goal_point = point
        self.g_vel_x, self.g_vel_y, self.g_heading_n_cw = utils.local_to_global(
            vel_l, 0, heading)
        self.goal_rec = Polygon(
            ((point[0] - factor, point[1] - factor), (point[0] - factor,
                                                      point[1] + factor),
             (point[0] + factor, point[1] + factor), (point[0] + factor,
                                                      point[1] - factor)))
        if self.plot_flag:
            self.view.plot_goal(point, factor)

    def initialize_ship(self, x, y, heading, global_vel_x, global_vel_y,
                        global_vel_theta):
        self.ship_last_vel = [global_vel_x, global_vel_y, global_vel_theta]
        self.ship_last_pos = [x, y, heading]
        self.last_ship = affinity.translate(self.ship_polygon, x, y)
        self.last_ship = affinity.rotate(self.last_ship, heading, 'center')
        self.ship_pos = self.ship_last_pos
        self.ship_vel = self.ship_last_vel
        self.ship = self.last_ship
        if self.plot_flag:
            self.view.plot_position(x, y, heading)

    def update_ship(self, x, y, heading, global_vel_x, global_vel_y,
                    global_vel_theta, angle, rot):
        self.ship_last_pos = self.ship_pos
        self.ship_last_vel = self.ship_vel
        self.last_ship = self.ship
        self.last_angle_selected = angle
        self.last_rot_selected = rot
        self.ship_vel = [global_vel_x, global_vel_y, global_vel_theta]
        self.ship_pos = [x, y, heading]
        self.ship = affinity.translate(self.ship_polygon, x, y)
        self.ship = affinity.rotate(self.ship, heading, 'center')
        if self.plot_flag:
            self.view.plot_position(x, y, heading)

    def get_shortest_distance_from_boundary(self):
        a = self.ship.distance(self.boundary)
        return a

    def collided(self):
        collided = (not self.boundary.contains(self.ship))
        if collided:
            print('Collided!!')
        return collided

    def reached_goal(self):
        cont = self.goal_rec.contains(self.ship)
        # reached = cont and abs(self.ship_vel[0]) < abs(self.g_vel_x) and abs(self.ship_pos[2] - self.g_heading_n_cw) < 20
        # reached = abs(self.ship_pos[2] - self.g_heading_n_cw) < 20 and cont
        reached = cont
        # reached = abs(self.ship_vel[0]) < 0.2 or cont
        if reached:
            print('Reached goal!!')
        return reached

    def get_reward(self):
        # ref_array = np.array((self.goal_point[0], self.goal_point[1], self.g_heading_n_cw, self.g_vel_x, self.g_vel_y, 0))
        # array = np.array((self.ship_pos+self.ship_vel))
        ref_array = np.array(
            (self.goal_point[0], self.goal_point[1], self.g_heading_n_cw))
        array = np.array((self.ship_pos))
        old_array = np.array((self.ship_last_pos))
        # new_dist = np.linalg.norm(array - ref_array)
        # old_dist = np.linalg.norm(old_array - ref_array)
        new_u_misalign = abs(array[2] - ref_array[2])
        old_u_misalign = abs(old_array[2] - ref_array[2])
        # print('distance_from_goal_state: ', new_dist)
        # shore_dist = self.boundary.exterior.distance(self.ship)
        # old_guid_dist = self.guid_line.distance(self.last_ship)
        # new_guid_dist = self.guid_line.distance(self.ship)
        # old_shore_dist = self.boundary.boundary.distance(self.last_ship)
        # new_shore_dist = self.boundary.boundary.distance(self.ship)
        new_u_balance = abs(self.get_shore_balance(array[0], array[1]))
        old_u_balance = abs(self.get_shore_balance(old_array[0], old_array[1]))
        # old_balance = self.get_shore_balance(old_array[0], old_array[1])
        # old_misalign = old_array[2] - ref_array[2]
        reward = -0.1
        if self.reward_mode == 'cte':
            reward = -0.1
        elif self.reward_mode == 'potential':
            pass
        elif self.reward_mode == 'rule':
            pass
            # if (old_balance < 0 and old_misalign > 0 and self.last_angle_selected != - 0.5) or \
            #         (old_balance > 0 and old_misalign < 0 and self.last_angle_selected != 0.5):
            #     reward = -100
            # elif (old_balance == 0 and old_misalign == 0 and self.last_angle_selected == - 0.5):
            #     reward = -100
            # else:
            #     reward = 100
        elif self.reward_mode == 'dist':
            reward = 100 * math.exp(-0.000001 * old_u_balance -
                                    0.000001 * old_u_misalign)
        elif self.reward_mode == 'align':
            reward = 100 * math.exp(-0.000001 * old_u_misalign)
        elif self.reward_mode == 'step':
            if new_u_balance < 50 and new_u_misalign < 2:
                reward = 1
            else:
                reward = -0.1
        elif self.reward_mode == 'step_with_rudder_punish':
            if new_u_balance < 50 and new_u_misalign < 2:
                reward = 1
            else:
                reward = -0.1
            if abs(self.last_angle_selected) == 0.5:
                reward = reward - 0.2
        elif self.reward_mode == 'linear_with_rudder_punish':
            if new_u_balance < 30 and new_u_misalign < 2:
                reward = 1
            else:
                reward = -0.1 - 0.00001 * new_u_balance
            if abs(self.last_angle_selected) == 0.5:
                reward = reward - 0.2
        if self.collided():
            reward = -1
            return reward
        return reward
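RewardMapper expects the boundary, shore lines, goal and an initial ship pose to be set before rewards are queried. The sketch below shows that call order only; plot_flag=False avoids opening a Viewer, and the coordinate lists and numeric poses are placeholders, not project data.

# Illustrative call order only; boundary_points, upper_points, lower_points
# and goal_point_xy stand in for the project's coordinate data.
mapper = RewardMapper(plot_flag=False, r_mode_='step')
mapper.set_boundary_points(boundary_points)
mapper.set_shore_lines(upper_points, lower_points)
mapper.set_goal(goal_point_xy, heading=-103.0, vel_l=3.0)
mapper.initialize_ship(11000, 5300, -103.0, 3.0, 0.0, 0.0)
# ... advance the simulator one step, then feed the new global state back in:
mapper.update_ship(10990, 5297, -103.1, 3.0, 0.0, 0.0, angle=0.2, rot=0.3)
print(mapper.get_reward())  # 'step' mode: 1 if balance < 50 and misalignment < 2, else -0.1; -1 on collision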
Example #7
class ShipEnv(Env):
    def __init__(self, special_mode=None):
        self.special_mode = special_mode
        self.buoys = buoys
        self.lower_shore = lower_shore
        self.upper_shore = upper_shore
        self.goal = goal
        self.goal_factor = goal_factor
        self.upper_line = LineString(upper_shore)
        self.lower_line = LineString(lower_shore)
        self.goal_point = Point(goal)
        self.boundary = Polygon(self.buoys)
        self.goal_rec = Polygon(((self.goal[0] - self.goal_factor,
                                  self.goal[1] - self.goal_factor),
                                 (self.goal[0] - self.goal_factor,
                                  self.goal[1] + self.goal_factor),
                                 (self.goal[0] + self.goal_factor,
                                  self.goal[1] + self.goal_factor),
                                 (self.goal[0] + self.goal_factor,
                                  self.goal[1] - self.goal_factor)))
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=np.array([-1, -180, -1.0]),
                                            high=np.array([1, -50, 1.0]))
        self.init_space = spaces.Box(low=np.array([-0.8, -106, 2.5]),
                                     high=np.array([0.8, -101, 2.5]))
        self.start_pos = [11000, 5300.10098]
        self.buzz_interface = Environment()
        self.buzz_interface.set_up()
        self.point_a = (13000, 0)
        self.point_b = (15010, 0)
        self.line = LineString([self.point_a, self.point_b])
        self.set_point = np.array([0, -103, 2.5, 0, 0])
        self.tolerance = np.array([20, 2.0, 0.2, 0.05])
        self.last_pos = list()
        self.ship_point = None
        self.ship_polygon = Polygon(((-10, 0), (0, 100), (10, 0)))
        self.plot = False
        self.viewer = None
        self.last_action = [0, 0]

    def step(self, action):
        info = dict()
        state_prime, _ = self.buzz_interface.step(
            angle_level=self.convert_action(action), rot_level=0.3)
        # v_lon, v_drift, _ = global_to_local(state_prime[3], state_prime[4], state_prime[2])
        obs = self.convert_state_sog_cog(state_prime)
        print('Action: ', action)
        print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        if dn:
            if not self.goal_rec.contains(self.ship_point):
                rew = -1000
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = self.convert_action(action)
        print('Reward: ', rew)
        return obs, rew, dn, info

    def calculate_reward(self, obs):
        if abs(obs[1] + 166.6) < 0.3 and abs(obs[2]) < 0.01:
            return 1
        else:
            return np.tanh(-((obs[1] + 166.6)**2) - 1000 * (obs[2]**2))

    def end(self, state_prime, obs):
        if not self.observation_space.contains(
                obs) or not self.boundary.contains(self.ship_point):
            print('Ending episode with obs: ', obs)
            if self.viewer is not None:
                self.viewer.end_of_episode()
            return True
        else:
            return False

    def convert_action(self, act):
        if act == 0:
            return -0.2
        elif act == 1:
            return 0.0
        elif act == 2:
            return 0.2

    # 0:bank_balance
    # 1:heading
    # 2:v_lon
    # 3:v_drift
    # 4:heading_p
    def convert_state(self, state):
        # print('Original state:', state)
        v_lon, v_drift, _ = global_to_local(state[3], state[4], state[2])
        self.ship_point = Point((state[0], state[1]))
        self.ship_polygon = affinity.translate(self.ship_polygon, state[0],
                                               state[1])
        self.ship_polygon = affinity.rotate(self.ship_polygon, -state[2],
                                            'center')
        bank_balance = (self.ship_point.distance(self.upper_line) - self.ship_point.distance(self.lower_line)) / \
                       (self.ship_point.distance(self.upper_line) + self.ship_point.distance(self.lower_line))
        obs = np.array([bank_balance, state[2], v_lon, v_drift, state[5]])
        # print('Observation', obs)
        return obs

        # Agent handles the state space (distance_from_line, heading, heading_p)
        # obs variables:
        # 0:bank_balance
        # 1:cog
        # 2:heading_p
    def convert_state_sog_cog(self, state):
        v_lon, v_drift, _ = global_to_local(state[3], state[4], state[2])
        self.ship_point = Point((state[0], state[1]))
        self.ship_polygon = affinity.translate(self.ship_polygon, state[0],
                                               state[1])
        self.ship_polygon = affinity.rotate(self.ship_polygon, -state[2],
                                            'center')
        bank_balance = (self.ship_point.distance(self.upper_line) - self.ship_point.distance(self.lower_line)) / \
                       (self.ship_point.distance(self.upper_line) + self.ship_point.distance(self.lower_line))
        # sog = np.linalg.norm([state[3], state[4]])
        cog = np.degrees(np.arctan2(state[4], state[3]))
        obs = np.array([bank_balance, cog, state[5]])
        # print('Observation', obs)
        return obs

    def change_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        # self.buzz_interface.set_single_start_pos_mode([self.start_pos[0], self.start_pos[1]+init[0], init[1], init[2], 0, 0])
        self.buzz_interface.set_single_start_pos_mode(
            [11000, 5280, -106.4, 3, 0, 0])
        self.buzz_interface.move_to_next_start()
        print('Resetting position')
        state = self.buzz_interface.get_state()
        self.last_pos = [state[0], state[1], state[2]]
        self.last_action = [0, 0]
        return self.convert_state_sog_cog(state)

    def render(self, mode='human'):
        if mode == 'human':
            if self.viewer is None:
                self.viewer = Viewer()
                # self.viewer.plot_guidance_line(self.point_a, self.point_b)
                self.viewer.plot_boundary(self.buoys)
                self.viewer.plot_goal(self.goal, self.goal_factor)
            self.viewer.plot_position(self.last_pos[0], self.last_pos[1],
                                      self.last_pos[2], 30 * self.last_action)

    def close(self):
        self.buzz_interface.finish()
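The bank_balance feature in both convert_state variants is the normalized difference between the ship's distances to the two shore lines. The standalone illustration below recomputes it; shapely is already a dependency here, upper_shore and lower_shore are the project's shore coordinate lists, and the ship position is an arbitrary example.

from shapely.geometry import LineString, Point

# Standalone computation of the bank_balance feature used above.
upper_line = LineString(upper_shore)
lower_line = LineString(lower_shore)
ship_point = Point((11000.0, 5300.0))  # arbitrary example position
d_up = ship_point.distance(upper_line)
d_low = ship_point.distance(lower_line)
bank_balance = (d_up - d_low) / (d_up + d_low)
# -1 when hugging the upper shore, +1 when hugging the lower shore, 0 mid-channel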
Example #8
class ShipEnv(Env):
    def __init__(self, type='continuous', action_dim=2):
        self.type = type
        self.action_dim = action_dim
        assert type == 'continuous' or type == 'discrete', 'type must be continuous or discrete'
        assert action_dim > 0 and action_dim <=2, 'action_dim must be 1 or 2'
        if type == 'continuous':
            self.action_space = spaces.Box(low=np.array([-1.0, 0]), high=np.array([1.0, 0.2]))
            self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.2]), high=np.array([150, np.pi / 2, 4.0, 4.0, 0.2]))
            self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]), high=np.array([30, np.pi / 15, 1.5, 0.3, 0.01]))
        elif type == 'discrete':
            if action_dim == 2:
                self.action_space = spaces.Discrete(63)
            else:
                self.action_space = spaces.Discrete(21)
            self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.4]), high=np.array([150, np.pi / 2, 4.0, 4.0, 0.4]))
            self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]), high=np.array([30, np.pi / 15, 2.0, 0.3, 0.01]))
        self.ship_data = None
        self.name_experiment = None
        self.last_pos = np.zeros(3)
        self.last_action = np.zeros(self.action_dim)
        self.simulator = Simulator()
        self.point_a = (0.0, 0.0)
        self.point_b = (2000, 0.0)
        self.max_x_episode = (5000, 0)
        self.guideline = LineString([self.point_a, self.max_x_episode])
        self.start_pos = np.zeros(1)
        self.number_loop = 0  # loops in the screen -> used to plot
        self.borders = [[0, 150], [2000, 150], [2000, -150], [0, -150]]
        self.viewer = None
        self.test_performance = False
        self.init_test_performance = np.linspace(0, np.pi / 15, 10)
        self.counter = 0

    def step(self, action):
        # According to the action space, a different kind of action is selected
        if self.type == 'continuous' and self.action_dim == 2:
            side = np.sign(self.last_pos[1])
            angle_action = action[0]*side
            rot_action = (action[1]+1)/10
        elif self.type == 'continuous' and self.action_dim == 1:
            side = np.sign(self.last_pos[1])
            angle_action = action[0]*side
            rot_action = 0.2
        elif self.type == 'discrete' and self.action_dim == 2:
            side = np.sign(self.last_pos[1])
            angle_action = (action // 3 - 10) / 10
            angle_action = angle_action * side
            rot_action = 0.1 * (action % 3)
        elif self.type == 'discrete' and self.action_dim == 1:
            side = np.sign(self.last_pos[1])
            angle_action = (action - 10) / 10
            angle_action = angle_action * side
            rot_action = 0.2
        state_prime = self.simulator.step(angle_level=angle_action, rot_level=rot_action)
        # convert simulator states into observable states
        obs = self.convert_state(state_prime)
        # print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = np.array([angle_action, rot_action])
        if self.ship_data is not None:
            self.ship_data.new_transition(state_prime, obs, self.last_action, rew)
        info = dict()
        return obs, rew, dn, info

    def convert_state(self, state):
        """
        This method generates the features used to build the reward function
        :param state: Global state of the ship
        """
        ship_point = Point((state[0], state[1]))
        side = np.sign(state[1] - self.point_a[1])
        d = ship_point.distance(self.guideline)  # meters
        theta = side*state[2]  # radians
        vx = state[3]  # m/s
        vy = side*state[4]  # m/s
        thetadot = side * state[5]  # degrees/min
        obs = np.array([d, theta, vx, vy, thetadot])
        return obs

    def calculate_reward(self, obs):
        d, theta, vx, vy, thetadot = obs[0], obs[1]*180/np.pi, obs[2], obs[3], obs[4]*180/np.pi
        if self.last_pos[0] > 5000:
            print("\n Got there")
        if not self.observation_space.contains(obs):
            return -1000
        else:
            return (4*(vx-1.5) + 5*(1-d/20) + 2*(1-vy**2/10) + 5*(1-np.abs(theta/30)) + 3*(1 - np.abs(thetadot)/12)) / 24

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or -1 > state_prime[0] or state_prime[0] > self.max_x_episode[0] or 160 < state_prime[1] or state_prime[1]< -160:
            if not self.observation_space.contains(obs):
                print("\n Smashed")
            if self.viewer is not None:
                self.viewer.end_episode()
            if self.ship_data is not None:
                if self.ship_data.iterations > 0:
                    self.ship_data.save_experiment(self.name_experiment)
            return True
        else:
            return False

    def set_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        if self.test_performance:
            angle = self.init_test_performance[self.counter]
            v_lon = 1.5
            init_states = np.array([self.start_pos[0], 30, angle, v_lon * np.cos(angle), v_lon * np.sin(angle), 0])
            self.counter += 1
            init[0] = 30
            init[1] = angle
        else:
            init_states = np.array([self.start_pos[0], init[0], init[1], init[2] * np.cos(init[1]), init[2] * np.sin(init[1]), 0])
        self.simulator.reset_start_pos(init_states)
        self.last_pos = np.array([self.start_pos[0], init[0],  init[1]])
        print('Resetting position')
        state = self.simulator.get_state()
        if self.ship_data is not None:
            if self.ship_data.iterations > 0:
                self.ship_data.save_experiment(self.name_experiment)
            self.ship_data.new_iter(state, self.convert_state(state), np.zeros(len(self.last_action)), np.array([0]))
        if self.viewer is not None:
            self.viewer.end_episode()
        return self.convert_state(state)

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = Viewer()
            self.viewer.plot_boundary(self.borders)
            self.viewer.plot_guidance_line(self.point_a, self.point_b)

        img_x_pos = self.last_pos[0] - self.point_b[0] * (self.last_pos[0] // self.point_b[0])
        if self.last_pos[0]//self.point_b[0] > self.number_loop:
            self.viewer.end_episode()
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])
            self.viewer.restart_plot()
            self.number_loop += 1
        else:
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])

    def close(self):
        self.viewer.freeze_scream()

    def set_save_experice(self, name='experiment_ssn_ddpg_10iter'):
        assert type(name) == type(""), 'name must be a string'
        self.ship_data = ShipExperiment()
        self.name_experiment = name

    def set_test_performace(self):
        self.test_performance = True
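set_save_experice and set_test_performace switch the environment into a logged, fixed-heading evaluation mode: reset() then takes the initial heading from init_test_performance instead of the sampled init_space value. A minimal sketch combining them follows; ShipExperiment and the simulator backend must be importable, and the experiment label and the random policy are placeholders.

# Evaluation-mode sketch for the discrete, rudder-only configuration above.
env = ShipEnv(type='discrete', action_dim=1)
env.set_save_experice('experiment_discrete_rudder_only')  # log transitions via ShipExperiment
env.set_test_performace()                                 # reset() now sweeps fixed headings
for _ in range(len(env.init_test_performance)):           # one episode per fixed heading
    obs = env.reset()
    done = False
    while not done:
        obs, rew, done, info = env.step(env.action_space.sample())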