def replay_trajectory(episodes):
    view = Viewer()
    view.plot_boundary(buoys)
    view.plot_goal(goal, goal_factor)
    for episode in episodes:
        transitions_list = episode['transitions_list']
        for transition in transitions_list:
            state = transition[0]
            view.plot_position(state[0], state[1], state[2])
    view.freeze_screen()
def plot_sequence(tuples):
    view = Viewer()
    view.plot_boundary(buoys)
    view.plot_goal(goal, goal_factor)
    for tup in tuples:
        state = tup[0]
        print('Position: ', state[0], state[1], state[2])
        print('Velocities: ', state[3], state[4], state[5])
        print('Action: ', tup[1])
        view.plot_position(state[0], state[1], state[2])
    view.freeze_screen()
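# Usage sketch for the replay helpers above. Assumption: the pickled trajectory files
# written by evaluate_agent() further below are available; each file holds one episode's
# transitions_list of (state, action, state_prime, reward) tuples. replay_from_file is
# a hypothetical helper introduced only for illustration.
def replay_from_file(path):
    import pickle
    with open(path, 'rb') as infile:
        transitions = pickle.load(infile)
    plot_sequence(transitions)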
def collect_trajectories():
    with tf.device('/cpu:0'):
        reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
        viewer = Viewer()
        viewer.plot_boundary(buoys)
        viewer.plot_goal(goal, 1000)
        agents = ['agents/agent_20180727160449Sequential_r____disc_0.8it20.h5']
        starting_points = [[11000, 5320, -105.5, 3, 0, 0],
                           [11000, 5320, -104.5, 3, 0, 0],
                           [11000, 5320, -105.5, 3, 0, 0],
                           [11000, 5300, -103.5, 3, 0, 0],
                           [11000, 5300, -102.5, 3, 0, 0],
                           [11000, 5300, -101.5, 3, 0, 0]]
        for agent_obj in agents:
            agent = learner.Learner(load_saved_regression=agent_obj, nn_=True)
            ret_tuples = list()
            results = list()
            num_steps = list()
            env = environment.Environment(rw_mapper=reward_mapping)
            env.set_up()
            for start_pos in starting_points:
                final_flag = 0
                transitions_list = list()
                compact_state_list = list()
                total_steps = 0
                env.set_single_start_pos_mode(start_pos)
                env.move_to_next_start()
                steps_inside = 0
                for step in range(evaluation_steps):
                    state = env.get_state()
                    print('Value for yaw_p :', state[5])
                    viewer.plot_position(state[0], state[1], state[2])
                    state_r = utils.convert_to_simple_state(state, geom_helper)
                    compact_state_list.append(state_r)
                    print('Value for yaw_p :', state_r[3])
                    print('Value for vlon:', state_r[0])
                    action = agent.select_action(state_r)
                    state_prime, reward = env.step(action[0], action[1])
                    transition = (state, (action[0], action[1]), state_prime, reward)
                    if abs(state_r[2]) < 50:
                        steps_inside += 1
                    final_flag = env.is_final()
                    print("***Evaluation step " + str(step + 1) + " Completed")
                    transitions_list.append(transition)
                    ret_tuples += transitions_list
                    total_steps = step
                    if final_flag != 0:
                        break
                results.append(final_flag)
                num_steps.append(total_steps)
                with open(agent_obj + '_' + str(start_pos[1]) + '_' + str(start_pos[2]) +
                          str(final_flag) + '.csv', 'wt') as out:
                    csv_out = csv.writer(out)
                    csv_out.writerow(['x', 'y', 'heading', 'rudder_lvl', 'balance'])
                    for tr, compact_state in zip(transitions_list, compact_state_list):
                        pos = (tr[0][0], tr[0][1], tr[0][2], tr[1][0], compact_state[2])
                        csv_out.writerow(pos)
class ShipEnv(Env):
    def __init__(self, type='continuous', action_dim=1):
        self.type = type
        self.action_dim = action_dim
        self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.2]),
                                            high=np.array([150, np.pi / 2, 5.0, 4.0, 0.2]))
        self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]),
                                     high=np.array([30, np.pi / 15, 1.5, 0.3, 0.01]))
        self.ship_data = None
        self.name_experiment = None
        self.last_pos = np.zeros(3)
        self.last_action = np.zeros(self.action_dim)
        self.simulator = Simulator()
        self.point_a = (0.0, 0.0)
        self.point_b = (2000, 0.0)
        self.max_x_episode = (5000, 0)
        self.guideline = LineString([self.point_a, self.max_x_episode])
        self.start_pos = np.zeros(1)
        self.number_loop = 0  # loops in the screen -> used to plot
        self.borders = [[0, 150], [2000, 150], [2000, -150], [0, -150]]
        self.viewer = None

    def step(self, action):
        side = np.sign(self.last_pos[1])
        angle_action = action[0] * side
        rot_action = 0.2
        state_prime = self.simulator.step(angle_level=angle_action, rot_level=rot_action)
        # convert simulator states into observable states
        obs = self.convert_state(state_prime)
        # print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = np.array([angle_action, rot_action])
        info = dict()
        return obs, rew, dn, info

    def convert_state(self, state):
        """
        This method generates the features used to build the reward function
        :param state: Global state of the ship
        """
        ship_point = Point((state[0], state[1]))
        side = np.sign(state[1] - self.point_a[1])
        d = ship_point.distance(self.guideline)  # meters
        theta = side * state[2]  # radians
        vx = state[3]  # m/s
        vy = side * state[4]  # m/s
        thetadot = side * state[5]  # degrees/min
        obs = np.array([d, theta, vx, vy, thetadot])
        return obs

    def calculate_reward(self, obs):
        d, theta, vx, vy, thetadot = obs[0], obs[1] * 180 / np.pi, obs[2], obs[3], obs[4] * 180 / np.pi
        if not self.observation_space.contains(obs):
            print("\n Action: %f, State[%f %f %f], Velocity [%f , %f], Theta: %f, Distance: %f thetadot: %f \n" %
                  (self.last_action[0], self.last_pos[0], self.last_pos[1], self.last_pos[2],
                   vx, vy, theta, d, thetadot))
            return -1000
        else:
            return 1 - 8 * np.abs(theta / 90) - np.abs(thetadot / 20) - 5 * np.abs(d) / 150 \
                   - np.abs(vy / 4) - np.abs(vx - 2) / 2

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or -1 > state_prime[0] \
                or state_prime[0] > self.max_x_episode[0] \
                or 160 < state_prime[1] or state_prime[1] < -160:
            if not self.observation_space.contains(obs):
                print("\n Smashed")
            if self.viewer is not None:
                self.viewer.end_episode()
            if self.ship_data is not None:
                if self.ship_data.iterations > 0:
                    self.ship_data.save_experiment(self.name_experiment)
            return True
        else:
            return False

    def set_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        init_states = np.array([self.start_pos[0], init[0], init[1],
                                init[2] * np.cos(init[1]), init[2] * np.sin(init[1]), 0])
        self.simulator.reset_start_pos(init_states)
        self.last_pos = np.array([self.start_pos[0], init[0], init[1]])
        print('Resetting position')
        state = self.simulator.get_state()
        if self.viewer is not None:
            self.viewer.end_episode()
        return self.convert_state(state)

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = Viewer()
            self.viewer.plot_boundary(self.borders)
            self.viewer.plot_guidance_line(self.point_a, self.point_b)
        img_x_pos = self.last_pos[0] - self.point_b[0] * (self.last_pos[0] // self.point_b[0])
        if self.last_pos[0] // self.point_b[0] > self.number_loop:
            self.viewer.end_episode()
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])
            self.viewer.restart_plot()
            self.number_loop += 1
        else:
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])

    def close(self):
        self.viewer.freeze_scream()
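# Minimal interaction sketch for the ShipEnv above. Assumptions: the Simulator and Viewer
# dependencies are importable, numpy is available as np (as elsewhere in this module), and
# the rudder action is a single level in [-1, 1], consistent with how step() scales
# action[0] by the ship's side. run_random_episode is a hypothetical helper for illustration.
def run_random_episode(max_steps=500, render=False):
    env = ShipEnv()
    obs = env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        action = np.array([np.random.uniform(-1.0, 1.0)])  # random rudder level
        obs, rew, done, info = env.step(action)
        total_reward += rew
        if render:
            env.render()
        if done:
            break
    return total_reward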
def evaluate_agent(ag_obj):
    import reward
    reward_mapping = reward.RewardMapper('quadratic', _g_helper=geom_helper)
    reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    agent = learner.Learner(load_saved_regression=ag_obj, nn_=True)
    env = environment.Environment(rw_mapper=reward_mapping)
    env.set_up()
    viewer = Viewer()
    viewer.plot_boundary(buoys)
    viewer.plot_goal(goal, 100)
    starting_points = [[11000, 5280, -103.5, 3, 0, 0],
                       [11000, 5280, -104.5, 3, 0, 0],
                       [11000, 5280, -105.5, 3, 0, 0],
                       [11000, 5300, -104, 3, 0, 0],
                       [11000, 5280, -103.5, 3, 0, 0],
                       [11000, 5320, -103.5, 3, 0, 0],
                       [11000, 5320, -103.5, 3, 0, 0]]
    ret_tuples = list()
    # env.set_single_start_pos_mode([11000, 5380.10098, -103, 3, 0, 0])
    # env.set_single_start_pos_mode([8000, 4600, -103.5, 3, 0, 0])
    # env.set_single_start_pos_mode([12000, 5500, -90, 3, 0, 0])
    # env.set_single_start_pos_mode([6600, 4200, -102, 3, 0, 0])
    # env.starts_from_file_mode('starting_points_global_coord')
    # env.move_to_next_start()
    results = list()
    num_steps = list()
    steps_inside_goal_region = list()
    for start_pos in starting_points:
        final_flag = 0
        transitions_list = list()
        total_steps = 0
        env.set_single_start_pos_mode(start_pos)
        env.move_to_next_start()
        steps_inside = 0
        for step in range(evaluation_steps):
            state = env.get_state()
            print('Value for yaw_p :', state[5])
            viewer.plot_position(state[0], state[1], state[2])
            state_r = utils.convert_to_simple_state(state, geom_helper)
            print('Value for yaw_p :', state_r[3])
            action = agent.select_action(state_r)
            state_prime, reward = env.step(action[0], action[1])
            transition = (state, (action[0], action[1]), state_prime, reward)
            if abs(state_r[2]) < 50:
                steps_inside += 1
            final_flag = env.is_final()
            print("***Evaluation step " + str(step + 1) + " Completed")
            transitions_list.append(transition)
            ret_tuples += transitions_list
            total_steps = step
            if final_flag != 0:
                break
        results.append(final_flag)
        num_steps.append(total_steps)
        steps_inside_goal_region.append(steps_inside)
        with open('trajectory_' + agent.learner.__class__.__name__ + 'it' + str(total_steps) +
                  'end' + str(final_flag), 'wb') as outfile:
            pickle.dump(transitions_list, outfile)
        with open('trajectory_' + agent.learner.__class__.__name__ + 'it' + str(total_steps) +
                  'end' + str(final_flag) + '.csv', 'wt') as out:
            csv_out = csv.writer(out)
            csv_out.writerow(['x', 'y', 'heading', 'rudder_lvl'])
            for tr in transitions_list:
                pos = (tr[0][0], tr[0][1], tr[0][2], tr[1][0])
                csv_out.writerow(pos)
    with open('results' + agent.learner.__class__.__name__ +
              datetime.datetime.now().strftime('%Y%m%d%H%M%S'), 'wb') as outfile:
        pickle.dump(num_steps, outfile)
        pickle.dump(results, outfile)
        pickle.dump(steps_inside_goal_region, outfile)
    return ret_tuples
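# Reader sketch for the results file written at the end of evaluate_agent().
# Assumption: the three objects are unpickled in the same order they were dumped
# (num_steps, results, steps_inside_goal_region). load_evaluation_results is a
# hypothetical helper introduced only for illustration.
def load_evaluation_results(path):
    import pickle
    with open(path, 'rb') as infile:
        num_steps = pickle.load(infile)
        results = pickle.load(infile)
        steps_inside_goal_region = pickle.load(infile)
    return num_steps, results, steps_inside_goal_region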
class RewardMapper(object):
    def __init__(self, plot_flag=True, r_mode_='cte'):
        self.boundary = None
        self.goal_rec = None
        self.ship_polygon = None
        self.ship = None
        self.plot_flag = plot_flag
        if self.plot_flag:
            self.view = Viewer()
        self.set_ship_geometry(((0, 0), (0, 10), (20, 0)))
        self.ship_vel = list()
        self.ship_last_vel = list()
        self.ship_pos = list()
        self.ship_last_pos = list()
        self.last_ship = None
        self.goal_point = None
        self.g_heading_n_cw = None
        self.g_vel_x = None
        self.g_vel_y = None
        self.reward_mode = r_mode_
        self.last_angle_selected = None
        self.last_rot_selected = None
        self.guid_line = None
        self.upper_shore = None
        self.lower_shore = None

    def is_inbound_coordinate(self, x, y):
        return self.boundary.buffer(-20).contains(Point(x, y))

    def generate_inner_positions(self):
        points_dict = dict()
        for line_x in range(int(self.goal_point[0] + 5000), int(self.goal_point[0] + 6000), 500):
            line = LineString([(line_x, 0), (line_x, 15000)])
            intersect = self.boundary.intersection(line)
            if intersect.geom_type == 'LineString':
                middle_point = (line_x, (intersect.bounds[1] + intersect.bounds[3]) / 2)
                upper_point = (line_x, (middle_point[1] + intersect.bounds[3]) / 2)
                lower_point = (line_x, (middle_point[1] + intersect.bounds[1]) / 2)
                dist = Point(self.goal_point).distance(Point(middle_point))
                points_dict[middle_point] = dist
                points_dict[upper_point] = dist
                points_dict[lower_point] = dist
        return points_dict

    def get_middle_y(self, x):
        line = LineString([(x, 0), (x, 15000)])
        intersect = self.boundary.intersection(line)
        return (intersect.bounds[1] + intersect.bounds[3]) / 2

    def get_guidance_line(self):
        y_temp_a = self.get_middle_y(8000)
        y_temp_b = self.get_middle_y(9000)
        x_a = 3600
        x_b = 14000
        m, b = np.polyfit([8000, 9000], [y_temp_a, y_temp_b], 1)
        y_a = m * x_a + b
        y_b = m * x_b + b
        self.guid_line = LineString([(x_a, y_a), (x_b, y_b)])
        return (x_a, y_a), (x_b, y_b)

    def set_boundary_points(self, points):
        self.boundary = Polygon(points)
        if self.plot_flag:
            self.view.plot_boundary(points)

    def set_shore_lines(self, upper_points, lower_points):
        self.upper_shore = LineString(upper_points)
        self.lower_shore = LineString(lower_points)

    def get_shore_balance(self, x, y):
        ship_point = Point((x, y))
        # upper means positive sign
        upper_dist = ship_point.distance(self.upper_shore)
        lower_dist = ship_point.distance(self.lower_shore)
        return upper_dist - lower_dist

    def set_ship_geometry(self, points):
        self.ship_polygon = Polygon(points)

    def set_goal(self, point, heading, vel_l):
        factor = 300
        self.goal_point = point
        self.g_vel_x, self.g_vel_y, self.g_heading_n_cw = utils.local_to_global(vel_l, 0, heading)
        self.goal_rec = Polygon(((point[0] - factor, point[1] - factor),
                                 (point[0] - factor, point[1] + factor),
                                 (point[0] + factor, point[1] + factor),
                                 (point[0] + factor, point[1] - factor)))
        if self.plot_flag:
            self.view.plot_goal(point, factor)

    def initialize_ship(self, x, y, heading, global_vel_x, global_vel_y, global_vel_theta):
        self.ship_last_vel = [global_vel_x, global_vel_y, global_vel_theta]
        self.ship_last_pos = [x, y, heading]
        self.last_ship = affinity.translate(self.ship_polygon, x, y)
        self.last_ship = affinity.rotate(self.last_ship, heading, 'center')
        self.ship_pos = self.ship_last_pos
        self.ship_vel = self.ship_last_vel
        self.ship = self.last_ship
        if self.plot_flag:
            self.view.plot_position(x, y, heading)

    def update_ship(self, x, y, heading, global_vel_x, global_vel_y, global_vel_theta, angle, rot):
        self.ship_last_pos = self.ship_pos
        self.ship_last_vel = self.ship_vel
        self.last_ship = self.ship
        self.last_angle_selected = angle
        self.last_rot_selected = rot
        self.ship_vel = [global_vel_x, global_vel_y, global_vel_theta]
        self.ship_pos = [x, y, heading]
        self.ship = affinity.translate(self.ship_polygon, x, y)
        self.ship = affinity.rotate(self.ship, heading, 'center')
        if self.plot_flag:
            self.view.plot_position(x, y, heading)

    def get_shortest_distance_from_boundary(self):
        return self.ship.distance(self.boundary)

    def collided(self):
        collided = not self.boundary.contains(self.ship)
        if collided:
            print('Collided!!')
        return collided

    def reached_goal(self):
        cont = self.goal_rec.contains(self.ship)
        # reached = cont and abs(self.ship_vel[0]) < abs(self.g_vel_x) and abs(self.ship_pos[2] - self.g_heading_n_cw) < 20
        # reached = abs(self.ship_pos[2] - self.g_heading_n_cw) < 20 and cont
        reached = cont
        # reached = abs(self.ship_vel[0]) < 0.2 or cont
        if reached:
            print('Reached goal!!')
        return reached

    def get_reward(self):
        # ref_array = np.array((self.goal_point[0], self.goal_point[1], self.g_heading_n_cw, self.g_vel_x, self.g_vel_y, 0))
        # array = np.array((self.ship_pos + self.ship_vel))
        ref_array = np.array((self.goal_point[0], self.goal_point[1], self.g_heading_n_cw))
        array = np.array(self.ship_pos)
        old_array = np.array(self.ship_last_pos)
        # new_dist = np.linalg.norm(array - ref_array)
        # old_dist = np.linalg.norm(old_array - ref_array)
        new_u_misalign = abs(array[2] - ref_array[2])
        old_u_misalign = abs(old_array[2] - ref_array[2])
        # print('distance_from_goal_state: ', new_dist)
        # shore_dist = self.boundary.exterior.distance(self.ship)
        # old_guid_dist = self.guid_line.distance(self.last_ship)
        # new_guid_dist = self.guid_line.distance(self.ship)
        # old_shore_dist = self.boundary.boundary.distance(self.last_ship)
        # new_shore_dist = self.boundary.boundary.distance(self.ship)
        new_u_balance = abs(self.get_shore_balance(array[0], array[1]))
        old_u_balance = abs(self.get_shore_balance(old_array[0], old_array[1]))
        # old_balance = self.get_shore_balance(old_array[0], old_array[1])
        # old_misalign = old_array[2] - ref_array[2]
        reward = -0.1
        if self.reward_mode == 'cte':
            reward = -0.1
        elif self.reward_mode == 'potential':
            pass
        elif self.reward_mode == 'rule':
            pass
            # if (old_balance < 0 and old_misalign > 0 and self.last_angle_selected != -0.5) or \
            #         (old_balance > 0 and old_misalign < 0 and self.last_angle_selected != 0.5):
            #     reward = -100
            # elif old_balance == 0 and old_misalign == 0 and self.last_angle_selected == -0.5:
            #     reward = -100
            # else:
            #     reward = 100
        elif self.reward_mode == 'dist':
            reward = 100 * math.exp(-0.000001 * old_u_balance - 0.000001 * old_u_misalign)
        elif self.reward_mode == 'align':
            reward = 100 * math.exp(-0.000001 * old_u_misalign)
        elif self.reward_mode == 'step':
            if new_u_balance < 50 and new_u_misalign < 2:
                reward = 1
            else:
                reward = -0.1
        elif self.reward_mode == 'step_with_rudder_punish':
            if new_u_balance < 50 and new_u_misalign < 2:
                reward = 1
            else:
                reward = -0.1
            if abs(self.last_angle_selected) == 0.5:
                reward = reward - 0.2
        elif self.reward_mode == 'linear_with_rudder_punish':
            if new_u_balance < 30 and new_u_misalign < 2:
                reward = 1
            else:
                reward = -0.1 - 0.00001 * new_u_balance
            if abs(self.last_angle_selected) == 0.5:
                reward = reward - 0.2
        if self.collided():
            reward = -1
            return reward
        return reward
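# Call-order sketch for RewardMapper above. Assumptions: 'buoys', 'upper_shore',
# 'lower_shore' and 'goal' are the channel definitions used elsewhere in this code, and
# the numeric ship states below are illustrative placeholders only; reward_mapper_demo
# is a hypothetical helper, not part of the original module.
def reward_mapper_demo():
    mapper = RewardMapper(plot_flag=False, r_mode_='step')
    mapper.set_boundary_points(buoys)
    mapper.set_shore_lines(upper_shore, lower_shore)
    mapper.set_goal(goal, heading=-103.0, vel_l=1.5)
    # Initialize the ship once, then update it after every simulation step so that
    # get_reward() can compare the current pose against the previous one.
    mapper.initialize_ship(11000, 5300, -104.0, 3.0, 0.0, 0.0)
    mapper.update_ship(10990, 5298, -104.1, 3.0, 0.0, 0.0, angle=0.2, rot=0.3)
    return mapper.get_reward()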
class ShipEnv(Env):
    def __init__(self, special_mode=None):
        self.special_mode = special_mode
        self.buoys = buoys
        self.lower_shore = lower_shore
        self.upper_shore = upper_shore
        self.goal = goal
        self.goal_factor = goal_factor
        self.upper_line = LineString(upper_shore)
        self.lower_line = LineString(lower_shore)
        self.goal_point = Point(goal)
        self.boundary = Polygon(self.buoys)
        self.goal_rec = Polygon(((self.goal[0] - self.goal_factor, self.goal[1] - self.goal_factor),
                                 (self.goal[0] - self.goal_factor, self.goal[1] + self.goal_factor),
                                 (self.goal[0] + self.goal_factor, self.goal[1] + self.goal_factor),
                                 (self.goal[0] + self.goal_factor, self.goal[1] - self.goal_factor)))
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=np.array([-1, -180, -1.0]), high=np.array([1, -50, 1.0]))
        self.init_space = spaces.Box(low=np.array([-0.8, -106, 2.5]), high=np.array([0.8, -101, 2.5]))
        self.start_pos = [11000, 5300.10098]
        self.buzz_interface = Environment()
        self.buzz_interface.set_up()
        self.point_a = (13000, 0)
        self.point_b = (15010, 0)
        self.line = LineString([self.point_a, self.point_b])
        self.set_point = np.array([0, -103, 2.5, 0, 0])
        self.tolerance = np.array([20, 2.0, 0.2, 0.05])
        self.last_pos = list()
        self.ship_point = None
        self.ship_polygon = Polygon(((-10, 0), (0, 100), (10, 0)))
        self.plot = False
        self.viewer = None
        self.last_action = [0, 0]

    def step(self, action):
        info = dict()
        state_prime, _ = self.buzz_interface.step(angle_level=self.convert_action(action), rot_level=0.3)
        # v_lon, v_drift, _ = global_to_local(state_prime[3], state_prime[4], state_prime[2])
        obs = self.convert_state_sog_cog(state_prime)
        print('Action: ', action)
        print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        if dn:
            if not self.goal_rec.contains(self.ship_point):
                rew = -1000
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = self.convert_action(action)
        print('Reward: ', rew)
        return obs, rew, dn, info

    def calculate_reward(self, obs):
        if abs(obs[1] + 166.6) < 0.3 and abs(obs[2]) < 0.01:
            return 1
        else:
            return np.tanh(-((obs[1] + 166.6) ** 2) - 1000 * (obs[2] ** 2))

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or not self.boundary.contains(self.ship_point):
            print('Ending episode with obs: ', obs)
            if self.viewer is not None:
                self.viewer.end_of_episode()
            return True
        else:
            return False

    def convert_action(self, act):
        if act == 0:
            return -0.2
        elif act == 1:
            return 0.0
        elif act == 2:
            return 0.2

    # obs variables:
    # 0:bank_balance
    # 1:heading
    # 2:v_lon
    # 3:v_drift
    # 4:heading_p
    def convert_state(self, state):
        # print('Original state:', state)
        v_lon, v_drift, _ = global_to_local(state[3], state[4], state[2])
        self.ship_point = Point((state[0], state[1]))
        self.ship_polygon = affinity.translate(self.ship_polygon, state[0], state[1])
        self.ship_polygon = affinity.rotate(self.ship_polygon, -state[2], 'center')
        bank_balance = (self.ship_point.distance(self.upper_line) - self.ship_point.distance(self.lower_line)) / \
                       (self.ship_point.distance(self.upper_line) + self.ship_point.distance(self.lower_line))
        obs = np.array([bank_balance, state[2], v_lon, v_drift, state[5]])
        # print('Observation', obs)
        return obs

    # Agent handles the state space (distance_from_line, heading, heading_p)
    # obs variables:
    # 0:bank_balance
    # 1:cog
    # 2:heading_p
    def convert_state_sog_cog(self, state):
        v_lon, v_drift, _ = global_to_local(state[3], state[4], state[2])
        self.ship_point = Point((state[0], state[1]))
        self.ship_polygon = affinity.translate(self.ship_polygon, state[0], state[1])
        self.ship_polygon = affinity.rotate(self.ship_polygon, -state[2], 'center')
        bank_balance = (self.ship_point.distance(self.upper_line) - self.ship_point.distance(self.lower_line)) / \
                       (self.ship_point.distance(self.upper_line) + self.ship_point.distance(self.lower_line))
        # sog = np.linalg.norm([state[3], state[4]])
        cog = np.degrees(np.arctan2(state[4], state[3]))
        obs = np.array([bank_balance, cog, state[5]])
        # print('Observation', obs)
        return obs

    def change_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        # self.buzz_interface.set_single_start_pos_mode([self.start_pos[0], self.start_pos[1] + init[0], init[1], init[2], 0, 0])
        self.buzz_interface.set_single_start_pos_mode([11000, 5280, -106.4, 3, 0, 0])
        self.buzz_interface.move_to_next_start()
        print('Resetting position')
        state = self.buzz_interface.get_state()
        self.last_pos = [state[0], state[1], state[2]]
        self.last_action = [0, 0]
        return self.convert_state_sog_cog(state)

    def render(self, mode='human'):
        if mode == 'human':
            if self.viewer is None:
                self.viewer = Viewer()
                # self.viewer.plot_guidance_line(self.point_a, self.point_b)
                self.viewer.plot_boundary(self.buoys)
                self.viewer.plot_goal(self.goal, self.goal_factor)
            self.viewer.plot_position(self.last_pos[0], self.last_pos[1], self.last_pos[2], 30 * self.last_action)

    def close(self):
        self.buzz_interface.finish()
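# Interaction sketch for the Discrete(3) rudder environment above. Assumptions: the
# Environment back end ("buzz_interface") and the channel geometry globals used in
# __init__ are available in this module; run_discrete_episode is a hypothetical helper
# introduced only to illustrate the reset/step/render loop.
def run_discrete_episode(max_steps=300):
    env = ShipEnv()
    obs = env.reset()
    for _ in range(max_steps):
        action = env.action_space.sample()  # 0 -> -0.2, 1 -> 0.0, 2 -> +0.2 rudder
        obs, rew, done, info = env.step(action)
        env.render()
        if done:
            break
    env.close()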
class ShipEnv(Env):
    def __init__(self, type='continuous', action_dim=2):
        self.type = type
        self.action_dim = action_dim
        assert type == 'continuous' or type == 'discrete', 'type must be continuous or discrete'
        assert action_dim > 0 and action_dim <= 2, 'action_dim must be 1 or 2'
        if type == 'continuous':
            self.action_space = spaces.Box(low=np.array([-1.0, 0]), high=np.array([1.0, 0.2]))
            self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.2]),
                                                high=np.array([150, np.pi / 2, 4.0, 4.0, 0.2]))
            self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]),
                                         high=np.array([30, np.pi / 15, 1.5, 0.3, 0.01]))
        elif type == 'discrete':
            if action_dim == 2:
                self.action_space = spaces.Discrete(63)
            else:
                self.action_space = spaces.Discrete(21)
            self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.4]),
                                                high=np.array([150, np.pi / 2, 4.0, 4.0, 0.4]))
            self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]),
                                         high=np.array([30, np.pi / 15, 2.0, 0.3, 0.01]))
        self.ship_data = None
        self.name_experiment = None
        self.last_pos = np.zeros(3)
        self.last_action = np.zeros(self.action_dim)
        self.simulator = Simulator()
        self.point_a = (0.0, 0.0)
        self.point_b = (2000, 0.0)
        self.max_x_episode = (5000, 0)
        self.guideline = LineString([self.point_a, self.max_x_episode])
        self.start_pos = np.zeros(1)
        self.number_loop = 0  # loops in the screen -> used to plot
        self.borders = [[0, 150], [2000, 150], [2000, -150], [0, -150]]
        self.viewer = None
        self.test_performance = False
        self.init_test_performance = np.linspace(0, np.pi / 15, 10)
        self.counter = 0

    def step(self, action):
        # According to the action space a different kind of action is selected
        if self.type == 'continuous' and self.action_dim == 2:
            side = np.sign(self.last_pos[1])
            angle_action = action[0] * side
            rot_action = (action[1] + 1) / 10
        elif self.type == 'continuous' and self.action_dim == 1:
            side = np.sign(self.last_pos[1])
            angle_action = action[0] * side
            rot_action = 0.2
        elif self.type == 'discrete' and self.action_dim == 2:
            side = np.sign(self.last_pos[1])
            angle_action = (action // 3 - 10) / 10
            angle_action = angle_action * side
            rot_action = 0.1 * (action % 3)
        elif self.type == 'discrete' and self.action_dim == 1:
            side = np.sign(self.last_pos[1])
            angle_action = (action - 10) / 10
            angle_action = angle_action * side
            rot_action = 0.2
        state_prime = self.simulator.step(angle_level=angle_action, rot_level=rot_action)
        # convert simulator states into observable states
        obs = self.convert_state(state_prime)
        # print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = np.array([angle_action, rot_action])
        if self.ship_data is not None:
            self.ship_data.new_transition(state_prime, obs, self.last_action, rew)
        info = dict()
        return obs, rew, dn, info

    def convert_state(self, state):
        """
        This method generates the features used to build the reward function
        :param state: Global state of the ship
        """
        ship_point = Point((state[0], state[1]))
        side = np.sign(state[1] - self.point_a[1])
        d = ship_point.distance(self.guideline)  # meters
        theta = side * state[2]  # radians
        vx = state[3]  # m/s
        vy = side * state[4]  # m/s
        thetadot = side * state[5]  # degrees/min
        obs = np.array([d, theta, vx, vy, thetadot])
        return obs

    def calculate_reward(self, obs):
        d, theta, vx, vy, thetadot = obs[0], obs[1] * 180 / np.pi, obs[2], obs[3], obs[4] * 180 / np.pi
        if self.last_pos[0] > 5000:
            print("\n Got there")
        if not self.observation_space.contains(obs):
            return -1000
        else:
            return (4 * (vx - 1.5) + 5 * (1 - d / 20) + 2 * (1 - vy ** 2 / 10) +
                    5 * (1 - np.abs(theta / 30)) + 3 * (1 - np.abs(thetadot) / 12)) / 24

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or -1 > state_prime[0] \
                or state_prime[0] > self.max_x_episode[0] \
                or 160 < state_prime[1] or state_prime[1] < -160:
            if not self.observation_space.contains(obs):
                print("\n Smashed")
            if self.viewer is not None:
                self.viewer.end_episode()
            if self.ship_data is not None:
                if self.ship_data.iterations > 0:
                    self.ship_data.save_experiment(self.name_experiment)
            return True
        else:
            return False

    def set_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        if self.test_performance:
            angle = self.init_test_performance[self.counter]
            v_lon = 1.5
            init_states = np.array([self.start_pos[0], 30, angle,
                                    v_lon * np.cos(angle), v_lon * np.sin(angle), 0])
            self.counter += 1
            init[0] = 30
            init[1] = angle
        else:
            init_states = np.array([self.start_pos[0], init[0], init[1],
                                    init[2] * np.cos(init[1]), init[2] * np.sin(init[1]), 0])
        self.simulator.reset_start_pos(init_states)
        self.last_pos = np.array([self.start_pos[0], init[0], init[1]])
        print('Resetting position')
        state = self.simulator.get_state()
        if self.ship_data is not None:
            if self.ship_data.iterations > 0:
                self.ship_data.save_experiment(self.name_experiment)
            self.ship_data.new_iter(state, self.convert_state(state),
                                    np.zeros(len(self.last_action)), np.array([0]))
        if self.viewer is not None:
            self.viewer.end_episode()
        return self.convert_state(state)

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = Viewer()
            self.viewer.plot_boundary(self.borders)
            self.viewer.plot_guidance_line(self.point_a, self.point_b)
        img_x_pos = self.last_pos[0] - self.point_b[0] * (self.last_pos[0] // self.point_b[0])
        if self.last_pos[0] // self.point_b[0] > self.number_loop:
            self.viewer.end_episode()
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])
            self.viewer.restart_plot()
            self.number_loop += 1
        else:
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])

    def close(self):
        self.viewer.freeze_scream()

    def set_save_experice(self, name='experiment_ssn_ddpg_10iter'):
        assert type(name) == type(""), 'name must be a string'
        self.ship_data = ShipExperiment()
        self.name_experiment = name

    def set_test_performace(self):
        self.test_performance = True
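# Decoding sketch for the discrete two-dimensional action space above
# (type='discrete', action_dim=2, Discrete(63)): index -> (rudder level, rotation level),
# mirroring the arithmetic used in step(). decode_discrete_action is a hypothetical
# helper introduced only for illustration.
def decode_discrete_action(action):
    angle_level = (action // 3 - 10) / 10  # 21 rudder levels: -1.0, -0.9, ..., 1.0
    rot_level = 0.1 * (action % 3)         # propulsion levels: 0.0, 0.1, 0.2
    return angle_level, rot_level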