def train_from_single_episode(episodes, pickle_vars, ep_number):
    env = environment.Environment(buoys, steps_between_actions, vessel_id, rudder_id, thruster_id,
                                  scenario, goal, goal_heading_e_ccw, goal_vel_lon, False)
    replace_reward = reward.RewardMapper(plot_flag=False)
    replace_reward.set_boundary_points(buoys)
    replace_reward.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    batch_learner = learner.Learner(file_to_save=learner_file,
                                    action_space_name=pickle_vars['action_space'],
                                    r_m_=replace_reward)
    episode = episodes[ep_number]
    # Dump the selected episode's transitions for manual inspection.
    with open('debug_ep.txt', 'w') as outfile:
        for transition in episode['transitions_list']:
            print(transition[0], file=outfile)
            print(list(transition[1]), file=outfile)
            print(transition[2], file=outfile)
            print(transition[3], file=outfile)
            print('\n', file=outfile)
    batch_learner.add_to_batch(episode['transitions_list'], episode['final_flag'])
    batch_learner.set_up_agent()
    # Run FQI one iteration at a time, emitting debug output every 10th iteration.
    for it in range(max_fit_iterations):
        batch_learner.fqi_step(1, debug=(it % 10 == 0))
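# Sketch of the episode structure the function above expects, inferred from
# the debug dump and from the transition tuples built in sample_transitions()
# (all values are placeholders):
#
# episode = {
#     'transitions_list': [
#         (state, (angle, rot), state_prime, rw, final_flag),
#         ...
#     ],
#     'final_flag': final_flag_of_last_transition,
# }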
def train_from_samples(sample_files):
    replace_reward = reward.RewardMapper(plot_flag=False, r_mode_='cte')
    replace_reward.set_boundary_points(buoys)
    replace_reward.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    batch_learner = learner.Learner(r_m_=replace_reward)
    # Load every sample file into the batch before fitting.
    for file_path in sample_files:
        batch_learner.load_sample_file(file_path)
    batch_learner.set_up_agent()
    batch_learner.fqi_step(50)
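# A hypothetical invocation of train_from_samples(); the glob pattern is
# illustrative and assumes the per-episode files written by
# sample_transitions() live under samples/:
#
# import glob
# train_from_samples(sorted(glob.glob('samples/samples*action_*')))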
def sample_transitions(start_state=0, end_state=-1) -> object:
    reward_mapping = reward.RewardMapper('quadratic', _g_helper=geom_helper)
    reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    env = environment.Environment(rw_mapper=reward_mapping)
    env.set_up()
    # env.set_sampling_mode(start_state, end_state)
    env.set_single_start_pos_mode([9000, 4819.10098, -103.5, 3, 0, 0])
    # env.set_single_start_pos_mode([13000, 5777.706, -103.5, 3, 0, 0])
    # env.starts_from_file_mode('samples/starting_points_global_coord20180512195730')
    transitions_list = list()
    policy_mode = "random"
    for episode in range(5000000):  # effectively unbounded; stopped externally
        if episode == 1:
            env.start_bifurcation_mode()
        print('### NEW STARTING STATE ###', episode)
        # TODO fix inconsistency in order of methods from Environment
        env.move_to_next_start()
        final_flag = 0
        for action in action_space.action_combinations:
            # env.set_up()
            env.reset_to_start()
            for i in range(500000):
                state = env.get_state()
                act = None
                if policy_mode == 'controller':
                    pass
                    # TODO implement
                    # converted_state = utils.convert_to_simple_state(state)
                    # act[0]=
                    # act[1]=0.6
                else:
                    # Exploration: with probability 0.2 take a random action,
                    # otherwise keep the action assigned to this rollout.
                    rand_act = random.choice(action_space.action_combinations)
                    if random.random() < 0.2:
                        act = rand_act
                    else:
                        act = action
                angle = act[0]
                rot = act[1]
                # The first episode holds the rudder at zero while it seeds
                # the list of future starting states below.
                if episode == 0:
                    angle = 0.0
                state_prime, rw = env.step(angle, rot)
                print('Reward:', rw)
                final_flag = env.is_final()
                # New format: (state, action, next state, reward, final flag)
                transition = (state, (angle, rot), state_prime, rw, final_flag)
                if i % 50 == 0 and episode == 0:
                    env.add_states_to_start_list(state_prime)
                transitions_list.append(transition)
                # Stop the rollout on a terminal state or once the ship has
                # passed x = 7000.
                if final_flag != 0 or state[0] < 7000:
                    break
        with open(sample_file + 'action_' + action_space.action_space + '_s' + str(start_state)
                  + '_' + str(episode), 'wb') as outfile:
            pickle.dump(transitions_list, outfile)
        transitions_list = list()
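# Minimal loader sketch for reading the sample files written above. The
# helper name is not part of the original module; it tolerates files that
# contain several consecutively pickled lists (the commented-out EOFError
# loop further down suggests this layout occurs) and concatenates them.
def load_transitions_file(path):
    transitions = list()
    with open(path, 'rb') as infile:
        try:
            while True:
                transitions.extend(pickle.load(infile))
        except EOFError:
            pass
    return transitions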
def train_from_batch(episodes, pickle_vars):
    replace_reward = reward.RewardMapper(plot_flag=False, r_mode_='exp_border_target_rot_angle')
    replace_reward.set_boundary_points(buoys)
    replace_reward.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    batch_learner = learner.Learner(file_to_save=learner_file,
                                    action_space_name=pickle_vars['action_space'],
                                    r_m_=replace_reward)
    # Fill the batch with whole episodes until max_tuples_per_batch is
    # reached; the episode that overflows the cap is truncated to fit.
    batch_size = 0
    for episode in episodes:
        remaining = max_tuples_per_batch - len(episode['transitions_list']) - batch_size
        if remaining >= 0:
            batch_learner.add_to_batch(episode['transitions_list'], episode['final_flag'])
            batch_size += len(episode['transitions_list'])
        else:
            # remaining is negative here, so this slice keeps exactly the
            # first len(transitions_list) + remaining transitions; the
            # truncated episode is marked as non-final.
            batch_learner.add_to_batch(episode['transitions_list'][0:remaining], 0)
            break
    batch_learner.set_up_agent()
    batch_learner.fqi_step(max_fit_iterations)
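# Worked example of the capping arithmetic above (illustrative numbers):
# with max_tuples_per_batch = 10, batch_size = 8 already stored and a current
# episode of 5 transitions, remaining = 10 - 5 - 8 = -3, so the slice
# transitions_list[0:-3] keeps the first 2 transitions and the batch lands
# exactly on the cap.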
def __init__(self, _buoys_list, _step, _vessel_id, _rudder_id, _thr_id, _scn, _goal,
             _g_heading, _g_vel_l, _plot, _increment=0.1):
    super(Environment, self).__init__()
    self.step_increment = _increment
    self.buoys = _buoys_list
    self.goal = _goal
    self.g_heading = _g_heading
    self.g_vel_l = _g_vel_l
    self.steps_between_actions = _step
    # self.vessel_id = '102'
    self.vessel_id = _vessel_id
    self.rudder_id = _rudder_id
    self.thruster_id = _thr_id
    # self.mongo_addr = 'mongodb://10.1.1.92:27017'
    # self.dbname = 'test'
    self.simulation_id = 'sim'
    # self.scenario = 'Santos_Container_L349B45'
    self.scenario = _scn
    self.control_id = '555'
    self.chat_address = '127.0.0.1'
    self.simulation = []
    self.dyna_ctrl_id = '407'
    self.allow_advance_ev = threading.Event()
    self.dyna_ready_ev = threading.Event()
    self.own = []
    self.vessel = []
    self.rudder = []
    self.thruster = []
    self.max_angle = 0
    self.max_rot = 0
    self.reward_mapper = reward.RewardMapper(_plot, r_mode_='step')
    self.init_state = list()
    self._final_flag = False
    self.initial_states_sequence = None
    self.reward_mapper.set_boundary_points(self.buoys)
    self.reward_mapper.set_goal(self.goal, self.g_heading, self.g_vel_l)
    self.reward_mapper.get_guidance_line()
    self.reward_mapper.set_shore_lines(upper_shore, lower_shore)
    os.chdir('./dyna')
    self.dyna_proc = None
    self.accumulated_starts = list()
def get_strictly_simmetric_set(point_a, point_b, tuples):
    tuples = [tup for tup in tuples
              if tup[0][2] + 103.5 < 20 and tup[0][0] < 11500]
    print('Number of tuples to be considered:', len(tuples))
    tuples_with_reflection = list()
    for tpl in tuples:
        reflect_tuple = reflect_tuple_on_line(point_a, point_b, tpl)
        # Keep the mirrored tuple only if its reflected position still lies
        # inside the channel boundary.
        if is_inbound_coordinate(geom_helper.boundary, reflect_tuple[0][0], reflect_tuple[0][1]):
            tuples_with_reflection.append(tpl)
            tuples_with_reflection.append(reflect_tuple)
    print('Number of tuples after reflection:', len(tuples_with_reflection))
    return tuples_with_reflection


if __name__ == '__main__':
    reward_mapping = reward.RewardMapper('quadratic', _g_helper=geom_helper)
    reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    # tuples = list()
    # bundle_name = 'samples/samples_bundle_new'
    # with open(bundle_name, 'rb') as file:
    #     tuples = pickle.load(file)
    #
    # filtered = [tpl for tpl in tuples if tpl[0][3] < 0]
    #
    # reduct_batch = random.sample(filtered, 200)
    # points = geom_helper.get_simmetry_points()
    # new_list = replace_reward(reduct_batch, reward_mapping)
    #
    #
    # new_list = get_strictly_simmetric_set(points[0], points[1], new_list)
    #
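# Minimal geometry sketch of the position part of reflect_tuple_on_line()
# used in get_strictly_simmetric_set above (its definition is not shown in
# this excerpt): mirror a point across the guidance line through point_a and
# point_b. The real helper presumably also mirrors the heading and lateral
# velocity components of the state; this function and its name are
# illustrative only.
def _reflect_point_across_line(point_a, point_b, x, y):
    ax, ay = point_a
    bx, by = point_b
    dx, dy = bx - ax, by - ay
    # Parameter of the foot of the perpendicular from (x, y) onto the line.
    t = ((x - ax) * dx + (y - ay) * dy) / (dx * dx + dy * dy)
    fx, fy = ax + t * dx, ay + t * dy
    # The mirror image lies as far beyond the foot as the point lies before it.
    return 2 * fx - x, 2 * fy - y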
# with open(os.path.join(dir_name + '/' + 'samples20180429212517action_cte_rotation_s0_0'), 'rb') as infile:
#     try:
#         while True:
#             transitions = pickle.load(infile)
#     except EOFError as e:
#         pass
#     print('Number of transitions added : ', len(transitions))
#     tuples = transitions
#
# # correct
# correct_tuples = list()
# for tuple in tuples:
#     correct_tuple = (tuple[0], (0.0, -0.5), tuple[2], tuple[3], tuple[4])
#     correct_tuples.append(correct_tuple)
replace_reward = reward.RewardMapper(plot_flag=False, r_mode_='linear_with_rudder_punish')
replace_reward.set_boundary_points(buoys)
replace_reward.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
point_a, point_b = replace_reward.get_guidance_line()
replace_reward.set_shore_lines(upper_shore, lower_shore)
# tuples = list()
bundle_name = 'samples/samples_bundle_complete_action_b'
with open(bundle_name, 'rb') as file:
    tuples = pickle.load(file)
# tuples = [tup for tup in tuples if tup[0][2] + 103.5 < 20 and 7000 < tup[0][0] < 11500 and tup[0][3] < 0]
# print('Number of tuples to be considered:', len(tuples))
random.shuffle(tuples)
reduct_batch = tuples[:100000]
# tuples_with_reflection = list()
# for tuple in reduct_batch:
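# Hypothetical helper (name and glob pattern are illustrative) showing how a
# samples_bundle_* file like the one loaded above could be assembled from the
# per-episode files written by sample_transitions(); it assumes each input
# file holds one pickled transitions list.
def bundle_sample_files(pattern, bundle_name):
    import glob
    bundle = list()
    for path in sorted(glob.glob(pattern)):
        with open(path, 'rb') as infile:
            bundle.extend(pickle.load(infile))
    with open(bundle_name, 'wb') as outfile:
        pickle.dump(bundle, outfile)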
def evaluate_agent(ag_obj):
    import reward
    reward_mapping = reward.RewardMapper('quadratic', _g_helper=geom_helper)
    reward_mapping.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    agent = learner.Learner(load_saved_regression=ag_obj, nn_=True)
    env = environment.Environment(rw_mapper=reward_mapping)
    env.set_up()
    viewer = Viewer()
    viewer.plot_boundary(buoys)
    viewer.plot_goal(goal, 100)
    starting_points = [[11000, 5280, -103.5, 3, 0, 0],
                       [11000, 5280, -104.5, 3, 0, 0],
                       [11000, 5280, -105.5, 3, 0, 0],
                       [11000, 5300, -104, 3, 0, 0],
                       [11000, 5280, -103.5, 3, 0, 0],
                       [11000, 5320, -103.5, 3, 0, 0],
                       [11000, 5320, -103.5, 3, 0, 0]]
    ret_tuples = list()
    # env.set_single_start_pos_mode([11000, 5380.10098, -103, 3, 0, 0])
    # env.set_single_start_pos_mode([8000, 4600, -103.5, 3, 0, 0])
    # env.set_single_start_pos_mode([12000, 5500, -90, 3, 0, 0])
    # env.set_single_start_pos_mode([6600, 4200, -102, 3, 0, 0])
    # env.starts_from_file_mode('starting_points_global_coord')
    # env.move_to_next_start()
    results = list()
    num_steps = list()
    steps_inside_goal_region = list()
    for start_pos in starting_points:
        final_flag = 0
        transitions_list = list()
        total_steps = 0
        env.set_single_start_pos_mode(start_pos)
        env.move_to_next_start()
        steps_inside = 0
        for step in range(evaluation_steps):
            state = env.get_state()
            print('Value for yaw_p :', state[5])
            viewer.plot_position(state[0], state[1], state[2])
            state_r = utils.convert_to_simple_state(state, geom_helper)
            print('Value for yaw_p :', state_r[3])
            action = agent.select_action(state_r)
            state_prime, rw = env.step(action[0], action[1])
            transition = (state, (action[0], action[1]), state_prime, rw)
            # Count steps spent within 50 m of the guidance line.
            if abs(state_r[2]) < 50:
                steps_inside += 1
            final_flag = env.is_final()
            print("***Evaluation step " + str(step + 1) + " Completed")
            transitions_list.append(transition)
            total_steps = step
            if final_flag != 0:
                break
        # Collect this trajectory's transitions for the caller.
        ret_tuples += transitions_list
        results.append(final_flag)
        num_steps.append(total_steps)
        steps_inside_goal_region.append(steps_inside)
        with open('trajectory_' + agent.learner.__class__.__name__ + 'it' + str(total_steps)
                  + 'end' + str(final_flag), 'wb') as outfile:
            pickle.dump(transitions_list, outfile)
        with open('trajectory_' + agent.learner.__class__.__name__ + 'it' + str(total_steps)
                  + 'end' + str(final_flag) + '.csv', 'wt') as out:
            csv_out = csv.writer(out)
            csv_out.writerow(['x', 'y', 'heading', 'rudder_lvl'])
            for tr in transitions_list:
                pos = (tr[0][0], tr[0][1], tr[0][2], tr[1][0])
                csv_out.writerow(pos)
    with open('results' + agent.learner.__class__.__name__
              + datetime.datetime.now().strftime('%Y%m%d%H%M%S'), 'wb') as outfile:
        pickle.dump(num_steps, outfile)
        pickle.dump(results, outfile)
        pickle.dump(steps_inside_goal_region, outfile)
    return ret_tuples
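# Hypothetical driver for the evaluation routine above; the pickle file name
# is illustrative, and the file is assumed to hold the fitted regression
# object that learner.Learner(load_saved_regression=...) expects:
#
# with open('agents/agent_nn_fitted', 'rb') as infile:
#     ret_tuples = evaluate_agent(pickle.load(infile))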