def classify_pedestrians(annotation_file, viscinity):
    '''
    Reads the annotation file, computes per-pedestrian crowding statistics and
    returns the pedestrian ids split into easy, medium and hard sets based on
    the average number of nearby pedestrians per frame.
    '''
    tick_speed = 30

    # initialize world
    env = GridWorldDrone(display=False, is_onehot=False, seed=10, obstacles=None,
                         show_trail=False, is_random=False, show_orientation=True,
                         annotation_file=annotation_file, subject=None,
                         external_control=False, replace_subject=True,
                         tick_speed=tick_speed, rows=576, cols=720, width=10)

    subject_set = extract_subjects_from_file(annotation_file)
    avg_ped_per_subject = []

    for subject in subject_set:
        print(' Subject :', subject)
        state = env.reset_and_replace(ped=subject)

        nearby_peds_in_frame = 0
        total_frames = env.final_frame - env.current_frame
        while env.current_frame < env.final_frame:
            state, _, _, _ = env.step()
            nearby_peds_in_frame += get_pedestrians_in_viscinity(state, viscinity)

        avg_peds_per_frame = nearby_peds_in_frame / total_frames
        avg_ped_per_subject.append(avg_peds_per_frame)
        print('Avg peds nearby :', avg_peds_per_frame)

    # sort the subjects by how crowded their surroundings are
    subject_array = np.asarray(list(subject_set))
    avg_peds_per_subject_arr = np.asarray(avg_ped_per_subject)
    subject_array = subject_array[avg_peds_per_subject_arr.argsort()]
    avg_peds_per_subject_arr.sort()

    easy_arr = subject_array[0:200]
    medium_arr = subject_array[200:380]
    hard_arr = subject_array[380:]

    return easy_arr, medium_arr, hard_arr
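# A minimal usage sketch (not part of the original module): split the pedestrians
# by difficulty and save the id lists for later runs. The annotation path,
# vicinity radius and output file names are illustrative assumptions.
if __name__ == '__main__':
    easy, medium, hard = classify_pedestrians(
        '../envs/expert_datasets/university_students/annotation/processed/'
        'frame_skip_1/students003_processed_corrected.txt',  # assumed path
        viscinity=30)                                        # assumed radius
    np.save('easy_ped_list.npy', easy)
    np.save('medium_ped_list.npy', medium)
    np.save('hard_ped_list.npy', hard)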
def main(args):
    output = {}

    # parameters for the feature extractors
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    if args.feat_extractor != "Raw_state":
        saved_policies = []
        assert os.path.isdir(args.parent_policy_folder), "Folder does not exist!"
        parent_path = pathlib.Path(args.parent_policy_folder)
        for seed_folder in parent_path.glob("./*"):
            for policy in seed_folder.glob("./*.pt"):
                saved_policies.append(str(policy))

    output["eval parameters"] = vars(args)

    # initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    np.random.seed(0)
    env = GridWorldDrone(display=False, is_onehot=False, seed=0, obstacles=None,
                         show_trail=True, is_random=False, subject=None,
                         annotation_file=args.annotation_file, tick_speed=60,
                         obs_width=10, step_size=step_size, agent_width=agent_width,
                         external_control=True,
                         replace_subject=args.dont_replace_subject,
                         show_comparison=True, consider_heading=consider_heading,
                         show_orientation=True, rows=576, cols=720, width=grid_size)

    # initialize the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2
    from featureExtractor.drone_feature_extractor import VasquezF1, VasquezF2, VasquezF3
    from featureExtractor.drone_feature_extractor import Fahad, GoalConditionedFahad

    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext_args = {
            "agent_width": agent_width,
            "obs_width": obs_width,
            "step_size": step_size,
            "grid_size": grid_size,
            "thresh1": 18,
            "thresh2": 30,
        }
        feat_ext = DroneFeatureRisk_speedv2(**feat_ext_args)

    if args.feat_extractor == "VasquezF1":
        feat_ext_args = {
            "density_radius": 6 * agent_width,
            "lower_speed_threshold": 18,
            "upper_speed_threshold": 30,
        }
        feat_ext = VasquezF1(feat_ext_args["density_radius"],
                             feat_ext_args["lower_speed_threshold"],
                             feat_ext_args["upper_speed_threshold"])

    if args.feat_extractor == "VasquezF2":
        feat_ext_args = {
            "density_radius": 6 * agent_width,
            "lower_speed_threshold": 18,
            "upper_speed_threshold": 30,
        }
        feat_ext = VasquezF2(feat_ext_args["density_radius"],
                             feat_ext_args["lower_speed_threshold"],
                             feat_ext_args["upper_speed_threshold"])

    if args.feat_extractor == "VasquezF3":
        feat_ext_args = {"agent_width": agent_width}
        feat_ext = VasquezF3(feat_ext_args["agent_width"])

    if args.feat_extractor == "Fahad":
        feat_ext_args = {
            "inner_ring_rad": 36,
            "outer_ring_rad": 60,
            "lower_speed_threshold": 0.5,
            "upper_speed_threshold": 1.0,
        }
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext_args = {
            "inner_ring_rad": 36,
            "outer_ring_rad": 60,
            "lower_speed_threshold": 0.5,
            "upper_speed_threshold": 1.0,
        }
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    # no features if dealing with raw trajectories
    if args.feat_extractor == "Raw_state":
        feat_ext_args = {}
        feat_ext = None

    output["feature_extractor_params"] = feat_ext_args
    output["feature_extractor"] = feat_ext

    if args.feat_extractor != "Raw_state":
        # initialize policy
        # for getting metrics from policy files
        for filename in saved_policies:
            policy_path = filename
            output_file = filename.split("/")[-3:]
            output_filename = ""
            for data in output_file:
                output_filename += data
            output_filename = output_filename.split(".")[0]

            sample_state = env.reset()
            state_size = feat_ext.extract_features(sample_state).shape[0]
            policy = Policy(state_size, env.action_space.n, [256])
            policy.load(policy_path)
            policy.to(DEVICE)

            # metric parameters
            metric_applicator = metric_utils.MetricApplicator()
            metric_applicator.add_metric(metrics.compute_trajectory_smoothness)
            metric_applicator.add_metric(metrics.compute_distance_displacement_ratio)
            metric_applicator.add_metric(metrics.proxemic_intrusions, [3])
            metric_applicator.add_metric(metrics.anisotropic_intrusions, [20])
            metric_applicator.add_metric(metrics.count_collisions, [10])
            metric_applicator.add_metric(metrics.goal_reached, [10, 10])
            metric_applicator.add_metric(metrics.pedestrian_hit, [10])
            metric_applicator.add_metric(metrics.trajectory_length)
            metric_applicator.add_metric(metrics.distance_to_nearest_pedestrian_over_time)

            # collect trajectories and apply metrics
            num_peds = len(env.pedestrian_dict.keys())
            output["metrics"] = metric_applicator.get_metrics()
            output["metric_results"] = {}
            metric_results = metric_utils.collect_trajectories_and_metrics(
                env,
                feat_ext,
                policy,
                num_peds,
                args.max_ep_length,
                metric_applicator,
                disregard_collisions=args.disregard_collisions,
            )
            output["metric_results"] = metric_results

            # drift calculation
            drift_matrix = np.zeros(
                (len(env.pedestrian_dict.keys()), len(args.drift_timesteps)))
            for drift_idx, drift_timestep in enumerate(args.drift_timesteps):
                ped_drifts = agent_drift_analysis(
                    policy,
                    "Policy_network",
                    env,
                    [int(ped_key) for ped_key in env.pedestrian_dict.keys()],
                    feat_extractor=feat_ext,
                    pos_reset=drift_timestep,
                )
                assert len(ped_drifts) == len(env.pedestrian_dict.keys())
                drift_matrix[:, drift_idx] = ped_drifts

            output["metric_results"]["drifts"] = drift_matrix

            pathlib.Path("./results/").mkdir(exist_ok=True)
            with open(
                "./results/" + output_filename + "_"
                + datetime.now().strftime("%Y-%m-%d-%H:%M"),
                "wb",
            ) as f:
                pickle.dump(output, f)

    else:
        # when raw trajectories are directly provided

        # metric parameters
        metric_applicator = metric_utils.MetricApplicator()
        metric_applicator.add_metric(metrics.compute_trajectory_smoothness, [10])
        metric_applicator.add_metric(metrics.compute_distance_displacement_ratio, [10])
        metric_applicator.add_metric(metrics.proxemic_intrusions, [3])
        metric_applicator.add_metric(metrics.anisotropic_intrusions, [20])
        metric_applicator.add_metric(metrics.count_collisions, [10])
        metric_applicator.add_metric(metrics.goal_reached, [10, 10])
        metric_applicator.add_metric(metrics.pedestrian_hit, [10])
        metric_applicator.add_metric(metrics.trajectory_length)
        metric_applicator.add_metric(metrics.distance_to_nearest_pedestrian_over_time)

        metric_results = metric_utils.collect_metrics_from_trajectory(
            args.trajectory_folder, metric_applicator)
        output["metric_results"] = metric_results

        pathlib.Path("./results/").mkdir(exist_ok=True)
        output_filename = args.trajectory_folder.strip().split("/")[-1]
        with open(
            "./results/" + output_filename + "_"
            + datetime.now().strftime("%Y-%m-%d-%H:%M"),
            "wb",
        ) as f:
            pickle.dump(output, f)
#*************************************************
# initialize environment
from envs.gridworld_drone import GridWorldDrone

consider_heading = True
np.random.seed(args.seed)
env = GridWorldDrone(display=args.render, is_onehot=False, seed=args.seed,
                     obstacles=None, show_trail=True, is_random=False,
                     subject=args.subject, annotation_file=args.annotation_file,
                     tick_speed=60, obs_width=10, step_size=step_size,
                     agent_width=agent_width, external_control=True,
                     replace_subject=args.run_exact, show_comparison=True,
                     consider_heading=consider_heading, show_orientation=True,
                     rows=576, cols=720, width=grid_size)

print('Environment initialized successfully.')

#*************************************************
# initialize the feature extractor
from featureExtractor.drone_feature_extractor import DroneFeatureSAM1, DroneFeatureMinimal
def main():
    ##### for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
    ###################

    args = parser.parse_args()

    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2

    save_folder = None
    if not args.dont_save and not args.play:
        if not args.save_folder:
            print("Provide save folder.")
            exit()

        policy_net_dims = "-policy_net-"
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += "-"

        reward_net_dims = "-reward_net-"
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += "-"

        save_folder = ("./results/" + args.save_folder + st + args.feat_extractor
                       + "-seed-" + str(args.seed) + policy_net_dims + reward_net_dims
                       + "-total-ep-" + str(args.total_epochs)
                       + "-max-ep-len-" + str(args.max_ep_length))

        experiment_logger = Logger(save_folder, "experiment_info.txt")
        experiment_logger.log_header("Arguments for the experiment :")
        repo = git.Repo(search_parent_directories=True)
        experiment_logger.log_info({'From branch : ': repo.active_branch.name})
        experiment_logger.log_info({'Commit number : ': repo.head.object.hexsha})
        experiment_logger.log_info(vars(args))

    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    # initialize the feature extractor to be used
    if args.feat_extractor == 'DroneFeatureRisk_speedv2':
        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            show_agent_persp=False,
                                            return_tensor=False,
                                            thresh1=18, thresh2=30)

    if feat_ext is None:
        print("Please enter proper feature extractor!")
        sys.exit()

    # log feature extractor information
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Parameters of the feature extractor :")
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    replace_subject = False
    if args.replace_subject:
        replace_subject = True
    else:
        replace_subject = False

    continuous_action_flag = False
    if args.continuous_control:
        continuous_action_flag = True

    env = GridWorldDrone(display=args.render, seed=args.seed, show_trail=False,
                         is_random=False, annotation_file=args.annotation_file,
                         subject=args.subject, tick_speed=60, obs_width=10,
                         step_size=step_size, agent_width=agent_width,
                         external_control=True, step_reward=0.001,
                         show_comparison=True, replace_subject=replace_subject,
                         continuous_action=continuous_action_flag,
                         # rows=200, cols=200, width=grid_size)
                         rows=576, cols=720, width=grid_size)

    # log information about the environment
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Environment details :")
        experiment_logger.log_info(env.__dict__)

    # initialize the controller
    categorical_flag = False
    output_size = 2
    if args.is_categorical:
        categorical_flag = True
        output_size = 35

    controller = SupervisedPolicyController(80, output_size,
                                            categorical=categorical_flag,
                                            hidden_dims=args.policy_net_hidden_dims,
                                            policy_path=args.policy_path,
                                            mini_batch_size=args.batch_size,
                                            learning_rate=args.lr,
                                            save_folder=save_folder)

    # log information about the controller
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Controller details :")
        experiment_logger.log_info(controller.__dict__)

    base_data_path = ('../envs/expert_datasets/university_students/annotation/'
                      'traj_info/frame_skip_1/students003/')
    folder_name = args.training_data_folder
    data_folder = base_data_path + folder_name

    if not args.play:
        if categorical_flag:
            controller.train(args.total_epochs, data_folder)
        else:
            controller.train_regression(args.total_epochs, data_folder)

    if args.play:
        controller.play_policy(args.num_trajs, env, args.max_ep_length, feat_ext)
def play_regression_policy(self, num_runs, max_episode_length, feat_extractor):
    '''
    Loads up an environment and checks the performance of the agent.
    '''
    # initialize variables needed for the run
    agent_width = 10
    obs_width = 10
    step_size = 2
    grid_size = 10

    # load up the environment
    annotation_file = ("../envs/expert_datasets/university_students/annotation/"
                       "processed/frame_skip_1/students003_processed_corrected.txt")
    env = GridWorldDrone(display=True, is_onehot=False, seed=0, obstacles=None,
                         show_trail=False, is_random=False,
                         annotation_file=annotation_file, subject=None,
                         tick_speed=60, obs_width=10, step_size=step_size,
                         agent_width=agent_width, replace_subject=True,
                         segment_size=None, external_control=True,
                         step_reward=0.001, show_comparison=True,
                         consider_heading=True, show_orientation=True,
                         continuous_action=False,
                         # rows=200, cols=200, width=grid_size)
                         rows=576, cols=720, width=grid_size)

    # initialize the feature extractor
    feat_ext = None
    if feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            show_agent_persp=False,
                                            return_tensor=False,
                                            thresh1=18, thresh2=30)

    # play the environment
    for i in range(num_runs):
        state = env.reset()
        state_features = feat_ext.extract_features(state)
        state_features = torch.from_numpy(state_features).type(
            torch.FloatTensor).to(self.device)
        done = False
        t = 0
        while t < max_episode_length:
            action = self.policy.eval_action(state_features)
            state, _, done, _ = env.step(action)
            state_features = feat_ext.extract_features(state)
            state_features = torch.from_numpy(state_features).type(
                torch.FloatTensor).to(self.device)
            t += 1
            if done:
                break
def main():
    output = {}

    # parameters for the feature extractors
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    args = parser.parse_args()
    output["eval parameters"] = vars(args)

    # initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    np.random.seed(0)
    env = GridWorldDrone(display=False, is_onehot=False, seed=0, obstacles=None,
                         show_trail=True, is_random=False, subject=None,
                         annotation_file=args.annotation_file, tick_speed=60,
                         obs_width=obs_width, step_size=step_size,
                         agent_width=agent_width, external_control=True,
                         replace_subject=args.dont_replace_subject,
                         show_comparison=True, consider_heading=consider_heading,
                         show_orientation=True, rows=576, cols=720, width=grid_size)

    feat_ext = fe_utils.load_feature_extractor(args.feat_extractor)
    output["feature_extractor"] = feat_ext

    # initialize policy
    sample_state = env.reset()
    state_size = feat_ext.extract_features(sample_state).shape[0]
    policy = QNetwork(state_size, env.action_space.n, 512)
    policy.load(args.policy_path)
    policy.to(DEVICE)

    # metric parameters
    metric_applicator = metric_utils.MetricApplicator()
    metric_applicator.add_metric(metrics.compute_trajectory_smoothness)
    metric_applicator.add_metric(metrics.compute_distance_displacement_ratio)
    metric_applicator.add_metric(metrics.proxemic_intrusions, [3])
    metric_applicator.add_metric(metrics.anisotropic_intrusions, [20])
    metric_applicator.add_metric(metrics.count_collisions, [5])
    metric_applicator.add_metric(metrics.goal_reached, [10, 0.5])
    metric_applicator.add_metric(metrics.trajectory_length)

    # collect trajectories and apply metrics
    num_peds = len(env.pedestrian_dict.keys())
    output["metrics"] = metric_applicator.get_metrics()
    output["metric_results"] = {}
    metric_results = metric_utils.collect_trajectories_and_metrics(
        env,
        feat_ext,
        policy,
        num_peds,
        args.max_ep_length,
        metric_applicator,
        disregard_collisions=True,
    )

    pd_metrics = pd.DataFrame(metric_results).T
    pd_metrics = pd_metrics.applymap(lambda x: x[0])
    output["metric_results"] = pd_metrics

    pathlib.Path('./results/').mkdir(exist_ok=True)
    with open(
        "./results/" + args.output_name + "_"
        + datetime.now().strftime("%Y-%m-%d-%H:%M"),
        "wb",
    ) as f:
        pickle.dump(output, f)
def extract_trajectory(annotation_file, folder_to_save, feature_extractor=None,
                       display=False, extract_action=False, show_states=False,
                       subject=None, trajectory_length_limit=None):

    if not os.path.exists(folder_to_save):
        os.makedirs(folder_to_save)

    lag_val = 8
    tick_speed = 60
    subject_list = extract_subjects_from_file(annotation_file)
    print(subject_list)
    disp = display
    total_path_len = 0

    if show_states:
        tick_speed = 5
        disp = True

    # initialize world
    world = GridWorldDrone(display=disp, is_onehot=False, seed=10, obstacles=None,
                           show_trail=False, is_random=False, show_orientation=True,
                           annotation_file=annotation_file, subject=None,
                           external_control=False, replace_subject=True,
                           tick_speed=tick_speed, rows=576, cols=720, width=10)

    default_action = (int(len(world.speed_array) / 2) * int(len(world.orientation_array))
                      + int(len(world.orientation_array) / 2))
    default_action = torch.tensor(default_action)

    if subject is not None:
        subject_list = subject

    for sub in subject_list:
        print('Starting for subject :', sub)
        trajectory_info = []
        if extract_action:
            action_info = []

        step_counter_segment = 0
        segment_counter = 1

        world.subject = sub
        old_state = world.reset()
        cur_lag = 0

        print('Path length :', world.final_frame - world.current_frame)
        path_len = world.final_frame - world.current_frame
        cur_subject_final_frame = world.final_frame
        total_path_len += world.final_frame - world.current_frame
        print('Total trajectory information :\nStarting frame: {}, final frame: {}'.format(
            world.current_frame, cur_subject_final_frame))
        print('Total path length :', path_len)

        if trajectory_length_limit is not None:
            traj_seg_length = min(trajectory_length_limit, path_len)
            # change the goal position
            world.goal_state = copy.deepcopy(world.return_position(
                world.cur_ped, world.current_frame + traj_seg_length)['position'])
            world.state['goal_state'] = copy.deepcopy(world.goal_state)

        print('Segment 1: Start frame :', world.current_frame)
        while world.current_frame < cur_subject_final_frame:
            state, _, _, _ = world.step()
            step_counter_segment += 1
            #step_counter_trajectory += 1
            #if disp:
            #    feature_extractor.overlay_bins(state)

            if extract_action:
                if cur_lag == lag_val:
                    action = extract_expert_action(state, old_state,
                                                   world.orient_quantization,
                                                   len(world.orientation_array),
                                                   world.speed_quantization,
                                                   len(world.speed_array))
                    '''
                    action = extract_expert_speed_orientation(state)
                    '''
                    old_state = copy.deepcopy(state)
                    action = torch.tensor(action)
                    action_info.append(action)
                    for i in range(cur_lag):
                        action_info.append(default_action)
                    cur_lag = 0
                    #pdb.set_trace()
                else:
                    cur_lag += 1

            if feature_extractor is not None:
                state = feature_extractor.extract_features(state)
                state = torch.tensor(state)
            trajectory_info.append(copy.deepcopy(state))

            if trajectory_length_limit is not None:
                if step_counter_segment % traj_seg_length == 0:
                    print('Segment {} final frame : {}'.format(segment_counter,
                                                               world.current_frame))
                    path_len = cur_subject_final_frame - world.current_frame
                    traj_seg_length = min(trajectory_length_limit, path_len)
                    print('Length of next path :', traj_seg_length)
                    # change the goal position
                    world.goal_state = copy.deepcopy(world.return_position(
                        world.cur_ped, world.current_frame + traj_seg_length)['position'])
                    world.state['goal_state'] = copy.deepcopy(world.goal_state)

                    print('Trajectory length : ', len(trajectory_info))
                    if feature_extractor is not None:
                        state_tensors = torch.stack(trajectory_info)
                        torch.save(state_tensors,
                                   os.path.join(folder_to_save,
                                                'traj_of_sub_{}_segment{}.states'.format(
                                                    str(sub), str(segment_counter))))
                    else:
                        with open('traj_of_sub_{}_segment{}.states'.format(
                                str(sub), str(segment_counter)), 'w') as fout:
                            json.dump(trajectory_info, fout)

                    if extract_action:
                        action_tensors = torch.stack(action_info)
                        torch.save(action_tensors,
                                   os.path.join(folder_to_save,
                                                'action_of_sub_{}_segment{}.actions'.format(
                                                    str(sub), str(segment_counter))))

                    segment_counter += 1
                    #pdb.set_trace()
                    step_counter_segment = 0
                    trajectory_info = []
                    print('Segment {}: Start frame : {}'.format(segment_counter,
                                                                world.current_frame))

        # add the last bunch of actions
        for i in range(cur_lag):
            action_info.append(default_action)

        if trajectory_length_limit is None:
            if feature_extractor is not None:
                state_tensors = torch.stack(trajectory_info)
                torch.save(state_tensors,
                           os.path.join(folder_to_save,
                                        'traj_of_sub_{}_segment{}.states'.format(
                                            str(sub), str(segment_counter))))
                if extract_action:
                    #pdb.set_trace()
                    action_tensors = torch.stack(action_info)
                    torch.save(action_tensors,
                               os.path.join(folder_to_save,
                                            'action_of_sub_{}_segment{}.actions'.format(
                                                str(sub), str(segment_counter))))
            else:
                '''
                with open('traj_of_sub_{}_segment{}.states'.format(str(sub),
                          str(segment_counter)), 'w') as fout:
                    pdb.set_trace()
                    json.dump(trajectory_info, fout)
                '''
                np.save(os.path.join(folder_to_save,
                                     'traj_of_sub_{}_segment{}.states'.format(
                                         str(sub), str(segment_counter))),
                        trajectory_info)
                if extract_action:
                    action_tensors = torch.stack(action_info)
                    torch.save(action_tensors,
                               os.path.join(folder_to_save,
                                            'action_of_sub_{}_segment{}.actions'.format(
                                                str(sub), str(segment_counter))))

    #if feature_extractor.debug_mode:
    #    feature_extractor.print_info()
    print('The average path length :', total_path_len / len(subject_list))
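# A minimal usage sketch (not part of the original module): dump featurized expert
# trajectories for every subject in the annotation file. The import, paths and
# parameter values below are illustrative assumptions; the feature-extractor
# settings mirror the values used elsewhere in this repository.
if __name__ == '__main__':
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2

    feat_ext = DroneFeatureRisk_speedv2(agent_width=10, obs_width=10,
                                        step_size=2, grid_size=10,
                                        thresh1=18, thresh2=30)
    extract_trajectory(
        '../envs/expert_datasets/university_students/annotation/processed/'
        'frame_skip_1/students003_processed_corrected.txt',   # assumed path
        './expert_trajectories/',                             # assumed output folder
        feature_extractor=feat_ext,
        extract_action=True)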
def run_analysis(args):

    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 3

    # checks if all the parameters are in order
    check_parameters(args)

    #*************************************************
    # initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    env = GridWorldDrone(display=args.render, is_onehot=False, obstacles=None,
                         show_trail=True, is_random=False,
                         annotation_file=args.annotation_file, tick_speed=60,
                         obs_width=10, step_size=step_size, agent_width=agent_width,
                         external_control=True, replace_subject=True,
                         show_comparison=True, consider_heading=consider_heading,
                         show_orientation=True, rows=576, cols=720, width=grid_size)

    print('Environment initialized successfully.')

    #*************************************************
    # initialize the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2

    feat_ext = None
    if args.feat_extractor == 'DroneFeatureRisk_speedv2':
        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            thresh1=18, thresh2=30)

    if args.feat_extractor == "VasquezF1":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF2":
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF3":
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    #*************************************************
    # initialize the agent
    agent_list = []
    agent_type_list = []
    policy_network_counter = 0
    #folder_dict = read_files_from_directories(args.parent_directory)

    for i in range(len(args.agent_type)):
        if args.agent_type[i] == 'Policy_network':
            # initialize the network
            agent = Policy(feat_ext.state_rep_size, env.action_space.n,
                           hidden_dims=args.policy_net_hidden_dims)
            if args.policy_path:
                agent.load(args.policy_path[policy_network_counter])
                policy_network_counter += 1
            else:
                print('Provide a policy path')

        if args.agent_type[i] == 'Potential_field':
            # initialize the PF agent
            max_speed = env.max_speed
            orient_quant = env.orient_quantization
            orient_div = len(env.orientation_array)
            speed_quant = env.speed_quantization
            speed_div = len(env.speed_array)

            attr_mag = 3
            rep_mag = 2
            agent = PFController(speed_div, orient_div, orient_quant)

        if args.agent_type[i] == 'Social_forces':
            orient_quant = env.orient_quantization
            orient_div = len(env.orientation_array)
            speed_quant = env.speed_quantization
            speed_div = len(env.speed_array)
            agent = SocialForcesController(speed_div, orient_div, orient_quant)

        agent_list.append(agent)
        agent_type_list.append(args.agent_type[i])

    #****************************************************
    # agent initialized from the commandline
    start_interval = args.start_interval
    reset_int = args.increment_interval
    reset_lim = args.end_interval

    # getting the pedestrian list
    ped_list = np.zeros(1)
    for list_name in args.ped_list:
        ped_list = np.concatenate((ped_list, np.load(list_name)), axis=0)
    ped_list = ped_list[1:].astype(int)
    ped_list = np.sort(ped_list)

    #****************************************************
    drift_lists = drift_analysis(agent_list, agent_type_list, env, ped_list,
                                 feat_extractor=feat_ext,
                                 start_interval=start_interval,
                                 reset_interval=reset_int,
                                 max_interval=reset_lim)
    drift_info_numpy = np.asarray(drift_lists)

    #****************************************************
    if args.save_filename:
        filename = (args.save_filename + str(start_interval) + '-'
                    + str(reset_lim) + '-' + str(reset_int))
        np.save('./drift_results/' + filename, drift_info_numpy)

    #****************************************************
    if args.plot:
        plot_drift_results(drift_lists)
step_size = 2
agent_width = 10
grid_size = 10
obs_width = agent_width
render = False

env = GridWorldDrone(display=render, is_onehot=False, seed=10, obstacles=None,
                     show_trail=False, is_random=True,
                     annotation_file=annotation_file, subject=None, tick_speed=60,
                     obs_width=10, step_size=step_size, agent_width=agent_width,
                     replace_subject=True, segment_size=None, external_control=True,
                     step_reward=0.001, show_comparison=True, consider_heading=True,
                     show_orientation=True, rows=576, cols=720, width=grid_size)

# initialize feature extractor
feat_ext = DroneFeatureRisk_speed(agent_width=agent_width,
                                  obs_width=obs_width,
                                  step_size=step_size,
def main(args):
    output = {}

    # parameters for the feature extractors
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    output["eval parameters"] = vars(args)

    # initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    np.random.seed(0)
    env = GridWorldDrone(display=False, is_onehot=False, seed=0, obstacles=None,
                         show_trail=True, is_random=False, subject=None,
                         annotation_file=args.annotation_file, tick_speed=60,
                         obs_width=10, step_size=step_size, agent_width=agent_width,
                         external_control=True,
                         replace_subject=args.dont_replace_subject,
                         show_comparison=True, consider_heading=consider_heading,
                         show_orientation=True, rows=576, cols=720, width=grid_size)

    # initialize the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2
    from featureExtractor.drone_feature_extractor import VasquezF1, VasquezF2, VasquezF3
    from featureExtractor.drone_feature_extractor import Fahad, GoalConditionedFahad

    feat_ext_args = {}
    feat_ext = None
    '''
    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext_args = {
            "agent_width": agent_width,
            "obs_width": obs_width,
            "step_size": step_size,
            "grid_size": grid_size,
            "thresh1": 18,
            "thresh2": 30,
        }
        feat_ext = DroneFeatureRisk_speedv2(**feat_ext_args)

    if args.feat_extractor == "VasquezF1":
        feat_ext_args = {
            "density_radius": 6 * agent_width,
            "lower_speed_threshold": 18,
            "upper_speed_threshold": 30,
        }
        feat_ext = VasquezF1(feat_ext_args["density_radius"],
                             feat_ext_args["lower_speed_threshold"],
                             feat_ext_args["upper_speed_threshold"])

    if args.feat_extractor == "VasquezF2":
        feat_ext_args = {
            "density_radius": 6 * agent_width,
            "lower_speed_threshold": 18,
            "upper_speed_threshold": 30,
        }
        feat_ext = VasquezF2(feat_ext_args["density_radius"],
                             feat_ext_args["lower_speed_threshold"],
                             feat_ext_args["upper_speed_threshold"])

    if args.feat_extractor == "VasquezF3":
        feat_ext_args = {
            "agent_width": agent_width,
        }
        feat_ext = VasquezF3(feat_ext_args["agent_width"])

    if args.feat_extractor == 'Fahad':
        feat_ext_args = {
            "inner_ring_rad": 36,
            "outer_ring_rad": 60,
            "lower_speed_threshold": 0.5,
            "upper_speed_threshold": 1.0,
        }
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == 'GoalConditionedFahad':
        feat_ext_args = {
            "inner_ring_rad": 36,
            "outer_ring_rad": 60,
            "lower_speed_threshold": 0.5,
            "upper_speed_threshold": 1.0,
        }
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    output["feature_extractor_params"] = feat_ext_args
    output["feature_extractor"] = feat_ext
    '''

    # initialize policy
    '''
    sample_state = env.reset()
    state_size = feat_ext.extract_features(sample_state).shape[0]
    policy = Policy(state_size, env.action_space.n, [256])
    policy.load(args.policy_path)
    policy.to(DEVICE)
    '''

    orient_quant = env.orient_quantization
    orient_div = len(env.orientation_array)
    speed_div = len(env.speed_array)
    policy = PFController(speed_div, orient_div, orient_quant)

    # metric parameters
    metric_applicator = metric_utils.MetricApplicator()
    metric_applicator.add_metric(metrics.compute_trajectory_smoothness, [10])
    metric_applicator.add_metric(metrics.compute_distance_displacement_ratio, [10])
    metric_applicator.add_metric(metrics.proxemic_intrusions, [3])
    metric_applicator.add_metric(metrics.anisotropic_intrusions, [20])
    metric_applicator.add_metric(metrics.count_collisions, [10])
    metric_applicator.add_metric(metrics.goal_reached, [10, 10])
    metric_applicator.add_metric(metrics.pedestrian_hit, [10])
    metric_applicator.add_metric(metrics.trajectory_length)
    metric_applicator.add_metric(metrics.distance_to_nearest_pedestrian_over_time)

    # collect trajectories and apply metrics
    num_peds = len(env.pedestrian_dict.keys())
    output["metrics"] = metric_applicator.get_metrics()
    output["metric_results"] = {}
    metric_results = metric_utils.collect_trajectories_and_metrics_non_NN(
        env,
        policy,
        num_peds,
        args.max_ep_length,
        metric_applicator,
        disregard_collisions=args.disregard_collisions,
    )
    output["metric_results"] = metric_results

    pathlib.Path('./results/').mkdir(exist_ok=True)
    with open(
        "./results/" + args.output_name + "_"
        + datetime.now().strftime("%Y-%m-%d-%H:%M"),
        "wb",
    ) as f:
        pickle.dump(output, f)
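# A minimal sketch (not part of the original script): reload any result pickles
# that the function above wrote into ./results/ and print the stored evaluation
# parameters. Assumes the directory layout used by this script.
if __name__ == "__main__":
    for result_file in pathlib.Path("./results/").glob("*"):
        with open(result_file, "rb") as f:
            saved_output = pickle.load(f)
        print(result_file.name, saved_output["eval parameters"])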
def main():
    ##### for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
    ###################

    args = parser.parse_args()

    seed_all(args.seed)

    if args.on_server:
        matplotlib.use("Agg")
        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    from matplotlib import pyplot as plt

    mp.set_start_method("spawn")

    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac import SoftActorCritic, QSoftActorCritic
    from rlmethods.rlutils import ReplayBuffer
    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureSAM1,
        DroneFeatureOccup,
        DroneFeatureRisk,
        DroneFeatureRisk_v2,
        VasquezF1,
        VasquezF2,
        VasquezF3,
        Fahad,
        GoalConditionedFahad,
    )
    from featureExtractor.gridworld_featureExtractor import (
        FrontBackSide,
        LocalGlobal,
        OneHot,
        SocialNav,
        FrontBackSideSimple,
    )
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureRisk_speed,
        DroneFeatureRisk_speedv2,
    )

    save_folder = None
    if not args.dont_save and not args.play:
        if not args.save_folder:
            print("Provide save folder.")
            exit()

        policy_net_dims = "-policy_net-"
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += "-"

        reward_net_dims = "-reward_net-"
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += "-"

        save_folder = ("./results/" + args.save_folder + st + args.feat_extractor
                       + "-seed-" + str(args.seed) + policy_net_dims + reward_net_dims
                       + "-total-ep-" + str(args.total_episodes)
                       + "-max-ep-len-" + str(args.max_ep_length))

        experiment_logger = Logger(save_folder, "experiment_info.txt")
        experiment_logger.log_header("Arguments for the experiment :")
        repo = git.Repo(search_parent_directories=True)
        experiment_logger.log_info({'From branch : ': repo.active_branch.name})
        experiment_logger.log_info({'Commit number : ': repo.head.object.hexsha})
        experiment_logger.log_info(vars(args))

    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    # initialize the feature extractor to be used
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav(fieldList=["agent_state", "goal_state"])

    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(thresh1=1, thresh2=2, thresh3=3, thresh4=4,
                                       step_size=step_size, agent_width=agent_width,
                                       obs_width=obs_width)

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(window_size=11, grid_size=grid_size,
                               agent_width=agent_width, obs_width=obs_width,
                               step_size=step_size)

    if args.feat_extractor == "DroneFeatureSAM1":
        feat_ext = DroneFeatureSAM1(agent_width=agent_width, obs_width=obs_width,
                                    step_size=step_size, grid_size=grid_size,
                                    thresh1=15, thresh2=30)

    if args.feat_extractor == "DroneFeatureOccup":
        feat_ext = DroneFeatureOccup(agent_width=agent_width, obs_width=obs_width,
                                     step_size=step_size, grid_size=grid_size,
                                     window_size=window_size)

    if args.feat_extractor == "DroneFeatureRisk":
        feat_ext = DroneFeatureRisk(agent_width=agent_width, obs_width=obs_width,
                                    step_size=step_size, grid_size=grid_size,
                                    show_agent_persp=False, thresh1=15, thresh2=30)

    if args.feat_extractor == "DroneFeatureRisk_v2":
        feat_ext = DroneFeatureRisk_v2(agent_width=agent_width, obs_width=obs_width,
                                       step_size=step_size, grid_size=grid_size,
                                       show_agent_persp=False, thresh1=15, thresh2=30)

    if args.feat_extractor == "DroneFeatureRisk_speed":
        feat_ext = DroneFeatureRisk_speed(agent_width=agent_width, obs_width=obs_width,
                                          step_size=step_size, grid_size=grid_size,
                                          show_agent_persp=False, return_tensor=False,
                                          thresh1=10, thresh2=15)

    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            show_agent_persp=False,
                                            return_tensor=False,
                                            thresh1=18, thresh2=30)

    if args.feat_extractor == "VasquezF1":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF2":
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF3":
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    if feat_ext is None:
        print("Please enter proper feature extractor!")
        exit()

    # log feature extractor info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Parameters of the feature extractor :")
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if args.replace_subject:
        replace_subject = True
    else:
        replace_subject = False

    env = GridWorldDrone(display=args.render, is_onehot=False, seed=args.seed,
                         obstacles=None, show_trail=False, is_random=True,
                         annotation_file=args.annotation_file, subject=args.subject,
                         tick_speed=60, obs_width=10, step_size=step_size,
                         agent_width=agent_width, replace_subject=replace_subject,
                         segment_size=args.segment_size, external_control=True,
                         step_reward=0.001, show_comparison=True,
                         consider_heading=True, show_orientation=True,
                         # rows=200, cols=200, width=grid_size)
                         rows=576, cols=720, width=grid_size)

    # env = gym.make('Acrobot-v1')

    # log environment info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Environment details :")
        experiment_logger.log_info(env.__dict__)

    # initialize RL
    if args.rl_method == "ActorCritic":
        model = ActorCritic(env, feat_extractor=feat_ext, gamma=1, log_interval=100,
                            max_episode_length=args.max_ep_length,
                            hidden_dims=args.policy_net_hidden_dims,
                            save_folder=save_folder, lr=args.lr,
                            entropy_coeff=args.entropy_coeff,
                            max_episodes=args.total_episodes)

    if args.rl_method == "SAC":
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        model = SoftActorCritic(env, replay_buffer, feat_ext,
                                buffer_sample_size=args.replay_buffer_sample_size,
                                entropy_tuning=True,
                                play_interval=args.play_interval,
                                entropy_target=args.entropy_target,
                                gamma=args.gamma, learning_rate=args.lr)

    if args.rl_method == "discrete_QSAC":
        replay_buffer = ReplayBuffer(args.replay_buffer_size)
        model = QSoftActorCritic(env, replay_buffer, feat_ext,
                                 buffer_sample_size=args.replay_buffer_sample_size,
                                 entropy_tuning=True,
                                 play_interval=args.play_interval,
                                 entropy_target=args.entropy_target,
                                 gamma=args.gamma, learning_rate=args.lr)

    # log RL info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Details of the RL method :")
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        from debugtools import numericalSort

        policy_file_list = []
        reward_across_models = []
        # print(args.policy_path)
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, "*.pt"))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training
        if args.reward_path is None:
            if args.policy_path:
                model.policy.load(args.policy_path)

            if args.rl_method == "SAC" or args.rl_method == "discrete_QSAC":
                model.train(args.total_episodes, args.max_ep_length)
            else:
                model.train()
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + "/policy-models/")

    if args.play:
        # env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        print(policy_file_list)
        for policy_file in policy_file_list:
            print("Playing for policy :", policy_file)
            model.policy.load(policy_file)

            policy_folder = policy_file.strip().split("/")[0:-2]
            save_folder = ""
            for p in policy_folder:
                save_folder = save_folder + p + "/"
            print("The final save folder ", save_folder)

            # env.tickSpeed = 10
            assert args.policy_path is not None, "pass a policy to play from!"

            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1,
                )

            # reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render,
                        store_raw=args.store_raw_states,
                        path=save_folder + "/agent_generated_trajectories/")
            else:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render,
                        path=save_folder + "/agent_generated_trajectories/",
                        expert_svf=expert_svf)

            avg_reward, good_run_frac = compile_results(rewards, state_info, sub_info)
            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)

        plt.plot(avg_reward_list, c="r")
        plt.plot(frac_good_run_list, c="g")
        plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200
        model.generate_trajectory_user(args.num_trajs, args.render,
                                       path="./user_generated_trajectories/")
def main():
    args = parser.parse_args()

    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    # set up the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2
    from featureExtractor.drone_feature_extractor import VasquezF1, VasquezF2, VasquezF3

    feat_ext = None
    if args.feat_extractor == 'DroneFeatureRisk_speedv2':
        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            thresh1=18, thresh2=30)

    if args.feat_extractor == 'VasquezF1':
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF2':
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF3':
        feat_ext = VasquezF3(agent_width)

    # set up the environment
    from envs.gridworld_drone import GridWorldDrone

    env = GridWorldDrone(display=True, is_onehot=False, obstacles=None,
                         show_trail=False, is_random=True,
                         annotation_file=args.annotation_file, tick_speed=60,
                         obs_width=10, step_size=step_size, agent_width=agent_width,
                         replace_subject=False, consider_heading=True,
                         show_orientation=True, rows=576, cols=720, width=grid_size)

    # set up the policy network
    from rlmethods.b_actor_critic import Policy

    state_size = feat_ext.extract_features(env.reset()).shape[0]
    policy_net = Policy(state_size, env.action_space.n, args.policy_net_hidden_dims)
    policy_net.load(args.policy_path)
    print(next(policy_net.parameters()).is_cuda)

    # set up the reward network
    from irlmethods.deep_maxent import RewardNet

    state_size = feat_ext.extract_features(env.reset()).shape[0]
    reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
    reward_net.load(args.reward_path)
    print(next(reward_net.parameters()).is_cuda)

    # run stuff
    '''
    screenshot, reward_map = generate_reward_map(env, feat_ext, reward_net,
                                                 render=args.render,
                                                 sample_rate=args.sample_rate,
                                                 frame_id=args.frame_id)
    plot_map(reward_map, frame_img=screenshot)
    '''
    visualize_reward_per_spot(env, feat_ext, reward_net, policy_net,
                              num_traj=20, div=36, render=True)
def main():
    # initialize summary writer
    tbx_writer = SummaryWriter(comment="_alpha_" + str(args.log_alpha))

    # initialize replay buffer
    replay_buffer = ReplayBuffer(args.replay_buffer_size)

    # initialize feature extractor
    feature_extractor = DroneFeatureRisk_speed(agent_width=agent_width,
                                               obs_width=obs_width,
                                               step_size=step_size,
                                               grid_size=grid_size,
                                               thresh1=18, thresh2=30)

    # initialize checkpoint
    if args.checkpoint_path:
        checkpointer = Checkpointer.load_checkpointer(args.checkpoint_path)
    else:
        checkpointer = None

    # initialize environment
    env = GridWorldDrone(display=args.render, is_random=True, rows=576, cols=720,
                         agent_width=agent_width, step_size=step_size,
                         obs_width=obs_width, width=grid_size,
                         annotation_file=args.annotation_file,
                         external_control=True, continuous_action=True,
                         consider_heading=True, is_onehot=False)

    # initialize the reward network
    state_size = feature_extractor.extract_features(env.reset()).shape[0]
    reward_net = None
    if args.reward_path is not None:
        reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
        reward_net.load(args.reward_path)

    # initialize the RL method
    soft_ac = SoftActorCritic(env, replay_buffer, feature_extractor,
                              buffer_sample_size=args.replay_buffer_sample_size,
                              tbx_writer=tbx_writer, tau=0.005,
                              log_alpha=args.log_alpha, entropy_tuning=True,
                              entropy_target=args.entropy_target,
                              render=args.render,
                              play_interval=args.play_interval,
                              checkpointer=checkpointer)

    soft_ac.train(args.rl_episodes, args.max_episode_length,
                  reward_network=reward_net)

    soft_ac.policy.save("./cont_world_policies")
def main():
    '''
    The main function
    '''
    #**************************************************
    # parameters for the feature extractors
    thresh1 = 10
    thresh2 = 15
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 3

    #**************************************************
    # for bookkeeping purposes
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')

    args = parser.parse_args()

    # checks if all the parameters are in order
    check_parameters(args)

    if args.on_server:
        matplotlib.use('Agg')
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    #*************************************************
    # initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    np.random.seed(0)
    env = GridWorldDrone(display=args.render, is_onehot=False, seed=0, obstacles=None,
                         show_trail=True, is_random=False, subject=None,
                         annotation_file=args.annotation_file, tick_speed=60,
                         obs_width=10, step_size=step_size, agent_width=agent_width,
                         external_control=True, replace_subject=args.run_exact,
                         show_comparison=True, consider_heading=consider_heading,
                         show_orientation=True, rows=576, cols=720, width=grid_size)

    print('Environment initialized successfully.')

    #*************************************************
    # initialize the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk, DroneFeatureRisk_v2
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speed, DroneFeatureRisk_speedv2

    if args.feat_extractor == 'DroneFeatureRisk':
        feat_ext = DroneFeatureRisk(agent_width=agent_width, obs_width=obs_width,
                                    step_size=step_size, grid_size=grid_size,
                                    show_agent_persp=True,
                                    thresh1=thresh1, thresh2=thresh2)

    if args.feat_extractor == 'DroneFeatureRisk_v2':
        feat_ext = DroneFeatureRisk_v2(agent_width=agent_width, obs_width=obs_width,
                                       step_size=step_size, grid_size=grid_size,
                                       show_agent_persp=False,
                                       thresh1=thresh1, thresh2=thresh2)

    if args.feat_extractor == 'DroneFeatureRisk_speed':
        feat_ext = DroneFeatureRisk_speed(agent_width=agent_width, obs_width=obs_width,
                                          step_size=step_size, grid_size=grid_size,
                                          show_agent_persp=True,
                                          thresh1=thresh1, thresh2=thresh2)

    if args.feat_extractor == 'DroneFeatureRisk_speedv2':
        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width, obs_width=obs_width,
                                            step_size=step_size, grid_size=grid_size,
                                            thresh1=18, thresh2=30)

    #*************************************************
    # initialize the agents
    agent_list = []       # list containing the agents
    agent_type_list = []  # list containing the type of the agents

    # for potential field agent
    attr_mag = 3
    rep_mag = 2
    #agent = PFController()

    ######################
    # for social forces agent

    ######################
    # for network based agents
    agent_file_list = ['/home/abhisek/Study/Robotics/deepirl/experiments/results/Beluga/IRL Runs/Variable-speed-hit-full-run-suppressed-local-updated-features2019-12-14_16:38:00-policy_net-256--reward_net-256--reg-0.001-seed-9-lr-0.0005/saved-models/28.pt']
    agent_file_list.append('/home/abhisek/Study/Robotics/deepirl/experiments/results/Quadra/RL Runs/Possible_strawman2019-12-16 12:22:05DroneFeatureRisk_speedv2-seed-789-policy_net-256--reward_net-128--total-ep-8000-max-ep-len-500/policy-models/0.pt')

    # initialize agents based on the agent files
    for agent_file in agent_file_list:
        agent_temp = Policy(feat_ext.state_rep_size, env.action_space.n,
                            hidden_dims=args.policy_net_hidden_dims)
        agent_temp.load(agent_file)
        agent_list.append(agent_temp)
        agent_type_list.append('Policy_network')

    #####################
    for i in range(len(agent_list)):
        while env.cur_ped != env.last_pedestrian:
            state = env.reset()
            done = False
            t = 0
            traj = [copy.deepcopy(state)]

            while not done and t < args.max_ep_length:
                if agent_type_list[i] == 'Policy_network':
                    feat = feat_ext.extract_features(state)
                    feat = torch.from_numpy(feat).type(torch.FloatTensor).to(DEVICE)
                    action = agent_list[i].eval_action(feat)

                state, _, done, _ = env.step(action)
                traj.append(copy.deepcopy(state))
                t += 1
                if done:
                    break

            total_smoothness, avg_smoothness = compute_trajectory_smoothness(traj)
            ratio = compute_distance_displacement_ratio(traj)

            proxemic_intrusions(traj, 10)
            anisotropic_intrusions(traj, 30)
            pdb.set_trace()
def main():
    ##### for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    ###################

    args = parser.parse_args()

    if args.on_server:
        matplotlib.use('Agg')
        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    from matplotlib import pyplot as plt

    mp.set_start_method('spawn')

    from rlmethods.scott_SAC.SAC import SAC
    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import DroneFeatureSAM1, DroneFeatureOccup, DroneFeatureRisk, DroneFeatureRisk_v2
    from featureExtractor.gridworld_featureExtractor import FrontBackSide, LocalGlobal, OneHot, SocialNav, FrontBackSideSimple
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speed

    save_folder = None
    if not args.dont_save and not args.play:
        if not args.save_folder:
            print('Provide save folder.')
            exit()

        policy_net_dims = '-policy_net-'
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += '-'

        reward_net_dims = '-reward_net-'
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += '-'

        save_folder = ('./results/' + args.save_folder + st + args.feat_extractor
                       + '-seed-' + str(args.seed) + policy_net_dims + reward_net_dims
                       + '-total-ep-' + str(args.total_episodes)
                       + '-max-ep-len-' + str(args.max_ep_length))

        experiment_logger = Logger(save_folder, 'experiment_info.txt')
        experiment_logger.log_header('Arguments for the experiment :')
        experiment_logger.log_info(vars(args))

    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    # initialize the feature extractor to be used
    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(thresh1=1, thresh2=2, thresh3=3, thresh4=4,
                                       step_size=step_size, agent_width=agent_width,
                                       obs_width=obs_width)

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(window_size=11, grid_size=grid_size,
                               agent_width=agent_width, obs_width=obs_width,
                               step_size=step_size)

    if args.feat_extractor == 'DroneFeatureSAM1':
        feat_ext = DroneFeatureSAM1(agent_width=agent_width, obs_width=obs_width,
                                    step_size=step_size, grid_size=grid_size,
                                    thresh1=15, thresh2=30)

    if args.feat_extractor == 'DroneFeatureOccup':
        feat_ext = DroneFeatureOccup(agent_width=agent_width, obs_width=obs_width,
                                     step_size=step_size, grid_size=grid_size,
                                     window_size=window_size)

    if args.feat_extractor == 'DroneFeatureRisk':
        feat_ext = DroneFeatureRisk(agent_width=agent_width, obs_width=obs_width,
                                    step_size=step_size, grid_size=grid_size,
                                    show_agent_persp=True, thresh1=15, thresh2=30)

    if args.feat_extractor == 'DroneFeatureRisk_v2':
        feat_ext = DroneFeatureRisk_v2(agent_width=agent_width, obs_width=obs_width,
                                       step_size=step_size, grid_size=grid_size,
                                       show_agent_persp=True, thresh1=15, thresh2=30)

    if args.feat_extractor == 'DroneFeatureRisk_speed':
        feat_ext = DroneFeatureRisk_speed(agent_width=agent_width, obs_width=obs_width,
                                          step_size=step_size, grid_size=grid_size,
                                          show_agent_persp=False,
                                          thresh1=10, thresh2=15)

    if feat_ext is None:
        print('Please enter proper feature extractor!')
        exit()

    # log feature extractor info
    if not args.dont_save and not args.play:
        experiment_logger.log_header('Parameters of the feature extractor :')
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if args.replace_subject:
        replace_subject = True
    else:
        replace_subject = False

    env = GridWorldDrone(display=args.render, is_onehot=False, seed=args.seed,
                         obstacles=None, show_trail=False, is_random=True,
                         annotation_file=args.annotation_file, subject=args.subject,
                         tick_speed=60, obs_width=10, step_size=step_size,
                         agent_width=agent_width, replace_subject=replace_subject,
                         segment_size=args.segment_size, external_control=True,
                         step_reward=0.001, show_comparison=True,
                         consider_heading=True, show_orientation=True,
                         #rows=200, cols=300, width=grid_size)
                         rows=576, cols=720, width=grid_size)

    # log environment info
    if not args.dont_save and not args.play:
        experiment_logger.log_header('Environment details :')
        experiment_logger.log_info(env.__dict__)

    # initialize RL
    model = SAC(env, feat_extractor=feat_ext, log_interval=100,
                max_ep_length=args.max_ep_length,
                hidden_dims=args.policy_net_hidden_dims,
                save_folder=save_folder, max_episodes=args.total_episodes)

    # log RL info
    if not args.dont_save and not args.play:
        experiment_logger.log_header('Details of the RL method :')
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        from debugtools import numericalSort

        policy_file_list = []
        reward_across_models = []
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, '*.pt'))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training
        if args.reward_path is None:
            if args.policy_path:
                model.policy.load(args.policy_path)
            model.train()
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + '/policy-models/')

    if args.play:
        #env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        for policy_file in policy_file_list:
            print('Playing for policy :', policy_file)
            model.policy.load(policy_file)

            policy_folder = policy_file.strip().split('/')[0:-2]
            save_folder = ''
            for p in policy_folder:
                save_folder = save_folder + p + '/'
            print('The final save folder ', save_folder)

            #env.tickSpeed = 10
            assert args.policy_path is not None, 'pass a policy to play from!'

            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1)

            #reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render,
                        path=save_folder + '/agent_generated_trajectories/')
            else:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render,
                        path=save_folder + '/agent_generated_trajectories/',
                        expert_svf=expert_svf)

            avg_reward, good_run_frac = compile_results(rewards, state_info, sub_info)
            #pdb.set_trace()
            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)

        plt.plot(avg_reward_list, c='r')
        plt.plot(frac_good_run_list, c='g')
        plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200
        model.generate_trajectory_user(args.num_trajs, args.render,
                                       path='./user_generated_trajectories/')
def main():
    args = parser.parse_args()

    mp.set_start_method('spawn')

    from envs.gridworld_drone import GridWorldDrone

    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(thresh1=1, thresh2=2, thresh3=3, thresh4=4,
                                       step_size=step_size, agent_width=agent_width,
                                       obs_width=obs_width,
                                       fieldList=['agent_state', 'goal_state', 'obstacles'])

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(window_size=3, grid_size=grid_size,
                               agent_width=agent_width, obs_width=obs_width,
                               step_size=step_size,
                               fieldList=['agent_state', 'goal_state', 'obstacles'])

    #featExtract = OneHot(grid_rows=10,grid_cols=10)
    #featExtract = FrontBackSideSimple(thresh1 = 1,fieldList = ['agent_state','goal_state','obstacles'])
    #featExtract = SocialNav(fieldList = ['agent_state','goal_state'])
    '''
    np.asarray([2,2]),np.asarray([7,4]),np.asarray([3,5]),
    np.asarray([5,2]),np.asarray([8,3]),np.asarray([7,5]),
    np.asarray([3,3]),np.asarray([3,7]),np.asarray([5,7])

    env = GridWorld(display=args.render, is_onehot=False, is_random=True,
                    rows=10, agent_width=agent_width, step_size=step_size,
                    obs_width=obs_width, width=grid_size, cols=10, seed=7,
                    obstacles='../envs/map3.jpg',
                    goal_state=np.asarray([5, 5]))
    '''
    env = GridWorldDrone(display=args.render, is_onehot=False, seed=999, obstacles=None,
                         show_trail=False, is_random=False,
                         annotation_file=args.annotation_file, subject=None,
                         tick_speed=90, obs_width=10, step_size=step_size,
                         agent_width=agent_width, show_comparison=True,
                         rows=576, cols=720, width=grid_size)

    model = ActorCritic(env, feat_extractor=feat_ext, gamma=0.99, log_interval=50,
                        max_ep_length=500, max_episodes=2000)

    if args.policy_path is not None:
        model.policy.load(args.policy_path)

    if not args.play and not args.play_user:
        if args.reward_path is None:
            model.train_mp(n_jobs=4)
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train_mp(reward_net=reward_net, n_jobs=4)

        if not args.dont_save:
            model.policy.save('./saved-models/')

    if args.play:
        #env.tickSpeed = 15
        assert args.policy_path is not None, 'pass a policy to play from!'
        model.generate_trajectory(args.num_trajs,
                                  './trajs/ac_loc_glob_rectified_win_3_static_map3/')

    if args.play_user:
        env.tickSpeed = 200
        model.generate_trajectory_user(args.num_trajs,
                                       './trajs/ac_gridworld_user/')