def main():
    args = parser.parse_args()

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use('Agg')

        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H:%M:%S')
    ###################

    if not args.save_folder:
        print('Provide save folder.')
        exit()

    policy_net_dims = '-policy_net-'
    for dim in args.policy_net_hidden_dims:
        policy_net_dims += str(dim)
        policy_net_dims += '-'

    reward_net_dims = '-reward_net-'
    for dim in args.reward_net_hidden_dims:
        reward_net_dims += str(dim)
        reward_net_dims += '-'

    parent_dir = './results/' + str(args.save_folder) + st + \
                 policy_net_dims + reward_net_dims
    to_save = './results/' + str(args.save_folder) + st + policy_net_dims + reward_net_dims + \
              '-reg-' + str(args.regularizer) + \
              '-seed-' + str(args.seed) + '-lr-' + str(args.lr_irl)
    log_file = 'Experiment_info.txt'

    experiment_logger = Logger(to_save, log_file)
    experiment_logger.log_header('Arguments for the experiment :')
    experiment_logger.log_info(vars(args))

    # from rlmethods.rlutils import LossBasedTermination

    # for rl
    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac_pi import SoftActorCritic
    from rlmethods.rlutils import ReplayBuffer

    # for irl
    from irlmethods.deep_maxent import DeepMaxEnt
    import irlmethods.irlUtils as irlUtils

    from featureExtractor.gridworld_featureExtractor import OneHot, LocalGlobal, SocialNav, FrontBackSideSimple

    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    if args.feat_extractor is None:
        print('Feature extractor missing.')
        exit()

    # check for the feature extractor being used
    # initialize feature extractor
    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav()

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=5, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == 'DroneFeatureSAM1':
        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=5, thresh2=10,
        )

    if args.feat_extractor == 'DroneFeatureRisk':
        feat_ext = DroneFeatureRisk(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == 'DroneFeatureRisk_v2':
        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == 'DroneFeatureRisk_speed':
        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=10, thresh2=15,
        )

    if args.feat_extractor == 'DroneFeatureRisk_speedv2':
        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=18, thresh2=30,
        )

    experiment_logger.log_header('Parameters of the feature extractor :')
    experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if not args.dont_save and args.save_folder is None:
        print('Specify folder to save the results.')
        exit()

    '''
    environment can now initialize without an annotation file
    if args.annotation_file is None:
        print('Specify annotation file for the environment.')
        exit()
    '''

    if args.exp_trajectory_path is None:
        print('Specify expert trajectory folder.')
        exit()

    # **set is_onehot to false
    goal_state = np.asarray([1, 5])

    '''
    env = GridWorld(display=args.render, is_onehot=False, is_random=False,
                    rows=10, cols=10, seed=7,
                    obstacles=[np.asarray([5, 5])],
                    goal_state=np.asarray([1, 5]))
    '''
    env = GridWorld(
        display=args.render, is_random=True,
        rows=576, cols=720,
        agent_width=agent_width, step_size=step_size,
        obs_width=obs_width, width=grid_size,
        subject=args.subject,
        annotation_file=args.annotation_file,
        goal_state=goal_state,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        replace_subject=args.replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        continuous_action=True,
        reset_wrapper=utils.reset_wrapper,
        consider_heading=True,
        is_onehot=False,
    )

    experiment_logger.log_header('Environment details :')
    experiment_logger.log_info(env.__dict__)

    # CHANGE HERE
    # initialize loss based termination
    # initialize RL method
    replay_buffer = ReplayBuffer(args.replay_buffer_size)
    tbx_writer = SummaryWriter(to_save)

    rl_method = SoftActorCritic(
        env,
        replay_buffer,
        feat_ext,
        buffer_sample_size=args.replay_buffer_sample_size,
        tbx_writer=tbx_writer,
        entropy_tuning=True,
        tau=0.005,
        log_alpha=args.log_alpha,
        entropy_target=args.entropy_target,
        render=args.render,
        checkpoint_interval=100000000,
        play_interval=args.play_interval,
    )

    print("RL method initialized.")
    print(rl_method.policy)

    experiment_logger.log_header('Details of the RL method :')
    experiment_logger.log_info(rl_method.__dict__)

    # initialize IRL method
    # CHANGE HERE
    trajectory_path = args.exp_trajectory_path

    # args.scale_svf may be None, a scaling value, or falsy; default to False
    scale = args.scale_svf if args.scale_svf else False

    irl_method = DeepMaxEnt(
        trajectory_path,
        rlmethod=rl_method,
        rl_episodes=args.rl_episodes,
        env=env,
        iterations=args.irl_iterations,
        on_server=args.on_server,
        l1regularizer=args.regularizer,
        learning_rate=args.lr_irl,
        seed=args.seed,
        graft=False,
        scale_svf=scale,
        rl_max_ep_len=args.max_episode_length,
        hidden_dims=args.reward_net_hidden_dims,
        clipping_value=args.clipping_value,
        enumerate_all=True,
        save_folder=parent_dir,
    )

    print("IRL method initialized.")
    print(irl_method.reward)

    experiment_logger.log_header('Details of the IRL method :')
    experiment_logger.log_info(irl_method.__dict__)

    irl_method.train()

    if not args.dont_save:
        pass
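# Illustrative sketch (not part of the original scripts): the repeated
# `if args.feat_extractor == ...` chains in the mains above and below could be
# collapsed into a single dictionary of factories. The class names and
# constructor arguments are the ones already used in this file; the helper
# itself (name, signature) is hypothetical and assumes the repo's
# featureExtractor modules are importable.
def build_feature_extractor_example(name, agent_width=10, obs_width=10,
                                    step_size=2, grid_size=10):
    from featureExtractor.gridworld_featureExtractor import OneHot, LocalGlobal
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speed

    factories = {
        'Onehot': lambda: OneHot(grid_rows=10, grid_cols=10),
        'LocalGlobal': lambda: LocalGlobal(window_size=5, grid_size=grid_size,
                                           agent_width=agent_width,
                                           obs_width=obs_width,
                                           step_size=step_size),
        'DroneFeatureRisk_speed': lambda: DroneFeatureRisk_speed(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=10, thresh2=15),
    }
    if name not in factories:
        raise ValueError('Unknown feature extractor: {}'.format(name))
    return factories[name]()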
def main():
    args = parser.parse_args()

    experiment_logger = Logger('temp_save.txt')
    experiment_logger.log_header('Arguments for the experiment :')
    experiment_logger.log_info(vars(args))

    mp.set_start_method('spawn')

    if args.render:
        from envs.gridworld import GridWorld
    else:
        from envs.gridworld_clockless import GridWorldClockless as GridWorld

    agent_width = 10
    step_size = 10
    obs_width = 10
    grid_size = 10

    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=3, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
        )

    experiment_logger.log_header('Parameters of the feature extractor :')
    experiment_logger.log_info(feat_ext.__dict__)

    '''
    np.asarray([2,2]),np.asarray([7,4]),np.asarray([3,5]),
    np.asarray([5,2]),np.asarray([8,3]),np.asarray([7,5]),
    np.asarray([3,3]),np.asarray([3,7]),np.asarray([5,7])
    '''
    env = GridWorld(
        display=args.render, is_onehot=False, is_random=True,
        rows=100, cols=100, seed=7,
        agent_width=agent_width, step_size=step_size,
        obs_width=obs_width, width=grid_size,
        buffer_from_obs=0, obstacles=3,
        goal_state=np.asarray([5, 5]),
    )

    experiment_logger.log_header('Environment details :')
    experiment_logger.log_info(env.__dict__)

    model = ActorCritic(
        env, feat_extractor=feat_ext, gamma=0.99,
        log_interval=100, max_ep_length=40,
        hidden_dims=args.policy_net_hidden_dims,
        max_episodes=4000,
    )

    experiment_logger.log_header('Details of the RL method :')
    experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        model.policy.load(args.policy_path)

    if not args.play and not args.play_user:
        if args.reward_path is None:
            model.train_mp(n_jobs=4)
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train_mp(reward_net=reward_net, n_jobs=4)

        if not args.dont_save:
            model.policy.save('./saved-models/')

    if args.play:
        env.tickSpeed = 15
        assert args.policy_path is not None, 'pass a policy to play from!'

        model.generate_trajectory(args.num_trajs, './trajs/ac_fbs_simple4_static_map7/')

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(args.num_trajs, './trajs/ac_gridworld_user/')
def main():
    args = parser.parse_args()

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use('Agg')

        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    #####for the logger
    base_folder = './results/' + str(args.save_folder) + '-reg-' + str(args.regularizer) + \
                  '-seed-' + str(args.seed) + '-lr-' + str(args.lr)
    log_file = 'Experiment_info.txt'
    experiment_logger = Logger(base_folder, log_file)
    experiment_logger.log_header('Arguments for the experiment :')
    experiment_logger.log_info(vars(args))

    from rlmethods.rlutils import LossBasedTermination
    from rlmethods.b_actor_critic import ActorCritic
    from irlmethods.deep_maxent import DeepMaxEnt
    import irlmethods.irlUtils as irlUtils
    from featureExtractor.gridworld_featureExtractor import OneHot, LocalGlobal, SocialNav, FrontBackSideSimple

    agent_width = 10
    step_size = 10
    obs_width = 10
    grid_size = 10

    if args.feat_extractor is None:
        print('Feature extractor missing.')
        exit()

    # check for the feature extractor being used
    # initialize feature extractor
    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav()

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=5, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
        )

    experiment_logger.log_header('Parameters of the feature extractor :')
    experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if not args.dont_save and args.save_folder is None:
        print('Specify folder to save the results.')
        exit()

    if args.annotation_file is None:
        print('Specify annotation file for the environment.')
        exit()

    if args.exp_trajectory_path is None:
        print('Specify expert trajectory folder.')
        exit()

    # **set is_onehot to false
    goal_state = np.asarray([1, 5])

    '''
    env = GridWorld(display=args.render, is_onehot=False, is_random=False,
                    rows=10, cols=10, seed=7,
                    obstacles=[np.asarray([5, 5])],
                    goal_state=np.asarray([1, 5]))
    '''
    env = GridWorld(
        display=args.render, is_random=True,
        rows=576, cols=720,
        agent_width=agent_width, step_size=step_size,
        obs_width=obs_width, width=grid_size,
        annotation_file=args.annotation_file,
        goal_state=goal_state,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        reset_wrapper=utils.reset_wrapper,
        is_onehot=False,
    )

    experiment_logger.log_header('Environment details :')
    experiment_logger.log_info(env.__dict__)

    # CHANGE HERE
    # initialize loss based termination
    # initialize RL method
    rlMethod = ActorCritic(
        env, gamma=0.99,
        log_interval=args.rl_log_intervals,
        max_episodes=args.rl_episodes,
        max_ep_length=args.rl_ep_length,
        termination=None,
        hidden_dims=args.reward_net_hidden_dims,
        feat_extractor=feat_ext,
    )
    print("RL method initialized.")
    print(rlMethod.policy)

    if args.policy_path is not None:
        rlMethod.policy.load(args.policy_path)

    experiment_logger.log_header('Details of the RL method :')
    experiment_logger.log_info(rlMethod.__dict__)

    # initialize IRL method
    # CHANGE HERE
    trajectory_path = args.exp_trajectory_path
    folder_to_save = './results/' + args.save_folder

    irlMethod = DeepMaxEnt(
        trajectory_path,
        rlmethod=rlMethod,
        env=env,
        iterations=args.irl_iterations,
        log_intervals=5,
        on_server=args.on_server,
        regularizer=args.regularizer,
        learning_rate=args.lr,
        graft=True,
        hidden_dims=args.reward_net_hidden_dims,
        save_folder=folder_to_save,
    )
    print("IRL method initialized.")
    print(irlMethod.reward)

    experiment_logger.log_header('Details of the IRL method :')
    experiment_logger.log_info(irlMethod.__dict__)

    rewardNetwork = irlMethod.train()

    if not args.dont_save:
        pass
def main():
    args = parser.parse_args()

    utils.seed_all(args.seed)

    if args.on_server:
        # matplotlib without monitor
        matplotlib.use("Agg")

        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d_%H:%M:%S")
    ###################

    if not args.save_folder:
        print("Provide save folder.")
        exit()

    policy_net_dims = "-policy_net-"
    for dim in args.policy_net_hidden_dims:
        policy_net_dims += str(dim)
        policy_net_dims += "-"

    reward_net_dims = "-reward_net-"
    for dim in args.reward_net_hidden_dims:
        reward_net_dims += str(dim)
        reward_net_dims += "-"

    parent_dir = (
        "./results/" + str(args.save_folder) + st + policy_net_dims + reward_net_dims
    )
    to_save = (
        "./results/" + str(args.save_folder) + st + policy_net_dims + reward_net_dims
        + "-reg-" + str(args.regularizer)
        + "-seed-" + str(args.seed)
        + "-lr-" + str(args.lr_irl)
    )
    log_file = "Experiment_info.txt"
    experiment_logger = Logger(to_save, log_file)

    experiment_logger.log_header("Arguments for the experiment :")
    repo = git.Repo(search_parent_directories=True)
    experiment_logger.log_info({'From branch : ': repo.active_branch.name})
    experiment_logger.log_info({'Commit number : ': repo.head.object.hexsha})
    experiment_logger.log_info(vars(args))

    # from rlmethods.rlutils import LossBasedTermination

    # for rl
    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac_pi import SoftActorCritic
    from rlmethods.soft_ac import SoftActorCritic as QSAC
    from rlmethods.rlutils import ReplayBuffer

    # for irl
    from irlmethods.deep_maxent import DeepMaxEnt
    import irlmethods.irlUtils as irlUtils

    from featureExtractor.gridworld_featureExtractor import (
        OneHot,
        LocalGlobal,
        SocialNav,
        FrontBackSideSimple,
    )

    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    if args.feat_extractor is None:
        print("Feature extractor missing.")
        exit()

    # check for the feature extractor being used
    # initialize feature extractor
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav()

    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=5, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == "DroneFeatureSAM1":
        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=5, thresh2=10,
        )

    if args.feat_extractor == "DroneFeatureRisk":
        feat_ext = DroneFeatureRisk(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_v2":
        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_speed":
        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=10, thresh2=15,
        )

    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=18, thresh2=30,
        )

    if args.feat_extractor == 'VasquezF1':
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF2':
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF3':
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    experiment_logger.log_header("Parameters of the feature extractor :")
    experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if not args.dont_save and args.save_folder is None:
        print("Specify folder to save the results.")
        exit()

    """
    environment can now initialize without an annotation file
    if args.annotation_file is None:
        print('Specify annotation file for the environment.')
        exit()
    """

    if args.exp_trajectory_path is None:
        print("Specify expert trajectory folder.")
        exit()

    """
    env = GridWorld(display=args.render, is_onehot=False, is_random=False,
                    rows=10, cols=10, seed=7,
                    obstacles=[np.asarray([5, 5])],
                    goal_state=np.asarray([1, 5]))
    """
    env = GridWorld(
        display=args.render,
        is_random=True,
        rows=576,
        cols=720,
        agent_width=agent_width,
        step_size=step_size,
        obs_width=obs_width,
        width=grid_size,
        subject=args.subject,
        annotation_file=args.annotation_file,
        goal_state=None,
        step_wrapper=utils.step_wrapper,
        seed=args.seed,
        replace_subject=args.replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        continuous_action=False,
        reset_wrapper=utils.reset_wrapper,
        consider_heading=True,
        is_onehot=False,
    )

    experiment_logger.log_header("Environment details :")
    experiment_logger.log_info(env.__dict__)

    # CHANGE HERE
    # initialize loss based termination
    # initialize RL method
    if args.rl_method == "ActorCritic":
        rl_method = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=args.rl_log_intervals,
            max_episode_length=args.rl_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=to_save,
            lr=args.lr_rl,
            max_episodes=args.rl_episodes,
        )

    if args.rl_method == "SAC":
        if not env.continuous_action:
            print("The action space needs to be continuous for SAC to work.")
            exit()

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            play_interval=500,
            learning_rate=args.lr_rl,
            buffer_sample_size=args.replay_buffer_sample_size,
        )

    if args.rl_method == "discrete_SAC":
        if not isinstance(env.action_space, gym.spaces.Discrete):
            print("discrete SAC requires a discrete action space environment to work.")
            exit()

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        rl_method = QSAC(
            env,
            replay_buffer,
            feat_ext,
            args.replay_buffer_sample_size,
            learning_rate=args.lr_rl,
            entropy_tuning=True,
            entropy_target=0.3,
            play_interval=args.play_interval,
        )

    print("RL method initialized.")
    print(rl_method.policy)

    if args.policy_path is not None:
        rl_method.policy.load(args.policy_path)

    experiment_logger.log_header("Details of the RL method :")
    experiment_logger.log_info(rl_method.__dict__)

    # initialize IRL method
    # CHANGE HERE
    trajectory_path = args.exp_trajectory_path

    # args.scale_svf may be None, a scaling value, or falsy; default to False
    scale = args.scale_svf if args.scale_svf else False

    irl_method = DeepMaxEnt(
        trajectory_path,
        rlmethod=rl_method,
        env=env,
        iterations=args.irl_iterations,
        on_server=args.on_server,
        l1regularizer=args.regularizer,
        learning_rate=args.lr_irl,
        seed=args.seed,
        graft=False,
        scale_svf=scale,
        hidden_dims=args.reward_net_hidden_dims,
        clipping_value=args.clipping_value,
        enumerate_all=True,
        save_folder=parent_dir,
        rl_max_ep_len=args.rl_ep_length,
        rl_episodes=args.rl_episodes,
    )
    print("IRL method initialized.")
    print(irl_method.reward)

    experiment_logger.log_header("Details of the IRL method :")
    experiment_logger.log_info(irl_method.__dict__)

    smoothing_flag = bool(args.svf_smoothing)

    irl_method.train(smoothing=smoothing_flag)

    if not args.dont_save:
        pass
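# Illustrative sketch (hypothetical helper, not used by the scripts above): the
# results-folder names built by string concatenation in several of these mains
# reduce to a single function over the experiment settings. Only the standard
# library is used; the function name and signature are assumptions made for
# this example.
def build_save_folder_example(save_folder, policy_dims, reward_dims,
                              regularizer, seed, lr_irl):
    import datetime
    import time

    # timestamp in the same format the scripts use for IRL runs
    st = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S')
    policy_tag = '-policy_net-' + ''.join(str(d) + '-' for d in policy_dims)
    reward_tag = '-reward_net-' + ''.join(str(d) + '-' for d in reward_dims)
    return ('./results/' + str(save_folder) + st + policy_tag + reward_tag
            + '-reg-' + str(regularizer) + '-seed-' + str(seed)
            + '-lr-' + str(lr_irl))

# Example call:
#   build_save_folder_example('exp1', [256, 256], [128, 128], 0.01, 42, 1e-3)
# yields a path of the form
#   './results/exp1<timestamp>-policy_net-256-256--reward_net-128-128--reg-0.01-seed-42-lr-0.001'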
def main():
    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
    ###################

    args = parser.parse_args()

    seed_all(args.seed)

    if args.on_server:
        matplotlib.use("Agg")

        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    from matplotlib import pyplot as plt

    mp.set_start_method("spawn")

    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac import SoftActorCritic, QSoftActorCritic
    from rlmethods.rlutils import ReplayBuffer
    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureSAM1,
        DroneFeatureOccup,
        DroneFeatureRisk,
        DroneFeatureRisk_v2,
        VasquezF1,
        VasquezF2,
        VasquezF3,
        Fahad,
        GoalConditionedFahad,
    )
    from featureExtractor.gridworld_featureExtractor import (
        FrontBackSide,
        LocalGlobal,
        OneHot,
        SocialNav,
        FrontBackSideSimple,
    )
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureRisk_speed,
        DroneFeatureRisk_speedv2,
    )

    save_folder = None

    if not args.dont_save and not args.play:
        if not args.save_folder:
            print("Provide save folder.")
            exit()

        policy_net_dims = "-policy_net-"
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += "-"

        reward_net_dims = "-reward_net-"
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += "-"

        save_folder = (
            "./results/"
            + args.save_folder
            + st
            + args.feat_extractor
            + "-seed-" + str(args.seed)
            + policy_net_dims
            + reward_net_dims
            + "-total-ep-" + str(args.total_episodes)
            + "-max-ep-len-" + str(args.max_ep_length)
        )

        experiment_logger = Logger(save_folder, "experiment_info.txt")
        experiment_logger.log_header("Arguments for the experiment :")
        repo = git.Repo(search_parent_directories=True)
        experiment_logger.log_info({'From branch : ': repo.active_branch.name})
        experiment_logger.log_info({'Commit number : ': repo.head.object.hexsha})
        experiment_logger.log_info(vars(args))

    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None

    # initialize the feature extractor to be used
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav(fieldList=["agent_state", "goal_state"])

    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=11, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == "DroneFeatureSAM1":
        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureOccup":
        feat_ext = DroneFeatureOccup(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            window_size=window_size,
        )

    if args.feat_extractor == "DroneFeatureRisk":
        feat_ext = DroneFeatureRisk(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_v2":
        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_speed":
        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=10, thresh2=15,
        )

    if args.feat_extractor == "DroneFeatureRisk_speedv2":
        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=18, thresh2=30,
        )

    if args.feat_extractor == "VasquezF1":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF2":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF3":
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    if feat_ext is None:
        print("Please enter proper feature extractor!")
        exit()

    # log feature extractor info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Parameters of the feature extractor :")
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    replace_subject = bool(args.replace_subject)

    env = GridWorldDrone(
        display=args.render,
        is_onehot=False,
        seed=args.seed,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        subject=args.subject,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        step_reward=0.001,
        show_comparison=True,
        consider_heading=True,
        show_orientation=True,
        # rows=200, cols=200, width=grid_size)
        rows=576,
        cols=720,
        width=grid_size,
    )

    # env = gym.make('Acrobot-v1')

    # log environment info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Environment details :")
        experiment_logger.log_info(env.__dict__)

    # initialize RL
    if args.rl_method == "ActorCritic":
        model = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=100,
            max_episode_length=args.max_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=save_folder,
            lr=args.lr,
            entropy_coeff=args.entropy_coeff,
            max_episodes=args.total_episodes,
        )

    if args.rl_method == "SAC":
        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )

    if args.rl_method == "discrete_QSAC":
        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = QSoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )

    # log RL info
    if not args.dont_save and not args.play:
        experiment_logger.log_header("Details of the RL method :")
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        from debugtools import numericalSort

        policy_file_list = []
        reward_across_models = []
        # print(args.policy_path)
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, "*.pt"))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training
        if args.reward_path is None:
            if args.policy_path:
                model.policy.load(args.policy_path)

            if args.rl_method == "SAC" or args.rl_method == "discrete_QSAC":
                model.train(args.total_episodes, args.max_ep_length)
            else:
                model.train()
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + "/policy-models/")

    if args.play:
        # env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        print(policy_file_list)

        for policy_file in policy_file_list:
            print("Playing for policy :", policy_file)
            model.policy.load(policy_file)

            policy_folder = policy_file.strip().split("/")[0:-2]
            save_folder = ""
            for p in policy_folder:
                save_folder = save_folder + p + "/"

            print("The final save folder ", save_folder)
            # env.tickSpeed = 10
            assert args.policy_path is not None, "pass a policy to play from!"

            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1,
                )

            # reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        store_raw=args.store_raw_states,
                        path=save_folder + "/agent_generated_trajectories/",
                    )
            else:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        path=save_folder + "/agent_generated_trajectories/",
                        expert_svf=expert_svf,
                    )

            avg_reward, good_run_frac = compile_results(rewards, state_info, sub_info)
            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)

        plt.plot(avg_reward_list, c="r")
        plt.plot(frac_good_run_list, c="g")
        plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(
            args.num_trajs, args.render, path="./user_generated_trajectories/"
        )
def main():
    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    ###################

    args = parser.parse_args()

    if args.on_server:
        matplotlib.use('Agg')

        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    from matplotlib import pyplot as plt

    mp.set_start_method('spawn')

    from rlmethods.scott_SAC.SAC import SAC
    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import DroneFeatureSAM1, DroneFeatureOccup, DroneFeatureRisk, DroneFeatureRisk_v2
    from featureExtractor.gridworld_featureExtractor import FrontBackSide, LocalGlobal, OneHot, SocialNav, FrontBackSideSimple
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speed

    save_folder = None

    if not args.dont_save and not args.play:
        if not args.save_folder:
            print('Provide save folder.')
            exit()

        policy_net_dims = '-policy_net-'
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += '-'

        reward_net_dims = '-reward_net-'
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += '-'

        save_folder = './results/' + args.save_folder + st + args.feat_extractor + \
                      '-seed-' + str(args.seed) + policy_net_dims + reward_net_dims + \
                      '-total-ep-' + str(args.total_episodes) + '-max-ep-len-' + str(args.max_ep_length)

        experiment_logger = Logger(save_folder, 'experiment_info.txt')
        experiment_logger.log_header('Arguments for the experiment :')
        experiment_logger.log_info(vars(args))

    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None

    # initialize the feature extractor to be used
    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=11, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == 'DroneFeatureSAM1':
        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == 'DroneFeatureOccup':
        feat_ext = DroneFeatureOccup(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            window_size=window_size,
        )

    if args.feat_extractor == 'DroneFeatureRisk':
        feat_ext = DroneFeatureRisk(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=True,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == 'DroneFeatureRisk_v2':
        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=True,
            thresh1=15, thresh2=30,
        )

    if args.feat_extractor == 'DroneFeatureRisk_speed':
        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size, grid_size=grid_size,
            show_agent_persp=False,
            thresh1=10, thresh2=15,
        )

    if feat_ext is None:
        print('Please enter proper feature extractor!')
        exit()

    # log feature extractor info
    if not args.dont_save and not args.play:
        experiment_logger.log_header('Parameters of the feature extractor :')
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    replace_subject = bool(args.replace_subject)

    env = GridWorldDrone(
        display=args.render,
        is_onehot=False,
        seed=args.seed,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        subject=args.subject,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        step_reward=0.001,
        show_comparison=True,
        consider_heading=True,
        show_orientation=True,
        # rows=200, cols=300, width=grid_size)
        rows=576, cols=720, width=grid_size,
    )

    # log environment info
    if not args.dont_save and not args.play:
        experiment_logger.log_header('Environment details :')
        experiment_logger.log_info(env.__dict__)

    # initialize RL
    model = SAC(
        env,
        feat_extractor=feat_ext,
        log_interval=100,
        max_ep_length=args.max_ep_length,
        hidden_dims=args.policy_net_hidden_dims,
        save_folder=save_folder,
        max_episodes=args.total_episodes,
    )

    # log RL info
    if not args.dont_save and not args.play:
        experiment_logger.log_header('Details of the RL method :')
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:
        from debugtools import numericalSort

        policy_file_list = []
        reward_across_models = []
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, '*.pt'))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training
        if args.reward_path is None:
            if args.policy_path:
                model.policy.load(args.policy_path)
            model.train()
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + '/policy-models/')

    if args.play:
        # env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []

        for policy_file in policy_file_list:
            print('Playing for policy :', policy_file)
            model.policy.load(policy_file)

            policy_folder = policy_file.strip().split('/')[0:-2]
            save_folder = ''
            for p in policy_folder:
                save_folder = save_folder + p + '/'

            print('The final save folder ', save_folder)
            # env.tickSpeed = 10
            assert args.policy_path is not None, 'pass a policy to play from!'

            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1)

            # reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render,
                        path=save_folder + '/agent_generated_trajectories/')
            else:
                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render,
                        path=save_folder + '/agent_generated_trajectories/',
                        expert_svf=expert_svf)

            avg_reward, good_run_frac = compile_results(
                rewards, state_info, sub_info)
            # pdb.set_trace()
            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)

        plt.plot(avg_reward_list, c='r')
        plt.plot(frac_good_run_list, c='g')
        plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(args.num_trajs, args.render,
                                       path='./user_generated_trajectories/')
def main():
    args = parser.parse_args()

    mp.set_start_method('spawn')

    from envs.gridworld_drone import GridWorldDrone

    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)

    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])

    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1, thresh2=2, thresh3=3, thresh4=4,
            step_size=step_size, agent_width=agent_width,
            obs_width=obs_width,
            fieldList=['agent_state', 'goal_state', 'obstacles'])

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=3, grid_size=grid_size,
            agent_width=agent_width, obs_width=obs_width,
            step_size=step_size,
            fieldList=['agent_state', 'goal_state', 'obstacles'])

    # featExtract = OneHot(grid_rows=10, grid_cols=10)
    # featExtract = FrontBackSideSimple(thresh1=1, fieldList=['agent_state', 'goal_state', 'obstacles'])
    # featExtract = SocialNav(fieldList=['agent_state', 'goal_state'])

    '''
    np.asarray([2,2]),np.asarray([7,4]),np.asarray([3,5]),
    np.asarray([5,2]),np.asarray([8,3]),np.asarray([7,5]),
    np.asarray([3,3]),np.asarray([3,7]),np.asarray([5,7])

    env = GridWorld(display=args.render, is_onehot=False, is_random=True,
                    rows=10, cols=10, seed=7,
                    agent_width=agent_width, step_size=step_size,
                    obs_width=obs_width, width=grid_size,
                    obstacles='../envs/map3.jpg',
                    goal_state=np.asarray([5, 5]))
    '''
    env = GridWorldDrone(
        display=args.render, is_onehot=False, seed=999,
        obstacles=None, show_trail=False, is_random=False,
        annotation_file=args.annotation_file, subject=None,
        tick_speed=90, obs_width=10,
        step_size=step_size, agent_width=agent_width,
        show_comparison=True,
        rows=576, cols=720, width=grid_size)

    model = ActorCritic(env, feat_extractor=feat_ext, gamma=0.99,
                        log_interval=50, max_ep_length=500,
                        max_episodes=2000)

    if args.policy_path is not None:
        model.policy.load(args.policy_path)

    if not args.play and not args.play_user:
        if args.reward_path is None:
            model.train_mp(n_jobs=4)
        else:
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train_mp(reward_net=reward_net, n_jobs=4)

        if not args.dont_save:
            model.policy.save('./saved-models/')

    if args.play:
        # env.tickSpeed = 15
        assert args.policy_path is not None, 'pass a policy to play from!'

        model.generate_trajectory(
            args.num_trajs, './trajs/ac_loc_glob_rectified_win_3_static_map3/')

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(args.num_trajs, './trajs/ac_gridworld_user/')