def main(): parser = argparse.ArgumentParser() parser.add_argument("--env", default='diabcombolock', help="name of the environment e.g., montezuma") parser.add_argument( "--num_processes", default=6, type=int, help= "number of policy search (PS) processes to be launched at a given time" ) parser.add_argument("--forwardmodel", default='forwardmodel', help="Model for training the forwad abstraction") parser.add_argument("--backwardmodel", default='backwardmodel', help="Model for learning the backward abstraction") parser.add_argument("--discretization", default="True", help="Train with discretized/undiscretized model") parser.add_argument("--policy_type", default="linear", type=str, help="Type of policy (linear, non-linear)") parser.add_argument("--name", default="run-homer", help="Name of the experiment") parser.add_argument("--horizon", default=-1, type=int, help="Horizon") parser.add_argument("--samples", default=-1, type=int, help="Samples") parser.add_argument("--env_seed", default=None, type=int, help="Environment Seed") parser.add_argument("--noise", default=None, type=str, help="Noise") parser.add_argument("--save_trace", default="False", help="Save traces") parser.add_argument("--trace_sample_rate", default=500, type=int, help="How often to save traces") parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results") parser.add_argument("--debug", default="False", help="Debug the run") parser.add_argument("--pushover", default="False", help="Use pushover to send results on phone") args = parser.parse_args() env_name = args.env num_processes = args.num_processes exp_name = args.name experiment_name = "%s-%s-model-%s-horizon-%d-samples-%d-noise-%s" % \ (exp_name, env_name, args.backwardmodel, args.horizon, args.samples, args.noise) experiment = "%s/%s" % (args.save_path, experiment_name) print("EXPERIMENT NAME: ", experiment_name) # Create the experiment folder if not os.path.exists(experiment): os.makedirs(experiment) # Define log settings log_path = experiment + '/train_homer.log' multiprocess_logging_manager = MultiprocessingLoggerManager( file_path=log_path, logging_level=logging.INFO) master_logger = multiprocess_logging_manager.get_logger("Master") master_logger.log( "----------------------------------------------------------------") master_logger.log( " STARING NEW EXPERIMENT ") master_logger.log( "----------------------------------------------------------------") master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name)) # Read configuration and constant files. Configuration contain environment information and # constant file contains hyperparameters for the model and learning algorithm. with open("data/%s/config.json" % env_name) as f: config = json.load(f) # Add command line arguments. Command line arguments supersede file settings. 
    if args.horizon != -1:
        config["horizon"] = args.horizon
    if args.noise is not None:
        config["noise"] = args.noise
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = experiment
    config["exp_name"] = experiment_name
    config["env_seed"] = args.env_seed
    config["policy_type"] = args.policy_type
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    if args.samples != -1:
        constants["encoder_training_num_samples"] = args.samples
    constants["forward_model_type"] = args.forwardmodel
    constants["backward_model_type"] = args.backwardmodel
    constants["discretization"] = args.discretization == "True"
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))

    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 1
    for trial in range(1, num_runs + 1):

        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Create a new environment
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Save the environment for reproducibility
        env.save_environment(experiment, trial_name=trial)
        print("Saving Environment...")

        homing_policy_validation_fn = env.generate_homing_policy_validation_fn()

        learning_alg = DistributedHomerAlgorithm(config, constants)

        policy_result = learning_alg.train(experiment=experiment,
                                           env=env,
                                           env_name=env_name,
                                           num_processes=num_processes,
                                           experiment_name=experiment_name,
                                           logger=master_logger,
                                           use_pushover=args.pushover == "True",
                                           debug=args.debug == "True",
                                           homing_policy_validation_fn=homing_policy_validation_fn,
                                           trial=trial,
                                           do_reward_sensitive_learning=True)

        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are the same across all runs
        results = [result[key] for result in performance]

        # statistics.stdev needs at least two data points
        if len(results) <= 1:
            stdev = 0.0
        else:
            stdev = statistics.stdev(results)

        master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
                          (key, statistics.mean(results), statistics.median(results), stdev,
                           num_runs, results))
        print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
              (key, statistics.mean(results), statistics.median(results), stdev, num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
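# Example invocation (a hypothetical sketch: the script filename and flag values are
# illustrative; only the flags defined by the parser above are assumed):
#
#   python train_homer.py --env diabcombolock --name run-homer \
#       --horizon 6 --samples 100000 --num_processes 6 --save_path ./results/
#
# With these values the results land under ./results/ in a folder named
# run-homer-diabcombolock-model-backwardmodel-horizon-6-samples-100000-noise-None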
def main(): parser = argparse.ArgumentParser() parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma") parser.add_argument("--name", default="run-psdp", help="Name of the experiment") parser.add_argument("--forwardmodel", default='forwardmodel', help="Model for training the forwad abstraction") parser.add_argument("--backwardmodel", default='backwardmodel', help="Model for learning the backward abstraction") parser.add_argument("--discretization", default="True", help="Train with discretized/undiscretized model") parser.add_argument("--policy_type", default="linear", type=str, help="Type of policy (linear, non-linear)") parser.add_argument("--load", help="Name of the result folder containing homing policies and environment") parser.add_argument("--train_eps", type=int, help="Number of training episodes used for learning the policy set") parser.add_argument("--noise", default=None, type=str, help="Noise") parser.add_argument("--save_trace", default="False", help="Save traces") parser.add_argument("--trace_sample_rate", default=500, type=int, help="How often to save traces") parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results") args = parser.parse_args() env_name = args.env exp_name = args.name load_folder = args.load experiment_name = "%s-%s-model-%s-noise-%s" % (exp_name, env_name, args.model, args.noise) experiment = "./%s/%s" % (args.save_path, experiment_name) print("EXPERIMENT NAME: ", experiment_name) # Create the experiment folder if not os.path.exists(experiment): os.makedirs(experiment) # Define log settings log_path = experiment + '/train_homer.log' multiprocess_logging_manager = MultiprocessingLoggerManager( file_path=log_path, logging_level=logging.INFO) master_logger = multiprocess_logging_manager.get_logger("Master") master_logger.log("----------------------------------------------------------------") master_logger.log(" STARING NEW EXPERIMENT ") master_logger.log("----------------------------------------------------------------") master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name)) with open("data/%s/config.json" % env_name) as f: config = json.load(f) # Add command line arguments. Command line arguments supersede file settings. 
    if args.noise is not None:
        config["noise"] = args.noise
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = args.save_path
    config["exp_name"] = experiment_name
    config["policy_type"] = args.policy_type
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    constants["forward_model_type"] = args.forwardmodel
    constants["backward_model_type"] = args.backwardmodel
    constants["discretization"] = args.discretization == "True"
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))

    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 5
    for trial in range(1, num_runs + 1):

        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Create a new environment
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Load the environment saved by the earlier run
        env_folder = load_folder + "/trial_%d_env" % trial
        env_folders = [join(env_folder, f) for f in listdir(env_folder) if isdir(join(env_folder, f))]
        assert len(env_folders) == 1, \
            "Expected exactly one environment folder. Specify the folder manually: %r" % env_folders
        env.load_environment_from_folder(env_folders[0])
        master_logger.log("Loaded Environment from %r" % env_folders[0])

        # Fix config to match the loaded environment.
        # TODO implement the next block of code in a scalable manner
        config["horizon"] = env.env.horizon
        config["obs_dim"] = -1
        GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)
        master_logger.log("Environment horizon %r, Observation dimension %r" %
                          (config["horizon"], config["obs_dim"]))

        learning_alg = DistributedHomerAlgorithm(config, constants)

        policy_result = learning_alg.train_from_learned_homing_policies(
            env=env,
            load_folder=load_folder,
            train_episodes=args.train_eps,
            experiment_name=experiment_name,
            logger=master_logger,
            use_pushover=False,
            trial=trial)

        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are the same across all runs
        results = [result[key] for result in performance]

        master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
                          (key, statistics.mean(results), statistics.median(results),
                           statistics.stdev(results), num_runs, results))
        print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
              (key, statistics.mean(results), statistics.median(results),
               statistics.stdev(results), num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
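# Example invocation (hypothetical sketch; the script filename is illustrative). --load must
# point at the results folder of a previous HOMER run: the trial_%d_env subfolders and the
# learned homing policies are both read from it:
#
#   python train_psdp.py --env stochcombolock --name run-psdp \
#       --load ./results/run-homer-stochcombolock-<...> --train_eps 10000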
def main(): parser = argparse.ArgumentParser() parser.add_argument("--env", default='stochcombolock', help="name of the environment e.g., montezuma") parser.add_argument("--model", default='gumbeldouble', help="Model for training the encoding function") parser.add_argument("--name", default="debug-encoder", help="Name of the experiment") parser.add_argument("--horizon", default=-1, type=int, help="Horizon") parser.add_argument("--samples", default=-1, type=int, help="Samples") parser.add_argument("--learn_type", default="vanilla", type=str, help="Either vanilla, coordinate, transfer") args = parser.parse_args() env_name = args.env exp_name = args.name with open("data/%s/config.json" % env_name) as f: config = json.load(f) # Add command line arguments. Command line arguments supersede file settings. if args.horizon != -1: config["horizon"] = args.horizon config["encoder_training_type"] = args.learn_type GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config) with open("data/%s/constants.json" % env_name) as f: constants = json.load(f) if args.samples != -1: constants["encoder_training_num_samples"] = args.samples constants["model_type"] = args.model print(json.dumps(config, indent=2)) # Validate the keys validate(config, constants) # Create file experiment_name = "%s-model-%s-horizon-%d-samples-%d-%s" % ( exp_name, args.model, config["horizon"], constants["encoder_training_num_samples"], env_name) experiment = "./%s/%s" % (args.save_path, experiment_name) print("EXPERIMENT NAME: ", experiment_name) # Create the experiment folder if not os.path.exists(experiment): os.makedirs(experiment) # Define log settings log_path = experiment + '/train_homer.log' multiprocess_logging_manager = MultiprocessingLoggerManager( file_path=log_path, logging_level=logging.INFO) master_logger = multiprocess_logging_manager.get_logger("Master") master_logger.log( "----------------------------------------------------------------") master_logger.log( " STARING NEW EXPERIMENT ") master_logger.log( "----------------------------------------------------------------") master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name)) # log core experiment details master_logger.log("CONFIG DETAILS") for k, v in sorted(config.items()): master_logger.log(" %s --- %r" % (k, v)) master_logger.log("CONSTANTS DETAILS") for k, v in sorted(constants.items()): master_logger.log(" %s --- %r" % (k, v)) master_logger.log("START SCRIPT CONTENTS") with open(__file__) as f: for line in f.readlines(): master_logger.log(">>> " + line.strip()) master_logger.log("END SCRIPT CONTENTS") # performance = [] num_runs = 100 for attempt in range(1, num_runs + 1): master_logger.log("========= STARTING EXPERIMENT %d ======== " % attempt) p = mp.Process(target=DebugTrainEncodingFunction.do_train, args=(config, constants, env_name, experiment_name, master_logger, False, True)) p.daemon = False p.start() p.join() # for key in performance[0]: # Assumes the keys are same across all runes # results = [result[key] for result in performance] # master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" % # (key, statistics.mean(results), statistics.median(results), statistics.stdev(results), # num_runs, results)) # print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" % # (key, statistics.mean(results), statistics.median(results), statistics.stdev(results), # num_runs, results)) # Cleanup multiprocess_logging_manager.cleanup()
# --- Script: du_baselines experiments (oracleq / decoding / qlearning) ---
import argparse
import json
import logging
import os
import random
import statistics

import numpy as np
import torch

# Project-local imports (module paths omitted in this excerpt):
# GenerateEnvironmentWrapper, MultiprocessingLoggerManager, validate, du_baseline


def main():
    parser = argparse.ArgumentParser(description='du_baselines Experiments')
    parser.add_argument("--env", default='diabcombolock', help="name of the environment e.g., montezuma")
    parser.add_argument("--name", default="run-du-baselines", help="Name of the experiment")
    parser.add_argument("--horizon", default=-1, type=int, help="Horizon")
    parser.add_argument("--noise", default=None, type=str, help="Noise")
    parser.add_argument("--save_trace", default="False", help="Save traces")
    parser.add_argument("--trace_sample_rate", default=500, type=int, help="How often to save traces")
    parser.add_argument("--save_path", default="./results/", type=str, help="Folder where to save results")
    parser.add_argument("--debug", default="False", help="Debug the run")
    parser.add_argument("--pushover", default="False", help="Use pushover to send results on phone")

    # Options for Du Baselines
    parser.add_argument('--seed', type=int, default=367, metavar='N', help='random seed (default: 367)')
    parser.add_argument('--episodes', type=int, default=10000000, help='Training Episodes')
    parser.add_argument('--alg', type=str, default='decoding', help='Learning Algorithm',
                        choices=["oracleq", "decoding", "qlearning"])
    parser.add_argument('--model_type', type=str, default='linear',
                        help='What model class for function approximation', choices=['linear', 'nn'])
    parser.add_argument('--lr', type=float, help='Learning Rate for optimization-based algorithms', default=3e-2)
    parser.add_argument('--epsfrac', type=float, help='Exploration fraction for Baseline DQN.', default=0.1)
    parser.add_argument('--conf', type=float, help='Exploration Bonus Parameter for Oracle Q.', default=3e-2)
    parser.add_argument('--n', type=int, default=200, help="Data collection parameter for decoding algorithm.")
    parser.add_argument('--num_cluster', type=int, default=3,
                        help="Number of hidden states parameter for decoding algorithm.")
    args = parser.parse_args()

    env_name = args.env
    exp_name = args.name

    experiment_name = "%s-%s-model-%s-horizon-%d-samples-%d-noise-%s" % \
                      (exp_name, env_name, args.model_type, args.horizon, args.episodes, args.noise)
    experiment = "%s/%s" % (args.save_path, experiment_name)
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_homer.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARTING NEW EXPERIMENT                     ")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("Environment Name %r. Experiment Name %r" % (env_name, exp_name))

    # Read the configuration and constants files. The configuration file contains environment
    # information; the constants file contains hyperparameters for the model and learning algorithm.
    with open("data/%s/config.json" % env_name) as f:
        config = json.load(f)

    # Add command line arguments. Command line arguments supersede file settings.
    if args.horizon != -1:
        config["horizon"] = args.horizon
    if args.noise is not None:
        config["noise"] = args.noise
    config["save_trace"] = args.save_trace == "True"
    config["trace_sample_rate"] = args.trace_sample_rate
    config["save_path"] = args.save_path
    config["exp_name"] = experiment_name
    GenerateEnvironmentWrapper.adapt_config_to_domain(env_name, config)

    with open("data/%s/constants.json" % env_name) as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))

    # Validate the keys
    validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))

    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    performance = []
    num_runs = 5
    for trial in range(1, num_runs + 1):

        master_logger.log("========= STARTING EXPERIMENT %d ======== " % trial)

        # Seed every source of randomness, offset per trial so runs are distinct but reproducible
        random.seed(args.seed + trial * 29)
        np.random.seed(args.seed + trial * 29)
        torch.manual_seed(args.seed + trial * 37)

        # Create a new environment
        env = GenerateEnvironmentWrapper(env_name, config)
        master_logger.log("Environment Created")
        print("Created Environment...")

        # Save the environment for reproducibility
        env.save_environment(experiment, trial_name=trial)
        print("Saving Environment...")

        learning_alg = du_baseline.get_alg(args, config)
        policy_result = du_baseline.train(env, learning_alg, args, master_logger)
        performance.append(policy_result)

    for key in performance[0]:  # Assumes the keys are the same across all runs
        results = [result[key] for result in performance]

        master_logger.log("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
                          (key, statistics.mean(results), statistics.median(results),
                           statistics.stdev(results), num_runs, results))
        print("%r: Mean %r, Median %r, Std %r, Num runs %r, All performance %r" %
              (key, statistics.mean(results), statistics.median(results),
               statistics.stdev(results), num_runs, results))

    # Cleanup
    multiprocess_logging_manager.cleanup()
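# Example invocation (hypothetical sketch; the script filename and flag values are
# illustrative; only the flags defined by the parser above are assumed):
#
#   python run_du_baselines.py --env diabcombolock --alg decoding --model_type linear \
#       --horizon 6 --episodes 1000000 --lr 3e-2 --n 200 --num_cluster 3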