def copy_seed_dataset(from_dataset, to_dataset):
    from_path = get_dataset_dir(from_dataset)
    to_path = get_dataset_dir(to_dataset)
    if not os.path.exists(to_path):
        print(f"Copying dataset from {from_dataset} to {to_dataset}")
        shutil.copytree(from_path, to_path)
    else:
        print("DATASET EXISTS! Continue?")
        c = input(" (y/n) >>> ")
        if c == "y":
            return
        else:
            raise ValueError("Not continuing: Dataset exists")
def copy_seed_dataset(from_dataset, to_dataset, seed_count):
    from_dir = get_dataset_dir(from_dataset)
    to_dir = get_dataset_dir(to_dataset)
    if os.path.exists(to_dir):
        print("DATASET EXISTS! Continue where left off?")
        c = input(" (y/n) >>> ")
        if c == "y":
            return
        else:
            raise ValueError(
                f"Not continuing: Dataset {to_dataset} exists. Delete it if you like and try again")

    os.makedirs(to_dir)
    from_files = os.listdir(from_dir)
    train_ids, dev_ids, test_ids = get_restricted_env_id_lists()
    train_ids = set(train_ids)
    dev_ids = set(dev_ids)
    test_ids = set(test_ids)

    file_envs = [int(f.split("supervised_train_data_env_")[1]) for f in from_files]
    files_and_envs = list(zip(from_files, file_envs))
    random.shuffle(files_and_envs)

    # Copy up to seed_count train environments, and every dev/test environment
    files_to_copy = []
    train_envs_copied = 0
    for file, env in files_and_envs:
        if env in train_ids:
            if train_envs_copied < seed_count:
                files_to_copy.append(file)
                train_envs_copied += 1
        elif env in dev_ids or env in test_ids:
            files_to_copy.append(file)

    print(f"Copying {train_envs_copied} train envs, and all dev/test envs from {from_dataset} to {to_dataset}")
    for file in files_to_copy:
        from_path = os.path.join(from_dir, file)
        to_path = os.path.join(to_dir, file)
        shutil.copy(from_path, to_path)
def load_dataset(name):
    path = os.path.join(get_dataset_dir(), str(name))
    # Disabling the garbage collector speeds up unpickling of large object graphs
    gc.disable()
    with open(path, "rb") as f:
        try:
            dataset = pickle.load(f)
        except Exception as e:
            print(f"Error loading dataset pickle: {e}")
            dataset = []
    gc.enable()
    return dataset
def save_dataset(dataset, name):
    full_path = os.path.join(get_dataset_dir(), str(name))
    dirname = os.path.dirname(full_path)
    if not os.path.isdir(dirname):
        try:
            os.makedirs(dirname)
        except Exception:
            # The directory may have been created by another process in the meantime
            pass
    # Disabling the garbage collector speeds up pickling of large object graphs
    gc.disable()
    with open(full_path, "wb") as f:
        pickle.dump(dataset, f)
    gc.enable()
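# A minimal usage sketch (not part of the original module): a save/load round trip
# through the two helpers above. The dataset name "example_rollouts" and the record
# contents are hypothetical placeholders.
def _example_dataset_round_trip():
    rollouts = [{"env_id": 0, "actions": [0, 1, 2]}]
    save_dataset(rollouts, "example_rollouts")
    restored = load_dataset("example_rollouts")
    assert len(restored) == len(rollouts)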
def prune_dataset(run_name, count):
    dataset_dir = get_dataset_dir(dagger_dataset_name(run_name))
    files = os.listdir(dataset_dir)
    deleted = 0
    if len(files) > count:
        num_drop = len(files) - count
        files_to_drop = random.sample(files, num_drop)
        for file in files_to_drop:
            filepath = os.path.join(dataset_dir, file)
            os.remove(filepath)
            deleted += 1
    print(f"Deleted {deleted} files when pruning dataset {dataset_dir}")
def filter_env_list_has_data(env_list, dataset_name):
    good_envs = []
    cut_some = False
    for env in env_list:
        filename = "supervised_train_data_env_" + str(env) if dataset_name == "supervised" else dataset_name + "_" + str(env)
        path = os.path.join(get_dataset_dir(), filename)
        print("env_list dataset path:", path)
        # Check that the data file exists and is not trivially small
        if os.path.isfile(path) and os.path.getsize(path) > 1000:
            good_envs.append(env)
        else:
            cut_some = True
    if cut_some:
        print("Filtered out " + str(len(env_list) - len(good_envs)) + " envs because of missing data")
    return good_envs
def filter_env_list_has_data(dataset_name, env_list, prefix):
    good_envs = []
    cut_some = False
    too_small_count = 0
    missing = 0
    for env in env_list:
        filename = "supervised_train_data_env_" + str(env) if prefix == "supervised" else prefix + "_" + str(env)
        path = os.path.join(get_dataset_dir(dataset_name), filename)
        # Check that the data file exists and is not trivially small
        if not os.path.isfile(path):
            missing += 1
            cut_some = True
        elif os.path.getsize(path) < 1000:
            too_small_count += 1
            cut_some = True
        else:
            good_envs.append(env)
    if cut_some:
        print("Filtered out " + str(len(env_list) - len(good_envs)) + " envs because of missing data")
        print(f"   {too_small_count} files too small, {missing} files missing")
    return good_envs
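# Hedged usage sketch (assumed, not from the original file): drop envs whose data
# files are missing or too small before training. The dataset name "real" and the
# "supervised" prefix are placeholder conventions mirroring the code above.
def _usable_train_envs(dataset_name="real"):
    train_ids, _dev_ids, _test_ids = get_restricted_env_id_lists()
    return filter_env_list_has_data(dataset_name, train_ids, "supervised")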
def check_and_prompt_if_data_exists(system_namespaces):
    existing_datasets = []
    for namespace in system_namespaces:
        P.switch_to_namespace(namespace)
        setup = P.get_current_parameters()["Setup"]
        dname = get_eval_tmp_dataset_name(setup["model"], setup["run_name"])
        dpath = get_dataset_dir(dname)
        if os.path.exists(dpath):
            existing_datasets.append(dname)
    if len(existing_datasets) > 0:
        print("The following evaluation rollout datasets already exist:")
        print(existing_datasets)
        print("Do you want to continue evaluation and extend these datasets?")
        while True:
            char = input("(y/n)>>>>")
            if char in ["Y", "y"]:
                return
            elif char in ["N", "n"]:
                print("You may delete/move the existing datasets and run again!")
                sys.exit(0)
            else:
                print(f"Unrecognized input: {char}")
def env_data_already_collected(env_id, model_name, run_name):
    dname = get_eval_tmp_dataset_name(model_name, run_name)
    dataset_path = get_dataset_dir(dname)
    data_file = os.path.join(dataset_path, get_supervised_data_filename(env_id))
    return os.path.isfile(data_file)
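# Hedged sketch of how env_data_already_collected could make evaluation rollouts
# resumable; collect_fn is a hypothetical callback that performs a single rollout.
def _collect_missing_rollouts(env_ids, model_name, run_name, collect_fn):
    for env_id in env_ids:
        if env_data_already_collected(env_id, model_name, run_name):
            # Skip envs whose rollout file already exists on disk
            continue
        collect_fn(env_id)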
def first_choice_ok(choice):
    if len(choice) == 0:
        return False
    charray = np.asarray(choice)
    # Group env ids into blocks of 5 and count how many blocks appear more than once
    c5 = (charray / 5).astype(np.int64)
    copies = [c5[c] in c5[:c] or c5[c] in c5[c + 1:] for c in range(len(c5))]
    num_copies = np.asarray(copies).sum()
    print("num_envs_same: ", num_copies)
    if num_copies > 0:
        return False
    return True


if __name__ == "__main__":
    P.initialize_experiment()
    real_data_dir = get_dataset_dir("real")
    files = os.listdir(real_data_dir)
    available_env_ids = set([int(f.split("supervised_train_data_env_")[1]) for f in files])
    train_ids, dev_ids, test_ids = get_restricted_env_id_lists()
    train_ids = set(train_ids)
    dev_ids = set(dev_ids)
    test_ids = set(test_ids)
    avail_train_ids = list(train_ids.intersection(available_env_ids))
    avail_dev_ids = list(dev_ids.intersection(available_env_ids))
    avail_test_ids = list(test_ids.intersection(available_env_ids))
    print(f"Making subsets from total envs: {len(avail_train_ids)}")
    splits_out = {}
def train_supervised_worker(rl_process_conn):
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    rlsup = P.get_current_parameters()["RLSUP"]
    setup["trajectory_length"] = setup["sup_trajectory_length"]
    run_name = setup["run_name"]
    supervised_params = P.get_current_parameters()["Supervised"]
    num_epochs = supervised_params["num_epochs"]
    sup_device = rlsup.get("sup_device", "cuda:1")

    model_oracle_critic = None

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # Load the starter model and save it at epoch 0
    # Supervised worker uses GPU 1, RL uses GPU 0, simulators run on GPU 2
    model_sim = load_model(setup["sup_model"], setup["sim_model_file"], domain="sim")[0].to(sup_device)
    model_real = load_model(setup["sup_model"], setup["real_model_file"], domain="real")[0].to(sup_device)
    model_critic = load_model(setup["sup_critic_model"], setup["critic_model_file"])[0].to(sup_device)

    # ----------------------------------------------------------------------------------------------------------------
    print("SUPP: Initializing trainer")
    rlsup_params = P.get_current_parameters()["RLSUP"]
    sim_seed_dataset = rlsup_params.get("sim_seed_dataset")

    # TODO: Figure out if 6000 or 7000 here
    trainer = TrainerBidomainBidata(model_real, model_sim, model_critic, model_oracle_critic, epoch=0)
    train_envs_common = [e for e in train_envs if 6000 <= e < 7000]
    train_envs_sim = [e for e in train_envs if e < 7000]
    dev_envs_common = [e for e in dev_envs if 6000 <= e < 7000]
    dev_envs_sim = [e for e in dev_envs if e < 7000]
    sim_datasets = [rl_dataset_name(run_name)]
    real_datasets = ["real"]
    trainer.set_dataset_names(sim_datasets=sim_datasets, real_datasets=real_datasets)

    # ----------------------------------------------------------------------------------------------------------------
    # Continue where we left off: find the first epoch for which no checkpoint exists
    for start_sup_epoch in range(10000):
        epfname = epoch_sup_filename(run_name, start_sup_epoch, model="stage1", domain="sim")
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_sup_epoch > 0:
        print(f"SUPP: CONTINUING SUP TRAINING FROM EPOCH: {start_sup_epoch}")
        load_pytorch_model(model_real, epoch_sup_filename(run_name, start_sup_epoch - 1, model="stage1", domain="real"))
        load_pytorch_model(model_sim, epoch_sup_filename(run_name, start_sup_epoch - 1, model="stage1", domain="sim"))
        load_pytorch_model(model_critic, epoch_sup_filename(run_name, start_sup_epoch - 1, model="critic", domain="critic"))
        trainer.set_start_epoch(start_sup_epoch)

    # ----------------------------------------------------------------------------------------------------------------
    print("SUPP: Beginning training...")
    for epoch in range(start_sup_epoch, num_epochs):
        # Tell the RL process that a new Stage 1 model is ready for loading
        print("SUPP: Sending model to RL")
        model_sim.reset()
        rl_process_conn.send(["stage1_model_state_dict", model_sim.state_dict()])
        if DEBUG_RL:
            while True:
                sleep(1)

        if not sim_seed_dataset:
            ddir = get_dataset_dir(rl_dataset_name(run_name))
            os.makedirs(ddir, exist_ok=True)
            while len(os.listdir(ddir)) < 20:
                print("SUPP: Waiting for rollouts to appear")
                sleep(3)

        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(env_list_common=train_envs_common, env_list_sim=train_envs_sim, eval=False)
        test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)
        print("SUPP: Epoch", epoch, "train_loss:", train_loss, "test_loss:", test_loss)

        save_pytorch_model(model_real, epoch_sup_filename(run_name, epoch, model="stage1", domain="real"))
        save_pytorch_model(model_sim, epoch_sup_filename(run_name, epoch, model="stage1", domain="sim"))
        save_pytorch_model(model_critic, epoch_sup_filename(run_name, epoch, model="critic", domain="critic"))
def file_exists(name):
    full_path = os.path.join(get_dataset_dir(), str(name))
    data_exists = os.path.exists(full_path)
    full_path = os.path.join(get_model_dir(), str(name))
    model_exists = os.path.exists(full_path)
    return data_exists or model_exists
def train_dagger_simple():
    # ----------------------------------------------------------------------------------------------------------------
    # Load params and configure stuff
    P.initialize_experiment()
    params = P.get_current_parameters()["SimpleDagger"]
    setup = P.get_current_parameters()["Setup"]
    num_iterations = params["num_iterations"]
    sim_seed_dataset = params.get("sim_seed_dataset")
    run_name = setup["run_name"]
    device = params.get("device", "cuda:1")
    dataset_limit = params.get("dataset_size_limit_envs")
    seed_count = params.get("seed_count")

    # Trigger rebuild if necessary before going into all the threads and processes
    _ = get_restricted_env_id_lists(full=True)

    # Initialize the dataset
    if sim_seed_dataset:
        copy_seed_dataset(from_dataset=sim_seed_dataset,
                          to_dataset=dagger_dataset_name(run_name),
                          seed_count=seed_count or dataset_limit)
        gap = 0
    else:
        # TODO: Refactor this into a prompt function
        data_path = get_dataset_dir(dagger_dataset_name(run_name))
        if os.path.exists(data_path):
            print("DATASET EXISTS! Continue where left off?")
            c = input(" (y/n) >>> ")
            if c != "y":
                raise ValueError(
                    f"Not continuing: Dataset {data_path} exists. Delete it if you like and try again")
        else:
            os.makedirs(data_path, exist_ok=True)
        gap = dataset_limit - len(os.listdir(data_path))

    print("SUPP: Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    # ----------------------------------------------------------------------------------------------------------------
    # Load / initialize model
    model = load_model(setup["model"], setup["model_file"], domain="sim")[0].to(device)
    oracle = load_model("oracle")[0]

    # ----------------------------------------------------------------------------------------------------------------
    # Continue where we left off - load the model and set the iteration/epoch number
    for start_iteration in range(10000):
        epfname = epoch_dag_filename(run_name, start_iteration)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_iteration > 0:
        print(f"DAG: CONTINUING DAGGER TRAINING FROM ITERATION: {start_iteration}")
        load_pytorch_model(model, epoch_dag_filename(run_name, start_iteration - 1))

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize trainer
    trainer = Trainer(model, epoch=start_iteration, name=setup["model"], run_name=setup["run_name"])
    trainer.set_dataset_names([dagger_dataset_name(run_name)])

    # ----------------------------------------------------------------------------------------------------------------
    # Initialize policy roller
    roller = SimpleParallelPolicyRoller(
        num_workers=params["num_workers"],
        device=params["device"],
        policy_name=setup["model"],
        policy_file=setup["model_file"],
        oracle=oracle,
        dataset_save_name=dagger_dataset_name(run_name),
        no_reward=True)
    rollout_sampler = RolloutSampler(roller)

    # ----------------------------------------------------------------------------------------------------------------
    # Train DAgger - loop over iterations; in each, prune, roll out, and train an epoch
    print("SUPP: Beginning training...")
    for iteration in range(start_iteration, num_iterations):
        print(f"DAG: Starting iteration {iteration}")

        # Remove extra rollouts to keep within the DAggerFM limit
        prune_dataset(run_name, dataset_limit)

        # Roll out and collect more data for training and evaluation
        policy_state = model.get_policy_state()
        rollout_sampler.sample_n_rollouts(
            n=gap if iteration == 0 else params["train_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="train",
            dagger_beta=dagger_beta(params, iteration))
        eval_rollouts = rollout_sampler.sample_n_rollouts(
            n=params["eval_envs_per_iteration"],
            policy_state=policy_state,
            sample=False,
            envs="dev",
            dagger_beta=0)

        # Kill AirSim instances so that they don't take up GPU memory and in general slow things down during training
        roller.kill_airsim()

        # Evaluate success / metrics and save to tensorboard
        if setup["eval_nl"]:
            evaler = DataEvalNL(run_name, entire_trajectory=False, save_images=False)
            evaler.evaluate_dataset(eval_rollouts)
            results = evaler.get_results()
            print("Results:", results)
            evaler.write_summaries(setup["run_name"], "dagger_eval", iteration)

        # Do one epoch of supervised training
        print("SUPP: Beginning Epoch")
        train_loss = trainer.train_epoch(train_envs=train_envs, eval=False)
        # test_loss = trainer.train_epoch(env_list_common=dev_envs_common, env_list_sim=dev_envs_sim, eval=True)

        # Save the model to file
        print("SUPP: Epoch", iteration, "train_loss:", train_loss)
        save_pytorch_model(model, epoch_dag_filename(run_name, iteration))
def load_dataset(dataset_name, name, lock=False):
    path = os.path.join(get_dataset_dir(dataset_name), str(name))
    return load_dataset_from_path(path, lock=lock)
def save_dataset(dataset_name, dataset, name=None, env_id=None, lock=False):
    if name is None:
        name = get_supervised_data_filename(env_id)
    full_path = os.path.join(get_dataset_dir(dataset_name), str(name))
    save_dataset_to_path(full_path, dataset, lock=lock)
def load_multiple_env_data(dataset_name):
    ddir = get_dataset_dir(dataset_name)
    return load_multiple_env_data_from_dir(ddir)
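# Hedged usage sketch (assumed): saving one environment's rollout with the
# dataset-aware helpers above and loading it back by its per-env filename.
# The dataset name "example_dataset" and env_id are hypothetical placeholders.
def _example_env_round_trip(rollout, env_id=101):
    save_dataset("example_dataset", rollout, env_id=env_id)
    name = get_supervised_data_filename(env_id)
    return load_dataset("example_dataset", name)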