def _write_airsim_settings(self):
    airsim_settings = P.get_current_parameters()["AirSim"]
    airsim_settings_path = P.get_current_parameters()["Environment"]["airsim_settings_path"]
    airsim_settings_path = os.path.expanduser(airsim_settings_path)
    save_json(airsim_settings, airsim_settings_path)
    print("Wrote new AirSim settings to " + str(airsim_settings_path))
def save_anno_path(env_id, path):
    anno_curve_path = paths.get_anno_curve_path(env_id)
    path_json = {
        'x_array': list(path[:, 0]),
        'z_array': list(path[:, 1])
    }
    save_json(path_json, anno_curve_path)
    return path
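# A hypothetical usage sketch (not part of the original code): save_anno_path
# slices columns 0 and 1 of `path`, so it presumably expects an (N, 2) array
# of (x, z) waypoints. The env id and coordinates below are made up.
def _example_save_anno_path():
    import numpy as np
    path = np.array([[0.0, 0.0],
                     [1.5, 0.5],
                     [3.0, 2.0]])
    save_anno_path(0, path)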
def sample_human_envs():
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()
    random.shuffle(test_envs)
    human_envs = test_envs[:NUM_ENVS]
    human_envs = sorted(human_envs)
    save_json(human_envs, get_human_eval_envs_path())
def save_env_split(dict_of_lists):
    path = paths.get_env_split_path()
    save_json(dict_of_lists, path)
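# A hypothetical usage sketch (not part of the original code): the argument
# shape is assumed from the function name and from get_restricted_env_id_lists,
# which deals in train/dev/test env id lists. The split keys and ids are made up.
def _example_save_env_split():
    env_split = {
        "train": [0, 1, 2, 3],
        "dev": [4, 5],
        "test": [6, 7],
    }
    save_env_split(env_split)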
def log_experiment_start(run_name, params):
    rundir = get_past_run_dir(run_name)
    paramsfile = os.path.join(rundir, "params.json")
    save_json(params, paramsfile)
def train_dagger():
    P.initialize_experiment()
    global PARAMS
    PARAMS = P.get_current_parameters()["Dagger"]
    setup = P.get_current_parameters()["Setup"]
    dataset_name = P.get_current_parameters()["Data"]["dataset_name"]

    if setup["num_workers"] > 1:
        roller = ParallelPolicyRoller(num_workers=setup["num_workers"],
                                      first_worker=setup["first_worker"],
                                      reduce=PARAMS["segment_level"])
    else:
        roller = PolicyRoller()

    latest_model_filename = "dagger_" + setup["model"] + "_" + setup["run_name"]
    dagger_data_dir = "dagger_data/" + setup["run_name"] + "/"
    save_json(PARAMS, dagger_data_dir + "run_params.json")

    # Load less tf data, but sample dagger rollouts from more environments to avoid overfitting.
    train_envs, dev_envs, test_envs = data_io.instructions.get_restricted_env_id_lists(max_envs=PARAMS["max_envs_dag"])

    if PARAMS["resample_supervised_data"]:
        # Supervised data are represented as integers that will later be loaded by the dataset
        all_train_data = list(range(PARAMS["max_samples_in_memory"]))
        all_test_data = list(range(0))
    else:
        all_train_data, all_test_data = data_io.train_data.load_supervised_data(
            dataset_name, max_envs=PARAMS["max_envs_sup"], split_segments=PARAMS["segment_level"])

    resample_supervised_data(dataset_name, all_train_data, train_envs)
    resample_supervised_data(dataset_name, all_test_data, test_envs)
    print("Loaded tf data size: " + str(len(all_train_data)) + " : " + str(len(all_test_data)))

    model = load_dagger_model(latest_model_filename)
    data_io.model_io.save_pytorch_model(model, latest_model_filename)

    if PARAMS["restore_latest"]:
        all_train_data, all_test_data = restore_data_latest(dagger_data_dir, dataset_name)
    else:
        restore_data(dataset_name, dagger_data_dir, all_train_data, all_test_data)

    last_trainer_state = None

    for iteration in range(PARAMS["restore"], PARAMS["max_iterations"]):
        gc.collect()
        print("-------------------------------")
        print("DAGGER ITERATION : ", iteration)
        print("-------------------------------")

        test_data_i = all_test_data

        # If we have too many training examples in memory, discard uniformly at random to keep a somewhat fixed bound
        max_samples = PARAMS["max_samples_in_memory"]
        if max_samples > 0 and len(all_train_data) > max_samples:  # and iteration != args.dagger_restore:
            num_discard = len(all_train_data) - max_samples
            print("Too many samples in memory! Dropping " + str(num_discard) + " samples")
            discards = set(random.sample(list(range(len(all_train_data))), num_discard))
            all_train_data = [sample for i, sample in enumerate(all_train_data) if i not in discards]
            print("Now left " + str(len(all_train_data)) + " samples")

        # Roll out new data at iteration i, except if we are restoring to that iteration, in which case we already have data
        if iteration != PARAMS["restore"] or iteration == 0:
            train_data_i, test_data_i = collect_iteration_data(roller, iteration, train_envs, test_envs,
                                                               latest_model_filename, dagger_data_dir, dataset_name)

            # Aggregate the dataset
            all_train_data += train_data_i
            all_test_data += test_data_i
            print("Aggregated dataset!")
            print("Total samples: ", len(all_train_data))
            print("New samples: ", len(train_data_i))

        data_io.train_data.save_dataset(dataset_name, all_train_data, dagger_data_dir + "train_latest")
        data_io.train_data.save_dataset(dataset_name, test_data_i, dagger_data_dir + "test_latest")

        model, model_loaded = load_latest_model(latest_model_filename)

        trainer = Trainer(model, state=last_trainer_state)

        import rollout.run_metadata as run_md
        run_md.IS_ROLLOUT = False

        # Train on the newly aggregated dataset
        num_epochs = PARAMS["epochs_per_iteration_override"][iteration] if iteration in PARAMS["epochs_per_iteration_override"] else PARAMS["epochs_per_iteration"]
        for epoch in range(num_epochs):
            # Get a random sample of all test data for calculating eval loss
            #epoch_test_sample = sample_n_from_list(all_test_data, PARAMS["num_test_samples"])
            # Just evaluate on the latest test data
            epoch_test_sample = test_data_i
            loss = trainer.train_epoch(all_train_data)
            test_loss = trainer.train_epoch(epoch_test_sample, eval=True)
            data_io.model_io.save_pytorch_model(trainer.model, latest_model_filename)
            print("Epoch", epoch, "Loss: Train:", loss, "Test:", test_loss)

        data_io.model_io.save_pytorch_model(trainer.model, get_model_filename_at_iteration(setup, iteration))
        if hasattr(trainer.model, "save"):
            trainer.model.save("dag" + str(iteration))
        last_trainer_state = trainer.get_state()
def get_all_instructions(max_size=0, do_prune_ambiguous=False, full=False):
    #print("max_size:", max_size)

    # If instructions already loaded in memory, return them
    global cache
    global loaded_corpus
    global loaded_size

    if full:
        min_augment_len = 1
    else:
        min_augment_len = P.get_current_parameters()["Setup"].get("min_augment_len", 1)
    max_augment_len = P.get_current_parameters()["Setup"].get("augment_len", 1)

    train_key = f"train-{min_augment_len}-{max_augment_len}"
    dev_key = f"dev-{min_augment_len}-{max_augment_len}"
    test_key = f"test-{min_augment_len}-{max_augment_len}"

    if cache is not None and train_key in cache:  # loaded_size == max_size:
        train_instructions = cache[train_key]
        dev_instructions = cache[dev_key]
        test_instructions = cache[test_key]
        corpus = loaded_corpus

    # Otherwise see if they've been pre-built in tmp files
    else:
        # Cache
        cache_dir = get_instruction_cache_dir()
        corpus_dir = get_config_dir()

        train_file = os.path.join(cache_dir, f"train_{min_augment_len}-{max_augment_len}.json")
        dev_file = os.path.join(cache_dir, f"dev_{min_augment_len}-{max_augment_len}.json")
        test_file = os.path.join(cache_dir, f"test_{min_augment_len}-{max_augment_len}.json")
        corpus_file = os.path.join(corpus_dir, "corpus.json")
        wfreq_file = os.path.join(corpus_dir, "word_freq.json")

        corpus_already_exists = False
        if os.path.isfile(corpus_file):
            with open(corpus_file, "r") as f:
                corpus = list(json.load(f))
                #print("corpus: ", len(corpus))
            corpus_already_exists = True

        # If they have been saved in tmp files, load them
        if os.path.isfile(train_file):
            train_instructions = load_instruction_data_from_json(train_file)
            dev_instructions = load_instruction_data_from_json(dev_file)
            test_instructions = load_instruction_data_from_json(test_file)
            assert corpus_already_exists, "Instruction data exists but corpus is gone!"

        # Otherwise rebuild instruction data from annotations
        else:
            print(f"REBUILDING INSTRUCTION DATA FOR SEGMENT LENGTHS: {min_augment_len} to {max_augment_len}!")
            print(f"USING OLD CORPUS: {corpus_already_exists}")
            os.makedirs(cache_dir, exist_ok=True)
            all_instructions, new_corpus = defaultdict(list), set()

            train_an, dev_an, test_an = load_train_dev_test_annotations()
            print("Loaded JSON Data")

            print("Parsing dataset")
            print("    train...")
            train_instructions, new_corpus, word_freq = parse_dataset(train_an, new_corpus)
            print("    dev...")
            dev_instructions, new_corpus, _ = parse_dataset(dev_an, new_corpus)
            print("    test...")
            test_instructions, new_corpus, _ = parse_dataset(test_an, new_corpus)

            print("Augmenting maybe?")
            train_instructions = augment_dataset(train_instructions, merge_len=max_augment_len, min_merge_len=min_augment_len)
            dev_instructions = augment_dataset(dev_instructions, merge_len=max_augment_len, min_merge_len=min_augment_len)
            test_instructions = augment_dataset(test_instructions, merge_len=max_augment_len, min_merge_len=min_augment_len)

            save_json(train_instructions, train_file)
            save_json(dev_instructions, dev_file)
            save_json(test_instructions, test_file)

            if not corpus_already_exists:
                corpus = new_corpus
                save_json(list(corpus), corpus_file)
                save_json(word_freq, wfreq_file)
            else:
                print("Warning! Regenerated pomdp, but kept the old corpus!")
            print("Saved instructions for quicker loading!")

    # Clip datasets to the provided size
    if max_size is not None and max_size > 0:
        num_train = int(math.ceil(max_size * 0.7))
        num_dev = int(math.ceil(max_size * 0.15))
        num_test = int(math.ceil(max_size * 0.15))
        train_instructions = slice_list_tail(train_instructions, num_train)
        dev_instructions = slice_list_tail(dev_instructions, num_dev)
        test_instructions = slice_list_tail(test_instructions, num_test)

    if do_prune_ambiguous:
        train_instructions = prune_ambiguous(train_instructions)
        dev_instructions = prune_ambiguous(dev_instructions)
        test_instructions = prune_ambiguous(test_instructions)

    #print("Corpus: ", len(corpus))
    #print("Loaded: ", len(train_instructions), len(dev_instructions), len(test_instructions))
    if cache is None:
        cache = {}
    cache[train_key] = train_instructions
    cache[dev_key] = dev_instructions
    cache[test_key] = test_instructions
    loaded_corpus = corpus
    loaded_size = max_size

    return train_instructions, dev_instructions, test_instructions, corpus
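# A hypothetical usage sketch (not part of the original code): the first call
# either loads the cached JSON files or rebuilds them from annotations; later
# calls with the same augment-length settings are served from the in-memory
# cache keyed by f"train-{min_augment_len}-{max_augment_len}" etc.
def _example_get_all_instructions():
    train, dev, test, corpus = get_all_instructions(max_size=100)
    print("train:", len(train), "dev:", len(dev), "test:", len(test), "corpus:", len(corpus))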
def train_dagger():
    P.initialize_experiment()
    global PARAMS
    PARAMS = P.get_current_parameters()["Dagger"]
    setup = P.get_current_parameters()["Setup"]

    roller = pick_policy_roller(setup)

    save_json(PARAMS, get_dagger_data_dir(setup, real_drone=False) + "run_params.json")

    # Load less tf data, but sample dagger rollouts from more environments to avoid overfitting.
    train_envs, dev_envs, test_envs = data_io.instructions.get_restricted_env_id_lists(max_envs=PARAMS["max_envs_dag"])

    all_train_data_real, all_dev_data_real = \
        data_io.train_data.load_supervised_data("real", max_envs=PARAMS["max_envs_sup"], split_segments=PARAMS["segment_level"])
    all_train_data_sim, all_dev_data_sim = \
        data_io.train_data.load_supervised_data("simulator", max_envs=PARAMS["max_envs_sup"], split_segments=PARAMS["segment_level"])

    print("Loaded data: ")
    print(f"   Real train {len(all_train_data_real)}, dev {len(all_dev_data_real)}")
    print(f"   Sim train {len(all_train_data_sim)}, dev {len(all_dev_data_sim)}")

    # Load and re-save models from supervised learning stage
    model_sim, _ = load_model(setup["model"], setup["sim_model_file"], domain="sim")
    model_real, _ = load_model(setup["model"], setup["real_model_file"], domain="real")
    model_critic, _ = load_model(setup["critic_model"], setup["critic_model_file"])
    data_io.model_io.save_pytorch_model(model_sim, get_latest_model_filename(setup, "sim"))
    data_io.model_io.save_pytorch_model(model_real, get_latest_model_filename(setup, "real"))
    data_io.model_io.save_pytorch_model(model_critic, get_latest_model_filename(setup, "critic"))

    last_trainer_state = None

    for iteration in range(0, PARAMS["max_iterations"]):
        gc.collect()
        print("-------------------------------")
        print("DAGGER ITERATION : ", iteration)
        print("-------------------------------")

        # If we have too many training examples in memory, discard uniformly at random to keep a somewhat fixed bound
        max_samples = PARAMS["max_samples_in_memory"]
        all_train_data_real = discard_if_too_many(all_train_data_real, max_samples)
        all_train_data_sim = discard_if_too_many(all_train_data_sim, max_samples)

        # Roll out new data in simulation only
        latest_model_filename_sim = get_latest_model_filename(setup, "sim")
        train_data_i_sim, dev_data_i_sim = collect_iteration_data(roller, iteration, train_envs, test_envs, latest_model_filename_sim)

        # TODO: Save
        #data_io.train_data.save_dataset(dataset_name, train_data_i, dagger_data_dir + "train_" + str(iteration))
        #data_io.train_data.save_dataset(dataset_name, test_data_i, dagger_data_dir + "test_" + str(iteration))

        # Aggregate the dataset
        all_train_data_sim += train_data_i_sim
        all_dev_data_sim += dev_data_i_sim
        print("Aggregated dataset!")
        print("Total samples: ", len(all_train_data_sim))
        print("New samples: ", len(train_data_i_sim))

        data_io.train_data.save_dataset("sim_dagger", all_train_data_sim, get_dagger_data_dir(setup, False) + "train_latest")
        data_io.train_data.save_dataset("sim_dagger", dev_data_i_sim, get_dagger_data_dir(setup, False) + "test_latest")

        model_sim, _ = load_model(setup["model"], get_latest_model_filename(setup, "sim"), domain="sim")
        model_real, _ = load_model(setup["model"], get_latest_model_filename(setup, "real"), domain="real")
        model_critic, _ = load_model(setup["critic_model"], get_latest_model_filename(setup, "critic"))

        trainer = TrainerBidomain(model_real, model_sim, model_critic, state=last_trainer_state)

        # Hacky reset of the rollout flag after doing the rollouts
        import rollout.run_metadata as run_md
        run_md.IS_ROLLOUT = False

        # Train on the newly aggregated dataset
        num_epochs = PARAMS["epochs_per_iteration"]
        for epoch in range(num_epochs):
            loss = trainer.train_epoch(data_list_real=all_train_data_real, data_list_sim=all_train_data_sim)
            dev_loss = trainer.train_epoch(data_list_real=all_dev_data_real, data_list_sim=dev_data_i_sim, eval=True)
            data_io.model_io.save_pytorch_model(model_sim, get_latest_model_filename(setup, "sim"))
            data_io.model_io.save_pytorch_model(model_real, get_latest_model_filename(setup, "real"))
            data_io.model_io.save_pytorch_model(model_critic, get_latest_model_filename(setup, "critic"))
            print("Epoch", epoch, "Loss: Train:", loss, "Test:", dev_loss)

        data_io.model_io.save_pytorch_model(model_real, get_model_filename_at_iteration(setup, iteration, "real"))
        data_io.model_io.save_pytorch_model(model_sim, get_model_filename_at_iteration(setup, iteration, "sim"))
        data_io.model_io.save_pytorch_model(model_critic, get_model_filename_at_iteration(setup, iteration, "critic"))

        last_trainer_state = trainer.get_state()
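# discard_if_too_many is called above but not defined in this section. A minimal
# sketch, assuming it mirrors the inline eviction logic of the single-domain
# train_dagger further up: uniform random discard down to max_samples, with
# max_samples <= 0 meaning "no bound".
def _discard_if_too_many_sketch(data, max_samples):
    import random
    if max_samples <= 0 or len(data) <= max_samples:
        return data
    num_discard = len(data) - max_samples
    discards = set(random.sample(range(len(data)), num_discard))
    return [sample for i, sample in enumerate(data) if i not in discards]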
def log_experiment_start(run_name):
    global CURRENT_PARAMS, CURRENT_RUN
    rundir = _get_past_run_dir(run_name)
    paramsfile = os.path.join(rundir, "params.json")
    save_json(CURRENT_PARAMS, paramsfile)
def get_all_instructions(max_size=0, do_prune_ambiguous=False):
    #print("max_size:", max_size)

    # If instructions already loaded in memory, return them
    global loaded_train_instructions
    global loaded_test_instructions
    global loaded_dev_instructions
    global loaded_corpus
    global loaded_size
    if loaded_train_instructions is not None and loaded_size == max_size:
        train_instructions = loaded_train_instructions
        dev_instructions = loaded_dev_instructions
        test_instructions = loaded_test_instructions
        corpus = loaded_corpus

    # Otherwise see if they've been pre-built in tmp files
    else:
        # Cache
        cache_dir = get_instruction_cache_dir()
        corpus_dir = get_config_dir()

        train_file = os.path.join(cache_dir, "train.json")
        dev_file = os.path.join(cache_dir, "dev.json")
        test_file = os.path.join(cache_dir, "test.json")
        corpus_file = os.path.join(corpus_dir, "corpus.json")
        wfreq_file = os.path.join(corpus_dir, "word_freq.json")

        corpus_already_exists = False
        if os.path.isfile(corpus_file):
            with open(corpus_file, "r") as f:
                corpus = list(json.load(f))
                print("corpus: ", len(corpus))
            corpus_already_exists = True

        # If they have been saved in tmp files, load them
        if os.path.isfile(train_file):
            train_instructions = load_instruction_data_from_json(train_file)
            dev_instructions = load_instruction_data_from_json(dev_file)
            test_instructions = load_instruction_data_from_json(test_file)

        # Otherwise rebuild instruction data from annotations
        else:
            print("REBUILDING INSTRUCTION DATA! CORPUS WILL NOT BE VALID!")
            os.makedirs(cache_dir, exist_ok=True)
            all_instructions, corpus = defaultdict(list), set()

            train_an, dev_an, test_an = load_train_dev_test_annotations()
            print("Loaded JSON Data")

            train_instructions, corpus, word_freq = parse_dataset(train_an, corpus)
            dev_instructions, corpus, _ = parse_dataset(dev_an, corpus)
            test_instructions, corpus, _ = parse_dataset(test_an, corpus)

            #train_instructions = augment_dataset(train_instructions)
            #dev_instructions = augment_dataset(dev_instructions)
            #test_instructions = augment_dataset(test_instructions)

            save_json(train_instructions, train_file)
            save_json(dev_instructions, dev_file)
            save_json(test_instructions, test_file)

            if not corpus_already_exists:
                save_json(list(corpus), corpus_file)
                save_json(word_freq, wfreq_file)
            else:
                print("Warning! Regenerated pomdp, but kept the old corpus!")
            print("Saved instructions for quicker loading!")

    # Clip datasets to the provided size
    if max_size is not None and max_size > 0:
        num_train = int(math.ceil(max_size * 0.7))
        num_dev = int(math.ceil(max_size * 0.15))
        num_test = int(math.ceil(max_size * 0.15))
        train_instructions = slice_list_tail(train_instructions, num_train)
        dev_instructions = slice_list_tail(dev_instructions, num_dev)
        test_instructions = slice_list_tail(test_instructions, num_test)

    if do_prune_ambiguous:
        train_instructions = prune_ambiguous(train_instructions)
        dev_instructions = prune_ambiguous(dev_instructions)
        test_instructions = prune_ambiguous(test_instructions)

    #print("Corpus: ", len(corpus))
    #print("Loaded: ", len(train_instructions), len(dev_instructions), len(test_instructions))
    loaded_train_instructions = train_instructions
    loaded_dev_instructions = dev_instructions
    loaded_test_instructions = test_instructions
    loaded_corpus = corpus
    loaded_size = max_size

    return train_instructions, dev_instructions, test_instructions, corpus