def run():
    """Plot augmentation results for each (run_num, seed) of the trained policy.

    Builds the result directory via `get_result_dir` for every combination of
    `policy_run_nums` x `policy_seeds` and hands it to `plot` together with the
    number of augmented-training timesteps.
    """
    # Trained-policy / evaluation configuration for this plotting pass.
    trained_policy_env = "DartWalker2d-v1"
    trained_policy_num_timesteps = 5000000
    policy_run_nums = [1]
    policy_seeds = [4]
    eval_seed = 4
    eval_run_num = 4
    aug_num_timesteps = 1500000
    additional_note = "augment_neurons_threshold_and_ignore_dup_neurons_metric_param_0.5"

    for run_num in policy_run_nums:
        for seed in policy_seeds:
            result_dir = get_result_dir(
                trained_policy_env,
                trained_policy_num_timesteps,
                run_num,
                seed,
                eval_seed,
                eval_run_num,
                additional_note=additional_note,
            )
            plot(result_dir, aug_num_timesteps)
# NOTE(review): the lines below are the tail of an enclosing rename helper whose
# `def`, `try`, and loop header lie outside this chunk — the indentation here is
# reconstructed from context; confirm against the full file.
        except Exception as e:
            # Label could not be parsed as a top_num_to_include entry; skip it.
            continue
        if ":" not in old_top_num_to_include:
            # Normalise a bare "N" into the slice-style "0:N" form.
            top_num_to_include = f"0:{old_top_num_to_include}"
        # NOTE(review): when ":" IS already present, `top_num_to_include` keeps
        # whatever value it had before (possibly from a prior iteration or the
        # unseen code above) — verify this is intentional.
        new_name = label.replace(f"top_num_to_include_{old_top_num_to_include}",
                                 f"top_num_to_include_{top_num_to_include}")
        new_names.append((label, new_name))
    # Apply every collected (old, new) rename inside result_dir.
    for label, new_name in new_names:
        os.rename(src=f"{result_dir}/{label}", dst=f"{result_dir}/{new_name}")


if __name__ == "__main__":
    # Configuration of the trained policy whose result directories get renamed.
    trained_policy_env = "DartWalker2d-v1"
    trained_policy_num_timesteps = 2000000
    policy_run_nums = [0]
    policy_seeds = [0]
    eval_seed = 3
    eval_run_num = 3
    aug_num_timesteps = 1500000
    additional_note = " (copy)"
    for policy_run_num in policy_run_nums:
        for policy_seed in policy_seeds:
            result_dir = get_result_dir(trained_policy_env, trained_policy_num_timesteps,
                                        policy_run_num, policy_seed, eval_seed, eval_run_num,
                                        additional_note=additional_note)
            change_name(result_dir)
# NOTE(review): the lines below are the tail of a plotting helper whose `def`
# and loop headers lie outside this chunk — the loop nesting here is
# reconstructed; confirm against the full file.
            new_x = xy_sublist_sub[amin][0]
            # Mean curve across the subsample, truncated to the shortest run so
            # all rows have the same length before averaging.
            new_y = np.mean([xy_item[1][:min_len] for xy_item in xy_sublist_sub], axis=0)
            all_subsample_means_xy.append((new_x, new_y))
        # First entry is the headline curve; the rest are per-subsample detail.
        xy_list.append(all_subsample_means_xy[0])
        xy_list_detail.append(all_subsample_means_xy[1:])
    if include_details:
        return plot_curves(xy_list, new_labels, xaxis, task_name, xy_list_detail)
    else:
        return plot_curves(xy_list, new_labels, xaxis, task_name, None)


if __name__ == "__main__":
    # NOTE(review): `env` is assigned but not passed to plot() below — confirm
    # it is consumed elsewhere (e.g. globally) before removing.
    env = 'DartWalker2d_aug_input_current_trial-v1'
    # Trained-policy / evaluation configuration for this plotting pass.
    trained_policy_env = "DartWalker2d-v1"
    trained_policy_num_timesteps = 2000000
    policy_run_nums = [0]
    policy_seeds = [0]
    eval_seed = 3
    eval_run_num = 3
    aug_num_timesteps = 1500000
    subsample_size = 9
    for policy_run_num in policy_run_nums:
        for policy_seed in policy_seeds:
            result_dir = get_result_dir(trained_policy_env, trained_policy_num_timesteps,
                                        policy_run_num, policy_seed, eval_seed, eval_run_num)
            plot(subsample_size, result_dir)
def main():
    """Fan out augmented-training experiments across a process pool.

    For every (env, policy seed, policy run, eval seed, eval run) combination,
    builds a result directory, runs `run_experiment` for each entry in `checks`
    over all augment seeds / run nums / network sizes via `Pool.starmap`, then
    plots the collected results into that directory.
    """
    import multiprocessing as mp

    # NOTE(review): this first configuration block is dead — every name in it
    # is overwritten by the block that follows. Kept for provenance only.
    policy_num_timesteps = 5000000
    policy_env = "DartWalker2d-v1"
    policy_seeds = [3]
    policy_run_nums = [1]

    eval_seeds = [4]
    eval_run_nums = [4]

    augment_seeds = range(30)
    augment_run_nums = [0]
    augment_num_timesteps = 1500000
    network_sizes = [64]
    additional_note = "check_these_working_Ms_against_those_other_envs"

    # Active configuration: three environments, 30 augment seeds each.
    policy_num_timesteps = 9000000
    policy_envs = ["DartWalker2d-v1", "DartSnake7Link-v1", "DartHopper-v1"]
    policy_seeds = [0]
    policy_run_nums = [0]

    eval_seeds = [3]
    eval_run_nums = [3]

    augment_seeds = range(30)
    augment_run_nums = [0]
    augment_num_timesteps = 1500000
    network_sizes = [64]
    additional_note = "reconfirm_check_these_working_Ms_against_those_other_envs"

    # policy_num_timesteps = 5000000
    # policy_env = "DartWalker2d-v1"
    # policy_seeds = [4]
    # policy_run_nums = [0]
    #
    # eval_seeds = [4]
    # eval_run_nums = [4]
    #
    # augment_seeds = range(1)
    # augment_run_nums = [0]
    # augment_num_timesteps = 1500000
    # top_num_to_includes = [slice(0,20)]
    # network_sizes = [64]
    # additional_note = "tee"

    # policy_seeds = [3, 4]
    # policy_run_nums = [0]
    # policy_num_timesteps = 5000
    # policy_env = "DartWalker2d-v1"
    #
    # eval_seeds = [4]
    # eval_run_nums = [4]
    #
    # augment_seeds = [0]
    # augment_run_nums = range(2)
    # augment_num_timesteps = 5000
    # top_num_to_includes = [slice(0,10)]
    # network_sizes = [16]
    # additional_note = "largebatchtestforotherruns"

    # NOTE(review): assigned but never read anywhere in this function —
    # confirm it is not consumed globally before removing.
    test_or_train = False

    # policy_num_timesteps = 2000000
    # policy_env = "DartWalker2d-v1"
    # policy_seeds = [0]
    # policy_run_nums = [0]
    #
    # eval_seeds = [3]
    # eval_run_nums = [3]
    #
    # augment_seeds = range(1)
    # augment_run_nums = [0]
    # augment_num_timesteps = 5000
    # top_num_to_includes = [slice(0, 10), slice(0,0)]
    # network_sizes = [64]
    # additional_note = "non_linear"

    with mp.Pool(mp.cpu_count()) as pool:
        #============================================================
        # Each check pairs a top_num_to_include slice with the explicit
        # Lagrangian indices ("M"/"COM" entries) to augment with.
        checks = [(slice(0,0), []),
                  (slice(0,10), [("M",6), ("M",7), ("M",8), ("M",12), ("M",16), ("M",20),
                                 ("M",24), ("M",25), ("M",26), ("COM", 1)]),
                  (slice(0,20), [("M",2),("M",3),("M",6),("M",7),("M",8),("M",11),("M",12),
                                 ("M",15),("M",16),("M",20),("M",21),("M",22),("M",24),
                                 ("M",25),("M",26),("M",30),("M",31),("M",40),("M",41),
                                 ("COM", 1)])]
        for policy_env in policy_envs:
            for policy_seed in policy_seeds:
                for policy_run_num in policy_run_nums:
                    for eval_seed in eval_seeds:
                        for eval_run_num in eval_run_nums:
                            # if not test:
                            result_dir = get_result_dir(policy_env, policy_num_timesteps,
                                                        policy_run_num, policy_seed,
                                                        eval_seed, eval_run_num,
                                                        additional_note)
                            # else:
                            #     result_dir = get_test_dir(policy_env, policy_num_timesteps, policy_run_num, policy_seed,
                            #                               eval_seed, eval_run_num, augment_seed, additional_note)
                            create_dir_if_not(result_dir)

                            for check in checks:
                                top_num_to_include, lagrangian_inds_to_include = check
                                # Cartesian product of augment seeds / run nums /
                                # network sizes; learning rate scales inversely
                                # with network size (64 -> 3e-4 baseline).
                                run_experiment_args = [(augment_num_timesteps, top_num_to_include,
                                                        augment_seed, augment_run_num, network_size,
                                                        policy_env, policy_num_timesteps,
                                                        policy_run_num, policy_seed, eval_seed,
                                                        eval_run_num, learning_rate,
                                                        additional_note, result_dir,
                                                        lagrangian_inds_to_include)
                                                       for augment_seed in augment_seeds
                                                       for augment_run_num in augment_run_nums
                                                       for network_size in network_sizes
                                                       for learning_rate in [64 / network_size * 3e-4]]
                                pool.starmap(run_experiment, run_experiment_args)

                            # Best-effort plotting: a plot failure must not kill
                            # the remaining experiment combinations.
                            # NOTE(review): nesting of this try relative to the
                            # checks loop is reconstructed — confirm.
                            try:
                                plot(result_dir, augment_num_timesteps)
                            except Exception as e:
                                print(e)