def run():
    """Plot augmentation results for each trained-policy (run_num, seed) pair.

    Builds the result directory for every combination of policy_run_nums and
    policy_seeds and calls plot() on it.  Relies on get_result_dir() and
    plot() defined elsewhere in this module.
    """
    # Sweep configuration.
    trained_policy_env = "DartWalker2d-v1"
    trained_policy_num_timesteps = 5000000
    policy_run_nums = [1]
    policy_seeds = [4]
    eval_seed = 4
    eval_run_num = 4
    aug_num_timesteps = 1500000
    additional_note = "augment_neurons_threshold_and_ignore_dup_neurons_metric_param_0.5"

    for policy_run_num in policy_run_nums:
        for policy_seed in policy_seeds:
            result_dir = get_result_dir(trained_policy_env,
                                        trained_policy_num_timesteps,
                                        policy_run_num,
                                        policy_seed,
                                        eval_seed,
                                        eval_run_num,
                                        additional_note=additional_note)
            plot(result_dir, aug_num_timesteps)
        # NOTE(review): orphaned fragment — this `except` has no matching
        # `try` in the visible source; the enclosing definition (presumably
        # change_name(), called below) appears truncated by the paste.
        # Kept byte-identical pending recovery of the original.
        except Exception as e:
            continue
        # If the stored label carries a bare count (e.g. "10") rather than a
        # slice range, rewrite it as "0:<count>" and queue the rename pair.
        if ":" not in old_top_num_to_include:
            top_num_to_include = f"0:{old_top_num_to_include}"

            new_name  = label.replace(f"top_num_to_include_{old_top_num_to_include}", f"top_num_to_include_{top_num_to_include}")
            new_names.append((label,new_name))


    # Apply every queued rename inside result_dir.
    for label, new_name in new_names:
        os.rename(src=f"{result_dir}/{label}", dst=f"{result_dir}/{new_name}")

if __name__ == "__main__":
    # Configuration for the rename pass.
    trained_policy_env = "DartWalker2d-v1"
    trained_policy_num_timesteps = 2000000
    policy_run_nums = [0]
    policy_seeds = [0]
    eval_seed = 3
    eval_run_num = 3
    aug_num_timesteps = 1500000
    additional_note = " (copy)"

    # Rename result directories for every trained-policy run/seed combination.
    for policy_run_num in policy_run_nums:
        for policy_seed in policy_seeds:
            change_name(
                get_result_dir(
                    trained_policy_env,
                    trained_policy_num_timesteps,
                    policy_run_num,
                    policy_seed,
                    eval_seed,
                    eval_run_num,
                    additional_note=additional_note,
                )
            )



# Example 3
            # NOTE(review): truncated fragment — the enclosing def (and the
            # loops binding xy_sublist_sub / amin / min_len) are not visible
            # here.  Take the x values of the entry at index `amin` and
            # average the length-truncated y series across the sub-list.
            new_x = xy_sublist_sub[amin][0]
            new_y = np.mean([xy_item[1][:min_len] for xy_item in xy_sublist_sub], axis=0)
            all_subsample_means_xy.append((new_x, new_y))

        # First entry is the headline curve; the remainder are detail curves.
        xy_list.append(all_subsample_means_xy[0])
        xy_list_detail.append(all_subsample_means_xy[1:])
    # Detail curves are only forwarded to plot_curves() when requested.
    if include_details:
        return plot_curves(xy_list, new_labels, xaxis, task_name, xy_list_detail)
    else:
        return plot_curves(xy_list, new_labels, xaxis, task_name, None)


if __name__ == "__main__":

    # Configuration for the subsampled-plot pass.
    env = 'DartWalker2d_aug_input_current_trial-v1'
    trained_policy_env = "DartWalker2d-v1"
    trained_policy_num_timesteps = 2000000
    policy_run_nums = [0]
    policy_seeds = [0]
    eval_seed = 3
    eval_run_num = 3
    aug_num_timesteps = 1500000
    subsample_size = 9

    # Plot subsampled curves for every trained-policy run/seed combination.
    for policy_run_num in policy_run_nums:
        for policy_seed in policy_seeds:
            result_dir = get_result_dir(
                trained_policy_env,
                trained_policy_num_timesteps,
                policy_run_num,
                policy_seed,
                eval_seed,
                eval_run_num,
            )
            plot(subsample_size, result_dir)
def main():
    """Launch the augmentation experiment sweep and plot its results.

    For every (env, policy seed/run, eval seed/run) combination this builds
    the result directory, then for each (top_num_to_include,
    lagrangian_inds_to_include) check fans run_experiment() out over a
    multiprocessing pool — one task per (augment seed, augment run, network
    size) — and best-effort plots the accumulated results.

    Relies on get_result_dir(), create_dir_if_not(), run_experiment() and
    plot() defined elsewhere in this module.
    """
    import multiprocessing as mp

    # Sweep configuration.  (An earlier duplicate configuration block that
    # was fully overwritten before use has been removed.)
    policy_num_timesteps = 9000000
    policy_envs = ["DartWalker2d-v1", "DartSnake7Link-v1", "DartHopper-v1"]
    policy_seeds = [0]
    policy_run_nums = [0]

    eval_seeds = [3]
    eval_run_nums = [3]

    augment_seeds = range(30)
    augment_run_nums = [0]
    augment_num_timesteps = 1500000
    network_sizes = [64]
    additional_note = "reconfirm_check_these_working_Ms_against_those_other_envs"

    # Each check pairs a top_num_to_include slice with the explicit
    # lagrangian indices to include for that configuration.
    checks = [
        (slice(0, 0), []),
        (slice(0, 10), [("M", 6), ("M", 7), ("M", 8), ("M", 12), ("M", 16),
                        ("M", 20), ("M", 24), ("M", 25), ("M", 26), ("COM", 1)]),
        (slice(0, 20), [("M", 2), ("M", 3), ("M", 6), ("M", 7), ("M", 8),
                        ("M", 11), ("M", 12), ("M", 15), ("M", 16), ("M", 20),
                        ("M", 21), ("M", 22), ("M", 24), ("M", 25), ("M", 26),
                        ("M", 30), ("M", 31), ("M", 40), ("M", 41), ("COM", 1)]),
    ]

    with mp.Pool(mp.cpu_count()) as pool:
        for policy_env in policy_envs:
            for policy_seed in policy_seeds:
                for policy_run_num in policy_run_nums:
                    for eval_seed in eval_seeds:
                        for eval_run_num in eval_run_nums:
                            result_dir = get_result_dir(
                                policy_env, policy_num_timesteps,
                                policy_run_num, policy_seed, eval_seed,
                                eval_run_num, additional_note)
                            create_dir_if_not(result_dir)

                            for top_num_to_include, lagrangian_inds_to_include in checks:
                                # Learning rate scales inversely with network
                                # size from the 64-unit baseline of 3e-4.
                                run_experiment_args = [
                                    (augment_num_timesteps, top_num_to_include,
                                     augment_seed, augment_run_num, network_size,
                                     policy_env, policy_num_timesteps,
                                     policy_run_num, policy_seed, eval_seed,
                                     eval_run_num, 64 / network_size * 3e-4,
                                     additional_note, result_dir,
                                     lagrangian_inds_to_include)
                                    for augment_seed in augment_seeds
                                    for augment_run_num in augment_run_nums
                                    for network_size in network_sizes
                                ]
                                pool.starmap(run_experiment, run_experiment_args)

                                # Plotting is best-effort: a failure here must
                                # not abort the remaining sweep.
                                try:
                                    plot(result_dir, augment_num_timesteps)
                                except Exception as e:
                                    print(e)