Python select_storage_dirs примеры использования

Язык программирования: Python

Пространство имен/Пакет: alfred.utils.misc

Метод/Функция: select_storage_dirs

Примеров на hotexamples.com: 8

Python select_storage_dirs - 8 примеров найдено. Это лучшие примеры Python кода для alfred.utils.misc.select_storage_dirs, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

def _anonymize_config(from_file, storage_name, root_dir):
    """
    Removes the 'experiment_name' entry from the config.json of every seed directory.

    :param from_file: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param storage_name: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param root_dir: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :return: None
    """
    logger = create_logger(name="ANONYMIZE CONFIG", loglevel=logging.INFO)
    logger.info("\nANONYMIZING Config")

    # Keep only the selected storages that pass the existence sanity-check

    existing_storages = []
    for candidate in select_storage_dirs(from_file, storage_name, root_dir):
        if sanity_check_exists(candidate, logger):
            existing_storages.append(candidate)

    for storage in existing_storages:
        logger.info(str(storage))

        # Each seed directory holds its own config.json

        for seed_dir in get_all_seeds(storage):
            config_path = seed_dir / 'config.json'
            config_file = str(config_path)
            config = load_dict_from_json(config_file)

            if 'experiment_name' not in config:
                logger.info(f"PASS -- {str(config_path)} has no experiment_name. ")
            else:
                logger.info(f"ANONYMIZE -- Removing experiment_name from {str(config_path)}")
                del config['experiment_name']

            # The config is written back whether or not it was modified

            save_dict_to_json(config, filename=config_file)

Пример #2

Показать файл

Файл: update_config_unique.py Проект: julienroyd/alfred

def _update_config_unique(from_file, storage_name, root_dir):
    """
    Re-synchronises config_unique.json with config.json in every seed directory.

    For each seed directory, ``validate_config_unique`` is called on the loaded config and
    config_unique. If it raises, every key of config_unique is overwritten with the value
    stored under the same attribute of the config, the pair is validated again and the
    updated config_unique is saved.

    :param from_file: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param storage_name: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param root_dir: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :return: None
    """
    logger = create_logger(name="VERIFY CONFIG", loglevel=logging.INFO)
    logger.info("\nVERIFYING Config Unique")

    # Select storage_dirs to run over

    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist

    storage_dirs = [storage_dir for storage_dir in storage_dirs if sanity_check_exists(storage_dir, logger)]

    for storage_to_copy in storage_dirs:
        logger.info(str(storage_to_copy))
        seeds_to_copy = get_all_seeds(storage_to_copy)

        # find the path to all the configs files

        for seed_dir in seeds_to_copy:
            config_path = seed_dir / 'config.json'
            config_unique_path = seed_dir / 'config_unique.json'
            config = load_config_from_json(str(config_path))
            config_unique_dict = load_dict_from_json(str(config_unique_path))

            try:
                # check if configs are the same
                validate_config_unique(config, config_unique_dict)
            except Exception:
                # `Exception` (rather than a bare except) so that KeyboardInterrupt and
                # SystemExit are not mistaken for a validation failure.
                # NOTE(review): the exact exception type raised by validate_config_unique
                # is not visible here -- narrow further once confirmed.
                logger.info(f"{str(seed_dir)} config_unique is not coherent with config.\n"
                            f"Updating {str(config_unique_path)}")

                # Only values are reassigned, so iterating over the dict here is safe
                config_attributes = vars(config)
                for key in config_unique_dict:
                    config_unique_dict[key] = config_attributes[key]

                # Validate again (a failure here propagates to the caller)
                validate_config_unique(config, config_unique_dict)

                # Save updated config_unique
                save_dict_to_json(config_unique_dict, filename=str(config_unique_path))

Пример #3

Показать файл

Файл: launch_schedule.py Проект: julienroyd/alfred

def launch_schedule(from_file, storage_name, n_processes,
                    n_experiments_per_proc, use_pbar, check_hash,
                    run_clean_interrupted, root_dir, log_level):
    """
    Runs the schedule of the selected storage_dirs, in this process or in several sub-processes.

    :param from_file: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param storage_name: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param n_processes: sub-processes are spawned only when this is greater than 1
    :param n_experiments_per_proc: forwarded to ``_work_on_schedule``
    :param use_pbar: forwarded to ``_work_on_schedule``
    :param check_hash: if truthy, storage_dirs failing ``sanity_check_hash`` are skipped
    :param run_clean_interrupted: if truthy, ``clean_interrupted`` is run on every storage_dir first
    :param root_dir: root directory forwarded to the helpers
    :param log_level: log level given to every logger created here
    :return: what ``_work_on_schedule`` returns when running in a single process, None otherwise
    """
    set_up_alfred()

    logfile_name = 'alfred_launch_schedule_logger.out'

    # Select storage_dirs to run over

    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Creates logger

    logger_id = str(random.randint(1, 999999)).zfill(6)
    master_logger = create_logger(name=f'ID:{logger_id} - MASTER',
                                  loglevel=log_level,
                                  logfile=None,
                                  streamHandle=True)

    # Sanity-checks that storage_dirs exist if not they are skipped

    storage_dirs = [
        storage_dir for storage_dir in storage_dirs
        if sanity_check_exists(storage_dir, master_logger)
    ]

    # Sanity-check that storage_dirs have correct hash is required

    if check_hash:
        storage_dirs = [
            storage_dir for storage_dir in storage_dirs
            if sanity_check_hash(storage_dir, master_logger)
        ]

    # Continues with sanity-checked storage_dir list
    # (the master logger writes to one log file per storage_dir)

    for storage_dir in storage_dirs:
        file_handler = create_new_filehandler(
            master_logger.name,
            logfile=storage_dir / logfile_name)
        master_logger.addHandler(file_handler)

    master_logger.debug("Storage Directories to be launched:")
    for storage_dir in storage_dirs:
        master_logger.debug(storage_dir)

    # Log some info

    master_logger.debug(f"\n\n{'=' * 200}\n"
                        f"\nRunning schedule for:\n"
                        f"\nfrom_file={from_file}"
                        f"\nstorage_name={storage_name}"
                        f"\nn_processes={n_processes}"
                        f"\nn_experiments_per_proc={n_experiments_per_proc}"
                        f"\nuse_pbar={use_pbar}"
                        f"\ncheck_hash={check_hash}"
                        f"\nroot={get_root(root_dir)}"
                        f"\n")

    # Clean the storage_dirs if asked to

    if run_clean_interrupted:
        for storage_dir in storage_dirs:
            clean_interrupted(from_file=None,
                              storage_name=storage_dir.name,
                              clean_crashes=False,
                              ask_for_validation=False,
                              logger=master_logger,
                              root_dir=root_dir)

    # Launches multiple processes

    if n_processes > 1:
        ## TODO: Logger is not supported with multiprocess (should use queues and all)
        n_calls = None  # for now we only return n_calls != None if running with one process only

        # The first storage_dir provides the logger's own logfile and the remaining ones are
        # attached as extra handlers, so each storage_dir gets exactly one log file.
        # (Relying on a leaked loop variable here would point at the LAST storage_dir and
        # fail with a NameError when no storage_dir survived the sanity-checks.)
        first_logfile = storage_dirs[0] / logfile_name if storage_dirs else None

        processes = []

        for i in range(n_processes):

            # Creates process logger

            logger_id = str(random.randint(1, 999999)).zfill(6)
            logger = create_logger(name=f'ID:{logger_id} - SUBPROCESS_{i}',
                                   loglevel=log_level,
                                   logfile=first_logfile,
                                   streamHandle=True)

            # Adds logfiles to logger if multiple storage_dirs

            for extra_storage_dir in storage_dirs[1:]:
                file_handler = create_new_filehandler(
                    logger.name,
                    logfile=extra_storage_dir / logfile_name)
                logger.addHandler(file_handler)

            # Creates process

            processes.append(
                Process(target=_work_on_schedule,
                        args=(storage_dirs, n_processes,
                              n_experiments_per_proc, use_pbar, logger,
                              root_dir, i)))
        try:
            # start processes (staggered by half a second)

            for p in processes:
                p.start()
                time.sleep(0.5)

            # waits for all processes to end

            dead_processes = []
            while any(p.is_alive() for p in processes):

                # check if some processes are dead, reporting each of them only once

                for i, p in enumerate(processes):
                    if not p.is_alive() and i not in dead_processes:
                        master_logger.info(f'PROCESS_{i} has died.')
                        dead_processes.append(i)

                time.sleep(3)

        except KeyboardInterrupt:
            master_logger.info("KEYBOARD INTERRUPT. Killing all processes")

            # terminates all processes

            for process in processes:
                process.terminate()

        master_logger.info("All processes are done. Closing '__main__'\n\n")

    # No additional processes

    else:
        n_calls = _work_on_schedule(
            storage_dirs=storage_dirs,
            n_processes=n_processes,
            n_experiments_per_proc=n_experiments_per_proc,
            use_pbar=use_pbar,
            logger=master_logger,
            root_dir=root_dir)

    return n_calls

Пример #4

Показать файл

        load_dir="summary",
        save_dir="benchmark",
        logger=logger)

    return


if __name__ == '__main__':
    # Fetch the benchmark arguments and build the top-level logger
    benchmark_args = get_benchmark_args()
    logger = create_logger(name="BENCHMARK - MAIN", loglevel=benchmark_args.log_level)

    # Resolve the storage directories designated by the arguments
    storage_dirs = select_storage_dirs(from_file=benchmark_args.from_file,
                                       storage_name=benchmark_args.storage_names,
                                       root_dir=benchmark_args.root_dir)

    # Drop every storage that fails the existence sanity-check
    storage_dirs = list(filter(lambda path: sanity_check_exists(path, logger), storage_dirs))

    # The function called down the line expects storage names rather than paths
    benchmark_args.storage_names = [path.name for path in storage_dirs]

Пример #5

Показать файл

def clean_interrupted(from_file, storage_name, clean_crashes, ask_for_validation, logger, root_dir):
    """
    Resets the seed directories that are neither UNHATCHED nor COMPLETED in the selected storage_dirs.

    A seed directory to clean is emptied of everything except config.json and
    config_unique.json, then flagged with an empty 'UNHATCHED' file. The plot-array flag
    files and the 'summary' and 'benchmark' folders of the storage_dir are removed as well.

    :param from_file: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param storage_name: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param clean_crashes: if truthy, seed directories flagged with 'CRASH.txt' are cleaned too
    :param ask_for_validation: if truthy, asks for confirmation on stdin before cleaning each storage_dir
    :param logger: logger used for reporting
    :param root_dir: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :return: None
    """
    # Resolve the storages and keep those passing the existence sanity-check

    candidates = select_storage_dirs(from_file, storage_name, root_dir)
    storage_dirs = [candidate for candidate in candidates if sanity_check_exists(candidate, logger)]

    for storage_dir in storage_dirs:

        # Sort the seed directories by the flag-file they contain

        all_seeds = get_all_seeds(storage_dir)
        unhatched_seeds = get_some_seeds(storage_dir, file_check='UNHATCHED')
        completed_seeds = get_some_seeds(storage_dir, file_check='COMPLETED')
        crashed_seeds = get_some_seeds(storage_dir, file_check='CRASH.txt')

        flagged_seeds = unhatched_seeds + completed_seeds + crashed_seeds
        mysteriously_stopped_seeds = [seed for seed in all_seeds if seed not in flagged_seeds]

        accounted_seeds = flagged_seeds + mysteriously_stopped_seeds
        assert all([seed in accounted_seeds for seed in all_seeds])

        # Report the status of this storage_dir

        logger.info(f"All seed_dir status in {storage_dir}:\n"
                    f"\nNumber of seeds = {len(all_seeds)}"
                    f"\nNumber of seeds COMPLETED = {len(completed_seeds)}"
                    f"\nNumber of seeds UNHATCHED = {len(unhatched_seeds)}"
                    f"\nNumber of seeds CRASHED = {len(crashed_seeds)}"
                    f"\nNumber of seeds MYSTERIOUSLY STOPPED = {len(mysteriously_stopped_seeds)}"
                    f"\n\nclean_crashes={clean_crashes}"
                    f"\n"
                    )

        if ask_for_validation:

            # The user may refuse to clean this storage_dir, in which case it is skipped

            answer = input("\nShould we proceed? [y or n]")
            if answer.lower() not in ['y', 'yes']:
                logger.debug("Aborting...")
                continue

            logger.debug("Starting...")

        # Crashed, completed and unhatched seeds must be pairwise disjoint

        assert all(seed not in crashed_seeds for seed in unhatched_seeds)
        assert all(seed not in crashed_seeds for seed in completed_seeds)
        assert all(seed not in completed_seeds for seed in unhatched_seeds)

        # Crashed seeds are preserved unless clean_crashes was requested

        protected_seeds = unhatched_seeds + completed_seeds
        if not clean_crashes:
            protected_seeds = protected_seeds + crashed_seeds
        seeds_to_clean = [seed for seed in all_seeds if seed not in protected_seeds]

        if not seeds_to_clean:
            logger.info('No seed_dir to clean.')

        else:
            logger.info(f'Number of seeds to be cleaned: {len(seeds_to_clean)}')

            for seed_dir in seeds_to_clean:
                logger.info(f"Cleaning {seed_dir}")

                # Everything but the two config files is deleted

                for path in seed_dir.iterdir():
                    if path.name in ["config.json", "config_unique.json"]:
                        continue
                    if path.is_dir():
                        shutil.rmtree(path)
                    else:
                        os.remove(path)

                # Flag the seed directory as UNHATCHED again with an empty file

                with open(str(seed_dir / 'UNHATCHED'), 'w+'):
                    pass
            logger.info('Done')

        # Remove the plot-array flag-files

        for flag_name in ("PLOT_ARRAYS_ONGOING", "PLOT_ARRAYS_COMPLETED"):
            flag_path = storage_dir / flag_name
            if flag_path.exists():
                os.remove(str(flag_path))

        # Remove the summary and benchmark folders

        for folder_name in ("summary", "benchmark"):
            folder_path = storage_dir / folder_name
            if folder_path.exists():
                shutil.rmtree(folder_path)

Пример #6

Показать файл

Файл: make_plot_arrays.py Проект: julienroyd/alfred

def create_plot_arrays(
        from_file,
        storage_name,
        root_dir,
        remove_none,
        logger,
        plots_to_make=alfred.defaults.DEFAULT_PLOTS_ARRAYS_TO_MAKE):
    """
    Creates and saves, for every selected storage_dir, comparative figures with one subplot per seed directory.

    One figure is saved per experiment group and per entry of ``plots_to_make``, as
    '{group_key}_comparative_{y_metric}_over_{x_metric}.png' inside the storage_dir.

    :param from_file: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param storage_name: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param root_dir: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param remove_none: if truthy, the plotted metrics are passed through ``remove_nones`` first
    :param logger: logger used for debug reporting
    :param plots_to_make: iterable of (x_metric, y_metric, x_lim, y_lim) tuples; x_metric may be None
    :return: None
    """
    # Select storage_dirs to run over

    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    for storage_dir in storage_dirs:

        # Get all experiment directories (presumably sorted numerically by the helper)
        # and gather their seed directories in the same order

        sorted_experiments = DirectoryTree.get_all_experiments(storage_dir)

        all_seeds_dir = []
        for experiment in sorted_experiments:
            all_seeds_dir = all_seeds_dir + DirectoryTree.get_all_seeds(
                experiment)

        # Determines what type of search was done

        if (storage_dir / 'GRID_SEARCH').exists():
            search_type = 'grid'
        elif (storage_dir / 'RANDOM_SEARCH').exists():
            search_type = 'random'
        else:
            search_type = 'unknown'

        # Determines row and columns of subplots

        if search_type == 'grid':
            variations = load_dict_from_json(filename=str(storage_dir /
                                                          'variations.json'))

            # experiment_groups account for the fact that all the experiment_dir in a storage_dir may have been created
            # though several runs of prepare_schedule.py, and therefore, many "groups" of experiments have been created
            experiment_groups = {key: {} for key in variations.keys()}
            for group_key, properties in experiment_groups.items():
                properties['variations'] = variations[group_key]

                properties['variations_lengths'] = {
                    k: len(properties['variations'][k])
                    for k in properties['variations'].keys()
                }

                # Deleting alg_name and task_name from variations (because they will not be contained in same storage_dir)

                hyperparam_variations_lengths = deepcopy(
                    properties['variations_lengths'])
                del hyperparam_variations_lengths['alg_name']
                del hyperparam_variations_lengths['task_name']

                # Rows follow the most-varied hyperparameter, columns the product of the others

                sorted_lengths = sorted(hyperparam_variations_lengths.values())
                i_max = sorted_lengths[-1]
                j_max = int(np.prod(sorted_lengths[:-1]))

                if i_max < 4 and j_max == 1:
                    # If only one hyperparameter was varied over, we order plots on a line
                    j_max = i_max
                    i_max = 1

                elif i_max >= 4 and j_max == 1:
                    # ... unless there are 4 or more variations, then we put them in a square-ish fashion
                    j_max = int(np.sqrt(i_max))
                    i_max = int(np.ceil(float(i_max) / float(j_max)))

                properties['ax_array_shape'] = (i_max, j_max)

        else:
            experiment_groups = {"all": {}}
            for group_key, properties in experiment_groups.items():
                i_max = len(sorted_experiments)  # each experiment is on a different row
                j_max = len(all_seeds_dir) // i_max  # each seed is on a different column

                properties['ax_array_shape'] = (i_max, j_max)

        for group_key, properties in experiment_groups.items():
            logger.debug(
                f"\n===========================\nPLOTS FOR EXPERIMENT GROUP: {group_key}"
            )
            i_max, j_max = properties['ax_array_shape']

            # Finds the index of the first seed directory of this group. It falls back to 0
            # when no seed directory matches, instead of leaving the index unbound (or stale
            # from the previous group); the range check below then flags the subplots.

            first_exp = group_key.split('-')[0] if group_key != "all" else 0
            first_seed_idx = 0
            if first_exp != 0:
                for seed_idx, seed_dir in enumerate(all_seeds_dir):
                    # str.strip removes the *characters* of 'experiment' from both ends,
                    # which leaves the digits of the directory name
                    if seed_dir.parent.stem.strip('experiment') == first_exp:
                        first_seed_idx = seed_idx
                        break

            for plot_to_make in plots_to_make:
                x_metric, y_metric, x_lim, y_lim = plot_to_make
                logger.debug(f'\n{y_metric} as a function of {x_metric}:')

                # Creates the subplots. squeeze=False makes ax_array two-dimensional whatever
                # the grid shape, so that ax_array[i, j] is valid for single-row, single-column
                # and 1x1 grids as well.

                fig, ax_array = plt.subplots(i_max,
                                             j_max,
                                             figsize=(10 * j_max, 6 * i_max),
                                             squeeze=False)

                for i in range(i_max):
                    for j in range(j_max):

                        current_ax = ax_array[i, j]

                        try:
                            seed_dir = all_seeds_dir[first_seed_idx +
                                                     (i * j_max + j)]
                            if group_key != 'all':
                                # The experiment number is parsed from the directory name only, so
                                # that an 'experiment' substring elsewhere in the path cannot break it
                                experiment_num = int(
                                    seed_dir.parent.name.split('experiment')[1])
                                group_bounds = group_key.split('-')
                                if experiment_num < int(group_bounds[0]) \
                                        or experiment_num > int(group_bounds[1]):
                                    raise IndexError
                            logger.debug(str(seed_dir))
                        except IndexError:
                            logger.debug(
                                f'experiment{i * j_max + j} does not exist')
                            current_ax.text(0.2,
                                            0.2,
                                            "no experiment\n found",
                                            transform=current_ax.transAxes,
                                            fontsize=24,
                                            fontweight='bold',
                                            color='red')
                            continue

                        logger.debug(seed_dir)

                        # Writes unique hyperparameters on plot

                        config = load_config_from_json(
                            filename=str(seed_dir / 'config.json'))
                        config_unique_dict = load_dict_from_json(
                            filename=str(seed_dir / 'config_unique.json'))
                        validate_config_unique(config, config_unique_dict)

                        if search_type == 'grid':
                            # Most-varied hyperparameters first, ties broken by name
                            sorted_keys = sorted(
                                config_unique_dict.keys(),
                                key=lambda item:
                                (properties['variations_lengths'][item], item),
                                reverse=True)

                        else:
                            sorted_keys = config_unique_dict

                        info_str = f'{seed_dir.parent.stem}\n' + '\n'.join([
                            f'{k} = {config_unique_dict[k]}'
                            for k in sorted_keys
                        ])
                        bbox_props = dict(facecolor='gray', alpha=0.1)
                        current_ax.text(0.05,
                                        0.95,
                                        info_str,
                                        transform=current_ax.transAxes,
                                        fontsize=12,
                                        verticalalignment='top',
                                        bbox=bbox_props)

                        # Skip cases of UNHATCHED or CRASHED experiments

                        if (seed_dir / 'UNHATCHED').exists():
                            logger.debug('UNHATCHED')
                            current_ax.text(0.2,
                                            0.2,
                                            "UNHATCHED",
                                            transform=current_ax.transAxes,
                                            fontsize=24,
                                            fontweight='bold',
                                            color='blue')
                            continue

                        if (seed_dir / 'CRASH.txt').exists():
                            logger.debug('CRASHED')
                            current_ax.text(0.2,
                                            0.2,
                                            "CRASHED",
                                            transform=current_ax.transAxes,
                                            fontsize=24,
                                            fontweight='bold',
                                            color='red')
                            continue

                        try:

                            # Loading the recorder

                            loaded_recorder = Recorder.init_from_pickle_file(
                                filename=str(seed_dir / 'recorders' /
                                             'train_recorder.pkl'))

                            # Checking if provided metrics are present in the recorder

                            if y_metric not in loaded_recorder.tape.keys():
                                logger.debug(
                                    f"'{y_metric}' was not recorded in train_recorder."
                                )
                                current_ax.text(0.2,
                                                0.2,
                                                "ABSENT METRIC",
                                                transform=current_ax.transAxes,
                                                fontsize=24,
                                                fontweight='bold',
                                                color='red')
                                continue

                            if x_metric is not None \
                                    and x_metric not in loaded_recorder.tape.keys():
                                logger.debug(
                                    f"'{x_metric}' was not recorded in train_recorder."
                                )
                                current_ax.text(
                                    0.2,
                                    0.2,
                                    "ABSENT METRIC",
                                    transform=current_ax.transAxes,
                                    fontsize=24,
                                    fontweight='bold',
                                    color='red')
                                continue

                            # Removing None entries (there is no x series to clean when x_metric is None)

                            if remove_none:
                                if x_metric is not None:
                                    loaded_recorder.tape[x_metric] = remove_nones(
                                        loaded_recorder.tape[x_metric])
                                loaded_recorder.tape[y_metric] = remove_nones(
                                    loaded_recorder.tape[y_metric])

                            # Plotting

                            try:

                                if x_metric is not None:
                                    plot_curves(
                                        current_ax,
                                        ys=[loaded_recorder.tape[y_metric]],
                                        xs=[loaded_recorder.tape[x_metric]],
                                        xlim=x_lim,
                                        ylim=y_lim,
                                        xlabel=x_metric,
                                        title=y_metric)
                                else:
                                    plot_curves(
                                        current_ax,
                                        ys=[loaded_recorder.tape[y_metric]],
                                        xlim=x_lim,
                                        ylim=y_lim,
                                        title=y_metric)

                            except Exception as e:
                                # Best effort: a failing subplot must not abort the whole figure
                                logger.debug(f'Polotting error: {e}')

                        except FileNotFoundError:
                            logger.debug('Training recorder not found')
                            current_ax.text(0.2,
                                            0.2,
                                            "'train_recorder'\nnot found",
                                            transform=current_ax.transAxes,
                                            fontsize=24,
                                            fontweight='bold',
                                            color='red')
                            continue

                plt.tight_layout()
                fig.savefig(
                    str(storage_dir /
                        f'{group_key}_comparative_{y_metric}_over_{x_metric}.png'
                        ))
                plt.close(fig)

Пример #7

Показать файл

def copy_configs(from_file, storage_name, new_desc, append_new_desc,
                 additional_params, root_dir):
    """
    Copies the configs of every seed directory of the selected storage_dirs into new storage_dirs.

    For each config a new ``DirectoryTree`` is created, the (possibly modified) config.json and
    config_unique.json are saved in its seed directory and an empty 'UNHATCHED' file is added.
    An empty 'config_copied_from_<original storage name>' file marks each new storage.

    :param from_file: forwarded to ``select_storage_dirs``; when truthy, the schedule module
                      found in its parent directory is imported first
    :param storage_name: forwarded to ``select_storage_dirs`` to pick the storage_dirs
    :param new_desc: description of the copies; None keeps the original description
    :param append_new_desc: if truthy, ``new_desc`` is appended to the original description
                            instead of replacing it
    :param additional_params: optional iterable of (key, value) pairs written to both the
                              config and the config_unique of every copy
    :param root_dir: root given to every ``DirectoryTree`` created here
    :return: None
    """
    logger = create_logger(name="COPY CONFIG", loglevel=logging.INFO)
    logger.info("\nCOPYING Config")

    # Select storage_dirs to run over

    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist

    storage_dirs = [
        storage_dir for storage_dir in storage_dirs
        if sanity_check_exists(storage_dir, logger)
    ]

    # Imports schedule file to have same settings for DirectoryTree.git_repos_to_track

    if from_file:
        schedule_file = str([
            path for path in Path(from_file).parent.iterdir()
            if 'schedule' in path.name and path.name.endswith('.py')
        ][0])

        # Turns the path into a dotted module name. Only the '.py' suffix is removed:
        # str.strip('.py') would strip the characters '.', 'p' and 'y' from both ends and
        # corrupt names such as 'python_schedule.py' or 'schedule_copy.py'.
        schedule_module = ".".join(schedule_file.split('/')).lstrip('.')
        if schedule_module.endswith('.py'):
            schedule_module = schedule_module[:-len('.py')]

        # Imported for its side effects only
        import_module(schedule_module)

    for storage_to_copy in storage_dirs:
        seeds_to_copy = get_all_seeds(storage_to_copy)

        # find the path to all the configs files

        config_paths = [(seed_dir / 'config.json',
                         seed_dir / 'config_unique.json')
                        for seed_dir in seeds_to_copy]

        # extract storage name info

        _, _, _, _, old_desc = \
            DirectoryTree.extract_info_from_storage_name(storage_to_copy.name)

        # A throw-away DirectoryTree provides the id and git hashes used for the copies

        tmp_dir_tree = DirectoryTree(alg_name="nope",
                                     task_name="nap",
                                     desc="nip",
                                     seed=1,
                                     root=root_dir)
        storage_name_id, git_hashes, _, _, _ = \
            DirectoryTree.extract_info_from_storage_name(str(tmp_dir_tree.storage_dir.name))

        if new_desc is None:
            desc = old_desc
        elif append_new_desc:
            desc = f"{old_desc}_{new_desc}"
        else:
            desc = new_desc

        # creates the new folders with loaded config from which we overwrite the task_name

        dir_tree = None
        for config_path, config_unique_path in config_paths:

            config = load_config_from_json(str(config_path))
            config.desc = desc

            # The experiment number is made of the digits of the experiment directory name
            expe_name = config_path.parents[1].name
            experiment_num = int(''.join([s for s in expe_name
                                          if s.isdigit()]))

            config_unique_dict = load_dict_from_json(str(config_unique_path))

            if additional_params is not None:

                for (key, value) in additional_params:
                    setattr(config, key, value)
                    config_unique_dict[key] = value

            dir_tree = DirectoryTree(id=storage_name_id,
                                     alg_name=config.alg_name,
                                     task_name=config.task_name,
                                     desc=config.desc,
                                     seed=config.seed,
                                     experiment_num=experiment_num,
                                     git_hashes=git_hashes,
                                     root=root_dir)

            dir_tree.create_directories()
            print(f"Creating {str(dir_tree.seed_dir)}\n")
            save_config_to_json(config,
                                filename=str(dir_tree.seed_dir / "config.json"))
            validate_config_unique(config, config_unique_dict)
            save_dict_to_json(config_unique_dict,
                              filename=str(dir_tree.seed_dir /
                                           "config_unique.json"))

            # Flags the new seed directory with an empty UNHATCHED file
            with open(str(dir_tree.seed_dir / 'UNHATCHED'), 'w+'):
                pass

        if dir_tree is None:
            # Nothing was copied, so there is no new storage to mark
            logger.warning(f"No seed directory found in {str(storage_to_copy)}. Nothing was copied.")
            continue

        # Leaves an empty marker file, two levels above the last seed directory, naming the source storage
        with open(
                str(dir_tree.seed_dir.parents[1] /
                    f'config_copied_from_{str(storage_to_copy.name)}'),
                'w+'):
            pass

Пример #8

Показать файл

def create_retrain_best(from_file, storage_name, best_experiments_mapping,
                        n_retrain_seeds, train_time_factor, root_dir):
    """Create a retrainBest storage directory for each selected search storage.

    For every storage directory returned by ``select_storage_dirs`` that
    passes ``sanity_check_exists``, either reuse the retrainBest directory
    that already exists under the root, or create a new one from the best
    configuration of the search. Failures for one storage directory are
    logged and do not stop the processing of the others.

    Args:
        from_file: Forwarded to ``select_storage_dirs``. When truthy, the
            first ``*schedule*.py`` file found in its parent directory is
            imported, and the names of newly created directories are
            appended to ``list_retrains_<parent-dir-stem>.txt`` in that same
            directory.
        storage_name: Forwarded to ``select_storage_dirs``.
        best_experiments_mapping: ``None`` to use the single ``bestConfig*``
            file in the storage's ``summary/`` folder, otherwise the path of
            a JSON file mapping storage-directory names to the experiment
            number whose config should be retrained.
        n_retrain_seeds: Number of seeds to create; seeds are
            ``0, 10, 20, ...``.
        train_time_factor: Multiplier applied to ``max_episodes`` (or to
            ``max_steps`` when ``max_episodes`` is unset) of the best config.
        root_dir: Root passed to ``get_root``, ``DirectoryTree`` and
            ``create_experiment_dir``.

    Returns:
        List of retrainBest storage directories, both pre-existing and newly
        created ones.
    """
    logger = create_logger(name="CREATE_RETRAIN", loglevel=logging.INFO)
    logger.info("\nCREATING retrainBest directories")

    # Select storage_dirs to run over

    storage_dirs = select_storage_dirs(from_file, storage_name, root_dir)

    # Sanity-check that storages exist

    storage_dirs = [
        storage_dir for storage_dir in storage_dirs
        if sanity_check_exists(storage_dir, logger)
    ]

    # Imports schedule file to have same settings for DirectoryTree.git_repos_to_track

    if from_file:
        schedule_file = str([
            path for path in Path(from_file).parent.iterdir()
            if 'schedule' in path.name and path.name.endswith('.py')
        ][0])
        schedule_module = ".".join(schedule_file.split('/'))
        # Remove the ".py" suffix by slicing: str.strip('.py') would strip any
        # leading/trailing '.', 'p' or 'y' characters and corrupt names such
        # as "python_runs.my_schedule_happy". The filter above guarantees the
        # suffix is present. Leading dots (from "./" or "/") are then dropped.
        schedule_module = schedule_module[:-len('.py')].strip('.')
        # Imported only for its side effects; the module object is not used.
        import_module(schedule_module)

    # Creates retrainBest directories

    retrainBest_storage_dirs = []
    new_retrainBest_storage_dirs = []
    for storage_dir in storage_dirs:

        try:
            # Checks if a retrainBest directory already exists for this search

            search_storage_id = storage_dir.name.split('_')[0]
            corresponding_retrain_directories = [
                path for path in get_root(root_dir).iterdir()
                if f"retrainBest{search_storage_id}" in path.name.split('_')
            ]

            if len(corresponding_retrain_directories) > 0:
                assert len(corresponding_retrain_directories) == 1
                retrainBest_dir = corresponding_retrain_directories[0]

                logger.info(f"Existing retrainBest\n\n"
                            f"\t{storage_dir.name} -> {retrainBest_dir.name}")

                retrainBest_storage_dirs.append(retrainBest_dir)
                continue

            # The retrainBest directory will contain one experiment with bestConfig from the search...

            if best_experiments_mapping is None:

                # ... bestConfig is found in the summary/ folder from the search

                best_config_candidates = [
                    path for path in (storage_dir / "summary").iterdir()
                    if path.name.startswith("bestConfig")
                ]

                # Validate the candidate list BEFORE indexing into it, so the
                # check applies to the list and not to a single Path.
                if len(best_config_candidates) != 1:
                    raise ValueError(
                        f"Expected exactly one bestConfig file in "
                        f"{storage_dir / 'summary'}, "
                        f"found {len(best_config_candidates)}")

                best_config = best_config_candidates[0]

            else:

                # ... bestConfig is loaded based on specified --best_experiment_mapping

                best_experiments_mapping_dict = load_dict_from_json(
                    best_experiments_mapping)
                assert storage_dir.name in best_experiments_mapping_dict

                best_experiment_num = best_experiments_mapping_dict[
                    storage_dir.name]
                seed_dir = DirectoryTree.get_all_seeds(
                    experiment_dir=storage_dir /
                    f"experiment{best_experiment_num}")[0]
                best_config = seed_dir / "config.json"

            config_dict = load_dict_from_json(filename=str(best_config))

            # Retrain experiments have their training budget scaled by train_time_factor

            if config_dict.get('max_episodes') is not None:
                config_dict['max_episodes'] = int(
                    config_dict['max_episodes'] * train_time_factor)
            elif config_dict.get('max_steps') is not None:
                config_dict['max_steps'] = int(config_dict['max_steps'] *
                                               train_time_factor)
            else:
                raise ValueError(
                    "At least one of max_episodes or max_steps should be defined"
                )

            # Updates the description

            retrain_tag = f"retrainBest{search_storage_id}"
            if "random" in config_dict['desc'] or "grid" in config_dict[
                    'desc']:
                new_desc = config_dict['desc'] \
                    .replace("random", retrain_tag) \
                    .replace("grid", retrain_tag)
            else:
                new_desc = config_dict['desc'] + f"_{retrain_tag}"

            config_dict['desc'] = new_desc

            # Creates config Namespace with loaded config_dict

            config = argparse.ArgumentParser().parse_args("")
            config_pointer = vars(config)
            config_pointer.update(config_dict)  # updates config

            config_unique_dict = {
                'alg_name': config.alg_name,
                'task_name': config.task_name,
                'seed': config.seed,
            }

            # Gets new storage_name_id

            tmp_dir_tree = DirectoryTree(alg_name="",
                                         task_name="",
                                         desc="",
                                         seed=1,
                                         root=root_dir)
            retrain_storage_id = tmp_dir_tree.storage_dir.name.split(
                '_')[0]

            # Creates the new storage_dir for retrainBest

            dir_tree = create_experiment_dir(
                storage_name_id=retrain_storage_id,
                config=config,
                config_unique_dict=config_unique_dict,
                SEEDS=[i * 10 for i in range(n_retrain_seeds)],
                root_dir=root_dir,
                git_hashes=DirectoryTree.get_git_hashes())

            retrainBest_storage_dirs.append(dir_tree.storage_dir)
            new_retrainBest_storage_dirs.append(dir_tree.storage_dir)

            logger.info(
                f"New retrainBest:\n\n"
                f"\t{storage_dir.name} -> {dir_tree.storage_dir.name}")

        except Exception as e:
            # Best-effort: a failure for one storage must not abort the others.
            logger.info(
                f"Could not create retrainBest-storage_dir {storage_dir}")
            logger.info(f"\n\n{e}\n{traceback.format_exc()}")

    # Saving the list of created storage_dirs in a text file located with the provided schedule_file

    # Without from_file there is no schedule folder to write the list into;
    # Path(None) would raise TypeError after all the work has been done.
    if from_file:
        schedule_dir = Path(from_file).parent
        schedule_name = schedule_dir.stem
        with open(schedule_dir / f"list_retrains_{schedule_name}.txt",
                  "a+") as f:
            for storage_dir in new_retrainBest_storage_dirs:
                f.write(f"{storage_dir.name}\n")

    return retrainBest_storage_dirs