Exemplo n.º 1
0
def wandb_init():
    """
    Attach this process to a wandb run, either by creating a new run or by resuming one.

    A new run is created when a fully train run is requested without the resume flag, or
    when no run folder exists for config.run_name and no wandb run path is configured.
    Otherwise the run is resumed, provided the run folder exists or a fully train run is
    being resumed. When a fully train run is resumed, the generations and the model of
    the evolution run recorded on the wandb config are downloaded again.

    In every successful case the current config values are pushed to wandb at the end.

    Raises:
        Exception: when the state matches neither the "new run" nor the "resume" case
    """
    new_fully_train = config.fully_train and not config.resume_fully_train
    # equivalent to: no run folder for this run name AND no wandb run path configured
    new_evolution = not (run_folder_exists(config.run_name)
                         or config.wandb_run_path)
    resumable = run_folder_exists(
        config.run_name) or config.resume_fully_train

    if new_fully_train or new_evolution:
        # The path is read before the new run is created.
        # NOTE(review): presumably because _new_run() may alter config.wandb_run_path - confirm
        source_run_path = config.wandb_run_path
        _new_run()
        if config.fully_train:
            # record on the new fully train wandb run which evolution run it belongs to
            wandb.config['evolution_run_path'] = source_run_path
    elif not resumable:
        raise Exception("Something went wrong with wandb")
    else:
        print('resuming')
        _resume_run()
        if config.resume_fully_train:
            print('resuming ft at', wandb.config.evolution_run_path)
            # generations first, then the model; both replace whatever is present locally
            for fetch in (download_generations, download_model):
                fetch(run_path=wandb.config.evolution_run_path, replace=True)

    wandb.config.update(config.__dict__, allow_val_change=True)
Exemplo n.º 2
0
def load_simple_config(config_path: str,
                       wandb_resume_run_fn,
                       wandb_new_run_fn,
                       ngpus: Optional[int] = None,
                       use_wandb_override=True):
    """
    Load the config of a standalone run, i.e. a run that is not part of a batch run.
    This is considerably simpler than loading a batch config.

    Order of operations:
    * read the given config to learn the run name
    * if a run folder with that name exists, load the config saved in it
    * call the matching wandb function (resume / new) if wandb is in use
    * if no run folder existed, set one up
    * read the given config a second time so that its values take precedence
    * apply the n_gpus override if one was given
    * save the resulting config to the run folder

    @param config_path: path to the config, can be relative to configuration/configs
    @param wandb_resume_run_fn: function that allows wandb to resume, called without arguments
    @param wandb_new_run_fn: function that creates a new wandb run, called without arguments
    @param ngpus: number of gpus if the config option should be overridden, None keeps the config value
    @param use_wandb_override: determines whether wandb actions should be taken in the config loading
    """
    config.read(config_path)
    run_name = config.run_name
    print(f'Run name: {run_name}')

    is_existing_run = run_man.run_folder_exists(run_name)
    if is_existing_run:
        print('Run folder already exists, reading its config')
        run_man.load_config(run_name)  # the saved config
        wandb_hook = wandb_resume_run_fn
    else:
        print(f'No runs folder detected with name {run_name}. Creating one')
        wandb_hook = wandb_new_run_fn

    # config.use_wandb is consulted only after a saved config (if any) has been loaded
    if config.use_wandb and use_wandb_override:
        wandb_hook()

    if not is_existing_run:
        # the folder is created only after the wandb function has run
        run_man.set_up_run_folder(config.run_name, use_wandb_override)

    # the provided config wins over the saved/wandb config (only for values it contains)
    config.read(config_path)

    if ngpus is not None:
        config.n_gpus = ngpus  # explicit n_gpu override

    run_man.save_config(run_name, use_wandb_override=use_wandb_override)
    print(f'config: {config.__dict__}')
Exemplo n.º 3
0
def load_saved_config(effective_run_name, cli_cfg_file_name):
    """
    Load the saved config of a run, if that run already has a run folder.

    When the cli config file specifies a 'wandb_run_path', the run is first downloaded
    from wandb and the name returned by the download replaces the given run name.

    @param effective_run_name: run name to use unless a wandb run path is specified
    @param cli_cfg_file_name: config file from which the 'wandb_run_path' option is read
    @return: the run name that was actually used
    """
    wandb_run_path = config.read_option(cli_cfg_file_name, 'wandb_run_path')

    if wandb_run_path:
        # Runs downloaded from wandb are not used by the batch scheduler, so this has to
        # be a standalone run. The run name of the cli config is ignored in this case:
        # the name is taken from what the download returns.
        print(f'downloading run from wandb with path: {wandb_run_path}')
        effective_run_name = download_run(run_path=wandb_run_path,
                                          replace=True)

    print('Run name', effective_run_name)

    if not run_man.run_folder_exists(effective_run_name):
        # nothing saved for this run yet, so there is no config to load
        return effective_run_name

    print('Run folder already exists, reading its config')
    run_man.load_config(effective_run_name)
    return effective_run_name
Exemplo n.º 4
0
def load_batch_config():
    """
    Loads the config for a run started from the command line, optionally as part of a run schedule.

    there are 3 possible levels of configs to be loaded:
    1: a saved config which is attached to an existing run which has been executed before
        this config does not exist when starting a fresh run, only when continuing an existing one
    2: a scheduled config. If a run scheduler is used, it will point to one of the configs in its schedule
    3: the cli config, which is specified as a run arg to the main program

    when no run schedule is used, the cli config values overwrite the saved config (if one exists)
        an example of when this is desirable is to change the num gpu's when continuing a run, or
        to change the max num of generations, to evolve a population for longer

    when a run schedule is specified, it will fetch a config file eg: mms.json
    It may be desirable to override certain properties of all runs in a schedule
        An example of this is schedule = {elite,base} - we may want to turn on DataAug for both
        ie: transform the schedule into {da_elite,da_base}

    thus when a run schedule is used, the cli config starting the schedule may contain overriding config values (eg: da=true)

    therefore the priority of configs when a schedule is being used is:
        saved config (if exists)    - lowest
        scheduled config            - middle
        cli config                  - highest

    After the configs are read, the fully train flags, the run name and the optional cli gpu count are
    written onto the config, wandb is initialised (only for non fully train runs that use wandb),
    the run folder is created if it does not exist yet, and the config is saved to the run folder.
    """
    cli_args = get_cli_args()
    # NOTE(review): presumably delays start-up so that runs launched together do not collide - confirm
    stagger(cli_args.stagger_number)

    # applied before any config file is read; applied again further down, after the config reads
    if cli_args.ngpus is not None:
        config.n_gpus = cli_args.ngpus

    effective_run_name, scheduled_cfg_file_name = get_batch_schedule_run_names(
        cli_args)
    # lowest priority: the saved config. The run name may be replaced here (wandb download)
    effective_run_name = load_saved_config(effective_run_name, cli_args.config)

    # middle priority: the scheduled config, only present when a run schedule is used
    if scheduled_cfg_file_name:
        print(f'reading scheduled config: {scheduled_cfg_file_name}')
        config.read(scheduled_cfg_file_name)

    print(f'Reading cli config {cli_args.config}')
    config.read(cli_args.config)  # final authority on config values

    # must detect whether the scheduler is calling for a fully train, or an evolutionary run
    fully_train, resume_fully_train = batch_runner.get_fully_train_state(
        effective_run_name)
    print(
        f'scheduler is starting run with FT = {fully_train} continue FT = {resume_fully_train}'
    )
    config.fully_train = fully_train
    config.resume_fully_train = resume_fully_train

    # the effective run name replaces whatever run name the config files contained
    config.run_name = effective_run_name
    # the cli gpu count is re-applied here, after all config reads
    if cli_args.ngpus is not None:
        config.n_gpus = cli_args.ngpus
    else:
        print(f'no gpu argument given, using config value of {config.n_gpus}')

    # Full config is now loaded
    # wandb is not initialised here for fully train runs
    if config.use_wandb and not fully_train:
        wandb_init()

    if not run_man.run_folder_exists(config.run_name):
        print(f'New run, setting up run folder for {config.run_name}')
        run_man.set_up_run_folder(config.run_name)

    print(f'Saving conf to run {config.run_name}')
    # the fully train flags are written a second time right before saving
    # NOTE(review): presumably guards against wandb_init / folder set up altering them - confirm
    config.fully_train = fully_train
    config.resume_fully_train = resume_fully_train
    run_man.save_config(config.run_name,
                        use_wandb_override=not config.fully_train)
    print(f'config: {config.__dict__}')