def _fully_train_logging(model: Network, loss: float, epoch: int, attempt: int, acc: float = -1):
    """Report fully-train progress: stdout, the saved run config and (if enabled) wandb.

    An accuracy of -1 is the sentinel for "no accuracy measured this epoch".
    """
    print('epoch: {}\nloss: {}'.format(epoch, loss))

    metrics = {}
    # retry attempts > 0 get a "_r_<attempt>" suffix so wandb keeps their curves separate
    suffix = "_r_" + str(attempt) if attempt > 0 else ""
    metric_name = 'accuracy_fm_' + str(model.target_feature_multiplier) + suffix
    if acc != -1:
        metrics[metric_name] = acc
        print('accuracy: {}'.format(acc))
    print('\n')

    # persist the current epoch so an interrupted fully-train can resume here
    internal_config.ft_epoch = epoch
    save_config(config.run_name)

    if config.use_wandb:
        metrics['loss_' + str(attempt)] = loss
        wandb.log(metrics)
        # upload the model checkpoint and the run's config alongside the metrics
        model.save()
        wandb.save(model.save_location())
        wandb.config.update({'current_ft_epoch': epoch}, allow_val_change=True)
        wandb.save(join(get_run_folder_path(config.run_name), 'config.json'))
def on_end_epoch(self, model: Network, epoch: int, loss: float, acc: float):
    """Record this epoch's results and checkpoint progress (no wandb side effects).

    An accuracy of -1 means no accuracy was measured, so only the loss is recorded.
    """
    results = self.training_results
    if acc != -1:
        results.add_accuracy(acc, epoch)
    results.add_loss(loss)

    # persist the epoch counter so a resumed fully-train continues from here
    internal_config.ft_epoch = epoch
    save_config(config.run_name, use_wandb_override=False)
def main():
    """Entry point: load the run config, force fully-train mode, and run it."""
    args = get_cli_args()
    # wandb actions are deliberately skipped during config loading here
    use_wandb = False
    load_simple_config(args.config, dont_init_wandb, dont_init_wandb, args.ngpus, use_wandb)

    # making sure ft is true
    config.fully_train = True
    save_config(config.run_name, use_wandb_override=use_wandb)

    _force_cuda_device_init()
    fully_train(config.run_name)
def load_simple_config(config_path: str, wandb_resume_run_fn, wandb_new_run_fn,
                       ngpus: Optional[int] = None, use_wandb_override=True):
    """Load the config for a normal run that is not part of a batch run.

    Much simpler than loading a batch config. Steps:
        * read the provided config to obtain the run name and wandb info
        * if a run folder already exists, load its saved config and
          (if requested) resume wandb; otherwise create the folder and
          (if requested) start a new wandb run
        * re-read the provided config so its values overwrite saved/wandb ones
        * apply the n_gpus override if one was given
        * save the effective config into the run folder

    @param config_path: path to the config, can be relative to configuration/configs
    @param wandb_resume_run_fn: function that allows wandb to resume
    @param wandb_new_run_fn: function that creates a new wandb run
    @param ngpus: number of gpus if config option should be overridden
    @param use_wandb_override: determines whether wandb actions should be taken in the config loading
    """
    config.read(config_path)
    run_name = config.run_name
    print(f'Run name: {run_name}')

    if run_man.run_folder_exists(run_name):
        print('Run folder already exists, reading its config')
        run_man.load_config(run_name)  # load saved config
        if config.use_wandb and use_wandb_override:
            wandb_resume_run_fn()
    else:
        print(f'No runs folder detected with name {run_name}. Creating one')
        if config.use_wandb and use_wandb_override:
            wandb_new_run_fn()
        run_man.set_up_run_folder(config.run_name, use_wandb_override)

    # overwrite saved/wandb config with provided config
    # (only values present in this config)
    config.read(config_path)

    if ngpus is not None:  # n_gpu override
        config.n_gpus = ngpus

    run_man.save_config(run_name, use_wandb_override=use_wandb_override)
    print(f'config: {config.__dict__}')
def load_batch_config():
    """ there are 3 possible levels of configs to be loaded:
    1: a saved config which is attached to an existing run which has been executed before
    this config does not exist when starting a fresh run, only when continuing an existing one

    2: a scheduled config. If a run scheduler is used, it will point to a one of the configs in its schedule

    3: the cli config, which is specified as a run arg to the main program

    when no run schedule is used, the cli config values overwrite the saved config (if one exists)
    an example of when this is desirable is to change the num gpu's when continuing a run,
    or to change the max num of generations, to evolve a population for longer

    when a run schedule is specified, it will fetch a config file eg: mms.json
    It may be desirable to override certain properties of all runs in a schedule
    An example of this is schedule = {elite,base} - we may want to turn on DataAug for both
    ie: transform the schedule into {da_elite,da_base}
    thus when a run schedule is used, the cli config starting the schedule may contain
    overriding config values (eg: da=true)

    therefore the priority of configs when a schedule is being used is:
    saved config (if exists) - lowest
    scheduled config - middle
    cli config - highest
    """
    cli_args = get_cli_args()
    # NOTE(review): presumably staggers process start-up in multi-run launches — confirm
    stagger(cli_args.stagger_number)
    # early gpu override so config reads below see the cli value
    if cli_args.ngpus is not None:
        config.n_gpus = cli_args.ngpus

    effective_run_name, scheduled_cfg_file_name = get_batch_schedule_run_names(
        cli_args)
    # level 1: saved config of an existing run (if one exists) — lowest priority
    effective_run_name = load_saved_config(effective_run_name, cli_args.config)

    # level 2: the scheduled config (if a run scheduler is in use) — middle priority
    if scheduled_cfg_file_name:
        print(f'reading scheduled config: {scheduled_cfg_file_name}')
        config.read(scheduled_cfg_file_name)

    # level 3: the cli config — highest priority
    print(f'Reading cli config {cli_args.config}')
    config.read(cli_args.config)  # final authority on config values

    # must detect whether the scheduler is calling for a fully train, or an evolutionary run
    fully_train, resume_fully_train = batch_runner.get_fully_train_state(
        effective_run_name)
    print(
        f'scheduler is starting run with FT = {fully_train} continue FT = {resume_fully_train}'
    )
    config.fully_train = fully_train
    config.resume_fully_train = resume_fully_train
    config.run_name = effective_run_name

    # re-apply the gpu override: the config.read calls above may have clobbered it
    if cli_args.ngpus is not None:
        config.n_gpus = cli_args.ngpus
    else:
        print(f'no gpu argument given, using config value of {config.n_gpus}')

    # Full config is now loaded
    if config.use_wandb and not fully_train:
        wandb_init()

    if not run_man.run_folder_exists(config.run_name):
        print(f'New run, setting up run folder for {config.run_name}')
        run_man.set_up_run_folder(config.run_name)

    print(f'Saving conf to run {config.run_name}')
    # re-assert the FT flags in case wandb_init / folder setup touched the config
    config.fully_train = fully_train
    config.resume_fully_train = resume_fully_train
    run_man.save_config(config.run_name,
                        use_wandb_override=not config.fully_train)
    print(f'config: {config.__dict__}')