def prep_resume(self):
    """Resolve the resume prefix and prepare the log folder for resuming.

    Reads the ``resume`` and ``resume_scene`` settings, resolves the special
    value ``'last'`` to the most recent previous run under
    ``self.scenario_log_root`` that contains a ``*{resume_scene}.t7``
    checkpoint, stores the result in ``self.cfgs['resume']`` and, unless
    tensorboard is skipped, replaces the destination tensorboard catalogue
    with a copy of the resumed run's tensorboard data.

    Raises:
        ValueError: if a resume scene is set without a resume prefix.
        Exception: if ``resume == 'last'`` but no matching previous run
            can be found under ``self.scenario_log_root``.
    """
    ui = Console_UI()
    resume_prefix = self.get('resume')
    resume_scene = self.get('resume_scene')
    if resume_scene is not None and resume_prefix is None:
        raise ValueError(
            'You must provide resume prefix if you have set a resume scene'
        )

    # for debug mode uncomment:
    # scenario_log_root = "/media/max/SSD_1TB/log/"
    # Guard against resume_prefix being None before calling .lower();
    # the original raised AttributeError when neither 'resume' nor
    # 'resume_scene' was configured.
    if resume_prefix is not None and resume_prefix.lower() == 'last':
        dirs = sorted(iglob(f'{self.scenario_log_root}/*/*/neural_nets'))
        # Keep only runs that actually contain a matching checkpoint file.
        dirs = [
            d for d in dirs
            if next(iglob(f'{d}/*{resume_scene}.t7'), None) is not None
        ]
        if len(dirs) == 0:
            raise Exception(
                f'No previous runs found in \'{self.scenario_log_root}\' with *{resume_scene}.t7'
            )
        # Turn '<root>/<a>/<b>/neural_nets' into '<a>/<b>'. The previous
        # lstrip/rstrip version stripped *character sets*, not substrings,
        # and could silently eat leading/trailing characters of run names.
        resume_prefix = os.path.dirname(
            os.path.relpath(dirs[-1], self.scenario_log_root))
        ui.inform_user(f'Resuming run from {resume_prefix}')
    elif resume_prefix is not None:
        resume_prefix = retrieve_dir(path=resume_prefix,
                                     base_path=self.scenario_log_root,
                                     expected_depth=1)
        ui.inform_user(f'Resuming run from {resume_prefix}')

    self.cfgs['resume'] = resume_prefix
    # for debug mode uncomment:
    # self.cfgs['resume'] = "../%s" % self.cfgs['resume']

    # Nothing to copy when no run is being resumed (previously this fell
    # through into os.path.join(..., None) after the AttributeError fix).
    if resume_prefix is not None and not self.cfgs['skip_tensorboard']:
        dst_tensorboard_path = os.path.join(self.log_folder, 'tensorboard')
        if os.path.exists(dst_tensorboard_path):
            ui.inform_user(
                f'Removing previous tensorboard catalogue: {dst_tensorboard_path}'
            )
            shutil.rmtree(dst_tensorboard_path)
        ui.inform_user('Copying the previous tensorboard data')
        shutil.copytree(
            src=os.path.join(self.scenario_log_root, resume_prefix,
                             'tensorboard'),
            dst=dst_tensorboard_path,
        )
def __init__(
    self,
    graph_name,
    experiment_set,
    task_cfgs,
    scene_cfgs,
    scenario_cfgs,
):
    """Store configuration references and resolve per-graph option flags.

    Keeps references to the task/scene/scenario configuration dicts and the
    experiment set, resolves the graph-specific configuration, and — unless
    'silent_init_info' is set globally — prints an overview of the
    experiment/graph modalities and models to the console UI.
    """
    self.graph_name = graph_name
    self.task_cfgs = task_cfgs
    self.scene_cfgs = scene_cfgs
    self.scenario_cfgs = scenario_cfgs
    self.experiment_set = experiment_set
    self.experiment_name = self.experiment_set.get_name()

    self.graph_cfgs = self.get_graph_cfgs(self.graph_name)

    # Capability flags — every one defaults to off when not configured.
    for flag in (
            'classification',
            'reconstruction',
            'identification',
            'regression',
            'pi_model',
            'real_fake',
    ):
        setattr(self, flag, self.get_cfgs(flag, default=False))
    self.optimizer_type = self.get_cfgs('optimizer_type')

    if not Global_Cfgs().get('silent_init_info'):
        UI = Console_UI()
        # One overview line per category: summary shows the keys,
        # debug output carries the full mapping.
        overview = (
            ('explicit experiment modalities',
             self.get_experiment_explicit_modalities),
            ('implicit experiment modalities',
             self.get_experiment_implicit_modalities),
            ('explicit graph modalities',
             self.get_graph_specific_explicit_modalities),
            ('implicit graph modalities',
             self.get_graph_specific_implicit_modalities),
            ('explicit models', self.get_explicit_models),
            ('implicit models', self.get_implicit_models),
        )
        for label, getter in overview:
            UI.inform_user(
                info=[label, list(getter().keys())],
                debug=getter(),
            )
def run_scene(self, start_epoch=0):
    """Run every repeat/epoch of this scene, then validate, test and save.

    Args:
        start_epoch: global epoch index (repeat * epochs + epoch) to
            fast-forward past; skipped epochs only advance the shared
            iteration counter so schedules stay aligned when resuming.
    """
    logged_memory_usage = False
    ui = Console_UI()
    ui.overall_total_epochs = self.epochs
    ui.overall_total_repeats = self.repeat
    Global_Cfgs().set_forward_noise(
        self.get_cfgs('forward_noise', default=0))
    for r in range(0, self.repeat):
        ui.overall_repeat = r
        # Average weights at the start of every repeat after the first.
        if self.stochastic_weight_averaging and r > 0:
            self.tasks[self.main_task].stochastic_weight_average()
        for e in range(0, self.epochs):
            ui.overall_epoch = e
            if start_epoch > e + r * self.epochs:
                # Fast-forward: keep the global iteration counter in sync
                # without actually running the epoch.
                Scene.iteration_counter += self.epoch_size
            else:
                for task in self.tasks.values():
                    task.update_learning_rate(self.get_learning_rate(e))
                for _ in range(self.epoch_size):
                    for key, task in self.tasks.items():
                        if self.should_task_run(task_name=key, task=task):
                            task.step(
                                iteration_counter=Scene.iteration_counter,
                                scene_name=self.scene_name)
                    Scene.iteration_counter += 1

                if logged_memory_usage is False:
                    # Profile memory only once per scene, right after the
                    # first epoch that actually ran.
                    for key in self.tasks.keys():
                        task = self.tasks[key]
                        memory_usage = task.get_memory_usage_profile()
                        File_Manager().write_usage_profile(
                            scene_name=self.scene_name,
                            task=key,
                            memory_usage=memory_usage,
                        )
                        ui.inform_user(
                            f'\n Memory usage for {self.scene_name}::{key}\n'
                        )
                        ui.inform_user(memory_usage)
                    logged_memory_usage = True

            # Checkpoint every epoch under the rolling 'last' name.
            for task in self.tasks.values():
                task.save(scene_name='last')
        # Not really helping with just emptying cache - we need to add something more
        # removing as this may be the cause for errors
        # torch.cuda.empty_cache()
    ui.reset_overall()

    # Note that the evaluation happens after this step and therefore
    # averaging may hurt the performance
    if self.stochastic_weight_averaging_last:
        self.tasks[self.main_task].stochastic_weight_average()
        for task in self.tasks.values():
            task.save(scene_name='last')

    for task in self.tasks.values():
        task.validate(iteration_counter=Scene.iteration_counter,
                      scene_name=self.scene_name)
        task.test(iteration_counter=Scene.iteration_counter,
                  scene_name=self.scene_name)

    # Save all tasks before entering the next scene
    for task in self.tasks.values():
        task.save(scene_name=self.scene_name)
        # dropModelNetworks() is called purely for its side effect, so use
        # a plain loop instead of building a throwaway list comprehension.
        for graph in task.graphs.values():
            graph.dropModelNetworks()