def cleanup_orphan_experiments(self):
    """
    Hard-delete every orphan experiment attached to this calibration,
    then log which ones were removed.
    """
    orphans = self.list_orphan_experiments()

    # Hard delete each orphan through its experiment manager.
    for orphan in orphans:
        ExperimentManagerFactory.from_experiment(orphan).delete_experiment()

    if not orphans:
        return

    # Report what was deleted.
    formatted = ['- %s - %s' % (orphan.exp_id, orphan.exp_name) for orphan in orphans]
    logger.info('\nOrphan Experiment List:')
    logger.info('\n'.join(formatted))
    logger.info('\n')
    logger.info('Note: the detected orphan experiment(s) have been deleted.')
def commission_iteration(self, next_params):
    """
    Commission an experiment of simulations built from combinations of random
    seeds, calibration sites, and the next sample points, and cache the
    relevant experiment/simulation info on this IterationState.

    If cached simulations already exist, just reattach the experiment manager
    instead of commissioning again.
    """
    if self.simulations:
        # Already commissioned: rebuild the manager from the cached experiment.
        logger.info('Reloading simulation data from cached iteration (%s) state.' % self.iteration)
        cached_experiment = DataStore.get_experiment(self.experiment_id)
        self.exp_manager = ExperimentManagerFactory.from_experiment(cached_experiment)
        return

    self.exp_manager = ExperimentManagerFactory.init()

    # use passed in function to create exp_builder
    builder = self.exp_builder_func(next_params)
    self.exp_manager.run_simulations(
        config_builder=self.config_builder,
        exp_name='%s_iter%d' % (self.calibration_name, self.iteration),
        exp_builder=builder,
        suite_id=self.suite_id)

    # Cache the commissioned experiment info and persist this state.
    self.simulations = self.exp_manager.experiment.toJSON()['simulations']
    self.experiment_id = self.exp_manager.experiment.exp_id
    self.save()
def check_status(exp_list):
    """
    Verify that every experiment in *exp_list* finished successfully.

    On the first experiment that is not successful, log a warning, print the
    experiment's status, and terminate the process so the caller does not
    proceed with incomplete data.

    :param exp_list: iterable of experiment objects exposing ``is_successful()``
    """
    import sys

    for exp in exp_list:
        if not exp.is_successful():
            logger.warning('Not all experiments have finished successfully yet...')
            exp_manager = ExperimentManagerFactory.from_experiment(exp)
            exp_manager.print_status()
            # Fix: sys.exit() instead of the site-provided exit() builtin,
            # which is intended for interactive sessions and is not
            # guaranteed to exist (e.g. under `python -S`).
            sys.exit()
def cleanup(self):
    """
    Cleanup the current calibration
    - Delete the result directory
    - If LOCAL -> also delete the simulations

    Best-effort: unreadable calibration data or undeletable experiments are
    skipped rather than aborting the whole cleanup.
    """
    try:
        calib_data = self.read_calib_data()
    except Exception:
        logger.info('Calib data cannot be read -> skip')
        calib_data = None

    if calib_data:
        with SetupParser.TemporaryBlock(calib_data['selected_block']):
            # Retrieve suite ids and iter_count
            suites = calib_data.get('suites')
            iter_count = calib_data.get('iteration')

            # Kill any still-running iteration first
            self.kill()

            # Delete the simulations too
            logger.info('Cleaning up calibration %s' % self.name)
            for i in range(0, iter_count + 1):
                # Get the iteration cache
                iteration_cache = os.path.join(self.name, 'iter%d' % i, 'IterationState.json')
                if not os.path.exists(iteration_cache):
                    break

                # Retrieve the iteration state
                it = IterationState.from_file(iteration_cache)

                # Create the associated experiment manager and ask for deletion
                try:
                    exp_mgr = ExperimentManagerFactory.from_experiment(
                        DataStore.get_experiment(it.experiment_id))
                    exp_mgr.hard_delete()
                # Fix: narrowed from a bare `except:` which also swallowed
                # SystemExit/KeyboardInterrupt; deletion stays best-effort.
                except Exception:
                    continue

            # Delete all HPC suites (the local suites are only carried by experiments)
            for suite in suites:
                if suite['type'] == "HPC":
                    logger.info('Delete COMPS suite %s' % suite['id'])
                    COMPS_login(SetupParser.get('server_endpoint'))
                    from simtools.Utilities.COMPSUtilities import delete_suite
                    delete_suite(suite['id'])

    # Then delete the whole directory
    calib_dir = os.path.abspath(self.name)
    if os.path.exists(calib_dir):
        try:
            shutil.rmtree(calib_dir)
        except OSError:
            logger.error("Failed to delete %s" % calib_dir)
            logger.error("Try deleting the folder manually before retrying the calibration.")
def get_status(expname):
    """
    Return a DataFrame describing the status of every simulation in the
    experiment named *expname*, with columns 'simid' and 'status'.

    :param expname: name of the experiment to look up via ``get_expt``
    :return: pandas DataFrame with one row per simulation
    """
    expt = get_expt(expname)

    from simtools.ExperimentManager.BaseExperimentManager import BaseExperimentManager
    from simtools.ExperimentManager.ExperimentManagerFactory import ExperimentManagerFactory

    expt_manager = ExperimentManagerFactory.from_experiment(expt)
    # NOTE(review): get_simulation_status is invoked unbound with the manager
    # passed as `self` -- presumably intentional; confirm against
    # BaseExperimentManager's signature.
    states, msgs = BaseExperimentManager.get_simulation_status(expt_manager)

    # Fix: materialize the dict views -- older pandas versions reject
    # dict_keys/dict_values as column data. keys() and values() of the same
    # dict are guaranteed to iterate in matching order.
    df = pd.DataFrame({'simid': list(states.keys()),
                       'status': list(states.values())})
    return df
def reload_experiments(args=None):
    """
    Build an experiment manager for every experiment matching the given
    command-line options, skipping experiments whose manager cannot be created.

    :param args: optional namespace that may carry ``expId`` and ``current_dir``
    :return: list of experiment managers
    """
    experiment_id = getattr(args, 'expId', None)
    working_dir = getattr(args, 'current_dir', None)

    result = []
    for experiment in DataStore.get_experiments_with_options(experiment_id, working_dir):
        try:
            result.append(ExperimentManagerFactory.from_experiment(experiment))
        except RuntimeError:
            # Skip experiments we cannot build a manager for.
            print("Could not create manager... Bypassing...")
    return result
def analyze_iteration(self):
    """
    Analyze the output of completed simulations by using the relevant analyzers by site.
    Cache the results that are returned by those analyzers.

    Returns the cached 'total' results when they already exist; otherwise
    stores fresh analyses/results on this iteration state (self.analyzers,
    self.results) and updates the summary table.
    """
    # Results already computed for this iteration -> reuse the cache.
    if self.results:
        logger.info('Reloading results from cached iteration state.')
        return self.results['total']

    if not self.exp_manager:
        # NOTE(review): a bare experiment id is passed here, while other call
        # sites wrap it with DataStore.get_experiment(...) first -- confirm
        # the factory accepts both forms.
        self.exp_manager = ExperimentManagerFactory.from_experiment(self.experiment_id)

    from simtools.Analysis.BaseAnalyzers.BaseAnalyzer import BaseAnalyzer
    from simtools.Analysis.AnalyzeManager import AnalyzeManager as am

    # Pick the analyze-manager implementation matching the analyzer API in
    # use: the new-style manager when every analyzer derives from
    # BaseAnalyzer, the legacy AnalyzeManager otherwise.
    if all(isinstance(a, BaseAnalyzer) for a in self.analyzer_list):
        analyzerManager = am(exp_list=self.exp_manager.experiment,
                             analyzers=self.analyzer_list,
                             working_dir=self.iteration_directory,
                             verbose=True)
    else:
        analyzerManager = AnalyzeManager(exp_list=self.exp_manager.experiment,
                                         analyzers=self.analyzer_list,
                                         working_dir=self.iteration_directory)
    analyzerManager.analyze()

    # Ask the analyzers to cache themselves (uid may be a plain attribute or
    # a callable, hence the callable() checks below).
    cached_analyses = {a.uid if not callable(a.uid) else a.uid(): a.cache()
                       for a in analyzerManager.analyzers}
    logger.debug(cached_analyses)

    # Get the results from the analyzers and ask the next point how it wants to cache them
    results = pd.DataFrame({a.uid if not callable(a.uid) else a.uid(): a.result
                            for a in analyzerManager.analyzers})
    cached_results = self.next_point_algo.get_results_to_cache(results)

    # Store the analyzers and results in the iteration state
    self.analyzers = cached_analyses
    self.results = cached_results

    # Set those results in the next point algorithm
    self.next_point_algo.set_results_for_iteration(self.iteration, results)

    # Update the summary table and all the results
    self.all_results, self.summary_table = self.next_point_algo.update_summary_table(self, self.all_results)
    logger.info(self.summary_table)
def kill(self):
    """
    Kill the current calibration: cancel the experiment of the latest
    iteration and block until the cancellation has completed.
    """
    experiment = self.load_experiment_from_iteration()
    if not experiment:
        return

    # Cancel simulations for all active managers
    manager = ExperimentManagerFactory.from_experiment(experiment)
    manager.cancel_experiment()

    logger.info("Waiting to complete cancellation...")
    manager.wait_for_finished(verbose=False, sleep_time=1)

    # Print confirmation
    logger.info("Calibration %s successfully cancelled!" % self.name)
def create_parsers_for_experiment(self, experiment):
    """
    Build output parsers for the simulations of *experiment* using a thread
    pool, and append the successfully created ones to self.parsers.
    """
    # Manager for the current experiment, refreshed to have the latest info.
    manager = ExperimentManagerFactory.from_experiment(experiment)
    manager.refresh_experiment()

    if manager.location == 'HPC':
        if self.create_dir_map:
            # Get the sim map no matter what.
            manager.parserClass.createSimDirectoryMap(
                exp_id=manager.experiment.exp_id,
                suite_id=manager.experiment.suite_id,
                save=True,
                comps_experiment=manager.comps_experiment,
                verbose=self.verbose)
        if not manager.asset_service:
            manager.parserClass.asset_service = False

    # Let every analyzer initialize itself for this experiment.
    for analyzer in self.analyzers:
        analyzer.per_experiment(experiment)

    # Simulations to handle: the explicit subset registered for this
    # experiment (consumed here), or all of the experiment's simulations.
    if experiment.exp_id in self.experiments_simulations:
        simulations = self.experiments_simulations.pop(experiment.exp_id)
    else:
        simulations = manager.experiment.simulations

    # Create the parsers concurrently.
    pool = ThreadPool()
    async_results = [pool.apply_async(self.parser_for_simulation, args=(sim, experiment, manager))
                     for sim in simulations]
    pool.close()
    pool.join()

    # Retrieve the parsers from the pool, dropping the None ones.
    self.parsers.extend(parser for parser in (r.get() for r in async_results)
                        if parser is not None)
def resume(self, iter_step):
    """
    Prepare this iteration state to resume execution from *iter_step*.

    Restores, in order: the experiment manager, the next-point algorithm
    state, the calibration results, and finally resets whatever must be
    recomputed from the resume point onward.

    :param iter_step: StatusPoint member describing where to resume from
    """
    # step 1: If we know we are running -> recreate the exp_manager
    if iter_step.value >= StatusPoint.running.value:
        self.exp_manager = ExperimentManagerFactory.from_experiment(
            retrieve_experiment(self.experiment_id))

    # step 2: restore next_point
    if iter_step not in (StatusPoint.plot, StatusPoint.next_point, StatusPoint.running) and self.iteration != 0:
        if iter_step == StatusPoint.commission or iter_step == StatusPoint.iteration_start:
            # Resuming before analysis: use the previous iteration's
            # next-point state.
            iteration_state = IterationState.restore_state(self.calibration_name, self.iteration - 1)
            self.next_point_algo.set_state(iteration_state.next_point, self.iteration - 1)
        elif iter_step == StatusPoint.analyze:
            # Resuming at analysis: use this iteration's next-point state.
            iteration_state = IterationState.restore_state(self.calibration_name, self.iteration)
            self.next_point_algo.set_state(iteration_state.next_point, self.iteration)
            # For IMIS ONLY!
            self.next_point_algo.restore(
                IterationState.restore_state(self.calibration_name, self.iteration - 1))
    else:
        # Late resume (plot/next_point/running) or iteration 0: keep this
        # state's own next_point.
        self.next_point_algo.set_state(self.next_point, self.iteration)

    # step 3: restore Calibration results
    if self.iteration > 0 and iter_step.value < StatusPoint.plot.value:
        # it will combine current results with previous results
        self.restore_results(self.iteration - 1)
    else:
        # it will use the current results and resume from next iteration
        self.restore_results(self.iteration)

    # step 4: prepare resume states
    if iter_step.value <= StatusPoint.commission.value:
        # need to run simulations
        self.simulations = {}
    if iter_step.value <= StatusPoint.analyze.value:
        # just need to calculate the results
        self.results = {}

    # Rewind the status to just before the resume point (None when resuming
    # from the very first step).
    self._status = StatusPoint(iter_step.value - 1) if iter_step.value > 0 else None
def status(args, unknownArgs):
    """
    dtk 'status' command: print the status of experiments.

    With --active, reports every active experiment. Otherwise reports the
    experiment selected by *args*, optionally polling until it finishes
    when --repeat is given.
    """
    # No matter what check the overseer
    from simtools.ExperimentManager.BaseExperimentManager import BaseExperimentManager
    BaseExperimentManager.check_overseer()

    if args.active:
        print('Getting status of all active dtk experiments.')
        for experiment in DataStore.get_active_experiments():
            ExperimentManagerFactory.from_experiment(experiment).print_status()
        return

    manager = reload_experiment(args)
    if args.repeat:
        manager.wait_for_finished(verbose=True, sleep_time=20)
    else:
        manager.print_status()
def reload_experiment(args=None, try_sync=True):
    """
    Return the experiment (for given expId) or most recent experiment

    :param args: optional namespace carrying ``expId``
    :param try_sync: when True, try to retrieve/sync the experiment from
        COMPS; when False, look it up only in the local DataStore
    :return: an ExperimentManager for the resolved experiment (terminates
        the process when nothing can be found)
    """
    import sys

    exp_id = args.expId if args else None

    if not exp_id:
        exp = DataStore.get_most_recent_experiment(exp_id)
    elif try_sync:
        try:
            exp = retrieve_experiment(exp_id, verbose=False)
        # Fix: narrowed from a bare `except:`; a failed sync falls through
        # to the error path below.
        except Exception:
            exp = None
    else:
        # Fix: previously `exp` was left unbound when an id was given with
        # try_sync=False, raising NameError below. Fall back to the local
        # DataStore lookup.
        exp = DataStore.get_experiment(exp_id)

    if not exp:
        logger.error("No experiment found with the ID '%s' Locally or in COMPS. Exiting..." % exp_id)
        # sys.exit instead of the interactive-only exit() builtin.
        sys.exit()

    return ExperimentManagerFactory.from_experiment(exp)
def test_run(self):
    """
    End-to-end check of the LOCAL experiment manager: run a sweep of dummy
    python simulations, reload the experiment from the DataStore, verify all
    simulations were created, then hard-delete everything.
    """
    input_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'input')
    model_file = os.path.join(input_path, 'dummy_model.py')

    local_manager = ExperimentManagerFactory.from_model(model_file, 'LOCAL')
    local_manager.run_simulations(
        config_builder=PythonConfigBuilder.from_defaults('sleep'),
        exp_builder=RunNumberSweepBuilder(self.nsims))
    self.assertEqual(local_manager.experiment.exp_name, 'test')

    # Reload the experiment from the DataStore to exercise from_experiment.
    experiment = local_manager.experiment
    local_manager = ExperimentManagerFactory.from_experiment(
        experiment=DataStore.get_experiment(experiment.exp_id))

    states, msgs = local_manager.get_simulation_status()
    # Fix: dict.values() is a view object in Python 3, so comparing it to a
    # list with assertListEqual always failed; materialize it first.
    self.assertListEqual(list(states.values()),
                         [SimulationState.Created] * self.nsims)

    local_manager.hard_delete()
    import time
    # Give the local deletion time to settle before the next test runs.
    time.sleep(3)
] # Ids of the created experiments for resuming capabilities # Check if we want to resume if os.path.exists('ids.json'): print("Previous run detected... Run [N]ew, [R]esume, [A]bort?") resp = "" while resp not in ('N', 'R', 'A'): resp = input() if resp == "A": exit() elif resp == "R": # In the resume case, retrieve the ids and create the managers experiments_ids = json.load(open('ids.json', 'r')) for id in experiments_ids: experiments.append( ExperimentManagerFactory.from_experiment(str(id))) elif resp == "N": # Delete shelve file if os.path.exists('DownloadAnalyzerTPI.shelf'): os.remove('DownloadAnalyzerTPI.shelf') # Delete the ids os.remove('ids.json') # If experiment_ids is empty -> we need to commission if not experiments_ids: # Create a suite to hold all the experiments suite_id = create_suite(suite_name) # Create the scenarios for scenario in scenarios_dict: scenario_name = scenario['Scenario']
active_experiments = DataStore.get_active_experiments() logger.debug('Waiting loop pass number %d, pid %d' % (count, os.getpid())) logger.debug('Active experiments') logger.debug(active_experiments) logger.debug('Managers') logger.debug(managers.keys()) # Create all the managers for experiment in active_experiments: logger.debug("Looking for manager for experiment %s" % experiment.id) if experiment.id not in managers: logger.debug('Creating manager for experiment id: %s' % experiment.id) manager = None try: sys.path.append(experiment.working_directory) manager = ExperimentManagerFactory.from_experiment(experiment) except Exception as e: logger.debug('Exception in creation manager for experiment %s' % experiment.id) logger.debug(e) logger.debug(traceback.format_exc()) if manager: if manager.location == "LOCAL": manager.local_queue = local_queue managers[experiment.id] = manager else: # Refresh the experiment logger.debug("Found manager for experiment %s" % experiment.id) managers[experiment.id].experiment = experiment # Check every one of them