def retrieve_experiment(exp_id, sync_if_missing=True, verbose=False, force_update=False):
    """
    Look up an experiment in the local database by id, optionally syncing it
    down from COMPS when it is absent.

    :param exp_id: Id of the experiment to retrieve (str or UUID)
    :param sync_if_missing: Should we try to sync if not present?
    :param verbose: Verbose flag forwarded to the COMPS sync helper
    :param force_update: Discard any cached local copy and re-sync from COMPS
    :return: The experiment found
    :raises Exception: when no id is given or the experiment cannot be found
    """
    from uuid import UUID

    if not exp_id:
        raise Exception("Trying to retrieve an experiment without providing an experiment ID")

    # Normalize UUID objects to their string form for the DB queries
    if isinstance(exp_id, UUID):
        exp_id = str(exp_id)

    # Check the local DB first (direct id lookup, then most-recent lookup)
    local_exp = DataStore.get_experiment(exp_id) or DataStore.get_most_recent_experiment(exp_id)
    if local_exp is not None:
        if not force_update:
            return local_exp
        # Forced refresh: drop the cached copy and fall through to the sync
        DataStore.delete_experiment(local_exp)

    if not sync_if_missing:
        raise Exception('Experiment %s not found in the local database and sync disabled.' % exp_id)

    logger.info('Experiment with id %s not found in local database, trying sync.' % exp_id)
    with SetupParser.TemporarySetup(temporary_block='HPC') as sp:
        synced = COMPS_experiment_to_local_db(exp_id, sp.get('server_endpoint'), verbose)

    if synced:
        return synced
    raise Exception("Experiment '%s' could not be retrieved." % exp_id)
def cancel_simulations(self, sim_list):
    """
    Cancel all the simulations provided in id list.
    """
    # States we leave untouched (already terminal or commission requested)
    skip_states = (SimulationState.Succeeded, SimulationState.Failed,
                   SimulationState.Canceled, SimulationState.CommissionRequested)

    updates = []
    for sim in sim_list:
        if sim is None:
            continue

        # Actively kill anything still in a cancellable state
        if sim.status not in skip_states:
            self.kill_simulation(sim)

        # Queue the status change for a single batched DB write
        updates.append({'sid': sim.id,
                        'status': SimulationState.Canceled,
                        'message': None,
                        'pid': None})

    # Batch update the statuses
    DataStore.batch_simulations_update(updates)
def consolidate_experiments_with_options(exp_dict, sim_dict, batch_name=None):
    """
    Reconcile the requested experiments/simulations with an existing batch of
    the same name, prompting the user to overwrite, merge or cancel when the
    stored batch content differs from the request.
    """
    # Without a batch name there is nothing to reconcile
    if batch_name is None:
        return exp_dict, sim_dict

    batch = DataStore.get_batch_by_name(batch_name)
    if not batch:
        return exp_dict, sim_dict

    # Does the stored batch hold the same experiments/simulations?
    same_exps = compare_two_ids_list(exp_dict.keys(), batch.get_experiment_ids())
    same_sims = compare_two_ids_list(sim_dict.keys(), batch.get_simulation_ids())

    if not same_exps or not same_sims:
        # confirm only if existing batch contains different experiments
        print("\nBatch with name {} already exists and contains the following:\n".format(batch_name))
        print(batch)

        if exp_dict or sim_dict:
            var = input('\nDo you want to [O]verwrite, [M]erge, or [C]ancel: ')
            if var == 'O':
                # clear existing experiments associated with this Batch
                DataStore.clear_batch(batch)
            elif var == 'C':
                exit()
            elif var != 'M':
                logger.error("Option '%s' is invalid..." % var)
                exit()
            # 'M' (merge) falls through and keeps the requested items

    return exp_dict, sim_dict
def check_overseer():
    """
    Ensure that the overseer thread is running.

    The thread pid is retrieved from the settings and then we test if it
    corresponds to a python process. If not, a new Overseer is spawned and its
    pid is stored back into the settings.
    """
    logger.debug("Checking Overseer state")
    setting = DataStore.get_setting('overseer_pid')
    overseer_pid = int(setting.value) if setting else None

    if is_running(overseer_pid, name_part='python'):
        logger.debug("A valid Overseer was detected, pid: %d" % overseer_pid)
        return

    # Launch the Overseer
    logger.debug("A valid Overseer was not detected for stored pid %s." % overseer_pid)
    current_dir = os.path.dirname(os.path.realpath(__file__))
    runner_path = os.path.abspath(os.path.join(current_dir, '..', 'Overseer.py'))

    if LocalOS.name == LocalOS.WINDOWS:
        # Use the named constant instead of the magic value 512:
        # CREATE_NEW_PROCESS_GROUP detaches the Overseer from this console so
        # it does not receive the parent's Ctrl+C / termination signals.
        p = subprocess.Popen([sys.executable, runner_path], shell=False,
                             creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
    else:
        p = subprocess.Popen([sys.executable, runner_path], shell=False)

    # Save the pid in the settings
    DataStore.save_setting(
        DataStore.create_setting(key='overseer_pid', value=str(p.pid)))
def create_token(self):
    """
    Interactively create and persist a GitHub auth token.

    Prompts the user for GitHub credentials, requests an authorization token
    through github3, stores the token in the local settings DB and returns it.

    :return: The newly created GitHub token string
    :raises self.AuthorizationError: when the credentials are rejected

    NOTE(review): the credential-prompt/authorize section was corrupted in the
    source (redacted string literals); it has been reconstructed from the
    surrounding code — confirm against version control history.
    """
    import getpass
    import github3

    # Asks user for username/password (password never echoed)
    user = input("Please enter your GitHub username: ")
    password = getpass.getpass("Please enter your GitHub password: ")

    try:
        # Request a personal access token for this user
        auth = github3.authorize(user, password, scopes=['repo'], note='dtk-tools')
    except github3.exceptions.AuthenticationFailed:
        print("/!\\ WARNING /!\\ Bad GitHub credentials.")
        # Bug fix: the original used a '%s' placeholder with str.format(),
        # which never substituted the support email address.
        print("Cannot access disease packages. Please contact {} for assistance."
              .format(self.SUPPORT_EMAIL))
        sys.stdout.flush()
        raise self.AuthorizationError()

    # Write the info to disk
    # Update the (local) mysql db with the token
    from simtools.DataAccess.DataStore import DataStore
    DataStore.save_setting(
        DataStore.create_setting(key=self.auth_token_field, value=auth.token))

    return auth.token
def LogCleaner():
    """
    Run the log cleanup at most once a day.

    Reads the 'last_log_cleanup' setting; when it is missing or older than one
    day, triggers LoggingDataStore.log_cleanup() and records today's timestamp.
    """
    # Get the last time a cleanup happened
    last_cleanup = DataStore.get_setting('last_log_cleanup')
    if not last_cleanup or (datetime.today() - datetime.strptime(last_cleanup.value.split(' ')[0], '%Y-%m-%d')).days > 1:
        # Do the cleanup
        from simtools.DataAccess.LoggingDataStore import LoggingDataStore
        LoggingDataStore.log_cleanup()
        # Bug fix: persist the timestamp explicitly as a string — the read
        # path above parses setting.value as text ('YYYY-MM-DD ...'), so do
        # not rely on implicit datetime-to-column coercion.
        DataStore.save_setting(DataStore.create_setting(key='last_log_cleanup', value=str(datetime.today())))
def hard_delete(self):
    """
    Delete data for experiment and marks the server entity for deletion.
    """
    from simtools.DataAccess.DataStore import DataStore

    # Mark experiment for deletion in COMPS.
    COMPS_login(self.endpoint)
    self.comps_experiment.delete()

    # Remove the local DB record as well
    DataStore.delete_experiment(self.experiment)
def hard_delete(self):
    """
    Delete experiment and output data.
    """
    from simtools.DataAccess.DataStore import DataStore

    # Remove the simulation outputs from disk (best effort: a failure is
    # reported but does not prevent the DB record from being removed)
    local_path = self.experiment.get_path()
    if os.path.exists(local_path):
        try:
            shutil.rmtree(local_path)
        except Exception as error:
            print("Could not delete path: {}\nReason: {}".format(local_path, error))

    # Drop the experiment record from the local DB
    DataStore.delete_experiment(self.experiment)
def monitor(self):
    """
    Poll COMPS for simulation state changes until the experiment finishes.

    Each poll is compared against the previously seen states and only the
    simulations whose status changed (or newly appeared) are batch-updated
    in the local DB.
    """
    logger.debug('COMPS - Start Monitoring for experiment %s' % self.experiment.id)

    # Seed the last-known states from the local DB records
    last_states = dict()
    for simulation in self.experiment.simulations:
        last_states[simulation.id] = simulation.status

    # Create the monitor
    monitor = CompsSimulationMonitor(self.experiment.exp_id, None, self.experiment.endpoint)

    # Until done, update the status
    while True:
        try:
            states, _ = monitor.query()
            if states == {}:
                # No states returned... Consider failed
                states = {sim_id: SimulationState.Failed for sim_id in last_states.keys()}
        except Exception as e:
            logger.error('Exception in the COMPS Monitor for experiment %s' % self.experiment.id)
            logger.error(e)
            # Bug fix: on a query failure 'states' is unbound (first
            # iteration) or stale — skip this round and retry instead of
            # falling through to the DB update with bad data.
            time.sleep(self.MONITOR_SLEEP)
            continue

        # Only update the simulations that changed since last check
        # We are also including simulations that were not present (in case we add some later)
        DataStore.batch_simulations_update(
            list({"sid": key, "status": states[key].name}
                 for key in states
                 if (key in last_states and last_states[key] != states[key]) or key not in last_states))

        # Store the last state
        last_states = states

        if CompsExperimentManager.status_finished(states):
            logger.debug('Stop monitoring for experiment %s because all simulations finished' % self.experiment.id)
            break

        time.sleep(self.MONITOR_SLEEP)
def db_list(args, unknownArgs):
    """
    List recent experiments from the local DB.

    :param args: parsed arguments carrying `limit` (digits or '*') and `exp_name`
    :param unknownArgs: optional single '--BLOCK' argument to filter by location
    :raises Exception: on extra unknown arguments or an invalid limit
    """
    # Filter by location
    selected_block = None
    num = 20
    is_all = False
    name = None

    if len(unknownArgs) > 0:
        if len(unknownArgs) == 1:
            # Argument arrives as '--BLOCK': strip the dashes, normalize case
            selected_block = unknownArgs[0][2:].upper()
        else:
            raise Exception('Too many unknown arguments: please see help.')

    # Limit number of experiments to display
    if args.limit:
        if args.limit.isdigit():
            # Bug fix: args.limit is a string from argparse — convert so the
            # query receives an int like the default (20).
            num = int(args.limit)
        elif args.limit == '*':
            is_all = True
        else:
            raise Exception('Invalid limit: please see help.')

    # Filter by experiment name like
    if args.exp_name:
        name = args.exp_name

    # Execute query
    experiments = DataStore.get_recent_experiment_by_filter(
        num=num, name=name, is_all=is_all, location=selected_block)

    if len(experiments) > 0:
        for exp in experiments:
            print(exp)
    else:
        print("No experiments to display.")
def commission_iteration(self, next_params):
    """
    Commission an experiment of simulations constructed from a list of
    combinations of random seeds, calibration sites, and the next sample
    points. Cache the relevant experiment and simulation information to the
    IterationState.
    """
    if self.simulations:
        # Resume path: this iteration was already commissioned previously
        logger.info('Reloading simulation data from cached iteration (%s) state.' % self.iteration)
        self.exp_manager = ExperimentManagerFactory.from_experiment(
            DataStore.get_experiment(self.experiment_id))
    else:
        self.exp_manager = ExperimentManagerFactory.init()

        # use passed in function to create exp_builder
        builder = self.exp_builder_func(next_params)

        self.exp_manager.run_simulations(
            config_builder=self.config_builder,
            exp_name='%s_iter%d' % (self.calibration_name, self.iteration),
            exp_builder=builder,
            suite_id=self.suite_id)

        # Cache experiment/simulation info on the iteration state
        self.simulations = self.exp_manager.experiment.toJSON()['simulations']
        self.experiment_id = self.exp_manager.experiment.exp_id

    self.save()
def list_orphan_experiments(self):
    """ Get orphan experiment list for this calibration """
    # Experiments not attached to any of this calibration's suites
    suite_ids, exp_ids = self.get_experiments()
    return DataStore.list_leftover(suite_ids, exp_ids)
def check_state(self):
    """
    Refresh the simulation record from the DB and return its state.

    Returns: state of the simulation or None
    """
    # Re-fetch so a status changed by another process is observed
    refreshed = DataStore.get_simulation(self.simulation.id)
    self.simulation = refreshed
    return refreshed.status
def cleanup(self):
    """
    Cleanup the current calibration
    - Delete the result directory
    - If LOCAL -> also delete the simulations
    """
    # Calibration metadata is best-effort: a missing/corrupt file only skips
    # the per-experiment/suite cleanup, the directory removal still runs.
    try:
        calib_data = self.read_calib_data()
    except Exception:
        logger.info('Calib data cannot be read -> skip')
        calib_data = None

    if calib_data:
        # Run the cleanup under the block the calibration was created with
        with SetupParser.TemporaryBlock(calib_data['selected_block']):
            # Retrieve suite ids and iter_count
            suites = calib_data.get('suites')
            iter_count = calib_data.get('iteration')

            # Kill
            self.kill()

            # Delete the simulations too
            logger.info('Cleaning up calibration %s' % self.name)
            for i in range(0, iter_count + 1):
                # Get the iteration cache
                iteration_cache = os.path.join(self.name, 'iter%d' % i, 'IterationState.json')

                # Iterations are sequential: the first missing cache ends the scan
                if not os.path.exists(iteration_cache):
                    break
                # Retrieve the iteration state
                it = IterationState.from_file(iteration_cache)

                # Create the associated experiment manager and ask for deletion
                # NOTE(review): bare except silently skips any experiment that
                # fails to delete (e.g. already gone) — deliberate best-effort.
                try:
                    exp_mgr = ExperimentManagerFactory.from_experiment(
                        DataStore.get_experiment(it.experiment_id))
                    exp_mgr.hard_delete()
                except:
                    continue

            # Delete all HPC suites (the local suites are only carried by experiments)
            for suite in suites:
                if suite['type'] == "HPC":
                    logger.info('Delete COMPS suite %s' % suite['id'])
                    COMPS_login(SetupParser.get('server_endpoint'))
                    from simtools.Utilities.COMPSUtilities import delete_suite
                    delete_suite(suite['id'])

    # Then delete the whole directory
    calib_dir = os.path.abspath(self.name)
    if os.path.exists(calib_dir):
        try:
            shutil.rmtree(calib_dir)
        except OSError:
            logger.error("Failed to delete %s" % calib_dir)
            logger.error("Try deleting the folder manually before retrying the calibration.")
def retrieve_item(itemid):
    """
    Return the object identified by id.
    Can be an experiment, a suite or a batch.
    If it is a suite, all experiments with this suite_id will be returned.

    :param itemid: Id (or batch name) of the item to find
    :return: an experiment, a batch, a simulation, or a list of experiments
    :raises Exception: when nothing matches the given id
    """
    # First try to get an experiment
    from simtools.Utilities.Experiments import retrieve_experiment
    from simtools.DataAccess.DataStore import DataStore
    from simtools.Utilities.COMPSUtilities import exps_for_suite_id
    from simtools.Utilities.Experiments import retrieve_simulation

    # Try experiments first
    # Bug fix: narrowed the bare `except:` clauses — they also swallowed
    # SystemExit/KeyboardInterrupt.
    try:
        return retrieve_experiment(itemid)
    except Exception:
        pass

    # This was not an experiment, maybe a batch ?
    batch = DataStore.get_batch_by_id(itemid)
    if batch:
        return batch

    batch = DataStore.get_batch_by_name(itemid)
    if batch:
        return batch

    # Still no item found -> test the suites
    exps = DataStore.get_experiments_by_suite(itemid)
    if exps:
        return exps

    # Still no item found -> test the simulations
    sim = DataStore.get_simulation(itemid)
    if sim:
        return sim

    # Still not -> last chance is a COMPS suite
    exps = exps_for_suite_id(itemid)
    if exps:
        return [retrieve_experiment(str(exp.id)) for exp in exps]

    # Nothing, consider COMPS simulation
    try:
        return retrieve_simulation(itemid)
    except Exception:
        pass

    # Didnt find anything sorry
    raise Exception('Could not find any item corresponding to %s' % itemid)
def done_commissioning(self):
    """
    Return True once every simulation has left the commissioning phase
    (i.e. none is missing a status, CommissionRequested, or Created).
    """
    # Refresh from the DB so we see states written by other processes
    self.experiment = DataStore.get_experiment(self.experiment.exp_id)

    pending = (SimulationState.CommissionRequested, SimulationState.Created)
    for sim in self.experiment.simulations:
        if not sim.status or sim.status in pending:
            return False
    return True
def analyze(args, unknownArgs, builtinAnalyzers):
    """
    Entry point for the `analyze` command: run the selected analyzers over the
    requested experiments/simulations, creating or reusing a Batch as needed.

    :param args: parsed command-line arguments (config_name, itemids, batch_name, force)
    :param unknownArgs: extra arguments (unused here)
    :param builtinAnalyzers: registry of built-in analyzers available by name
    :return: the AnalyzeManager used for the analysis
    """
    # validate parameters
    if args.config_name is None:
        logger.error('Please provide Analyzer (-a or --config_name).')
        exit()

    # Retrieve what we need
    itemids = args.itemids
    batch_name = args.batch_name

    # collect all experiments and simulations
    exp_dict, sim_dict = collect_experiments_simulations(itemids)

    # consider batch existing case
    exp_dict, sim_dict = consolidate_experiments_with_options(exp_dict, sim_dict, batch_name)

    # check status for each experiment
    if not args.force:
        check_status(exp_dict.values())

    # collect all analyzers
    analyzers = collect_analyzers(args, builtinAnalyzers)

    if not exp_dict and not sim_dict:
        # No experiment specified -> using latest experiment
        latest = DataStore.get_most_recent_experiment()
        exp_dict[latest.exp_id] = latest

    # create instance of AnalyzeManager
    analyzeManager = AnalyzeManager(exp_list=exp_dict.values(), sim_list=sim_dict.values(), analyzers=analyzers)

    # Experiments to persist with the batch: those not already represented in
    # the manager's experiments_simulations map (presumably experiments
    # reached through standalone simulations — TODO confirm against AnalyzeManager)
    exp_ids_to_be_saved = list(set(exp_dict.keys()) - set(analyzeManager.experiments_simulations.keys()))
    exp_to_be_saved = [exp_dict[exp_id] for exp_id in exp_ids_to_be_saved]

    # if batch name exists, always save experiments
    if batch_name:
        # save/create batch
        save_batch(batch_name, exp_to_be_saved, sim_dict.values())
    # Only create a batch if we pass more than one experiment or simulation in total
    elif len(exp_dict) + len(sim_dict) > 1:
        # check if there is any existing batch containing the same experiments
        batch_existing = check_existing_batch(exp_dict, sim_dict)

        if batch_existing is None:
            # save/create batch
            save_batch(batch_name, exp_to_be_saved, sim_dict.values())
        else:
            # display the existing batch
            logger.info('\nBatch: %s (id=%s)' % (batch_existing.name, batch_existing.id))

    # start to analyze
    analyzeManager.analyze()

    # remove empty batches
    clean_batch()

    return analyzeManager
def process_batch(self):
    """
    Persist the batch, then record each created simulation in the local DB.
    """
    self.save_batch()

    # Now that the save is done, we have the ids ready -> create the simulations
    for created in self.created_simulations:
        record = DataStore.create_simulation(id=str(created.id),
                                             tags=created.tags,
                                             experiment_id=self.experiment.exp_id)
        self.cache.append(record)
def clear_batch(id_or_name, ask=False):
    """ de-attach all associated experiments from the given batch """
    batches = DataStore.get_batch_list(id_or_name)
    if not batches:
        print("No batches identified by '%s' were found in the DB." % id_or_name)
        exit()

    if ask:
        # Show what is about to be detached and ask for confirmation first
        for b in batches:
            print(b)
        answer = input("Are you sure you want to detach all associated experiments from those batches (Y/n)? ")
        if answer != 'Y':
            print('No action taken.')
            return

    DataStore.clear_batch(batches)
    print('The associated experiments/simulations were detached.')
def retrieve_token(self):
    """
    Return the GitHub auth token, preferring the in-memory cache, then the
    settings DB, finally creating a brand new token.
    """
    # In-memory cache wins
    if self.AUTH_TOKEN:
        return self.AUTH_TOKEN

    from simtools.DataAccess.DataStore import DataStore
    setting = DataStore.get_setting(self.auth_token_field)
    # Fall back to creating a new token when nothing is stored
    return setting.value if setting else self.create_token()
def save_batch(batch_name=None, exp_list=None, sim_list=None):
    """
    Create (or fetch by name) a batch, attach the given experiments and
    simulations to it, and persist it.
    """
    # Try to get the batch based on name if provided
    existing = DataStore.get_batch_by_name(batch_name) if batch_name else None

    if existing:
        batch = existing
    else:
        # No batches were found, need to create a new one
        batch = Batch()
        batch.name = batch_name

    # Attach the experiments and simulations
    batch.experiments.extend(exp_list)
    batch.simulations.extend(sim_list)

    # Save
    DataStore.save_batch(batch)
    logger.info('\nBatch: %s (id=%s) saved!' % (batch.name, batch.id))

    return batch
def reload_experiments(args=None):
    """
    Build experiment managers for the experiments matching the given args.

    :param args: optional namespace possibly carrying `expId` and `current_dir`
    :return: list of experiment managers (failures are skipped with a message)
    """
    # getattr with default replaces the hasattr/ternary ladder
    exp_id = getattr(args, 'expId', None)
    current_dir = getattr(args, 'current_dir', None)

    managers = []
    for exp in DataStore.get_experiments_with_options(exp_id, current_dir):
        try:
            managers.append(ExperimentManagerFactory.from_experiment(exp))
        except RuntimeError:
            print("Could not create manager... Bypassing...")

    return managers
def check_existing_batch(exp_dict, sim_dict):
    """
    Return the first batch whose experiment and simulation ids both match the
    given collections, or None when no such batch exists.
    """
    wanted_exp_ids = exp_dict.keys()
    wanted_sim_ids = sim_dict.keys()

    for candidate in DataStore.get_batch_list():
        # Both id sets must match for the batch to be considered identical
        if compare_two_ids_list(wanted_exp_ids, candidate.get_experiment_ids()) \
                and compare_two_ids_list(wanted_sim_ids, candidate.get_simulation_ids()):
            return candidate

    return None
def clean_batch(ask=False):
    """ remove all empty batches """
    if ask:
        # Give the user a chance to back out
        if input("Are you sure you want to remove all empty Batches (Y/n)?") != 'Y':
            print('No action taken.')
            return

    removed = DataStore.remove_empty_batch()
    print("%s empty Batch(s) have been removed." % removed)
def get_experiment_from_iteration(self, iteration=None, force_metadata=False):
    """
    Retrieve experiment for a given iteration
    """
    # Only check iteration for resume cases
    if not force_metadata:
        return None

    resolved = self.adjust_iteration(iteration)
    iteration_data = self.read_iteration_data(resolved)
    return DataStore.get_experiment(iteration_data.experiment_id)
def retrieve_simulation(sim_id, sync_if_missing=True, verbose=False, force_update=False):
    """
    Retrieve a simulation in the local database based on its id.
    Can call a sync if missing if the flag is true.

    :param sim_id: Id of the simulation to retrieve (str or UUID)
    :param sync_if_missing: Should we try to sync if not present?
    :param verbose: Verbose flag forwarded to the COMPS sync helper
    :param force_update: Delete any cached local copy and re-sync from COMPS
    :return: The simulation found
    :raises Exception: when no id is given or the simulation cannot be found
    """
    from simtools.Utilities.COMPSUtilities import get_simulation_by_id
    from uuid import UUID

    if not sim_id:
        # Bug fix: corrected the grammar of the error message ("an simulation")
        raise Exception("Trying to retrieve a simulation without providing a simulation ID")

    # Normalize UUID objects to their string form for the DB lookups
    if isinstance(sim_id, UUID):
        sim_id = str(sim_id)

    # If we dont force the update -> look first in the DB
    sim = DataStore.get_simulation(sim_id)
    if sim:
        # If we have a simulation and we want to force the update -> delete it
        if not force_update:
            return sim
        DataStore.delete_simulation(sim)

    if not sync_if_missing:
        raise Exception('Simulation %s not found in the local database and sync disabled.' % sim_id)

    logger.info('Simulation with id %s not found in local database, trying sync.' % sim_id)
    csim = get_simulation_by_id(sim_id)
    if csim:
        # Sync the owning experiment down to the local DB, then retry the lookup
        with SetupParser.TemporarySetup(temporary_block='HPC') as sp:
            endpoint = sp.get('server_endpoint')
            COMPS_experiment_to_local_db(csim.experiment_id, endpoint, verbose)
        sim = DataStore.get_simulation(sim_id)

    if sim:
        return sim
    raise Exception("Simulation '%s' could not be retrieved." % sim_id)
def __init__(self, exp_list=None, sim_list=None, analyzers=None, working_dir=None,
             force_analyze=False, verbose=False, create_dir_map=False):
    """
    Set up the analyze manager.

    :param exp_list: experiment(s) to analyze, or the string 'latest' for the
                     most recent experiment in the local DB
    :param sim_list: simulation(s) to analyze
    :param analyzers: analyzer instance(s) to run
    :param working_dir: working directory (defaults to the current directory)
    :param force_analyze: analyze regardless of experiment status
    :param verbose: verbose output flag
    :param create_dir_map: whether to build a simulation directory map
    """
    self.experiments = []
    self.simulations = []
    self.parsers = []
    self.analyzers = []
    self.experiments_simulations = {}
    self.verbose = verbose
    self.force_analyze = force_analyze
    self.create_dir_map = create_dir_map
    self.parse = True
    self.working_dir = working_dir or os.getcwd()

    with SetupParser.TemporarySetup() as sp:
        self.maxThreadSemaphore = multiprocessing.Semaphore(
            int(sp.get('max_threads', 16)))

    # If no experiment is specified, retrieve the most recent as a convenience
    if exp_list == 'latest':
        exp_list = DataStore.get_most_recent_experiment()

    # Bug fix: collections.Iterable was removed in Python 3.10 —
    # use collections.abc.Iterable instead (same semantics since 3.3).
    # Initial adding of experiments (a single experiment or a str is wrapped)
    if exp_list:
        exp_list = exp_list if isinstance(
            exp_list, collections.abc.Iterable) and not isinstance(
                exp_list, str) else [exp_list]
        for exp in exp_list:
            self.add_experiment(exp)

    # Initial adding of the simulations
    if sim_list:
        sim_list = sim_list if isinstance(
            sim_list, collections.abc.Iterable) else [sim_list]
        for sim in sim_list:
            self.add_simulation(sim)

    # Initial adding of the analyzers
    if analyzers:
        analyzer_list = analyzers if isinstance(
            analyzers, collections.abc.Iterable) else [analyzers]
        for a in analyzer_list:
            self.add_analyzer(a)
def create_experiment(self, experiment_name, experiment_id, suite_id=None):
    """
    Create and cache the local DB record for a new experiment, capturing the
    run context (executable, paths, tags, tool revision, selected block).
    """
    experiment_fields = dict(
        exp_id=experiment_id,
        suite_id=suite_id,
        exe_name=self.assets.exe_name,
        sim_root=SetupParser.get('sim_root'),
        exp_name=experiment_name,
        location=self.location,
        tags=self.experiment_tags,
        sim_type=self.config_builder.get_param('Simulation_Type'),
        dtk_tools_revision=get_tools_revision(),
        selected_block=SetupParser.selected_block,
        setup_overlay_file=SetupParser.overlay_path,
        working_directory=os.getcwd(),
        command_line=self.commandline.Commandline)

    self.experiment = DataStore.create_experiment(**experiment_fields)
def delete_batch(id_or_name=None):
    """
    Delete a particular batch or all batches in DB
    """
    batches = DataStore.get_batch_list(id_or_name)

    # Describe what is about to be removed
    if id_or_name:
        print("Batch to delete:")
        for batch in batches:
            print(batch)
    else:
        print("ALL the batches present in the database ({} batches total) will be deleted...".format(len(batches)))

    if input("Are you sure you want to proceed? (Y/n)") != 'Y':
        print('No action taken.')
        return

    if id_or_name:
        for batch in batches:
            DataStore.delete_batch(batch)
    else:
        # Wipe all
        DataStore.delete_batch()

    print('The Batch(s) have been deleted.')
def collect_simulations(args):
    """
    Collect the simulations referenced by args.itemids from the local DB.

    :param args: parsed command-line args carrying `itemids`
    :return: dict mapping simulation id -> simulation object
    :raises Exception: when an id does not match any known simulation
    """
    simulations = dict()

    # retrieve ids
    ids = args.itemids
    if not ids:
        return simulations

    # For each, treat it differently depending on what it is
    for sid in ids:
        sim = DataStore.get_simulation(sid)
        # Bug fix: a missing simulation previously surfaced as an opaque
        # AttributeError on `sim.id` (get_simulation returns None) — raise a
        # clear error instead.
        if sim is None:
            raise Exception("Simulation %s not found in the local database." % sid)
        simulations[sim.id] = sim

    return simulations