def _create_hil_reservation(restype_s, t_start_s, t_end_s, env_dict, pdata_dict, jobdata_dict):
    '''
    Create a HIL reservation named after the user, type and start time.

    The name is produced by get_hil_reservation_name() from env_dict,
    restype_s and t_start_s.  pdata_dict and jobdata_dict are accepted but
    are not read by this function.

    Returns a (reservation name, stderr text) tuple.  The stderr text comes
    from the 'scontrol show' lookup when the function returns early, and
    from the create call otherwise.
    '''
    resname = get_hil_reservation_name(env_dict, restype_s, t_start_s)

    # Look the reservation up before trying to create it
    _show_dicts, _show_stdout, show_stderr = exec_scontrol_show_cmd('reservation', resname)

    if not show_stderr or ('not found' in show_stderr):
        log_info('Creating HIL reservation `%s`, ending %s' % (resname, t_end_s))
        _create_stdout, create_stderr = create_slurm_reservation(
            resname,
            env_dict['username'],
            t_start_s,
            t_end_s,
            nodes=None,
            flags=RES_CREATE_FLAGS,
            features=RES_CREATE_HIL_FEATURES,
            debug=False)
        return resname, create_stderr

    # NOTE(review): this branch is reached when the lookup reported an error
    # other than 'not found', yet the message says the reservation exists.
    # Confirm against exec_scontrol_show_cmd() that this is the intended test.
    log_info('HIL reservation `%s` already exists' % resname)
    return resname, show_stderr
def main(argv=None):
    '''
    Run the HIL slurmctld prolog or epilog, as selected on the command line.

    argv is accepted for interface compatibility but is not read; the
    options come from process_args().

    The prolog handles the 'hil_reserve' command and the epilog handles
    the 'hil_release' command.  The function returns without acting when
    neither mode is selected, when the environment, partition or job data
    is missing, when the partition fails the HIL checks, or when the job
    is not a HIL command.
    '''
    args = process_args()
    log_init('hil_slurmctld.prolog', HIL_SLURMCTLD_PROLOG_LOGFILE, logging.DEBUG)

    if not (args.hil_prolog or args.hil_epilog):
        log_debug('Must specify one of --hil_prolog or --hil_epilog', separator=True)
        return

    # Collect prolog/epilog environment, job data, and partition data into
    # dictionaries.  Data for one partition and one job is expected, so the
    # first dict of each list is selected.
    #
    # The lookups are guarded so that an empty environment or an empty
    # result list reaches the sanity check below instead of raising
    # KeyError / IndexError before it.
    env_dict = _get_prolog_environment()
    pdata_dict = {}
    jobdata_dict = {}
    if env_dict:
        pdata_list = get_partition_data(env_dict['partition'])
        jobdata_list = get_job_data(env_dict['job_id'])
        if pdata_list:
            pdata_dict = pdata_list[0]
        if jobdata_list:
            jobdata_dict = jobdata_list[0]

    # Basic sanity check: all three dictionaries must be populated
    if not pdata_dict or not jobdata_dict or not env_dict:
        log_debug('One of pdata_dict, jobdata_dict, or env_dict is empty')
        log_debug('Job data', jobdata_dict)
        log_debug('P data', pdata_dict)
        return

    if not _check_hil_partition(env_dict, pdata_dict):
        return

    # Verify the command is a HIL command.  If so, process it.
    hil_cmd = _check_hil_command(env_dict)
    if not hil_cmd:
        return

    # The prolog takes precedence if both modes were requested
    if args.hil_prolog:
        if hil_cmd == 'hil_reserve':
            log_info('HIL Slurmctld Prolog', separator=True)
            log_debug('Processing reserve request')
            _hil_reserve_cmd(env_dict, pdata_dict, jobdata_dict)
    elif args.hil_epilog:
        if hil_cmd == 'hil_release':
            log_info('HIL Slurmctld Epilog', separator=True)
            log_debug('Processing release request')
            _hil_release_cmd(env_dict, pdata_dict, jobdata_dict)
    return
def main(argv=None):
    '''
    Run one pass of the HIL reservation monitor.

    argv is accepted for interface compatibility but is not read.

    Logging is initialised, the current HIL reservations are fetched, and
    the function returns immediately when there are none.  Otherwise the
    release reservations among them are looked up.
    '''
    log_init('hil_monitor', HIL_MONITOR_LOGFILE, logging.DEBUG)

    # Look for HIL reservations.  If there are none, return
    resdata_dict_list = _get_hil_reservations()
    if not resdata_dict_list:
        return

    log_info('HIL Reservation Monitor', separator=True)
    log_debug('')

    # Nothing in this function acts on the result yet
    release_reservations = _find_hil_release_reservations(resdata_dict_list)
def _delete_hil_reservation(env_dict, pdata_dict, jobdata_dict, resname):
    '''
    Delete a HIL reservation once its name has passed a minimal check.

    The name is checked with is_hil_reservation(resname, None).  env_dict,
    pdata_dict and jobdata_dict are accepted but are not read here.

    Returns whatever delete_slurm_reservation() returns when the name is
    accepted, otherwise (None, <error message>).

    It is always possible to delete the reservation with 'scontrol delete'.
    '''
    # Reject anything that does not look like a HIL reservation name
    if not is_hil_reservation(resname, None):
        log_info('Cannot delete HIL reservation, error in name (`%s`)' % resname)
        return None, 'hil_release: error: Invalid reservation name'

    log_info('Deleting HIL reservation `%s`' % resname)
    return delete_slurm_reservation(resname, debug=False)
def _check_hil_partition(env_dict, pdata_dict):
    '''
    Decide whether the partition described by pdata_dict may be used for HIL.

    The name prefix is always checked.  The state, default, shared and
    exclusive-user checks each run only when the matching RES_CHECK_* flag
    is set.  Every failed check is logged, and all enabled checks run even
    after an earlier one has failed.

    Returns True when no check failed, False otherwise.
    '''
    pname = pdata_dict['PartitionName']
    failed = False

    # The partition name must carry the HIL prefix
    if not pname.startswith(HIL_PARTITION_PREFIX):
        log_info('Partition name `%s` does not match `%s*`' % (pname, HIL_PARTITION_PREFIX))
        failed = True

    # The partition must be UP
    if RES_CHECK_PARTITION_STATE and pdata_dict['State'] != 'UP':
        log_info('Partition `%s` state (`%s`) is not UP' % (pname, pdata_dict['State']))
        failed = True

    # The partition must not be the default partition
    if RES_CHECK_DEFAULT_PARTITION and pdata_dict['Default'] == 'YES':
        log_info('Partition `%s` is the default partition, cannot be used for HIL' % pname)
        failed = True

    # The partition must not be shared: 'Shared' and 'ExclusiveUser'
    # are checked independently
    if RES_CHECK_SHARED_PARTITION and pdata_dict['Shared'] != 'NO':
        log_info('Partition `%s` is shared, cannot be used for HIL' % pname)
        failed = True

    if RES_CHECK_EXCLUSIVE_PARTITION and pdata_dict['ExclusiveUser'] != 'YES':
        log_info('Partition `%s` not exclusive to `%s`, cannot be used for HIL' %
                 (pname, env_dict['username']))
        failed = True

    return not failed
def _hil_release_cmd(env_dict, pdata_dict, jobdata_dict):
    '''
    Handle a release request by deleting the reservation the job ran in.

    - Read the reservation name from the job data
    - Verify the name is that of a HIL reserve reservation
    - Verify the user name appears in the reservation name
    - Fetch the reservation data, then delete the reservation

    Each failure is reported with log_error().  Nothing is returned.
    '''
    reserve_resname = jobdata_dict['Reservation']

    if not reserve_resname:
        log_error('No reservation name specified to `%s` command' % jobdata_dict['JobName'])
        return

    if not is_hil_reservation(reserve_resname, HIL_RESERVE):
        log_error('Oops, reservation `%s` is not a HIL reserve reservation' % reserve_resname)
        return

    username = env_dict['username']
    if username not in reserve_resname:
        log_error('Reservation `%s` not owned by user `%s`' % (reserve_resname, username))
        return

    # Basic validation done.
    # The fetched data is not used further in this function; the lookup
    # itself is kept.
    _reserve_rdata = get_object_data('reservation', reserve_resname)[0]

    _delete_stdout, delete_stderr = _delete_hil_reservation(
        env_dict, pdata_dict, jobdata_dict, reserve_resname)

    if len(delete_stderr):
        log_error('Error deleting HIL reserve reservation `%s`' % reserve_resname)
        log_error(delete_stderr)
    else:
        log_info('Deleted HIL reserve reservation `%s`' % reserve_resname)
def _restore_nodes(nodelist):
    '''
    Log a 'Restoring <node>' message for every node in nodelist.

    Only logging is done here; no other action is taken on the nodes.
    '''
    for node in nodelist:
        # Format the message before logging, as every other log_info() call
        # in this code does.  A second positional argument would be taken
        # as something other than a format argument.
        log_info('Restoring %s' % (node,))
def _log_hil_reservation(resname, stderr_data, t_start_s=None, t_end_s=None):
    '''
    Log the outcome of creating reservation resname.

    An empty stderr_data is logged as success.  Anything else is logged as
    an error, followed by the stderr text itself.  t_start_s and t_end_s
    are accepted but are not read here.
    '''
    if len(stderr_data) == 0:
        log_info('Created HIL reservation `%s`' % resname)
        return

    log_error('Error creating reservation `%s`' % resname)
    log_error(stderr_data)