# Note: the model classes in this listing rely on the project's ModelBase
# base class (not shown here); only standard-library imports are listed.
import json
import weakref
from copy import deepcopy


class CampaignTicket(ModelBase):
    """
    Campaign ticket has a list of input datasets, a campaign and a processing string
    Campaign ticket can be used to create requests for each input dataset
    """

    _ModelBase__schema = {
        # Database id (required by CouchDB)
        '_id': '',
        # Document revision (required by CouchDB)
        '_rev': '',
        # PrepID
        'prepid': '',
        # Conditions global tag
        'conditions_globaltag': '',
        # Processing string for this ticket (label at the time of the submission)
        'processing_string': '',
        # List of prepids of requests that were created from this ticket
        'created_requests': [],
        # Status is either new or done
        'status': 'new',
        # User notes
        'notes': '',
        # CMSSW release
        'cmssw_release': 'CMSSW_ToBeIncludeFromValManagers',
        # Sample tag (to be chosen from a list)
        'sample_tag': '',
        # Pile-up production (to be chosen from a list)
        'pile_up': '',
        # High statistics production (True or False)
        'high_statistics': False,
        # String for GS input
        'string_for_inputGS': '',
        # GEN-SIM samples to be re-used?
        'ReUseGenSim': False,
        # Extension number (just a number, if a similar sample was already
        # submitted, observed especially in Phase II)
        'extension_number': 0,
        # Action history
        'history': []
    }

    _lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,50}'),
        'conditions_globaltag': lambda gt: ModelBase.matches_regex(gt, '[a-zA-Z0-9_\\-]{1,50}'),
        'cmssw_release': lambda cmssw_release: ModelBase.matches_regex(cmssw_release,
                                                                       '[a-zA-Z0-9_\\-]{1,50}'),
        'processing_string': lambda ps: ModelBase.matches_regex(ps, '[a-zA-Z0-9_]{0,100}'),
        'status': lambda status: status in ('new', 'done'),
        'sample_tag': lambda sample_tag: sample_tag in ('Run2_2016', 'Run2_2017', 'Run2_2018',
                                                        'fastSim_2016', 'fastSim_2017',
                                                        'fastSim_2018', 'Run3', 'PhaseII',
                                                        'customized'),
        'pile_up': lambda pile_up: pile_up in ('classical_mixing', 'premix', 'no_pile_up'),
        'high_statistics': lambda high_statistics: isinstance(high_statistics, bool),
        'ReUseGenSim': lambda ReUseGenSim: isinstance(ReUseGenSim, bool),
        'extension_number': lambda extension_number: isinstance(extension_number, int),
        'string_for_inputGS': lambda string_for_inputGS: ModelBase.matches_regex(
            string_for_inputGS, '[a-zA-Z0-9_\\-]{1,50}')
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)
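# Usage sketch (illustrative, not from the source): the lambda checks above
# are plain predicates, so they can be exercised directly. This assumes
# ModelBase.matches_regex behaves like re.fullmatch - an assumption, since
# ModelBase is not defined in this listing.
import re

def matches_regex(value, pattern):
    # Hypothetical stand-in for ModelBase.matches_regex
    return re.fullmatch(pattern, value) is not None

assert matches_regex('Run2018_UL_ReReco', '[a-zA-Z0-9_\\-]{1,50}')  # valid prepid
assert not matches_regex('bad prepid!', '[a-zA-Z0-9_\\-]{1,50}')    # space and '!' rejected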
class Subcampaign(ModelBase):
    """
    Class that represents a snapshot of computing campaign
    It is used as a template for requests
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Energy in TeV
        'energy': 0.0,
        # Action history
        'history': [],
        # Default memory
        'memory': 2000,
        # User notes
        'notes': '',
        # Path to json that contains all runs
        'runs_json_path': '',
        # List of Sequences
        'sequences': [],
    }

    __runs_json_regex = '[a-zA-Z0-9/\\-_]{0,150}(\\.json|\\.txt)?'

    lambda_checks = {
        'prepid': ModelBase.lambda_check('subcampaign'),
        'cmssw_release': ModelBase.lambda_check('cmssw_release'),
        'energy': ModelBase.lambda_check('energy'),
        'memory': ModelBase.lambda_check('memory'),
        'runs_json_path': lambda rjp: ModelBase.matches_regex(rjp,
                                                              Subcampaign.__runs_json_regex),
        'sequences': lambda s: len(s) > 0,
        '__sequences': lambda s: isinstance(s, Sequence),
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input['runs_json_path'] = json_input.get('runs_json_path',
                                                          '').strip().lstrip('/')
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(Sequence(json_input=sequence_json))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
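# Quick illustration (illustrative path, not from the source) of the
# runs_json_path normalization done in Subcampaign.__init__: surrounding
# whitespace and leading slashes are removed before the regex check runs.
path = '  /store/runs/2018_golden.json  '
normalized = path.strip().lstrip('/')
assert normalized == 'store/runs/2018_golden.json'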
class Campaign(ModelBase):
    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # No need for CMSSW version
        'cmssw_release': '',
        # Sample tag
        'sample_tag': '',
        # User notes
        'notes': '',
        # Link to prodmon
        'link_prodmon': '',
        # Action history
        'history': []
    }

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'link_prodmon': lambda link_prodmon: ModelBase.matches_regex(link_prodmon,
                                                                     '[a-zA-Z0-9]{1,50}'),
        'sample_tag': lambda sample_tag: sample_tag in ['Phase2', 'Run3', 'Run2_2016'],
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
class Flow(ModelBase):
    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # List of allowed source campaigns prepids
        'source_campaigns': [],
        # Status of the flow
        'status': '',
        # Target campaign prepid
        'target_campaign': ''
    }

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'status': lambda status: status in ['new', 'submit', 'tasksubmit']
    }
class Campaign(ModelBase):
    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Energy in TeV
        'energy': 0.0,
        # Type LHE, MCReproc, Prod
        'type': '',
        # Step type: MiniAOD, NanoAOD, etc.
        'step': 'DR',
        # No need for CMSSW version
        # 'cmssw_release': '',
        # User notes
        'notes': '',
        # List of dictionaries that have cmsDriver options
        # (default to be modified, just a guideline, what is normally needed)
        'sequences': [{
            'conditions': 'GT_FromAlca',
            'step': 'RAW2DIGI,L1Reco,RECO,EI,PAT,DQM:@rerecoCommon',
            'datatier': 'AOD,MINIAOD,DQM',
            'eventcontent': 'RECO,SKIM,ALCA,MINIAOD,DQMIO',
            'era': 'Run2_201XXX',
            'extra': '--runUnscheduled',
            'scenario': 'pp',
            'nThreads': '8',
            'customise': 'Configuration/DataProcessing/RecoTLR.customisePostEra_Run2_201XXX'
        }],
        # Action history
        'history': [],
        # Default memory
        'memory': 2300
    }

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'energy': lambda energy: energy >= 0.0,
        'step': lambda step: step in ['DR', 'MiniAOD', 'NanoAOD'],
        'memory': lambda memory: memory >= 0,
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
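# Sketch of the check_attribute dispatch shared by Campaign and the other
# classes in this listing: attributes with a registered lambda are
# validated, everything else passes. Standalone and illustrative only.
example_checks = {
    'energy': lambda energy: energy >= 0.0,
    'memory': lambda memory: memory >= 0,
}

def example_check_attribute(name, value):
    if name in example_checks:
        return example_checks[name](value)

    return True

assert example_check_attribute('energy', 13.0)       # validated, passes
assert not example_check_attribute('memory', -1)     # validated, fails
assert example_check_attribute('notes', 'anything')  # no check registered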
class ChainedCampaign(ModelBase):
    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Notes
        'notes': '',
        # List of flow and campaign pairs
        'campaigns': []
    }

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}')
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
class Request(ModelBase):
    """
    Request represents a single step in processing pipeline
    Request contains one or a few cmsDriver commands
    It is created based on a subcampaign that it is a member of
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Completed events
        'completed_events': 0,
        # Energy in TeV
        'energy': 0.0,
        # Action history
        'history': [],
        # Input dataset name or request name
        'input': {
            'dataset': '',
            'request': ''
        },
        # Dictionary of runs and their lumisection ranges to be processed
        'lumisections': {},
        # Memory in MB
        'memory': 2000,
        # User notes
        'notes': '',
        # List of output
        'output_datasets': [],
        # Priority in computing
        'priority': 110000,
        # Processing string
        'processing_string': '',
        # List of runs to be processed
        'runs': [],
        # List of dictionaries that have cmsDriver options
        'sequences': [],
        # Disk size per event in kB
        'size_per_event': 1.0,
        # Status is either new, approved, submitted or done
        'status': 'new',
        # Subcampaign name
        'subcampaign': '',
        # Time per event in seconds
        'time_per_event': 1.0,
        # Total events
        'total_events': 0,
        # List of workflows in computing
        'workflows': []
    }

    __prepid_regex = '[a-zA-Z0-9\\-_]{1,100}'

    lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, Request.__prepid_regex),
        'cmssw_release': ModelBase.lambda_check('cmssw_release'),
        'completed_events': lambda events: events >= 0,
        'energy': ModelBase.lambda_check('energy'),
        '_input': {
            'dataset': lambda ds: not ds or ModelBase.lambda_check('dataset')(ds),
            'request': lambda r: not r or ModelBase.matches_regex(r, Request.__prepid_regex)
        },
        'memory': ModelBase.lambda_check('memory'),
        '__output_datasets': ModelBase.lambda_check('dataset'),
        'priority': ModelBase.lambda_check('priority'),
        'processing_string': ModelBase.lambda_check('processing_string'),
        '__runs': lambda r: isinstance(r, int) and r > 0,
        '__sequences': lambda s: isinstance(s, Sequence),
        'size_per_event': lambda spe: spe > 0.0,
        'status': lambda status: status in {'new', 'approved', 'submitting',
                                            'submitted', 'done'},
        'subcampaign': ModelBase.lambda_check('subcampaign'),
        'time_per_event': lambda tpe: tpe > 0.0,
        'total_events': lambda events: events >= 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            json_input['runs'] = [int(r) for r in json_input.get('runs', [])]
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(Sequence(json_input=sequence_json,
                                                 parent=self,
                                                 check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name == 'input':
            if not attribute_value.get('dataset') and not attribute_value.get('request'):
                raise Exception('Either input dataset or input request must be provided')

        return super().check_attribute(attribute_name, attribute_value)

    def get_config_file_names(self):
        """
        Get list of dictionaries of all config file names without extensions
        """
        file_names = []
        for sequence in self.get('sequences'):
            file_names.append(sequence.get_config_file_names())

        return file_names

    def get_cmsdrivers(self, overwrite_input=None):
        """
        Get all cmsDriver commands for this request
        """
        built_command = ''
        for index, sequence in enumerate(self.get('sequences')):
            if index == 0 and overwrite_input:
                built_command += sequence.get_cmsdriver(overwrite_input)
            else:
                built_command += sequence.get_cmsdriver()

            if sequence.needs_harvesting():
                built_command += '\n\n'
                built_command += sequence.get_harvesting_cmsdriver()

            built_command += '\n\n'

        return built_command.strip()

    def get_era(self):
        """
        Return era based on input dataset
        """
        input_dataset_parts = [x for x in self.get('input')['dataset'].split('/') if x]
        if len(input_dataset_parts) < 2:
            return self.get_prepid().split('-')[1]

        return input_dataset_parts[1].split('-')[0]

    def get_input_processing_string(self):
        """
        Return processing string from input dataset
        """
        input_dataset_parts = [x for x in self.get('input')['dataset'].split('/') if x]
        if len(input_dataset_parts) < 3:
            return ''

        middle_parts = [x for x in input_dataset_parts[1].split('-') if x]
        if len(middle_parts) < 3:
            return ''

        return '-'.join(middle_parts[1:-1])

    def get_dataset(self):
        """
        Return primary dataset based on input dataset
        """
        input_dataset_parts = [x for x in self.get('input')['dataset'].split('/') if x]
        if not input_dataset_parts:
            return self.get_prepid().split('-')[2]

        return input_dataset_parts[0]

    def get_request_string(self):
        """
        Return request string made of era, dataset and processing string
        """
        processing_string = self.get('processing_string')
        era = self.get_era()
        dataset = self.get_dataset()
        return f'{era}_{dataset}_{processing_string}'.strip('_')
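# Standalone walk-through (illustrative dataset name, not from the source)
# of how Request.get_dataset/get_era/get_input_processing_string parse an
# input dataset of the usual /Primary/Era-ProcessingString-vN/TIER form.
input_dataset = '/SingleMuon/Run2018A-17Sep2018-v2/AOD'
parts = [x for x in input_dataset.split('/') if x]
dataset = parts[0]                           # primary dataset
era = parts[1].split('-')[0]                 # era prefix of the middle part
middle = [x for x in parts[1].split('-') if x]
processing_string = '-'.join(middle[1:-1])   # strip era and version
assert (dataset, era, processing_string) == ('SingleMuon', 'Run2018A', '17Sep2018')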
def __init__(self, json_input=None):
    ModelBase.__init__(self, json_input)
    self.collection = 'flows'
class Sequence(ModelBase):
    """
    Sequence is a dictionary that has all user editable attributes
    for cmsDriver command
    """

    _ModelBase__schema = {
        # What conditions to use. This has to be specified
        'conditions': '',
        # Hash of configuration file uploaded to ReqMgr2
        'config_id': '',
        # Specify the file where the code to modify the process object is stored
        # If inline_custom is set to 1, then inline the customisation file
        'customise': '',
        # What data tier to use
        'datatier': [],
        # Specify which era to use (e.g. "run2")
        'era': '',
        # What event content to write out
        'eventcontent': [],
        # Freeform attributes appended at the end
        'extra': '',
        # Hash of harvesting configuration file uploaded to ReqMgr2
        'harvesting_config_id': '',
        # How many threads should CMSSW use
        'nThreads': 1,
        # Scenario overriding standard settings: 'pp', 'cosmics', 'nocoll', 'HeavyIons'
        'scenario': 'pp',
        # The desired step. The possible values are:
        # RAW2DIGI, L1Reco, RECO, EI, PAT, NANO, ALCA[:@...], DQM[:@...], SKIM[:@...],
        # HARVESTING:@...
        'step': []
    }

    lambda_checks = {
        'conditions': lambda c: ModelBase.matches_regex(c, '[a-zA-Z0-9_]{0,50}'),
        'config_id': lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        '__datatier': lambda s: s in {'AOD', 'MINIAOD', 'NANOAOD', 'DQMIO', 'USER',
                                      'ALCARECO', 'RECO'},
        'era': lambda e: ModelBase.matches_regex(e, '[a-zA-Z0-9_\\,]{0,50}'),
        '__eventcontent': lambda s: s in {'AOD', 'MINIAOD', 'NANOAOD', 'DQM',
                                          'NANOEDMAOD', 'ALCARECO', 'RECO'},
        'harvesting_config_id': lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        'nThreads': lambda n: 0 < n < 64,
        'scenario': lambda s: s in {'pp', 'cosmics', 'nocoll', 'HeavyIons'},
        '__step': lambda s: (s in {'RAW2DIGI', 'L1Reco', 'RECO', 'EI', 'PAT', 'NANO'}
                             or s.startswith('ALCA')
                             or s.startswith('DQM')
                             or s.startswith('SKIM')
                             or s.startswith('HARVESTING:@'))
    }

    def __init__(self, json_input=None, parent=None, check_attributes=True):
        self.parent = None
        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)

        self.check_attribute('eventcontent', self.get('eventcontent'))
        self.check_attribute('datatier', self.get('datatier'))

    def get_prepid(self):
        if not self.parent:
            return 'Sequence'

        parent = self.parent()
        index = self.get_index_in_parent()
        return f'Sequence_{parent}_{index}'

    def check_attribute(self, attribute_name, attribute_value):
        if not self.initialized or attribute_name not in ('eventcontent', 'datatier'):
            return super().check_attribute(attribute_name, attribute_value)

        has_harvesting_step = bool([s for s in self.get('step')
                                    if s.startswith('HARVESTING:@')])
        if not self.get('step') or has_harvesting_step:
            return super().check_attribute(attribute_name, attribute_value)

        # If sequence does not have HARVESTING step,
        # eventcontent and datatier cannot be empty
        if not self.get('eventcontent'):
            raise Exception('No eventcontent is allowed only with HARVESTING step')

        if not self.get('datatier'):
            raise Exception('No datatier is allowed only with HARVESTING step')

        return super().check_attribute(attribute_name, attribute_value)

    def needs_harvesting(self):
        """
        Return if this sequence produces input file for harvesting
        and harvesting step is needed
        """
        for step in self.get('step'):
            if step == 'DQM' or step.startswith('DQM:'):
                return True

        return False

    def get_index_in_parent(self):
        """
        Return sequence's index in parent's list of sequences
        """
        for index, sequence in enumerate(self.parent().get('sequences')):
            if self == sequence:
                return index

        raise Exception(f'Sequence is not a child of {self.parent().get_prepid()}')

    def get_name(self):
        """
        Return a sequence name which is based on parent prepid and sequence number
        Last sequence always has the same name as parent prepid
        Other sequences have suffix with their index, e.g.
        PrepID_0
        PrepID_1
        PrepID
        If there is only one sequence, it will be the last one and
        have the same name as parent prepid
        """
        index = self.get_index_in_parent()
        parent_prepid = self.parent().get_prepid()
        if index != len(self.parent().get('sequences')) - 1:
            sequence_name = f'{parent_prepid}_{index}'
        else:
            sequence_name = f'{parent_prepid}'

        return sequence_name

    def get_config_file_names(self):
        """
        Return dictionary of 'config' and 'harvest' config file names
        """
        parent_prepid = self.parent().get_prepid()
        index = self.get_index_in_parent()
        config_file_names = {'config': f'{parent_prepid}_{index}_cfg'}
        if self.needs_harvesting():
            config_file_names['harvest'] = f'{parent_prepid}_{index}_harvest_cfg'

        return config_file_names

    def __build_cmsdriver(self, cmsdriver_type, arguments):
        """
        Build a cmsDriver command from given arguments
        Add comment in front of the command
        """
        self.logger.info('Generating %s cmsDriver', cmsdriver_type)
        # Actual command
        command = f'# Command for {cmsdriver_type}:\ncmsDriver.py {cmsdriver_type}'
        # Comment in front of the command for better readability
        comment = f'# Arguments for {cmsdriver_type}:\n'
        for key in sorted(arguments.keys()):
            if not arguments[key]:
                continue

            if key == 'extra':
                continue

            if isinstance(arguments[key], bool):
                arguments[key] = ''

            if isinstance(arguments[key], list):
                arguments[key] = ','.join([str(x) for x in arguments[key]])

            command += f' --{key} {arguments[key]}'.rstrip()
            comment += f'# --{key} {arguments[key]}'.rstrip() + '\n'

        if arguments.get('extra'):
            extra_value = arguments['extra']
            command += f' {extra_value}'
            comment += f'# <extra> {extra_value}\n'

        # Exit the script with error of cmsDriver.py
        command += ' || exit $?'
        return comment + '\n' + command

    def get_cmsdriver(self, overwrite_input=None):
        """
        Return a cmsDriver command for this sequence
        Config file is named like this PrepID_0_cfg.py
        """
        sequence_name = self.get_name()
        arguments_dict = dict(self.get_json())
        # Delete sequence metadata
        arguments_dict.pop('config_id', None)
        arguments_dict.pop('harvesting_config_id', None)
        # Fetch list of files for specific runs
        das_query = ''
        # Handle input/output file names
        if overwrite_input:
            arguments_dict['filein'] = overwrite_input
        else:
            index = self.get_index_in_parent()
            arguments_dict['number'] = 10
            if index == 0:
                input_dataset = self.parent().get('input')['dataset']
                all_runs = self.parent().get('runs')
                if not input_dataset:
                    input_request = self.parent().get('input')['request']
                    arguments_dict['filein'] = f'"file:{input_request}.root"'
                elif all_runs:
                    das_file = f'{sequence_name}_files.txt'
                    das_query += '# Query DAS to get list of files for specified runs\n'
                    # Chunkify to 25 runs, otherwise script line gets very long
                    for runs in self.chunkify(all_runs, 25):
                        runs = ','.join([str(r) for r in runs])
                        das_query += 'dasgoclient --limit 0 '
                        das_query += f'--query "file dataset={input_dataset} run in [{runs}]" '
                        das_query += f'>> {das_file}\n'

                    das_query += '\n'
                    arguments_dict['filein'] = f'"filelist:{das_file}"'
                else:
                    arguments_dict['filein'] = f'"dbs:{input_dataset}"'
            else:
                previous_sequence = self.parent().get('sequences')[index - 1]
                input_file = f'{previous_sequence.get_name()}.root'
                arguments_dict['filein'] = f'"file:{input_file}"'

        # Update ALCA and SKIM steps to ALCA:@Dataset and SKIM:@Dataset
        # if dataset name is in "auto" dictionary in CMSSW
        dynamic_steps = self.update_dynamic_steps(arguments_dict['step'])
        # Build argument dictionary
        config_names = self.get_config_file_names()
        arguments_dict['fileout'] = f'"file:{sequence_name}.root"'
        arguments_dict['python_filename'] = f'"{config_names["config"]}.py"'
        arguments_dict['no_exec'] = True
        cms_driver_command = self.__build_cmsdriver('RECO', arguments_dict)
        return dynamic_steps + das_query + cms_driver_command

    def update_dynamic_steps(self, steps):
        """
        Update ALCA and SKIM steps to be variables and return code that resolves them
        """
        dynamic_steps = ''
        for step_index, step in enumerate(steps):
            if step not in ('ALCA', 'SKIM'):
                continue

            dataset = self.parent().get_dataset()
            steps[step_index] = f'${step}_STEP'
            # Build a small python program to get value from CMSSW on the go
            step_var = f'{step}_STEP=$(python -c "'
            if step == 'ALCA':
                step_var += 'from Configuration.AlCa.autoAlca import AlCaRecoMatrix as ds;'
            elif step == 'SKIM':
                step_var += 'from Configuration.Skimming.autoSkim import autoSkim as ds;'

            step_var += f'print(\'{step}:@{dataset}\' if \'{dataset}\' in ds.keys() else \'\')")'
            dynamic_steps += f'{step_var}\n'

        if dynamic_steps:
            dynamic_steps = f'# Steps based on dataset name\n{dynamic_steps}\n'

        return dynamic_steps

    def get_harvesting_cmsdriver(self):
        """
        Return a harvesting cmsDriver for this sequence
        Config file is named like this PrepID_0_harvest_cfg.py
        """
        if not self.needs_harvesting():
            return ''

        arguments_dict = dict(self.get_json())
        # Delete sequence metadata
        for attr in ('config_id', 'harvesting_config_id', 'customise', 'datatier',
                     'eventcontent', 'nThreads', 'extra', 'scenario'):
            arguments_dict.pop(attr, None)

        # Get correct configuration of DQM step, e.g.
        # DQM:@rerecoCommon should be changed to HARVESTING:@rerecoCommon
        step = 'HARVESTING:dqmHarvesting'
        for one_step in self.get('step'):
            if one_step.startswith('DQM:'):
                step = one_step.replace('DQM:', 'HARVESTING:', 1)
                break

        # Build argument dictionary
        sequence_name = self.get_name()
        config_names = self.get_config_file_names()
        arguments_dict['data'] = True
        arguments_dict['no_exec'] = True
        arguments_dict['filetype'] = 'DQM'
        arguments_dict['step'] = step
        arguments_dict['era'] = arguments_dict['era'].split(',')[0]
        arguments_dict['filein'] = f'"file:{sequence_name}_inDQM.root"'
        arguments_dict['python_filename'] = f'"{config_names["harvest"]}.py"'
        arguments_dict['number'] = -1
        harvesting_command = self.__build_cmsdriver('HARVESTING', arguments_dict)
        return harvesting_command

    @staticmethod
    def chunkify(items, chunk_size):
        """
        Yield fixed size chunks of given list
        """
        start = 0
        chunk_size = max(chunk_size, 1)
        while start < len(items):
            yield items[start:start + chunk_size]
            start += chunk_size

    def get_output_module(self):
        """
        Return an output module name
        """
        eventcontent = [e for e in self.get('eventcontent') if not e.startswith('DQM')]
        if not eventcontent:
            return ''

        return f'{eventcontent[0]}output'
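# chunkify exercised standalone (illustrative run numbers): fixed-size
# chunks of 25 runs per dasgoclient call, remainder in the last chunk.
def chunkify(items, chunk_size):
    start = 0
    chunk_size = max(chunk_size, 1)
    while start < len(items):
        yield items[start:start + chunk_size]
        start += chunk_size

chunks = list(chunkify(list(range(315252, 315312)), 25))
assert [len(c) for c in chunks] == [25, 25, 10]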
class Request(ModelBase):
    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Energy in TeV
        'energy': 0.0,
        # Step type: MiniAOD, NanoAOD, etc.
        'step': '',
        # CMSSW version
        'cmssw_release': '',
        # User notes
        'notes': '',
        # Input dataset name
        'input_dataset_name': '',
        # Output dataset name
        'output_dataset_name': '',
        # List of dictionaries that have cmsDriver options
        'sequences': [],
        # Action history
        'history': [],
        # Status - it should be either new, approved, submitted or done (nothing else)
        'status': 'new',
        # Workflow name in computing when submitted
        'reqmgr_name': '',
        # Time per event
        'time_event': 5.0,
        # Size per event
        'size_event': 2000,
        # Priority
        'priority': 110000,
        # Runs for data injections
        'runs': [],
        # Type, depends on the datatier of the requests
        'type': '',
        # Process string
        'process_string': '',
        # Default memory
        'memory': 14000,
        # All the following will be filled in after injection
        # (they will be created at the time of the injection)
        # Request id (to be filled in the dictionary of injection)
        'request_id': '',
        # Reco cfg (to be filled in the dictionary of injection)
        'reco_cfg': '',
        # Harvest cfg (to be filled in the dictionary of injection)
        'harvest_cfg': '',
        # Dataset name (this is just the first split of the input dataset name,
        # can be done automatically)
        'dataset_name': ''
    }

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'energy': lambda energy: energy >= 0.0,
        'time_event': lambda time_event: time_event >= 0.0,
        'size_event': lambda size_event: size_event >= 0.0,
        'priority': lambda priority: priority >= 0.0,
        'DQM': lambda DQM: isinstance(DQM, bool),
        'Reco': lambda Reco: isinstance(Reco, bool),
        'step': lambda step: (step in ['RAW2DIGI', 'L1Reco', 'RECO', 'EI', 'PAT',
                                       'DQM', 'NANO']
                              or 'ALCARECO' in step
                              or 'DQM' in step),
        'datatier': lambda datatier: datatier in ['AOD', 'MiniAOD', 'NanoAOD', 'DQMIO',
                                                  'USER', 'ALCARECO'],
        'memory': lambda memory: memory >= 0,
        'type': lambda type_req: type_req in ['GEN-SIM', 'FEVTDEBUG', 'GEN-SIM-RAW',
                                              'GEN-SIM-DIGI-RAW', 'GEN-SIM-DIGI-RAW-HLT',
                                              'NANOAOD', 'AODSIM', 'MINIAOD',
                                              'FEVTDEBUGHLT'],
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release,
        'reco_cfg': lambda reco_cfg: ModelBase.matches_regex(reco_cfg, '[a-zA-Z0-9]{1,50}'),
        'harvest_cfg': lambda harvest_cfg: ModelBase.matches_regex(harvest_cfg,
                                                                   '[a-zA-Z0-9]{1,50}'),
        'process_string': lambda process_string: ModelBase.matches_regex(process_string,
                                                                         '[a-zA-Z0-9]{1,50}'),
        'request_id': lambda request_id: ModelBase.matches_regex(request_id,
                                                                 '[a-zA-Z0-9]{1,50}'),
        'dataset_name': lambda dataset_name: ModelBase.matches_regex(dataset_name,
                                                                     '[a-zA-Z0-9]{1,50}')
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
class Ticket(ModelBase):
    """
    Ticket allows creating multiple similar RelVals in the same campaign
    """

    _ModelBase__schema = {
        # Database id (required by database)
        '_id': '',
        # PrepID
        'prepid': '',
        # Batch name
        'batch_name': '',
        # CMSSW release
        'cmssw_release': '',
        # Additional command to add to all cmsDrivers
        'command': '',
        # List of steps that additional command should be applied to
        'command_steps': [],
        # CPU cores
        'cpu_cores': 1,
        # List of prepids of relvals that were created from this ticket
        'created_relvals': [],
        # GPU parameters that will be added to selected steps
        'gpu': {
            'requires': 'forbidden',
            'gpu_memory': '',
            'cuda_capabilities': [],
            'cuda_runtime': '',
            'gpu_name': '',
            'cuda_driver_version': '',
            'cuda_runtime_version': ''
        },
        # List of steps that GPU parameters should be applied to
        'gpu_steps': [],
        # Action history
        'history': [],
        # Label to be used in runTheMatrix
        'label': '',
        # Type of relval: standard, upgrade, premix, etc.
        'matrix': 'standard',
        # Memory in MB
        'memory': 2000,
        # User notes
        'notes': '',
        # nStreams to be used in all steps, 0 defaults to nThreads
        'n_streams': 0,
        # Whether to recycle first step
        'recycle_gs': False,
        # Which step should be the first to run while recycling the input
        'recycle_input_of': '',
        # String to rewrite middle part of INPUT dataset(s) /.../THIS/...
        'rewrite_gt_string': '',
        # Tag to group workflow ids
        'sample_tag': '',
        # Overwrite default scram arch
        'scram_arch': '',
        # Status is either new or done
        'status': 'new',
        # Workflow ids
        'workflow_ids': [],
    }

    lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,75}'),
        'batch_name': ModelBase.lambda_check('batch_name'),
        'cmssw_release': ModelBase.lambda_check('cmssw_release'),
        'cpu_cores': ModelBase.lambda_check('cpu_cores'),
        '__created_relvals': ModelBase.lambda_check('relval'),
        '_gpu': {
            'requires': lambda r: r in ('forbidden', 'optional', 'required'),
            'cuda_capabilities': lambda l: isinstance(l, list),
            'gpu_memory': lambda m: m == '' or int(m) > 0,
        },
        'label': ModelBase.lambda_check('label'),
        'matrix': ModelBase.lambda_check('matrix'),
        'memory': ModelBase.lambda_check('memory'),
        'n_streams': lambda streams: 0 <= streams <= 16,
        'rewrite_gt_string': lambda rgs: ModelBase.matches_regex(rgs,
                                                                 '[a-zA-Z0-9\\.\\-_]{0,199}'),
        'sample_tag': ModelBase.lambda_check('sample_tag'),
        'status': lambda status: status in ('new', 'done'),
        'scram_arch': lambda s: not s or ModelBase.lambda_check('scram_arch')(s),
        'workflow_ids': lambda wf: len(wf) > 0,
        '__workflow_ids': lambda wf: wf > 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            json_input['workflow_ids'] = [float(wid) for wid in json_input['workflow_ids']]
            json_input['recycle_gs'] = bool(json_input.get('recycle_gs', False))
            if json_input.get('gpu', {}).get('requires') not in ('optional', 'required'):
                json_input['gpu'] = self.schema().get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                json_input['gpu_steps'] = []

        ModelBase.__init__(self, json_input, check_attributes)
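# Sketch (standalone, illustrative names) of the GPU normalization done in
# Ticket.__init__ above and repeated in RelValStep below: any 'requires'
# value other than 'optional'/'required' collapses the whole gpu dict to
# the schema default with requires='forbidden' and clears gpu_steps.
def normalize_gpu(json_input, schema_gpu):
    if json_input.get('gpu', {}).get('requires') not in ('optional', 'required'):
        json_input['gpu'] = dict(schema_gpu)
        json_input['gpu']['requires'] = 'forbidden'
        json_input['gpu_steps'] = []

    return json_input

ticket = normalize_gpu({'gpu': {'requires': 'maybe'}},
                       {'requires': 'forbidden', 'gpu_memory': ''})
assert ticket['gpu']['requires'] == 'forbidden' and ticket['gpu_steps'] == []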
class Ticket(ModelBase):
    """
    Ticket has a list of input datasets and a list of step specifications
    Ticket is used to create requests for each input dataset
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # List of prepids of requests that were created from this ticket
        'created_requests': [],
        # Action history
        'history': [],
        # List of input dataset names
        'input_datasets': [],
        # User notes
        'notes': '',
        # Status is either new or done
        'status': 'new',
        # List of dicts that have subcampaign, processing_string,
        # size/time per event values
        'steps': [],
    }

    lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,75}'),
        '__created_requests': lambda pi: ModelBase.matches_regex(pi,
                                                                 '[a-zA-Z0-9\\-_]{1,100}'),
        '__input_datasets': ModelBase.lambda_check('dataset'),
        'status': lambda status: status in {'new', 'done'},
        'steps': lambda s: len(s) > 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            steps = []
            for step in json_input.get('steps', []):
                steps.append({'subcampaign': step.get('subcampaign', ''),
                              'processing_string': step.get('processing_string', ''),
                              'time_per_event': float(step.get('time_per_event', 0)),
                              'size_per_event': float(step.get('size_per_event', 0)),
                              'priority': int(step.get('priority', 0))})

            json_input['steps'] = steps

        ModelBase.__init__(self, json_input, check_attributes)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name == 'steps':
            if not isinstance(attribute_value, list):
                raise Exception(f'Expected {attribute_name} to be a list')

            for step in attribute_value:
                subcampaign = step['subcampaign']
                if not ModelBase.lambda_check('subcampaign')(subcampaign):
                    raise Exception(f'Bad subcampaign prepid {subcampaign}')

                processing_string = step['processing_string']
                if not ModelBase.lambda_check('processing_string')(processing_string):
                    raise Exception(f'Bad processing string {processing_string}')

                time_per_event = step['time_per_event']
                if time_per_event <= 0.0:
                    raise Exception(f'Bad time per event {time_per_event}')

                size_per_event = step['size_per_event']
                if size_per_event <= 0.0:
                    raise Exception(f'Bad size per event {size_per_event}')

                priority = step['priority']
                if not ModelBase.lambda_check('priority')(priority):
                    raise Exception(f'Bad priority {priority}')

        return super().check_attribute(attribute_name, attribute_value)
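# The step normalization from Ticket.__init__ above, standalone with an
# illustrative input: missing keys fall back to defaults and numeric
# fields are coerced before check_attribute validates them.
raw_step = {'subcampaign': 'ReReco-Run2018', 'time_per_event': '2.5'}
step = {'subcampaign': raw_step.get('subcampaign', ''),
        'processing_string': raw_step.get('processing_string', ''),
        'time_per_event': float(raw_step.get('time_per_event', 0)),
        'size_per_event': float(raw_step.get('size_per_event', 0)),
        'priority': int(raw_step.get('priority', 0))}
assert step['time_per_event'] == 2.5 and step['priority'] == 0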
class RelValStep(ModelBase):
    """
    RelValStep is one step of RelVal - either a call to DAS for list of input files
    or a cmsDriver command
    """

    _ModelBase__schema = {
        # Step name
        'name': '',
        # CMSSW version of this step
        'cmssw_release': '',
        # Hash of configuration file uploaded to ReqMgr2
        'config_id': '',
        # cmsDriver arguments
        'driver': {
            'beamspot': '',
            'conditions': '',
            'customise': '',
            'customise_commands': '',
            'data': False,
            'datatier': [],
            'era': '',
            'eventcontent': [],
            'extra': '',
            'fast': False,
            'filetype': '',
            'geometry': '',
            'hltProcess': '',
            'mc': False,
            'number': '10',
            'nStreams': '',
            'pileup': '',
            'pileup_input': '',
            'process': '',
            'relval': '',
            'runUnscheduled': False,
            'fragment_name': '',
            'scenario': '',
            'step': [],
        },
        # Events per lumi - if empty, events per job will be used
        'events_per_lumi': '',
        # GPU parameters
        'gpu': {
            'requires': 'forbidden',
            'gpu_memory': '',
            'cuda_capabilities': [],
            'cuda_runtime': '',
            'gpu_name': '',
            'cuda_driver_version': '',
            'cuda_runtime_version': ''
        },
        # Input file info
        'input': {
            'dataset': '',
            'lumisection': {},
            'run': [],
            'label': '',
        },
        # Keeping output of this task
        'keep_output': True,
        # Lumis per job - applicable to non-first steps
        'lumis_per_job': '',
        # Actual globaltag, resolved from auto:... conditions
        'resolved_globaltag': '',
        # Overwrite default CMSSW scram arch
        'scram_arch': '',
    }

    lambda_checks = {
        'cmssw_release': lambda cmssw: not cmssw or ModelBase.lambda_check('cmssw_release')(cmssw),
        'config_id': lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        '_driver': {
            'conditions': lambda c: not c or ModelBase.matches_regex(c, '[a-zA-Z0-9_]{0,50}'),
            'era': lambda e: not e or ModelBase.matches_regex(e, '[a-zA-Z0-9_\\,]{0,50}'),
            'scenario': lambda s: not s or s in {'pp', 'cosmics', 'nocoll', 'HeavyIons'},
        },
        '_gpu': {
            'requires': lambda r: r in ('forbidden', 'optional', 'required'),
            'cuda_capabilities': lambda l: isinstance(l, list),
            'gpu_memory': lambda m: m == '' or int(m) > 0,
        },
        '_input': {
            'dataset': lambda ds: not ds or ModelBase.lambda_check('dataset')(ds),
            'label': lambda l: not l or ModelBase.lambda_check('label')(l)
        },
        'lumis_per_job': lambda l: l == '' or int(l) > 0,
        'name': lambda n: ModelBase.matches_regex(n, '[a-zA-Z0-9_\\-]{1,150}'),
        'scram_arch': lambda s: not s or ModelBase.lambda_check('scram_arch')(s),
    }

    def __init__(self, json_input=None, parent=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            # Remove -- from argument names
            schema = self.schema()
            if json_input.get('input', {}).get('dataset'):
                json_input['driver'] = schema.get('driver')
                json_input['gpu'] = schema.get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                step_input = json_input['input']
                for key, default_value in schema['input'].items():
                    if key not in step_input:
                        step_input[key] = default_value
            else:
                json_input['driver'] = {k.lstrip('-'): v
                                        for k, v in json_input['driver'].items()}
                json_input['input'] = schema.get('input')
                if json_input.get('gpu', {}).get('requires') not in ('optional', 'required'):
                    json_input['gpu'] = schema.get('gpu')
                    json_input['gpu']['requires'] = 'forbidden'

                driver = json_input['driver']
                for key, default_value in schema['driver'].items():
                    if key not in driver:
                        driver[key] = default_value

                if driver.get('data') and driver.get('mc'):
                    raise Exception('Both --data and --mc are not allowed in the same step')

                if driver.get('data') and driver.get('fast'):
                    raise Exception('Both --data and --fast are not allowed in the same step')

        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)
        else:
            self.parent = None

    def get_prepid(self):
        return 'RelValStep'

    def get_short_name(self):
        """
        Return a shortened step name
        GenSimFull for anything that has GenSim in it
        HadronizerFull for anything that has Hadronizer in it
        Split and cut by underscores for other cases
        """
        name = self.get('name')
        if 'gensim' in name.lower():
            return 'GenSimFull'

        if 'hadronizer' in name.lower():
            return 'HadronizerFull'

        while len(name) > 50:
            name = '_'.join(name.split('_')[:-1])
            if '_' not in name:
                break

        return name

    def get_index_in_parent(self):
        """
        Return step's index in parent's list of steps
        """
        for index, step in enumerate(self.parent().get('steps')):
            if self == step:
                return index

        raise Exception(f'Step is not a child of {self.parent().get_prepid()}')

    def get_step_type(self):
        """
        Return whether this is cmsDriver or input file step
        """
        if self.get('input').get('dataset'):
            return 'input_file'

        return 'cms_driver'

    @staticmethod
    def chunkify(items, chunk_size):
        """
        Yield fixed size chunks of given list
        """
        start = 0
        chunk_size = max(chunk_size, 1)
        while start < len(items):
            yield items[start:start + chunk_size]
            start += chunk_size

    def __build_cmsdriver(self, step_index, arguments, for_submission):
        """
        Build a cmsDriver command from given arguments
        Add comment in front of the command
        """
        fragment_name = arguments['fragment_name']
        if not fragment_name:
            fragment_name = f'step{step_index + 1}'

        self.logger.info('Generating %s cmsDriver for step %s', fragment_name, step_index)
        # Actual command
        command = ''
        if not for_submission:
            command += f'# Command for step {step_index + 1}:\n'

        command += f'cmsDriver.py {fragment_name}'
        # Comment in front of the command for better readability
        comment = f'# Arguments for step {step_index + 1}:\n'
        for key in sorted(arguments.keys()):
            if key in ('fragment_name', 'extra'):
                continue

            if not arguments[key]:
                continue

            if isinstance(arguments[key], bool):
                arguments[key] = ''

            if isinstance(arguments[key], list):
                arguments[key] = ','.join([str(x) for x in arguments[key]])

            command += f' --{key} {arguments[key]}'.rstrip()
            comment += f'# --{key} {arguments[key]}'.rstrip() + '\n'

        extra_value = arguments.get('extra')
        if extra_value:
            command += f' {extra_value}'
            comment += f'# <extra> {extra_value}\n'

        # Exit the script with error of cmsDriver.py
        command += ' || exit $?'
        if for_submission:
            return command

        return comment + '\n' + command

    def __build_das_command(self, step_index):
        """
        Build a dasgoclient command to fetch input dataset file names
        """
        input_dict = self.get('input')
        dataset = input_dict['dataset']
        lumisections = input_dict['lumisection']
        if lumisections:
            self.logger.info('Making a DAS command for step %s with lumisection list',
                             step_index)
            files_name = f'step{step_index + 1}_files.txt'
            lumis_name = f'step{step_index + 1}_lumi_ranges.txt'
            comment = f'# Arguments for step {step_index + 1}:\n'
            command = f'# Command for step {step_index + 1}:\n'
            comment += f'# dataset: {dataset}\n'
            command += f'echo "" > {files_name}\n'
            for run, lumi_ranges in lumisections.items():
                for lumi_range in lumi_ranges:
                    comment += f'# run: {run}, range: {lumi_range[0]} - {lumi_range[1]}\n'
                    command += 'dasgoclient --limit 0 --format json '
                    command += f'--query "lumi,file dataset={dataset} run={run}"'
                    command += f' | das-selected-lumis.py {lumi_range[0]},{lumi_range[1]}'
                    command += f' | sort -u >> {files_name}\n'

            lumi_json = json.dumps(lumisections)
            command += f'echo \'{lumi_json}\' > {lumis_name}'
            return (comment + '\n' + command).strip()

        runs = input_dict['run']
        if runs:
            self.logger.info('Making a DAS command for step %s with run list', step_index)
            files_name = f'step{step_index + 1}_files.txt'
            comment = f'# Arguments for step {step_index + 1}:\n'
            command = f'# Command for step {step_index + 1}:\n'
            comment += f'# dataset: {dataset}\n'
            command += f'echo "" > {files_name}\n'
            for run_chunk in self.chunkify(runs, 25):
                run_chunk = ','.join([str(r) for r in run_chunk])
                comment += f'# runs: {run_chunk}\n'
                command += 'dasgoclient --limit 0 '
                command += f'--query "file dataset={dataset} run in [{run_chunk}]" '
                command += f'>> {files_name}\n'

            return (comment + '\n' + command).strip()

        return f'# Step {step_index + 1} is input dataset for next step: {dataset}'

    def get_command(self, custom_fragment=None, for_submission=False):
        """
        Return a cmsDriver command for this step
        Config file is named like this
        """
        step_type = self.get_step_type()
        index = self.get_index_in_parent()
        if step_type == 'input_file':
            if for_submission:
                return '# Nothing to do for input file step'

            return self.__build_das_command(index)

        arguments_dict = deepcopy(self.get('driver'))
        if custom_fragment:
            arguments_dict['fragment_name'] = custom_fragment

        # No execution
        arguments_dict['no_exec'] = True
        # Handle input/output file names
        arguments_dict['fileout'] = f'"file:step{index + 1}.root"'
        arguments_dict['python_filename'] = f'{self.get_config_file_name()}.py'
        # Add events per lumi to customise_commands
        events_per_lumi = self.get('events_per_lumi')
        if events_per_lumi:
            customise_commands = arguments_dict['customise_commands']
            customise_commands += ';"process.source.numberEventsInLuminosityBlock='
            customise_commands += f'cms.untracked.uint32({events_per_lumi})"'
            arguments_dict['customise_commands'] = customise_commands.lstrip(';')

        # Add number of cpu cores of the RelVal if it is >1
        # and this is not a harvesting step
        cpu_cores = self.parent().get('cpu_cores')
        if cpu_cores > 1 and not self.has_step('HARVESTING') and not self.has_step('ALCAHARVEST'):
            arguments_dict['nThreads'] = cpu_cores

        all_steps = self.parent().get('steps')
        if index > 0:
            previous = all_steps[index - 1]
            previous_type = previous.get_step_type()
            if previous_type == 'input_file':
                # If previous step is an input file, use it as input
                if for_submission:
                    arguments_dict['filein'] = '"file:_placeholder_.root"'
                else:
                    previous_input = previous.get('input')
                    previous_lumisection = previous_input['lumisection']
                    previous_run = previous_input['run']
                    if previous_lumisection:
                        # If there are lumi ranges, add a file with them
                        # and list of files as input
                        arguments_dict['filein'] = f'"filelist:step{index}_files.txt"'
                        arguments_dict['lumiToProcess'] = f'"step{index}_lumi_ranges.txt"'
                    elif previous_run:
                        # If there is a run whitelist, add the file
                        arguments_dict['filein'] = f'"filelist:step{index}_files.txt"'
                    else:
                        # If there are no lumi ranges, use input file normally
                        previous_dataset = previous_input['dataset']
                        arguments_dict['filein'] = f'"dbs:{previous_dataset}"'
            else:
                # If previous step is a cmsDriver, use its output root file
                input_number = self.get_input_step_index() + 1
                eventcontent_index, eventcontent = self.get_input_eventcontent()
                if eventcontent_index == 0:
                    arguments_dict['filein'] = f'"file:step{input_number}.root"'
                else:
                    arguments_dict['filein'] = f'"file:step{input_number}_in{eventcontent}.root"'

        cms_driver_command = self.__build_cmsdriver(index, arguments_dict, for_submission)
        return cms_driver_command

    def has_step(self, step):
        """
        Return if this RelValStep has certain step in --step argument
        """
        for one_step in self.get('driver')['step']:
            if one_step.startswith(step):
                return True

        return False

    def has_eventcontent(self, eventcontent):
        """
        Return if this RelValStep has certain eventcontent in --eventcontent argument
        """
        return eventcontent in self.get('driver')['eventcontent']

    def get_input_step_index(self):
        """
        Get index of step that will be used as input step for current step
        """
        all_steps = self.parent().get('steps')
        index = self.get_index_in_parent()
        this_is_harvesting = self.has_step('HARVESTING')
        self_step = self.get('driver')['step']
        this_is_alca = self_step and self_step[0].startswith('ALCA')
        self.logger.info('Get input for step %s, harvesting: %s', index, this_is_harvesting)
        for step_index in reversed(range(0, index)):
            step = all_steps[step_index]
            # Harvesting step is never input
            if step.has_step('HARVESTING'):
                continue

            # AlCa step is never input
            step_step = step.get('driver')['step']
            if step_step and step_step[0].startswith('ALCA'):
                continue

            # Harvesting step needs DQM as input
            if this_is_harvesting and not step.has_eventcontent('DQM'):
                continue

            # AlCa step needs RECO as input
            if this_is_alca and not step.has_step('RECO'):
                continue

            return step_index

        name = self.get('name')
        if this_is_harvesting:
            raise Exception('No step with --eventcontent DQM could be found '
                            f'as input for {name} (Harvesting step)')

        if this_is_alca:
            raise Exception('No step with --step RECO could be found '
                            f'as input for {name} (AlCa)')

        raise Exception(f'No input step for {name} could be found')

    def get_input_eventcontent(self, input_step=None):
        """
        Return which eventcontent should be used as input for current RelVal step
        """
        if input_step is None:
            all_steps = self.parent().get('steps')
            input_step_index = self.get_input_step_index()
            input_step = all_steps[input_step_index]

        this_is_harvesting = self.has_step('HARVESTING')
        self_step = self.get('driver')['step']
        this_is_alca = self_step and self_step[0].startswith('ALCA')
        input_step_eventcontent = input_step.get('driver')['eventcontent']
        if this_is_harvesting:
            for eventcontent_index, eventcontent in enumerate(input_step_eventcontent):
                if eventcontent == 'DQM':
                    return eventcontent_index, eventcontent

            raise Exception(f'No DQM eventcontent in the input step {input_step_eventcontent}')

        if this_is_alca:
            for eventcontent_index, eventcontent in enumerate(input_step_eventcontent):
                if eventcontent.startswith('RECO'):
                    return eventcontent_index, eventcontent

            raise Exception(f'No RECO eventcontent in the input step {input_step_eventcontent}')

        input_step_eventcontent = [x for x in input_step_eventcontent
                                   if not x.startswith('DQM')]
        return len(input_step_eventcontent) - 1, input_step_eventcontent[-1]

    def get_config_file_name(self):
        """
        Return config file name without extension
        """
        if self.get_step_type() == 'input_file':
            return None

        index = self.get_index_in_parent()
        return f'step_{index + 1}_cfg'

    def get_relval_events(self):
        """
        Split --relval argument to total events and events per job/lumi
        """
        relval = self.get('driver')['relval']
        if not relval:
            raise Exception('--relval is not set')

        relval = relval.split(',')
        if len(relval) < 2:
            raise Exception('Not enough parameters in --relval argument')

        requested_events = int(relval[0])
        events_per = int(relval[1])
        return requested_events, events_per

    def get_release(self):
        """
        Return CMSSW release of the step
        If CMSSW release is not specified, return release of the parent RelVal
        """
        cmssw_release = self.get('cmssw_release')
        if cmssw_release:
            return cmssw_release

        if not self.parent:
            raise Exception('Could not get CMSSW release, because step has no parent')

        cmssw_release = self.parent().get('cmssw_release')
        return cmssw_release

    def get_scram_arch(self):
        """
        Return the scram arch of the step
        If scram arch is not specified, return scram arch of the release
        """
        scram_arch = self.get('scram_arch')
        if scram_arch:
            return scram_arch

        if self.parent:
            scram_arch = self.parent().get('scram_arch')
            if scram_arch:
                return scram_arch

        cmssw_release = self.get_release()
        scram_arch = get_scram_arch(cmssw_release)
        if scram_arch:
            return scram_arch

        raise Exception(f'Could not find SCRAM arch of {cmssw_release}')

    def get_gpu_requires(self):
        """
        Return whether GPU is required, optional or forbidden
        """
        return self.get('gpu')['requires']

    def get_gpu_dict(self):
        """
        Return a dictionary with GPU parameters for ReqMgr2
        """
        gpu_info = self.get('gpu')
        keys = {'cuda_capabilities': 'CUDACapabilities',
                'cuda_runtime': 'CUDARuntime',
                'gpu_name': 'GPUName',
                'cuda_driver_version': 'CUDADriverVersion',
                'cuda_runtime_version': 'CUDARuntimeVersion'}
        params = {key: gpu_info[attr] for attr, key in keys.items() if gpu_info.get(attr)}
        if gpu_info.get('gpu_memory'):
            params['GPUMemoryMB'] = int(gpu_info['gpu_memory'])

        return params
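# get_gpu_dict exercised standalone (illustrative values): only non-empty
# GPU attributes are translated to their ReqMgr2 keys, and gpu_memory is
# cast to an int under the GPUMemoryMB key.
gpu_info = {'requires': 'required', 'gpu_memory': '8000',
            'cuda_capabilities': ['7.5'], 'cuda_runtime': '11.2',
            'gpu_name': '', 'cuda_driver_version': '', 'cuda_runtime_version': ''}
keys = {'cuda_capabilities': 'CUDACapabilities', 'cuda_runtime': 'CUDARuntime',
        'gpu_name': 'GPUName', 'cuda_driver_version': 'CUDADriverVersion',
        'cuda_runtime_version': 'CUDARuntimeVersion'}
params = {key: gpu_info[attr] for attr, key in keys.items() if gpu_info.get(attr)}
if gpu_info.get('gpu_memory'):
    params['GPUMemoryMB'] = int(gpu_info['gpu_memory'])

assert params == {'CUDACapabilities': ['7.5'], 'CUDARuntime': '11.2', 'GPUMemoryMB': 8000}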