Exemplo n.º 1
    def check_attribute(self, attribute_name, attribute_value):
        """
        Validate an attribute value, with extra per-step checks for 'steps'

        For 'steps' the value must be a list of dictionaries. In every step
        the subcampaign, processing string and priority are validated with
        the matching ModelBase.lambda_check and time/size per event must be
        positive. An Exception is raised on the first invalid value,
        otherwise the result of the parent class check is returned
        """
        if attribute_name == 'steps':
            if not isinstance(attribute_value, list):
                raise Exception(f'Expected {attribute_name} to be a list')

            for step in attribute_value:
                subcampaign = step['subcampaign']
                if not ModelBase.lambda_check('subcampaign')(subcampaign):
                    raise Exception(f'Bad subcampaign prepid {subcampaign}')

                processing_string = step['processing_string']
                if not ModelBase.lambda_check('processing_string')(
                        processing_string):
                    raise Exception(
                        f'Bad processing string {processing_string}')

                time_per_event = step['time_per_event']
                if time_per_event <= 0.0:
                    raise Exception(f'Bad time per event {time_per_event}')

                size_per_event = step['size_per_event']
                if size_per_event <= 0.0:
                    raise Exception(f'Bad size per event {size_per_event}')

                priority = step['priority']
                if not ModelBase.lambda_check('priority')(priority):
                    raise Exception(f'Bad priority {priority}')

        return super().check_attribute(attribute_name, attribute_value)
Exemplo n.º 2
    def __init__(self, json_input=None, parent=None, check_attributes=True):
        """
        Initialise the object through ModelBase and remember the parent

        The parent, when given, is kept only as a weak reference. Once the
        object is built, 'eventcontent' and 'datatier' are passed through
        check_attribute one more time
        """
        # Default first, ModelBase initialisation second, weak reference last
        self.parent = None
        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)

        for attribute in ('eventcontent', 'datatier'):
            self.check_attribute(attribute, self.get(attribute))
Exemplo n.º 3
class CampaignTicket(ModelBase):
    """
    Campaign ticket has a list of input datasets, a campaign and a processing string
    Campaign ticket can be used to create requests for each input dataset
    """

    _ModelBase__schema = {
        # Database id (required by CouchDB)
        '_id': '',
        # Document revision (required by CouchDB)
        '_rev': '',
        # PrepID
        'prepid': '',
        # Conditions global tag
        # NOTE(review): the original comment here described a campaign name
        'conditions_globaltag': '',
        # Processing string for this ticket (label at the time of the submission)
        'processing_string': '',
        # List of prepids of requests that were created from this ticket
        'created_requests': [],
        # Status is either new or done
        'status': 'new',
        # User notes
        'notes': '',
        # NOTE(review): several comments below have no attribute after them
        # although _lambda_checks has checks for such names - confirm whether
        # the attributes should be part of the schema
        #sample tag (to be chosen from a list)
        #pile_up production (to be chosen from a list)
        #high_statistics production (True or False)
        'high_statistics': False,
        #string for GS input
        #GEN-SIM samples to be re-used?
        'ReUseGenSim': False,
        #extension number (just a number, if a similar sample was already submitted, observed especially in Phase II)
        # Action history
        'history': [],
    }

    # Attribute name -> function that returns True when the value is valid
    _lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,50}'),
        'conditions_globaltag': lambda gt: ModelBase.matches_regex(gt, '[a-zA-Z0-9_\\-]{1,50}'),
        'cmssw_release': lambda cmssw_release: ModelBase.matches_regex(cmssw_release, '[a-zA-Z0-9_\\-]{1,50}'),
        'processing_string': lambda ps: ModelBase.matches_regex(ps, '[a-zA-Z0-9_]{0,100}'),
        'status': lambda status: status in ('new', 'done'),
        # A comma was missing between 'fastSim_2018' and 'Run3', which made
        # Python join them into a single 'fastSim_2018Run3' value
        'sample_tag': lambda sample_tag: sample_tag in ('Run2_2016', 'Run2_2017', 'Run2_2018',
                                                        'fastSim_2016', 'fastSim_2017', 'fastSim_2018',
                                                        'Run3', 'PhaseII', 'customized'),
        'pile_up': lambda pile_up: pile_up in ('classical_mixing', 'premix', 'no_pile_up'),
        'high_statistics': lambda high_statistics: isinstance(high_statistics, bool),
        'ReUseGenSim': lambda ReUseGenSim: isinstance(ReUseGenSim, bool),
        'extension_number': lambda extension_number: isinstance(extension_number, int),
        'string_for_inputGS': lambda string_for_inputGS: ModelBase.matches_regex(string_for_inputGS, '[a-zA-Z0-9_\\-]{1,50}'),
    }

    def __init__(self, json_input=None):
        """
        Create a ticket from the given dictionary using ModelBase
        """
        ModelBase.__init__(self, json_input)
Exemplo n.º 4
class Subcampaign(ModelBase):
    """
    Class that represents a snapshot of computing campaign
    It is used as a template for requests
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Energy in TeV
        'energy': 0.0,
        # Action history
        'history': [],
        # Default memory
        'memory': 2000,
        # User notes
        'notes': '',
        # Path to json that contains all runs
        'runs_json_path': '',
        # List of Sequences
        'sequences': [],
    }

    __runs_json_regex = '[a-zA-Z0-9/\\-_]{0,150}(\\.json|\\.txt)?'
    # NOTE(review): the dictionary keys were missing in the reviewed copy.
    # They are reconstructed from the lambda argument names and the schema;
    # '__sequences' is assumed to be the per-item check of 'sequences' -
    # confirm against ModelBase. Other checks may have been lost as well
    lambda_checks = {
        'runs_json_path': lambda rjp: ModelBase.matches_regex(rjp, Subcampaign.__runs_json_regex),
        'sequences': lambda s: len(s) > 0,
        '__sequences': lambda s: isinstance(s, Sequence),
    }

    def __init__(self, json_input=None, check_attributes=True):
        """
        Normalise 'runs_json_path' and turn sequence dictionaries into
        Sequence objects before handing the input to ModelBase

        Note that json_input is modified in place
        """
        if json_input:
            # Strip whitespace and leading slashes from the path
            json_input['runs_json_path'] = json_input.get(
                'runs_json_path', '').strip().lstrip('/')
            # NOTE(review): the loop body was missing in the reviewed copy,
            # Sequence construction below is a reconstruction - confirm the
            # arguments, especially passing check_attributes through
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(
                    Sequence(json_input=sequence_json,
                             parent=self,
                             check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
Exemplo n.º 5
    def __init__(self, json_input=None, check_attributes=True):
        """
        Normalise 'runs_json_path' and turn sequence dictionaries into
        Sequence objects before handing the input to ModelBase

        Note that json_input is modified in place
        """
        if json_input:
            # Strip whitespace and leading slashes from the path
            json_input['runs_json_path'] = json_input.get(
                'runs_json_path', '').strip().lstrip('/')
            # NOTE(review): the loop body was missing in the reviewed copy,
            # Sequence construction below is a reconstruction - confirm the
            # arguments, especially passing check_attributes through
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(
                    Sequence(json_input=sequence_json,
                             parent=self,
                             check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
Exemplo n.º 6
    def __init__(self, json_input=None, check_attributes=True):
        """
        Work on a deep copy of the input: cast runs to integers and turn
        sequence dictionaries into Sequence objects, then let ModelBase
        finish the initialisation
        """
        if json_input:
            json_input = deepcopy(json_input)
            json_input['runs'] = [int(r) for r in json_input.get('runs', [])]
            # NOTE(review): the loop body was missing in the reviewed copy,
            # Sequence construction below is a reconstruction - confirm the
            # arguments, especially passing check_attributes through
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(
                    Sequence(json_input=sequence_json,
                             parent=self,
                             check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
Exemplo n.º 7
    def __init__(self, json_input=None, check_attributes=True):
        """
        Work on a deep copy of the input: cast workflow ids to floats and
        'recycle_gs' to bool, reset GPU settings when GPU is not requested,
        then let ModelBase finish the initialisation
        """
        if json_input:
            json_input = deepcopy(json_input)
            json_input['workflow_ids'] = [
                float(wid) for wid in json_input['workflow_ids']
            ]
            json_input['recycle_gs'] = bool(json_input.get(
                'recycle_gs', False))
            # NOTE(review): the end of this tuple was cut off in the reviewed
            # copy, 'required' is an assumption - confirm the allowed values
            if json_input.get('gpu', {}).get('requires') not in ('optional',
                                                                  'required'):
                # Fall back to schema defaults with GPU usage forbidden
                json_input['gpu'] = self.schema().get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                json_input['gpu_steps'] = []

        ModelBase.__init__(self, json_input, check_attributes)
Exemplo n.º 8
    def __init__(self, json_input=None, parent=None, check_attributes=True):
        """
        Prepare a deep copy of the input and initialise through ModelBase

        A step with an input dataset gets default 'driver' and 'gpu' values
        and its 'input' is completed with schema defaults. Any other step
        gets a default 'input', its 'driver' keys are stripped of leading
        dashes and completed with schema defaults. Raises Exception when
        --data is combined with --mc or --fast. The parent is kept as a weak
        reference

        NOTE(review): both 'else' branches and a few closing brackets were
        missing in the reviewed copy and are reconstructed from indentation
        """
        if json_input:
            json_input = deepcopy(json_input)
            schema = self.schema()
            if json_input.get('input', {}).get('dataset'):
                # Input step: no cmsDriver arguments, GPU is never used
                json_input['driver'] = schema.get('driver')
                json_input['gpu'] = schema.get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                step_input = json_input['input']
                for key, default_value in schema['input'].items():
                    if key not in step_input:
                        step_input[key] = default_value
            else:
                # Remove -- from argument names
                json_input['driver'] = {
                    k.lstrip('-'): v
                    for k, v in json_input['driver'].items()
                }
                json_input['input'] = schema.get('input')
                # NOTE(review): the end of this tuple was cut off,
                # 'required' is an assumption - confirm the allowed values
                if json_input.get('gpu',
                                  {}).get('requires') not in ('optional',
                                                              'required'):
                    json_input['gpu'] = schema.get('gpu')
                    json_input['gpu']['requires'] = 'forbidden'

                driver = json_input['driver']
                for key, default_value in schema['driver'].items():
                    if key not in driver:
                        driver[key] = default_value

                if driver.get('data') and driver.get('mc'):
                    raise Exception(
                        'Both --data and --mc are not allowed in the same step'
                    )

                if driver.get('data') and driver.get('fast'):
                    raise Exception(
                        'Both --data and --fast are not allowed in the same step'
                    )

        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)
        else:
            self.parent = None
Exemplo n.º 9
    def __init__(self, json_input=None, check_attributes=True):
        """
        Work on a deep copy of the input and normalise every step to a
        dictionary with string, float and int values of expected types,
        then let ModelBase finish the initialisation
        """
        if json_input:
            json_input = deepcopy(json_input)
            # NOTE(review): the reviewed copy had only the five values, the
            # surrounding append and the dictionary keys are reconstructed
            # from the names that are read from each step - confirm
            steps = [{
                'subcampaign': step.get('subcampaign', ''),
                'processing_string': step.get('processing_string', ''),
                'time_per_event': float(step.get('time_per_event', 0)),
                'size_per_event': float(step.get('size_per_event', 0)),
                'priority': int(step.get('priority', 0)),
            } for step in json_input.get('steps', [])]
            json_input['steps'] = steps

        ModelBase.__init__(self, json_input, check_attributes)
Exemplo n.º 10
class Campaign(ModelBase):

    # Default attribute values of the object
    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # No need for CMSSW version
        'cmssw_release': '',
        #Sample tag
        'sample_tag': '',
        # User notes
        'notes': '',
        # Link, presumably to production monitoring - TODO confirm
        'link_prodmon': '',
        'history': []
    # NOTE(review): the closing brace of the schema dictionary is missing here

    # NOTE(review): this block is incomplete in the reviewed copy: dictionary
    # keys in front of the lambdas are missing, the matches_regex call for
    # link_prodmon is cut off before its pattern and the closing brace is
    # missing. The pattern cannot be recovered from this copy
    __lambda_checks = {
        lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        lambda link_prodmon: ModelBase.matches_regex(link_prodmon,
        lambda sample_tag: sample_tag in ['Phase2', 'Run3', 'Run2_2016'],
        lambda cmssw_release: 'CMSSW' in cmssw_release

    # Build the object from a dictionary using ModelBase
    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    # Return the result of the matching lambda check or True when the
    # attribute has no check
    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Exemplo n.º 11
class Flow(ModelBase):
    """
    Flow has a list of allowed source campaigns and a target campaign
    """

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # List of allowed source campaigns prepids
        'source_campaigns': [],
        # List of statuses for the flow
        'status': '',
        # Target campaign prepid
        'target_campaign': ''}

    # Attribute name -> function that returns True when the value is valid
    # (a comma between the entries and the closing brace were missing)
    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'status': lambda status: status in ['new', 'submit', 'tasksubmit'],
    }
Exemplo n.º 12
class Campaign(ModelBase):
    """
    Campaign holds default values, including a default list of cmsDriver
    option dictionaries, and validates its attributes with lambda checks
    """

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Energy in TeV
        'energy': 0.0,
        # Type LHE, MCReproc, Prod
        'type': '',
        # Step type: MiniAOD, NanoAOD, etc.
        'step': 'DR',
        # No need for CMSSW version
        #'cmssw_release': '',
        # User notes
        'notes': '',
        # List of dictionaries that have cmsDriver options (default to be modified, just a guideline, what is normally needed)
        # Some keys were separated from their values with commas instead of
        # colons, which is not a valid dictionary
        'sequences': [{
            "conditions": "GT_FromAlca",
            "step": "RAW2DIGI,L1Reco,RECO,EI,PAT,DQM:@rerecoCommon",
            "datatier": "AOD,MINIAOD,DQM",
            "eventcontent": "RECO,SKIM,ALCA,MINIAOD,DQMIO",
            "era": "Run2_201XXX",
            "extra": "--runUnscheduled",
            "scenario": "pp",
            "nThreads": "8",
            "customise": "Configuration/DataProcessing/RecoTLR.customisePostEra_Run2_201XXX",
        }],
        # Action history
        'history': [],
        # Default memory
        'memory': 2300}

    # Attribute name -> function that returns True when the value is valid
    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'energy': lambda energy: energy >= 0.0,
        'step': lambda step: step in ['DR', 'MiniAOD', 'NanoAOD'],
        'memory': lambda memory: memory >= 0,
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release,
    }

    def __init__(self, json_input=None):
        """
        Create a campaign from the given dictionary using ModelBase
        """
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        """
        Return result of the lambda check of given attribute
        Attributes that have no check are always valid
        """
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Exemplo n.º 13
class ChainedCampaign(ModelBase):
    """
    Chained campaign has a list of flow and campaign pairs
    """

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Notes
        'notes': '',
        # List of flow and campaign pairs
        'campaigns': []}

    # Attribute name -> function that returns True when the value is valid
    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
    }

    def __init__(self, json_input=None):
        """
        Create a chained campaign from the given dictionary using ModelBase
        """
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        """
        Return result of the lambda check of given attribute
        Attributes that have no check are always valid
        """
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Exemplo n.º 14
class Request(ModelBase):
    """
    Request represents a single step in processing pipeline
    Request contains one or a few cmsDriver commands
    It is created based on a subcampaign that it is a member of
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Completed events
        'completed_events': 0,
        # Energy in TeV
        'energy': 0.0,
        # Action history
        'history': [],
        # Input dataset name or request name
        'input': {
            'dataset': '',
            'request': ''
        },
        # Dictionary of runs and their lumisection ranges to be processed
        'lumisections': {},
        # Memory in MB
        'memory': 2000,
        # User notes
        'notes': '',
        # List of output
        'output_datasets': [],
        # Priority in computing
        'priority': 110000,
        # Processing string
        'processing_string': '',
        # List of runs to be processed
        'runs': [],
        # List of dictionaries that have cmsDriver options
        'sequences': [],
        # Disk size per event in kB
        'size_per_event': 1.0,
        # Status is either new, approved, submitted or done
        'status': 'new',
        # Subcampaign name
        'subcampaign': '',
        # Time per event in seconds
        'time_per_event': 1.0,
        # Total events
        'total_events': 0,
        # List of workflows in computing
        'workflows': []
    }

    __prepid_regex = '[a-zA-Z0-9\\-_]{1,100}'
    # NOTE(review): most dictionary keys were missing in the reviewed copy.
    # They are reconstructed from the lambda argument names and from the
    # alphabetical order of the schema; '__runs' and '__sequences' are
    # assumed to be per-item checks of the lists - confirm against ModelBase.
    # Other checks may have been lost as well
    lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, Request.__prepid_regex),
        'completed_events': lambda events: events >= 0,
        '_input': {
            'dataset': lambda ds: not ds or ModelBase.lambda_check('dataset')(ds),
            'request': lambda r: not r or ModelBase.matches_regex(r, Request.__prepid_regex),
        },
        '__runs': lambda r: isinstance(r, int) and r > 0,
        '__sequences': lambda s: isinstance(s, Sequence),
        'size_per_event': lambda spe: spe > 0.0,
        'status': lambda status: status in
        {'new', 'approved', 'submitting', 'submitted', 'done'},
        'time_per_event': lambda tpe: tpe > 0.0,
        'total_events': lambda events: events >= 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        """
        Work on a deep copy of the input: cast runs to integers and turn
        sequence dictionaries into Sequence objects, then let ModelBase
        finish the initialisation
        """
        if json_input:
            json_input = deepcopy(json_input)
            json_input['runs'] = [int(r) for r in json_input.get('runs', [])]
            # NOTE(review): the loop body was missing in the reviewed copy,
            # Sequence construction below is a reconstruction - confirm the
            # arguments, especially passing check_attributes through
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(
                    Sequence(json_input=sequence_json,
                             parent=self,
                             check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)

    def check_attribute(self, attribute_name, attribute_value):
        """
        Require either input dataset or input request for 'input' attribute
        and then run the regular ModelBase check
        """
        if attribute_name == 'input':
            if not attribute_value.get('dataset') and not attribute_value.get(
                    'request'):
                raise Exception(
                    'Either input dataset or input request must be provided')

        return super().check_attribute(attribute_name, attribute_value)

    def get_config_file_names(self):
        """
        Get list of dictionaries of all config file names without extensions
        """
        # NOTE(review): the loop body was missing in the reviewed copy,
        # collecting names of every sequence follows the description above
        return [
            sequence.get_config_file_names()
            for sequence in self.get('sequences')
        ]

    def get_cmsdrivers(self, overwrite_input=None):
        """
        Get all cmsDriver commands for this request
        Only the first sequence gets overwrite_input, if it is given
        """
        built_command = ''
        for index, sequence in enumerate(self.get('sequences')):
            if index == 0 and overwrite_input:
                built_command += sequence.get_cmsdriver(overwrite_input)
            else:
                built_command += sequence.get_cmsdriver()

            if sequence.needs_harvesting():
                built_command += '\n\n'
                built_command += sequence.get_harvesting_cmsdriver()

            built_command += '\n\n'

        return built_command.strip()

    def __get_input_dataset_parts(self):
        """
        Return non-empty parts of input dataset name split by slash
        """
        return [x for x in self.get('input')['dataset'].split('/') if x]

    def get_era(self):
        """
        Return era based on input dataset
        If dataset name has less than two parts, second dash-separated part
        of prepid is returned
        """
        input_dataset_parts = self.__get_input_dataset_parts()
        if len(input_dataset_parts) < 2:
            return self.get_prepid().split('-')[1]

        return input_dataset_parts[1].split('-')[0]

    def get_input_processing_string(self):
        """
        Return processing string from input dataset
        It is the middle of second part of dataset name, without first and
        last dash-separated pieces. Empty string if there are too few parts
        """
        input_dataset_parts = self.__get_input_dataset_parts()
        if len(input_dataset_parts) < 3:
            return ''

        middle_parts = [x for x in input_dataset_parts[1].split('-') if x]
        if len(middle_parts) < 3:
            return ''

        return '-'.join(middle_parts[1:-1])

    def get_dataset(self):
        """
        Return primary dataset based on input dataset
        If there is no input dataset, third dash-separated part of prepid
        is returned
        """
        input_dataset_parts = self.__get_input_dataset_parts()
        if not input_dataset_parts:
            return self.get_prepid().split('-')[2]

        return input_dataset_parts[0]

    def get_request_string(self):
        """
        Return request string made of era, dataset and processing string
        """
        processing_string = self.get('processing_string')
        era = self.get_era()
        dataset = self.get_dataset()
        return f'{era}_{dataset}_{processing_string}'.strip('_')
Exemplo n.º 15
 def __init__(self, json_input=None):
     """
     Create the object from the given dictionary using ModelBase
     """
     ModelBase.__init__(self, json_input)
Exemplo n.º 16
 def __init__(self, json_input=None):
     """
     Create the object from the given dictionary using ModelBase
     and set 'flows' as its collection name
     """
     ModelBase.__init__(self, json_input)
     self.collection = 'flows'
Exemplo n.º 17
class Sequence(ModelBase):
    # NOTE(review): the next two lines read like the class docstring, but the
    # enclosing triple quotes are missing in the reviewed copy
    Sequence is a dictionary that has all user editable attributes
    for cmsDriver command

    _ModelBase__schema = {
        # What conditions to use. This has to be specified
        'conditions': '',
        # Hash of configuration file uploaded to ReqMgr2
        'config_id': '',
        # Specify the file where the code to modify the process object is stored
        # If inline_custom is set to 1, then inline the customisation file
        'customise': '',
        # What data tier to use
        'datatier': [],
        # Specify which era to use (e.g. "run2")
        'era': '',
        # What event content to write out
        'eventcontent': [],
        # Freeform attributes appended at the end
        'extra': '',
        # Hash of harvesting configuration file uploaded to ReqMgr2
        'harvesting_config_id': '',
        # How many threads should CMSSW use
        'nThreads': 1,
        # Scenario overriding standard settings: 'pp', 'cosmics', 'nocoll', 'HeavyIons'
        'scenario': 'pp',
        # The desired step. The possible values are:
        # RAW2DIGI, L1Reco, RECO, EI, PAT, NANO, ALCA[:@...], DQM[:@...], SKIM[:@...],
        # HARVESTING:@...
        'step': []
    # NOTE(review): the closing brace of the schema dictionary is missing here

    # NOTE(review): this block is incomplete in the reviewed copy: dictionary
    # keys in front of the lambdas are missing, both "lambda s: s in" lines
    # are cut off before the collection of allowed values and the closing
    # brace is missing. The allowed values cannot be recovered from this copy
    lambda_checks = {
        lambda c: ModelBase.matches_regex(c, '[a-zA-Z0-9_]{0,50}'),
        lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        lambda s: s in
        lambda e: ModelBase.matches_regex(e, '[a-zA-Z0-9_\\,]{0,50}'),
        lambda s: s in
        lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        lambda n: 0 < n < 64,
        lambda s: s in {'pp', 'cosmics', 'nocoll', 'HeavyIons'},
        lambda s: (s in {'RAW2DIGI', 'L1Reco', 'RECO', 'EI', 'PAT', 'NANO'} or
                   s.startswith('ALCA') or s.startswith('DQM') or s.startswith(
                       'SKIM') or s.startswith('HARVESTING:@'))

    def __init__(self, json_input=None, parent=None, check_attributes=True):
        """
        Initialise the sequence through ModelBase and remember the parent

        The parent, when given, is kept only as a weak reference. Once the
        object is built, 'eventcontent' and 'datatier' are passed through
        check_attribute one more time
        """
        # Default first, ModelBase initialisation second, weak reference last
        self.parent = None
        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)

        for attribute in ('eventcontent', 'datatier'):
            self.check_attribute(attribute, self.get(attribute))

    def get_prepid(self):
        """
        Return identifier of the sequence: plain 'Sequence' when there is no
        parent, otherwise 'Sequence_<parent>_<index in parent>'
        """
        if self.parent:
            owner = self.parent()
            position = self.get_index_in_parent()
            return f'Sequence_{owner}_{position}'

        return 'Sequence'

    def check_attribute(self, attribute_name, attribute_value):
        """
        Check attribute value and make sure that eventcontent and datatier
        are not empty unless sequence has a HARVESTING step

        The additional check is done only for 'eventcontent' and 'datatier'
        of an initialized object that has at least one step. Raises
        Exception if either of them is empty, otherwise returns the result
        of the regular ModelBase check
        """
        if not self.initialized or attribute_name not in ('eventcontent',
                                                          'datatier'):
            return super().check_attribute(attribute_name, attribute_value)

        steps = self.get('step')
        has_harvesting_step = any(
            s.startswith('HARVESTING:@') for s in steps)
        if not steps or has_harvesting_step:
            return super().check_attribute(attribute_name, attribute_value)

        # If sequence does not have HARVESTING step, eventcontent and datatier cannot be empty
        if not self.get('eventcontent'):
            raise Exception(
                'No eventcontent is allowed only with HARVESTING step')

        if not self.get('datatier'):
            raise Exception('No datatier is allowed only with HARVESTING step')

        return super().check_attribute(attribute_name, attribute_value)

    def needs_harvesting(self):
        """
        Return if this sequence produces input file for harvesting
        and harvesting step is needed
        That is the case when there is a 'DQM' or 'DQM:...' step
        """
        return any(step == 'DQM' or step.startswith('DQM:')
                   for step in self.get('step'))

    def get_index_in_parent(self):
        """
        Return sequence's index in parent's list of sequences
        Raise an Exception if parent does not have this sequence
        """
        for index, sequence in enumerate(self.parent().get('sequences')):
            if self == sequence:
                return index

        raise Exception(
            f'Sequence is not a child of {self.parent().get_prepid()}')

    def get_name(self):
        """
        Return a sequence name which is based on parent
        prepid and sequence number
        Last sequence always has the same name as parent prepid
        Other sequences have suffix with their index, e.g
        with three sequences names are PrepID_0, PrepID_1 and PrepID

        If there is only one sequence, it will be the last one
        and have the same name as parent prepid
        """
        index = self.get_index_in_parent()
        parent_prepid = self.parent().get_prepid()
        if index != len(self.parent().get('sequences')) - 1:
            sequence_name = f'{parent_prepid}_{index}'
        else:
            sequence_name = f'{parent_prepid}'

        return sequence_name

    def get_config_file_names(self):
        """
        Return dictionary of 'config' and 'harvest' config file names
        'harvest' is present only if sequence needs harvesting
        """
        parent_prepid = self.parent().get_prepid()
        index = self.get_index_in_parent()
        config_file_names = {'config': f'{parent_prepid}_{index}_cfg'}
        if self.needs_harvesting():
            config_file_names[
                'harvest'] = f'{parent_prepid}_{index}_harvest_cfg'

        return config_file_names

    def __build_cmsdriver(self, cmsdriver_type, arguments):
        """
        Build a cmsDriver command from given arguments
        Add comment in front of the command

        Arguments are added in alphabetical order, ones with empty values
        are skipped, True becomes a flag without a value and lists are
        joined with commas. Value of 'extra' is appended as is at the end.
        Given dictionary is not modified
        """
        self.logger.info('Generating %s cmsDriver', cmsdriver_type)
        # Actual command
        command = f'# Command for {cmsdriver_type}:\ncmsDriver.py {cmsdriver_type}'
        # Comment in front of the command for better readability
        comment = f'# Arguments for {cmsdriver_type}:\n'
        for key in sorted(arguments.keys()):
            value = arguments[key]
            if not value:
                continue

            # 'extra' is handled after the loop. This used to be a substring
            # test (key in 'extra') that would match more than one key
            if key == 'extra':
                continue

            if isinstance(value, bool):
                # Flag without a value
                value = ''

            if isinstance(value, list):
                value = ','.join([str(x) for x in value])

            command += f' --{key} {value}'.rstrip()
            comment += f'# --{key} {value}'.rstrip() + '\n'

        if arguments.get('extra'):
            extra_value = arguments['extra']
            command += f' {extra_value}'
            comment += f'# <extra> {extra_value}\n'

        # Exit the script with error of cmsDriver.py
        command += ' || exit $?'

        return comment + '\n' + command

    def get_cmsdriver(self, overwrite_input=None):
        """
        Return a cmsDriver command for this sequence
        Config file name is taken from get_config_file_names

        Input is overwrite_input if it is given. Otherwise first sequence
        reads input request file, list of files from DAS for specified runs
        or whole input dataset and other sequences read output of previous
        sequence

        NOTE(review): three 'else' branches were missing in the reviewed
        copy and are reconstructed from indentation
        """
        sequence_name = self.get_name()
        arguments_dict = dict(self.get_json())
        # Delete sequence metadata
        arguments_dict.pop('config_id', None)
        arguments_dict.pop('harvesting_config_id', None)

        # Fetch list of files for specific runs
        das_query = ''
        # Handle input/output file names
        if overwrite_input:
            arguments_dict['filein'] = overwrite_input
        else:
            index = self.get_index_in_parent()
            arguments_dict['number'] = 10
            if index == 0:
                input_dataset = self.parent().get('input')['dataset']
                all_runs = self.parent().get('runs')
                if not input_dataset:
                    input_request = self.parent().get('input')['request']
                    arguments_dict['filein'] = f'"file:{input_request}.root"'
                elif all_runs:
                    das_file = f'{sequence_name}_files.txt'
                    das_query += '# Query DAS to get list of files for specified runs\n'
                    # Chunkify to 25 runs, otherwise script line gets very long
                    for runs in self.chunkify(all_runs, 25):
                        runs = ','.join([str(r) for r in runs])
                        das_query += 'dasgoclient --limit 0 '
                        das_query += f'--query "file dataset={input_dataset} run in [{runs}]" '
                        das_query += f'>> {das_file}\n'

                    das_query += '\n'
                    arguments_dict['filein'] = f'"filelist:{das_file}"'
                else:
                    arguments_dict['filein'] = f'"dbs:{input_dataset}"'
            else:
                previous_sequence = self.parent().get('sequences')[index - 1]
                input_file = f'{previous_sequence.get_name()}.root'
                arguments_dict['filein'] = f'"file:{input_file}"'

        # Update ALCA and SKIM steps to ALCA:@Dataset and SKIM:@Dataset
        # if dataset name is in "auto" dictionary in CMSSW
        dynamic_steps = self.update_dynamic_steps(arguments_dict['step'])
        # Build argument dictionary
        config_names = self.get_config_file_names()
        arguments_dict['fileout'] = f'"file:{sequence_name}.root"'
        arguments_dict['python_filename'] = f'"{config_names["config"]}.py"'
        arguments_dict['no_exec'] = True
        cms_driver_command = self.__build_cmsdriver('RECO', arguments_dict)
        return dynamic_steps + das_query + cms_driver_command

    def update_dynamic_steps(self, steps):
        """
        Update ALCA and SKIM steps to be variables and return code that resolves them
        Given list is changed in place: 'ALCA' becomes '$ALCA_STEP' and
        'SKIM' becomes '$SKIM_STEP'. Empty string is returned if there are
        no such steps
        """
        dynamic_steps = ''
        for step_index, step in enumerate(steps):
            if step not in ('ALCA', 'SKIM'):
                continue

            dataset = self.parent().get_dataset()
            steps[step_index] = f'${step}_STEP'
            # Build a small python program to get value from CMSSW on the go
            step_var = f'{step}_STEP=$(python -c "'
            if step == 'ALCA':
                step_var += 'from Configuration.AlCa.autoAlca import AlCaRecoMatrix as ds;'
            elif step == 'SKIM':
                step_var += 'from Configuration.Skimming.autoSkim import autoSkim as ds;'

            step_var += f'print(\'{step}:@{dataset}\' if \'{dataset}\' in ds.keys() else \'\')")'
            dynamic_steps += f'{step_var}\n'

        if dynamic_steps:
            dynamic_steps = f'# Steps based on dataset name\n{dynamic_steps}\n'

        return dynamic_steps

    def get_harvesting_cmsdriver(self):
        # NOTE(review): the next two lines read like the docstring, but the
        # enclosing triple quotes and the example of the name are missing
        Return a harvesting cmsDriver for this sequence
        Config file is named like this
        # Nothing to build if the sequence has no DQM step
        if not self.needs_harvesting():
            return ''

        arguments_dict = dict(self.get_json())
        # Delete sequence metadata
        # NOTE(review): the tuple below is cut off after 'extra' in the
        # reviewed copy, remaining names and the closing "):" are lost
        for attr in ('config_id', 'harvesting_config_id', 'customise',
                     'datatier', 'eventcontent', 'nThreads', 'extra',
            arguments_dict.pop(attr, None)

        # Get correct configuration of DQM step, e.g.
        # DQM:@rerecoCommon should be changed to HARVESTING:@rerecoCommon
        step = 'HARVESTING:dqmHarvesting'
        for one_step in self.get('step'):
            if one_step.startswith('DQM:'):
                step = one_step.replace('DQM:', 'HARVESTING:', 1)

        # Build argument dictionary
        sequence_name = self.get_name()
        config_names = self.get_config_file_names()
        arguments_dict['data'] = True
        arguments_dict['no_exec'] = True
        arguments_dict['filetype'] = 'DQM'
        arguments_dict['step'] = step
        # Only the first of comma separated eras is used
        arguments_dict['era'] = arguments_dict['era'].split(',')[0]
        arguments_dict['filein'] = f'"file:{sequence_name}_inDQM.root"'
        arguments_dict['python_filename'] = f'"{config_names["harvest"]}.py"'
        arguments_dict['number'] = -1
        # NOTE(review): the call below is cut off, its second argument
        # (presumably arguments_dict) and the closing bracket are missing
        harvesting_command = self.__build_cmsdriver('HARVESTING',
        return harvesting_command

    def chunkify(items, chunk_size):
        Yield fixed size chunks of given list
        start = 0
        chunk_size = max(chunk_size, 1)
        while start < len(items):
            yield items[start:start + chunk_size]
            start += chunk_size

    def get_output_module(self):
        """
        Return a output module name
        It is made of the first eventcontent that does not start with DQM
        and 'output' suffix. Empty string if there is no such eventcontent
        """
        eventcontent = [
            e for e in self.get('eventcontent') if not e.startswith('DQM')
        ]
        if not eventcontent:
            return ''

        return f'{eventcontent[0]}output'
Exemplo n.º 18
class Request(ModelBase):
    """
    Request has input and output dataset names, a list of cmsDriver option
    dictionaries and values that are filled in at the time of injection
    """

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Energy in TeV
        'energy': 0.0,
        # Step type: MiniAOD, NanoAOD, etc.
        'step': '',
        # CMSSW version
        'cmssw_release': '',
        # User notes
        'notes': '',
        # Input dataset name
        'input_dataset_name': '',
        # Output dataset name
        'output_dataset_name': '',
        # List of dictionaries that have cmsDriver options
        'sequences': [],
        # Action history
        'history': [],
        # Status
        'status': 'new', #it should be either approved, submitted, done (nothing else)
        # Workflow name in computing when submitted
        'reqmgr_name': '',
        # time event
        'time_event': 5.0,
        # size event
        'size_event': 2000,
        # priority
        'priority': 110000,
        #runs for data injections
        'runs': [],
        #type, depends on the datatier of the requests
        # process string
        'process_string': '',
        # Default memory
        'memory': 14000,
        #all the following will be filled in after injection (they will be created at the time of the injection)
        #request id (to be filled in the dictionary of injection)
        'request_id': '',
        #reco cfg (to be filled in the dictionary of injection)
        'reco_cfg': '',
        #harvest cfg (to be filled in the dictionary of injection)
        'harvest_cfg': '',
        #dataset name (this is just the first split of the input dataset name, can be done automatically)
        'dataset_name': '',
    }

    # Attribute name -> function that returns True when the value is valid
    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'energy': lambda energy: energy >= 0.0,
        'time_event': lambda time_event: time_event >= 0.0,
        'size_event': lambda size_event: size_event >= 0.0,
        'priority': lambda priority: priority >= 0.0,
        'DQM': lambda DQM: isinstance(DQM, bool),
        'Reco': lambda Reco: isinstance(Reco, bool),
        # Either one of plain step names or anything that has ALCARECO or DQM
        # in it. This used to look for a list (['ALCARECO'] in step), which
        # raises TypeError when step is a string
        'step': lambda step: (step in ['RAW2DIGI', 'L1Reco', 'RECO', 'EI', 'PAT', 'DQM', 'NANO']
                              or 'ALCARECO' in step
                              or 'DQM' in step),
        'datatier': lambda datatier: datatier in ['AOD', 'MiniAOD', 'NanoAOD', 'DQMIO', 'USER', 'ALCARECO'],
        'memory': lambda memory: memory >= 0,
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release,
        'reco_cfg': lambda reco_cfg: ModelBase.matches_regex(reco_cfg, '[a-zA-Z0-9]{1,50}'),
        'harvest_cfg': lambda harvest_cfg: ModelBase.matches_regex(harvest_cfg, '[a-zA-Z0-9]{1,50}'),
        'process_string': lambda process_string: ModelBase.matches_regex(process_string, '[a-zA-Z0-9]{1,50}'),
        'request_id': lambda request_id: ModelBase.matches_regex(request_id, '[a-zA-Z0-9]{1,50}'),
        'dataset_name': lambda dataset_name: ModelBase.matches_regex(dataset_name, '[a-zA-Z0-9]{1,50}'),
    }

    def __init__(self, json_input=None):
        """
        Create a request from the given dictionary using ModelBase
        """
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        """
        Return result of the lambda check of given attribute
        Attributes that have no check are always valid
        """
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Exemplo n.º 19
class Ticket(ModelBase):
    # NOTE(review): the next line reads like the class docstring but it has no
    # quote delimiters in this copy of the file - TODO restore the triple quotes
    Ticket allows to create multiple similar RelVals in the same campaign

    # Default value of every ticket attribute, keyed by attribute name
    # NOTE(review): the closing brace of the nested 'gpu' dict and the closing
    # brace of the schema itself are not present in this copy - TODO restore
    _ModelBase__schema = {
        # Database id (required by database)
        '_id': '',
        # PrepID
        'prepid': '',
        # Batch name
        'batch_name': '',
        # CMSSW release
        'cmssw_release': '',
        # Additional command to add to all cmsDrivers
        'command': '',
        # List of steps that additional command should be applied to
        'command_steps': [],
        # CPU cores
        'cpu_cores': 1,
        # List of prepids of relvals that were created from this ticket
        'created_relvals': [],
        # GPU parameters that will be added to selected steps
        'gpu': {
            'requires': 'forbidden',
            'gpu_memory': '',
            'cuda_capabilities': [],
            'cuda_runtime': '',
            'gpu_name': '',
            'cuda_driver_version': '',
            'cuda_runtime_version': ''
        # List of steps that GPU parameters should be applied to
        'gpu_steps': [],
        # Action history
        'history': [],
        # Label to be used in runTheMatrix
        'label': '',
        # Type of relval: standard, upgrade, premix, etc.
        'matrix': 'standard',
        # Memory in MB
        'memory': 2000,
        # User notes
        'notes': '',
        # nStreams to be used in all steps, 0 defaults to nThreads
        'n_streams': 0,
        # Whether to recycle first step
        'recycle_gs': False,
        # Which step should be first that run while recycling the input
        'recycle_input_of': '',
        # String to rewrite middle part of INPUT dataset(s) /.../THIS/...
        'rewrite_gt_string': '',
        # Tag to group workflow ids
        'sample_tag': '',
        # Overwrite default scram arch
        'scram_arch': '',
        # Status is either new or done
        'status': 'new',
        # Workflow ids
        'workflow_ids': [],

    # Validators for attribute values; the '_gpu' entry holds validators for
    # individual keys of the 'gpu' dictionary
    # NOTE(review): most entries below have lost their dictionary key, and the
    # first entry, the '_gpu' dict and the outer dict are missing their closing
    # brackets. The keys cannot be recovered from this copy - TODO restore them
    # from the upstream file before relying on this class
    lambda_checks = {
        lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,75}'
        '_gpu': {
            'requires': lambda r: r in ('forbidden', 'optional', 'required'),
            'cuda_capabilities': lambda l: isinstance(l, list),
            'gpu_memory': lambda m: m == '' or int(m) > 0,
        lambda streams: 0 <= streams <= 16,
        lambda rgs: ModelBase.matches_regex(rgs, '[a-zA-Z0-9\\.\\-_]{0,199}'),
        lambda status: status in ('new', 'done'),
        lambda s: not s or ModelBase.lambda_check('scram_arch')(s),
        lambda wf: len(wf) > 0,
        lambda wf: wf > 0,

    def __init__(self, json_input=None, check_attributes=True):
        """
        Normalise a copy of json_input and pass it on to ModelBase.__init__

        Workflow ids are converted to floats and recycle_gs to a bool. Unless
        GPU usage is requested, GPU parameters are reset to the schema default
        with 'requires' set to 'forbidden' and the list of GPU steps is emptied
        """
        if json_input:
            # Work on a copy so the caller's dictionary is left untouched
            json_input = deepcopy(json_input)
            json_input['workflow_ids'] = [
                float(wid) for wid in json_input['workflow_ids']
            ]
            json_input['recycle_gs'] = bool(json_input.get('recycle_gs', False))
            # NOTE(review): the second tuple member was truncated in the
            # original text; 'required' is the only other non-forbidden value
            # accepted by the 'requires' validator - confirm
            gpu_requires = json_input.get('gpu', {}).get('requires')
            if gpu_requires not in ('optional', 'required'):
                json_input['gpu'] = self.schema().get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                json_input['gpu_steps'] = []

        ModelBase.__init__(self, json_input, check_attributes)
Exemplo n.º 20
class Ticket(ModelBase):
    # NOTE(review): the next two lines read like the class docstring but they
    # have no quote delimiters in this copy - TODO restore the triple quotes
    Ticket has a list of input datasets and a list of steps specifications
    Ticket is used to create requests for each input dataset

    # Default value of every ticket attribute, keyed by attribute name
    # NOTE(review): the closing brace of this dict is not present in this copy
    # - TODO restore it
    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # List of prepids of requests that were created from this ticket
        'created_requests': [],
        # Action history
        'history': [],
        # List of input dataset names
        'input_datasets': [],
        # User notes
        'notes': '',
        # Status is either new or done
        'status': 'new',
        # List of dicts that have subcampaign, processing_string, size/time per event values
        'steps': [],

    # Validators for attribute values
    # NOTE(review): the entries below have lost their dictionary keys, and the
    # first entry and the dict itself are missing their closing brackets. The
    # keys cannot be recovered from this copy - TODO restore them from the
    # upstream file before relying on this class
    lambda_checks = {
        lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,75}'
        lambda pi: ModelBase.matches_regex(pi, '[a-zA-Z0-9\\-_]{1,100}'),
        lambda status: status in {'new', 'done'},
        lambda s: len(s) > 0,

    def __init__(self, json_input=None, check_attributes=True):
        """
        Normalise steps in a copy of json_input and pass it to ModelBase.__init__

        Every step is rebuilt with only the known keys: missing strings default
        to '', time and size per event are cast to float and priority to int
        """
        if json_input:
            # Work on a copy so the caller's dictionary is left untouched
            json_input = deepcopy(json_input)
            # NOTE(review): the dictionary keys were lost in the original text;
            # they are taken from the names read back in check_attribute
            json_input['steps'] = [
                {
                    'subcampaign': step.get('subcampaign', ''),
                    'processing_string': step.get('processing_string', ''),
                    'time_per_event': float(step.get('time_per_event', 0)),
                    'size_per_event': float(step.get('size_per_event', 0)),
                    'priority': int(step.get('priority', 0)),
                }
                for step in json_input.get('steps', [])
            ]

        ModelBase.__init__(self, json_input, check_attributes)

    def check_attribute(self, attribute_name, attribute_value):
        """
        Validate an attribute value, with extra per-step checks for 'steps'

        For 'steps' the value must be a list and every step must have a valid
        subcampaign, processing string and priority and a positive time and
        size per event, otherwise an Exception is raised. All attributes,
        'steps' included, are then checked by the parent implementation and
        its result is returned
        """
        if attribute_name == 'steps':
            if not isinstance(attribute_value, list):
                raise Exception(f'Expected {attribute_name} to be a list')

            for step in attribute_value:
                subcampaign = step['subcampaign']
                if not ModelBase.lambda_check('subcampaign')(subcampaign):
                    raise Exception(f'Bad subcampaign prepid {subcampaign}')

                processing_string = step['processing_string']
                if not ModelBase.lambda_check('processing_string')(
                        processing_string):
                    raise Exception(
                        f'Bad processing string {processing_string}')

                time_per_event = step['time_per_event']
                if time_per_event <= 0.0:
                    raise Exception(f'Bad time per event {time_per_event}')

                size_per_event = step['size_per_event']
                if size_per_event <= 0.0:
                    raise Exception(f'Bad size per event {size_per_event}')

                priority = step['priority']
                if not ModelBase.lambda_check('priority')(priority):
                    raise Exception(f'Bad priority {priority}')

        return super().check_attribute(attribute_name, attribute_value)
Exemplo n.º 21
class RelValStep(ModelBase):
    # NOTE(review): the next two lines read like the class docstring but they
    # have no quote delimiters in this copy - TODO restore the triple quotes
    RelVal is one step of RelVal - either a call to DAS for list of input files
    or a cmsDriver command

    # Default value of every step attribute, keyed by attribute name
    # NOTE(review): the closing braces of the nested 'driver', 'gpu' and
    # 'input' dicts and of the schema itself are not present in this copy -
    # TODO restore them
    _ModelBase__schema = {
        # Step name
        'name': '',
        # CMSSW version of this step
        'cmssw_release': '',
        # Hash of configuration file uploaded to ReqMgr2
        'config_id': '',
        # cmsDriver arguments
        'driver': {
            'beamspot': '',
            'conditions': '',
            'customise': '',
            'customise_commands': '',
            'data': False,
            'datatier': [],
            'era': '',
            'eventcontent': [],
            'extra': '',
            'fast': False,
            'filetype': '',
            'geometry': '',
            'hltProcess': '',
            'mc': False,
            'number': '10',
            'nStreams': '',
            'pileup': '',
            'pileup_input': '',
            'process': '',
            'relval': '',
            'runUnscheduled': False,
            'fragment_name': '',
            'scenario': '',
            'step': [],
        # Events per lumi - if empty, events per job will be used
        'events_per_lumi': '',
        # GPU parameters
        'gpu': {
            'requires': 'forbidden',
            'gpu_memory': '',
            'cuda_capabilities': [],
            'cuda_runtime': '',
            'gpu_name': '',
            'cuda_driver_version': '',
            'cuda_runtime_version': ''
        # Input file info
        'input': {
            'dataset': '',
            'lumisection': {},
            'run': [],
            'label': '',
        # Keeping output of this task
        'keep_output': True,
        # Lumis per job - applicable to non-first steps
        'lumis_per_job': '',
        # Actual globaltag, resolved from auto:... conditions
        'resolved_globaltag': '',
        # Overwrite default CMSSW scram arch
        'scram_arch': '',

    # Validators for attribute values; the '_driver', '_gpu' and '_input'
    # entries hold validators for keys of the respective nested dictionaries
    # NOTE(review): most entries below have lost their dictionary key, several
    # calls are cut off before their closing parenthesis or argument, and the
    # nested and outer dicts are missing their closing braces. None of that
    # can be recovered from this copy - TODO restore from the upstream file
    lambda_checks = {
        lambda cmssw: not cmssw or ModelBase.lambda_check('cmssw_release')
        lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        '_driver': {
            lambda c: not c or ModelBase.matches_regex(c, '[a-zA-Z0-9_]{0,50}'
            lambda e: not e or ModelBase.matches_regex(
                e, '[a-zA-Z0-9_\\,]{0,50}'),
            lambda s: not s or s in {'pp', 'cosmics', 'nocoll', 'HeavyIons'},
        '_gpu': {
            'requires': lambda r: r in ('forbidden', 'optional', 'required'),
            'cuda_capabilities': lambda l: isinstance(l, list),
            'gpu_memory': lambda m: m == '' or int(m) > 0,
        '_input': {
            'dataset': lambda ds: not ds or ModelBase.lambda_check('dataset')
            'label': lambda l: not l or ModelBase.lambda_check('label')(l)
        lambda l: l == '' or int(l) > 0,
        lambda n: ModelBase.matches_regex(n, '[a-zA-Z0-9_\\-]{1,150}'),
        lambda s: not s or ModelBase.lambda_check('scram_arch')(s),

    def __init__(self, json_input=None, parent=None, check_attributes=True):
        """
        Normalise a copy of json_input and pass it on to ModelBase.__init__

        A step with an input dataset gets schema default driver and GPU
        dictionaries and has missing input keys filled in. Any other step has
        leading dashes removed from driver argument names, gets the schema
        default input dictionary, has GPU parameters reset unless GPU usage is
        requested and has missing driver keys filled in.
        Raises an Exception when --data is combined with --mc or --fast.
        A weak reference to parent is stored in self.parent, or None when
        there is no parent
        """
        if json_input:
            # Work on a copy so the caller's dictionary is left untouched
            json_input = deepcopy(json_input)
            schema = self.schema()
            # NOTE(review): both "else" branches and several closing brackets
            # were missing in the original text and are reconstructed from the
            # indentation; without them the dataset of an input step would be
            # wiped and the parent reference always reset to None - confirm
            if json_input.get('input', {}).get('dataset'):
                # Input file step: driver and GPU parameters do not apply
                json_input['driver'] = schema.get('driver')
                json_input['gpu'] = schema.get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                step_input = json_input['input']
                for key, default_value in schema['input'].items():
                    if key not in step_input:
                        step_input[key] = default_value
            else:
                # Remove -- from argument names
                json_input['driver'] = {
                    k.lstrip('-'): v
                    for k, v in json_input['driver'].items()
                }
                json_input['input'] = schema.get('input')
                if json_input.get('gpu',
                                  {}).get('requires') not in ('optional',
                                                              'required'):
                    json_input['gpu'] = schema.get('gpu')
                    json_input['gpu']['requires'] = 'forbidden'

                driver = json_input['driver']
                for key, default_value in schema['driver'].items():
                    if key not in driver:
                        driver[key] = default_value

                if driver.get('data') and driver.get('mc'):
                    raise Exception(
                        'Both --data and --mc are not allowed in the same step'
                    )

                if driver.get('data') and driver.get('fast'):
                    raise Exception(
                        'Both --data and --fast are not allowed in the same step'
                    )

        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            # Weak reference, the step does not keep its parent alive
            self.parent = weakref.ref(parent)
        else:
            self.parent = None

    def get_prepid(self):
        """
        Return the fixed identifier 'RelValStep'
        """
        return 'RelValStep'

    def get_short_name(self):
        """
        Return a shortened step name
        GenSimFull for anything that has GenSim in it
        HadronizerFull for anything that has Hadronizer in it
        Split and cut by underscores for other cases
        """
        name = self.get('name')
        lowercase_name = name.lower()
        if 'gensim' in lowercase_name:
            return 'GenSimFull'

        if 'hadronizer' in lowercase_name:
            return 'HadronizerFull'

        # Drop trailing underscore separated parts until the name fits in 50
        # characters. Stop when there is nothing left to split on, so a long
        # name without underscores is returned as is and not cut to ''
        while len(name) > 50 and '_' in name:
            name = name.rsplit('_', 1)[0]

        return name

    def get_index_in_parent(self):
        """
        Return step's index in parent's list of steps

        Raises an Exception when no step in the parent's list is equal to
        this step
        """
        parent = self.parent()
        for index, step in enumerate(parent.get('steps')):
            if self == step:
                return index

        raise Exception(f'Step is not a child of {parent.get_prepid()}')

    def get_step_type(self):
        """
        Return whether this is cmsDriver or input file step

        'input_file' when the input dictionary has a dataset, 'cms_driver'
        otherwise
        """
        has_dataset = self.get('input').get('dataset')
        return 'input_file' if has_dataset else 'cms_driver'

    def chunkify(items, chunk_size):
        Yield fixed size chunks of given list
        start = 0
        chunk_size = max(chunk_size, 1)
        while start < len(items):
            yield items[start:start + chunk_size]
            start += chunk_size

    def __build_cmsdriver(self, step_index, arguments, for_submission):
        Build a cmsDriver command from given arguments
        Add comment in front of the command
        fragment_name = arguments['fragment_name']
        if not fragment_name:
            fragment_name = f'step{step_index + 1}'

        self.logger.info('Generating %s cmsDriver for step %s', fragment_name,
        # Actual command
        command = ''
        if not for_submission:
            command += f'# Command for step {step_index + 1}:\n'

        command += f'cmsDriver.py {fragment_name}'
        # Comment in front of the command for better readability
        comment = f'# Arguments for step {step_index + 1}:\n'
        for key in sorted(arguments.keys()):
            if key in ('fragment_name', 'extra'):

            if not arguments[key]:

            if isinstance(arguments[key], bool):
                arguments[key] = ''

            if isinstance(arguments[key], list):
                arguments[key] = ','.join([str(x) for x in arguments[key]])

            command += f' --{key} {arguments[key]}'.rstrip()
            comment += f'# --{key} {arguments[key]}'.rstrip() + '\n'

        extra_value = arguments.get('extra')
        if extra_value:
            command += f' {extra_value}'
            comment += f'# <extra> {extra_value}\n'

        # Exit the script with error of cmsDriver.py
        command += ' || exit $?'
        if for_submission:
            return command

        return comment + '\n' + command

    def __build_das_command(self, step_index):
        Build a dasgoclient command to fetch input dataset file names
        input_dict = self.get('input')
        dataset = input_dict['dataset']
        lumisections = input_dict['lumisection']
        if lumisections:
                'Making a DAS command for step %s with lumisection list',
            files_name = f'step{step_index + 1}_files.txt'
            lumis_name = f'step{step_index + 1}_lumi_ranges.txt'
            comment = f'# Arguments for step {step_index + 1}:\n'
            command = f'# Command for step {step_index + 1}:\n'
            comment += f'#   dataset: {dataset}\n'
            command += f'echo "" > {files_name}\n'
            for run, lumi_ranges in lumisections.items():
                for lumi_range in lumi_ranges:
                    comment += f'#   run: {run}, range: {lumi_range[0]} - {lumi_range[1]}\n'
                    command += 'dasgoclient --limit 0 --format json '
                    command += f'--query "lumi,file dataset={dataset} run={run}"'
                    command += f' | das-selected-lumis.py {lumi_range[0]},{lumi_range[1]}'
                    command += f' | sort -u >> {files_name}\n'

            lumi_json = json.dumps(lumisections)
            command += f'echo \'{lumi_json}\' > {lumis_name}'
            return (comment + '\n' + command).strip()

        runs = input_dict['run']
        if runs:
            self.logger.info('Making a DAS command for step %s with run list',
            files_name = f'step{step_index + 1}_files.txt'
            comment = f'# Arguments for step {step_index + 1}:\n'
            command = f'# Command for step {step_index + 1}:\n'
            comment += f'#   dataset: {dataset}\n'
            command += f'echo "" > {files_name}\n'
            for run_chunk in self.chunkify(runs, 25):
                run_chunk = ','.join([str(r) for r in run_chunk])
                comment += f'#   runs: {run_chunk}\n'
                command += 'dasgoclient --limit 0 '
                command += f'--query "file dataset={dataset} run in [{run_chunk}]" '
                command += f'>> {files_name}\n'

            return (comment + '\n' + command).strip()

        return f'# Step {step_index + 1} is input dataset for next step: {dataset}'

    def get_command(self, custom_fragment=None, for_submission=False):
        """
        Return a cmsDriver command for this step

        An input file step returns a DAS command instead, or only a comment
        when for_submission is set. For a cmsDriver step the config file is
        named by get_config_file_name() with a .py extension, the output file
        is step<N>.root and the input file is chosen from the previous step
        """
        step_type = self.get_step_type()
        index = self.get_index_in_parent()
        if step_type == 'input_file':
            if for_submission:
                return '# Nothing to do for input file step'

            return self.__build_das_command(index)

        # NOTE(review): several "else" branches, "arguments_dict[" prefixes and
        # closing brackets were missing in the original text of this method
        # and are reconstructed from the indentation and comments - confirm
        arguments_dict = deepcopy(self.get('driver'))
        if custom_fragment:
            arguments_dict['fragment_name'] = custom_fragment

        # No execution
        arguments_dict['no_exec'] = True
        # Handle input/output file names
        arguments_dict['fileout'] = f'"file:step{index + 1}.root"'
        arguments_dict['python_filename'] = f'{self.get_config_file_name()}.py'
        # Add events per lumi to customise_commands
        events_per_lumi = self.get('events_per_lumi')
        if events_per_lumi:
            customise_commands = arguments_dict['customise_commands']
            customise_commands += ';"process.source.numberEventsInLuminosityBlock='
            customise_commands += f'cms.untracked.uint32({events_per_lumi})"'
            # NOTE(review): the lstrip argument was truncated; ';' is assumed,
            # it removes the separator when there were no earlier commands
            arguments_dict['customise_commands'] = customise_commands.lstrip(
                ';')

        # Add number of cpu cores of the RelVal if it is >1 and this is not a harvesting step
        cpu_cores = self.parent().get('cpu_cores')
        if cpu_cores > 1 and not self.has_step(
                'HARVESTING') and not self.has_step('ALCAHARVEST'):
            arguments_dict['nThreads'] = cpu_cores

        all_steps = self.parent().get('steps')
        if index > 0:
            previous = all_steps[index - 1]
            previous_type = previous.get_step_type()
            if previous_type == 'input_file':
                # If previous step is an input file, use it as input
                if for_submission:
                    arguments_dict['filein'] = '"file:_placeholder_.root"'
                else:
                    previous_input = previous.get('input')
                    previous_lumisection = previous_input['lumisection']
                    previous_run = previous_input['run']
                    if previous_lumisection:
                        # If there are lumi ranges, add a file with them and list of files as input
                        arguments_dict[
                            'filein'] = f'"filelist:step{index}_files.txt"'
                        arguments_dict[
                            'lumiToProcess'] = f'"step{index}_lumi_ranges.txt"'
                    elif previous_run:
                        # If there is a run whitelist, add the file
                        arguments_dict[
                            'filein'] = f'"filelist:step{index}_files.txt"'
                    else:
                        # If there are no lumi ranges, use input file normally
                        previous_dataset = previous_input['dataset']
                        arguments_dict['filein'] = f'"dbs:{previous_dataset}"'
            else:
                # If previous step is a cmsDriver, use its output root file
                input_number = self.get_input_step_index() + 1
                eventcontent_index, eventcontent = self.get_input_eventcontent(
                )
                if eventcontent_index == 0:
                    arguments_dict[
                        'filein'] = f'"file:step{input_number}.root"'
                else:
                    arguments_dict[
                        'filein'] = f'"file:step{input_number}_in{eventcontent}.root"'

        cms_driver_command = self.__build_cmsdriver(index, arguments_dict,
                                                    for_submission)
        return cms_driver_command

    def has_step(self, step):
        """
        Return if this RelValStep has certain step in --step argument

        A step matches when it starts with the given string
        """
        return any(
            one_step.startswith(step)
            for one_step in self.get('driver')['step'])

    def has_eventcontent(self, eventcontent):
        """
        Return if this RelValStep has certain eventcontent in --eventcontent argument
        """
        return eventcontent in self.get('driver')['eventcontent']

    def get_input_step_index(self):
        """
        Get index of step that will be used as input step for current step

        Earlier steps are checked from the closest one backwards. Harvesting
        and AlCa steps are never used as input, a harvesting step accepts only
        a step with DQM eventcontent and an AlCa step accepts only a step that
        has RECO. Raises an Exception when no suitable step is found
        """
        all_steps = self.parent().get('steps')
        index = self.get_index_in_parent()
        this_is_harvesting = self.has_step('HARVESTING')
        self_step = self.get('driver')['step']
        this_is_alca = self_step and self_step[0].startswith('ALCA')
        self.logger.info('Get input for step %s, harvesting: %s', index,
                         this_is_harvesting)
        for step_index in reversed(range(0, index)):
            step = all_steps[step_index]
            # Harvesting step is never input
            if step.has_step('HARVESTING'):
                continue

            # AlCa step is never input
            step_step = step.get('driver')['step']
            if step_step and step_step[0].startswith('ALCA'):
                continue

            # Harvesting step needs DQM as input
            if this_is_harvesting and not step.has_eventcontent('DQM'):
                continue

            # AlCa step needs RECO as input
            if this_is_alca and not step.has_step('RECO'):
                continue

            return step_index

        name = self.get('name')
        if this_is_harvesting:
            # Trailing space keeps the two message parts from running together
            raise Exception('No step with --eventcontent DQM could be found '
                            f'as input for {name} (Harvesting step)')

        if this_is_alca:
            raise Exception('No step with --step RECO could be found '
                            f'as input for {name} (AlCa)')

        raise Exception(f'No input step for {name} could be found')

    def get_input_eventcontent(self, input_step=None):
        """
        Return which eventcontent should be used as input for current RelVal step

        Returns a tuple of index and eventcontent name. A harvesting step uses
        the DQM eventcontent and an AlCa step the first one that starts with
        RECO, both with their index in the input step's list; an Exception is
        raised when there is none. Any other step uses the last eventcontent
        that does not start with DQM, with its index in the list without DQM
        entries. If input_step is not given, it is found with
        get_input_step_index()
        """
        if input_step is None:
            all_steps = self.parent().get('steps')
            input_step_index = self.get_input_step_index()
            input_step = all_steps[input_step_index]

        this_is_harvesting = self.has_step('HARVESTING')
        self_step = self.get('driver')['step']
        this_is_alca = self_step and self_step[0].startswith('ALCA')
        input_step_eventcontent = input_step.get('driver')['eventcontent']
        if this_is_harvesting:
            for eventcontent_index, eventcontent in enumerate(
                    input_step_eventcontent):
                if eventcontent == 'DQM':
                    return eventcontent_index, eventcontent

            raise Exception(
                f'No DQM eventcontent in the input step {input_step_eventcontent}'
            )

        if this_is_alca:
            for eventcontent_index, eventcontent in enumerate(
                    input_step_eventcontent):
                if eventcontent.startswith('RECO'):
                    return eventcontent_index, eventcontent

            raise Exception(
                f'No RECO eventcontent in the input step {input_step_eventcontent}'
            )

        # DQM is never used as input of a regular step
        input_step_eventcontent = [
            x for x in input_step_eventcontent if not x.startswith('DQM')
        ]
        return len(input_step_eventcontent) - 1, input_step_eventcontent[-1]

    def get_config_file_name(self):
        """
        Return config file name without extension

        None is returned for an input file step, otherwise the name is
        step_<N>_cfg where N is the one-based index of the step in its parent
        """
        if self.get_step_type() == 'input_file':
            return None

        step_number = self.get_index_in_parent() + 1
        return f'step_{step_number}_cfg'

    def get_relval_events(self):
        """
        Split --relval argument to total events and events per job/lumi

        Returns a tuple of two ints taken from the first two comma separated
        values. Raises an Exception when --relval is not set or has fewer
        than two values
        """
        relval = self.get('driver')['relval']
        if not relval:
            raise Exception('--relval is not set')

        parts = relval.split(',')
        if len(parts) < 2:
            raise Exception('Not enough parameters in --relval argument')

        requested_events = int(parts[0])
        events_per = int(parts[1])
        return requested_events, events_per

    def get_release(self):
        """
        Return CMSSW release of the step
        If CMSSW release is not specified, return release of the parent RelVal

        Raises an Exception when the step has neither a release nor a parent
        """
        own_release = self.get('cmssw_release')
        if own_release:
            return own_release

        if not self.parent:
            raise Exception(
                'Could not get CMSSW release, because step has no parent')

        return self.parent().get('cmssw_release')

    def get_scram_arch(self):
        """
        Return the scram arch of the step
        If scram arch is not specified, return scram arch of the release

        The step's own value is used first, then the parent's value, then the
        value looked up for the CMSSW release with the module level
        get_scram_arch function. Raises an Exception if none is found
        """
        scram_arch = self.get('scram_arch')
        if scram_arch:
            return scram_arch

        if self.parent:
            scram_arch = self.parent().get('scram_arch')
            if scram_arch:
                return scram_arch

        cmssw_release = self.get_release()
        # This calls the module level function, not this method
        scram_arch = get_scram_arch(cmssw_release)
        if scram_arch:
            return scram_arch

        raise Exception(f'Could not find SCRAM arch of {cmssw_release}')

    def get_gpu_requires(self):
        """
        Return whether GPU is required, optional or forbidden
        """
        return self.get('gpu')['requires']

    def get_gpu_dict(self):
        """
        Return a dictionary with GPU parameters for ReqMgr2

        Only parameters that have a value are included, under their ReqMgr2
        names, and GPU memory is converted to an int
        """
        gpu_info = self.get('gpu')
        # Attribute name in the step -> parameter name in the result
        keys = {
            'cuda_capabilities': 'CUDACapabilities',
            'cuda_runtime': 'CUDARuntime',
            'gpu_name': 'GPUName',
            'cuda_driver_version': 'CUDADriverVersion',
            'cuda_runtime_version': 'CUDARuntimeVersion'
        }
        params = {
            key: gpu_info[attr]
            for attr, key in keys.items() if gpu_info.get(attr)
        }
        if gpu_info.get('gpu_memory'):
            params['GPUMemoryMB'] = int(gpu_info['gpu_memory'])

        return params