Example #1
0
    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name == 'steps':
            if not isinstance(attribute_value, list):
                raise Exception(f'Expected {attribute_name} to be a list')

            for step in attribute_value:
                subcampaign = step['subcampaign']
                if not ModelBase.lambda_check('subcampaign')(subcampaign):
                    raise Exception(f'Bad subcampaign prepid {subcampaign}')

                processing_string = step['processing_string']
                if not ModelBase.lambda_check('processing_string')(
                        processing_string):
                    raise Exception(
                        f'Bad processing string {processing_string}')

                time_per_event = step['time_per_event']
                if time_per_event <= 0.0:
                    raise Exception(f'Bad time per event {time_per_event}')

                size_per_event = step['size_per_event']
                if size_per_event <= 0.0:
                    raise Exception(f'Bad size per event {size_per_event}')

                priority = step['priority']
                if not ModelBase.lambda_check('priority')(priority):
                    raise Exception(f'Bad priority {priority}')

        return super().check_attribute(attribute_name, attribute_value)
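A minimal sketch of a step entry that these checks would accept; the concrete values are made up and assumed to satisfy the referenced 'subcampaign', 'processing_string' and 'priority' lambda checks:

step = {
    'subcampaign': 'ReReco-Run2018A-UL',     # assumed to pass ModelBase.lambda_check('subcampaign')
    'processing_string': 'UL2018_Example',   # assumed to pass ModelBase.lambda_check('processing_string')
    'time_per_event': 10.0,                  # must be > 0
    'size_per_event': 1500.0,                # must be > 0
    'priority': 110000,                      # assumed to pass ModelBase.lambda_check('priority')
}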
Example #2
0
    def __init__(self, json_input=None, parent=None, check_attributes=True):
        self.parent = None
        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)

        self.check_attribute('eventcontent', self.get('eventcontent'))
        self.check_attribute('datatier', self.get('datatier'))
Example #3
0
class CampaignTicket(ModelBase):
    """
    Campaign ticket has a list of input datasets, a campaign and a processing string
    Campaign ticket can be used to create requests for each input dataset
    """

    _ModelBase__schema = {
        # Database id (required by CouchDB)
        '_id': '',
        # Document revision (required by CouchDB)
        '_rev': '',
        # PrepID
        'prepid': '',
        # Conditions global tag for this ticket
        'conditions_globaltag': '',
        # Processing string for this ticket (label at the time of the submission)
        'processing_string': '',
        # List of prepids of requests that were created from this ticket
        'created_requests': [],
        # Status is either new or done
        'status': 'new',
        # User notes
        'notes': '',
        # CMSSW release
        'cmssw_release': 'CMSSW_ToBeIncludeFromValManagers',
        # Sample tag (to be chosen from a list)
        'sample_tag': '',
        # Pile-up production (to be chosen from a list)
        'pile_up': '',
        # High-statistics production (True or False)
        'high_statistics': False,
        # String for GS input
        'string_for_inputGS': '',
        # GEN-SIM samples to be re-used?
        'ReUseGenSim': False,
        # Extension number (just a number, used if a similar sample was already submitted, observed especially in Phase II)
        'extension_number': 0,
        # Action history
        'history': []
    }

    _lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,50}'),
        'conditions_globaltag': lambda gt: ModelBase.matches_regex(gt, '[a-zA-Z0-9_\\-]{1,50}'),
        'cmssw_release': lambda cmssw_release: ModelBase.matches_regex(cmssw_release, '[a-zA-Z0-9_\\-]{1,50}'),
        'processing_string': lambda ps: ModelBase.matches_regex(ps, '[a-zA-Z0-9_]{0,100}'),
        'status': lambda status: status in ('new', 'done'),
        'sample_tag': lambda sample_tag: sample_tag in ('Run2_2016', 'Run2_2017', 'Run2_2018', 'fastSim_2016', 'fastSim_2017', 'fastSim_2018', 'Run3', 'PhaseII', 'customized'),
        'pile_up': lambda pile_up: pile_up in ('classical_mixing', 'premix', 'no_pile_up'),
        'high_statistics': lambda high_statistics: isinstance(high_statistics, bool),
        'ReUseGenSim': lambda ReUseGenSim: isinstance(ReUseGenSim, bool),
        'extension_number': lambda extension_number: isinstance(extension_number, int),
        'string_for_inputGS': lambda string_for_inputGS: ModelBase.matches_regex(string_for_inputGS, '[a-zA-Z0-9_\\-]{1,50}')
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)
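A usage sketch, assuming ModelBase applies _lambda_checks to the given attributes at construction time; the prepid and other values are illustrative only:

ticket = CampaignTicket(json_input={
    'prepid': 'Run2018A-UL-Ticket-001',           # hypothetical prepid matching the regex
    'conditions_globaltag': '106X_dataRun2_v37',  # illustrative global tag
    'processing_string': 'UL2018_Example',
    'cmssw_release': 'CMSSW_10_6_30',
    'sample_tag': 'Run2_2018',                    # must be one of the allowed sample tags
    'pile_up': 'no_pile_up',                      # must be one of the allowed pile-up options
    'high_statistics': False,
    'string_for_inputGS': 'GS_input_example',
    'extension_number': 0,
})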
Example #4
0
class Subcampaign(ModelBase):
    """
    Class that represents a snapshot of computing campaign
    It is used as a template for requests
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Energy in TeV
        'energy': 0.0,
        # Action history
        'history': [],
        # Default memory
        'memory': 2000,
        # User notes
        'notes': '',
        # Path to json that contains all runs
        'runs_json_path': '',
        # List of Sequences
        'sequences': [],
    }

    __runs_json_regex = '[a-zA-Z0-9/\\-_]{0,150}(\\.json|\\.txt)?'
    lambda_checks = {
        'prepid':
        ModelBase.lambda_check('subcampaign'),
        'cmssw_release':
        ModelBase.lambda_check('cmssw_release'),
        'energy':
        ModelBase.lambda_check('energy'),
        'memory':
        ModelBase.lambda_check('memory'),
        'runs_json_path':
        lambda rjp: ModelBase.matches_regex(rjp, Subcampaign.__runs_json_regex
                                            ),
        'sequences':
        lambda s: len(s) > 0,
        '__sequences':
        lambda s: isinstance(s, Sequence),
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input['runs_json_path'] = json_input.get(
                'runs_json_path', '').strip().lstrip('/')
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(Sequence(json_input=sequence_json))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
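A usage sketch, assuming ModelBase fills unspecified attributes from the schema and that the Sequence class shown later accepts these cmsDriver options; all values are illustrative:

subcampaign = Subcampaign(json_input={
    'prepid': 'Run2018A-PromptReco',                # assumed to pass the 'subcampaign' lambda check
    'cmssw_release': 'CMSSW_10_6_30',
    'energy': 13.0,
    'memory': 4000,
    'runs_json_path': '/certification/runs.json',   # leading '/' is stripped in __init__
    'sequences': [{'conditions': 'auto_run2_data',
                   'era': 'Run2_2018',
                   'datatier': ['AOD'],
                   'eventcontent': ['AOD'],
                   'step': ['RAW2DIGI', 'L1Reco', 'RECO']}],
})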
Example #5
0
    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input['runs_json_path'] = json_input.get(
                'runs_json_path', '').strip().lstrip('/')
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(Sequence(json_input=sequence_json))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
Example #6
0
    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            json_input['runs'] = [int(r) for r in json_input.get('runs', [])]
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(
                    Sequence(json_input=sequence_json,
                             parent=self,
                             check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)
Example #7
0
    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            json_input['workflow_ids'] = [
                float(wid) for wid in json_input['workflow_ids']
            ]
            json_input['recycle_gs'] = bool(json_input.get(
                'recycle_gs', False))
            if json_input.get('gpu', {}).get('requires') not in ('optional',
                                                                 'required'):
                json_input['gpu'] = self.schema().get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                json_input['gpu_steps'] = []

        ModelBase.__init__(self, json_input, check_attributes)
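The effect of this preprocessing, sketched on a plain dictionary (the enclosing class is not shown in this fragment, and the values are made up):

json_input = {'workflow_ids': ['11634.0', '11634.7'], 'recycle_gs': 0, 'gpu': {'requires': 'no'}}
# After the block above runs:
#   workflow_ids -> [11634.0, 11634.7]  (coerced to floats)
#   recycle_gs   -> False               (coerced to bool)
#   gpu          -> schema default with 'requires' set to 'forbidden'
#   gpu_steps    -> []                  (cleared because GPU is neither 'optional' nor 'required')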
Example #8
0
    def __init__(self, json_input=None, parent=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            # Remove -- from argument names
            schema = self.schema()
            if json_input.get('input', {}).get('dataset'):
                json_input['driver'] = schema.get('driver')
                json_input['gpu'] = schema.get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                step_input = json_input['input']
                for key, default_value in schema['input'].items():
                    if key not in step_input:
                        step_input[key] = default_value
            else:
                json_input['driver'] = {
                    k.lstrip('-'): v
                    for k, v in json_input['driver'].items()
                }
                json_input['input'] = schema.get('input')
                if json_input.get('gpu',
                                  {}).get('requires') not in ('optional',
                                                              'required'):
                    json_input['gpu'] = schema.get('gpu')
                    json_input['gpu']['requires'] = 'forbidden'

                driver = json_input['driver']
                for key, default_value in schema['driver'].items():
                    if key not in driver:
                        driver[key] = default_value

                if driver.get('data') and driver.get('mc'):
                    raise Exception(
                        'Both --data and --mc are not allowed in the same step'
                    )

                if driver.get('data') and driver.get('fast'):
                    raise Exception(
                        'Both --data and --fast are not allowed in the same step'
                    )

        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)
        else:
            self.parent = None
Example #9
0
    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            steps = []
            for step in json_input.get('steps', []):
                steps.append({
                    'subcampaign':
                    step.get('subcampaign', ''),
                    'processing_string':
                    step.get('processing_string', ''),
                    'time_per_event':
                    float(step.get('time_per_event', 0)),
                    'size_per_event':
                    float(step.get('size_per_event', 0)),
                    'priority':
                    int(step.get('priority', 0))
                })

            json_input['steps'] = steps

        ModelBase.__init__(self, json_input, check_attributes)
Example #10
0
class Campaign(ModelBase):

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Sample tag
        'sample_tag': '',
        # User notes
        'notes': '',
        # Link to prodmon
        'link_prodmon': '',
        # Action history
        'history': []
    }

    __lambda_checks = {
        'prepid':
        lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'link_prodmon':
        lambda link_prodmon: ModelBase.matches_regex(link_prodmon,
                                                     '[a-zA-Z0-9]{1,50}'),
        'sample_tag':
        lambda sample_tag: sample_tag in ['Phase2', 'Run3', 'Run2_2016'],
        'cmssw_release':
        lambda cmssw_release: 'CMSSW' in cmssw_release
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
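A brief sketch of how these checks behave, assuming a Campaign can be built from the schema defaults when no json_input is given; the values are illustrative:

campaign = Campaign()
campaign.check_attribute('prepid', 'Run3Campaign2024')   # True: matches the prepid regex
campaign.check_attribute('sample_tag', 'Run2_2017')      # False: not in the allowed sample tags
campaign.check_attribute('notes', 'free text')           # True: no lambda check registered for 'notes'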
Example #11
0
class Flow(ModelBase):

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # List of allowed source campaigns prepids
        'source_campaigns': [],
        # Status of the flow
        'status': '',
        # Target campaign prepid
        'target_campaign': ''}

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'status': lambda status: status in ['new', 'submit', 'tasksubmit']
    }
Example #12
0
class Campaign(ModelBase):

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Energy in TeV
        'energy': 0.0,
        # Type LHE, MCReproc, Prod
        'type': '',
        # Step type: MiniAOD, NanoAOD, etc.
        'step': 'DR',
        # No need for CMSSW version
        #'cmssw_release': '',
        # User notes
        'notes': '',
        # List of dictionaries that have cmsDriver options
        # (defaults are just a guideline and are expected to be modified)
        'sequences': [{'conditions': 'GT_FromAlca',
                       'step': 'RAW2DIGI,L1Reco,RECO,EI,PAT,DQM:@rerecoCommon',
                       'datatier': 'AOD,MINIAOD,DQM',
                       'eventcontent': 'RECO,SKIM,ALCA,MINIAOD,DQMIO',
                       'era': 'Run2_201XXX',
                       'extra': '--runUnscheduled',
                       'scenario': 'pp',
                       'nThreads': '8',
                       'customise': 'Configuration/DataProcessing/RecoTLR.customisePostEra_Run2_201XXX'}],
        # Action history
        'history': [],
        # Default memory
        'memory': 2300}

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'energy': lambda energy: energy >= 0.0,
        'step': lambda step: step in ['DR', 'MiniAOD', 'NanoAOD'],
        'memory': lambda memory: memory >= 0,
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Example #13
0
class ChainedCampaign(ModelBase):

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Notes
        'notes': '',
        # List of flow and campaign pairs
        'campaigns': []}

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}')
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Example #14
0
class Request(ModelBase):
    """
    Request represents a single step in processing pipeline
    Request contains one or a few cmsDriver commands
    It is created based on a subcampaign that it is a member of
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # CMSSW version
        'cmssw_release': '',
        # Completed events
        'completed_events': 0,
        # Energy in TeV
        'energy': 0.0,
        # Action history
        'history': [],
        # Input dataset name or request name
        'input': {
            'dataset': '',
            'request': ''
        },
        # Dictionary of runs and their lumisection ranges to be processed
        'lumisections': {},
        # Memory in MB
        'memory': 2000,
        # User notes
        'notes': '',
        # List of output
        'output_datasets': [],
        # Priority in computing
        'priority': 110000,
        # Processing string
        'processing_string': '',
        # List of runs to be processed
        'runs': [],
        # List of dictionaries that have cmsDriver options
        'sequences': [],
        # Disk size per event in kB
        'size_per_event': 1.0,
        # Status is either new, approved, submitting, submitted or done
        'status': 'new',
        # Subcampaign name
        'subcampaign': '',
        # Time per event in seconds
        'time_per_event': 1.0,
        # Total events
        'total_events': 0,
        # List of workflows in computing
        'workflows': []
    }

    __prepid_regex = '[a-zA-Z0-9\\-_]{1,100}'
    lambda_checks = {
        'prepid':
        lambda prepid: ModelBase.matches_regex(prepid, Request.__prepid_regex),
        'cmssw_release':
        ModelBase.lambda_check('cmssw_release'),
        'completed_events':
        lambda events: events >= 0,
        'energy':
        ModelBase.lambda_check('energy'),
        '_input': {
            'dataset':
            lambda ds: not ds or ModelBase.lambda_check('dataset')(ds),
            'request':
            lambda r: not r or ModelBase.matches_regex(r, Request.
                                                       __prepid_regex)
        },
        'memory':
        ModelBase.lambda_check('memory'),
        '__output_datasets':
        ModelBase.lambda_check('dataset'),
        'priority':
        ModelBase.lambda_check('priority'),
        'processing_string':
        ModelBase.lambda_check('processing_string'),
        '__runs':
        lambda r: isinstance(r, int) and r > 0,
        '__sequences':
        lambda s: isinstance(s, Sequence),
        'size_per_event':
        lambda spe: spe > 0.0,
        'status':
        lambda status: status in
        {'new', 'approved', 'submitting', 'submitted', 'done'},
        'subcampaign':
        ModelBase.lambda_check('subcampaign'),
        'time_per_event':
        lambda tpe: tpe > 0.0,
        'total_events':
        lambda events: events >= 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            json_input['runs'] = [int(r) for r in json_input.get('runs', [])]
            sequence_objects = []
            for sequence_json in json_input.get('sequences', []):
                sequence_objects.append(
                    Sequence(json_input=sequence_json,
                             parent=self,
                             check_attributes=check_attributes))

            json_input['sequences'] = sequence_objects

        ModelBase.__init__(self, json_input, check_attributes)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name == 'input':
            if not attribute_value.get('dataset') and not attribute_value.get(
                    'request'):
                raise Exception(
                    'Either input dataset or input request must be provided')

        return super().check_attribute(attribute_name, attribute_value)

    def get_config_file_names(self):
        """
        Get list of dictionaries of all config file names without extensions
        """
        file_names = []
        for sequence in self.get('sequences'):
            file_names.append(sequence.get_config_file_names())

        return file_names

    def get_cmsdrivers(self, overwrite_input=None):
        """
        Get all cmsDriver commands for this request
        """
        built_command = ''
        for index, sequence in enumerate(self.get('sequences')):
            if index == 0 and overwrite_input:
                built_command += sequence.get_cmsdriver(overwrite_input)
            else:
                built_command += sequence.get_cmsdriver()

            if sequence.needs_harvesting():
                built_command += '\n\n'
                built_command += sequence.get_harvesting_cmsdriver()

            built_command += '\n\n'

        return built_command.strip()

    def get_era(self):
        """
        Return era based on input dataset
        """
        input_dataset_parts = [
            x for x in self.get('input')['dataset'].split('/') if x
        ]
        if len(input_dataset_parts) < 2:
            return self.get_prepid().split('-')[1]

        return input_dataset_parts[1].split('-')[0]

    def get_input_processing_string(self):
        """
        Return processing string from input dataset
        """
        input_dataset_parts = [
            x for x in self.get('input')['dataset'].split('/') if x
        ]
        if len(input_dataset_parts) < 3:
            return ''

        middle_parts = [x for x in input_dataset_parts[1].split('-') if x]
        if len(middle_parts) < 3:
            return ''

        return '-'.join(middle_parts[1:-1])

    def get_dataset(self):
        """
        Return primary dataset based on input dataset
        """
        input_dataset_parts = [
            x for x in self.get('input')['dataset'].split('/') if x
        ]
        if not input_dataset_parts:
            return self.get_prepid().split('-')[2]

        return input_dataset_parts[0]

    def get_request_string(self):
        """
        Return request string made of era, dataset and processing string
        """
        processing_string = self.get('processing_string')
        era = self.get_era()
        dataset = self.get_dataset()
        return f'{era}_{dataset}_{processing_string}'.strip('_')
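A worked illustration of how the helpers above decompose an input dataset name; the dataset name is made up:

# For input dataset '/SingleMuon/Run2018A-12Nov2019_UL2018-v2/AOD':
#   get_dataset()                  -> 'SingleMuon'        (first non-empty part)
#   get_era()                      -> 'Run2018A'          (second part, up to the first '-')
#   get_input_processing_string()  -> '12Nov2019_UL2018'  (middle parts joined, dropping era and version)
#   get_request_string()           -> 'Run2018A_SingleMuon_<processing_string of this request>'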
Example #15
0
 def __init__(self, json_input=None):
     ModelBase.__init__(self, json_input)
Example #16
0
 def __init__(self, json_input=None):
     ModelBase.__init__(self, json_input)
     self.collection = 'flows'
Example #17
0
class Sequence(ModelBase):
    """
    Sequence is a dictionary that has all user editable attributes
    for cmsDriver command
    """

    _ModelBase__schema = {
        # What conditions to use. This has to be specified
        'conditions': '',
        # Hash of configuration file uploaded to ReqMgr2
        'config_id': '',
        # Specify the file where the code to modify the process object is stored
        # If inline_custom is set to 1, then inline the customisation file
        'customise': '',
        # What data tier to use
        'datatier': [],
        # Specify which era to use (e.g. "run2")
        'era': '',
        # What event content to write out
        'eventcontent': [],
        # Freeform attributes appended at the end
        'extra': '',
        # Hash of harvesting configuration file uploaded to ReqMgr2
        'harvesting_config_id': '',
        # How many threads should CMSSW use
        'nThreads': 1,
        # Scenario overriding standard settings: 'pp', 'cosmics', 'nocoll', 'HeavyIons'
        'scenario': 'pp',
        # The desired step. The possible values are:
        # RAW2DIGI, L1Reco, RECO, EI, PAT, NANO, ALCA[:@...], DQM[:@...], SKIM[:@...],
        # HARVESTING:@...
        'step': []
    }

    lambda_checks = {
        'conditions':
        lambda c: ModelBase.matches_regex(c, '[a-zA-Z0-9_]{0,50}'),
        'config_id':
        lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        '__datatier':
        lambda s: s in
        {'AOD', 'MINIAOD', 'NANOAOD', 'DQMIO', 'USER', 'ALCARECO', 'RECO'},
        'era':
        lambda e: ModelBase.matches_regex(e, '[a-zA-Z0-9_\\,]{0,50}'),
        '__eventcontent':
        lambda s: s in
        {'AOD', 'MINIAOD', 'NANOAOD', 'DQM', 'NANOEDMAOD', 'ALCARECO', 'RECO'},
        'harvesting_config_id':
        lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        'nThreads':
        lambda n: 0 < n < 64,
        'scenario':
        lambda s: s in {'pp', 'cosmics', 'nocoll', 'HeavyIons'},
        '__step':
        lambda s: (s in {'RAW2DIGI', 'L1Reco', 'RECO', 'EI', 'PAT', 'NANO'} or
                   s.startswith('ALCA') or s.startswith('DQM') or s.startswith(
                       'SKIM') or s.startswith('HARVESTING:@'))
    }

    def __init__(self, json_input=None, parent=None, check_attributes=True):
        self.parent = None
        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)

        self.check_attribute('eventcontent', self.get('eventcontent'))
        self.check_attribute('datatier', self.get('datatier'))

    def get_prepid(self):
        if not self.parent:
            return 'Sequence'

        parent = self.parent()
        index = self.get_index_in_parent()
        return f'Sequence_{parent}_{index}'

    def check_attribute(self, attribute_name, attribute_value):
        if not self.initialized or attribute_name not in ('eventcontent',
                                                          'datatier'):
            return super().check_attribute(attribute_name, attribute_value)

        has_harvesting_step = bool(
            [s for s in self.get('step') if s.startswith('HARVESTING:@')])
        if not self.get('step') or has_harvesting_step:
            return super().check_attribute(attribute_name, attribute_value)

        # If sequence does not have HARVESTING step, eventcontent and datatier cannot be empty
        if not self.get('eventcontent'):
            raise Exception(
                'No eventcontent is allowed only with HARVESTING step')

        if not self.get('datatier'):
            raise Exception('No datatier is allowed only with HARVESTING step')

        return super().check_attribute(attribute_name, attribute_value)

    def needs_harvesting(self):
        """
        Return if this sequence produces input file for harvesting
        and harvesting step is needed
        """
        for step in self.get('step'):
            if step == 'DQM' or step.startswith('DQM:'):
                return True

        return False

    def get_index_in_parent(self):
        """
        Return sequence's index in parent's list of sequences
        """
        for index, sequence in enumerate(self.parent().get('sequences')):
            if self == sequence:
                return index

        raise Exception(
            f'Sequence is not a child of {self.parent().get_prepid()}')

    def get_name(self):
        """
        Return a sequence name which is based on parent
        prepid and sequence number
        Last sequence always has the same name as parent prepid
        Other sequences have suffix with their index, e.g

        PrepID_0
        PrepID_1
        PrepID

        If there is only one sequence, it will be the last one
        and have the same name as parent prepid
        """
        index = self.get_index_in_parent()
        parent_prepid = self.parent().get_prepid()
        if index != len(self.parent().get('sequences')) - 1:
            sequence_name = f'{parent_prepid}_{index}'
        else:
            sequence_name = f'{parent_prepid}'

        return sequence_name

    def get_config_file_names(self):
        """
        Return dictionary of 'config' and 'harvest' config file names
        """
        parent_prepid = self.parent().get_prepid()
        index = self.get_index_in_parent()
        config_file_names = {'config': f'{parent_prepid}_{index}_cfg'}
        if self.needs_harvesting():
            config_file_names[
                'harvest'] = f'{parent_prepid}_{index}_harvest_cfg'

        return config_file_names

    def __build_cmsdriver(self, cmsdriver_type, arguments):
        """
        Build a cmsDriver command from given arguments
        Add comment in front of the command
        """
        self.logger.info('Generating %s cmsDriver', cmsdriver_type)
        # Actual command
        command = f'# Command for {cmsdriver_type}:\ncmsDriver.py {cmsdriver_type}'
        # Comment in front of the command for better readability
        comment = f'# Arguments for {cmsdriver_type}:\n'
        for key in sorted(arguments.keys()):
            if not arguments[key]:
                continue

            if key == 'extra':
                continue

            if isinstance(arguments[key], bool):
                arguments[key] = ''

            if isinstance(arguments[key], list):
                arguments[key] = ','.join([str(x) for x in arguments[key]])

            command += f' --{key} {arguments[key]}'.rstrip()
            comment += f'# --{key} {arguments[key]}'.rstrip() + '\n'

        if arguments.get('extra'):
            extra_value = arguments['extra']
            command += f' {extra_value}'
            comment += f'# <extra> {extra_value}\n'

        # Exit the script with error of cmsDriver.py
        command += ' || exit $?'

        return comment + '\n' + command

    def get_cmsdriver(self, overwrite_input=None):
        """
        Return a cmsDriver command for this sequence
        Config file is named like this
        PrepID_0_cfg.py
        """
        sequence_name = self.get_name()
        arguments_dict = dict(self.get_json())
        # Delete sequence metadata
        arguments_dict.pop('config_id', None)
        arguments_dict.pop('harvesting_config_id', None)

        # Fetch list of files for specific runs
        das_query = ''
        # Handle input/output file names
        if overwrite_input:
            arguments_dict['filein'] = overwrite_input
        else:
            index = self.get_index_in_parent()
            arguments_dict['number'] = 10
            if index == 0:
                input_dataset = self.parent().get('input')['dataset']
                all_runs = self.parent().get('runs')
                if not input_dataset:
                    input_request = self.parent().get('input')['request']
                    arguments_dict['filein'] = f'"file:{input_request}.root"'
                elif all_runs:
                    das_file = f'{sequence_name}_files.txt'
                    das_query += '# Query DAS to get list of files for specified runs\n'
                    # Chunkify to 25 runs, otherwise script line gets very long
                    for runs in self.chunkify(all_runs, 25):
                        runs = ','.join([str(r) for r in runs])
                        das_query += 'dasgoclient --limit 0 '
                        das_query += f'--query "file dataset={input_dataset} run in [{runs}]" '
                        das_query += f'>> {das_file}\n'

                    das_query += '\n'
                    arguments_dict['filein'] = f'"filelist:{das_file}"'
                else:
                    arguments_dict['filein'] = f'"dbs:{input_dataset}"'
            else:
                previous_sequence = self.parent().get('sequences')[index - 1]
                input_file = f'{previous_sequence.get_name()}.root'
                arguments_dict['filein'] = f'"file:{input_file}"'

        # Update ALCA and SKIM steps to ALCA:@Dataset and SKIM:@Dataset
        # if dataset name is in "auto" dictionary in CMSSW
        dynamic_steps = self.update_dynamic_steps(arguments_dict['step'])
        # Build argument dictionary
        config_names = self.get_config_file_names()
        arguments_dict['fileout'] = f'"file:{sequence_name}.root"'
        arguments_dict['python_filename'] = f'"{config_names["config"]}.py"'
        arguments_dict['no_exec'] = True
        cms_driver_command = self.__build_cmsdriver('RECO', arguments_dict)
        return dynamic_steps + das_query + cms_driver_command

    def update_dynamic_steps(self, steps):
        """
        Update ALCA and SKIM steps to be variables and return code that resolves them
        """
        dynamic_steps = ''
        for step_index, step in enumerate(steps):
            if step not in ('ALCA', 'SKIM'):
                continue

            dataset = self.parent().get_dataset()
            steps[step_index] = f'${step}_STEP'
            # Build a small python program to get value from CMSSW on the go
            step_var = f'{step}_STEP=$(python -c "'
            if step == 'ALCA':
                step_var += 'from Configuration.AlCa.autoAlca import AlCaRecoMatrix as ds;'
            elif step == 'SKIM':
                step_var += 'from Configuration.Skimming.autoSkim import autoSkim as ds;'

            step_var += f'print(\'{step}:@{dataset}\' if \'{dataset}\' in ds.keys() else \'\')")'
            dynamic_steps += f'{step_var}\n'

        if dynamic_steps:
            dynamic_steps = f'# Steps based on dataset name\n{dynamic_steps}\n'

        return dynamic_steps
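    # Illustration with assumed values: for steps ['RAW2DIGI', 'RECO', 'ALCA'] and a parent
    # dataset named 'SingleMuon', 'ALCA' is replaced by '$ALCA_STEP' and the returned snippet is:
    #   # Steps based on dataset name
    #   ALCA_STEP=$(python -c "from Configuration.AlCa.autoAlca import AlCaRecoMatrix as ds;print('ALCA:@SingleMuon' if 'SingleMuon' in ds.keys() else '')")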

    def get_harvesting_cmsdriver(self):
        """
        Return a harvesting cmsDriver for this sequence
        Config file is named like this
        PrepID_0_harvest_cfg.py
        """
        if not self.needs_harvesting():
            return ''

        arguments_dict = dict(self.get_json())
        # Delete sequence metadata
        for attr in ('config_id', 'harvesting_config_id', 'customise',
                     'datatier', 'eventcontent', 'nThreads', 'extra',
                     'scenario'):
            arguments_dict.pop(attr, None)

        # Get correct configuration of DQM step, e.g.
        # DQM:@rerecoCommon should be changed to HARVESTING:@rerecoCommon
        step = 'HARVESTING:dqmHarvesting'
        for one_step in self.get('step'):
            if one_step.startswith('DQM:'):
                step = one_step.replace('DQM:', 'HARVESTING:', 1)
                break

        # Build argument dictionary
        sequence_name = self.get_name()
        config_names = self.get_config_file_names()
        arguments_dict['data'] = True
        arguments_dict['no_exec'] = True
        arguments_dict['filetype'] = 'DQM'
        arguments_dict['step'] = step
        arguments_dict['era'] = arguments_dict['era'].split(',')[0]
        arguments_dict['filein'] = f'"file:{sequence_name}_inDQM.root"'
        arguments_dict['python_filename'] = f'"{config_names["harvest"]}.py"'
        arguments_dict['number'] = -1
        harvesting_command = self.__build_cmsdriver('HARVESTING',
                                                    arguments_dict)
        return harvesting_command

    @staticmethod
    def chunkify(items, chunk_size):
        """
        Yield fixed size chunks of given list
        """
        start = 0
        chunk_size = max(chunk_size, 1)
        while start < len(items):
            yield items[start:start + chunk_size]
            start += chunk_size

    def get_output_module(self):
        """
        Return an output module name
        """
        eventcontent = [
            e for e in self.get('eventcontent') if not e.startswith('DQM')
        ]
        if not eventcontent:
            return ''

        return f'{eventcontent[0]}output'
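A small usage sketch of the helpers that do not need a parent request, assuming ModelBase stores the given attributes as-is; the values are illustrative:

seq = Sequence(json_input={'conditions': 'auto_run2_data',
                           'era': 'Run2_2018',
                           'datatier': ['MINIAOD', 'DQMIO'],
                           'eventcontent': ['MINIAOD', 'DQM'],
                           'step': ['RAW2DIGI', 'RECO', 'PAT', 'DQM:@rerecoCommon']})
seq.needs_harvesting()                        # True, because a DQM step is present
seq.get_output_module()                       # 'MINIAODoutput', the first non-DQM eventcontent
list(Sequence.chunkify([1, 2, 3, 4, 5], 2))   # [[1, 2], [3, 4], [5]]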
Example #18
0
class Request(ModelBase):

    _ModelBase__schema = {
        # Database id
        '_id': '',
        # PrepID
        'prepid': '',
        # Energy in TeV
        'energy': 0.0,
        # Step type: MiniAOD, NanoAOD, etc.
        'step': '',
        # CMSSW version
        'cmssw_release': '',
        # User notes
        'notes': '',
        # Input dataset name
        'input_dataset_name': '',
        # Output dataset name
        'output_dataset_name': '',
        # List of dictionaries that have cmsDriver options
        'sequences': [],
        # Action history
        'history': [],
        # Status is either new, approved, submitted or done
        'status': 'new',
        # Workflow name in computing when submitted
        'reqmgr_name': '',
        # Time per event
        'time_event': 5.0,
        # Size per event
        'size_event': 2000,
        # Priority
        'priority': 110000,
        # Runs for data injections
        'runs': [],
        # Type, depends on the datatier of the request
        'type': '',
        # Process string
        'process_string': '',
        # Default memory
        'memory': 14000,
        # All of the following are filled in after injection (created at injection time)
        # Request id (filled in the injection dictionary)
        'request_id': '',
        # Reco cfg (filled in the injection dictionary)
        'reco_cfg': '',
        # Harvest cfg (filled in the injection dictionary)
        'harvest_cfg': '',
        # Dataset name (the first part of the input dataset name, can be derived automatically)
        'dataset_name': ''
    }

    __lambda_checks = {
        'prepid': lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9]{1,50}'),
        'energy': lambda energy: energy >= 0.0,
        'time_event': lambda time_event: time_event >= 0.0,
        'size_event': lambda size_event: size_event >= 0.0,
        'priority': lambda priority: priority >= 0.0,
        'DQM': lambda DQM: isinstance(DQM, bool),
        'Reco': lambda Reco: isinstance(Reco, bool),
        'step': lambda step: step in ['RAW2DIGI', 'L1Reco', 'RECO', 'EI', 'PAT', 'DQM', 'NANO'] or 'ALCARECO' in step or 'DQM' in step,
        'datatier': lambda datatier: datatier in ['AOD', 'MiniAOD', 'NanoAOD', 'DQMIO', 'USER', 'ALCARECO'],
        'memory': lambda memory: memory >= 0,
        'type': lambda type_req: type_req in ['GEN-SIM', 'FEVTDEBUG', 'GEN-SIM-RAW', 'GEN-SIM-DIGI-RAW', 'GEN-SIM-DIGI-RAW-HLT', 'NANOAOD', 'AODSIM', 'MINIAOD', 'FEVTDEBUGHLT'],
        'cmssw_release': lambda cmssw_release: 'CMSSW' in cmssw_release,
        'reco_cfg': lambda reco_cfg: ModelBase.matches_regex(reco_cfg, '[a-zA-Z0-9]{1,50}'),
        'harvest_cfg': lambda harvest_cfg: ModelBase.matches_regex(harvest_cfg, '[a-zA-Z0-9]{1,50}'),
        'process_string': lambda process_string: ModelBase.matches_regex(process_string, '[a-zA-Z0-9]{1,50}'),
        'request_id': lambda request_id: ModelBase.matches_regex(request_id, '[a-zA-Z0-9]{1,50}'),
        'dataset_name': lambda dataset_name: ModelBase.matches_regex(dataset_name, '[a-zA-Z0-9]{1,50}')
    }

    def __init__(self, json_input=None):
        ModelBase.__init__(self, json_input)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name in self.__lambda_checks:
            return self.__lambda_checks.get(attribute_name)(attribute_value)

        return True
Example #19
0
class Ticket(ModelBase):
    """
    Ticket allows creating multiple similar RelVals in the same campaign
    """

    _ModelBase__schema = {
        # Database id (required by database)
        '_id': '',
        # PrepID
        'prepid': '',
        # Batch name
        'batch_name': '',
        # CMSSW release
        'cmssw_release': '',
        # Additional command to add to all cmsDrivers
        'command': '',
        # List of steps that additional command should be applied to
        'command_steps': [],
        # CPU cores
        'cpu_cores': 1,
        # List of prepids of relvals that were created from this ticket
        'created_relvals': [],
        # GPU parameters that will be added to selected steps
        'gpu': {
            'requires': 'forbidden',
            'gpu_memory': '',
            'cuda_capabilities': [],
            'cuda_runtime': '',
            'gpu_name': '',
            'cuda_driver_version': '',
            'cuda_runtime_version': ''
        },
        # List of steps that GPU parameters should be applied to
        'gpu_steps': [],
        # Action history
        'history': [],
        # Label to be used in runTheMatrix
        'label': '',
        # Type of relval: standard, upgrade, premix, etc.
        'matrix': 'standard',
        # Memory in MB
        'memory': 2000,
        # User notes
        'notes': '',
        # nStreams to be used in all steps, 0 defaults to nThreads
        'n_streams': 0,
        # Whether to recycle first step
        'recycle_gs': False,
        # Which step should be the first to run while recycling the input
        'recycle_input_of': '',
        # String to rewrite middle part of INPUT dataset(s) /.../THIS/...
        'rewrite_gt_string': '',
        # Tag to group workflow ids
        'sample_tag': '',
        # Overwrite default scram arch
        'scram_arch': '',
        # Status is either new or done
        'status': 'new',
        # Workflow ids
        'workflow_ids': [],
    }

    lambda_checks = {
        'prepid':
        lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,75}'
                                               ),
        'batch_name':
        ModelBase.lambda_check('batch_name'),
        'cmssw_release':
        ModelBase.lambda_check('cmssw_release'),
        'cpu_cores':
        ModelBase.lambda_check('cpu_cores'),
        '__created_relvals':
        ModelBase.lambda_check('relval'),
        '_gpu': {
            'requires': lambda r: r in ('forbidden', 'optional', 'required'),
            'cuda_capabilities': lambda l: isinstance(l, list),
            'gpu_memory': lambda m: m == '' or int(m) > 0,
        },
        'label':
        ModelBase.lambda_check('label'),
        'matrix':
        ModelBase.lambda_check('matrix'),
        'memory':
        ModelBase.lambda_check('memory'),
        'n_streams':
        lambda streams: 0 <= streams <= 16,
        'rewrite_gt_string':
        lambda rgs: ModelBase.matches_regex(rgs, '[a-zA-Z0-9\\.\\-_]{0,199}'),
        'sample_tag':
        ModelBase.lambda_check('sample_tag'),
        'status':
        lambda status: status in ('new', 'done'),
        'scram_arch':
        lambda s: not s or ModelBase.lambda_check('scram_arch')(s),
        'workflow_ids':
        lambda wf: len(wf) > 0,
        '__workflow_ids':
        lambda wf: wf > 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            json_input['workflow_ids'] = [
                float(wid) for wid in json_input['workflow_ids']
            ]
            json_input['recycle_gs'] = bool(json_input.get(
                'recycle_gs', False))
            if json_input.get('gpu', {}).get('requires') not in ('optional',
                                                                 'required'):
                json_input['gpu'] = self.schema().get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                json_input['gpu_steps'] = []

        ModelBase.__init__(self, json_input, check_attributes)
Example #20
0
class Ticket(ModelBase):
    """
    Ticket has a list of input datasets and a list of steps specifications
    Ticket is used to create requests for each input dataset
    """

    _ModelBase__schema = {
        # Database id (required by DB)
        '_id': '',
        # PrepID
        'prepid': '',
        # List of prepids of requests that were created from this ticket
        'created_requests': [],
        # Action history
        'history': [],
        # List of input dataset names
        'input_datasets': [],
        # User notes
        'notes': '',
        # Status is either new or done
        'status': 'new',
        # List of dicts that have subcampaign, processing_string, size/time per event values
        'steps': [],
    }

    lambda_checks = {
        'prepid':
        lambda prepid: ModelBase.matches_regex(prepid, '[a-zA-Z0-9_\\-]{1,75}'
                                               ),
        '__created_requests':
        lambda pi: ModelBase.matches_regex(pi, '[a-zA-Z0-9\\-_]{1,100}'),
        '__input_datasets':
        ModelBase.lambda_check('dataset'),
        'status':
        lambda status: status in {'new', 'done'},
        'steps':
        lambda s: len(s) > 0,
    }

    def __init__(self, json_input=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            steps = []
            for step in json_input.get('steps', []):
                steps.append({
                    'subcampaign':
                    step.get('subcampaign', ''),
                    'processing_string':
                    step.get('processing_string', ''),
                    'time_per_event':
                    float(step.get('time_per_event', 0)),
                    'size_per_event':
                    float(step.get('size_per_event', 0)),
                    'priority':
                    int(step.get('priority', 0))
                })

            json_input['steps'] = steps

        ModelBase.__init__(self, json_input, check_attributes)

    def check_attribute(self, attribute_name, attribute_value):
        if attribute_name == 'steps':
            if not isinstance(attribute_value, list):
                raise Exception(f'Expected {attribute_name} to be a list')

            for step in attribute_value:
                subcampaign = step['subcampaign']
                if not ModelBase.lambda_check('subcampaign')(subcampaign):
                    raise Exception(f'Bad subcampaign prepid {subcampaign}')

                processing_string = step['processing_string']
                if not ModelBase.lambda_check('processing_string')(
                        processing_string):
                    raise Exception(
                        f'Bad processing string {processing_string}')

                time_per_event = step['time_per_event']
                if time_per_event <= 0.0:
                    raise Exception(f'Bad time per event {time_per_event}')

                size_per_event = step['size_per_event']
                if size_per_event <= 0.0:
                    raise Exception(f'Bad size per event {size_per_event}')

                priority = step['priority']
                if not ModelBase.lambda_check('priority')(priority):
                    raise Exception(f'Bad priority {priority}')

        return super().check_attribute(attribute_name, attribute_value)
Example #21
0
class RelValStep(ModelBase):
    """
    RelValStep is one step of a RelVal - either a call to DAS for a list of input files
    or a cmsDriver command
    """

    _ModelBase__schema = {
        # Step name
        'name': '',
        # CMSSW version of this step
        'cmssw_release': '',
        # Hash of configuration file uploaded to ReqMgr2
        'config_id': '',
        # cmsDriver arguments
        'driver': {
            'beamspot': '',
            'conditions': '',
            'customise': '',
            'customise_commands': '',
            'data': False,
            'datatier': [],
            'era': '',
            'eventcontent': [],
            'extra': '',
            'fast': False,
            'filetype': '',
            'geometry': '',
            'hltProcess': '',
            'mc': False,
            'number': '10',
            'nStreams': '',
            'pileup': '',
            'pileup_input': '',
            'process': '',
            'relval': '',
            'runUnscheduled': False,
            'fragment_name': '',
            'scenario': '',
            'step': [],
        },
        # Events per lumi - if empty, events per job will be used
        'events_per_lumi': '',
        # GPU parameters
        'gpu': {
            'requires': 'forbidden',
            'gpu_memory': '',
            'cuda_capabilities': [],
            'cuda_runtime': '',
            'gpu_name': '',
            'cuda_driver_version': '',
            'cuda_runtime_version': ''
        },
        # Input file info
        'input': {
            'dataset': '',
            'lumisection': {},
            'run': [],
            'label': '',
        },
        # Keeping output of this task
        'keep_output': True,
        # Lumis per job - applicable to non-first steps
        'lumis_per_job': '',
        # Actual globaltag, resolved from auto:... conditions
        'resolved_globaltag': '',
        # Overwrite default CMSSW scram arch
        'scram_arch': '',
    }

    lambda_checks = {
        'cmssw_release':
        lambda cmssw: not cmssw or ModelBase.lambda_check('cmssw_release')
        (cmssw),
        'config_id':
        lambda cid: ModelBase.matches_regex(cid, '[a-f0-9]{0,50}'),
        '_driver': {
            'conditions':
            lambda c: not c or ModelBase.matches_regex(c, '[a-zA-Z0-9_]{0,50}'
                                                       ),
            'era':
            lambda e: not e or ModelBase.matches_regex(
                e, '[a-zA-Z0-9_\\,]{0,50}'),
            'scenario':
            lambda s: not s or s in {'pp', 'cosmics', 'nocoll', 'HeavyIons'},
        },
        '_gpu': {
            'requires': lambda r: r in ('forbidden', 'optional', 'required'),
            'cuda_capabilities': lambda l: isinstance(l, list),
            'gpu_memory': lambda m: m == '' or int(m) > 0,
        },
        '_input': {
            'dataset': lambda ds: not ds or ModelBase.lambda_check('dataset')
            (ds),
            'label': lambda l: not l or ModelBase.lambda_check('label')(l)
        },
        'lumis_per_job':
        lambda l: l == '' or int(l) > 0,
        'name':
        lambda n: ModelBase.matches_regex(n, '[a-zA-Z0-9_\\-]{1,150}'),
        'scram_arch':
        lambda s: not s or ModelBase.lambda_check('scram_arch')(s),
    }

    def __init__(self, json_input=None, parent=None, check_attributes=True):
        if json_input:
            json_input = deepcopy(json_input)
            # Remove -- from argument names
            schema = self.schema()
            if json_input.get('input', {}).get('dataset'):
                json_input['driver'] = schema.get('driver')
                json_input['gpu'] = schema.get('gpu')
                json_input['gpu']['requires'] = 'forbidden'
                step_input = json_input['input']
                for key, default_value in schema['input'].items():
                    if key not in step_input:
                        step_input[key] = default_value
            else:
                json_input['driver'] = {
                    k.lstrip('-'): v
                    for k, v in json_input['driver'].items()
                }
                json_input['input'] = schema.get('input')
                if json_input.get('gpu',
                                  {}).get('requires') not in ('optional',
                                                              'required'):
                    json_input['gpu'] = schema.get('gpu')
                    json_input['gpu']['requires'] = 'forbidden'

                driver = json_input['driver']
                for key, default_value in schema['driver'].items():
                    if key not in driver:
                        driver[key] = default_value

                if driver.get('data') and driver.get('mc'):
                    raise Exception(
                        'Both --data and --mc are not allowed in the same step'
                    )

                if driver.get('data') and driver.get('fast'):
                    raise Exception(
                        'Both --data and --fast are not allowed in the same step'
                    )

        ModelBase.__init__(self, json_input, check_attributes)
        if parent:
            self.parent = weakref.ref(parent)
        else:
            self.parent = None

    def get_prepid(self):
        return 'RelValStep'

    def get_short_name(self):
        """
        Return a shortened step name
        GenSimFull for anything that has GenSim in it
        HadronizerFull for anything that has Hadronizer in it
        Split and cut by underscores for other cases
        """
        name = self.get('name')
        if 'gensim' in name.lower():
            return 'GenSimFull'

        if 'hadronizer' in name.lower():
            return 'HadronizerFull'

        while len(name) > 50:
            name = '_'.join(name.split('_')[:-1])
            if '_' not in name:
                break

        return name

    def get_index_in_parent(self):
        """
        Return step's index in parent's list of steps
        """
        for index, step in enumerate(self.parent().get('steps')):
            if self == step:
                return index

        raise Exception(f'Step is not a child of {self.parent().get_prepid()}')

    def get_step_type(self):
        """
        Return whether this is cmsDriver or input file step
        """
        if self.get('input').get('dataset'):
            return 'input_file'

        return 'cms_driver'

    @staticmethod
    def chunkify(items, chunk_size):
        """
        Yield fixed size chunks of given list
        """
        start = 0
        chunk_size = max(chunk_size, 1)
        while start < len(items):
            yield items[start:start + chunk_size]
            start += chunk_size

    def __build_cmsdriver(self, step_index, arguments, for_submission):
        """
        Build a cmsDriver command from given arguments
        Add comment in front of the command
        """
        fragment_name = arguments['fragment_name']
        if not fragment_name:
            fragment_name = f'step{step_index + 1}'

        self.logger.info('Generating %s cmsDriver for step %s', fragment_name,
                         step_index)
        # Actual command
        command = ''
        if not for_submission:
            command += f'# Command for step {step_index + 1}:\n'

        command += f'cmsDriver.py {fragment_name}'
        # Comment in front of the command for better readability
        comment = f'# Arguments for step {step_index + 1}:\n'
        for key in sorted(arguments.keys()):
            if key in ('fragment_name', 'extra'):
                continue

            if not arguments[key]:
                continue

            if isinstance(arguments[key], bool):
                arguments[key] = ''

            if isinstance(arguments[key], list):
                arguments[key] = ','.join([str(x) for x in arguments[key]])

            command += f' --{key} {arguments[key]}'.rstrip()
            comment += f'# --{key} {arguments[key]}'.rstrip() + '\n'

        extra_value = arguments.get('extra')
        if extra_value:
            command += f' {extra_value}'
            comment += f'# <extra> {extra_value}\n'

        # Exit the script with error of cmsDriver.py
        command += ' || exit $?'
        if for_submission:
            return command

        return comment + '\n' + command

    def __build_das_command(self, step_index):
        """
        Build a dasgoclient command to fetch input dataset file names
        """
        input_dict = self.get('input')
        dataset = input_dict['dataset']
        lumisections = input_dict['lumisection']
        if lumisections:
            self.logger.info(
                'Making a DAS command for step %s with lumisection list',
                step_index)
            files_name = f'step{step_index + 1}_files.txt'
            lumis_name = f'step{step_index + 1}_lumi_ranges.txt'
            comment = f'# Arguments for step {step_index + 1}:\n'
            command = f'# Command for step {step_index + 1}:\n'
            comment += f'#   dataset: {dataset}\n'
            command += f'echo "" > {files_name}\n'
            for run, lumi_ranges in lumisections.items():
                for lumi_range in lumi_ranges:
                    comment += f'#   run: {run}, range: {lumi_range[0]} - {lumi_range[1]}\n'
                    command += 'dasgoclient --limit 0 --format json '
                    command += f'--query "lumi,file dataset={dataset} run={run}"'
                    command += f' | das-selected-lumis.py {lumi_range[0]},{lumi_range[1]}'
                    command += f' | sort -u >> {files_name}\n'

            lumi_json = json.dumps(lumisections)
            command += f'echo \'{lumi_json}\' > {lumis_name}'
            return (comment + '\n' + command).strip()

        runs = input_dict['run']
        if runs:
            self.logger.info('Making a DAS command for step %s with run list',
                             step_index)
            files_name = f'step{step_index + 1}_files.txt'
            comment = f'# Arguments for step {step_index + 1}:\n'
            command = f'# Command for step {step_index + 1}:\n'
            comment += f'#   dataset: {dataset}\n'
            command += f'echo "" > {files_name}\n'
            for run_chunk in self.chunkify(runs, 25):
                run_chunk = ','.join([str(r) for r in run_chunk])
                comment += f'#   runs: {run_chunk}\n'
                command += 'dasgoclient --limit 0 '
                command += f'--query "file dataset={dataset} run in [{run_chunk}]" '
                command += f'>> {files_name}\n'

            return (comment + '\n' + command).strip()

        return f'# Step {step_index + 1} is input dataset for next step: {dataset}'

    def get_command(self, custom_fragment=None, for_submission=False):
        """
        Return a cmsDriver command for this step
        Config file is named like this
        """
        step_type = self.get_step_type()
        index = self.get_index_in_parent()
        if step_type == 'input_file':
            if for_submission:
                return '# Nothing to do for input file step'

            return self.__build_das_command(index)

        arguments_dict = deepcopy(self.get('driver'))
        if custom_fragment:
            arguments_dict['fragment_name'] = custom_fragment

        # No execution
        arguments_dict['no_exec'] = True
        # Handle input/output file names
        arguments_dict['fileout'] = f'"file:step{index + 1}.root"'
        arguments_dict['python_filename'] = f'{self.get_config_file_name()}.py'
        # Add events per lumi to customise_commands
        events_per_lumi = self.get('events_per_lumi')
        if events_per_lumi:
            customise_commands = arguments_dict['customise_commands']
            customise_commands += ';"process.source.numberEventsInLuminosityBlock='
            customise_commands += f'cms.untracked.uint32({events_per_lumi})"'
            arguments_dict['customise_commands'] = customise_commands.lstrip(';')

        # Add the number of CPU cores of the RelVal if it is >1 and this is not a harvesting step
        cpu_cores = self.parent().get('cpu_cores')
        if (cpu_cores > 1 and not self.has_step('HARVESTING')
                and not self.has_step('ALCAHARVEST')):
            arguments_dict['nThreads'] = cpu_cores

        all_steps = self.parent().get('steps')
        if index > 0:
            previous = all_steps[index - 1]
            previous_type = previous.get_step_type()
            if previous_type == 'input_file':
                # If previous step is an input file, use it as input
                if for_submission:
                    arguments_dict['filein'] = '"file:_placeholder_.root"'
                else:
                    previous_input = previous.get('input')
                    previous_lumisection = previous_input['lumisection']
                    previous_run = previous_input['run']
                    if previous_lumisection:
                        # If there are lumi ranges, add a file with them and list of files as input
                        arguments_dict['filein'] = f'"filelist:step{index}_files.txt"'
                        arguments_dict['lumiToProcess'] = f'"step{index}_lumi_ranges.txt"'
                    elif previous_run:
                        # If there is a run whitelist, add the file
                        arguments_dict['filein'] = f'"filelist:step{index}_files.txt"'
                    else:
                        # If there are no lumi ranges or runs, use the whole dataset as input
                        previous_dataset = previous_input['dataset']
                        arguments_dict['filein'] = f'"dbs:{previous_dataset}"'
            else:
                # If previous step is a cmsDriver, use its output root file
                input_number = self.get_input_step_index() + 1
                eventcontent_index, eventcontent = self.get_input_eventcontent()
                if eventcontent_index == 0:
                    arguments_dict['filein'] = f'"file:step{input_number}.root"'
                else:
                    arguments_dict['filein'] = f'"file:step{input_number}_in{eventcontent}.root"'

        cms_driver_command = self.__build_cmsdriver(index, arguments_dict,
                                                    for_submission)
        return cms_driver_command
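
    # Hypothetical usage, to illustrate the two modes of get_command():
    #
    #     step.get_command()                     # commented, human-readable script fragment
    #     step.get_command(for_submission=True)  # bare command with a placeholder input file
    #
    # where step is an existing RelValStep instance (names here are illustrative).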

    def has_step(self, step):
        """
        Return whether this RelValStep has a certain step in its --step argument
        """
        for one_step in self.get('driver')['step']:
            if one_step.startswith(step):
                return True

        return False

    def has_eventcontent(self, eventcontent):
        """
        Return whether this RelValStep has a certain eventcontent in its --eventcontent argument
        """
        return eventcontent in self.get('driver')['eventcontent']

    def get_input_step_index(self):
        """
        Get the index of the step that will be used as input for the current step
        """
        all_steps = self.parent().get('steps')
        index = self.get_index_in_parent()
        this_is_harvesting = self.has_step('HARVESTING')
        self_step = self.get('driver')['step']
        this_is_alca = self_step and self_step[0].startswith('ALCA')
        self.logger.info('Get input for step %s, harvesting: %s', index,
                         this_is_harvesting)
        for step_index in reversed(range(0, index)):
            step = all_steps[step_index]
            # Harvesting step is never input
            if step.has_step('HARVESTING'):
                continue

            # AlCa step is never input
            step_step = step.get('driver')['step']
            if step_step and step_step[0].startswith('ALCA'):
                continue

            # Harvesting step needs DQM as input
            if this_is_harvesting and not step.has_eventcontent('DQM'):
                continue

            # AlCa step needs RECO as input
            if this_is_alca and not step.has_step('RECO'):
                continue

            return step_index

        name = self.get('name')
        if this_is_harvesting:
            raise Exception('No step with --eventcontent DQM could be found '
                            f'as input for {name} (Harvesting step)')

        if this_is_alca:
            raise Exception('No step with --step RECO could be found '
                            f'as input for {name} (AlCa)')

        raise Exception(f'No input step for {name} could be found')
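
    # Worked example (hypothetical step list, for illustration only): for steps
    # [input_file, GEN-SIM, RECO with DQM in --eventcontent, ALCA, HARVESTING],
    # the HARVESTING step at index 4 walks backwards, skips the ALCA step and
    # returns index 2 (the first step with DQM eventcontent); the ALCA step at
    # index 3 also returns index 2 (the first step with RECO in --step); the
    # RECO step itself returns index 1.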

    def get_input_eventcontent(self, input_step=None):
        """
        Return which eventcontent should be used as input for the current RelVal step
        """
        if input_step is None:
            all_steps = self.parent().get('steps')
            input_step_index = self.get_input_step_index()
            input_step = all_steps[input_step_index]

        this_is_harvesting = self.has_step('HARVESTING')
        self_step = self.get('driver')['step']
        this_is_alca = self_step and self_step[0].startswith('ALCA')
        input_step_eventcontent = input_step.get('driver')['eventcontent']
        if this_is_harvesting:
            for eventcontent_index, eventcontent in enumerate(
                    input_step_eventcontent):
                if eventcontent == 'DQM':
                    return eventcontent_index, eventcontent

            raise Exception(
                f'No DQM eventcontent in the input step {input_step_eventcontent}'
            )

        if this_is_alca:
            for eventcontent_index, eventcontent in enumerate(
                    input_step_eventcontent):
                if eventcontent.startswith('RECO'):
                    return eventcontent_index, eventcontent

            raise Exception(
                f'No RECO eventcontent in the input step {input_step_eventcontent}'
            )

        input_step_eventcontent = [
            x for x in input_step_eventcontent if not x.startswith('DQM')
        ]
        return len(input_step_eventcontent) - 1, input_step_eventcontent[-1]
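
    # Illustration (hypothetical values): if the input step was run with
    # --eventcontent RECOSIM,MINIAODSIM,DQM and this step is neither HARVESTING
    # nor ALCA, the DQM entry is filtered out and the last remaining
    # eventcontent is chosen, so the method returns (1, 'MINIAODSIM').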

    def get_config_file_name(self):
        """
        Return config file name without extension
        """
        if self.get_step_type() == 'input_file':
            return None

        index = self.get_index_in_parent()
        return f'step_{index + 1}_cfg'

    def get_relval_events(self):
        """
        Split the --relval argument into total events and events per job/lumi
        """
        relval = self.get('driver')['relval']
        if not relval:
            raise Exception('--relval is not set')

        relval = relval.split(',')
        if len(relval) < 2:
            raise Exception('Not enough parameters in --relval argument')

        requested_events = int(relval[0])
        events_per = int(relval[1])
        return requested_events, events_per
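
    # Example (hypothetical driver value): with --relval 25000,100 this returns
    # (25000, 100), i.e. 25000 requested events in total and 100 events per job/lumi.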

    def get_release(self):
        """
        Return CMSSW release of the step
        If CMSSW release is not specified, return release of the parent RelVal
        """
        cmssw_release = self.get('cmssw_release')
        if cmssw_release:
            return cmssw_release

        if not self.parent:
            raise Exception(
                'Could not get CMSSW release, because step has no parent')

        cmssw_release = self.parent().get('cmssw_release')
        return cmssw_release

    def get_scram_arch(self):
        """
        Return the scram arch of the step
        If scram arch is not specified, return scram arch of the release
        """
        scram_arch = self.get('scram_arch')
        if scram_arch:
            return scram_arch

        if self.parent:
            scram_arch = self.parent().get('scram_arch')
            if scram_arch:
                return scram_arch

        cmssw_release = self.get_release()
        scram_arch = get_scram_arch(cmssw_release)
        if scram_arch:
            return scram_arch

        raise Exception(f'Could not find SCRAM arch of {cmssw_release}')
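
    # Note: the module-level get_scram_arch() used above is assumed to be
    # imported from a utilities module of this codebase and to resolve the
    # default SCRAM arch of a CMSSW release; a purely illustrative stand-in
    # (hardcoded mapping, not the real implementation) could be:
    #
    #     def get_scram_arch(cmssw_release):
    #         """Hypothetical lookup of the default SCRAM arch for a release"""
    #         known = {'CMSSW_12_4_0': 'el8_amd64_gcc10'}  # illustrative entry
    #         return known.get(cmssw_release)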

    def get_gpu_requires(self):
        """
        Return whether GPU is required, optional or forbidden
        """
        return self.get('gpu')['requires']

    def get_gpu_dict(self):
        """
        Return a dictionary with GPU parameters for ReqMgr2
        """
        gpu_info = self.get('gpu')
        keys = {
            'cuda_capabilities': 'CUDACapabilities',
            'cuda_runtime': 'CUDARuntime',
            'gpu_name': 'GPUName',
            'cuda_driver_version': 'CUDADriverVersion',
            'cuda_runtime_version': 'CUDARuntimeVersion'
        }
        params = {
            key: gpu_info[attr]
            for attr, key in keys.items() if gpu_info.get(attr)
        }
        if gpu_info.get('gpu_memory'):
            params['GPUMemoryMB'] = int(gpu_info['gpu_memory'])

        return params
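
    # Illustration (hypothetical GPU attributes): gpu_info such as
    #     {'requires': 'optional', 'cuda_capabilities': ['7.5'],
    #      'cuda_runtime': '11.2', 'gpu_memory': '8000'}
    # would yield the ReqMgr2 parameters
    #     {'CUDACapabilities': ['7.5'], 'CUDARuntime': '11.2', 'GPUMemoryMB': 8000}
    # ('requires' is not in the keys mapping, so it is not passed on).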