Example #1
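AthenaLCGRTHandler.master_prepare: builds the LCGJobConfig (input/output sandboxes, environment variables and LCG requirements) for an Athena analysis job submitted through the Ganga LCG backend.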
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == 'DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        #this next (commented-out) loop would instruct ganga to use option_files that live in the
        #appropriate shared directory; the job will already have been prepared by this point.
        #(if is_prepared is True, then we've most likely submitted a job via GangaRobot and we know what we're doing.)
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        if not app.atlas_exetype in ['EXE']:
            athena_options = ' '.join([
                os.path.basename(opt_file.name) for opt_file in app.option_file
            ])
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options

            inputbox = [File(opt_file.name) for opt_file in app.option_file]
        else:
            athena_options = ' '.join([
                os.path.basename(opt_file.name) for opt_file in app.option_file
            ])
            inputbox = []

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh')))

        if job.inputdata and job.inputdata._name == "AMIDataset" and job.inputdata.goodRunListXML.name != '':
            inputbox.append(File(job.inputdata.goodRunListXML.name))

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox, 'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox, 'ganga-stagein.py')

        if app.user_area.name:
            #we will now use the user_area that's stored in the user's shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http') < 0:
            #we will now use the group_area that's stored in the user's shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))

        if app.user_setupfile.name:
            inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause

        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) or (job._getRoot().splitter
               and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get',
                          'dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (
                    job._getRoot().splitter
                    and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox, 'dq2_get_old')

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py',
                          'dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not 'ganga-stage-in-out-dq2.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
            _append_files(inputbox, 'ganga-joboption-parse.py')
            if not 'dq2info.tar.gz' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'dq2info.tar.gz')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        if not 'dq2tracerreport.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'dq2tracerreport.py')
        if not 'db_dq2localid.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'db_dq2localid.py')
        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'getstats.py')

        if str(app.atlas_release).find('12.') >= 0:
            _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so')
        elif str(app.atlas_release).find('13.') >= 0:
            _append_files(inputbox, 'libdcap.so')
        else:
            _append_files(inputbox, 'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox

        #       prepare environment

        if not app.atlas_release:
            raise ApplicationConfigurationError(
                None,
                'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.'
            )

        environment = {
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': athena_options,
            'ATHENA_USERSETUPFILE': athena_usersetupfile,
            'ATLAS_PROJECT': app.atlas_project,
            'ATLAS_EXETYPE': app.atlas_exetype,
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }
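        # These variables travel with the LCGJobConfig and are presumably read
        # by the run-athena-lcg.sh wrapper on the worker node; illustrative
        # (hypothetical) values: ATLAS_RELEASE='17.2.7', ATHENA_OPTIONS='jobOptions.py'.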

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
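                # Each entry is expected to be of the form 'NAME=value', e.g.
                # a (hypothetical) 'FRONTIER_LOG_LEVEL=info' becomes
                # environment['FRONTIER_LOG_LEVEL'] = 'info'.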
                try:
                    vars = re.match("^(\w+)=(.*)", var).group(1)
                    value = re.match("^(\w+)=(.*)", var).group(2)
                    environment[vars] = value
                except:
                    logger.warning(
                        'Athena.atlas_environment variable not correctly configured: %s',
                        var)
                    pass

        if app.atlas_production and app.atlas_release.find(
                '12.') >= 0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub('\.', '_', app.atlas_production)
            prod_url = config[
                'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz'
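            # e.g. a (hypothetical) atlas_production of '12.0.6.5' yields
            # <PRODUCTION_ARCHIVE_BASEURL>/AtlasProduction_12_0_6_5_noarch.tar.gz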
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                     or app.atlas_release.find('12.') <= 0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production

        if app.user_area.name:
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http') >= 0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(
                    app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.backend.requirements._name == 'AtlasLCGRequirements':
            requirements = AtlasLCGRequirements()
        elif job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                environment['GANGA_LFC_HOST'] = job.inputdata.lfc

        if 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']

        if job.inputdata and (job.inputdata._name
                              in ['DQ2Dataset', 'AMIDataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name in [
                    'AtlasLCGRequirements', 'AtlasCREAMRequirements'
            ] and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')
                    and not (job.splitter
                             and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

            # Add TAG datasetname
            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(
                    job.inputdata.tagdataset)

#       prepare job requirements
        requirementsSoftware = getLCGReleaseTag(app)

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error(
                'The athena release %s you are using is not recommended for distributed analysis !',
                requirementsSoftware[0])
            logger.error(
                'For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !'
            )
            requirements.software = requirementsSoftware
        else:
            requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find(
                'x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'
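        # e.g. a (hypothetical) release tag 'VO-atlas-offline-17.2.7-x86_64-slc5-gcc43-opt'
        # switches the job to the 64-bit setup.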

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ] and job.inputdata.type in [
                'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            # Override the default only if dq2client_version is present in the
            # job backend's requirements object; default to None so the check
            # below cannot reference an undefined name.
            dq2client_version = None
            try:
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (
                    job.splitter and
                (job.splitter._name == 'DQ2JobSplitter'
                 or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
                )
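            # atlas_dbrelease is expected to be '<DBRelease dataset>:<DBRelease file>',
            # e.g. (hypothetical) 'ddo.000001.Atlas.Ideal.DBRelease.v170601:DBRelease-17.6.1.tar.gz'.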
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(
                    ':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except:
                logger.warning(
                    'Problems with the atlas_dbrelease configuration')

        # Fill AtlasLCGRequirements access mode
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning(
                "config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !"
            )
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(-1)
            lines = fileHandle.read()
            inputbox.append(FileBuffer('access_info.pickle', lines))
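            # The pickled access information is shipped as an in-memory
            # FileBuffer ('access_info.pickle') rather than a file on disk,
            # alongside the access_info.py helper appended below.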
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'make_filestager_joption.py',
                              'dm_util.py', 'fs-copy.py')


#       jobscript

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')

        #       output sandbox
        outputbox = [
            'output_guids', 'output_location', 'output_data', 'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (
                job.inputdata and
            (job.inputdata._name
             in ['DQ2Dataset', 'AMIDataset', 'EventPicking'])
                and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']

        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
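Example #2
TagPrepareLCGRTHandler.master_prepare: builds the LCGJobConfig for a TAG-info preparation job (it ships get_tag_info.py and collects taginfo.pkl in the output sandbox); most Athena-specific environment entries are deliberately left empty.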
    def master_prepare( self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent() # Returns job or subjob object
        logger.debug('TagPrepareLCGRTHandler master_prepare called: %s', job.id )

        self.username = gridProxy.identity(safe=True)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' %(job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None,printout )


        # prepare input sandbox
        inputbox = [ ( File(os.path.join(__athdirectory__,'athena-utility.sh')) ),
                     ( File(os.path.join(__directory__,'get_tag_info.py')))]
            
        # CN: added TNTJobSplitter clause  
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            _append_files(inputbox,os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
                          os.path.join(__athdirectory__, 'dq2_get'),
                          os.path.join(__athdirectory__, 'dq2info.tar.gz'))

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
        if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__,'dq2tracerreport.py'))
        if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__, 'db_dq2localid.py'))
        if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__, 'getstats.py'))

        _append_files(inputbox,os.path.join(__athdirectory__, 'libdcap.so'))

        if job.inputsandbox: inputbox += job.inputsandbox
            
        # prepare environment
        environment={
            'MAXNUMREFS'     : str(app.max_num_refs),
            'STREAM_REF'     : app.stream_ref,
            'ATLAS_RELEASE'  : app.atlas_release,
            'ATHENA_OPTIONS' : '',
            'ATHENA_USERSETUPFILE' : '',
            'ATLAS_PROJECT' : '',
            'ATLAS_EXETYPE' : 'ATHENA',
            'GANGA_VERSION' : configSystem['GANGA_VERSION']
        }
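        # For TAG preparation the Athena-specific entries are left blank and
        # ATLAS_EXETYPE is fixed to 'ATHENA'; only MAXNUMREFS, STREAM_REF and
        # ATLAS_RELEASE carry job-specific values.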

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']
        requirements = AtlasLCGRequirements()
        
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1
                    
            else:
                raise ApplicationConfigurationError(None,'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.')

            # Raise submission exception
            if (not job.backend.CE and 
                not (job.backend.requirements._name == 'AtlasLCGRequirements' and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob')):

                raise ApplicationConfigurationError(None,'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            if job.inputdata.match_ce_all or job.inputdata.min_num_files>0:
                raise ApplicationConfigurationError(None,'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        cmtconfig = app.atlas_cmtconfig
        if not cmtconfig in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
            cmtconfig = 'i686-slc4-gcc34-opt'

        requirements.software = ['VO-atlas-offline-%s-%s' %(app.atlas_release, cmtconfig )]
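        # e.g. a (hypothetical) tag 'VO-atlas-offline-15.6.9-i686-slc5-gcc43-opt'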

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata.type in [ 'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER']:
            # override the default one if the dq2client_version is present
            # in the job backend's requirements object; default to None so
            # the check below cannot reference an undefined name
            dq2client_version = None
            try:
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

#       jobscript

        exe = os.path.join(__directory__,'run-tagprepare-lcg.sh')
        #exe = os.path.join(__directory__,'get_tag_info.py')

#       output sandbox
        outputbox = [
            'taginfo.pkl'
        ]


        if job.outputsandbox: outputbox += job.outputsandbox

        return LCGJobConfig(File(exe),inputbox,[],outputbox,environment,[],requirements) 
Example #3
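AthenaLCGRTHandler.master_prepare again, taken from a different revision of the handler: compared with Example #1 it drops the AMIDataset and ATLASDataset handling and uses the single-argument ApplicationConfigurationError signature.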
    def master_prepare( self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent() # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id )


        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name=='DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' %(job.backend.requirements.sites)
                    raise ApplicationConfigurationError(printout )


        #this next (commented-out) loop would instruct ganga to use option_files that live in the
        #appropriate shared directory; the job will already have been prepared by this point.
        #(if is_prepared is True, then we've most likely submitted a job via GangaRobot and we know what we're doing.)
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        if not app.atlas_exetype in ['EXE']:
            athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file])
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options
                
            inputbox = [ File(opt_file.name) for opt_file in app.option_file ]
        else:
            athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file])
            inputbox = []
            
        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

#       prepare input sandbox

        
        inputbox.append( File(os.path.join(__directory__,'athena-utility.sh')) )

        if app.user_area.name: 
            #we will now use the user_area that's stored in the user's shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http')<0:
            #we will now use the group_area that's stored in the user's shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))
    
        if app.user_setupfile.name: inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause  

        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ] ) or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2_get','dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox,'dq2_get_old')

        if job.inputdata and job.inputdata._name ==  'ATLASTier3Dataset':
            _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError('j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'ganga-stage-in-out-dq2.py')
            _append_files(inputbox,'ganga-joboption-parse.py')
            if not 'dq2info.tar.gz' in [os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'dq2info.tar.gz') 

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'dq2tracerreport.py')
        if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'db_dq2localid.py')
        if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'getstats.py')


        if str(app.atlas_release).find('12.')>=0:
            _append_files(inputbox, 'libDCache.so','libRFIO.so','libdcap.so')
        elif str(app.atlas_release).find('13.')>=0:
            _append_files(inputbox,'libdcap.so')
        else:
            _append_files(inputbox,'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox
            
#       prepare environment

        if not app.atlas_release: 
            raise ApplicationConfigurationError('j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.')

        environment={ 
            'ATLAS_RELEASE'  : app.atlas_release,
            'ATHENA_OPTIONS' : athena_options,
            'ATHENA_USERSETUPFILE' : athena_usersetupfile,
            'ATLAS_PROJECT' : app.atlas_project,
            'ATLAS_EXETYPE' : app.atlas_exetype,
            'GANGA_VERSION' : configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
                try:
                    vars = re.match("^(\w+)=(.*)",var).group(1)
                    value = re.match("^(\w+)=(.*)",var).group(2)
                    environment[vars]=value
                except:
                    logger.warning('Athena.atlas_environment variable not correctly configured: %s', var)
                    pass

        if app.atlas_production and app.atlas_release.find('12.')>=0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub('\.','_',app.atlas_production)
            prod_url = config['PRODUCTION_ARCHIVE_BASEURL']+'/AtlasProduction_'+ temp_atlas_production +'_noarch.tar.gz'
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.')<=0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production
        
        if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http')>=0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)
        
        if job.backend.requirements._name == 'AtlasLCGRequirements':
            requirements = AtlasLCGRequirements()
        elif job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            requirements = AtlasLCGRequirements()
        
        if 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        
        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1
                    
            else:
                raise ApplicationConfigurationError('j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.')

            # Raise submission exception
            if (not job.backend.CE and 
                not (job.backend.requirements._name in [ 'AtlasLCGRequirements', 'AtlasCREAMRequirements' ] and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob') and
                not (job.splitter and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            if job.inputdata.match_ce_all or job.inputdata.min_num_files>0:
                raise ApplicationConfigurationError('Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

#       prepare job requirements
        requirementsSoftware = getLCGReleaseTag( app )

        releaseBlacklist = job.backend.requirements.list_release_blacklist()     
        if requirementsSoftware and  requirementsSoftware[0] in releaseBlacklist:
            logger.error('The athena release %s you are using is not recommended for distributed analysis !', requirementsSoftware[0])
            logger.error('For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !')
            requirements.software = requirementsSoftware
        else:
            requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit    
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find('x86_64')>=0:
            environment['ATLAS_ARCH'] = '64'
            
        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ]  and job.inputdata.type in [ 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            # override the default one if the dq2client_version is present
            # in the job backend's requirements object; default to None so
            # the check below cannot reference an undefined name
            dq2client_version = None
            try:
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (job.splitter and (job.splitter._name == 'DQ2JobSplitter' or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !')
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except:
                logger.warning('Problems with the atlas_dbrelease configuration')


        # Fill AtlasLCGRequirements access mode 
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning("config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !" )
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info,fileHandle)
            fileHandle.seek(-1)
            lines = fileHandle.read()
            inputbox.append(FileBuffer( 'access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [ os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')

#       jobscript

        exe = os.path.join(__directory__,'run-athena-lcg.sh')

#       output sandbox
        outputbox = [
            'output_guids',
            'output_location',
            'output_data',
            'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']
            
        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'
            
        return LCGJobConfig(File(exe),inputbox,[],outputbox,environment,[],requirements) 
Example #4
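TagPrepareLCGRTHandler.master_prepare, the same handler as in Example #2 from a differently formatted copy of the code.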
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('TagPrepareLCGRTHandler master_prepare called: %s',
                     job.id)

        self.username = gridProxy.identity(safe=True)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        # prepare input sandbox
        inputbox = [(File(os.path.join(__athdirectory__,
                                       'athena-utility.sh'))),
                    (File(os.path.join(__directory__, 'get_tag_info.py')))]

        # CN: added TNTJobSplitter clause
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            _append_files(
                inputbox,
                os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
                os.path.join(__athdirectory__, 'dq2_get'),
                os.path.join(__athdirectory__, 'dq2info.tar.gz'))

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in [
                'FILE_STAGER'
        ]:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(
                inputbox,
                os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
        if not 'dq2tracerreport.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'dq2tracerreport.py'))
        if not 'db_dq2localid.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'db_dq2localid.py'))
        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'getstats.py'))

        _append_files(inputbox, os.path.join(__athdirectory__, 'libdcap.so'))

        if job.inputsandbox: inputbox += job.inputsandbox

        # prepare environment
        environment = {
            'MAXNUMREFS': str(app.max_num_refs),
            'STREAM_REF': app.stream_ref,
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': '',
            'ATHENA_USERSETUPFILE': '',
            'ATLAS_PROJECT': '',
            'ATLAS_EXETYPE': 'ATHENA',
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']
        requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name
                                            == 'AtlasLCGRequirements'
                                            and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        cmtconfig = app.atlas_cmtconfig
        if not cmtconfig in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
            cmtconfig = 'i686-slc4-gcc34-opt'

        requirements.software = [
            'VO-atlas-offline-%s-%s' % (app.atlas_release, cmtconfig)
        ]

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata.type in [
                'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ]:
            # Override the default only if dq2client_version is present in the
            # job backend's requirements object; default to None so the check
            # below cannot reference an undefined name.
            dq2client_version = None
            try:
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                requirements.software += [
                    'VO-atlas-dq2clients-%s' % dq2client_version
                ]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version


#       jobscript

        exe = os.path.join(__directory__, 'run-tagprepare-lcg.sh')
        #exe = os.path.join(__directory__,'get_tag_info.py')

        #       output sandbox
        outputbox = ['taginfo.pkl']

        if job.outputsandbox: outputbox += job.outputsandbox

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
Example #5
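master_prepare from an ATLAS production-style runtime handler (the class name is not shown in this excerpt): it validates the local ATLAS/CMT environment, encodes transform flags into environment variables, and resolves DQ2 output and backup storage locations; the listing is truncated at the end of the section.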
    def master_prepare(self, app, appmasterconfig):
        if app.siteroot:
            os.environ["SITEROOT"] = app.siteroot
        os.environ["CMTSITE"] = app.cmtsite

        job = app._getParent()
        if job.backend._name in ["Local", "PBS"]:
            if app.dryrun:
                os.environ["SITEROOT"] = "NONE"
                os.environ["CMTSITE"] = "NONE"
            try:
                assert "SITEROOT" in os.environ
            except:
                raise ApplicationConfigurationError(
                    None, " ATLAS environment not defined")

            try:
                assert "CMTSITE" in os.environ
            except:
                raise ApplicationConfigurationError(
                    None,
                    "cmt not setup properly. Please check your ATLAS setup or run on the grid"
                )

            if os.environ["CMTSITE"] == "CERN" and "AtlasVersion" in os.environ:
                logger.debug(
                    "Checking AtlasVersion: %s and selected atlas release %s" %
                    (os.environ["AtlasVersion"], app.atlas_rel))
                try:
                    assert app.atlas_rel == os.environ["AtlasVersion"]
                except:
                    logger.error(
                        "Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value."
                        % os.environ["AtlasVersion"])
                    app.atlas_release = os.environ["AtlasVersion"]
                    app.atlas_rel = os.environ["AtlasVersion"]
            elif "ATLAS_RELEASE" in os.environ:
                logger.debug(
                    "Checking ATLAS_RELEASE: %s and selected atlas release %s"
                    % (os.environ["ATLAS_RELEASE"], app.atlas_rel))
                try:
                    assert app.atlas_rel == os.environ["ATLAS_RELEASE"]
                except:
                    logger.error(
                        "Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value."
                        % os.environ["ATLAS_RELEASE"])
                    app.atlas_rel = os.environ["ATLAS_RELEASE"]
            else:
                logger.warning(
                    "Could not compare requested release and local setup. Hope you are doing something sensible..."
                )

        if job.backend._name == "LSF":
            try:
                assert "CMTSITE" in os.environ and os.environ[
                    "CMTSITE"] == "CERN"
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Error, CERN ATLAS AFS environment not defined. Needed by LSF backend"
                )

        environment = {'T_LCG_GFAL_INFOSYS': 'atlas-bdii.cern.ch:2170'}

        trfopts = app.transflags
        # need to parse them to be able to pass them in an environment variable
        trfopts = trfopts.replace(" ", "/W")
        trfopts = trfopts.replace("-", "/F")

        trflags = trfopts
        if app.mode == "evgen":
            trflags = "/Ft"
            if app.verbosity:
                trflags += "/W/Fl/W%s" % app.verbosity

        if trflags:
            environment["TRFLAGS"] = trflags

        # setting output site from input data if any.
        outsite, backup, outputlocation, backuplocation = "", "", "", ""
        logger.info("checking sites from input data: %s" % str(app.sites))

        # must distinguish running site (backend.requirements.sites) and output storage site (app.se_name)

        # matching with user's wishes (app.se_name or backend.requirements.sites)

        usersites = []
        if len(job.backend.requirements.sites) > 0:
            usersites = job.backend.requirements.sites
##        elif job.application.se_name and job.application.se_name != "none":
##            usersites=job.application.se_name.split(" ")
        logger.info("user selection: %s" % str(usersites))

        # select sites which are matching user's wishes, if any.
        selectedSites = app.sites
        if len(selectedSites) == 0:
            selectedSites = usersites
        if len(usersites) > 0 and len(app.sites) > 0:
            selectedSites = job.inputdata.trimSites(usersites, app.sites)
        # evgen case (no input data-> app.sites=[])
        if len(app.sites) == 0 and app.se_name and app.se_name != "none":
            selectedSites = app.se_name.split(" ")

        # This comes last: using surviving sites from matching process.
        if len(selectedSites) == 0:
            try:
                assert len(usersites) == 0
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Could not find a match between input dataset locations: %s and your requested sites: %s. Please use a space token compatible with one of the input dataset locations (replace _XXXDISK or _XXXTAPE by _LOCALGROUPDISK or _SCRATCHDISK if necessary)"
                    % (str(app.sites), str(usersites)))
            logger.warning(
                "Failed to obtain processing site from input data, will use default value: CERN-PROD_SCRATCHDISK and submit production to CERN"
            )
            selectedSites.append(_defaultSite)

        [outlfc, outsite,
         outputlocation] = job.outputdata.getDQ2Locations(selectedSites[0])
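        # getDQ2Locations is expected to return a triplet of
        # [LFC host, DQ2 site name, storage location / SURL prefix].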
        if len(selectedSites) > 1:
            [outlfc2, backup,
             backuplocation] = job.outputdata.getDQ2Locations(selectedSites[1])

        # app.se_name set: users wishes to get the output data written to another site than the one hosting the input.
        # One needs to ensure that this location is at least in the same cloud as the targeted processing site. This is done by ensuring that the LFCs are the same.
        userSEs = []
        outse = ""
        if job.application.se_name and job.application.se_name != "none":
            userSEs = job.application.se_name.split(" ")
            # loop through userSEs until up to 2 valid sites are found...
            outse = ""
            for SE in userSEs:
                [lfc, se, location] = job.outputdata.getDQ2Locations(SE)
                if lfc == outlfc:
                    if not outse:
                        outse = se  # important to use outse and not outsite here, as outsite is used for selection of the processing site.
                        # userSEs override outlfc and outputlocation, but not outsite, as outsite is unfortunately used for the choice of the processing site.
                        outputlocation = location
                    else:
                        outlfc2 = lfc
                        backup = se
                        backuplocation = location
                        break
        # finally: if no backup location is defined at this point, enforce CERN-PROD_SCRATCHDISK as backup location
        if backup == "":
            [outlfc2, backup,
             backuplocation] = job.outputdata.getDQ2Locations(_defaultSite)

        logger.info("Final selection of output sites: %s , backup: %s" %
                    (outsite, backup))
        try:
            assert outsite
        except:
            raise ApplicationConfigurationError(
                None,
                "Could not find suitable location for your output. Please subscribe your input dataset (if any) to a suitable location or change application.se_name to a suitable space token"
            )

        # srmv2 sites special treatment: the space token has been prefixed to the outputlocation and must be removed now:
        imin = string.find(outputlocation, "token:")
        imax = string.find(outputlocation, "srm:")
        spacetoken = ""
        if imin > -1 and imax > -1:
            spacetoken = outputlocation[imin + 6:imax - 1]
            outputlocation = outputlocation[imax:]
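        # Illustration (hypothetical value): an outputlocation such as
        # "token:ATLASSCRATCHDISK:srm://se.example.org/atlas" is split above into
        # spacetoken = "ATLASSCRATCHDISK" and
        # outputlocation = "srm://se.example.org/atlas".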
        # same treatment for backup location if any
        imin = string.find(backuplocation, "token:")
        imax = string.find(backuplocation, "srm:")
        bst = ""
        if imin > -1 and imax > -1:
            bst = backuplocation[imin + 6:imax - 1]
            backuplocation = backuplocation[imax:]

        environment["OUTLFC"] = outlfc
        environment["OUTSITE"] = outsite
        if outse:
            environment[
                "OUTSITE"] = outse  # user's choice for output storage location overriding AthenaMC's.

        environment["OUTPUT_LOCATION"] = outputlocation
        if spacetoken:
            environment["SPACETOKEN"] = spacetoken
        if backup:
            environment["OUTLFC2"] = outlfc2
            environment["OUTSITE2"] = backup
            environment["OUTPUT_LOCATION2"] = backuplocation

        environment["PROD_RELEASE"] = app.prod_release

        # setting environment["BACKEND"]
        # Local, Condor become "batch". LSF becomes "batch" unless the inputdata is on castor (in this case, it becomes "castor")
        environment["BACKEND"] = job.backend._name
        environment["BACKEND_DATA"] = app.backend_inputdata
        if job.backend._name == "LSF" and len(app.turls.values()) > 0:
            turl = app.turls.values()[0]
            if string.find(turl, "castor") > -1:
                environment["BACKEND_DATA"] = "castor"
            else:
                environment["BACKEND_DATA"] = "batch"
        if job.backend._name in ["Local", "Condor", "PBS"]:
            environment["BACKEND_DATA"] = "batch"
            environment["SITEROOT"] = os.environ["SITEROOT"]
            environment["CMTSITE"] = os.environ["CMTSITE"]

#       finalise environment

# preparing input sandbox, output sandbox, environment vars and job requirements

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'setup-release.sh')),
            File(os.path.join(os.path.dirname(__file__), 'stage-in.sh')),
            File(os.path.join(os.path.dirname(__file__), 'stage-out.sh')),
            File(os.path.join(os.path.dirname(__file__), 'adler32.py'))
        ]

        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [File(app.transform_archive)]
        elif app.transform_archive:
            # tarball in local or remote web area.
            if string.find(app.transform_archive, "http") >= 0:
                environment['TRANSFORM_ARCHIVE'] = "%s" % (
                    app.transform_archive)
            else:
                myfile = os.path.basename(app.transform_archive)
                myfile = "http://cern.ch/atlas-computing/links/kitsDirectory/Production/kits/" + myfile
                environment['TRANSFORM_ARCHIVE'] = "%s" % (myfile)
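                # Hypothetical example: a transform_archive value of
                # "AtlasProduction_15_6_1_noarch.tar.gz" that neither exists
                # locally nor is a URL is rewritten to
                # "http://cern.ch/atlas-computing/links/kitsDirectory/Production/kits/AtlasProduction_15_6_1_noarch.tar.gz",
                # presumably for the worker node to download.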

        if app.evgen_job_option and os.path.exists(app.evgen_job_option):
            # locally modified job option file to add to the input sand box
            inputbox += [File(app.evgen_job_option)]
            # need to strip the path away.
            self.evgen_job_option = app.evgen_job_option.split("/")[-1]
            environment['CUSTOM_JOB_OPTION'] = "%s" % (self.evgen_job_option)
        elif app.evgen_job_option:
            self.evgen_job_option = app.evgen_job_option

        # user area:
        if app.userarea:
            inputbox.append(File(app.userarea))
            environment['USER_AREA'] = os.path.basename(app.userarea)

        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [file]

        outputbox = []
        outputGUIDs = 'output_guids'
        outputLOCATION = 'output_location'
        outputbox.append(outputGUIDs)
        outputbox.append(outputLOCATION)
        outputbox.append('output_data')
        if (job.outputsandbox):
            for file in job.outputsandbox:
                outputbox += [file]

        # switch JobTransforms/AtlasProduction package.
        self.isJT = string.find(app.transform_archive, "JobTransform")
        if self.isJT > -1 and app.mode == "evgen":
            environment['T_CONTEXT'] = str(
                self.number_events_job
            )  # needed to avoid the prodsys failure mechanism based on a hardcoded minimum of 5000 events per job

        #       prepare job requirements

        if hasattr(job.backend, 'requirements') and hasattr(
                job.backend.requirements, 'sites') and hasattr(
                    job.backend.requirements, 'software') and hasattr(
                        job.backend.requirements, 'other'):
            requirements = job.backend.requirements
        else:
            requirements = AtlasLCGRequirements()

#        requirements.other.append('other.GlueCEStateStatus=="Production"') # missing production
        imax = string.rfind(app.atlas_rel, ".")
        rel = string.atof(
            app.atlas_rel[:imax]
        )  # to deal with string comparisons: [2-9].0.0 > 11.0.0.
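        # Example of why both tests below are needed (assumed release strings):
        # "9.0.4" <= "11.4.0" is False as a string comparison (because "9" > "1"),
        # but its numeric prefix rel = 9.0 <= 11.4 still selects the old
        # VO-atlas-release tag.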
        if app.atlas_rel <= "11.4.0" or rel <= 11.4:
            requirements.software = ['VO-atlas-release-%s' % app.atlas_rel]
        elif app.atlas_rel < "12.0.3":
            requirements.software = ['VO-atlas-offline-%s' % app.atlas_rel]
        elif app.atlas_rel >= "14.0.0" and app.atlas_rel <= "15.6.1":
            requirements.software = [
                'VO-atlas-offline-%s-i686-slc4-gcc34-opt' % app.atlas_rel
            ]
        elif app.atlas_rel > "15.6.1":
            requirements.software = [
                'VO-atlas-offline-%s-i686-slc5-gcc43-opt' % app.atlas_rel
            ]
        else:
            requirements.software = ['VO-atlas-production-%s' % app.atlas_rel]
        # case of prod_release set
        if app.prod_release:
            # no prod release tag before 13.0.X
            if app.atlas_rel < "14.0.0" and app.atlas_rel > "13.0.0":
                requirements.software = [
                    'VO-atlas-production-%s' % app.prod_release
                ]
            elif app.atlas_rel >= "14.0.0" and app.atlas_rel <= "15.6.1":
                requirements.software = [
                    'VO-atlas-production-%s-i686-slc4-gcc34-opt' %
                    app.prod_release
                ]
            elif app.atlas_rel > "15.6.1":
                requirements.software = [
                    'VO-atlas-production-%s-i686-slc5-gcc43-opt' %
                    app.prod_release
                ]

        if app.transform_archive and string.find(app.transform_archive,
                                                 "AtlasTier0") > -1:
            requirements.software = ['VO-atlas-tier0-%s' % app.prod_release]
##        extraConfig=getConfig('defaults_AtlasLCGRequirements')
##        if  'dq2client_version' in extraConfig:
##            dq2client_version = extraConfig['dq2client_version']

##        if job.backend.requirements.dq2client_version:
##            dq2client_version = job.backend.requirements.dq2client_version
##        try:
##            assert dq2client_version!=""
##        except:
##            raise  ApplicationConfigurationError(None,"Please give a value to dq2client_version in job.backend.requirements.")

#        requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
#        requirements.other+=['RegExp("VO-atlas-dq2clients",other.GlueHostApplicationSoftwareRunTimeEnvironment)']

# controlled relaxation for simple cases: one single input dataset, less than 200 subjobs. In this case, the subjobs can be submitted to the whole cloud.
        loosematch = "true"
        if job.splitter and job.splitter.numsubjobs > 200:
            loosematch = "false"
        if job.inputdata and (job.inputdata.cavern or job.inputdata.minbias):
            loosematch = "false"
# commented the next block out as stage-in.sh can now ensure that the local copy is downloaded on the first attempt. However, as a safety net, we maintain the veto on complex jobs with pileup and/or minbias, because they are heavyweight anyway and should not be run everywhere.
#        if app.dbrelease:
#            loosematch="false"
        if len(job.backend.requirements.sites) > 0:
            loosematch = "false"  # specified sites take precedence over cloud.

        userCloud = job.backend.requirements.cloud
        if userCloud == 'ALL':
            userCloud = ''  # not supporting the AthenaLCGRequirements catch-all
        # By default: job to data, strict: target outsite and nothing else.
        requirements.sites = outsite

        if loosematch == "true" and userCloud:
            logger.debug(
                "Your job qualifies for controlled relaxation of the current job-to-data policy. Now checking that requested cloud matches with input data"
            )

            from dq2.info.TiersOfATLAS import whichCloud, ToACache
            targetSites = whichCloud(outsite)
            cloud = ""
            for cloudID, sites in ToACache.dbcloud.iteritems():
                if sites == targetSites:
                    cloud = cloudID
            try:
                assert cloud == userCloud
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Requested cloud: %s did not match selected processing cloud: %s. Reverting to submission to site %s"
                    % (userCloud, cloud, outsite))

            requirements.cloud = cloud
            # looks like cloud has to be converted into a list of sites anyway, and this is not done in AtlasLCGRequirements.convert()...
            allsites = requirements.list_sites_cloud()
            try:
                assert len(allsites) > 0
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Could not get any sites from the specified cloud: %s. You will have to specify a target site in job.backend.requirements.sites"
                    % cloud)
            # need to weed out unwanted sites from excluded list
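            # Excluded sites are matched by the prefix before their first "_",
            # e.g. an excluded "CERN-PROD_SCRATCHDISK" removes every site name
            # containing "CERN-PROD" from the candidate list.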
            excludedSites = requirements.excluded_sites
            goodsites = []
            for checksite in allsites:
                selsite = True
                for site in excludedSites:
                    imax = site.find("_")
                    shortSite = site[:imax]
                    if shortSite in checksite:
                        #  print "site is excluded, skipping ", checksite
                        selsite = False
                        break
                if selsite and checksite not in goodsites:
                    goodsites.append(checksite)
#            print len(allsites),len(goodsites)
            if len(goodsites) > 0:
                allsites = goodsites
            job.backend.requirements.sites = allsites
            logger.debug(
                "Relaxing job to data policy to job to cloud. Selected cloud is %s"
                % cloud)

        logger.debug("master job submit?")

        if job.backend._name == "LCG" or job.backend._name == "Cronus" or job.backend._name == "Condor" or job.backend._name == "NG":
            return LCGJobConfig("", inputbox, [], outputbox, environment, [],
                                requirements)
        else:
            return StandardJobConfig("", inputbox, [], outputbox, environment)
Exemplo n.º 6
0
    def master_prepare(self,app,appmasterconfig):
        if app.siteroot: 
            os.environ["SITEROOT"]=app.siteroot
        os.environ["CMTSITE"]=app.cmtsite

        job = app._getParent()
        if job.backend._name in ["Local","PBS"]:
            if app.dryrun:
                os.environ["SITEROOT"]  = "NONE"
                os.environ["CMTSITE"]  = "NONE"
            try:
                assert "SITEROOT" in os.environ
            except:
                raise ApplicationConfigurationError(None," ATLAS environment not defined")
                
            try:
                assert "CMTSITE" in os.environ
            except:
                raise ApplicationConfigurationError(None,"cmt not setup properly. Please check your ATLAS setup or run on the grid")
            
            if os.environ["CMTSITE"]=="CERN" and "AtlasVersion" in os.environ:
                logger.debug("Checking AtlasVersion: %s and selected atlas release %s" % (os.environ["AtlasVersion"],app.atlas_rel))
                try:
                    assert app.atlas_rel==os.environ["AtlasVersion"]
                except:
                    logger.error("Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value." % os.environ["AtlasVersion"])
                    app.atlas_release=os.environ["AtlasVersion"]
                    app.atlas_rel=os.environ["AtlasVersion"]
            elif "ATLAS_RELEASE" in os.environ:
                logger.debug("Checking ATLAS_RELEASE: %s and selected atlas release %s" % (os.environ["ATLAS_RELEASE"],app.atlas_rel))
                try:
                    assert app.atlas_rel==os.environ["ATLAS_RELASE"]
                except:
                    logger.error("Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value." % os.environ["ATLAS_RELEASE"])
                    app.atlas_rel=os.environ["ATLAS_RELEASE"]
            else:
                logger.warning("Could not compare requested release and local setup. Hope you are doing something sensible...")

                
        if job.backend._name=="LSF":
            try:
                assert "CMTSITE" in os.environ and os.environ["CMTSITE"]=="CERN"
            except:
                raise ApplicationConfigurationError(None,"Error, CERN ATLAS AFS environment not defined. Needed by LSF backend")

       
        environment={'T_LCG_GFAL_INFOSYS' :'atlas-bdii.cern.ch:2170'}

        trfopts=app.transflags
        # need to parse them to be able to pass them in an environment variable
        trfopts=trfopts.replace(" ","/W")
        trfopts=trfopts.replace("-","/F")
        
        trflags=trfopts
        if app.mode =="evgen":
            trflags="/Ft"
            if app.verbosity:
                trflags+="/W/Fl/W%s" % app.verbosity
        
        if trflags:
            environment["TRFLAGS"]=trflags

        # setting output site from input data if any.
        outsite,backup,outputlocation,backuplocation="","","",""
        logger.info("checking sites from input data: %s" % str(app.sites))

        # must distinguish running site (backend.requirements.sites) and output storage site (app.se_name)
        
        # matching with user's wishes (app.se_name or backend.requirements.sites)

        usersites=[]
        if len(job.backend.requirements.sites)>0:
            usersites=job.backend.requirements.sites
##        elif job.application.se_name and job.application.se_name != "none":
##            usersites=job.application.se_name.split(" ")
        logger.info("user selection: %s" % str(usersites))
            
        # select sites which are matching user's wishes, if any.
        selectedSites=app.sites
        if len(selectedSites)==0:
            selectedSites=usersites
        if len(usersites)>0 and len(app.sites)>0:
            selectedSites=job.inputdata.trimSites(usersites,app.sites)
        # evgen case (no input data-> app.sites=[])
        if len(app.sites)==0 and app.se_name and app.se_name != "none":
            selectedSites=app.se_name.split(" ")

        # This comes last: using surviving sites from matching process.
        if len(selectedSites)==0:
            try:
                assert len(usersites)==0
            except:
                raise ApplicationConfigurationError(None,"Could not find a match between input dataset locations: %s and your requested sites: %s. Please use a space token compatible with one of the input dataset locations (replace _XXXDISK or _XXXTAPE by _LOCALGROUPDISK or _SCRATCHDISK if necessary)" % (str(app.sites),str(usersites)))
            logger.warning("Failed to obtain processing site from input data, will use default value: CERN-PROD_SCRATCHDISK and submit production to CERN")
            selectedSites.append(_defaultSite)


        [outlfc,outsite,outputlocation]=job.outputdata.getDQ2Locations(selectedSites[0])
        if len(selectedSites)>1:
            [outlfc2,backup,backuplocation]=job.outputdata.getDQ2Locations(selectedSites[1])

        # app.se_name set: the user wishes to have the output data written to a site other than the one hosting the input.
        # One needs to ensure that this location is at least in the same cloud as the targeted processing site. This is done by ensuring that the LFCs are the same.
        userSEs=[]
        outse=""
        if job.application.se_name and job.application.se_name != "none":
            userSEs=job.application.se_name.split(" ")
            # loop through userSEs until up to 2 valid sites are found...
            outse=""
            for SE in userSEs:
                [lfc,se,location]=job.outputdata.getDQ2Locations(SE)
                if lfc==outlfc:
                    if not outse:
                        outse=se # important to use outse and not outsite here, as outsite is used for selection of the processing site.
                        # userSEs override outlfc and outputlocation, but not outsite, as outsite is unfortunately used for the choice of the processing site.
                        outputlocation=location
                    else:
                        outlfc2=lfc
                        backup=se
                        backuplocation=location
                        break
        # finally: if no backup location is defined at this point, enforce CERN-PROD_SCRATCHDISK as backup location
        if backup=="":
             [outlfc2,backup,backuplocation]=job.outputdata.getDQ2Locations(_defaultSite)
        
        logger.info("Final selection of output sites: %s , backup: %s" % (outsite,backup))
        try:
            assert outsite
        except:
            raise ApplicationConfigurationError(None,"Could not find suitable location for your output. Please subscribe your input dataset (if any) to a suitable location or change application.se_name to a suitable space token")


        # srmv2 sites special treatment: the space token has been prefixed to the outputlocation and must be removed now:
        imin=string.find(outputlocation,"token:")
        imax=string.find(outputlocation,"srm:")
        spacetoken=""
        if imin>-1 and imax>-1:
            spacetoken=outputlocation[imin+6:imax-1]
            outputlocation=outputlocation[imax:]
        # same treatment for backup location if any
        imin=string.find(backuplocation,"token:")
        imax=string.find(backuplocation,"srm:")
        bst=""
        if imin>-1 and imax>-1:
            bst=backuplocation[imin+6:imax-1]
            backuplocation=backuplocation[imax:]

        environment["OUTLFC"]=outlfc
        environment["OUTSITE"]=outsite
        if outse:
           environment["OUTSITE"]=outse # user's choice for output storage location overriding AthenaMC's.
           
        environment["OUTPUT_LOCATION"]=outputlocation
        if spacetoken:
            environment["SPACETOKEN"]=spacetoken
        if backup:
            environment["OUTLFC2"]=outlfc2
            environment["OUTSITE2"]=backup
            environment["OUTPUT_LOCATION2"]=backuplocation

        environment["PROD_RELEASE"]=app.prod_release

        # setting environment["BACKEND"]
        # Local, Condor become "batch". LSF becomes "batch" unless the inputdata is on castor (in this case, it becomes "castor")
        environment["BACKEND"]=job.backend._name
        environment["BACKEND_DATA"]=app.backend_inputdata
        if job.backend._name=="LSF" and len(app.turls.values())>0:
            turl=app.turls.values()[0]
            if string.find(turl,"castor")>-1:
                environment["BACKEND_DATA"]="castor"
            else:
                environment["BACKEND_DATA"]="batch"
        if job.backend._name in ["Local","Condor","PBS"]:
            environment["BACKEND_DATA"]="batch"
            environment["SITEROOT"]=os.environ["SITEROOT"]
            environment["CMTSITE"]=os.environ["CMTSITE"]

#       finalise environment


        # preparing input sandbox, output sandbox, environment vars and job requirements
        
        inputbox = [ 
            File(os.path.join(os.path.dirname(__file__),'setup-release.sh')),
            File(os.path.join(os.path.dirname(__file__),'stage-in.sh')),
            File(os.path.join(os.path.dirname(__file__),'stage-out.sh')),
            File(os.path.join(os.path.dirname(__file__),'adler32.py'))
        ]

        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [ File(app.transform_archive) ]
        elif app.transform_archive:
            # tarball in local or remote web area.
            if string.find(app.transform_archive,"http")>=0:
                environment['TRANSFORM_ARCHIVE'] = "%s" % (app.transform_archive)
            else:
                myfile=os.path.basename(app.transform_archive)
                myfile="http://cern.ch/atlas-computing/links/kitsDirectory/Production/kits/"+myfile
                environment['TRANSFORM_ARCHIVE'] = "%s" % (myfile)
                
        
        if app.evgen_job_option and os.path.exists(app.evgen_job_option):
            # locally modified job option file to add to the input sand box
            inputbox += [ File(app.evgen_job_option) ]
            # need to strip the path away.
            self.evgen_job_option = app.evgen_job_option.split("/")[-1]
            environment['CUSTOM_JOB_OPTION'] = "%s" % (self.evgen_job_option)
        elif app.evgen_job_option:
            self.evgen_job_option = app.evgen_job_option
            
        # user area:
        if app.userarea : 
            inputbox.append(File(app.userarea))
            environment['USER_AREA']=os.path.basename(app.userarea)


        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [ file ]


        outputbox = [ ]
        outputGUIDs='output_guids'
        outputLOCATION='output_location'
        outputbox.append( outputGUIDs )
        outputbox.append( outputLOCATION )
        outputbox.append( 'output_data' )
        if (job.outputsandbox):
            for file in job.outputsandbox:
                outputbox += [ file ]

        # switch JobTransforms/AtlasProduction package.
        self.isJT=string.find(app.transform_archive,"JobTransform")
        if self.isJT>-1 and app.mode=="evgen":
            environment['T_CONTEXT'] = str(self.number_events_job) # needed to avoid the prodsys failure mechanism based on a hardcoded minimum of 5000 events per job

            
        #       prepare job requirements
            
        if hasattr(job.backend,'requirements') and hasattr(job.backend.requirements,'sites') and hasattr(job.backend.requirements,'software') and hasattr(job.backend.requirements,'other') :
            requirements=job.backend.requirements
        else:
            requirements = AtlasLCGRequirements()
        
#        requirements.other.append('other.GlueCEStateStatus=="Production"') # missing production
        imax=string.rfind(app.atlas_rel,".")
        rel=string.atof(app.atlas_rel[:imax]) # to deal with string comparisons: [2-9].0.0 > 11.0.0. 
        if app.atlas_rel <= "11.4.0" or rel <=11.4:
            requirements.software=['VO-atlas-release-%s' % app.atlas_rel ]
        elif app.atlas_rel < "12.0.3":
            requirements.software=['VO-atlas-offline-%s' % app.atlas_rel ]
        elif app.atlas_rel >= "14.0.0" and app.atlas_rel<= "15.6.1":
            requirements.software=['VO-atlas-offline-%s-i686-slc4-gcc34-opt' % app.atlas_rel ]
        elif app.atlas_rel> "15.6.1":
            requirements.software=['VO-atlas-offline-%s-i686-slc5-gcc43-opt' % app.atlas_rel ]
        else:
            requirements.software=['VO-atlas-production-%s' % app.atlas_rel ]
        # case of prod_release set
        if app.prod_release:
            # no prod release tag before 13.0.X
            if app.atlas_rel < "14.0.0" and app.atlas_rel > "13.0.0":
                requirements.software=['VO-atlas-production-%s' % app.prod_release]
            elif app.atlas_rel>= "14.0.0" and app.atlas_rel<= "15.6.1":
                requirements.software=['VO-atlas-production-%s-i686-slc4-gcc34-opt' % app.prod_release]
            elif app.atlas_rel> "15.6.1":
                requirements.software=['VO-atlas-production-%s-i686-slc5-gcc43-opt' % app.prod_release]

        if app.transform_archive and string.find(app.transform_archive,"AtlasTier0")>-1:
            requirements.software=['VO-atlas-tier0-%s' % app.prod_release]
##        extraConfig=getConfig('defaults_AtlasLCGRequirements')
##        if  'dq2client_version' in extraConfig:
##            dq2client_version = extraConfig['dq2client_version']

##        if job.backend.requirements.dq2client_version:
##            dq2client_version = job.backend.requirements.dq2client_version
##        try:
##            assert dq2client_version!=""
##        except:
##            raise  ApplicationConfigurationError(None,"Please give a value to dq2client_version in job.backend.requirements.")
        
#        requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
#        requirements.other+=['RegExp("VO-atlas-dq2clients",other.GlueHostApplicationSoftwareRunTimeEnvironment)']

        # controlled relaxation for simple cases: one single input dataset, less than 200 subjobs. In this case, the subjobs can be submitted to the whole cloud.
        loosematch="true"
        if job.splitter and job.splitter.numsubjobs>200:
            loosematch="false"
        if job.inputdata and (job.inputdata.cavern or job.inputdata.minbias):
            loosematch="false"
# commented the next block out as stage-in.sh can now ensure that the local copy is downloaded on the first attempt. However, as a safety net, we maintain the veto on complex jobs with pileup and/or minbias, because they are heavyweight anyway and should not be run everywhere.
#        if app.dbrelease: 
#            loosematch="false"
        if len(job.backend.requirements.sites)>0:
            loosematch="false" # specified sites take precedence over cloud.
            
        userCloud=job.backend.requirements.cloud
        if userCloud=='ALL':
            userCloud='' # not supporting the AthenaLCGRequirements catch-all
        # By default: job to data, strict: target outsite and nothing else.
        requirements.sites=outsite
        
        if loosematch=="true" and userCloud :
            logger.debug("Your job qualifies for controlled relaxation of the current job-to-data policy. Now checking that requested cloud matches with input data")
            
            from dq2.info.TiersOfATLAS import whichCloud,ToACache
            targetSites=whichCloud(outsite)
            cloud=""
            for cloudID,sites in ToACache.dbcloud.iteritems():
                if sites==targetSites:
                    cloud=cloudID
            try:
                assert cloud==userCloud
            except:
                raise ApplicationConfigurationError(None,"Requested cloud: %s did not match selected processing cloud: %s. Reverting to submission to site %s" % (userCloud,cloud,outsite))

            requirements.cloud=cloud
            # looks like cloud has to be converted into a list of sites anyway, and this is not done in AtlasLCGRequirements.convert()...
            allsites=requirements.list_sites_cloud()
            try:
                assert len(allsites)>0
            except:
                raise ApplicationConfigurationError(None,"Could not get any sites from the specified cloud: %s. You will have to specify a target site in job.backend.requirements.sites" % cloud)
            # need to weed out unwanted sites from excluded list
            excludedSites=requirements.excluded_sites
            goodsites=[]
            for checksite in allsites:
                selsite=True
                for site in excludedSites:
                    imax=site.find("_")
                    shortSite=site[:imax]
                    if shortSite in checksite:
                      #  print "site is excluded, skipping ", checksite
                        selsite=False
                        break
                if selsite and checksite not in goodsites:
                    goodsites.append(checksite)
#            print len(allsites),len(goodsites)
            if len(goodsites)>0:
                allsites=goodsites
            job.backend.requirements.sites=allsites
            logger.debug("Relaxing job to data policy to job to cloud. Selected cloud is %s" % cloud)

        logger.debug("master job submit?")
        
        if job.backend._name=="LCG" or job.backend._name=="Cronus" or job.backend._name=="Condor" or job.backend._name=="NG":
            return LCGJobConfig("",inputbox,[],outputbox,environment,[],requirements)
        else :
            return StandardJobConfig("",inputbox,[],outputbox,environment)