Example #1
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == 'DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed! Sites specified with j.backend.requirements.sites=%s are not in the same cloud!' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        # The next (commented-out) loop would instruct ganga to use option_files
        # that live in the appropriate shared directory; the job will already
        # have been prepared. If is_prepared is True, the job was most likely
        # submitted via GangaRobot, so the option files are used as-is.
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        athena_options = ' '.join([
            os.path.basename(opt_file.name) for opt_file in app.option_file
        ])
        if app.atlas_exetype not in ['EXE']:
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options

            inputbox = [File(opt_file.name) for opt_file in app.option_file]
        else:
            inputbox = []

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh')))

        if job.inputdata and job.inputdata._name == "AMIDataset" and job.inputdata.goodRunListXML.name != '':
            inputbox.append(File(job.inputdata.goodRunListXML.name))

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox, 'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox, 'ganga-stagein.py')

        if app.user_area.name:
            #we will now use the user_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http') < 0:
            #we will now use the group_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))

        if app.user_setupfile.name:
            inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause

        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) or (job._getRoot().splitter
               and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get',
                          'dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (
                    job._getRoot().splitter
                    and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox, 'dq2_get_old')

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py',
                          'dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not 'ganga-stage-in-out-dq2.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
            _append_files(inputbox, 'ganga-joboption-parse.py')
            if not 'dq2info.tar.gz' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'dq2info.tar.gz')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        if not 'dq2tracerreport.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'dq2tracerreport.py')
        if not 'db_dq2localid.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'db_dq2localid.py')
        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'getstats.py')

        if str(app.atlas_release).find('12.') >= 0:
            _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so')
        else:
            # athena 13.x and all other releases only need libdcap.so
            _append_files(inputbox, 'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox

        #       prepare environment

        if not app.atlas_release:
            raise ApplicationConfigurationError(
                None,
                'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.'
            )

        environment = {
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': athena_options,
            'ATHENA_USERSETUPFILE': athena_usersetupfile,
            'ATLAS_PROJECT': app.atlas_project,
            'ATLAS_EXETYPE': app.atlas_exetype,
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
                try:
                    # entries are expected in the form 'NAME=value'
                    match = re.match(r"^(\w+)=(.*)", var)
                    environment[match.group(1)] = match.group(2)
                except AttributeError:
                    logger.warning(
                        'Athena.atlas_environment variable not correctly configured: %s',
                        var)

        if app.atlas_production and app.atlas_release.find(
                '12.') >= 0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub(r'\.', '_', app.atlas_production)
            prod_url = config[
                'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz'
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                     or app.atlas_release.find('12.') <= 0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production

        if app.user_area.name:
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http') >= 0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(
                    app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                environment['GANGA_LFC_HOST'] = job.inputdata.lfc

        if 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']

        if job.inputdata and (job.inputdata._name
                              in ['DQ2Dataset', 'AMIDataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name in [
                    'AtlasLCGRequirements', 'AtlasCREAMRequirements'
            ] and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')
                    and not (job.splitter
                             and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

            # Add TAG datasetname
            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(
                    job.inputdata.tagdataset)

        # prepare job requirements
        requirementsSoftware = getLCGReleaseTag(app)

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error(
                'The athena release %s you are using is not recommended for distributed analysis !',
                requirementsSoftware[0])
            logger.error(
                'For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !'
            )
        requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find(
                'x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ] and job.inputdata.type in [
                'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            # override the default one if dq2client_version is present in the
            # job backend's requirements object; default to None so the check
            # below cannot raise a NameError
            dq2client_version = getattr(job.backend.requirements,
                                        'dq2client_version', None)
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (
                    job.splitter and
                (job.splitter._name == 'DQ2JobSplitter'
                 or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
                )
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(
                    ':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except IndexError:
                logger.warning(
                    'Problems with the atlas_dbrelease configuration')

        # Fill AtlasLCGRequirements access mode
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning(
                "config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !"
            )
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(0)  # rewind to the start before reading back
            lines = fileHandle.read()
            inputbox.append(FileBuffer('access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'make_filestager_joption.py',
                              'dm_util.py', 'fs-copy.py')


        # jobscript
        exe = os.path.join(__directory__, 'run-athena-lcg.sh')

        #       output sandbox
        outputbox = [
            'output_guids', 'output_location', 'output_data', 'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (
                job.inputdata and
            (job.inputdata._name
             in ['DQ2Dataset', 'AMIDataset', 'EventPicking'])
                and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']

        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
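
Note how the four membership checks above ('ganga-stage-in-out-dq2.py', 'dq2tracerreport.py', 'db_dq2localid.py', 'getstats.py') repeat the same basename test against the sandbox. A small helper could state that intent once; a minimal sketch, assuming the _append_files used above (the helper name _append_files_once is hypothetical, not part of GangaAtlas):

import os

def _append_files_once(inputbox, *names):
    # Append each named file unless a file with the same basename
    # is already present in the input sandbox.
    existing = set(os.path.basename(f.name) for f in inputbox)
    for name in names:
        if os.path.basename(name) not in existing:
            _append_files(inputbox, name)
            existing.add(os.path.basename(name))

# usage sketch:
# _append_files_once(inputbox, 'ganga-stage-in-out-dq2.py',
#                    'dq2tracerreport.py', 'db_dq2localid.py', 'getstats.py')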
Example #2
def whichCloudExt(site):
    if site.startswith("NDGF"):
        return "NG"
    return whichCloud(site)
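
A quick usage sketch for whichCloudExt, grouping sites by cloud (the site names below are illustrative only, not taken from a real site list):

sites = ['NDGF-T1', 'CERN-PROD', 'RAL-LCG2']  # hypothetical site names
clouds = {}
for site in sites:
    clouds.setdefault(whichCloudExt(site), []).append(site)
# NDGF* sites are mapped to the 'NG' cloud; all others fall through to whichCloud().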
Example #3
def whichCloudExt(site):
    if site.startswith("NDGF"):
        return "NG"
    return whichCloud(site)
Example #4
    def master_prepare( self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent() # Returns job or subjob object
        logger.debug('TagPrepareLCGRTHandler master_prepare called: %s', job.id )

        self.username = gridProxy.identity(safe=True)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed! Sites specified with j.backend.requirements.sites=%s are not in the same cloud!' %(job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None,printout)


        # prepare input sandbox
        inputbox = [ ( File(os.path.join(__athdirectory__,'athena-utility.sh')) ),
                     ( File(os.path.join(__directory__,'get_tag_info.py')))]
            
        # CN: added TNTJobSplitter clause  
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            _append_files(inputbox,os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
                          os.path.join(__athdirectory__, 'dq2_get'),
                          os.path.join(__athdirectory__, 'dq2info.tar.gz'))

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
        if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__,'dq2tracerreport.py'))
        if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__, 'db_dq2localid.py'))
        if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, os.path.join(__athdirectory__, 'getstats.py'))

        _append_files(inputbox,os.path.join(__athdirectory__, 'libdcap.so'))

        if job.inputsandbox: inputbox += job.inputsandbox
            
        # prepare environment
        environment={
            'MAXNUMREFS'     : str(app.max_num_refs),
            'STREAM_REF'     : app.stream_ref,
            'ATLAS_RELEASE'  : app.atlas_release,
            'ATHENA_OPTIONS' : '',
            'ATHENA_USERSETUPFILE' : '',
            'ATLAS_PROJECT' : '',
            'ATLAS_EXETYPE' : 'ATHENA',
            'GANGA_VERSION' : configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']
        requirements = AtlasLCGRequirements()
        
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1
                    
            else:
                raise ApplicationConfigurationError(None,'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.')

            # Raise submission exception
            if (not job.backend.CE and 
                not (job.backend.requirements._name == 'AtlasLCGRequirements' and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob')):

                raise ApplicationConfigurationError(None,'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            if job.inputdata.match_ce_all or job.inputdata.min_num_files>0:
                raise ApplicationConfigurationError(None,'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        cmtconfig = app.atlas_cmtconfig
        if cmtconfig not in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
            cmtconfig = 'i686-slc4-gcc34-opt'

        requirements.software = ['VO-atlas-offline-%s-%s' %(app.atlas_release, cmtconfig )]

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata.type in [ 'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER']:
            # override the default one if dq2client_version is present
            # in the job backend's requirements object; default to None so
            # the check below cannot raise a NameError
            dq2client_version = getattr(job.backend.requirements, 'dq2client_version', None)
            if dq2client_version:
                requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        # jobscript
        exe = os.path.join(__directory__,'run-tagprepare-lcg.sh')
        #exe = os.path.join(__directory__,'get_tag_info.py')

        # output sandbox
        outputbox = [
            'taginfo.pkl'
        ]


        if job.outputsandbox: outputbox += job.outputsandbox

        return LCGJobConfig(File(exe),inputbox,[],outputbox,environment,[],requirements) 
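
For reference, the software requirement composed above expands to a single VO tag; an illustrative evaluation (the values are examples only, not defaults):

atlas_release = '15.6.9'              # hypothetical release
cmtconfig = 'i686-slc5-gcc43-opt'
software = ['VO-atlas-offline-%s-%s' % (atlas_release, cmtconfig)]
# software == ['VO-atlas-offline-15.6.9-i686-slc5-gcc43-opt']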
Example #5
    def master_prepare( self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent() # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id )


        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name=='DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed! Sites specified with j.backend.requirements.sites=%s are not in the same cloud!' %(job.backend.requirements.sites)
                    raise ApplicationConfigurationError(printout)


        # The next (commented-out) loop would instruct ganga to use option_files
        # that live in the appropriate shared directory; the job will already
        # have been prepared. If is_prepared is True, the job was most likely
        # submitted via GangaRobot, so the option files are used as-is.
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file])
        if app.atlas_exetype not in ['EXE']:
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options

            inputbox = [ File(opt_file.name) for opt_file in app.option_file ]
        else:
            inputbox = []
            
        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        # prepare input sandbox
        inputbox.append( File(os.path.join(__directory__,'athena-utility.sh')) )

        if app.user_area.name: 
            #we will now use the user_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http')<0:
            #we will now use the group_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))
    
        if app.user_setupfile.name: inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause  

        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ] ) or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2_get','dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox,'dq2_get_old')

        if job.inputdata and job.inputdata._name ==  'ATLASTier3Dataset':
            _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError('j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'ganga-stage-in-out-dq2.py')
            _append_files(inputbox,'ganga-joboption-parse.py')
            if not 'dq2info.tar.gz' in [os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'dq2info.tar.gz') 

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'dq2tracerreport.py')
        if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'db_dq2localid.py')
        if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'getstats.py')


        if str(app.atlas_release).find('12.')>=0:
            _append_files(inputbox, 'libDCache.so','libRFIO.so','libdcap.so')
        else:
            # athena 13.x and all other releases only need libdcap.so
            _append_files(inputbox,'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox
            
        # prepare environment

        if not app.atlas_release: 
            raise ApplicationConfigurationError('j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.')

        environment={ 
            'ATLAS_RELEASE'  : app.atlas_release,
            'ATHENA_OPTIONS' : athena_options,
            'ATHENA_USERSETUPFILE' : athena_usersetupfile,
            'ATLAS_PROJECT' : app.atlas_project,
            'ATLAS_EXETYPE' : app.atlas_exetype,
            'GANGA_VERSION' : configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
                try:
                    # entries are expected in the form 'NAME=value'
                    match = re.match(r"^(\w+)=(.*)", var)
                    environment[match.group(1)] = match.group(2)
                except AttributeError:
                    logger.warning('Athena.atlas_environment variable not correctly configured: %s', var)

        if app.atlas_production and app.atlas_release.find('12.')>=0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub(r'\.','_',app.atlas_production)
            prod_url = config['PRODUCTION_ARCHIVE_BASEURL']+'/AtlasProduction_'+ temp_atlas_production +'_noarch.tar.gz'
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.')<=0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production
        
        if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http')>=0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)
        
        if job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            requirements = AtlasLCGRequirements()
        
        if 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        
        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1
                    
            else:
                raise ApplicationConfigurationError('j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.')

            # Raise submission exception
            if (not job.backend.CE and 
                not (job.backend.requirements._name in [ 'AtlasLCGRequirements', 'AtlasCREAMRequirements' ] and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob') and
                not (job.splitter and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            if job.inputdata.match_ce_all or job.inputdata.min_num_files>0:
                raise ApplicationConfigurationError('Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        requirementsSoftware = getLCGReleaseTag( app )

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error('The athena release %s you are using is not recommended for distributed analysis !', requirementsSoftware[0])
            logger.error('For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !')
        requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit    
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find('x86_64')>=0:
            environment['ATLAS_ARCH'] = '64'
            
        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ]  and job.inputdata.type in [ 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            # override the default one if dq2client_version is present
            # in the job backend's requirements object; default to None so
            # the check below cannot raise a NameError
            dq2client_version = getattr(job.backend.requirements, 'dq2client_version', None)
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (job.splitter and (job.splitter._name == 'DQ2JobSplitter' or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !')
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except IndexError:
                logger.warning('Problems with the atlas_dbrelease configuration')


        # Fill AtlasLCGRequirements access mode 
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning("config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !" )
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info,fileHandle)
            fileHandle.seek(0)  # rewind to the start before reading back
            lines = fileHandle.read()
            inputbox.append(FileBuffer( 'access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [ os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')

        # jobscript
        exe = os.path.join(__directory__,'run-athena-lcg.sh')

        # output sandbox
        outputbox = [
            'output_guids',
            'output_location',
            'output_data',
            'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']
            
        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'
            
        return LCGJobConfig(File(exe),inputbox,[],outputbox,environment,[],requirements) 
Example #6
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('TagPrepareLCGRTHandler master_prepare called: %s',
                     job.id)

        self.username = gridProxy.identity(safe=True)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed! Sites specified with j.backend.requirements.sites=%s are not in the same cloud!' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        # prepare input sandbox
        inputbox = [(File(os.path.join(__athdirectory__,
                                       'athena-utility.sh'))),
                    (File(os.path.join(__directory__, 'get_tag_info.py')))]

        # CN: added TNTJobSplitter clause
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            _append_files(
                inputbox,
                os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
                os.path.join(__athdirectory__, 'dq2_get'),
                os.path.join(__athdirectory__, 'dq2info.tar.gz'))

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in [
                'FILE_STAGER'
        ]:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(
                inputbox,
                os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
        if not 'dq2tracerreport.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'dq2tracerreport.py'))
        if not 'db_dq2localid.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'db_dq2localid.py'))
        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'getstats.py'))

        _append_files(inputbox, os.path.join(__athdirectory__, 'libdcap.so'))

        if job.inputsandbox: inputbox += job.inputsandbox

        # prepare environment
        environment = {
            'MAXNUMREFS': str(app.max_num_refs),
            'STREAM_REF': app.stream_ref,
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': '',
            'ATHENA_USERSETUPFILE': '',
            'ATLAS_PROJECT': '',
            'ATLAS_EXETYPE': 'ATHENA',
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']
        requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name
                                            == 'AtlasLCGRequirements'
                                            and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        cmtconfig = app.atlas_cmtconfig
        if cmtconfig not in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
            cmtconfig = 'i686-slc4-gcc34-opt'

        requirements.software = [
            'VO-atlas-offline-%s-%s' % (app.atlas_release, cmtconfig)
        ]

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata.type in [
                'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ]:
            # override the default one if dq2client_version is present in the
            # job backend's requirements object; default to None so the check
            # below cannot raise a NameError
            dq2client_version = getattr(job.backend.requirements,
                                        'dq2client_version', None)
            if dq2client_version:
                requirements.software += [
                    'VO-atlas-dq2clients-%s' % dq2client_version
                ]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version


        # jobscript
        exe = os.path.join(__directory__, 'run-tagprepare-lcg.sh')
        #exe = os.path.join(__directory__,'get_tag_info.py')

        #       output sandbox
        outputbox = ['taginfo.pkl']

        if job.outputsandbox: outputbox += job.outputsandbox

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
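
All six examples converge on the same positional LCGJobConfig call; an annotated sketch of that return, with the argument roles inferred from how the examples fill them (treat the role comments as assumptions, not the official signature):

return LCGJobConfig(
    File(exe),     # wrapper script run on the worker node
    inputbox,      # input sandbox: File/FileBuffer objects shipped with the job
    [],            # executable arguments (unused in these handlers)
    outputbox,     # output sandbox: file names to retrieve when the job ends
    environment,   # dict exported as environment variables on the worker node
    [],            # output data (unused in these handlers)
    requirements)  # Atlas*Requirements object used for resource matchmaking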