Exemplo n.º 1
0
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    for crab_logger in (tblogger, logger, memhandler):
        crab_logger.setLevel(logging.INFO)

    # Build the crab configuration for these arguments.
    config = get_config(args)
    config.Site.whitelist = ['T2_US_Wisconsin'] # whitelist wisconsin so it only runs there

    # Each entry listed under the input directory is treated as one sample.
    submissions = {}
    for sample in hdfs_ls_directory(args.inputDirectory):
        dataset = sample
        # CRAB caps request names; truncate to stay under the limit.
        config.General.requestName = '{0}'.format(dataset)[:99] # Warning: may not be unique now
        config.Data.outputPrimaryDataset = dataset
        # File list for this sample comes straight from HDFS.
        config.Data.userInputFiles = get_hdfs_root_files(args.inputDirectory, sample)

        # Assemble the submit arguments, honoring --dryrun.
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs.append('--dryrun')
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submissions[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
Exemplo n.º 2
0
    def testLumiSubmit(self):
        """
        Test submission with the lumiMask parameter.
        """
        # Build a configuration containing every mandatory section.
        sections = ["General", "User", "Data", "Site", "JobType"]
        lumiConf = Configuration()
        for sec in sections:
            lumiConf.section_(sec)

        lumiConf.General.serverUrl = "localhost:8518"
        lumiConf.JobType.externalPluginFile = os.path.join(
            os.path.dirname(__file__), "TestPlugin.py")
        lumiConf.Site.storageSite = 'T2_XXX'

        # NOTE(review): lumiInput is computed but never used below; lumiMask is
        # set to the bare filename instead. Possibly `lumiConf.Data.lumiMask =
        # lumiInput` was intended — confirm before changing behavior.
        lumiInput = os.path.join(os.path.dirname(__file__),
                                 "../../../data/lumiInput.json")
        lumiConf.Data.splitting = 'LumiBased'
        lumiConf.Data.lumiMask = 'lumiInput.json'

        # Submit with the lumi-masked configuration and expect success (code 0).
        sub = submit(
            self.logger, self.maplistopt + [
                "-c", lumiConf, "-p",
                "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni",
                "-s", "127.0.0.1:8518"
            ])

        res = sub()
        expRes = CommandResult(0, None)
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(res, expRes)
Exemplo n.º 3
0
    def testLumiSubmit(self):
        """
        Test submission with the lumiMask parameter.
        """
        # Build a configuration containing every mandatory section.
        sections = ["General", "User", "Data", "Site" , "JobType"]
        lumiConf = Configuration()
        for sec in sections:
            lumiConf.section_(sec)

        lumiConf.General.serverUrl = "localhost:8518"
        lumiConf.JobType.externalPluginFile = os.path.join( os.path.dirname(__file__), "TestPlugin.py")
        lumiConf.Site.storageSite = 'T2_XXX'

        # NOTE(review): lumiInput is computed but never used below; lumiMask is
        # set to the bare filename instead. Possibly `lumiConf.Data.lumiMask =
        # lumiInput` was intended — confirm before changing behavior.
        lumiInput = os.path.join( os.path.dirname(__file__), "../../../data/lumiInput.json")
        lumiConf.Data.splitting = 'LumiBased'
        lumiConf.Data.lumiMask = 'lumiInput.json'

        # Submit with the lumi-masked configuration and expect success (code 0).
        sub = submit(self.logger, self.maplistopt + ["-c", lumiConf,
                                                     "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni",
                                                     "-s", "127.0.0.1:8518"])

        res = sub()
        expRes = CommandResult(0, None)
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(res, expRes)
Exemplo n.º 4
0
def submit_das_crab(args):
    '''Submit samples using DAS'''
    # NOTE(review): this block uses Python 2 `print` statements; it will not
    # run under Python 3 as written.
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples: either from args.samples directly, or one dataset per line
    # of the file named by args.sampleList.
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList,'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        # Neither source available: log and fall through with an empty list.
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # DAS dataset paths look like /primary/tag/format; split accordingly.
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        # Keep the tail of the dataset tag so the request name stays short.
        maxDatasetTagSize = 97-len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.inputDataset   = sample
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun:
            submitArgs += ['--dryrun']
            print 'Will submit with args:'
            print submitArgs
            print config.__str__()
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
            # save config file text inside the crab work area for this request
            outdir = os.path.join(config.General.workArea, 'crab_{0}'.format(config.General.requestName), 'inputs/crabConfig.py')
            with open(outdir,'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
Exemplo n.º 5
0
    def testSubmit(self):
        """Exercise submit failure and success paths."""
        #Delete workdir
        if os.path.isdir("crab_" + self.TestConfig.config.General.requestName):
            shutil.rmtree("crab_" + self.TestConfig.config.General.requestName)

        #2) The config file is not found
        sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                       "-s", "127.0.0.1:8518"])
        res = sub()
        self.assertEqual(res[0], 1)

        #3) Is the client checking the configurations?
        #If a mandatory section is not there => fail!
        sections = ["General", "User", "Data", "Site",
                    "JobType"]  #mandatory sections
        emptyConf = Configuration()
        for sec in sections:
            sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                           "-s", "127.0.0.1:8518"])
            res = sub()
            self.assertEqual(res[0], 1)
            emptyConf.section_(sec)

        # With every mandatory section present, submission should succeed.
        emptyConf.General.serverUrl = "localhost:8518"
        emptyConf.General.requestName = 'TestAnalysisSubmit'
        emptyConf.JobType.externalPluginFile = os.path.join(
            os.path.dirname(__file__), "TestPlugin.py")
        emptyConf.Site.storageSite = 'T2_XXX'
        expRes = CommandResult(0, None)
        sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                       "-s", "127.0.0.1:8518"])
        res = sub()
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(res, expRes)

        # Resubmit with run white/black lists; expect success again.
        shutil.rmtree("./crab_TestAnalysisSubmit")
        emptyConf.Data.runWhitelist = '1,3,9-13'
        emptyConf.Data.runBlacklist = '1,3,9-13'
        expRes = CommandResult(0, None)
        sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                       "-s", "127.0.0.1:8518"])
        res = sub()
        self.assertEqual(res, expRes)
Exemplo n.º 6
0
    def testSubmit(self):
        """Exercise submit failure and success paths."""
        #Delete workdir
        if os.path.isdir("crab_" + self.TestConfig.config.General.requestName):
            shutil.rmtree("crab_" + self.TestConfig.config.General.requestName)

        #2) The config file is not found
        sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                       "-s", "127.0.0.1:8518"])
        res = sub()
        self.assertEqual(res[0], 1)

        #3) Is the client checking the configurations?
        #If a mandatory section is not there => fail!
        sections = ["General", "User", "Data", "Site" , "JobType"]#mandatory sections
        emptyConf = Configuration()
        for sec in sections:
            sub = submit(self.logger, self.maplistopt + ["-c", "asdf", "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                           "-s", "127.0.0.1:8518"])
            res = sub()
            self.assertEqual(res[0], 1)
            emptyConf.section_(sec)

        # With every mandatory section present, submission should succeed.
        emptyConf.General.serverUrl = "localhost:8518"
        emptyConf.General.requestName = 'TestAnalysisSubmit'
        emptyConf.JobType.externalPluginFile = os.path.join( os.path.dirname(__file__), "TestPlugin.py")
        emptyConf.Site.storageSite = 'T2_XXX'
        expRes = CommandResult(0, None)
        sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                       "-s", "127.0.0.1:8518"])
        res = sub()
        # assertEquals is a deprecated alias; use assertEqual.
        self.assertEqual(res, expRes)

        # Resubmit with run white/black lists; expect success again.
        shutil.rmtree("./crab_TestAnalysisSubmit")
        emptyConf.Data.runWhitelist = '1,3,9-13'
        emptyConf.Data.runBlacklist = '1,3,9-13'
        expRes = CommandResult(0, None)
        sub = submit(self.logger, self.maplistopt + ["-c", emptyConf, "-p", "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=mmascher/CN=720897/CN=Marco Mascheroni", \
                                       "-s", "127.0.0.1:8518"])
        res = sub()
        self.assertEqual(res, expRes)
Exemplo n.º 7
0
def submit_das_crab(args):
    '''Submit samples using DAS'''
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    for crab_logger in (tblogger, logger, memhandler):
        crab_logger.setLevel(logging.INFO)

    # Build the crab configuration for these arguments.
    config = get_config(args)

    # Samples come either straight from the command line, or one dataset per
    # line of the file named by args.sampleList.
    if args.samples:
        sampleList = list(args.samples)
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        sampleList = []
        log.error('Sample input list {0} does not exist.'.format(
            args.sampleList))

    submitMap = {}
    for sample in sampleList:
        # DAS dataset paths look like /primary/tag/format; split accordingly.
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        # Keep the tail of the dataset tag so the most specific part survives
        # the request-name truncation.
        maxDatasetTagSize = 97 - len(primaryDataset)
        requestName = '{0}'.format(primaryDataset)
        requestName += '_' + datasetTag[-maxDatasetTagSize:]
        config.General.requestName = requestName[:99]  # Warning: may not be unique now
        config.Data.inputDataset = sample

        # Assemble the submit arguments, honoring --dryrun.
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs.append('--dryrun')
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
Exemplo n.º 8
0
def submit_das_crab(args):
    '''Submit samples using DAS'''
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples: either from args.samples directly, or one dataset per line
    # of the file named by args.sampleList.
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList,'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        # Neither source available: log and fall through with an empty list.
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # DAS dataset paths look like /primary/tag/format; split accordingly.
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        # Keep the tail of the dataset tag so the request name stays short.
        maxDatasetTagSize = 97-len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.inputDataset   = sample
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
Exemplo n.º 9
0
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [
        args.site
    ]  # whitelist site, run on same site as files located

    # get samples: each entry listed under the input directory is one sample
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:
                                                                99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list straight from HDFS for this sample
        config.Data.userInputFiles = get_hdfs_root_files(
            args.inputDirectory, sample)
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
def submitLimitCrab(tag, h, amasses, **kwargs):
    """Submit limit-calculation jobs to CRAB.

    Builds a bash script that runs the combine commands from getCommands()
    once per A mass, then submits it through the CRAB client.

    Recognized kwargs: dryrun (bool), jobName (str), pointsPerJob (int),
    parametric (bool), postscript (str).
    """
    dryrun = kwargs.get('dryrun', False)
    jobName = kwargs.get('jobName', None)
    pointsPerJob = kwargs.get('pointsPerJob', 10)
    parametric = kwargs.get('parametric', False)
    postscript = kwargs.get('postscript', '')

    # '${A}' is expanded by the generated bash script's per-mass loop below.
    a = '${A}'

    datacard = 'datacards_shape/MuMuTauTau/{}_HToAAH{}A{}{}.txt'.format(
        tag, h, 'X' if parametric else '${A}', postscript)

    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}{}/{}'.format(
        scratchDir,
        pwd.getpwuid(os.getuid())[0], jobName, tag, postscript, h)
    python_mkdir(sample_dir)

    # create submit dir; bail out rather than clobber an existing submission
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script that iterates over the A masses (delivered to the
    # job as its "input files") and runs combine for each
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(
            datacard=datacard, h=h, a=a, tag=tag, postscript=postscript) + '\n'
    bashScript += 'done\n'
    # Emit a minimal FrameworkJobReport so CRAB considers the job successful.
    bashScript += """echo '''<FrameworkJobReport>\
<ReadBranches>\n
</ReadBranches>\n
<PerformanceReport>\n
  <PerformanceSummary Metric="StorageStatistics">\n
    <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n
    <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n
    <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n
    <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n
    <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n
    <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n
  </PerformanceSummary>\n
</PerformanceReport>\n
<GeneratorInfo>\n
</GeneratorInfo>\n
</FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    with open(bash_name, 'w') as file:
        file.write(bashScript)
    os.system('chmod +x {0}'.format(bash_name))

    # setup crab config
    from CRABClient.UserUtilities import config

    # NOTE(review): this rebinding shadows the imported `config` factory.
    config = config()

    config.General.workArea = submit_dir
    config.General.transferOutputs = True

    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '{0}/src/DevTools/Utilities/test/PSet.py'.format(
        os.environ['CMSSW_BASE'])
    config.JobType.scriptExe = bash_name
    config.JobType.outputFiles = []
    config.JobType.inputFiles = ['datacards_shape/MuMuTauTau']

    config.Data.outLFNDirBase = '/store/user/{}/{}/{}/{}'.format(
        UNAME, jobName, tag, h)
    config.Data.outputDatasetTag = jobName
    # Abuse FileBased splitting: each "file" is actually an A-mass value.
    config.Data.userInputFiles = [str(a) for a in amasses]
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = pointsPerJob
    config.Data.outputPrimaryDataset = 'Limits'

    config.Site.storageSite = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config', config]
    if dryrun: submitArgs += ['--dryrun']

    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit

    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    try:
        logging.info('Submitting {}/{}/{}'.format(jobName, tag, h))
        crabClientSubmit.submit(logger, submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
Exemplo n.º 11
0
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [args.site] # whitelist site, run on same site as files located

    # get samples: each entry listed under the input directory is one sample
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of which samples get submitted
        if hasattr(args,'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample,sampleFilter): submitSample = True
            if not submitSample: continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list straight from HDFS for this sample
        inputFiles = get_hdfs_root_files(args.inputDirectory,sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles==0:
            # BUG FIX: previously used the undefined name `inputDirectory`,
            # which raised NameError whenever a sample had no files.
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory,sample))
            continue
        # Default splitting, optionally overridden by size or JSON map below.
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            # Size-based splitting: target gigabytesPerJob per job.
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory,sample))
            if totalSize:
                averageSize = totalSize/totalFiles
                GB = 1024.*1024.*1024.
                filesPerJob = int(math.ceil(args.gigabytesPerJob*GB/averageSize))
        if hasattr(args,'jsonFilesPerJob') and args.jsonFilesPerJob:
            # Per-sample override from a JSON map {sample: filesPerJob}.
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
Exemplo n.º 12
0
def submitLimitCrab(tag,h,amasses,**kwargs):
    """Submit limit-calculation jobs to CRAB.

    Builds a bash script that runs the combine commands from getCommands()
    once per A mass, then submits it through the CRAB client.

    Recognized kwargs: dryrun (bool), jobName (str), pointsPerJob (int),
    parametric (bool).
    """
    dryrun = kwargs.get('dryrun',False)
    jobName = kwargs.get('jobName',None)
    pointsPerJob = kwargs.get('pointsPerJob',10)
    parametric = kwargs.get('parametric',False)

    # '${A}' is expanded by the generated bash script's per-mass loop below.
    a = '${A}'

    datacard = 'datacards_shape/MuMuTauTau/mmmt_{}_HToAAH{}A{}.txt'.format(tag,h,'X' if parametric else '${A}')

    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}/{}'.format(scratchDir,pwd.getpwuid(os.getuid())[0], jobName, tag, h)
    python_mkdir(sample_dir)

    # create submit dir; bail out rather than clobber an existing submission
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script that iterates over the A masses (delivered to the
    # job as its "input files") and runs combine for each
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(datacard=datacard,h=h,a=a,tag=tag)+'\n'
    bashScript += 'done\n'
    # Emit a minimal FrameworkJobReport so CRAB considers the job successful.
    bashScript += """echo '''<FrameworkJobReport>\
<ReadBranches>\n
</ReadBranches>\n
<PerformanceReport>\n
  <PerformanceSummary Metric="StorageStatistics">\n
    <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n
    <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n
    <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n
    <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n
    <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n
    <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n
  </PerformanceSummary>\n
</PerformanceReport>\n
<GeneratorInfo>\n
</GeneratorInfo>\n
</FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    with open(bash_name,'w') as file:
        file.write(bashScript)
    os.system('chmod +x {0}'.format(bash_name))

    # setup crab config
    from CRABClient.UserUtilities import config

    # NOTE(review): this rebinding shadows the imported `config` factory.
    config = config()

    config.General.workArea         = submit_dir
    config.General.transferOutputs  = True

    config.JobType.pluginName       = 'Analysis'
    config.JobType.psetName         = '{0}/src/DevTools/Utilities/test/PSet.py'.format(os.environ['CMSSW_BASE'])
    config.JobType.scriptExe        = bash_name
    config.JobType.outputFiles      = []
    config.JobType.inputFiles       = ['datacards_shape/MuMuTauTau']

    config.Data.outLFNDirBase       = '/store/user/{}/{}/{}/{}'.format(UNAME, jobName, tag, h)
    config.Data.outputDatasetTag    = jobName
    # Abuse FileBased splitting: each "file" is actually an A-mass value.
    config.Data.userInputFiles      = [str(a) for a in amasses]
    config.Data.splitting           = 'FileBased'
    config.Data.unitsPerJob         = pointsPerJob
    config.Data.outputPrimaryDataset= 'Limits'

    config.Site.storageSite         = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config',config]
    if dryrun: submitArgs += ['--dryrun']

    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit

    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    try:
        logging.info('Submitting {}/{}/{}'.format(jobName,tag,h))
        crabClientSubmit.submit(logger,submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
Exemplo n.º 13
0
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [
        args.site
    ]  # whitelist site, run on same site as files located

    # get samples: each entry listed under the input directory is one sample
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # optional glob-style filtering of which samples get submitted
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter): submitSample = True
            if not submitSample: continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:
                                                                99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list straight from HDFS for this sample
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            # BUG FIX: previously used the undefined name `inputDirectory`,
            # which raised NameError whenever a sample had no files.
            logging.warning('{0} {1} has no files.'.format(
                args.inputDirectory, sample))
            continue
        # Default splitting, optionally overridden by size or JSON map below.
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            # Size-based splitting: target gigabytesPerJob per job.
            totalSize = get_hdfs_directory_size(
                os.path.join(args.inputDirectory, sample))
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(
                    math.ceil(args.gigabytesPerJob * GB / averageSize))
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            # Per-sample override from a JSON map {sample: filesPerJob}.
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(
                    args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
Exemplo n.º 14
0
def submit_das_crab(args):
    '''Submit samples using DAS'''
    # NOTE(review): this block uses Python 2 `print` statements; it will not
    # run under Python 3 as written.
    # Bring all three CRAB client loggers up to INFO verbosity.
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples: either from args.samples directly, or one dataset per line
    # of the file named by args.sampleList.
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        # Neither source available: log and fall through with an empty list.
        log.error('Sample input list {0} does not exist.'.format(
            args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # lookup reasonable sites
        if args.ignoreLocality:
            sites = get_sites(sample)
            if sites:  # if we found an ignoreLocality site list
                config.Data.ignoreLocality = True
                config.Site.whitelist = sites
            else:
                logging.warning('Not enabling ignoreLocality, no sites found')
        # DAS dataset paths look like /primary/tag/format; split accordingly.
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        # Keep the tail of the dataset tag so the request name stays short.
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:
                                                                99]  # Warning: may not be unique now
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
            print 'Will submit with args:'
            print submitArgs
            print config.__str__()
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
            # save config file text inside the crab work area for this request
            outdir = os.path.join(
                config.General.workArea,
                'crab_{0}'.format(config.General.requestName),
                'inputs/crabConfig.py')
            with open(outdir, 'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))