Example #1
def execRaw(command, args):
    """
        execRaw - executes a given command with certain arguments and returns
                  the raw result back from the client. args is a python list,
                  the same python list parsed by the optparse module
    """
    tblogger, logger, memhandler = initLoggers()

    try:
        mod = __import__('CRABClient.Commands.%s' % command, fromlist=command)
    except ImportError:
        raise CRABAPI.BadArgumentException('Could not find command "%s"' % command)

    try:
        cmdobj = getattr(mod, command)(logger, args)
        res = cmdobj()
    except SystemExit as se:
        # most likely an error from the OptionParser in Subcommand.
        # CRABClient #4283 should make this less ugly
        if se.code == 2:
            raise CRABAPI.BadArgumentException
        else:
            # We can reach here if the PSet raises a SystemExit exception
            # Without this, CRAB raises a confusing UnboundLocalError
            logger.error('PSet raised a SystemExit. Traceback follows:')
            logger.error(traceback.format_exc())
            raise
    finally:
        flushMemoryLogger(tblogger, memhandler, logger.logfile)
        removeLoggerHandlers(tblogger)
        removeLoggerHandlers(logger)
    return res
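
A minimal usage sketch, assuming execRaw above is in scope, the CRABAPI exception module is importable, and 'status' is a valid CRABClient command; the project directory is hypothetical:

import CRABAPI

try:
    res = execRaw('status', ['--dir', 'crab_projects/crab_myTask'])
except CRABAPI.BadArgumentException:
    # raised above for an unknown command or an OptionParser exit code of 2
    print('invalid command or arguments')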
Example #2
def status_crab(args):
    '''Check jobs'''
    if not crabLoaded:
        logging.error('You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh')
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    statusMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir',d]
            #if args.verbose: statusArgs += ['--long']
            try:
                log.info('Retrieving status of {0}'.format(d))
                statusMap[d] = crabClientStatus.status(logger,statusArgs)()
                if args.verbose: print_single_status(args,statusMap[d])
            except HTTPException as hte:
                log.warning("Status for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Status for input directory {0} failed: {1}".format(d, cle))

    parse_crab_status(args,statusMap)
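
For illustration, a hedged sketch of driving status_crab by hand: argparse.Namespace stands in for the parsed command-line options, the attribute names (jobName, directories, verbose) follow the snippet above, and the values are hypothetical. It assumes the module-level imports and the crabLoaded flag are in place.

from argparse import Namespace

args = Namespace(jobName='myAnalysis', directories=None, verbose=False)
status_crab(args)  # checks every crab directory under the job's work area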
Example #3
def status_crab(args):
    '''Check jobs'''
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.crabDirectories:
        for d in args.crabDirectories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    statusMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            #if args.verbose: statusArgs += ['--long']
            try:
                log.info('Retrieving status of {0}'.format(d))
                statusMap[d] = crabClientStatus.status(logger, statusArgs)()
            except HTTPException as hte:
                log.warning(
                    "Status for input directory {0} failed: {1}".format(
                        d, hte.headers))
            except ClientException as cle:
                log.warning(
                    "Status for input directory {0} failed: {1}".format(
                        d, cle))

    parse_crab_status(args, statusMap)
Example #4
def execRaw(command, args):
    """
        execRaw - executes a given command with certain arguments and returns
                  the raw result back from the client. args is a python list,
                  the same python list parsed by the optparse module
    """
    tblogger, logger, memhandler = initLoggers()

    try:
        mod = __import__('CRABClient.Commands.%s' % command, fromlist=command)
    except ImportError:
        raise CRABAPI.BadArgumentException('Could not find command "%s"' % command)

    try:
        cmdobj = getattr(mod, command)(logger, args)
        res = cmdobj()
    except SystemExit as se:
        # most likely an error from the OptionParser in Subcommand.
        # CRABClient #4283 should make this less ugly
        if se.code == 2:
            raise CRABAPI.BadArgumentException
        else:
            # re-raise anything else; without this the final 'return res'
            # would fail with a confusing UnboundLocalError
            raise
    finally:
        flushMemoryLogger(tblogger, memhandler, logger.logfile)
        removeLoggerHandlers(tblogger)
        removeLoggerHandlers(logger)
    return res
Example #5
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = ['T2_US_Wisconsin'] # whitelist wisconsin so it only runs there


    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        config.Data.userInputFiles = get_hdfs_root_files(args.inputDirectory,sample)
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
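
The '[:99]' truncation above is why the comment warns about uniqueness. A self-contained illustration of the caveat:

# two hypothetical dataset names that differ only after 99 characters
nameA = 'A' * 110 + '_ext1'
nameB = 'A' * 110 + '_ext2'
print(nameA[:99] == nameB[:99])  # True: the truncated request names collide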
Example #6
def status_crab(args):
    '''Check jobs'''
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.crabDirectories:
        for d in args.crabDirectories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    statusMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir',d]
            #if args.verbose: statusArgs += ['--long']
            try:
                log.info('Retrieving status of {0}'.format(d))
                statusMap[d] = crabClientStatus.status(logger,statusArgs)()
            except HTTPException as hte:
                log.warning("Status for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Status for input directory {0} failed: {1}".format(d, cle))

    parse_crab_status(args,statusMap)
Example #7
def execRaw(command, args):
    """
        execRaw - executes a given command with certain arguments and returns
                  the raw result back from the client. args is a python list,
                  the same python list parsed by the optparse module
    """
    tblogger, logger, memhandler = initLoggers()

    try:
        mod = __import__('CRABClient.Commands.%s' % command, fromlist=command)
    except ImportError:
        raise CRABAPI.BadArgumentException('Could not find command "%s"' % command)

    try:
        cmdobj = getattr(mod, command)(logger, args)
        res = cmdobj()
    except SystemExit as se:
        # most likely an error from the OptionParser in Subcommand.
        # CRABClient #4283 should make this less ugly
        if se.code == 2:
            raise CRABAPI.BadArgumentException
        else:
            # re-raise anything else; without this the final 'return res'
            # would fail with a confusing UnboundLocalError
            raise
    finally:
        flushMemoryLogger(tblogger, memhandler, logger.logfile)
        removeLoggerHandlers(tblogger)
        removeLoggerHandlers(logger)
    return res
Example #8
def purge_crab(args):
    '''Purge completed jobs'''
    if not crabLoaded:
        logging.error(
            'You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh'
        )
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    purgeMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            purgeArgs = ['--cache', '--dir', d]
            try:
                summary = crabClientStatus.status(logger, statusArgs)()
                purge = False
                total = 0
                finished = 0
                allJobStatus = {}
                if 'jobs' in summary:
                    for j, job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] not in allJobStatus:
                            allJobStatus[job['State']] = 0
                        allJobStatus[job['State']] += 1
                        if job['State'] in ['finished']:
                            finished += 1
                if total and finished == total:
                    purge = True
                if purge:
                    log.info('Purging {0}'.format(d))
                    log.info(' '.join([
                        '{0}: {1}'.format(state, allJobStatus[state])
                        for state in allowedStates if state in allJobStatus
                    ]))
                    purgeMap[d] = crabClientPurge.purge(logger, purgeArgs)()
            except HTTPException as hte:
                log.warning(
                    "Purge for input directory {0} failed: {1}".format(
                        d, hte.headers))
            except ClientException as cle:
                log.warning(
                    "Purge for input directory {0} failed: {1}".format(
                        d, cle))
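
The purge decision above reduces to "every job finished". A standalone sketch of that tally, with a fabricated status summary in place of a real crab result (items() here stands in for the Python 2 iteritems()):

summary = {'jobs': {'1': {'State': 'finished'}, '2': {'State': 'finished'}}}
total = finished = 0
allJobStatus = {}
for j, job in summary['jobs'].items():
    total += 1
    allJobStatus[job['State']] = allJobStatus.get(job['State'], 0) + 1
    if job['State'] == 'finished':
        finished += 1
print(total and finished == total)  # True -> this directory would be purged
print(allJobStatus)                 # {'finished': 2}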
Example #9
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList,'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # lookup reasonable sites
        if args.ignoreLocality:
            sites = get_sites(sample)
            if sites: # if we found an ignoreLocality site list
                config.Data.ignoreLocality  = True
                config.Site.whitelist = sites
            else:
                logging.warning('Not enabling ignoreLocality, no sites found')
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97-len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.inputDataset   = sample
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun:
            submitArgs += ['--dryrun']
            print 'Will submit with args:'
            print submitArgs
            print config.__str__()
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
            # save config file text
            outdir = os.path.join(config.General.workArea, 'crab_{0}'.format(config.General.requestName), 'inputs/crabConfig.py')
            with open(outdir,'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
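
A hedged, self-contained sketch of the requestName construction above, applied to a made-up DAS dataset path:

sample = '/DYJetsToLL_M-50/RunIISummer16_MiniAOD-v1/MINIAODSIM'  # hypothetical
_, primaryDataset, datasetTag, dataFormat = sample.split('/')
maxDatasetTagSize = 97 - len(primaryDataset)
requestName = (primaryDataset + '_' + datasetTag[-maxDatasetTagSize:])[:99]
print(requestName)  # DYJetsToLL_M-50_RunIISummer16_MiniAOD-v1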
Example #10
def resubmit_crab(args):
    '''Resubmit jobs'''
    if not crabLoaded:
        logging.error('You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh')
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    resubmitMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir',d]
            resubmitArgs = ['--dir',d]
            try:
                summary = crabClientStatus.status(logger,statusArgs)()
                resubmit = False
                total = 0
                failed = 0
                allJobStatus = {}
                if 'jobs' in summary:
                    for j,job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] not in allJobStatus: allJobStatus[job['State']] = 0
                        allJobStatus[job['State']] += 1
                        if job['State'] in ['failed']:
                            failed += 1
                            resubmit = True
                if resubmit:
                    log.info('Resubmitting {0}'.format(d))
                    log.info('{0} of {1} jobs failed'.format(failed,total))
                    log.info(' '.join(['{0}: {1}'.format(state,allJobStatus[state]) for state in allowedStates if state in allJobStatus]))
                    resubmitMap[d] = crabClientResubmit.resubmit(logger,resubmitArgs)()
            except HTTPException as hte:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, cle))

    for d,statMap in resubmitMap.iteritems():
        if statMap['status'] != 'SUCCESS':
            log.info('Status: {0} - {1}'.format(statMap['status'],d))
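
The resubmit trigger above is simply "at least one failed job". A standalone sketch with a fabricated summary:

summary = {'jobs': {'1': {'State': 'finished'}, '2': {'State': 'failed'}}}
total = failed = 0
for j, job in summary['jobs'].items():  # iteritems() in the Python 2 original
    total += 1
    if job['State'] == 'failed':
        failed += 1
print('{0} of {1} jobs failed; resubmit = {2}'.format(failed, total, failed > 0))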
Example #11
def resubmit_crab(args):
    '''Resubmit jobs'''
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.crabDirectories:
        for d in args.crabDirectories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    resubmitMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            resubmitArgs = ['--dir', d]
            try:
                summary = crabClientStatus.status(logger, statusArgs)()
                resubmit = False
                total = 0
                failed = 0
                if 'jobs' in summary:
                    for j, job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] in ['failed']:
                            failed += 1
                            resubmit = True
                if resubmit:
                    log.info('Resubmitting {0}'.format(d))
                    log.info('{0} of {1} jobs failed'.format(failed, total))
                    resubmitMap[d] = crabClientResubmit.resubmit(
                        logger, resubmitArgs)()
            except HTTPException as hte:
                log.warning(
                    "Resubmission for input directory {0} failed: {1}".format(
                        d, hte.headers))
            except ClientException as cle:
                log.warning(
                    "Resubmission for input directory {0} failed: {1}".format(
                        d, cle))

    for d, statMap in resubmitMap.iteritems():
        if statMap['status'] != 'SUCCESS':
            log.info('Status: {0} - {1}'.format(statMap['status'], d))
Example #12
def resubmit_crab(args):
    '''Resubmit jobs'''
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.crabDirectories:
        for d in args.crabDirectories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    resubmitMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir',d]
            resubmitArgs = ['--dir',d]
            try:
                summary = crabClientStatus.status(logger,statusArgs)()
                resubmit = False
                total = 0
                failed = 0
                if 'jobs' in summary:
                    for j,job in summary['jobs'].iteritems():
                        total += 1
                        if job['State'] in ['failed']:
                            failed += 1
                            resubmit = True
                if resubmit:
                    log.info('Resubmitting {0}'.format(d))
                    log.info('{0} of {1} jobs failed'.format(failed,total))
                    resubmitMap[d] = crabClientResubmit.resubmit(logger,resubmitArgs)()
            except HTTPException as hte:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, hte.headers))
            except ClientException as cle:
                log.warning("Resubmission for input directory {0} failed: {1}".format(d, cle))

    for d,statMap in resubmitMap.iteritems():
        if statMap['status'] != 'SUCCESS':
            log.info('Status: {0} - {1}'.format(statMap['status'],d))
Example #13
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(
            args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
Example #14
def status_crab(args):
    '''Check jobs'''
    if not crabLoaded:
        logging.error(
            'You must source a crab environment to submit to crab.\nsource /cvmfs/cms.cern.ch/crab3/crab.sh'
        )
        return
    crab_dirs = []
    if args.jobName:
        workArea = get_crab_workArea(args)
        crab_dirs += sorted(glob.glob('{0}/*'.format(workArea)))
    elif args.directories:
        for d in args.directories:
            crab_dirs += glob.glob(d)
    else:
        log.error("Shouldn't be possible to get here")

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.WARNING)
    logger.setLevel(logging.WARNING)
    memhandler.setLevel(logging.WARNING)

    statusMap = {}
    for d in crab_dirs:
        if os.path.exists(d):
            statusArgs = ['--dir', d]
            #if args.verbose: statusArgs += ['--long']
            try:
                log.info('Retrieving status of {0}'.format(d))
                statusMap[d] = crabClientStatus.status(logger, statusArgs)()
                if args.verbose: print_single_status(args, statusMap[d])
            except HTTPException as hte:
                log.warning(
                    "Status for input directory {0} failed: {1}".format(
                        d, hte.headers))
            except ClientException as cle:
                log.warning(
                    "Status for input directory {0} failed: {1}".format(
                        d, cle))

    parse_crab_status(args, statusMap)
Example #15
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList,'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97-len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.inputDataset   = sample
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
Example #16
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [
        args.site
    ]  # whitelist site, run on same site as files located

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        config.Data.userInputFiles = get_hdfs_root_files(
            args.inputDirectory, sample)
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
Example #17
def submit_das_crab(args):
    '''Submit samples using DAS'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)

    # get samples
    sampleList = []
    if args.samples:
        sampleList += args.samples
    elif os.path.isfile(args.sampleList):
        with open(args.sampleList, 'r') as f:
            sampleList = [line.strip() for line in f]
    else:
        log.error('Sample input list {0} does not exist.'.format(
            args.sampleList))

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        # lookup reasonable sites
        if args.ignoreLocality:
            sites = get_sites(sample)
            if sites:  # if we found an ignoreLocality site list
                config.Data.ignoreLocality = True
                config.Site.whitelist = sites
            else:
                logging.warning('Not enabling ignoreLocality, no sites found')
        _, primaryDataset, datasetTag, dataFormat = sample.split('/')
        config.General.requestName = '{0}'.format(primaryDataset)
        maxDatasetTagSize = 97 - len(primaryDataset)
        config.General.requestName += '_' + datasetTag[-maxDatasetTagSize:]
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.inputDataset = sample
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun:
            submitArgs += ['--dryrun']
            print 'Will submit with args:'
            print submitArgs
            print config.__str__()
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
            # save config file text
            outdir = os.path.join(
                config.General.workArea,
                'crab_{0}'.format(config.General.requestName),
                'inputs/crabConfig.py')
            with open(outdir, 'w') as f:
                f.write(config.__str__())
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
Example #18
def submitLimitCrab(tag,h,amasses,**kwargs):
    dryrun = kwargs.get('dryrun',False)
    jobName = kwargs.get('jobName',None)
    pointsPerJob = kwargs.get('pointsPerJob',10)
    parametric = kwargs.get('parametric',False)

    a = '${A}'

    datacard = 'datacards_shape/MuMuTauTau/mmmt_{}_HToAAH{}A{}.txt'.format(tag,h,'X' if parametric else '${A}')

    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}/{}'.format(scratchDir,pwd.getpwuid(os.getuid())[0], jobName, tag, h)
    python_mkdir(sample_dir)

    # create submit dir
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(datacard=datacard,h=h,a=a,tag=tag)+'\n'
    bashScript += 'done\n'
    bashScript += """echo '''<FrameworkJobReport>\
<ReadBranches>\n
</ReadBranches>\n
<PerformanceReport>\n
  <PerformanceSummary Metric="StorageStatistics">\n
    <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n
    <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n
    <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n
    <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n
    <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n
    <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n
  </PerformanceSummary>\n
</PerformanceReport>\n
<GeneratorInfo>\n
</GeneratorInfo>\n
</FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    with open(bash_name,'w') as file:
        file.write(bashScript)
    os.system('chmod +x {0}'.format(bash_name))

    # setup crab config
    from CRABClient.UserUtilities import config

    config = config()

    config.General.workArea         = submit_dir
    config.General.transferOutputs  = True

    config.JobType.pluginName       = 'Analysis'
    config.JobType.psetName         = '{0}/src/DevTools/Utilities/test/PSet.py'.format(os.environ['CMSSW_BASE'])
    config.JobType.scriptExe        = bash_name
    config.JobType.outputFiles      = []
    config.JobType.inputFiles       = ['datacards_shape/MuMuTauTau']

    config.Data.outLFNDirBase       = '/store/user/{}/{}/{}/{}'.format(UNAME, jobName, tag, h)
    config.Data.outputDatasetTag    = jobName
    config.Data.userInputFiles      = [str(a) for a in amasses]
    config.Data.splitting           = 'FileBased'
    config.Data.unitsPerJob         = pointsPerJob
    config.Data.outputPrimaryDataset= 'Limits'

    config.Site.storageSite         = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config',config]
    if dryrun: submitArgs += ['--dryrun']

    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    try:
        logging.info('Submitting {}/{}/{}'.format(jobName,tag,h))
        crabClientSubmit.submit(logger,submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
Example #19
                    success.append(filename)
                    retry += 1 #To retrieve retried job log, if there is any.
                except HTTPException as hte:
                    succeded = False
                    # Ignore the exception if the HTTP status code is 404. Status 404 means file
                    # not found (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html). File
                    # not found error is expected, since we try all the job retries.
                    if hasattr(hte.args[0], 'status') and hte.args[0].status != 404:
                        self.logger.debug(str(hte))
                        failed.append(filename)

        return failed, success

class nil:
    def __getattr__(self, attr):
        return self
    def __call__(self, *args, **kwargs):
        return None
import sys
import trace

from CRABClient.ClientUtilities import initLoggers, flushMemoryLogger, removeLoggerHandlers

tblogger, logger, memhandler = initLoggers()
try:
    tracer = trace.Trace()
    #tracer.runfunc(getlog(logger, sys.argv[1:]))
    getlog(logger, sys.argv[1:])()
finally:
    flushMemoryLogger(tblogger, memhandler, logger.logfile)
    removeLoggerHandlers(tblogger)
    removeLoggerHandlers(logger)
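
The nil class above is a null object: any attribute access returns the object itself and calling it returns None, so it can silently absorb logger-style call chains. A quick demonstration:

n = nil()
print(n.anything.chained.you.like('and', called=True))  # prints None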
Example #20
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [args.site] # whitelist site, run on same site as files located

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        if hasattr(args,'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample,sampleFilter): submitSample = True
            if not submitSample: continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99] # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory,sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles==0:
            logging.warning('{0} {1} has no files.'.format(args.inputDirectory,sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(os.path.join(args.inputDirectory,sample))
            if totalSize:
                averageSize = totalSize/totalFiles
                GB = 1024.*1024.*1024.
                filesPerJob = int(math.ceil(args.gigabytesPerJob*GB/averageSize))
        if hasattr(args,'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config',config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger,submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(sample, cle))
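
A standalone sketch of the gigabytes-per-job arithmetic above, with made-up numbers:

import math

totalFiles = 250
totalSize = 500 * 1024. ** 3  # 500 GiB of input on HDFS, hypothetical
gigabytesPerJob = 4
averageSize = totalSize / totalFiles  # 2 GiB per file
GB = 1024. * 1024. * 1024.
filesPerJob = int(math.ceil(gigabytesPerJob * GB / averageSize))
print(filesPerJob)  # 2 -> each job reads roughly 4 GiB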
Example #21
def submit_untracked_crab(args):
    '''Submit jobs from an inputDirectory'''
    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    # crab config
    config = get_config(args)
    config.Site.whitelist = [
        args.site
    ]  # whitelist site, run on same site as files located

    # get samples
    sampleList = hdfs_ls_directory(args.inputDirectory)

    submitMap = {}
    # iterate over samples
    for sample in sampleList:
        if hasattr(args, 'sampleFilter'):
            submitSample = False
            for sampleFilter in args.sampleFilter:
                if fnmatch.fnmatch(sample, sampleFilter): submitSample = True
            if not submitSample: continue
        primaryDataset = sample
        config.General.requestName = '{0}'.format(primaryDataset)
        # make it only 100 characters
        config.General.requestName = config.General.requestName[:99]  # Warning: may not be unique now
        config.Data.outputPrimaryDataset = primaryDataset
        # get file list
        inputFiles = get_hdfs_root_files(args.inputDirectory, sample)
        config.Data.userInputFiles = inputFiles
        totalFiles = len(inputFiles)
        if totalFiles == 0:
            logging.warning('{0} {1} has no files.'.format(
                args.inputDirectory, sample))
            continue
        filesPerJob = args.filesPerJob
        if args.gigabytesPerJob:
            totalSize = get_hdfs_directory_size(
                os.path.join(args.inputDirectory, sample))
            if totalSize:
                averageSize = totalSize / totalFiles
                GB = 1024. * 1024. * 1024.
                filesPerJob = int(
                    math.ceil(args.gigabytesPerJob * GB / averageSize))
        if hasattr(args, 'jsonFilesPerJob') and args.jsonFilesPerJob:
            if os.path.isfile(args.jsonFilesPerJob):
                with open(args.jsonFilesPerJob) as f:
                    data = json.load(f)
                if sample in data:
                    filesPerJob = data[sample]
            else:
                logging.error('JSON map {0} for jobs does not exist'.format(
                    args.jsonFilesPerJob))
                return
        config.Data.unitsPerJob = filesPerJob
        # submit the job
        submitArgs = ['--config', config]
        if args.dryrun: submitArgs += ['--dryrun']
        try:
            log.info("Submitting for input dataset {0}".format(sample))
            submitMap[sample] = crabClientSubmit.submit(logger, submitArgs)()
        except HTTPException as hte:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, hte.headers))
        except ClientException as cle:
            log.info("Submission for input dataset {0} failed: {1}".format(
                sample, cle))
Example #22
def submitLimitCrab(tag, h, amasses, **kwargs):
    dryrun = kwargs.get('dryrun', False)
    jobName = kwargs.get('jobName', None)
    pointsPerJob = kwargs.get('pointsPerJob', 10)
    parametric = kwargs.get('parametric', False)
    postscript = kwargs.get('postscript', '')

    a = '${A}'

    datacard = 'datacards_shape/MuMuTauTau/{}_HToAAH{}A{}{}.txt'.format(
        tag, h, 'X' if parametric else '${A}', postscript)

    combineCommands = getCommands(**kwargs)

    sample_dir = '/{}/{}/crab_projects/{}/{}{}/{}'.format(
        scratchDir,
        pwd.getpwuid(os.getuid())[0], jobName, tag, postscript, h)
    python_mkdir(sample_dir)

    # create submit dir
    submit_dir = '{}/crab'.format(sample_dir)
    if os.path.exists(submit_dir):
        logging.warning('Submission directory exists for {0}.'.format(jobName))
        return

    # create bash script
    bash_name = '{}/script.sh'.format(sample_dir)
    bashScript = '#!/bin/bash\n'
    bashScript += 'eval `scramv1 runtime -sh`\n'
    bashScript += 'ls\n'
    bashScript += 'printenv\n'
    bashScript += 'mkdir datacards_shape\n'
    bashScript += 'mv MuMuTauTau datacards_shape/MuMuTauTau\n'
    bashScript += 'files=`python -c "import PSet; print \' \'.join(list(PSet.process.source.fileNames))"`\n'
    bashScript += 'echo $files\n'
    bashScript += 'for A in $files; do\n'
    for cc in combineCommands:
        bashScript += cc.format(
            datacard=datacard, h=h, a=a, tag=tag, postscript=postscript) + '\n'
    bashScript += 'done\n'
    bashScript += """echo '''<FrameworkJobReport>\
<ReadBranches>\n
</ReadBranches>\n
<PerformanceReport>\n
  <PerformanceSummary Metric="StorageStatistics">\n
    <Metric Name="Parameter-untracked-bool-enabled" Value="true"/>\n
    <Metric Name="Parameter-untracked-bool-stats" Value="true"/>\n
    <Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>\n
    <Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>\n
    <Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>\n
    <Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>\n
  </PerformanceSummary>\n
</PerformanceReport>\n
<GeneratorInfo>\n
</GeneratorInfo>\n
</FrameworkJobReport>''' > FrameworkJobReport.xml\n"""
    with open(bash_name, 'w') as file:
        file.write(bashScript)
    os.system('chmod +x {0}'.format(bash_name))

    # setup crab config
    from CRABClient.UserUtilities import config

    config = config()

    config.General.workArea = submit_dir
    config.General.transferOutputs = True

    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = '{0}/src/DevTools/Utilities/test/PSet.py'.format(
        os.environ['CMSSW_BASE'])
    config.JobType.scriptExe = bash_name
    config.JobType.outputFiles = []
    config.JobType.inputFiles = ['datacards_shape/MuMuTauTau']

    config.Data.outLFNDirBase = '/store/user/{}/{}/{}/{}'.format(
        UNAME, jobName, tag, h)
    config.Data.outputDatasetTag = jobName
    config.Data.userInputFiles = [str(a) for a in amasses]
    config.Data.splitting = 'FileBased'
    config.Data.unitsPerJob = pointsPerJob
    config.Data.outputPrimaryDataset = 'Limits'

    config.Site.storageSite = 'T2_US_Wisconsin'

    # submit
    submitArgs = ['--config', config]
    if dryrun: submitArgs += ['--dryrun']

    from CRABClient.ClientExceptions import ClientException
    from CRABClient.ClientUtilities import initLoggers
    from httplib import HTTPException
    import CRABClient.Commands.submit as crabClientSubmit

    tblogger, logger, memhandler = initLoggers()
    tblogger.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    memhandler.setLevel(logging.INFO)

    try:
        logging.info('Submitting {}/{}/{}'.format(jobName, tag, h))
        crabClientSubmit.submit(logger, submitArgs)()
    except HTTPException as hte:
        logging.info("Submission failed: {}".format(hte.headers))
    except ClientException as cle:
        logging.info("Submission failed: {}".format(cle))
Example #23
    def __call__(self):

        (options, args) = self.parser.parse_args()

        ## The default logfile destination is ./crab.log. It will be changed once we
        ## know/create the CRAB project directory.
        if options.quiet:
            setConsoleLogLevelVar(logging.WARNING)
        elif options.debug:
            setConsoleLogLevelVar(logging.DEBUG)
        self.tblogger, self.logger, self.memhandler = initLoggers()

        #Instructions needed in case of early failures: sometimes the traceback logger
        #has not been set yet.

        ## Will replace Python's sys.excepthook default function with the next function.
        ## This function is used for handling uncaught exceptions (in a Python program
        ## this happens just before the program exits).
        ## In this function:
        ## - make sure everything is logged to the crab.log file;
        ## However, we already have a `finally' clause where we make sure everything is
        ## logged to the crab log file.
        def log_exception(exc_type, exc_value, tback):
            """
            Send a short version of the exception to the console,
            a long version to the log

            Adapted from Doug Hellmann

            This might help sometimes:
            import traceback,pprint;
            pprint.pprint(traceback.format_tb(tback))
            """

            ## Add to the CRAB3 logger a file handler to the log file (if it doesn't have it
            ## already).
            tbLogger = logging.getLogger('CRAB3')
            hasFileHandler = False
            for h in tbLogger.handlers:
                if (isinstance(h, logging.FileHandler)
                        and h.stream.name == client.logger.logfile):
                    hasFileHandler = True
            if not hasFileHandler:
                filehandler = logging.FileHandler(client.logger.logfile)
                filehandler.setFormatter(LOGFORMATTER)
                tbLogger.addHandler(filehandler)
            ## This goes to the log file.
            tbLogger.error("Unhandled Exception!")
            tbLogger.error(exc_value, exc_info=(exc_type, exc_value, tback))

            ## This goes to the console (via the CRAB3.all logger) and to the log file (via
            ## the parent CRAB3 logger).
            logger = logging.getLogger('CRAB3.all')
            logger.error("ERROR: %s: %s", exc_type.__name__, exc_value)
            logger.error(
                "\n\tPlease email %s for support with the crab.log file or crab.log URL.",
                FEEDBACKMAIL)
            logger.error("\tClient Version: %s", client_version)

            logger.error(
                "\tPlease use 'crab uploadlog' to upload the log file %s to the CRAB cache.",
                client.logger.logfile)

        sys.excepthook = log_exception

        # check that the command is valid
        if len(args) == 0:
            print("You have not specified a command.")
            # the valid commands are described in the parser epilog; reuse it here
            print(self.parser.epilog)
            sys.exit(-1)

        sub_cmd = None
        try:
            sub_cmd = next(v for k, v in self.subCommands.items()
                           if args[0] in v.shortnames or args[0] == v.name)
        except StopIteration:
            print("'" + str(args[0]) + "' is not a valid command.")
            self.parser.print_help()
            sys.exit(-1)
        self.cmd = sub_cmd(self.logger, args[1:])

        self.cmd()
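
A hedged sketch of the sub-command lookup above, with a stub class standing in for a real CRABClient command:

class StubStatus(object):  # hypothetical stand-in
    name = 'status'
    shortnames = ['st']

subCommands = {'status': StubStatus}
arg0 = 'st'
sub_cmd = next(v for k, v in subCommands.items()
               if arg0 in v.shortnames or arg0 == v.name)
print(sub_cmd.name)  # 'status'; StopIteration here means an unknown command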