Example #1
0
            #(base_all, 'job_summer12_t_tW'),
            #(base_all, 'job_summer12_tbar_s'),
            #(base_all, 'job_summer12_tbar_t'),
            #(base_all, 'job_summer12_tbar_tW'),
            (base_all, 'job_summer12_ttW'),
            (base_all, 'job_summer12_ttZ'),
            #(base_all, 'job_summer12_ttg'),
            #(base_all, 'job_summer12_ttjets_1l'),
            #(base_all, 'job_summer12_ttjets_2l'),


            #(base_data, 'job_muon_2012a_Jan22rereco'),
            #(base_data, 'job_muon_2012b_Jan22rereco'),
            #(base_data, 'job_muon_2012c_Jan22rereco'),
            #(base_data, 'job_muon_2012d_Jan22rereco'),
            #(base_alberto, 'job_electron_2012a_Jan22rereco'),
            #(base_alberto, 'job_electron_2012b_Jan22rereco'),
            #(base_alberto, 'job_electron_2012c_Jan2012rereco'),
            #(base_alberto, 'job_electron_2012d_Jan22rereco'),


]


for base, ds in job_conf :

    print "FIXME"
    orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist = check_dataset_completion( originalDS=base, filteredDS=base_filtered+'/'+ds, treeNameOrig='ggNtuplizer/EventTree', treeNameFilt='ggNtuplizer/EventTree',histNameFilt='ggNtuplizer/filter', fileKeyOrig=ds+'.root', fileKeyFilt='tree.root' )

    print '%s : Orignal = %d events, filtered = %d events.  Difference = %d' %( ds, orig_nevt_tree, filt_nevt_hist, orig_nevt_tree-filt_nevt_hist)
Example #2
0
    #(base_phB, 'job_fall13_photonRunB2012_2'),
    #(base_phB, 'job_fall13_photonRunB2012_3'),
    #(base_phB, 'job_fall13_photonRunB2012_4'),
    #(base_phB, 'job_fall13_photonRunB2012_5'),
    #(base_phB, 'job_fall13_photonRunB2012_6'),
    #(base_phB, 'job_fall13_photonRunB2012_7'),
    #(base_phB, 'job_fall13_photonRunB2012_8'),
    #(base_phB, 'job_fall13_photonRunB2012_9'),
    #(base_phD, 'job_2photon_2012d_Jan22rereco_1of4'),
    #(base_phD, 'job_2photon_2012d_Jan22rereco_2of4'),
    #(base_phD, 'job_2photon_2012d_Jan22rereco_3of4'),
    #(base_phD2, 'job_2photon_2012d_Jan22rereco_4of4'),
    #(base_phD2, 'job_2photon_2012d_Jan22rereco_5of5'),
]

for base, ds in job_conf:

    print "FIXME"
    #orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist = check_dataset_completion( originalDS=base+'/'+ds, filteredDS=base_filtered+'/'+ds, treeNameOrig='ggNtuplizer/EventTree', treeNameFilt='ggNtuplizer/EventTree',histNameFilt='ggNtuplizer/filter', fileKeyOrig='tree.root', fileKeyFilt='tree.root' )
    orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist = check_dataset_completion(
        originalDS=base,
        filteredDS=base_filtered + '/' + ds,
        treeNameOrig='ggNtuplizer/EventTree',
        treeNameFilt='ggNtuplizer/EventTree',
        histNameFilt='ggNtuplizer/filter',
        fileKeyOrig=ds + '.root',
        fileKeyFilt='tree.root')

    print '%s : Orignal = %d events, filtered = %d events.  Difference = %d' % (
        ds, orig_nevt_tree, filt_nevt_hist, orig_nevt_tree - filt_nevt_hist)
Example #3
0
def RunJobs(jobs, configs, options, dry_run=False):

    if not isinstance(jobs, list):
        jobs = [jobs]
    if not isinstance(configs, list):
        configs = [configs]

    if isinstance(options, Namespace):
        options = vars(options)

    jobs_data = []
    jobs_mc = []

    for j in jobs:
        if j.isData:
            jobs_data.append(j)
        else:
            jobs_mc.append(j)

    run = options.get('run', False)
    check = options.get('check', False)
    exename = options.get('exename', 'RunAnalysis')
    nFilesPerJob = options.get('nFilesPerJob', 0)
    nJobs = options.get('nJobs', 0)
    filekey = options.get('filekey', 'tree.root')
    treename = options.get('treename', None)
    copyInputFiles = options.get('copyInputFiles', False)
    batch = options.get('batch', False)
    nproc = options.get('nproc', 1)
    resubmit = options.get('resubmit', False)
    enableKeepFilter = options.get('enableKeepFilter', False)
    totalEvents = options.get('totalEvents', False)
    enableRemoveFilter = options.get('enableRemoveFilter', False)
    disableOutputTree = options.get('disableOutputTree', False)
    PUPath = options.get('PUPath', None)
    usexrd = options.get('usexrd', False)

    if run:
        for config in configs:
            first_data = True
            first_mc = True

            select_dataset = config.get('dataset', [])
            if not isinstance(select_dataset, list):
                select_dataset = [select_dataset]

            proc = subprocess.Popen(['make', 'clean'])
            proc.wait()
            for job in jobs_data:

                if select_dataset:
                    if job.sample not in select_dataset:
                        print 'Skipping data sample %s that is not requested for this configuration' % job.sample
                        continue

                job_exename = '%s_Data_%s' % (exename, config['tag'])

                module_arg = dict(config.get('args', {}))
                for tag in getattr(job, 'tags', []):
                    module_arg_tag = config.get('args_tag_%s' % tag)
                    for conf, val in module_arg_tag.iteritems():
                        module_arg[conf] = val

                module_arg['isData'] = 'true'
                module_str = '{ '
                # build the module string
                for key, val in module_arg.iteritems():
                    if isinstance(val, basestring):
                        module_str += '\'%s\' : \'%s\',' % (key, val)
                    else:
                        module_str += '\'%s\' : %s,' % (key, val)
                module_str += '}'

                outsample = job.sample
                suffix = getattr(job, 'suffix', None)
                if suffix is not None:
                    outsample = outsample + suffix

                command = command_base % {
                    'base': job.base,
                    'sample': job.sample,
                    'outsample': outsample,
                    'nFilesPerJob':
                    job.nfiles if hasattr(job, "nfiles") else nFilesPerJob,
                    'input': config['input'],
                    'output': config['output'],
                    'exename': job_exename,
                    'treename': treename,
                    'module': config['module'],
                    'moduleArgs': module_str,
                    'version': job.version,
                    'filekey': filekey,
                    'year': job.year,
                    "nJobs": nJobs
                }

                keepSelection = config.get('keepSelection', None)
                removeSelection = config.get('removeSelection', None)

                if totalEvents:
                    command += ' --totalEvents %i' % totalEvents
                if enableKeepFilter:
                    command += ' --enableKeepFilter '
                    if keepSelection is not None:
                        command += ' --keepFilterSelection %s' % keepSelection

                if enableRemoveFilter:
                    command += ' --enableRemoveFilter '
                    if removeSelection is not None:
                        command += ' --removeFilterSelection %s' % removeSelection

                if disableOutputTree:
                    command += ' --disableOutputTree'

                if batch:
                    if _AT_UMD:
                        command += ' --condor '
                    else:
                        command += ' --condor '
                    #    command += ' --batch '

                    if copyInputFiles:
                        command += ' --copyInputFiles '
                else:
                    command += ' --nproc %d ' % nproc

                if usexrd:
                    command += ' --usexrd '

                if resubmit:
                    command += ' --resubmit '

                if not first_data:
                    command += ' --noCompileWithCheck '

                print tPurple % command
                if not dry_run:
                    returncode = os.system(command)
                    if returncode >= 256:
                        print returncode
                        raise KeyboardInterrupt
                if first_data:
                    first_data = False

            proc = subprocess.Popen(['make', 'clean'])
            proc.wait()
            for job in jobs_mc:
                job_exename = '%s_MC_%s' % (exename, config['tag'])

                module_arg = dict(config.get('args', {}))
                for tag in getattr(job, 'tags', []):
                    module_arg_tag = config.get('args_tag_%s' % tag)
                    for conf, val in module_arg_tag.iteritems():
                        module_arg[conf] = val

                module_str = '{ '
                for key, val in module_arg.iteritems():
                    if isinstance(val, basestring):
                        module_str += '\'%s\' : \'%s\',' % (key, val)
                    else:
                        module_str += '\'%s\' : %s,' % (key, val)

                if PUPath is not None:

                    module_str += '\'sampleFile\' : \'%s/%s/hist.root\', ' % (
                        PUPath, job.sample)

                module_str += '}'

                outsample = job.sample
                suffix = getattr(job, 'suffix', None)
                if suffix is not None:
                    outsample = outsample + suffix

                command = command_base % {
                    'base': job.base,
                    'sample': job.sample,
                    'outsample': outsample,
                    'nFilesPerJob':
                    job.nfiles if hasattr(job, "nfiles") else nFilesPerJob,
                    'input': config['input'],
                    'output': config['output'],
                    'exename': job_exename,
                    'treename': treename,
                    'module': config['module'],
                    'moduleArgs': module_str,
                    'version': job.version,
                    'filekey': filekey,
                    'year': getattr(job, "year", 0),
                    "nJobs": nJobs
                }

                keepSelection = config.get('keepSelection', None)
                removeSelection = config.get('removeSelection', None)

                if totalEvents:
                    command += ' --totalEvents %i' % totalEvents
                if enableKeepFilter:
                    command += ' --enableKeepFilter '
                    if keepSelection is not None:
                        command += ' --keepFilterSelection %s' % keepSelection

                if enableRemoveFilter:
                    command += ' --enableRemoveFilter '
                    if removeSelection is not None:
                        command += ' --removeFilterSelection %s' % removeSelection

                if disableOutputTree:
                    command += ' --disableOutputTree'

                if not first_mc:
                    command += ' --noCompileWithCheck '

                if batch:
                    if _AT_UMD:
                        command += ' --condor '
                    else:
                        command += ' --condor '
                        #command += ' --batch '

                    if copyInputFiles:
                        command += ' --copyInputFiles '
                else:
                    command += ' --nproc %d ' % nproc

                if usexrd:
                    command += ' --usexrd '

                if resubmit:
                    command += ' --resubmit '

                print tPurple % command
                if not dry_run:
                    returncode = os.system(command)
                    if returncode >= 256:
                        print returncode
                        raise KeyboardInterrupt

                if first_mc:
                    first_mc = False

    if check:

        check_results = {}
        for config in configs:
            select_dataset = config.get('dataset', [])
            if not isinstance(select_dataset, list):
                select_dataset = [select_dataset]

            for job in (jobs_data + jobs_mc):

                if job.isData and select_dataset:
                    if job.sample not in select_dataset:
                        print 'Skipping data sample %s that is not requested for this configuration' % job.sample
                        continue

                outsample = job.sample
                suffix = getattr(job, 'suffix', None)
                if suffix is not None:
                    outsample = outsample + suffix


#python ../../Util/scripts/check_dataset_completion.py --originalDS /data/users/jkunkle/Resonances//RecoOutput_2017_04_12/MadGraphChargedResonance_WGToLNu_M300_width5/ --filteredDS /data/users/jkunkle/Resonances/LepGammaSigOnly_elgEB_2017_07_27/MadGraphChargedResonance_WGToLNu_M300_width5 --treeNameOrig tupel/EventTree --histNameFilt tupel/filter --fileKeyOrig tree.root --fileKeyFilt tree.root

                job_info_dic = {
                    'base': job.base,
                    'sample': job.sample,
                    'outsample': outsample,
                    'output': config['output'],
                    'input': config['input'],
                    'treename': treename,
                    'version': job.version,
                    'filekey': filekey
                }

                originalDS = '%(base)s/%(input)s/%(sample)s/%(version)s' % job_info_dic
                filteredDS = '%(output)s/%(outsample)s' % job_info_dic
                treeNameOrig = '%(treename)s' % job_info_dic
                histNameFilt = 'filter'
                if treename is not None:
                    split_name = treename.split('/')
                    if len(split_name) > 1:
                        histNameFilt = split_name[0] + '/filter'

                fileKeyOrig = '%(filekey)s' % job_info_dic
                fileKeyFilt = 'tree.root'

                this_result = check_dataset_completion(originalDS,
                                                       filteredDS,
                                                       treeNameOrig,
                                                       None,
                                                       None,
                                                       histNameFilt,
                                                       fileKeyOrig,
                                                       fileKeyFilt,
                                                       quiet=True)

                check_results[filteredDS] = {
                    'res': this_result,
                    'origDS': originalDS
                }

                #command = check_base%{ 'base': job.base , 'sample' : job.sample, 'outsample' : outsample, 'output' : config['output'], 'input' : config['input'], 'treename' : treename, 'version' : job.version, 'filekey' : filekey}
                #print command
                #os.system(command)

        good_ds = []
        missing_ds = []
        bad_ds = []
        for ds, res in check_results.iteritems():
            print "%s dataset has %d events" % (ds,
                                                check_results[ds]['res'][0])

            if res['res'][0] == 0:
                missing_ds.append(ds)
            elif res['res'][0] != res['res'][3]:
                bad_ds.append(ds)
            elif res['res'][0] == res['res'][3]:
                good_ds.append(ds)
            else:
                print 'Could not categorize dataset %s with results:' % ds
                print res

        print '%d filtered datasets are missing events : ' % len(bad_ds)
        for ds in bad_ds:
            nevt_orig = check_results[ds]['res'][0]
            nevt_filt = check_results[ds]['res'][3]
            print '%s : Original has %d events, Filtered has %d events.  Difference = %d' % (
                ds, nevt_orig, nevt_filt, nevt_orig - nevt_filt)
        print '%d Original datasets do not have events : ' % len(missing_ds)
        for ds in missing_ds:
            print check_results[ds]['origDS']
        print '%d datasets have matching events ' % len(good_ds)
Example #4
0
def RunJobs( jobs, configs, options, dry_run=False ) :
    """Submit (run) and/or validate (check) analysis jobs for each configuration.

    jobs    -- a job object or a list of job objects; each provides at
               least .isData, .sample, .base and .version (optionally .suffix)
    configs -- a configuration dict or a list of dicts with keys 'tag',
               'input', 'output', 'module' (optional 'args', 'dataset')
    options -- dict (or argparse Namespace) of run-time switches; see the
               options.get(...) calls below for recognized keys and defaults
    dry_run -- when True, commands are printed but never executed

    Relies on module-level command_base, _AT_UMD and
    check_dataset_completion defined elsewhere in this file.
    """

    # accept a single object as well as a list
    if not isinstance( jobs, list ) :
        jobs = [jobs]
    if not isinstance( configs, list ) :
        configs = [configs]

    # allow an argparse Namespace in place of a plain dict
    if isinstance( options, Namespace ) :
        options = vars(options)

    # data and MC are driven separately (different exe names and module args)
    jobs_data = []
    jobs_mc   = []

    for j in jobs : 
        if j.isData :
            jobs_data.append(j)
        else :
            jobs_mc.append(j)


    # run-time switches, all optional
    run                = options.get('run'                , False         )
    check              = options.get('check'              , False         )
    exename            = options.get('exename'            , 'RunAnalysis' )
    nFilesPerJob       = options.get('nFilesPerJob'       , 0             )
    filekey            = options.get('filekey'            , 'tree.root'   )
    treename           = options.get('treename'           , None          )
    copyInputFiles     = options.get('copyInputFiles'     , False         )
    batch              = options.get('batch'              , False         )
    nproc              = options.get('nproc'              , 1             )
    resubmit           = options.get('resubmit'           , False         )
    enableKeepFilter   = options.get('enableKeepFilter'   , False         )
    enableRemoveFilter = options.get('enableRemoveFilter' , False         ) 
    disableOutputTree  = options.get('disableOutputTree'  , False         )
    PUPath             = options.get('PUPath'             , None          )

    if run :
        for config in configs :
            # the first submitted job per config compiles the executable;
            # subsequent jobs pass --noCompileWithCheck to reuse it
            first_data = True
            first_mc = True

            select_dataset = config.get('dataset', [] )
            if not isinstance( select_dataset, list ) :
                select_dataset = [select_dataset]
    
            for job in jobs_data :

                if select_dataset :
                    if job.sample not in select_dataset :
                        print 'Skipping data sample %s that is not requested for this configuration' %job.sample
                        continue

                job_exename = '%s_Data_%s' %(exename, config['tag'] )
    
                module_arg = dict(config.get('args', {}))
                module_arg['isData'] = 'true'
                module_str = '{ '
                # build the module-arguments string (a python-dict literal
                # passed on the command line)
                for key, val in module_arg.iteritems() :
                    if isinstance( val, basestring ) :
                        module_str += '\'%s\' : \'%s\',' %( key, val)
                    else :
                        module_str += '\'%s\' : %s,' %( key, val)
                module_str += '}'

                # output directory name is the sample plus an optional suffix
                outsample = job.sample
                suffix = getattr(job, 'suffix', None )
                if suffix is not None :
                    outsample = outsample+suffix
    
                command = command_base %{ 'base' : job.base, 'sample' : job.sample, 'outsample' : outsample, 'nFilesPerJob' : nFilesPerJob, 'input' : config['input'], 'output' : config['output'], 'exename' : job_exename, 'treename' : treename, 'module' : config['module'], 'moduleArgs' : module_str, 'version' : job.version, 'filekey' : filekey }

                if enableKeepFilter :
                    command += ' --enableKeepFilter '
                if enableRemoveFilter :
                    command += ' --enableRemoveFilter '
                if disableOutputTree :
                    command += ' --disableOutputTree'

                if batch :
                    # condor at UMD, generic batch elsewhere
                    if _AT_UMD :
                        command += ' --condor '
                    else :
                        command += ' --batch '

                    if copyInputFiles :
                        command += ' --copyInputFiles '
                else :
                    command += ' --nproc %d ' %nproc

                if resubmit :
                    command += ' --resubmit '
    
                if not first_data :
                    command += ' --noCompileWithCheck '
    
                print command
                if not dry_run :
                    # NOTE(review): the exit status of os.system is ignored
                    # here, so a failed submission is not detected
                    os.system(command)
                if first_data :
                    first_data = False
    
            for job in jobs_mc :
                job_exename = '%s_MC_%s' %(exename, config['tag'] )
    
                module_arg = dict(config.get('args', {}) )
    
                module_str = '{ '
                for key, val in module_arg.iteritems() :
                    if isinstance( val, basestring ) :
                        module_str += '\'%s\' : \'%s\',' %( key, val)
                    else :
                        module_str += '\'%s\' : %s,' %( key, val)

                if PUPath is not None :

                    # MC only: point the module at the pileup histogram file
                    module_str += '\'sampleFile\' : \'%s/%s/hist.root\', ' %( PUPath, job.sample ) 
    
                module_str += '}'
    
                outsample = job.sample
                suffix = getattr(job, 'suffix', None )
                if suffix is not None :
                    outsample = outsample+suffix

                command = command_base %{ 'base' : job.base, 'sample' : job.sample, 'outsample' : outsample, 'nFilesPerJob' : nFilesPerJob, 'input' : config['input'], 'output' : config['output'], 'exename' : job_exename, 'treename' : treename, 'module' : config['module'], 'moduleArgs' : module_str, 'version' : job.version, 'filekey' : filekey }

                if enableKeepFilter :
                    command += ' --enableKeepFilter '
                if enableRemoveFilter :
                    command += ' --enableRemoveFilter '
                if disableOutputTree :
                    command += ' --disableOutputTree'

                if not first_mc :
                    command += ' --noCompileWithCheck '
    
                if batch :
                    if _AT_UMD :
                        command += ' --condor '
                    else :
                        command += ' --batch '

                    if copyInputFiles :
                        command += ' --copyInputFiles '
                else :
                    command += ' --nproc %d ' %nproc

                if resubmit :
                    command += ' --resubmit '
    
                print command
                if not dry_run :
                    os.system(command)
                if first_mc :
                    first_mc = False
    
    if check :

        # map filtered-dataset path -> completion result + original path
        check_results = {}
        for config in configs :
            select_dataset = config.get('dataset', [] )
            if not isinstance( select_dataset, list ) :
                select_dataset = [select_dataset]
        
            for  job in (jobs_data + jobs_mc) :
        
                if job.isData and select_dataset :
                    if job.sample not in select_dataset :
                        print 'Skipping data sample %s that is not requested for this configuration' %job.sample
                        continue

                outsample = job.sample
                suffix = getattr(job, 'suffix', None )
                if suffix is not None :
                    outsample = outsample+suffix

# Example of the equivalent standalone invocation:
#python ../../Util/scripts/check_dataset_completion.py --originalDS /data/users/jkunkle/Resonances//RecoOutput_2017_04_12/MadGraphChargedResonance_WGToLNu_M300_width5/ --filteredDS /data/users/jkunkle/Resonances/LepGammaSigOnly_elgEB_2017_07_27/MadGraphChargedResonance_WGToLNu_M300_width5 --treeNameOrig tupel/EventTree --histNameFilt tupel/filter --fileKeyOrig tree.root --fileKeyFilt tree.root

                job_info_dic = {  'base': job.base , 'sample' : job.sample, 'outsample' : outsample, 'output' : config['output'], 'input' : config['input'], 'treename' : treename, 'version' : job.version, 'filekey' : filekey }

                originalDS = '%(base)s/%(input)s/%(sample)s/%(version)s'%job_info_dic
                filteredDS = '%(output)s/%(outsample)s' %job_info_dic
                treeNameOrig  = '%(treename)s' %job_info_dic
                # NOTE(review): the filter-histogram path is hard-coded to
                # 'tupel/filter' here regardless of the configured treename
                histNameFilt = 'tupel/filter' 
                fileKeyOrig = '%(filekey)s' %job_info_dic 
                fileKeyFilt  = 'tree.root'

                this_result = check_dataset_completion( originalDS, filteredDS, treeNameOrig, None, None, histNameFilt, fileKeyOrig, fileKeyFilt, quiet=True )

                check_results[filteredDS] = {'res' : this_result, 'origDS' : originalDS }

                #command = check_base%{ 'base': job.base , 'sample' : job.sample, 'outsample' : outsample, 'output' : config['output'], 'input' : config['input'], 'treename' : treename, 'version' : job.version, 'filekey' : filekey}
                #print command                                                                               
                #os.system(command)                                                                          

        # categorize: res[0] = original event count, res[3] = filtered count
        good_ds = []
        missing_ds = []
        bad_ds = []
        for ds, res in check_results.iteritems() :

            if res['res'][0] == 0 :
                missing_ds.append(ds)
            elif res['res'][0] != res['res'][3] :
                bad_ds.append(ds)
            elif res['res'][0] == res['res'][3] :
                good_ds.append(ds)
            else :
                # NOTE(review): unreachable — the three branches above are
                # exhaustive
                print 'Could not categorize dataset %s with results:' %ds
                print res

        print '%d filtered datasets are missing events : ' %len( bad_ds )
        for ds in bad_ds :
            nevt_orig = check_results[ds]['res'][0]
            nevt_filt = check_results[ds]['res'][3]
            print '%s : Original has %d events, Filtered has %d events.  Difference = %d' %( ds, nevt_orig, nevt_filt, nevt_orig-nevt_filt )
        print '%d Original datasets do not have events : ' %len( missing_ds )
        for ds in missing_ds :
            print check_results[ds]['origDS']
        print '%d datasets have matching events ' %len( good_ds )