def get_input_from_argv(process): from JMTucker.Tools.general import typed_from_argv # Look for just a list of files in argv first. def leave_alone(x): return x.startswith('/store') or x.startswith('root://') files = [x for x in sys.argv if leave_alone(x) and x.endswith('.root')] files += ['file:%s' % x for x in sys.argv if not leave_alone(x) and os.path.isfile(x) and x.endswith('.root')] if not files: # else, files from txt file list_fn = os.path.expanduser(typed_from_argv(str, name='list')) if list_fn is None: raise ValueError('no files to run on') files = [x.strip() for x in open(list_fn).read().split() if x.strip()] print 'Files to run over:', len(files) pprint(files) process.source.fileNames = files process.out.fileName = 'merged.root' out = typed_from_argv(str, name='out') if out is not None: process.out.fileName = os.path.expanduser(out) print 'Merging to', process.out.fileName.value() max_events = typed_from_argv(int, name='max_events') if max_events is not None: print 'Max events =', max_events process.maxEvents.input = max_events skip_events = typed_from_argv(int, name='skip_events') if skip_events is not None: print 'Skip events =', skip_events process.source.skipEvents = cms.untracked.uint32(skip_events)
def main(samples_registry):
    """If 'merge' is present in argv, merge the samples named there.

    The output filename is the first .root name in argv that is not an
    existing file (default 'merge.root'); norm_to / norm_path come from
    typed_from_argv.
    """
    import sys
    if 'merge' not in sys.argv:
        return
    samples = samples_registry.from_argv(from_root_fns=True)
    # First .root argument that is not an existing file names the output.
    candidates = [a for a in sys.argv if a.endswith('.root') and not os.path.isfile(a)]
    output_fn = candidates[0] if candidates else 'merge.root'
    norm_to = typed_from_argv(float, default_value=1.)
    norm_path = typed_from_argv(str, default_value='', name='norm_path')
    merge(samples, output=output_fn, norm_to=norm_to, norm_path=norm_path)
# Script configuration preamble: plotting/version knobs and the argv-selected
# data period with its integrated luminosity.
import sys, os
from array import array
from JMTucker.Tools.general import typed_from_argv
from JMTucker.Tools.ROOTTools import *
from JMTucker.Tools import Samples
from JMTucker.Tools.Samples import *

version = '2017p8v4'        # tag presumably used in input/output names -- confirm
zoom = False #(0.98,1.005)  # set to a (lo,hi) pair to zoom an axis instead of False
save_more = True
data_only = False
use_qcd = False
num_dir, den_dir = 'num', 'den'  # numerator/denominator histogram directories
#num_dir, den_dir = 'numjet6pt75', 'denjet6pt75'

# which (from argv, default 0) indexes the table below:
#   0 = 2017+2018 combined ('p8'), 1 = full 2017, 2-6 = 2017 periods B-F,
#   7 = full 2018, 8-11 = 2018 periods A-D.
# int_lumi units not shown here; presumably /pb -- confirm.
which = typed_from_argv(int, 0)
data_period, int_lumi = [
    ('p8',101037.),
    ('', 41525.),
    ('B', 4794.),
    ('C', 9631.),
    ('D', 4248.),
    ('E', 9314.),
    ('F', 13538.),
    ('', 59512.),
    ('A', 14002.),
    ('B', 7091.),
    ('C', 6937.),
    ('D', 31482.),
    ][which]
# NOTE(review): which == 0 (the combined 'p8' entry) maps to year 2017 here.
year = 2017 if which < 7 else 2018
# Script configuration preamble (near-duplicate of a sibling script): plotting
# and version knobs plus the argv-selected data period / integrated luminosity.
import sys, os
from array import array
from JMTucker.Tools.general import typed_from_argv
from JMTucker.Tools.ROOTTools import *
from JMTucker.Tools import Samples
from JMTucker.Tools.Samples import *

version = '2017p8v4'        # tag presumably used in input/output names -- confirm
zoom = False #(0.98,1.005)  # set to a (lo,hi) pair to zoom an axis instead of False
save_more = True
data_only = False
use_qcd = False
num_dir, den_dir = 'num', 'den'  # numerator/denominator histogram directories
#num_dir, den_dir = 'numjet6pt75', 'denjet6pt75'

# which (from argv, default 0) indexes the table below:
#   0 = 2017+2018 combined ('p8'), 1 = full 2017, 2-6 = 2017 periods B-F,
#   7 = full 2018, 8-11 = 2018 periods A-D.
# int_lumi units not shown here; presumably /pb -- confirm.
which = typed_from_argv(int, 0)
data_period, int_lumi = [
    ('p8', 101037.),
    ('', 41525.),
    ('B', 4794.),
    ('C', 9631.),
    ('D', 4248.),
    ('E', 9314.),
    ('F', 13538.),
    ('', 59512.),
    ('A', 14002.),
    ('B', 7091.),
    ('C', 6937.),
    ('D', 31482.),
    ][which]
# NOTE(review): which == 0 (the combined 'p8' entry) maps to year 2017 here.
year = 2017 if which < 7 else 2018
#!/usr/bin/env python import sys from JMTucker.Tools.general import typed_from_argv from JMTucker.Tools.MiniAOD_cfg import cms, pat_tuple_process from JMTucker.Tools.CMSSWTools import * # 3 magic lines, don't touch is_mc = True njets = 2 nbjets = 0 ints = typed_from_argv(int, default_value=[], return_multiple=True) if len(ints) > 0: if len(ints) != 2: raise RuntimeError("if you put any ints there must be two") njets, nbjets = ints process = pat_tuple_process(None, is_mc=is_mc) tfileservice(process, "movedtree.root") random_service(process, {"mfvVertices": 12179, "mfvMovedTracks": 13068}) if is_mc: process.mcStat.histos = True process.patMuons.embedTrack = False process.patElectrons.embedTrack = False del process.outp
# Driver for phadd: merge each selected sample's files for one dataset.
# Command line: <batch_name> <sample/dataset selectors> [output.root] [nfiles...]
if len(sys.argv) < 4:
    print sys.argv
    usage()

batch_name = sys.argv[1]

# Exactly one dataset must be named in argv; keep only the samples that
# actually have that dataset.
datasets = Samples.registry.datasets_from_argv()
if len(datasets) != 1:
    raise ValueError('expect exactly one dataset in argv')
dataset = datasets[0]
samples = [s for s in Samples.registry.from_argv() if s.has_dataset(dataset)]

# First .root name in argv (after batch_name) names the output.
output_fn = [x for x in sys.argv[2:] if x.endswith('.root')]
output_fn = output_fn[0] if output_fn else 'phadd.root'

# One nfiles value per sample, in order; missing entries default to 1
# (i.e. merge that sample into a single file). Extra values are an error.
nfileses = typed_from_argv(int, return_multiple=True)
d = len(samples) - len(nfileses)
if d < 0:
    raise ValueError('too many nfiles %s for # samples %i' % (nfileses, len(samples)))
elif d > 0:
    nfileses.extend([1]*d)

for sample, nfiles in zip(samples, nfileses):
    sample.set_curr_dataset(dataset)
    sample.split_by = 'files'
    # Spread the sample's files evenly over nfiles jobs (ceiling division).
    sample.files_per = int_ceil(len(sample.filenames), nfiles)
    sample.njobs = nfiles
    print 'sample %s dataset %s nfiles %i -> %i' % (sample.name, dataset, len(sample.filenames), nfiles)

phadd(batch_name, dataset, samples, output_fn)
def main(samples_registry):
    """Command dispatcher over argv: merge / printmissing / ds / file /
    nevents / site / samplefiles, operating on samples from the registry.

    NOTE(review): some branches use sys.argv and names like os, DBS, merge,
    typed_from_argv although only glob/argv/pprint/colors are imported here;
    presumably those come from module-level imports -- confirm.
    """
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors
    if 'merge' in argv:
        # Merge root files of the argv-selected samples into one output.
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        # First .root name that is not an existing file names the output.
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)
    elif 'printmissing' in argv:
        # Report samples with no condor_/crab_ batch dir and (unless no_root
        # is given) no <name>.root file in the cwd.
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s
    elif 'ds' in argv:
        # Print the dataset string for one sample; the dataset name is the
        # argv token immediately after the sample name.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        print sample.datasets[dataset].dataset
    elif 'file' in argv:
        # Print the first N filenames of one sample (N from argv, default 5).
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        for x in sample.filenames[:typed_from_argv(int, 5)]:
            print x
    elif 'nevents' in argv:
        # Print the event count of one sample's dataset, as reported by DBS.
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        print DBS.numevents_in_dataset(sample.dataset)
    elif 'site' in argv:
        # Show, per sample, which (non-tape) sites host the dataset, colored
        # green/yellow for complete/incomplete.
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = samples_registry.datasets_from_argv()
        if len(dataset) > 1:
            raise ValueError('only zero/one dataset allowed')
        dataset = dataset[0] if len(dataset) == 1 else 'main'
        mlen = max(len(s.name) for s in samples)
        for sample in samples:
            sample.set_curr_dataset(dataset)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print sample.name, 'PROBLEM'
                continue
            # Trailing comma: keep the site list on the same output line.
            print sample.name.ljust(mlen+5),
            sites.sort(key=lambda x: x['name'])
            for site in sites:
                if DBS.site_is_tape(site):
                    continue
                is_complete = DBS.complete_at_site(site)
                print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)),
    elif 'samplefiles' in argv:
        # Emit SampleFiles entries (name:dataset -> encoded file list) for
        # samples not yet present there.
        # rm a; touch a; for ds in '' miniaod; do for x in qcd ttbar leptonic; do ( samples samplefiles ${x}_samples_2017 $ds >> a ) ; done; done
        # rm a; touch a; for ds in '' miniaod; do for year in 2017 2018; do for x in data auxiliary_data ; do ( samples samplefiles ${x}_samples_${year} $ds >> a ) ; done; done; done
        samples = samples_registry.from_argv(raise_if_none=True)
        # Dataset defaults to 'main' unless 'miniaod' or an 'ntuple*' token
        # appears in argv.
        dataset = 'main'
        for arg in argv[1:]:
            if arg == 'miniaod' or arg.startswith('ntuple'):
                dataset = arg
                break
        print 'getting files for dataset %s:' % dataset, ', '.join(s.name for s in samples)
        import SampleFiles as sf
        for s in samples:
            d = {}
            if not s.has_dataset(dataset):
                print colors.yellow('no dataset %s for %s' % (dataset, s.name))
                continue
            s.set_curr_dataset(dataset)
            if sf.has(s.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % s.name)
            else:
                fns = s.filenames
                print 'DBS has %i files for %s' % (len(fns), s.name)
                d[(s.name, dataset)] = (len(fns), fns)
                # Printed as a ready-to-paste tuple for the SampleFiles source.
                print "('%s:%s', '%s')," % (s.name, dataset, sf._enc(d))
def main(samples_registry):
    """Command dispatcher over argv (runem/callback variant): merge /
    printmissing / name / ds / file / nevents / files_for_events / site /
    samplefiles / sfhas, applied to every (dataset, sample) pair from argv.

    NOTE(review): some branches use sys.argv and names like os, DBS, merge
    although sys/os/DBS are not imported here; presumably they come from
    module-level imports -- confirm.
    """
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors
    from JMTucker.Tools.general import chunks, typed_from_argv

    samples = samples_registry.from_argv()
    datasets = samples_registry.datasets_from_argv()

    def prnt(*x):
        # Print all arguments space-separated on one line.
        print ' '.join(str(y) for y in x)

    def runem(cb):
        # Apply cb(dataset, sample) to every argv-selected pair, skipping
        # (with a warning) samples that lack the dataset.
        for dataset in datasets:
            for sample in samples:
                if not sample.has_dataset(dataset):
                    print colors.yellow('no dataset %s for %s' % (dataset, sample.name))
                    continue
                sample.set_curr_dataset(dataset)
                cb(dataset, sample)

    if 'merge' in argv:
        # Merge root files of the argv-selected samples into one output; the
        # first .root name that is not an existing file names the output.
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)
    elif 'printmissing' in argv:
        # Report samples with no condor_/crab_ batch dir and (unless no_root
        # is given) no <name>.root file in the cwd.
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s
    elif 'name' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset))
    elif 'ds' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, sample.dataset))
    elif 'file' in argv:
        # First N filenames per pair (N from argv, default 5).
        runem(lambda dataset, sample: [prnt(sample.name, dataset, x) for x in sample.filenames[:typed_from_argv(int, 5)]])
    elif 'nevents' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, DBS.numevents_in_dataset(sample.dataset)))
    elif 'files_for_events' in argv:
        # argv carries run/lumi/event triplets; ask DBS which files hold them.
        rles = typed_from_argv(int, return_multiple=True)
        if len(rles) % 3 != 0:
            raise ValueError('expect list of ints in argv with length divisible by 3 [run1 lumi1 event1 ...]')
        rles = list(chunks(rles,3))
        runem(lambda dataset, sample: prnt(sample.name, dataset, ' '.join(DBS.files_for_events(rles, sample.dataset))))
    elif 'site' in argv:
        # Show, per pair, which (non-tape) sites host the dataset, colored
        # green/yellow for complete/incomplete.
        mlen = max(len(s.name) for s in samples)
        def cb(dataset, sample):
            ljname = sample.name.ljust(mlen+3)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print colors.boldred(ljname + ' DBS problem')
            else:
                # Trailing commas keep all sites on the same output line;
                # bare print ends the line.
                print ljname,
                sites.sort(key=lambda x: x['name'])
                for site in sites:
                    if DBS.site_is_tape(site):
                        continue
                    is_complete = DBS.complete_at_site(site)
                    print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)), ' ',
                print
        runem(cb)
    elif 'samplefiles' in argv:
        # Emit SampleFiles entries (name:dataset -> encoded file list),
        # refusing to overwrite existing ones.
        import SampleFiles as sf
        def cb(dataset, sample):
            if sf.has(sample.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % sample.name)
            fns = sample.filenames
            print 'DBS has %i files for %s' % (len(fns), sample.name)
            d = {(sample.name, dataset): (len(fns), fns)}
            # Printed as a ready-to-paste tuple for the SampleFiles source.
            print "('%s:%s', '%s')," % (sample.name, dataset, sf._enc(d))
        runem(cb)
    elif 'sfhas' in argv:
        # List samples that are (or, with 'neg', are not) in SampleFiles.
        neg = 'neg' in argv
        import SampleFiles as sf
        for dataset in datasets:
            for sample in samples:
                if sf.has(sample.name, dataset) != neg:
                    print sample.name
#!/usr/bin/env python from JMTucker.Tools.ROOTTools import * from JMTucker.Tools.Sample import norm_from_file from JMTucker.Tools.Year import year from JMTucker.Tools.general import typed_from_argv, bool_from_argv from JMTucker.Tools import Samples import JMTucker.MFVNeutralino.AnalysisConstants as ac year = typed_from_argv(int, year, name='year') yearcheck = not bool_from_argv('noyearcheck') nosig = bool_from_argv('nosig') nodata = bool_from_argv('nodata') nobkg = bool_from_argv('nobkg') onlysig = bool_from_argv('onlysig') onlydata = bool_from_argv('onlydata') onlybkg = bool_from_argv('onlybkg') sumbkg = not bool_from_argv('nosumbkg') sumall = bool_from_argv('sumall') sort = not bool_from_argv('nosort') genmatch = bool_from_argv('genmatch') minnjets = typed_from_argv(int, name='minnjets') maxnjets = typed_from_argv(int, name='maxnjets') minht = typed_from_argv(float, name='minht') maxht = typed_from_argv(float, name='maxht') mindbv = typed_from_argv(float, name='mindbv') maxdbv = typed_from_argv(float, name='maxdbv') which = typed_from_argv(int, -1) ntks = ('mfvMiniTreeNtk3', 'mfvMiniTreeNtk4', 'mfvMiniTree')
#!/usr/bin/env python import os, sys from collections import defaultdict from pprint import pprint from JMTucker.Tools.DBS import files_in_dataset from JMTucker.Tools.general import typed_from_argv instance = typed_from_argv(int) if instance is None: instance = 'global' datasets = [x for x in sys.argv[1:] if x.count('/') == 3] for dataset in datasets: d = defaultdict(list) files = files_in_dataset(dataset, instance) print dataset, 'has', len(files), 'files' for f in files: num = os.path.basename(f).split('_')[1] d[num].append(f) for k,v in d.iteritems(): if len(v) > 1: print 'duplicate(s) for %s:' % k pprint(v)