def test_basics(self):
    """Dataset file resolution: a directory, a list of inputs, and a single file."""
    with util.PartiallyMutable.unlock():
        storage = se.StorageConfiguration(output=[], input=['file://' + self.workdir])
        storage.activate()
    with fs.alternative():
        # (files argument, expected number of resolved files)
        expectations = [
            ('eggs', 10),
            (['eggs', 'ham'], 15),
            ('eggs/1.txt', 1),
        ]
        for spec, expected in expectations:
            info = Dataset(files=spec).get_info()
            assert len(info.files) == expected
def test_flatten(self):
    """Dataset flattening: pattern filters narrow the resolved file set."""
    with util.PartiallyMutable.unlock():
        storage = se.StorageConfiguration(output=[], input=['file://' + self.workdir])
        storage.activate()
    with fs.alternative():
        # (extra Dataset kwargs, expected number of resolved files)
        expectations = [
            ({}, 8),
            ({'patterns': ['*.txt']}, 5),
            ({'patterns': ['[12].txt']}, 2),
        ]
        for extra, expected in expectations:
            info = Dataset(files=['spam'], **extra).get_info()
            assert len(info.files) == expected
# NOTE(review): fragment of a larger loop body — p, c, r, step, mod_tag,
# template_loc, lhe_dir and gs_resources are bound outside this view.
mod_loc = template_loc
wf_fragments[step] = mod_loc
# A 'base' modifier is encoded as an empty tag in the workflow label.
if mod_tag == 'base':
    mod_tag = ''
label_tag = "{p}_{c}{mod}_{r}".format(p=p,c=c,r=r,mod=mod_tag)
print "\t\tLabel: {label}".format(label=label_tag)
# GEN-SIM step: runs the 'gs' cmsRun fragment over the LHE input directory.
gs = Workflow(
    label='gs_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['gs']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
    merge_size=-1,          # Don't merge files we don't plan to keep
    cleanup_input=False,
    globaltag=False,
    outputs=['HIG-RunIIFall17wmLHEGS-00040ND.root'],
    dataset=Dataset(
        files=lhe_dir,
        files_per_task=1,
        patterns=["*.root"]
    ),
    category=gs_resources
)
# DIGI step: chained onto the GEN-SIM step's output via ParentDataset.
digi = Workflow(
    label='digi_step_{tag}'.format(tag=label_tag),
    command='cmsRun {cfg}'.format(cfg=wf_fragments['digi']),
    sandbox=cmssw.Sandbox(release='CMSSW_9_4_0_patch1'),
    merge_size=-1,          # Don't merge files we don't plan to keep
    cleanup_input=True,     # Save the GEN-SIM step
    outputs=['HIG-RunIIFall17DRPremix-00823ND_step1.root'],
    dataset=ParentDataset(
        parent=gs,
        units_per_task=1
        # NOTE(review): call continues past the end of this view.
# NOTE(review): fragment of a loop over maod_dirs — idx, maod_dir, arr, wf,
# storage, processing and the *_path variables are bound outside this view.
p, c, r = arr[2], arr[3], arr[4]
cms_cmd = ['cmsRun', 'EFTLHEReader_cfg.py']
cms_cmd.extend(['datatier=MINIAODSIM'])
print "\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx + 1, tot=len(maod_dirs), dir=maod_dir)
print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))
# One processing workflow per MiniAOD input directory.
output = Workflow(label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
                  command=' '.join(cms_cmd),
                  merge_size='1.0G',
                  cleanup_input=False,
                  dataset=Dataset(files=maod_dir, files_per_task=5, patterns=["*.root"]),
                  category=processing)
wf.extend([output])
# NOTE(review): in the original file this Config(...) presumably sits outside
# the loop (one config collecting all workflows) — confirm indentation on merge.
config = Config(label=master_label,
                workdir=workdir_path,
                plotdir=plotdir_path,
                storage=storage,
                workflows=wf,
                advanced=AdvancedOptions(
                    dashboard=False,
                    bad_exit_codes=[127, 160],
                    log_level=1,
                ))
# NOTE(review): fragment of a loop over samples — idx, width, sample_name,
# hadoop_mode, das_mode, input_path and ds_helper are bound outside this view.
# The leading print/continue pair presumably belongs to an unknown-sample
# guard whose `if` is above this view — confirm indentation on merge.
print "[{0:0>{w}}/{1:0>{w}}] Skipping unknown sample: {sample}".format(idx + 1, len(samples), sample=sample_name, w=width)
continue
print "[{0:0>{w}}/{1:0>{w}}] Sample: {sample}".format(idx + 1, len(samples), sample=sample_name, w=width)
sample_loc = ds_helper.getData(sample_name, 'loc')
is_eft = ds_helper.getData(sample_name, 'is_eft')
if hadoop_mode:
    # Strip everything up to the '/hadoop' mount prefix, then make the
    # remainder relative to input_path.
    full_path = sample_loc.split("/hadoop")[1]
    rel_path = os.path.relpath(full_path, input_path)
    ds = Dataset(
        files=rel_path,
        #files_per_task=5,
        files_per_task=ds_helper.getData(sample_name, 'files_per_task'),
        patterns=["*.root"])
    # Merge target depends on typical per-file output size.
    if is_eft:
        #merge_size = '256M' # EFT samples with many reweight points are O(25M)
        merge_size = '4.0G'
    else:
        merge_size = '512K' # non-EFT samples are O(50-100k)
    print "\tFullPath: {path}".format(path=full_path)
    print "\tInputPath: {path}".format(path=input_path)
    print "\tRelPath: {path}".format(path=rel_path)
elif das_mode:
    # DAS mode: hand the dataset name to CMSSW and split by events instead
    # of by files.
    ds = cmssw.Dataset(
        dataset=sample_loc,
        #events_per_task=100000
        events_per_task=300000)
# Build one EFTLHEReader workflow per GEN output directory.
for idx, gen_dir in enumerate(gen_dirs):
    #arr = gen_dir.split('_')
    # Tags are taken from fields 2-4 of the '_'-split directory basename.
    head, tail = os.path.split(gen_dir)
    arr = tail.split('_')
    p, c, r = arr[2], arr[3], arr[4]
    print "\t[{n}/{tot}] GEN Input: {dir}".format(n=idx + 1, tot=len(gen_dirs), dir=gen_dir)
    output = Workflow(
        label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
        command='cmsRun EFTLHEReader_cfg.py',
        merge_size='1.0G',
        cleanup_input=False,
        dataset=Dataset(
            files=gen_dir,
            files_per_task= 5,  # Remember that the GEN step already does 5-10 files per task
            patterns=["*.root"]),
        category=processing)
    wf.extend([output])
# NOTE(review): presumably one Config collecting all workflows, outside the
# loop — confirm indentation against the original file.
config = Config(label=master_label,
                workdir=workdir_path,
                plotdir=plotdir_path,
                storage=storage,
                workflows=wf,
                advanced=AdvancedOptions(
                    dashboard=False,
                    bad_exit_codes=[127, 160],
                    log_level=1,
                ))
        # NOTE(review): tail of the `samples` dict literal whose opening is
        # above this view; each value appears to be [input location, arg1, arg2].
        'rgoldouz/FullProduction/L1tracker_DAS_CMSSW112pre5/L1Stub_SingleElectron_D49_SW6p0',
        '100000',
        'ele'
    ]
}
# One analysis workflow per selected sample; only 'DisplacedMu' samples run.
wf = []
for key, value in samples.items():
    if 'DisplacedMu' not in key:
        continue
    print key
    Analysis = Workflow(
        label='FE_L1Analysis_%s' % (key),
        sandbox=cmssw.Sandbox(
            release='/afs/crc.nd.edu/user/r/rgoldouz/CMSSW_10_4_0'),
        dataset=Dataset(files=value[0], files_per_task=50),
        globaltag=False,
        # value[1]/value[2] are forwarded as CLI arguments to the checker script.
        command='python Lobster_check.py ' + value[1] + ' ' + value[2] + ' @inputfiles',
        extra_inputs=[
            'Lobster_check.py',
            '../lib/main.so',
            '../include/MyAnalysis.h',
        ],
        outputs=['ANoutput.root'],
        # dataset=Dataset(
        #     files=value[0],
        #     files_per_task=50,
        #     patterns=["*.root"]
        # ),
        # merge_command='hadd @outputfiles @inputfiles',
        # NOTE(review): call continues past the end of this view.