Example #1
0
    def test_basics(self):
        """File discovery for Dataset specs served via a local file:// storage."""
        with util.PartiallyMutable.unlock():
            storage = se.StorageConfiguration(output=[],
                                              input=['file://' + self.workdir])
            storage.activate()

            with fs.alternative():
                # (files spec, expected file count), checked in order.
                checks = [
                    ('eggs', 10),
                    (['eggs', 'ham'], 15),
                    ('eggs/1.txt', 1),
                ]
                for spec, expected in checks:
                    assert len(Dataset(files=spec).get_info().files) == expected
Example #2
0
    def test_flatten(self):
        """Pattern filtering of Dataset files via a local file:// storage."""
        with util.PartiallyMutable.unlock():
            storage = se.StorageConfiguration(output=[],
                                              input=['file://' + self.workdir])
            storage.activate()

            with fs.alternative():
                # (Dataset kwargs, expected file count), checked in order.
                checks = [
                    (dict(files=['spam']), 8),
                    (dict(files=['spam'], patterns=['*.txt']), 5),
                    (dict(files=['spam'], patterns=['[12].txt']), 2),
                ]
                for kwargs, expected in checks:
                    assert len(Dataset(**kwargs).get_info().files) == expected
Example #3
0
                # NOTE(review): appears to fall back to the shared template
                # when no modified config exists -- confirm the branch above.
                mod_loc = template_loc
            wf_fragments[step] = mod_loc
        # 'base' means the unmodified configuration; drop it from the label.
        if mod_tag == 'base': mod_tag = ''
        # Label tag combines p/c/r (presumably process/coefficient/run --
        # TODO confirm against where they are parsed) plus the modifier tag.
        label_tag = "{p}_{c}{mod}_{r}".format(p=p,c=c,r=r,mod=mod_tag)
        print "\t\tLabel: {label}".format(label=label_tag)
        # GEN-SIM production step, driven by the 'gs' config fragment.
        gs = Workflow(
            label='gs_step_{tag}'.format(tag=label_tag),
            command='cmsRun {cfg}'.format(cfg=wf_fragments['gs']),
            sandbox=cmssw.Sandbox(release='CMSSW_9_3_6'),
            merge_size=-1,  # Don't merge files we don't plan to keep
            cleanup_input=False,
            globaltag=False,
            outputs=['HIG-RunIIFall17wmLHEGS-00040ND.root'],
            dataset=Dataset(
                files=lhe_dir,
                files_per_task=1,
                patterns=["*.root"]
            ),
            category=gs_resources
        )

        # DIGI step, chained to the GEN-SIM workflow output above.
        digi = Workflow(
            label='digi_step_{tag}'.format(tag=label_tag),
            command='cmsRun {cfg}'.format(cfg=wf_fragments['digi']),
            sandbox=cmssw.Sandbox(release='CMSSW_9_4_0_patch1'),
            merge_size=-1,  # Don't merge files we don't plan to keep
            # NOTE(review): cleanup_input=True typically deletes the parent's
            # output once consumed; the inline comment below may be inverted.
            cleanup_input=True,    # Save the GEN-SIM step
            outputs=['HIG-RunIIFall17DRPremix-00823ND_step1.root'],
            dataset=ParentDataset(
                parent=gs,
                units_per_task=1
    # Fields 2-4 of the split name: presumably process / coefficient / run --
    # TODO confirm against the directory naming convention.
    p, c, r = arr[2], arr[3], arr[4]

    # cmsRun command; data tier is passed as a config-file argument.
    cms_cmd = ['cmsRun', 'EFTLHEReader_cfg.py']
    cms_cmd.extend(['datatier=MINIAODSIM'])

    print "\t[{n}/{tot}] mAOD Input: {dir}".format(n=idx + 1,
                                                   tot=len(maod_dirs),
                                                   dir=maod_dir)
    print "\tCommand: {cmd}".format(cmd=' '.join(cms_cmd))

    # One processing workflow per MiniAOD input directory.
    output = Workflow(label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
                      command=' '.join(cms_cmd),
                      merge_size='1.0G',
                      cleanup_input=False,
                      dataset=Dataset(files=maod_dir,
                                      files_per_task=5,
                                      patterns=["*.root"]),
                      category=processing)
    wf.extend([output])

# Top-level Lobster configuration tying storage and all workflows together.
config = Config(label=master_label,
                workdir=workdir_path,
                plotdir=plotdir_path,
                storage=storage,
                workflows=wf,
                advanced=AdvancedOptions(
                    dashboard=False,
                    bad_exit_codes=[127, 160],
                    log_level=1,
                ))
        # Unknown sample: report it and move on to the next one.
        print "[{0:0>{w}}/{1:0>{w}}] Skipping unknown sample: {sample}".format(
            idx + 1, len(samples), sample=sample_name, w=width)
        continue
    print "[{0:0>{w}}/{1:0>{w}}] Sample: {sample}".format(idx + 1,
                                                          len(samples),
                                                          sample=sample_name,
                                                          w=width)

    # Look up where the sample lives and whether it carries EFT reweighting.
    sample_loc = ds_helper.getData(sample_name, 'loc')
    is_eft = ds_helper.getData(sample_name, 'is_eft')
    if hadoop_mode:
        # Strip everything up to the /hadoop mount point, then make the
        # path relative to input_path for the Dataset file spec.
        full_path = sample_loc.split("/hadoop")[1]
        rel_path = os.path.relpath(full_path, input_path)
        ds = Dataset(
            files=rel_path,
            #files_per_task=5,
            files_per_task=ds_helper.getData(sample_name, 'files_per_task'),
            patterns=["*.root"])
        # Merge size scales with expected per-file size (see inline notes).
        if is_eft:
            #merge_size = '256M'     # EFT samples with many reweight points are O(25M)
            merge_size = '4.0G'
        else:
            merge_size = '512K'  # non-EFT samples are O(50-100k)
        print "\tFullPath:  {path}".format(path=full_path)
        print "\tInputPath: {path}".format(path=input_path)
        print "\tRelPath:   {path}".format(path=rel_path)
    elif das_mode:
        # DAS-published sample: resolve files from the dataset name via CMSSW.
        ds = cmssw.Dataset(
            dataset=sample_loc,
            #events_per_task=100000
            events_per_task=300000)
Example #6
0
for idx, gen_dir in enumerate(gen_dirs):
    #arr = gen_dir.split('_')
    head, tail = os.path.split(gen_dir)
    arr = tail.split('_')
    p, c, r = arr[2], arr[3], arr[4]
    print "\t[{n}/{tot}] GEN Input: {dir}".format(n=idx + 1,
                                                  tot=len(gen_dirs),
                                                  dir=gen_dir)
    output = Workflow(
        label='output_{p}_{c}_{r}'.format(p=p, c=c, r=r),
        command='cmsRun EFTLHEReader_cfg.py',
        merge_size='1.0G',
        cleanup_input=False,
        dataset=Dataset(
            files=gen_dir,
            files_per_task=
            5,  # Remember that the GEN step already does 5-10 files per task
            patterns=["*.root"]),
        category=processing)
    wf.extend([output])

config = Config(label=master_label,
                workdir=workdir_path,
                plotdir=plotdir_path,
                storage=storage,
                workflows=wf,
                advanced=AdvancedOptions(
                    dashboard=False,
                    bad_exit_codes=[127, 160],
                    log_level=1,
                ))
Example #7
0
        'rgoldouz/FullProduction/L1tracker_DAS_CMSSW112pre5/L1Stub_SingleElectron_D49_SW6p0',
        '100000', 'ele'
    ]
}

# Workflows accumulated here; presumably consumed by a Config further down.
wf = []

for key, value in samples.items():
    # Only the displaced-muon samples are processed in this run.
    if 'DisplacedMu' not in key:
        continue
    print key
    # value layout (from the dict above): [input location, event count,
    # channel tag] -- TODO confirm the last two against Lobster_check.py.
    Analysis = Workflow(
        label='FE_L1Analysis_%s' % (key),
        sandbox=cmssw.Sandbox(
            release='/afs/crc.nd.edu/user/r/rgoldouz/CMSSW_10_4_0'),
        dataset=Dataset(files=value[0], files_per_task=50),
        globaltag=False,
        command='python Lobster_check.py ' + value[1] + ' ' + value[2] +
        ' @inputfiles',
        extra_inputs=[
            'Lobster_check.py',
            '../lib/main.so',
            '../include/MyAnalysis.h',
        ],
        outputs=['ANoutput.root'],
        #        dataset=Dataset(
        #           files=value[0],
        #           files_per_task=50,
        #           patterns=["*.root"]
        #        ),
        #        merge_command='hadd @outputfiles @inputfiles',