Пример #1
0
def copy_and_skim(ntuple="", indir="", outdir="", overwrite=False):

    """
    Copy a Step1 ntuple from EOS and skim the events with the Step2 cut.

    Parameters
    ----------
    ntuple    : str
                The file name of the ntuple. This argument is positioned first 
                to conform with the usage of multiprocessing.Pool.apply_async().
    indir     : str
                The path to the parent directory of the input file.
    outdir    : str
                The path to the output directory.
    overwrite : bool
                Flag whether to forcibly copy the subtuple.
                Default is False.
    """

    # Copy the ntuple.
    if overwrite:
        cmsStage = ["cmsStage", "-f", indir + ntuple, outdir]
    else:
        cmsStage = ["cmsStage", indir + ntuple, outdir]

    return_code = sp.call(cmsStage, stdout=sp.PIPE, stderr=sp.PIPE)

    # Catch failed copies.
    if return_code:
        return ntuple
    # Skim the ntuple if a cut is specified.
    else:
        if not STEP2_CUT:
            print "--- {} copied.".format(ntuple)
        else:
            result = skim(outdir, ntuple, STEP2_CUT)

            # Delete the original ntuple.
            sp.check_call(["rm", outdir + ntuple])

            print "--- {} copied and skimmed from {!s} to {!s} entries.".format(ntuple, result[1], result[0])

            return result
Пример #2
0
def step3(group = ''):

    """
    Generate a Step3 ntuple.

    Parameters
    ----------
    group   : str
              The name used to refer to the group in settings.py.
    """

    print '\nGenerating Step3 Ntuple [{}]'.format(group)

    # Set the group as the Step3 ntuple's name.
    step3_file = STEP3_DIR + group + '.root'

    # Look up the group's samples and cuts.
    samples = GROUPS[group]['SAMPLES'] if 'SAMPLES' in GROUPS[group] else []
    cuts = GROUPS[group]['CUTS'] if 'CUTS' in GROUPS[group] else []
    
    # When samples is empty, assume the group name references a Step2 ntuple.
    if not samples:

        assert group in SAMPLES, '{} is an invalid sample!'.format(group)

        # A symlink is made to avoid duplicating the Step2 ntuple.
        sp.check_call(['ln', '-s', STEP2_DIR + group + '.root', step3_file]) 

    else:
        
        for s in samples:
            assert s in SAMPLES, '{} is an invalid sample!'.format(s)

        # Only one sample provided.
        if (len(samples) == 1):

            ntuple = samples[0] + '.root'

            # Create a symlink if there are no cuts.
            if not cuts:
                sp.check_call(['ln', '-s', STEP2_DIR + ntuple, step3_file])
            # Otherwise, skim the ntuple and then move the skimmed version.
            else:
                skim(STEP2_DIR, ntuple, cuts[0])
                sp.check_call(['mv', STEP2_DIR + 'skim_' + ntuple, step3_file])
        
        # Multiple samples provided.
        elif (len(samples) > 1):

            inputfiles = []        

            # Iterate over the samples and cuts simultaneously. This is why
            # the samples and their cuts must be provided in the same order.
            for sample, cut in izip_longest(samples, cuts, fillvalue = ''):
                
                ntuple = sample + '.root'
                
                # Append the original ntuple if there are no cuts.
                if not cut:
                    inputfiles.append(STEP2_DIR + ntuple)
                # Otherwise, append the skimmed ntuple.
                else:
                    skim(STEP2_DIR, ntuple, cut)
                    inputfiles.append(STEP2_DIR + 'skim_' + ntuple)

            # hadd the files together.             
            sp.check_call(['hadd', '-f', step3_file] + inputfiles)

            # Delete the skimmed ntuples after hadding.
            for skim_file in [_ for _ in inputfiles if 'skim' in _]:
                sp.check_call(['rm', skim_file])