def harvestTauIdEffSamples(channel = None, samples = None, inputFilePath = None,
                           outputFilePath = None, jobId = None,
                           tmpFilePath = None):
    """Build the Makefile that merges the tau ID efficiency skims of one job.

    The directory `inputFilePath` is listed with `castor.nslsl`; entries whose
    file name contains `_<jobId>_` are kept and sorted by (time, path).  For
    every sample, the paths containing `tauIdEffSample_<sample>_<jobId>_` are
    prefixed with `rfio:` and collected into one harvest job.  The jobs are
    then passed to `buildMakefile` together with `tmpFilePath` and the name
    `Makefile.mergeTauIdEffSkims_<jobId>`.

    Args:
        channel: required, but only checked for being defined here.
        samples: mapping whose 'SAMPLES_TO_ANALYZE' entry lists the sample
            names to process (same convention as `harvestAnalysisResults`).
        inputFilePath: CASTOR directory to list.
        outputFilePath: directory used to build the merged skim file names;
            created if missing.
        jobId: identifier embedded in the file names to select.
        tmpFilePath: directory handed to `buildMakefile`; created if missing.

    Raises:
        ValueError: if any of the parameters above is None.  Empty values
            are not rejected.
    """
    # Validate in a fixed order so the first missing parameter is reported.
    required_parameters = (
        ("channel", channel),
        ("samples", samples),
        ("inputFilePath", inputFilePath),
        ("outputFilePath", outputFilePath),
        ("tmpFilePath", tmpFilePath),
        ("jobId", jobId),
    )
    for parameter_name, parameter_value in required_parameters:
        if parameter_value is None:
            raise ValueError("Undefined %s Parameter !!" % parameter_name)

    # makedirs (rather than mkdir) so that missing parent directories do not
    # abort the harvesting setup.
    for directory in (tmpFilePath, outputFilePath):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Use CASTOR to find the files to merge
    print("Finding CASTOR files")
    print(" inputFilePath = %s" % inputFilePath)
    # %-formatting keeps this working when jobId is not a string, exactly as
    # the file-name patterns below already do.
    print(" jobId = %s" % jobId)

    job_tag = '_%s_' % jobId
    files_and_times = [
        (file_info['time'], file_info['path'])
        for file_info in castor.nslsl(inputFilePath)
        if job_tag in file_info['file']]

    # Sort files by modified time (ties are broken by path)
    print("Sorting by modified time")
    files_and_times.sort()

    skim_harvest_jobs = []

    # The sample list comes from the `samples` argument; previously a global
    # SAMPLES_TO_ANALYZE was read here and the argument was ignored.
    for sample in samples['SAMPLES_TO_ANALYZE']:
        print("Finding input files for %s" % sample)
        # Get final event skims that need to be merged
        sample_tag = 'tauIdEffSample_%s_%s_' % (sample, jobId)
        event_files_to_merge = [
            'rfio:%s' % castor_path
            for _, castor_path in files_and_times
            if sample_tag in castor_path]
        skim_output_path = os.path.join(
            outputFilePath, "tauIdEffSkim_%s_%s.root" % (sample, jobId))
        skim_harvest_jobs.append(
            (sample, skim_output_path, event_files_to_merge))

    print("Creating Makefile for skimmed event files")
    skim_MakefileName = "Makefile.mergeTauIdEffSkims_%s" % (jobId)
    buildMakefile(skim_harvest_jobs, tmpFilePath, skim_MakefileName,
                  merge_per_job = 7, harvest_tool = 'genericSkimMerger.py')

    print("Makefile built. In order to start harvesting, execute 'make -f %s -j 8 -k'" % skim_MakefileName)
def harvestAnalysisResults(
        channel=None,
        samples=None,
        inputFilePath=None,
        outputFilePath=None,
        jobId=None,
        tmpFilePath=None,
        # Pre-scale and factorize samples
        ana_defs=None,
        plot_defs=None,
        plotters=None,
        use_job_report=False,
        useCastor=True):

    # check that channel, samples, inputFilePath, outputFilePath, tmpFilePath and jobId
    # parameters are defined and non-empty
    if channel is None:
        raise ValueError("Undefined channel Parameter !!")
    if samples is None:
        raise ValueError("Undefined samples Parameter !!")
    if inputFilePath is None:
        raise ValueError("Undefined inputFilePath Parameter !!")
    if outputFilePath is None:
        raise ValueError("Undefined outputFilePath Parameter !!")
    if tmpFilePath is None:
        raise ValueError("Undefined tmpFilePath Parameter !!")
    if jobId is None:
        raise ValueError("Undefined jobId Parameter !!")

    if not os.path.exists(tmpFilePath):
        os.mkdir(tmpFilePath)
    if not os.path.exists(outputFilePath):
        os.mkdir(outputFilePath)

    files_and_times = []

    #if not ana_defs or not use_job_report:
    if not use_job_report:
        # Use CASTOR to find the files to merge
        print "Finding CASTOR files"
        inputFilePath = '/castor/cern.ch' + '/' + inputFilePath
        inputFilePath = inputFilePath.replace('//', '/')
        inputFilePath = inputFilePath.replace('/castor/cern.ch/castor/cern.ch',
                                              '/castor/cern.ch')
        print(" inputFilePath = " + inputFilePath)

        files_and_times = [
            (file['time'], file['path'])
            for file in harvest_tools.clean_by_crab_id(
                file for file in harvest_tools.castor_source(inputFilePath)
                if '_%s_' % jobId in file['path'])
        ]

    else:
        print "Using job reports to find output files"
        for sample in samples['SAMPLES_TO_ANALYZE']:
            crab_dir = ''
            if useCastor:
                crab_dir = os.path.join(
                    'crab', 'crabdir_run%s_%s_%s' % (channel, sample, jobId))

                print "Getting output files from:", crab_dir
                files_and_times.extend(
                    (None, file['path'])
                    for file in harvest_tools.crabdir_source(crab_dir))

            else:

                crab_dir = os.path.join(
                    'crab', 'crabdir_run%s_%s_%s' % (channel, sample, jobId))

                print "Getting output files from:", crab_dir
                if not os.path.exists(crab_dir):
                    continue

                files_and_times.extend(
                    (None, file)
                    for file in harvest_tools.crabdir_source_stdout(crab_dir))

    #print files_and_times
    plot_harvest_jobs = []
    skim_harvest_jobs = []
    ntuple_harvest_jobs = []

    for sample in samples['SAMPLES_TO_ANALYZE']:
        print "Finding input files for", sample
        output_file = "harvested_%s_%s_%s.root" % (channel, sample, jobId)
        output_path = os.path.join(outputFilePath, output_file)
        files_to_merge = list()
        if useCastor:
            files_to_merge = list('rfio:%s' % file
                                  for time, file in files_and_times
                                  if file.find('plots_%s_%s_%s_' %
                                               (channel, sample, jobId)) != -1)
        else:
            files_to_merge = list('%s' % file for time, file in files_and_times
                                  if file.find('plots_%s_%s_%s_' %
                                               (channel, sample, jobId)) != -1)

        plot_harvest_jobs.append((sample, output_path, files_to_merge))

        # Get final event skims that need to be merged
        if useCastor:
            event_files_to_merge = list(
                'rfio:%s' % file for time, file in files_and_times
                if file.find('final_events_%s_%s_%s_' %
                             (channel, sample, jobId)) != -1)
        else:
            event_files_to_merge = list(
                '%s' % file for time, file in files_and_times
                if file.find('final_events_%s_%s_%s_' %
                             (channel, sample, jobId)) != -1)

        skim_output_path = os.path.join(
            outputFilePath, "skim_%s_%s_%s.root" % (channel, sample, jobId))
        skim_harvest_jobs.append(
            (sample, skim_output_path, event_files_to_merge))

        # Gen ntuple files that need to be merged
        ntuple_files_to_merge = list(
            '%s' % file for time, file in files_and_times
            if file.find('diTauNtuple_%s_%s_%s_' %
                         (channel, sample, jobId)) != -1)
        ntuple_output_path = os.path.join(
            outputFilePath, "ntuple_%s_%s_%s.root" % (channel, sample, jobId))
        ntuple_harvest_jobs.append(
            (sample, ntuple_output_path, ntuple_files_to_merge))

    print "Creating Makefile for histogram files"
    MakefileName = 'Makefile.harvest_%s_%s' % (channel, jobId)
    buildMakefile(plot_harvest_jobs,
                  tmpFilePath,
                  MakefileName,
                  merge_per_job=7,
                  ana_defs=ana_defs,
                  plot_defs=plot_defs,
                  plotters=plotters)

    print "Creating Makefile for skimmed event files"
    skim_MakefileName = "Makefile.mergeSkims_%s_%s" % (channel, jobId)
    # Make merge_per_job absurdly high, so it doesn't create unnecessary layers.
    buildMakefile(skim_harvest_jobs,
                  tmpFilePath,
                  skim_MakefileName,
                  merge_per_job=1e9,
                  harvest_tool='genericSkimMerger.py')

    print "Creating Makefile for ntuple files"
    ntuple_MakefileName = "Makefile.mergeNtuples_%s_%s" % (channel, jobId)
    # Make merge_per_job absurdly high, so it doesn't create unnecessary layers.
    buildMakefile(ntuple_harvest_jobs,
                  tmpFilePath,
                  ntuple_MakefileName,
                  merge_per_job=1e9,
                  harvest_tool='genericSkimMerger.py')

    print "Makefile built. In order to start harvesting, execute 'make -f %s -j 8 -k'" % MakefileName
# Example no. 3
# 0
# Set up the harvesting Makefile for the AHtoMuTau channel: one harvest job
# per sample, fed from the CASTOR copies of that sample's crab output files.

# makedirs (rather than mkdir) so a nested WORKING_DIRECTORY whose parents do
# not exist yet is created as well.
if not os.path.exists(WORKING_DIRECTORY):
    os.makedirs(WORKING_DIRECTORY)

# Map samples to their crab directories
crab_dir_map = {}

for sample in SAMPLES_TO_ANALYZE:
    crab_dir_map[sample] = os.path.join(
        "crab", "crabdir_runAHtoMuTau_AHtoMuTau_%s_%s" % (sample, ID))

# Each entry is (sample, merged output path, list of input files).
harvest_jobs = []

for sample in SAMPLES_TO_ANALYZE:
    print("Finding input files for %s" % sample)
    output_file = "harvested_%s_%s_%s.root" % (CHANNEL, sample, ID)
    output_path = os.path.join(PLOT_OUTPUT_DIRECTORY, output_file)
    # A generator inside list() keeps the loop variable out of module scope
    # under Python 2; it is also named so it no longer shadows builtin `file`.
    files_to_merge = list(
        "rfio:%s" % castor_path
        for castor_path in crab.map_lfns_to_castor(
            crab.lfns(crab_dir_map[sample])))
    harvest_jobs.append((sample, output_path, files_to_merge))

makefile_name = "Makefile2.harvest_%s_%s" % (CHANNEL, ID)
buildMakefile(
    harvest_jobs,
    WORKING_DIRECTORY,
    makefile_name,
    merge_per_job=7,
    ana_defs="TauAnalysis.Configuration.recoSampleDefinitionsAHtoMuTau_7TeV_grid_cfi",
    # Definition of the plots to make
    plot_defs="TauAnalysis.Configuration.plotAHtoMuTau_cff",
    # Plotter modules
    plotters=["plotAHtoMuTau_woBtag", "plotAHtoMuTau_wBtag"],
)
# Map samples to their crab directories
crab_dir_map = {}

for sample in SAMPLES_TO_ANALYZE:
    crab_dir_name = 'crabdir_runAHtoMuTau_AHtoMuTau_%s_%s' % (sample, ID)
    crab_dir_map[sample] = os.path.join('crab', crab_dir_name)

# Each entry is (sample, merged output path, list of input files).
harvest_jobs = []

for sample in SAMPLES_TO_ANALYZE:
    print("Finding input files for %s" % sample)
    output_file = "harvested_%s_%s_%s.root" % (CHANNEL, sample, ID)
    output_path = os.path.join(PLOT_OUTPUT_DIRECTORY, output_file)
    # Inputs are the CASTOR locations of the sample's crab output files,
    # prefixed with 'rfio:'.  The generator variable is named so that it does
    # not shadow the builtin `file`.
    files_to_merge = list(
        'rfio:%s' % castor_path
        for castor_path in crab.map_lfns_to_castor(
            crab.lfns(crab_dir_map[sample])))
    harvest_jobs.append((sample, output_path, files_to_merge))

makefile_name = 'Makefile2.harvest_%s_%s' % (CHANNEL, ID)
buildMakefile(
    harvest_jobs,
    WORKING_DIRECTORY,
    makefile_name,
    merge_per_job=7,
    ana_defs=
    "TauAnalysis.Configuration.recoSampleDefinitionsAHtoMuTau_7TeV_grid_cfi",
    # Definition of the plots to make
    plot_defs="TauAnalysis.Configuration.plotAHtoMuTau_cff",
    # Plotter modules
    plotters=['plotAHtoMuTau_woBtag', 'plotAHtoMuTau_wBtag'])
def harvestAnalysisResults(channel = None, samples = None, inputFilePath = None,
                           outputFilePath = None, jobId = None,
                           tmpFilePath = None,
                           # Pre-scale and factorize samples
                           ana_defs = None, plot_defs = None, plotters = None,
                           use_job_report = False,
                           useCastor = True):
    """Create the Makefiles for harvesting plots, event skims and ntuples.

    The candidate input files come either from a CASTOR listing
    (`use_job_report` false: `harvest_tools.castor_source` on the normalised
    `inputFilePath`, restricted to paths containing `_<jobId>_` and filtered
    by `harvest_tools.clean_by_crab_id`) or from the crab directories
    `crab/crabdir_run<channel>_<sample>_<jobId>` of every sample
    (`use_job_report` true: `harvest_tools.crabdir_source` when `useCastor`
    is true, otherwise `harvest_tools.crabdir_source_stdout` for directories
    that exist).

    Per sample, files whose path contains `plots_`, `final_events_` or
    `diTauNtuple_` followed by `<channel>_<sample>_<jobId>_` are grouped into
    a plot, skim and ntuple harvest job respectively; each group of jobs is
    passed to `buildMakefile` under its own Makefile name.

    Args:
        channel: channel name embedded in file, directory and Makefile names.
        samples: mapping whose 'SAMPLES_TO_ANALYZE' entry lists sample names.
        inputFilePath: CASTOR directory (only read when `use_job_report` is
            false).
        outputFilePath: directory used to build the merged file names;
            created if missing.
        jobId: identifier embedded in the file names to select.
        tmpFilePath: directory handed to `buildMakefile`; created if missing.
        ana_defs, plot_defs, plotters: forwarded unchanged to `buildMakefile`
            for the histogram Makefile.
        use_job_report: choose crab job reports instead of a CASTOR listing.
        useCastor: when true, plot and skim inputs get an `rfio:` prefix.

    Raises:
        ValueError: if channel, samples, inputFilePath, outputFilePath,
            tmpFilePath or jobId is None.  Empty values are not rejected.
    """
    # Report the first undefined mandatory parameter, in the documented order.
    mandatory = [("channel", channel), ("samples", samples),
                 ("inputFilePath", inputFilePath),
                 ("outputFilePath", outputFilePath),
                 ("tmpFilePath", tmpFilePath), ("jobId", jobId)]
    for label, argument in mandatory:
        if argument is None:
            raise ValueError("Undefined %s Parameter !!" % label)

    # makedirs also creates missing parent directories, which mkdir does not.
    if not os.path.exists(tmpFilePath):
        os.makedirs(tmpFilePath)
    if not os.path.exists(outputFilePath):
        os.makedirs(outputFilePath)

    files_and_times = []

    if use_job_report:
        print("Using job reports to find output files")
        for sample in samples['SAMPLES_TO_ANALYZE']:
            crab_dir = os.path.join(
                'crab', 'crabdir_run%s_%s_%s' % (channel, sample, jobId))
            print("Getting output files from: %s" % crab_dir)

            if useCastor:
                files_and_times.extend(
                    (None, report['path'])
                    for report in harvest_tools.crabdir_source(crab_dir))
                continue

            # Without CASTOR, samples lacking a crab directory are skipped.
            if not os.path.exists(crab_dir):
                continue
            files_and_times.extend(
                (None, output_path)
                for output_path in harvest_tools.crabdir_source_stdout(crab_dir))
    else:
        # Use CASTOR to find the files to merge
        print("Finding CASTOR files")
        # Force the path under /castor/cern.ch, collapsing a doubled slash or
        # a doubled root if the caller already passed an absolute CASTOR path.
        inputFilePath = '/castor/cern.ch' + '/' + inputFilePath
        inputFilePath = inputFilePath.replace('//', '/')
        inputFilePath = inputFilePath.replace('/castor/cern.ch/castor/cern.ch', '/castor/cern.ch')
        print(" inputFilePath = " + inputFilePath)

        job_tag = '_%s_' % jobId
        files_and_times = [
            (listing['time'], listing['path'])
            for listing in harvest_tools.clean_by_crab_id(
                listing for listing in harvest_tools.castor_source(inputFilePath)
                if job_tag in listing['path'])
        ]

    def matching_files(kind, sample, prefix):
        """Return prefixed paths containing `<kind>_<channel>_<sample>_<jobId>_`."""
        pattern = '%s_%s_%s_%s_' % (kind, channel, sample, jobId)
        return ['%s%s' % (prefix, path)
                for _, path in files_and_times
                if pattern in path]

    # Plot and skim inputs are read via rfio when they live on CASTOR.
    if useCastor:
        castor_prefix = 'rfio:'
    else:
        castor_prefix = ''

    plot_harvest_jobs = []
    skim_harvest_jobs = []
    ntuple_harvest_jobs = []

    for sample in samples['SAMPLES_TO_ANALYZE']:
        print("Finding input files for %s" % sample)

        output_file = "harvested_%s_%s_%s.root" % (channel, sample, jobId)
        output_path = os.path.join(outputFilePath, output_file)
        plot_harvest_jobs.append(
            (sample, output_path,
             matching_files('plots', sample, castor_prefix)))

        # Get final event skims that need to be merged
        skim_output_path = os.path.join(
            outputFilePath, "skim_%s_%s_%s.root" % (channel, sample, jobId))
        skim_harvest_jobs.append(
            (sample, skim_output_path,
             matching_files('final_events', sample, castor_prefix)))

        # Gen ntuple files that need to be merged
        # NOTE(review): ntuple inputs never get the 'rfio:' prefix, even when
        # useCastor is true -- kept as in the original; confirm it is intended.
        ntuple_output_path = os.path.join(
            outputFilePath, "ntuple_%s_%s_%s.root" % (channel, sample, jobId))
        ntuple_harvest_jobs.append(
            (sample, ntuple_output_path,
             matching_files('diTauNtuple', sample, '')))

    print("Creating Makefile for histogram files")
    MakefileName = 'Makefile.harvest_%s_%s' % (channel, jobId)
    buildMakefile(plot_harvest_jobs, tmpFilePath, MakefileName,
                  merge_per_job = 7, ana_defs = ana_defs, plot_defs = plot_defs,
                  plotters = plotters)

    print("Creating Makefile for skimmed event files")
    skim_MakefileName = "Makefile.mergeSkims_%s_%s" % (channel, jobId)
    # Make merge_per_job absurdly high, so it doesn't create unnecessary layers.
    buildMakefile(skim_harvest_jobs, tmpFilePath, skim_MakefileName,
                  merge_per_job = 1e9, harvest_tool = 'genericSkimMerger.py')

    print("Creating Makefile for ntuple files")
    ntuple_MakefileName = "Makefile.mergeNtuples_%s_%s" % (channel, jobId)
    # Make merge_per_job absurdly high, so it doesn't create unnecessary layers.
    buildMakefile(ntuple_harvest_jobs, tmpFilePath, ntuple_MakefileName,
                  merge_per_job = 1e9, harvest_tool = 'genericSkimMerger.py')

    print("Makefile built. In order to start harvesting, execute 'make -f %s -j 8 -k'" % MakefileName)