Example #1
def get_max_run_number(fulldataset):
    """Return maximum run number whitelisted for the given dataset."""
    runwhitelist = get_from_deep_json(get_das_json(fulldataset),
                                      'RunWhitelist')
    if runwhitelist:
        return max(runwhitelist)
    else:
        return None
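None of the snippets on this page define get_from_deep_json or get_das_json; they are assumed to come from the same helper module. As a point of reference, here is a minimal sketch of what get_from_deep_json most plausibly does, namely a depth-first search for a key anywhere inside a nested dict/list structure (the exact signature and return behaviour are assumptions, not taken from the source):

def get_from_deep_json(data, akey):
    """Sketch (assumed behaviour): return the first value found for akey
    anywhere inside a nested dict/list structure, or None if absent."""
    if isinstance(data, dict):
        if akey in data:
            return data[akey]
        for value in data.values():
            found = get_from_deep_json(value, akey)
            if found is not None:
                return found
    elif isinstance(data, list):
        for item in data:
            found = get_from_deep_json(item, akey)
            if found is not None:
                return found
    return None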
Example #2
def get_config_urls(das):
    """Return curl commands that fetch the configuration file for the ConfigCacheID found in the DAS json."""
    urls = []
    config_id = get_from_deep_json(das, 'ConfigCacheID')
    if config_id:
        cmd = 'curl -o ./inputs/config-store/%s.configFile -k '\
                '--key /tmp/x509up_u5073 '\
                '--cert /tmp/x509up_u5073 "%s/%s/configFile"' % \
                (config_id, CFG_CMSWEBURL, config_id)
        urls.append(cmd)
    return urls
Example #3
def get_config_urls(das):
    """Return curl commands that fetch the configuration file for each config_id found in the DAS json."""
    urls = []
    config_ids = get_from_deep_json(das, 'config_id')
    if isinstance(config_ids, str):
        config_ids = [
            config_ids,
        ]
    if config_ids:
        for config_id in config_ids:
            cmd = 'curl -o ./inputs/config-store/%s.configFile -k '\
                  '--key /tmp/x509up_u5073 '\
                  '--cert /tmp/x509up_u5073 "%s/%s/configFile"' % \
                  (config_id, CFG_CMSWEBURL, config_id)
            urls.append(cmd)
    return urls
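Both variants above only build the curl command strings; nothing on this page runs them. A hedged driver sketch (assuming a DAS record das is already in hand and that the grid proxy at /tmp/x509up_u5073 is valid) could be:

import os
import subprocess

# Hypothetical driver for the curl commands returned by get_config_urls().
os.makedirs('./inputs/config-store', exist_ok=True)
for cmd in get_config_urls(das):  # 'das' is an already-fetched DAS json record (assumed)
    subprocess.call(cmd, shell=True)  # each element is a complete shell command string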
Example #4
def create_selection_information(dataset):
    """Create box with selection information."""
    out = ''
    # generation steps
    out += '\n'
    out += '\n        <p><strong>Data taking / HLT</strong>'
    out += '\n        <br/>The collision data were assigned to different RAW datasets using the following <a href="/record/1700">HLT configuration</a>.</p>'
    process = 'FIXME'
    generator_text = 'FIXME'
    fulldataset = '/%s/Run2011A-12Oct2013-v%s/AOD' % (dataset,
                                                      get_version(dataset))
    config_id = get_from_deep_json(get_das_json(fulldataset), 'ConfigCacheID')
    if config_id:
        afile = config_id + '.configFile'
        process = get_process(afile)
        generator_text = get_title(afile)
    out += '\n        <p><strong>Data processing / RECO</strong>'
    out += '\n        <br/>This primary AOD dataset was processed from the RAW dataset by the following step:'
    out += '\n        <br/>Step: %s' % process
    out += '\n        <br/>Release: %s' % get_from_deep_json(
        get_das_json(fulldataset), 'CMSSWVersion')
    out += '\n        <br/>Global tag: %s' % get_from_deep_json(
        get_das_json(fulldataset), 'GlobalTag')
    if config_id:
        out += '\n        <br/><a href="/record/%s">%s</a>' % (
            LINK_INFO[config_id], generator_text)
    out += '\n        </p>'
    # HLT trigger paths:
    out += '\n     <p><strong>HLT trigger paths</strong>'
    out += '\n     <br/>The possible HLT trigger paths in this dataset are:'
    trigger_paths = get_trigger_paths_for_dataset(dataset)
    for trigger_path in trigger_paths:
        out += '\n      <br/><a href="/search?p=%s">%s</a>' % (trigger_path,
                                                                trigger_path)
    out += '</p>'
    return out
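create_selection_information() returns an HTML fragment rather than writing anything to disk. A minimal usage sketch (the dataset name and output file are hypothetical) might be:

# Hypothetical usage: write the selection-information box for one primary dataset.
dataset = 'BTag'  # primary dataset name, assumed for illustration
with open('selection_information_%s.html' % dataset, 'w') as out_file:
    out_file.write(create_selection_information(dataset))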
Example #5
def get_input_dataset(das):
    "Return input dataset from DAS json."
    return get_from_deep_json(das, 'input_dataset')
Example #6
def get_generators(das):
    "Return string representing list of generators from the DAS json."
    generators = get_from_deep_json(das, 'generators')
    if isinstance(generators, list):
        generators = " ".join(generators)
    return generators
Example #7
def get_global_tag(das):
    "Return global tagfrom DAS json."
    global_tag = get_from_deep_json(das, 'conditions')
    if global_tag:
        return global_tag.replace('::All', '')
    return global_tag
Example #8
def get_cmssw_release(das):
    "Return CMSSW release number from DAS json."
    return get_from_deep_json(das, 'release')
Example #9
def get_number_of_bytes(das):
    "Return number of bytes from DAS json."
    return get_from_deep_json(das, 'size')
Example #10
def get_number_of_files(das):
    "Return number of files from DAS json."
    return get_from_deep_json(das, 'nfiles')
Example #11
def get_number_of_events(das):
    "Return number of events from DAS json."
    return get_from_deep_json(das, 'nevents')
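Examples #5 through #11 are thin wrappers that each pull a single key out of the DAS json. A short usage sketch that chains them for one dataset (the dataset name and the availability of get_das_json are assumptions) could look like:

# Hypothetical usage of the simple getters above for a single dataset.
dataset = '/BTag/Run2011A-12Oct2013-v1/AOD'  # example dataset name, assumed
das = get_das_json(dataset)
print('Release:    %s' % get_cmssw_release(das))
print('Global tag: %s' % get_global_tag(das))
print('Files:      %s' % get_number_of_files(das))
print('Events:     %s' % get_number_of_events(das))
print('Size:       %s bytes' % get_number_of_bytes(das))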
Example #12
def get_generator_text(dataset):
    """Return generator text for given dataset."""
    import os
    from create_config_file_records import get_title as get_generator_title
    from create_config_file_records import get_process
    config_ids = get_from_deep_json(get_das_json(dataset), 'config_id')
    if isinstance(config_ids, str):
        config_ids = [
            config_ids,
        ]
    process = ''
    if config_ids:
        for config_id in config_ids:
            afile = config_id + '.configFile'
            if process:
                process += ' '
            process += get_process(afile)
    lhe_info_needed = False
    out = '<p>'
    out += '<strong>Step %s</strong>' % process
    out += '<br>Release: %s' % get_from_deep_json(get_das_json(dataset),
                                                  'release')
    out += '<br>Global tag: %s' % get_from_deep_json(get_das_json(dataset),
                                                     'conditions')
    if config_ids:
        for config_id in config_ids:
            afile = config_id + '.configFile'
            generator_text = get_generator_title(afile)
            out += '<br><a href="/record/%s">%s</a>' % (LINK_INFO[config_id],
                                                        generator_text)
            if 'LHE' in generator_text:
                lhe_info_needed = True
    out += '<br>Output dataset: %s' % dataset
    out += '</p>'
    if lhe_info_needed:
        out += """
        <p><strong>Note</strong>
        <br>
To extract the exact LHE configuration, you can use the following script available in the <a href="/getting-started/CMS">CMS working environment</a> on the <a href="/VM/CMS">CMS Open Data VM</a>:

        <blockquote>
        <pre>
$ dumpLHEHeader.py input=file:somefile.root output=testout.lhe
        </pre>
        </blockquote>

where <code>"somefile.foot"</code> is one of the root files in this dataset.

For example, in the existing working area, you can read the generator information directly from any of the root files of this dataset on the CERN Open Data area (the path to the root files is available from the file index of the record):

        <blockquote>
        <pre>
cd CMSSW_5_3_32/src
cmsenv
dumpLHEHeader.py input=file:root://eospublic.cern.ch//eos/opendata/cms/MonteCarlo2011/Summer11LegDR/TTJets_MSDecays_dileptonic_matchingdown_7TeV-madgraph-tauola/AODSIM/PU_S13_START53_LV6_ext1-v1/60000/0282B13B-490E-E511-8E8A-001E67A3EF70.root output=testout.lhe
        </pre>
        </blockquote>
        </p>
        """
    return """
%s
""" % out
Example #13
def main():
    "Do the job."

    print "%10s | %10s | %10s | %10s | %10s | %15s | %15s | %15s" % (
        'RECORD_ID', 'DATASET', '#FILES/256', '#FILES/856', '#FILES/DAS',
        '#BYTES/256', '#BYTES/856', '#BYTES/DAS')
    print "%10s | %10s | %10s | %10s | %10s | %15s | %15s | %15s" % (
        '-' * 10, '-' * 10, '-' * 10, '-' * 10, '-' * 10, '-' * 15, '-' * 15,
        '-' * 15)

    record_id = 'UNKNOWN'
    bytes_via_256 = 0
    bytes_via_856 = 0
    files_via_256 = 0
    files_via_856 = 0
    title = 'UNKNOWN'
    total_titles = 0
    total_files_via_256 = 0
    total_files_via_856 = 0
    total_files_via_das = 0
    total_bytes_via_256 = 0
    total_bytes_via_856 = 0
    total_bytes_via_das = 0
    for line in open(MARC_FILE, 'r').readlines():
        if '</record>' in line:
            files_via_das = get_from_deep_json(get_das_json(title), 'nfiles')
            bytes_via_das = get_from_deep_json(get_das_json(title), 'size')
            total_files_via_das += files_via_das
            total_bytes_via_das += bytes_via_das
            total_files_via_256 += files_via_256
            total_bytes_via_256 += bytes_via_256
            total_files_via_856 += files_via_856
            total_bytes_via_856 += bytes_via_856
            print "%10s | %10s | %10s | %10s | %10s | %15s | %15s | %15s" % (
                record_id, title[:10], files_via_256, files_via_856,
                files_via_das, bytes_via_256, bytes_via_856, bytes_via_das)
            if files_via_856 != files_via_256:
                print 'Whoops!'
            if bytes_via_856 != bytes_via_256:
                print 'Whoops!'
            if files_via_856 != files_via_das:
                print 'Whoops!'
            if bytes_via_856 != bytes_via_das:
                print 'Whoops!'
            record_id = 'UNKNOWN'
            bytes_via_256 = 0
            bytes_via_856 = 0
            files_via_256 = 0
            files_via_856 = 0
            title = 'UNKNOWN'
        else:
            match = re.match(
                r'^\s+<controlfield tag="001">([0-9]+)</controlfield>\s+$',
                line)
            if match:
                record_id = match.groups()[0]
            match = re.match(r'^\s+<subfield code="s">([0-9]+)</subfield>\s+$',
                             line)
            if match:
                bytes_via_856 += int(match.groups()[0])
                files_via_856 += 1
            match = re.match(r'^\s+<subfield code="b">([0-9]+)</subfield>\s+$',
                             line)
            if match:
                bytes_via_256 = int(match.groups()[0])
            match = re.match(r'^\s+<subfield code="f">([0-9]+)</subfield>\s+$',
                             line)
            if match:
                files_via_256 = int(match.groups()[0])
            match = re.match(r'^\s+<subfield code="a">(/.*AOD)</subfield>\s+$',
                             line)
            if match:
                title = match.groups()[0]

    print "%10s | %10s | %10s | %10s | %10s | %15s | %15s | %15s" % (
        'TOTAL', '', total_files_via_256, total_files_via_856,
        total_files_via_das, total_bytes_via_256, total_bytes_via_856,
        total_bytes_via_das)
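Note that get_from_deep_json returns None when a key is missing, so the += accumulation of the DAS numbers in main() would raise a TypeError for an incomplete DAS record. A defensive variant of that lookup step, purely as an illustration, might be:

# Illustrative guard: fall back to 0 when DAS does not report a field.
das_json = get_das_json(title)
files_via_das = get_from_deep_json(das_json, 'nfiles') or 0
bytes_via_das = get_from_deep_json(das_json, 'size') or 0
total_files_via_das += files_via_das
total_bytes_via_das += bytes_via_das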