示例#1
0
def getAllPaths(year):
    """Return the distinct HLT path names listed in the HLT prescales JSON of *year*.

    Every entry whose second field contains 'HLT_' contributes that field with
    everything from the first '_v' onwards cut off. The result is a list
    without duplicates, in no particular order.
    """
    menus = loadJSON('../data/' + hltPrescalesJSON[year])
    uniquePaths = set()
    for entries in menus.values():
        for entry in entries:
            name = entry[1]
            if 'HLT_' not in name:
                continue
            # Strip the version suffix so different versions collapse into one path
            uniquePaths.add(name.split('_v')[0])
    return list(uniquePaths)
示例#2
0
def makeLowestSeedsPage(year, path):
    """Write the overview page of lowest unprescaled L1 seeds for one HLT path.

    Reads the '<seed>_prescale1.json' files found in
    triggerPrescales/<year>/json/<path>, determines for the 'L1_SingleEG' and
    'L1_SingleIsoEG' seed families in which lumis each seed is the first one
    (in sorted file order) with prescale 1, and writes into the
    'lowestSeeds' subdirectory:

    * one '<iso>_<nonIso>.json' file for every iso/non-iso seed combination
      with overlapping lumis;
    * 'lowestSeeds.php', generated from the 'index.php' template in the
      current working directory, linking to those files.

    Args:
        year: year as a string (used in paths and in the page title).
        path: name of the HLT path.

    Raises:
        OSError: if the output directory cannot be created.
    """
    jsonDir = os.path.join('triggerPrescales', year, 'json', path)
    outDir = os.path.join(jsonDir, 'lowestSeeds')
    puData = loadJSON('../data/pileup_' + year + '.json')

    def lowestSeeds(l1SeedType):
        """Map each seed name of the given family to the lumis where it is the lowest seed."""
        hasLowerSeed = {}
        isLowestSeed = {}
        pattern = os.path.join(jsonDir, l1SeedType + '*prescale1.json')
        for f in sorted(glob.glob(pattern)):
            l1Seed = os.path.basename(f).split('_prescale')[0].replace(
                'er', '')
            lumis = subtractLumis(loadJSON(f), hasLowerSeed)
            if not lumis:
                continue
            if l1Seed in isLowestSeed:
                # Both the 'er' and non-'er' variant end up under the same
                # name: merge them. (The former `+=` raised TypeError on these
                # dict-shaped lumis and a bare except then silently overwrote
                # the earlier entry.)
                isLowestSeed[l1Seed] = orLumis(isLowestSeed[l1Seed],
                                               copy.deepcopy(lumis))
            else:
                isLowestSeed[l1Seed] = lumis
            hasLowerSeed = orLumis(
                hasLowerSeed, copy.deepcopy(lumis))  # copy is important here
        return isLowestSeed

    nonIsoSeeds = lowestSeeds('L1_SingleEG')
    isoSeeds = lowestSeeds('L1_SingleIsoEG')

    try:
        os.makedirs(outDir)
    except OSError:
        # An already existing directory is fine, anything else is a real error
        if not os.path.isdir(outDir):
            raise

    with open('index.php') as template:
        with open(os.path.join(outDir, 'lowestSeeds.php'), 'w') as f:
            for line in template:
                if 'TITLE' in line:
                    f.write(
                        'echo "Lowest L1 seed (iso and non-iso) thresholds for '
                        + path + ' (' + year + ')";\n')
                elif 'DIV' in line:
                    # No plot on this page: the placeholder becomes an empty line
                    f.write('\n')
                elif 'LISTSEEDS' in line:
                    for isoSeed in sorted(isoSeeds.keys()):
                        for nonIsoSeed in sorted(nonIsoSeeds.keys()):
                            lumis = andLumis(isoSeeds[isoSeed],
                                             nonIsoSeeds[nonIsoSeed])
                            if not lumis:
                                continue
                            comboName = (isoSeed + '_' + nonIsoSeed).replace(
                                'L1_Single', '')
                            dumpJSON(os.path.join(outDir, comboName + '.json'),
                                     lumis)
                            f.write('<li>' + comboName + ' <a href=' +
                                    comboName + '.json>(' +
                                    ('%.2f' % getIntLumi(lumis, puData)) +
                                    '/fb)</a><br>\n')
                else:
                    f.write(line)
示例#3
0
 def lowestSeeds(l1SeedType):
     """Map each seed name of the given family to the lumis where it is the lowest seed.

     Goes through the '<l1SeedType>*prescale1.json' files in `jsonDir` (taken
     from the enclosing scope) in sorted order; each seed keeps only the lumis
     not yet claimed by a seed earlier in that order. 'er' is stripped from
     the seed name, so 'er' and non-'er' variants share one entry.
     """
     hasLowerSeed = {}
     isLowestSeed = {}
     pattern = os.path.join(jsonDir, l1SeedType + '*prescale1.json')
     for f in sorted(glob.glob(pattern)):
         l1Seed = os.path.basename(f).split('_prescale')[0].replace('er', '')
         lumis = subtractLumis(loadJSON(f), hasLowerSeed)
         if not lumis:
             continue
         if l1Seed in isLowestSeed:
             # Merge 'er' and non-'er' variants. (The former `+=` raised
             # TypeError on these dict-shaped lumis and a bare except then
             # silently overwrote the earlier entry.)
             isLowestSeed[l1Seed] = orLumis(isLowestSeed[l1Seed],
                                            copy.deepcopy(lumis))
         else:
             isLowestSeed[l1Seed] = lumis
         hasLowerSeed = orLumis(
             hasLowerSeed, copy.deepcopy(lumis))  # copy is important here
     return isLowestSeed
示例#4
0
    s['precision'] = s['tp'] / (s['tp'] + s['fn'])
    # calculate recall: TP/(TP+FP)
    s['recall'] = s['tp'] / (s['tp'] + s['fp'])
    # calculate F1 Measure: 2*(precision*recall)/(precision+recall)
    s['f1_measure'] = 2 * (s['precision'] * s['recall']) / (s['precision'] +
                                                            s['recall'])

    for i in s:
        # show each percentage in terms of 0-100% and 3 decimal places
        s[i] = round(s[i] * 100, 2)
    return s


if __name__ == "__main__":
    # Script entry point: load the dataset, run it through the feature steps
    # in order, then classify with naive_bayes and score with evaluate.
    import helpers
    dataset = helpers.loadJSON()
    # Each step takes the dataset and returns the (updated) dataset, so the
    # order of these calls matters.
    dataset = tf(dataset)
    dataset = df(dataset)
    dataset = tfidf(dataset)
    # n-gram size handed to ngram(); 2 presumably means bigrams -- confirm
    # against ngram()
    gramSize = 2
    dataset = ngram(dataset, gramSize)
    dataset = preprocess_probabilities(dataset)
    # calculate naive bayes for our three methods
    # (only the "tfidf" variant is currently enabled, the others are commented out)
    # results_tf = naive_bayes(dataset, "tf")
    results_tfidf = naive_bayes(dataset, "tfidf")
    # results_ngrams = naive_bayes(dataset, "ngrams", gramSize)

    # Score the enabled variant
    # tf_scores = evaluate(results_tf)
    tfidf_scores = evaluate(results_tfidf)
    # ngram_scores = evaluate(results_ngrams)
    # print("tf:\t", tf_scores)
示例#5
0
def analyzePath(args):
    """Analyze the prescales of one HLT path and its L1 seeds and write the web pages.

    For the given path and year this collects, per trigger (the HLT path
    itself and every L1 seed in its seed OR) and per prescale value, the lumi
    ranges in which that prescale was active, and writes:

    * triggerPrescales/<year>/<path>.php, generated from the 'index.php'
      template in the current working directory;
    * triggerPrescales/<year>/json/<path>/<trigger>_prescale<ps>.json for
      every trigger/prescale combination;
    * when all seeds are L1_SingleEG / L1_SingleIsoEG seeds, an additional
      'lowestSeeds' subdirectory with an overview of the lowest seeds with
      prescale 1.

    Args:
        args: tuple (path, year, minRun, maxRun). It is a single tuple so the
            function can be handed to a map-style call. `minRun` / `maxRun`
            are strings (they are concatenated into the page title) or a
            falsy value for "no restriction".

    Raises:
        OSError: if an output directory cannot be created.
    """
    path, year, minRun, maxRun = args

    # Set prescales[key1][key2][...][run] += lumis and create keys when needed
    def appendLumis(dictionary, keys, run, lumis):
        for k in keys:
            dictionary = dictionary.setdefault(k, {})
        if run in dictionary:
            dictionary[run] += lumis
        else:
            dictionary[run] = lumis

    # Create a directory; an already existing one is fine, other errors are not
    def ensureDir(directory):
        try:
            os.makedirs(directory)
        except OSError:
            if not os.path.isdir(directory):
                raise

    # Load the needed JSON
    goodLumis = loadJSON('../data/' + certifiedLumis[year])
    runsInfo = loadJSON('../data/' + runInfoJSON[year])
    l1psData = loadJSON('../data/' + l1PrescalesJSON[year])
    hltpsData = loadJSON('../data/' + hltPrescalesJSON[year])
    puData = loadJSON('../data/' + pileUpJSON[year])

    # Possibility to restrict to specific run range if requested
    # (this used to read the undefined name `good_lumis`, a NameError as soon
    # as a run range was given)
    if minRun:
        goodLumis = {
            r: l
            for r, l in goodLumis.items() if int(r) >= int(minRun)
        }
    if maxRun:
        goodLumis = {
            r: l
            for r, l in goodLumis.items() if int(r) <= int(maxRun)
        }
    if minRun and maxRun: runSpec = 'Run ' + minRun + '-' + maxRun
    elif minRun: runSpec = year + ', run ' + minRun + ' and above'
    elif maxRun: runSpec = year + ', up to run ' + maxRun
    else: runSpec = year

    # Create dictionary prescales[path/seed][prescale][run] with lumi ranges
    prescales = {}
    for run in sorted(runsInfo.keys()):
        if minRun and int(run) < int(minRun): continue
        if maxRun and int(run) > int(maxRun): continue
        hltMenu = runsInfo[run]['hlt_menu']
        trigMode = runsInfo[run]['trig_mode']
        psCols = runsInfo[run]['ps_cols']

        prescaleMap = {path: get_hlt_prescales(hltpsData[hltMenu], path)}

        # if HLT path not found in this menu, put this run in the "not existing" group
        if not prescaleMap[path]:
            appendLumis(prescales, [path, 'NotExisting'], run,
                        [[1, 0xFFFFFFF]])
            continue

        # Get the OR of all L1 seeds (last field of the HLT prescale row)
        l1OR = prescaleMap[path][-1]
        for l1trigger in l1OR.split(' OR '):
            prescaleMap[l1trigger] = get_l1_prescales(
                l1psData[trigMode], l1trigger.replace(' ', ''))

        # Loop over all prescale columns, and add their lumis to the corresponding prescale for a path/seed
        for psColumn, lumis in psCols.items():
            # Column n is read from index n + 2 of a prescale row; presumably
            # the first two fields are not prescale columns -- TODO confirm
            psIndex = int(psColumn) + 2
            for trigger, pathPrescales in prescaleMap.items():
                if not pathPrescales: continue
                prescale = pathPrescales[psIndex]
                appendLumis(prescales, [trigger, prescale], run,
                            copy.deepcopy(lumis))

    # Check in which lumis the HLT path was off (prescale 0 or not existing) or on
    hltPathOff, hltPathOn = {}, {}
    for i in prescales[path]:
        if i in ['0', 'NotExisting']:
            hltPathOff = orLumis(hltPathOff, copy.deepcopy(prescales[path][i]))
        else:
            hltPathOn = orLumis(hltPathOn, copy.deepcopy(prescales[path][i]))

    # Some lumis were missing in the runInfo json, while they are present in the certified lumis; these have prescale column -1 in wbm which means all paths/seeds are unprescaled
    missingLumis = subtractLumis(goodLumis, orLumis(hltPathOn, hltPathOff))
    for trigger in prescales:
        if '1' in prescales[trigger]:
            # Defensive copy, like at the other orLumis call sites in this
            # function, so the triggers never share missingLumis
            prescales[trigger]['1'] = orLumis(prescales[trigger]['1'],
                                              copy.deepcopy(missingLumis))
        else:
            prescales[trigger]['1'] = copy.deepcopy(missingLumis)
    hltPathOn = andLumis(goodLumis, orLumis(hltPathOn, missingLumis))

    # Check if there are missing lumis for the seeds
    for trigger in [t for t in prescales if t != path]:
        prescales[trigger]['NotIncluded'] = copy.deepcopy(hltPathOn)
        for ps in [
                ps for ps in prescales[trigger].keys() if ps != 'NotIncluded'
        ]:
            prescales[trigger]['NotIncluded'] = subtractLumis(
                prescales[trigger]['NotIncluded'], prescales[trigger][ps])

    # Clean up: only keep lumis which are certified, do not show the L1 seeds for lumis where the HLT path is off
    for trigger in prescales:
        # list(): entries are deleted while looping
        for ps in list(prescales[trigger].keys()):
            prescales[trigger][ps] = andLumis(
                prescales[trigger][ps],
                goodLumis if trigger == path else hltPathOn)
            if not prescales[trigger][ps]: del prescales[trigger][ps]

    # Remove those seeds with only 0 and NotIncluded
    for trigger in [t for t in prescales if t != path]:
        if all(k in ('0', 'NotIncluded') for k in prescales[trigger]):
            del prescales[trigger]

    # Output to file
    topDir = os.path.join('triggerPrescales', year)
    jsonDir = os.path.join('triggerPrescales', year, 'json', path)
    ensureDir(jsonDir)
    shutil.copy('index.php', 'triggerPrescales/index.php')
    shutil.copy('index.php', os.path.join(topDir, 'index.php'))

    div = prescalePlot(path, prescales, year, goodLumis, puData, runsInfo)
    with open('index.php') as template:
        with open(os.path.join(topDir, path + '.php'), 'w') as f:
            for line in template:
                if 'TITLE' in line:
                    f.write('echo "Prescales for ' + path + ' (' + runSpec +
                            ')";\n')
                elif 'DIV' in line:
                    f.write(div + '\n')
                elif 'LISTSEEDS' in line:
                    f.write(
                        '<div class="list2" style="margin-top: 2cm">\n<ul>\n')
                    for trigger in sorted(prescales.keys()):
                        if trigger == path:
                            f.write('<li><b>' + trigger + '</b> (' + runSpec +
                                    ') <br>\n')
                        else:
                            f.write('<li><b>' + trigger +
                                    '</b><br style="line-height:110%">\n')
                        # Run ranges are shown relative to the lumis in which
                        # this trigger could have been active at all
                        activeLumis = goodLumis if trigger == path else hltPathOn
                        for ps in sorted(prescales[trigger].keys()):
                            lumis = prescales[trigger][ps]
                            jsonName = trigger + '_prescale' + str(ps) + '.json'
                            dumpJSON(os.path.join(jsonDir, jsonName), lumis)
                            # 'NotExisting'/'NotIncluded' are shown without
                            # the 'prescale ' prefix
                            prefix = '' if 'No' in str(ps) else 'prescale '
                            f.write('  ' + prefix + str(ps) + ' <a href=' +
                                    os.path.join('json', path, jsonName) +
                                    '>(' +
                                    ('%.2f' % getIntLumi(lumis, puData)) +
                                    '/fb)</a>')
                            f.write(' <small><small><small>active in runs ' +
                                    runRanges(lumis, activeLumis) +
                                    '</small></small></small><br>\n')
                        f.write('</li>\n')
                    f.write('</ul></div>\n')
                else:
                    f.write(line)

    # In case we are dealing with only L1_SingleEG and/or L1_SingleIsoEG seeds, make some overview of lowest seeds with prescale=1
    if all('L1_SingleEG' in seed or 'L1_SingleIsoEG' in seed
           for seed in prescales.keys() if 'HLT' not in seed):

        def lowestSeeds(l1SeedType):
            hasLowerSeed = {}
            isLowestSeed = {}
            for l1Seed in sorted(
                [seed for seed in prescales.keys() if l1SeedType in seed]):
                if '1' not in prescales[l1Seed]: continue
                lumis = subtractLumis(prescales[l1Seed]['1'], hasLowerSeed)
                if not lumis: continue
                seedName = l1Seed.replace('er', '')
                if seedName in isLowestSeed:
                    # Merge 'er' and non-'er' variants. (The former `+=`
                    # raised TypeError on these dict-shaped lumis and a bare
                    # except then silently overwrote the earlier entry.)
                    isLowestSeed[seedName] = orLumis(isLowestSeed[seedName],
                                                     copy.deepcopy(lumis))
                else:
                    isLowestSeed[seedName] = lumis
                hasLowerSeed = orLumis(
                    hasLowerSeed,
                    copy.deepcopy(lumis))  # copy is important here
            return isLowestSeed

        nonIsoSeeds = lowestSeeds('L1_SingleEG')
        isoSeeds = lowestSeeds('L1_SingleIsoEG')
        lowestDir = os.path.join(jsonDir, 'lowestSeeds')
        ensureDir(lowestDir)

        with open('index.php') as template:
            with open(os.path.join(lowestDir, 'lowestSeeds.php'), 'w') as f:
                for line in template:
                    if 'TITLE' in line:
                        f.write(
                            'echo "Lowest L1 seed (iso and non-iso) thresholds for '
                            + path + ' (' + year + ')";\n')
                    elif 'DIV' in line:
                        f.write('\n')
                    elif 'LISTSEEDS' in line:
                        for isoSeed in sorted(isoSeeds.keys()):
                            for nonIsoSeed in sorted(nonIsoSeeds.keys()):
                                lumis = andLumis(isoSeeds[isoSeed],
                                                 nonIsoSeeds[nonIsoSeed])
                                if not lumis: continue
                                comboName = (isoSeed + '_' +
                                             nonIsoSeed).replace(
                                                 'L1_Single', '')
                                dumpJSON(
                                    os.path.join(lowestDir,
                                                 comboName + '.json'), lumis)
                                f.write('<li>' + comboName + ' <a href=' +
                                        comboName + '.json>(' +
                                        ('%.2f' % getIntLumi(lumis, puData)) +
                                        '/fb)</a><br>\n')
                    else:
                        f.write(line)

        # Single-argument print with parentheses works on Python 2 and 3 alike
        print('Made ' + os.path.join(lowestDir, 'lowestSeeds.php'))