Example #1
def get_xsec(xsec_file, my_dsid=None):
    with open(xsec_file) as f:
        f.readline()
        for l in f:
            fields = l.rstrip().split()
            # DSID is the first field
            if int(my_dsid) == int(fields[0]):
                xsec = float(fields[2])
                filter_eff = float(fields[3])
                kfactor = float(fields[4])

                logger.info("Got xsec: {}".format(xsec))
                return xsec * filter_eff * kfactor
        else:
            logger.error("Didn't find a xsec ... sorry!")
            return None
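For context, a minimal usage sketch of get_xsec as defined above. The file name, DSID and numbers are made up, and the column layout (DSID, name, xsec, filter efficiency, k-factor after one header line) is inferred from the indices used in the function:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# hypothetical cross-section file: one header line, then DSID, name, xsec, filter_eff, k-factor
with open("xsec_example.txt", "w") as f:
    f.write("dsid name xsec filter_eff kfactor\n")
    f.write("123456 some_sample 0.5 0.9 1.1\n")

print(get_xsec("xsec_example.txt", my_dsid=123456))  # 0.5 * 0.9 * 1.1 = approx. 0.495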
Example #2
    def __init__(self,
                 selections=None,
                 processes=None,
                 weights=None,
                 lumifactor=None,
                 output_path=None,
                 output_name=None):

        logger.info("Initializing yieldsTable")

        self.selections = selections
        self.processes = [_fixLength(l,4) for l in processes]
        self.weights = weights
        self.lumifactor = lumifactor
        self.bkg_processes = [processname for processname,type,trees,processweight in self.processes if type == "background"]
Example #3
client = InfluxDBClient(
    "dbod-eschanet.cern.ch", 8080, username, password, "monit_jobs", True, False
)

points_list = []

reader = mysql.connector.connect(
    user="******",
    password=password,
    host="dbod-sql-graf.cern.ch",
    port=5501,
    database="monit_jobs",
)
read_cursor = reader.cursor()

logger.info("Getting existing data.")
read_cursor.execute(
    "select panda_queue, resource, prod_source, avg1h_running_jobs, avg6h_running_jobs, avg12h_running_jobs, avg24h_running_jobs, avg7d_running_jobs, avg30d_running_jobs from jobs"
)

# Explicitly set timestamp in InfluxDB point. Avoids having multiple entries per 10 minute interval (can happen sometimes with acron)
epoch = datetime.utcfromtimestamp(0)


def unix_time_nanos(dt):
    return (dt - epoch).total_seconds() * 1e9


current_time = datetime.utcnow().replace(microsecond=0, second=0, minute=0)
unix = int(unix_time_nanos(current_time))
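A quick, self-contained sanity check of the nanosecond helper above, using only the standard-library datetime module:

from datetime import datetime

epoch = datetime.utcfromtimestamp(0)

def unix_time_nanos(dt):
    # nanoseconds between a naive UTC datetime and the Unix epoch
    return (dt - epoch).total_seconds() * 1e9

# one second after the epoch corresponds to 1e9 nanoseconds
assert unix_time_nanos(datetime(1970, 1, 1, 0, 0, 1)) == 1e9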
Example #4
def run():

    config = ConfigParser.ConfigParser()
    config.read("config.cfg")

    password = config.get("credentials", "password")
    username = config.get("credentials", "username")
    database = config.get("credentials", "database")

    logger.info("Constructing InfluxDB queries.")

    if args.average == "1h":
        retention = "10m"
        delta = "2h"
        time_units = 6
    elif args.average == "1d":
        retention = "1h"
        delta = "2d"
        time_units = 24
    else:
        return 0

    client = InfluxDBClient(
        "dbod-eschanet.cern.ch", 8080, username, password, "monit_jobs", True, False
    )
    rs_distinct_sets = client.query(
        """select * from "{}"."jobs" where "prod_source" != '' group by panda_queue, prod_source, resource, job_status limit 1""".format(
            retention
        )
    )

    rs_result = client.query(
        """select * from "{}"."jobs" where time > now() - {} and "prod_source" != '' group by panda_queue, prod_source, resource, job_status """.format(
            retention, delta
        )
    )
    raw_dict = rs_result.raw
    series = raw_dict["series"]

    logger.info("Got data from InfluxDB.")
    logger.info("Averaging now.")

    # uploader = InfluxDBClient('dbod-eschanet.cern.ch', 8080, username, password, "test", True, False)

    points_list = []
    for rs in rs_distinct_sets.keys():
        rs = rs[1]  # rs is a tuple
        logger.debug(rs)

        filtered_points = [
            p
            for p in series
            if p["tags"]["panda_queue"] == rs["panda_queue"]
            and p["tags"]["resource"] == rs["resource"]
            and p["tags"]["prod_source"] == rs["prod_source"]
            and p["tags"]["job_status"] == rs["job_status"]
        ]

        if len(filtered_points) == 0:
            logger.debug("Got no points for this set of keys.")
            continue

        filtered_points = filtered_points[0]

        values = filtered_points["values"]
        tags = filtered_points["tags"]
        columns = filtered_points["columns"]

        # reverse in place, want to have latest points first
        values.reverse()

        # get me the last (most recent) point, because this is the one I want to overwrite.
        latest_value = values[0]

        # get averaged values
        if tags["job_status"] in ["failed", "finished", "cancelled", "closed"]:
            averaged_jobs = get_sum(time_units, values, columns.index("jobs"))
        else:
            averaged_jobs = get_average(time_units, values, columns.index("jobs"))
        # averaged_jobs = get_average(time_units, values, columns.index('jobs'))
        averaged_cpu = get_average(time_units, values, columns.index("resource_factor"))
        averaged_corepower = get_average(time_units, values, columns.index("corepower"))
        averaged_HS06_benchmark = get_average(
            time_units, values, columns.index("HS06_benchmark")
        )
        averaged_HS06_pledge = get_average(
            time_units, values, columns.index("federation_HS06_pledge")
        )

        # construct rest of the data dict
        data = dict(zip(columns, latest_value))

        time = data["time"].replace("T", " ").replace("Z", "")

        if args.average == "1h":
            hash = time.split(".")[-1].ljust(9, "0")
        else:
            # got no hashes in 1h aggregate data yet
            m = hashlib.md5()
            m.update(
                str(tags["panda_queue"])
                + str(tags["prod_source"])
                + str(tags["resource"])
                + str(tags["job_status"])
            )
            hash = str(int(m.hexdigest(), 16))[0:9]

        time = unix + int(hash)

        data.update(tags)
        data.pop("time", None)
        data.pop("jobs", None)
        data.pop("resource_factor", None)
        data.pop("corepower", None)
        data.pop("HS06_benchmark", None)
        data.pop("federation_HS06_pledge", None)

        json_body = {
            "measurement": "jobs",
            "tags": data,
            "time": time,
            "fields": {
                "jobs": averaged_jobs,
                "resource_factor": averaged_cpu,
                "corepower": averaged_corepower,
                "HS06_benchmark": averaged_HS06_benchmark,
                "federation_HS06_pledge": averaged_HS06_pledge,
            },
        }

        # sometimes I f**k up and then I want to kill the last measurement...
        if args.kill_last:
            for key, value in json_body["fields"].iteritems():
                json_body["fields"][key] = 0.0

        logger.debug(json_body)
        points_list.append(json_body)

    client.write_points(
        points=points_list, time_precision="n", retention_policy=args.average
    )
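The helpers get_average and get_sum are not part of this excerpt. A plausible minimal sketch, assuming values holds rows ordered newest-first (after the reverse() above) and index selects the column aggregated over the last time_units rows; this is an illustration, not the project's actual implementation:

def get_average(time_units, values, index):
    # hypothetical helper: mean of one column over the most recent `time_units` rows
    points = [row[index] for row in values[:time_units] if row[index] is not None]
    return sum(points) / len(points) if points else 0.0


def get_sum(time_units, values, index):
    # hypothetical helper: sum of one column over the most recent `time_units` rows
    return sum(row[index] for row in values[:time_units] if row[index] is not None)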
Example #5
    logging.getLogger("yieldsTable").setLevel(logging.DEBUG)

if not args.configfile:
    raise Exception("Need to specify a config file")

if args.configfile:
    try:
        exec(open(args.configfile).read())
    except:
        print("can't read configfile {}".format(args.configfile))
        traceback.print_exc()

yieldsTable = yieldsTable(**config)
table = yieldsTable.createYieldstable()

logger.info("Got table")

########################################################
#
# okay, ugly TeX stuff starts here
#
########################################################

header = r'''\documentclass{standalone}
\usepackage{longtable}
\usepackage{booktabs}
\newcommand\MyHead[2]{%
  \multicolumn{1}{l}{\parbox{#1}{\centering #2}}
}
\begin{document}
'''
Example #6
def texWrite():

    header = r'''
import ROOT
from ROOT import gSystem
gSystem.Load("libSusyFitter.so")

from systematic import Systematic
from configManager import configMgr
'''

    #if args.analysis == 'strong1L':
    #    header += r'''
    #Regions = [ 'BVEM', 'BTEM' ]
    #MeffBins = [ '_bin1', '_bin2', '_bin3', '_bin4']
    #'''

    header += r'''
{}Systematics={{}}
'''.format(args.background)

    if args.analysis == '1Lbb':

        main = r''''''

        for sys, d in values.items():
            main += '''
'''
            for region, uncertainties in d.items():
                up_uncertainties = uncertainties["up"]
                down_uncertainties = uncertainties["down"]
                ups = ""
                for up_unc in up_uncertainties:
                    if up_unc > 0:
                        up_unc = "+{}".format(abs(up_unc))
                    else:
                        up_unc = "-{}".format(abs(up_unc))
                    ups += "(1.{}),".format(up_unc)
                downs = ""
                for down_unc in down_uncertainties:
                    if down_unc > 0:
                        down_unc = "+{}".format(abs(down_unc))
                    else:
                        down_unc = "-{}".format(abs(down_unc))
                    downs += "(1.{}),".format(down_unc)
                ups = ups[:-1]
                downs = downs[:-1]

                main += '''{bkg}Systematics['{bkg}{syst}_{region}'] = Systematic("{bkg}{syst}", configMgr.weights, [{ups}], [{downs}], "user","userHistoSys")
'''.format(bkg=args.background, syst=sys, region=region, ups=ups, downs=downs)

        footer = r'''
def TheorUnc(generatorSyst):
    for key in {bkg}Systematics:
        name=key.split('_')[-1]

        if "SRLMincl" in name:
            generatorSyst.append((("{bkg}","SRLMinclEM"), {bkg}Systematics[key]))
        elif "SRMMincl" in name:
            generatorSyst.append((("{bkg}","SRMMinclEM"), {bkg}Systematics[key]))
        elif "SRHMincl" in name:
            generatorSyst.append((("{bkg}","SRHMinclEM"), {bkg}Systematics[key]))
        elif "SRLM" in name:
            generatorSyst.append((("{bkg}","SRLMEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRLMEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRLMMu"), {bkg}Systematics[key]))
        elif "SRMM" in name:
            generatorSyst.append((("{bkg}","SRMMEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRMMEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRMMMu"), {bkg}Systematics[key]))
        elif "SRHM" in name:
            generatorSyst.append((("{bkg}","SRHMEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRHMEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","SRHMMu"), {bkg}Systematics[key]))
        elif "TRLM" in name:
            generatorSyst.append((("{bkg}","TRLMEM"), {bkg}Systematics[key]))
        elif "TRMM" in name:
            generatorSyst.append((("{bkg}","TRMMEM"), {bkg}Systematics[key]))
        elif "TRHM" in name:
            generatorSyst.append((("{bkg}","TRHMEM"), {bkg}Systematics[key]))
        elif "WR" in name:
            generatorSyst.append((("{bkg}","WREM"), {bkg}Systematics[key]))
        elif "STCR" in name:
            generatorSyst.append((("{bkg}","STCREM"), {bkg}Systematics[key]))
        elif "VRtt1on" in name:
            generatorSyst.append((("{bkg}","VRtt1onEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1onEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1onMu"), {bkg}Systematics[key]))
        elif "VRtt2on" in name:
            generatorSyst.append((("{bkg}","VRtt2onEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2onEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2onMu"), {bkg}Systematics[key]))
        elif "VRtt3on" in name:
            generatorSyst.append((("{bkg}","VRtt3onEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3onEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3onMu"), {bkg}Systematics[key]))
        elif "VRtt1off" in name:
            generatorSyst.append((("{bkg}","VRtt1offEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1offEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt1offMu"), {bkg}Systematics[key]))
        elif "VRtt2off" in name:
            generatorSyst.append((("{bkg}","VRtt2offEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2offEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt2offMu"), {bkg}Systematics[key]))
        elif "VRtt3off" in name:
            generatorSyst.append((("{bkg}","VRtt3offEM"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3offEl"), {bkg}Systematics[key]))
            generatorSyst.append((("{bkg}","VRtt3offMu"), {bkg}Systematics[key]))

'''.format(bkg=args.background)

    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):

        main = r''''''

        for sys, d in values.items():
            main += '''
'''
            for region, uncertainties in d.items():
                up_uncertainties = uncertainties["up"]
                down_uncertainties = uncertainties["down"]
                ups = ""
                for up_unc in up_uncertainties:
                    if up_unc > 0:
                        up_unc = "+{}".format(abs(up_unc))
                    else:
                        up_unc = "-{}".format(abs(up_unc))
                    ups += "(1.{}),".format(up_unc)
                downs = ""
                for down_unc in down_uncertainties:
                    if down_unc > 0:
                        down_unc = "+{}".format(abs(down_unc))
                    else:
                        down_unc = "-{}".format(abs(down_unc))
                    downs += "(1.{}),".format(down_unc)
                ups = ups[:-1]
                downs = downs[:-1]

                main += '''{bkg}Systematics['{bkg}{syst}_{region}'] = Systematic("{bkg}{syst}", configMgr.weights, [{ups}], [{downs}], "user","userHistoSys")
'''.format(bkg=args.background, syst=sys, region=region, ups=ups, downs=downs)

        if args.background == 'zjets':
            footer = r'''
def TheorUnc(generatorSyst):
    for key in {bkg}Systematics:
        name=key.split('_')

        generatorSyst.append((("{bkg}",name[1]), {bkg}Systematics[key]))
    return generatorSyst
'''.format(bkg=args.background)

        else:
            footer = r'''
def TheorUnc(generatorSyst):
    for key in {bkg}Systematics:
        # regex would be better suited, but not sure we have that available, so lets work around it
        region = key.split('_')[2]
        bin = key.split('_')[3]
        tower = region[3:5] # Here's hoping this doesn't break. Fingers crossed!
        generatorSyst.append((("{bkg}_"+tower+"_"+bin,region), {bkg}Systematics[key]))
'''.format(bkg=args.background)

    content = header + main + footer
    if not os.path.exists("hf_configs/"):
        os.makedirs("hf_configs/")
    with open(
            "hf_configs/" + "theoryUncertainties_" + args.analysis + "_" +
            args.background + ".py", 'w') as f:
        f.write(content)
        logger.info("Wrote to file %s" % f)
Example #7
def run():

    # Each time the scrapers are run, we update the PQ map
    pqs = pq_map.PQ_names_map(file="data/map_PQ_names.json")
    if not pqs.update(
        ifile="data/scraped_cric_pandaqueue.json",
        ofile="data/map_PQ_names.json",
        key="panda_resource",
    ):
        logger.warning("PQ map is not available")

    if argparse.interval == "10m":
        # Now run all the scrapers that should run in 10min intervals
        # First the PQ CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/pandaqueue/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="panda_resource")
        if cric.save(file="data/scraped_cric_pandaqueue.json", data=json_data):
            logger.info("Scraped PQ CRIC")
        else:
            logger.error("Problem scraping PQ CRIC")

    elif argparse.interval == "1h":
        # Run all the scrapers that only need to be run once per hour (because they don't change too often)

        # Next the ATLAS sites CRIC information
        cric = CRIC()
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/site/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="name")
        if cric.save(file="data/scraped_cric_sites.json", data=json_data):
            logger.info("Scraped sites CRIC")
        else:
            logger.error("Problem scraping sites CRIC")

        # Now the DDM info from CRIC
        raw_data = cric.download(
            url="https://atlas-cric.cern.ch/api/atlas/ddmendpoint/query/?json"
        )
        json_data = cric.convert(data=raw_data, sort_field="site")
        if cric.save(file="data/scraped_cric_ddm.json", data=json_data):
            logger.info("Scraped DDM CRIC")
        else:
            logger.error("Problem scraping DDM CRIC")

        # Next up is REBUS, start with the actual federation map
        rebus = REBUS()
        raw_data = rebus.download(
            url="https://wlcg-cric.cern.ch/api/core/federation/query/?json"
        )
        json_data = rebus.convert(data=raw_data, sort_field="rcsites")
        if rebus.save(file="data/scraped_rebus_federations.json", data=json_data):
            logger.info("Scraped federations CRIC")
        else:
            logger.error("Problem scraping federations CRIC")

        # then the pledges
        # can actually use same JSON raw data as before
        json_data = rebus.convert(
            data=raw_data, sort_field="accounting_name", append_mode=True
        )
        if rebus.save(file="data/scraped_rebus_pledges.json", data=json_data):
            logger.info("Scraped pledges CRIC")
        else:
            logger.error("Problem scraping pledges CRIC")

        # we also get datadisk information from monit Grafana
        url = config.get("credentials_monit_grafana", "url")
        token = config.get("credentials_monit_grafana", "token")

        now = int(round(time.time() * 1000))
        date_to = now - 12 * 60 * 60 * 1000
        date_from = date_to - 24 * 60 * 60 * 1000

        period = """"gte":{0},"lte":{1}""".format(date_from, date_to)

        data = (
            """{"search_type":"query_then_fetch","ignore_unavailable":true,"index":["monit_prod_rucioacc_enr_site*"]}\n{"size":0,"query":{"bool":{"filter":[{"range":{"metadata.timestamp":{"""
            + period
            + ""","format":"epoch_millis"}}},{"query_string":{"analyze_wildcard":true,"query":"data.account:* AND data.campaign:* AND data.country:* AND data.cloud:* AND data.datatype:* AND data.datatype_grouped:* AND data.prod_step:* AND data.provenance:* AND data.rse:* AND data.scope:* AND data.experiment_site:* AND data.stream_name:* AND data.tier:* AND data.token:(\\\"ATLASDATADISK\\\" OR \\\"ATLASSCRATCHDISK\\\") AND data.tombstone:(\\\"primary\\\" OR \\\"secondary\\\") AND NOT(data.tombstone:UNKNOWN) AND data.rse:/.*().*/ AND NOT data.rse:/.*(none).*/"}}]}},"aggs":{"4":{"terms":{"field":"data.rse","size":500,"order":{"_term":"desc"},"min_doc_count":1},"aggs":{"1":{"sum":{"field":"data.files"}},"3":{"sum":{"field":"data.bytes"}}}}}}\n"""
        )

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": "Bearer %s" % token,
        }

        grafana = Grafana(url=url, request=data, headers=headers)
        raw_data = grafana.download()
        pprint.pprint(raw_data)
        json_data = grafana.convert(data=raw_data.json())
        if grafana.save(file="data/scraped_grafana_datadisk.json", data=json_data):
            logger.info("Scraped datadisks from monit grafana")
        else:
            logger.error("Problem scraping datadisks from monit grafana")

        # TODO: not running ES scraper for now since the benchmark jobs are no longer being run
        # #get credentials
        # password = config.get("credentials_elasticsearch", "password")
        # username = config.get("credentials_elasticsearch", "username")
        # host = config.get("credentials_elasticsearch", "host")
        # arg = ([{'host': host, 'port': 9200}])
        # elasticsearch = ElasticSearch(arg,**{'http_auth':(username, password)})
        # kwargs = {
        #     'index' : "benchmarks-*",
        #     'body' : {
        #         "size" : 10000,"query" : {"match_all" : {},},
        #         "collapse": {"field": "metadata.PanDAQueue","inner_hits": {"name": "most_recent","size": 50,"sort": [{"timestamp": "desc"}]}
        #         }
        #     },
        #     'filter_path' : [""]
        # }
        # raw_data = elasticsearch.download(**kwargs)
        # json_data = elasticsearch.convert(data=raw_data)
        #
        # if elasticsearch.save(file='data/scraped_elasticsearch_benchmark.json', data=json_data):
        #     logger.info('Scraped benchmark results from ES')
        # else:
        #     logger.error('Problem scraping benchmark results from ES')

    else:
        # Nothing to do otherwise
        print("Dropping out")
Example #8
        else:
            return expr
    for region in args.regions:
        if expr == region:
            return expr
    else:
        logger.error('Region not found: {}'.format(expr))
        return 0


if not (args.analysis or (args.background and args.regions)):
    logger.error('No analysis nor processes/regions given! Dropping out.')
    sys.exit()
elif not args.analysis and (args.background and args.regions):
    logger.info(
        'Did not provide analysis, but provided background and regions, so lets guess.'
    )
if args.analysis:
    logger.info('Considering analysis: %s' % args.analysis)
    if args.analysis == '1Lbb':
        args.regions = [
            'SRLMincl', 'SRMMincl', 'SRHMincl', 'SRLM', 'SRMM', 'SRHM', 'WR',
            'STCR', 'TRLM', 'TRMM', 'TRHM', 'VRtt1on', 'VRtt2on', 'VRtt3on',
            'VRtt1off', 'VRtt2off', 'VRtt3off'
        ]
    elif (args.analysis == 'strong1L' or args.analysis == 'alt_strong-1L'):
        regions = [
            'SR2J', 'SR4Jhighx', 'SR4Jlowx', 'SR6J', 'TR2J', 'WR2J',
            'TR4Jhighx', 'WR4Jhighx', 'TR4Jlowx', 'WR4Jlowx', 'TR6J', 'WR6J',
            'VR2Jmet', 'VR2Jmt', 'VR4Jhighxapl', 'VR4Jhighxmt',
            'VR4Jlowxhybrid', 'VR4Jlowxapl', 'VR6Japl', 'VR6Jmt'
Example #9
def main():

    for f in args.files:
        logger.info('Got file: {}'.format(os.path.basename(f.name)))
        if not os.path.basename(f.name)[-4:] == ".tex":
            logger.error(
                'This is not a tex file. Do not try to fool me again! Skipping...'
            )
            continue

        #check if we can get a background matched!
        if args.background.lower() in os.path.basename(f.name).lower():
            logger.info('Found process: {}'.format(args.background))
        else:
            logger.error('No process found! Dropping out.')
            sys.exit()

        #now check if we can get the systematic variation name matched
        sys_matches = [
            s for s in args.systematics
            if s.lower() in os.path.basename(f.name).lower()
        ]
        if len(sys_matches) > 1:
            logger.warning(
                'Found more than one systematic variation matching filename: {}'
                .format(sys_matches))
            logger.warning('Will only take first one.')
        elif len(sys_matches) == 1:
            logger.info('Found systematic variation: {}'.format(
                sys_matches[0]))
        elif len(sys_matches) == 0:
            logger.error('No systematic variation found! Dropping out.')
            sys.exit()
        systematic = sys_matches[0]

        ##let's check if we are using an up or a down variation (or symmetric...)
        is_up = False
        is_down = False
        if "up" in os.path.basename(f.name).lower():
            is_up = True
            logger.info('This should be an UP variation.')
        elif "down" in os.path.basename(f.name).lower():
            is_down = True
            logger.info('This should be a DOWN variation.')
        else:
            logger.warning(
                'Probably neither up nor down, but a symmetrised table. Sure?')
        ##now comes the ugly parsing part
        ##can we do this at least not too ugly?

        lines = []
        #first, get the relevant part from the tex file. If the user has made it easy and tagged the respective parts with %tex2hf, we can simply use what's between it
        keywords = False
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as s:
            if s.find(b'tex2hf') != -1:
                logger.info(
                    'Found keywords in file, so now we can just use what is between them'
                )
                keywords = True
        if keywords == True:
            copy = False
            for line in f:
                if "tex2hf" in line.strip():
                    copy = not copy
                    continue
                elif copy:
                    lines.append(line.strip())
        else:
            #otherwise just drop out, I don't want to think about this any further ...
            logger.error(
                'You need to provide keywords. I am too lazy to think about something else. Put "tex2hf" before the first and after the last line (as a comment of course, you do not want this to show up in the table, do you?).'
            )
            sys.exit()

        for line in lines:
            #get rid of any symbols we don't need
            line = line.strip().replace("$", "").replace("\\", "")
            #latex columns, get the region first. Need to strip all whitespace
            region = "".join(line.split("&")[0].split())
            region = getRegionFromExpression(region)
            if region == 0:
                continue
            #then the uncertainty, usually in the last column

            # print("{} : {}".format(region, line.split("&")[-1]))
            uncertainty = round(
                float(
                    line.split("&")[-1].replace("pm", "").replace(
                        "%", "").strip()) / 100, 4)
            # print(uncertainty)
            if is_up:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning(
                        'Uncertainty larger than 100%. Truncating to 1.-1.')
                values[systematic][region]["up"].append(uncertainty)
            elif is_down:
                if uncertainty < -1.0:
                    uncertainty = -1.0
                    logger.warning(
                        'Uncertainty larger than 100%. Truncating to 1.-1.')
                values[systematic][region]["down"].append(uncertainty)
            else:
                up_unc = abs(uncertainty)
                down_unc = -up_unc
                if abs(uncertainty) > 1.0:
                    logger.warning(
                        'Uncertainty larger than 100%. Truncating to 1.-1.')
                    down_unc = -1
                values[systematic][region]["up"].append(up_unc)
                values[systematic][region]["down"].append(down_unc)
Example #10
    def createYieldstable(self):
        """
        create the yieldsTable
        table is returned as json
        """

        logger.info("Creating yieldstable")

        open_trees = {} # index "filename_treename"
        open_files = {}

        yields_dict = self._createOrderedDict()

        for process,type,processtrees,processweights in self.processes:

            logger.info("Projecting {}".format(process))

            raw = 0
            weighted = 0
            error = 0

            for selection,cuts in self.selections.iteritems():
                for filename, treename in processtrees:
                    index = "{}_{}".format(filename, treename)
                    if index in open_trees:
                        tree = open_trees[index]
                    else:
                        if filename in open_files:
                            rootfile = open_files[filename]
                        else:
                            rootfile = ROOT.TFile(filename)
                            open_files[filename] = rootfile
                        tree = rootfile.Get(treename)
                        open_trees[index] = tree

                    logger.debug("Projecting {} in file {} with selection {}".format(treename,filename,selection))

                    h = ROOT.TH1F("h","",1,0.5,1.5)
                    h.Sumw2()
                    combined_weights = "({})*({})".format(self.weights,processweights) if processweights else self.weights
                    tree.Project("h","1","({})*({})*({})".format(self.lumifactor,combined_weights,cuts))
                    #the following is not 100% safe because GetEntries acts weird if one adds processweights. So if you use process-specific weights instead of cuts, pay attention!
                    combined_cuts = "({})*({})".format(cuts,processweights) if processweights else cuts
                    yields_dict[process][selection]["raw"] += tree.GetEntries(combined_cuts)
                    yields_dict[process][selection]["weighted"] += h.Integral()
                    yields_dict[process][selection]["error"] += h.GetBinError(1)**2
                    del h
                yields_dict[process][selection]["error"] = math.sqrt(yields_dict[process][selection]["error"])

        for selection in self.selections:
            logger.debug("Summing up SM -- selection {}".format(selection))
            bkg_total_unweighted = 0
            bkg_total_weighted = 0
            bkg_total_error = 0
            for process in self.bkg_processes:
                logger.debug("Summing up SM -- process {}".format(process))
                bkg_total_unweighted += yields_dict[process][selection]["raw"]
                bkg_total_weighted += yields_dict[process][selection]["weighted"]
                bkg_total_error += yields_dict[process][selection]["error"]**2
            bkg_total_error = math.sqrt(bkg_total_error)
            yields_dict["Total SM"][selection] = {"raw":bkg_total_unweighted, "weighted":bkg_total_weighted, "error":bkg_total_error}

        return yields_dict
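The _createOrderedDict helper is not shown in this excerpt. One plausible sketch, inferred from how yields_dict is indexed above (a zero-initialised counter dict per process and selection, plus a "Total SM" entry):

from collections import OrderedDict

def _createOrderedDict(self):
    # hypothetical sketch: raw/weighted/error counters accumulated later in createYieldstable
    yields = OrderedDict()
    for process in [p[0] for p in self.processes] + ["Total SM"]:
        yields[process] = OrderedDict(
            (selection, {"raw": 0, "weighted": 0.0, "error": 0.0})
            for selection in self.selections
        )
    return yields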
Example #11
def run():

    config = ConfigParser.ConfigParser()
    config.read("config.cfg")

    password = config.get("credentials", "password")
    username = config.get("credentials", "username")
    database = config.get("credentials", "database")

    logger.info("Constructing InfluxDB queries.")
    logger.info("Getting distinct key sets")
    client = InfluxDBClient("dbod-eschanet.cern.ch", 8080, username, password,
                            "monit_jobs", True, False)
    rs_distinct_sets = client.query(
        """select panda_queue, prod_source, resource, job_status, jobs from "1h"."jobs" where time > now() - 30d and "prod_source" != '' group by panda_queue, prod_source, resource, job_status limit 1"""
    )

    logger.info("Getting 10m data")
    rs_result_24h = client.query(
        """select * from "10m"."jobs" where time > now() - 24h and "prod_source" != '' group by panda_queue, prod_source, resource, job_status """
    )
    logger.info("Got 10m data")
    raw_dict_24h = rs_result_24h.raw
    series_24h = raw_dict_24h["series"]

    logger.info("Getting 1d data")
    rs_result_30d = client.query(
        """select * from "1d"."jobs" where time > now() - 30d and "prod_source" != '' group by panda_queue, prod_source, resource, job_status """
    )
    logger.info("Got 1d data")
    raw_dict_30d = rs_result_30d.raw
    series_30d = raw_dict_30d["series"]

    logger.info("Got data from InfluxDB.")
    logger.info("Constructing MySQL connector.")

    cnx = mysql.connector.connect(
        user="******",
        password=password,
        host="dbod-sql-graf.cern.ch",
        port=5501,
        database="monit_jobs",
    )
    cursor = cnx.cursor()
    selector = cnx.cursor()

    # in mysql there may still be unique pq-resource combinations that don't exist anymore
    pqs_mysql = get_pq_from_mysql(selector)

    logger.info("Building data.")
    data, missing_pqs = get_derived_quantities(rs_distinct_sets, series_24h,
                                               series_30d, pqs_mysql)

    for point in get_list_to_upload(data):
        if args.debug:
            print(point)
        if not args.skipSubmit:
            cursor.execute(point)

    for pq, prod_source, resource in missing_pqs:
        cursor.execute(
            'DELETE FROM jobs WHERE panda_queue = "{panda_queue}" AND resource = "{resource}" AND prod_source = "{prod_source}"'
            .format(panda_queue=pq, resource=resource,
                    prod_source=prod_source))

    if not args.skipSubmit:
        cnx.commit()
        cursor.close()
        cnx.close()
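The get_pq_from_mysql helper is not shown. A minimal sketch consistent with how it is used above (the set of queue/resource combinations currently stored in MySQL); purely illustrative:

def get_pq_from_mysql(cursor):
    # hypothetical helper: (panda_queue, prod_source, resource) tuples already present in the jobs table
    cursor.execute("select panda_queue, prod_source, resource from jobs")
    return set(cursor.fetchall())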
Example #12
if not args.method in ['norm', 'skewnorm']:
    logger.error("Provided smearing method not implemented!")
    raise ValueError("Sorry, need to exit here.")

# if not os.path.isdir(args.inputdir):
#     logger.error("Provided path does not exist or is not a directory!")
#     raise ValueError("Sorry, need to exit here.")

np.random.seed(args.seed)

for indx, f in enumerate(args.inputfiles):

    if not f.endswith("update.root"):
        continue

    logger.info("Updating " + f)
    treename = f.replace("_update.root", "_NoSys")
    file = uproot.open(os.path.join(f))
    tree = file[treename]

    df = tree.pandas.df(tree.keys())

    if args.method == 'skewnorm':
        distribution = skewnorm(float(args.skew),
                                loc=float(args.loc),
                                scale=float(args.scale))
        df[args.branch_name] = df[args.branch_name] * distribution.rvs(
            size=df[args.branch_name].shape)
    else:
        df[args.branch_name] = df[args.branch_name] * np.random.normal(
            float(args.mu), float(args.sigma), df[args.branch_name].shape)
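A standalone sketch of the skew-normal smearing step in isolation, with hypothetical skew, loc and scale values (requires numpy and scipy):

import numpy as np
from scipy.stats import skewnorm

np.random.seed(42)

weights = np.ones(5)  # stand-in for the branch being smeared
distribution = skewnorm(4.0, loc=1.0, scale=0.1)

# multiply each entry by an independent draw from the skew-normal distribution
smeared = weights * distribution.rvs(size=weights.shape)
print(smeared)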
Example #13
args = parser.parse_args()

if not os.path.isfile(args.inputfile):
    logger.error("Provided ROOT file does not exist or is not a file!")
    raise ValueError("Sorry, need to exit here.")

if args.xsec_file:
    if not os.path.isfile(args.xsec_file):
        logger.error("Provided xsec file does not exist or is not a file!")
        raise ValueError("Sorry, need to exit here.")
    else:
        if not args.dsid:
            logger.warning(
                "Provided a xsec file, but not a DSID, will try to guess ...")
        else:
            logger.info("Provided xsec file and DSID, thanks mate!")


def get_xsec(xsec_file, my_dsid=None):
    with open(xsec_file) as f:
        f.readline()
        for l in f:
            fields = l.rstrip().split()
            # DSID is the first field
            if int(my_dsid) == int(fields[0]):
                xsec = float(fields[2])
                filter_eff = float(fields[3])
                kfactor = float(fields[4])

                logger.info("Got xsec: {}".format(xsec))
                return xsec * filter_eff * kfactor
        else:
Example #14
                logger.error("Cannot figure out signal model.")
                raise ValueError("Sorry, need to exit here.")

            mass_string = mass_string.replace("p0", "").replace("p5", ".5")

            if not dict.has_key(mass_string):
                lowerMass = int(float(mass_string)) // 5 * 5
                upperMass = lowerMass + 5

                xsec = (dict[str(lowerMass)] + dict[str(upperMass)]) / 2.0
                return xsec
            else:
                return dict[mass_string]

        xsec = getxsec(f)
        logger.info("Found xsec: " + str(xsec))
        logger.info("Updating " + f)
        tf = ROOT.TFile(args.inputdir + "/" + f)
        # tree = tf.Get('OneLepton2016__ntuple')
        tree = tf.Get('EwkOneLeptonTwoBjets2018_simplifiedfit__ntuple')
        nentries = tree.GetEntries()
        sumofweights = 0
        for event in tree:
            sumofweights += event.eventWeight

        nBJet30_MV2c10 = array('i', [0])
        if int(sumofweights) != int(nentries):
            logger.warning(
                "SumW is not equal to nEntries. Did not expect that!")
        tree.SetBranchStatus("genWeight", 0)
        # tree.SetBranchAddress("nBJet20_MV2c10", nBJet30_MV2c10)
Example #15
def run():

    config = ConfigParser.ConfigParser()
    config.read("config.cfg")

    password = config.get("credentials", "password")
    username = config.get("credentials", "username")
    database = config.get("credentials", "database")

    logger.info("Constructing MySQL connector.")

    reader = mysql.connector.connect(
        user="******",
        password=password,
        host="dbod-sql-graf.cern.ch",
        port=5501,
        database="monit_jobs",
    )
    read_cursor = reader.cursor()
    writer = mysql.connector.connect(
        user="******",
        password=password,
        host="dbod-sql-graf.cern.ch",
        port=5501,
        database="monit_jobs",
    )
    write_cursor = writer.cursor()

    logger.info("Getting existing data.")
    read_cursor.execute("select panda_queue,prod_source, resource from jobs")

    def getJSON(file):
        with open(file) as f:
            return json.load(f)

    panda_queues = getJSON("data/scraped_cric_pandaqueue.json")
    panda_resources = getJSON("data/map_PQ_names.json")
    datadisk_info = getJSON("data/scraped_grafana_datadisk.json")
    federations_resources = getJSON("data/scraped_rebus_federations.json")

    for (panda_queue, prod_source, resource) in read_cursor:
        try:
            nickname = panda_resources[
                panda_queue
            ]  # do the mapping to actual panda queue nicknames
            atlas_site = panda_queues[nickname]["atlas_site"]
        except:
            logger.warning(
                "Does not exist: queue: %s   Prod_source: %s    Resource: %s"
                % (panda_queue, prod_source, resource)
            )
            continue

        logger.debug(
            "Queue: %s    Prod_source: %s     Resource: %s"
            % (panda_queue, prod_source, resource)
        )

        atlas_site = panda_queues[nickname]["atlas_site"]
        type = panda_queues[nickname]["type"]
        cloud = panda_queues[nickname]["cloud"]
        country = panda_queues[nickname]["country"]
        federation = panda_queues.get(nickname, {}).get("rc", "None")
        site_state = panda_queues[nickname]["status"]
        tier = panda_queues[nickname]["tier"]
        resource_type = panda_queues[nickname].get("resource_type", "None")

        if "MCORE" in resource:
            if panda_queues[nickname]["corecount"]:
                resource_factor = float(panda_queues[nickname]["corecount"])
            else:
                resource_factor = 8.0
        else:
            resource_factor = 1.0

        ddm_names = (
            panda_queues.get(nickname, {}).get("astorages", {}).get("read_lan", [])
        )

        # ddm_names = panda_queues[nickname]["ddm"].split(",")
        datadisk_names = [d for d in ddm_names if "DATADISK" in d]

        if len(datadisk_names) > 1:
            logger.warning(
                "Got more than one datadisk for: %s, %s" % (atlas_site, datadisk_names)
            )

        try:
            datadisk_name = datadisk_names[0]
            datadisk_size = datadisk_info[datadisk_name]["bytes"] / (1e9)
            datadisk_files = datadisk_info[datadisk_name]["files"]
        except:
            logger.warning(
                "Datadisk not found for: %s, %s" % (atlas_site, datadisk_names)
            )
            datadisk_name = "NONE"
            datadisk_size = 0.0
            datadisk_files = 0

        add_point = '''INSERT INTO jobs (panda_queue, prod_source, resource) VALUES ("{panda_queue}","{prod_source}", "{resource}") ON DUPLICATE KEY UPDATE atlas_site="{atlas_site}", type="{type}", country="{country}", cloud="{cloud}",federation="{federation}", site_state="{site_state}", tier="{tier}",resource_factor="{resource_factor}",resource_type="{resource_type}", datadisk_name="{datadisk_name}", datadisk_occupied_gb="{datadisk_size}", datadisk_files="{datadisk_files}"'''.format(
            atlas_site=atlas_site,
            panda_queue=panda_queue,
            type=type,
            prod_source=prod_source,
            cloud=cloud,
            country=country,
            federation=federation,
            site_state=site_state,
            tier=tier,
            resource_factor=resource_factor,
            resource=resource,
            resource_type=resource_type,
            datadisk_name=datadisk_name,
            datadisk_size=datadisk_size,
            datadisk_files=datadisk_files,
        )

        if panda_queue == "ANALY_SiGNET":
            print(add_point)
            print(atlas_site)

        write_cursor.execute(add_point)

    writer.commit()
    read_cursor.close()
    write_cursor.close()
    reader.close()
    writer.close()
Example #16
    '-s',
    '--signal',
    action="store_true",
    help="Use signal tree naming convention (default is background trees)")

args = parser.parse_args()

cmd = "rootls {}".format(args.file)
process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
output, error = process.communicate()

trees = output.split()

if args.signal:
    naming = 5
else:
    naming = 1
raw_names = [
    "_".join(fullname.split("_")[:naming]) + "_" for fullname in trees
]

unique_names = list(set(raw_names))

split_trees = [[t for t in trees if b in t] for b in unique_names]

length = len(split_trees[0])
for (l, bkg) in zip(split_trees, unique_names):
    logger.info("For process {}: {} trees".format(bkg, len(l)))
    if not len(l) == length:
        logger.error("Not the right length!")
Example #17
parser.add_argument("-file", help="ROOT file")
parser.add_argument('--signal', help='Use signal', action='store_true')

args = parser.parse_args()

tf = ROOT.TFile.Open(args.file, "READ") if isinstance(
    args.file, basestring) else args.file

trees_list = []
checklist = []
processes = []

for key in tf.GetListOfKeys():
    trees_list.append(key.GetName())

logger.info("{} trees in {}".format(len(trees_list), args.file))

for treename in trees_list:
    # logger.info("Opening {}".format(treename))
    tree = tf.Get(treename)

    if not args.signal:
        processname = treename.split("_")[0]
    else:
        processname = "_".join(treename.split("_", 4)[:4])

    if not processname in processes:
        processes.append(processname)

    mc16a = 0
    mc16d = 0
Example #18
    for test in testmodules:
        print(test)
    sys.exit(0)

suite = unittest.TestSuite()

# based on code snippet from http://stackoverflow.com/questions/1732438/how-do-i-run-all-python-unit-tests-in-a-directory#15630454
for postfix in tests:
    t = "test.test_" + postfix
    if "." in postfix:
        # I don't have a better solution yet, so hack for now
        importTest = ".".join(t.split(".")[:-2])
    else:
        importTest = t
    try:
        logger.info("Trying to import {}".format(importTest))
        mod = __import__(importTest, globals(), locals(), ['suite'])
    except ImportError:
        logger.error("Test {} not found - try {}".format(t, testmodules))
        raise
    try:
        # If the module defines a suite() function, call it to get the suite.
        suitefn = getattr(mod, 'suite')
        suite.addTest(suitefn())
    except (ImportError, AttributeError):
        # else, just load all the test cases from the module.
        logger.info("Loading test {}".format(t))
        suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t))

result = unittest.TextTestRunner(verbosity=verbosity).run(suite)
sys.exit(not result.wasSuccessful())
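For reference, a minimal sketch of a test module that this runner would pick up via the optional suite() hook (module and test names are hypothetical):

import unittest


class ExampleTest(unittest.TestCase):
    def test_trivial(self):
        self.assertTrue(True)


def suite():
    # optional hook: the runner calls this if it exists, otherwise it falls back to
    # unittest.defaultTestLoader.loadTestsFromName
    return unittest.TestLoader().loadTestsFromTestCase(ExampleTest)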