示例#1
0
def parseCard(file, options):
    """Parse the lines of a datacard and fill a ``Datacard`` object.

    The input is consumed through one shared iterator in two passes: first
    the header (``imax``/``jmax``/``kmax``, ``shapes``, ``observation``,
    ``bin``, ``process`` and ``rate`` lines, stopping right after ``rate``),
    then the nuisance lines that follow it.

    Args:
        file: A file object, stream or list of lines. A plain string is
            rejected.
        options: Object providing the attributes ``bin``, ``noJMax``,
            ``allowNoSignal``, ``nuisancesToExclude`` and ``verbose``.
            ``evaluateEdits`` is read as well and is set to ``True`` on the
            object when it is missing.

    Returns:
        The populated ``Datacard`` instance.

    Raises:
        RuntimeError: On a string argument or on malformed/inconsistent lines.
        ValueError: On negative (or, for asymmetric entries, non-positive)
            nuisance effects, and on tokens that cannot be converted to
            numbers.
        Exception: Any error raised while a line is being processed is
            re-raised with "Error reading line N[ of file NAME]: " prepended
            to its first argument.
    """
    if isinstance(file, str):
        raise RuntimeError(
            "You should pass as argument to parseCards a file object, stream or a list of lines, not a string"
        )
    ret = Datacard()

    # resetting these here to defaults, parseCard will fill them up
    ret.discretes = []
    ret.groups = {}

    nbins = -1
    nprocesses = -1
    # declared number of nuisances (kmax); it is only tracked here, this
    # function does not compare it against the parsed nuisances
    nuisances = -1
    binline = []
    processline = []
    sigline = []
    lineNumber = None

    if not hasattr(options, "evaluateEdits"):
        options.evaluateEdits = True

    try:
        # One iterator for both passes: a list of lines is then not re-read
        # from the top by the nuisance pass, exactly like a file object.
        lines = iter(file)
        for lineNumber, l in enumerate(lines):
            f = l.split()
            if len(f) < 1:
                continue
            key = f[0]
            if key == "imax":
                nbins = int(f[1]) if f[1] != "*" else -1
            elif key == "jmax":
                nprocesses = int(f[1]) + 1 if f[1] != "*" else -1
            elif key == "kmax":
                nuisances = int(f[1]) if f[1] != "*" else -1
            elif key == "shapes":
                if not options.bin:
                    raise RuntimeError("Can use shapes only with binary output mode")
                if len(f) < 4:
                    raise RuntimeError("Malformed shapes line")
                if f[2] not in ret.shapeMap:
                    ret.shapeMap[f[2]] = {}
                if f[1] in ret.shapeMap[f[2]]:
                    raise RuntimeError(
                        "Duplicate definition for process '%s', channel '%s'" % (f[1], f[2]))
                ret.shapeMap[f[2]][f[1]] = f[3:]
            elif key in ("Observation", "observation"):
                ret.obs = [float(x) for x in f[1:]]
                if nbins == -1:
                    nbins = len(ret.obs)
                if len(ret.obs) != nbins:
                    raise RuntimeError(
                        "Found %d observations but %d bins have been declared" % (len(ret.obs), nbins))
                if binline:
                    if len(binline) != len(ret.obs):
                        # report the bin line that was actually read;
                        # ret.bins has not been assigned from it yet
                        raise RuntimeError(
                            "Found %d bins (%s) but %d bins have been declared" % (len(binline), binline, nbins))
                    ret.bins = binline
                    # lengths were just checked to be equal
                    ret.obs = dict(zip(ret.bins, ret.obs))
                    binline = []
            elif key == "bin":
                binline = []
                for b in f[1:]:
                    if re.match("[0-9]+", b):
                        raise RuntimeError("Error: Bin %s starts with a digit!" % b)
                    binline.append(b)
            elif key == "process":
                if processline == []:  # first line contains names
                    processline = f[1:]
                    if len(binline) != len(processline):
                        raise RuntimeError("'bin' line has a different length than 'process' line.")
                    continue
                sigline = f[1:]  # second line contains ids
                # accept the two process lines in either order (ids first)
                if re.match("-?[0-9]+", processline[0]) and not re.match("-?[0-9]+", sigline[0]):
                    (processline, sigline) = (sigline, processline)
                if len(sigline) != len(processline):
                    raise RuntimeError("The two 'process' lines have different lengths.")
                hadBins = (len(ret.bins) > 0)
                for i, b in enumerate(binline):
                    p = processline[i]
                    s = (int(sigline[i]) <= 0)  # <=0 for signals, >0 for backgrounds
                    ret.keyline.append((b, p, s))
                    if hadBins:
                        if b not in ret.bins:
                            raise RuntimeError(
                                "Bin %s not among the declared bins %s" % (b, ret.bins))
                    else:
                        if b not in ret.bins:
                            ret.bins.append(b)
                    if p not in ret.processes:
                        ret.processes.append(p)
                if nprocesses == -1:
                    nprocesses = len(ret.processes)
                if nbins == -1:
                    nbins = len(ret.bins)
                if not options.noJMax:
                    if nprocesses != len(ret.processes):
                        raise RuntimeError(
                            "Found %d processes (%s), declared jmax = %d" % (
                                len(ret.processes), ret.processes, nprocesses))
                if nbins != len(ret.bins):
                    raise RuntimeError(
                        "Found %d bins (%s), declared imax = %d" % (len(ret.bins), ret.bins, nbins))
                ret.exp = dict([(b, {}) for b in ret.bins])
                ret.isSignal = dict([(p, None) for p in ret.processes])
                if ret.obs != [] and type(ret.obs) == list:
                    # still as list, must change into map with bin names
                    ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
                for (b, p, s) in ret.keyline:
                    if ret.isSignal[p] is None:
                        ret.isSignal[p] = s
                    elif ret.isSignal[p] != s:
                        raise RuntimeError(
                            "Process %s is declared as signal in some bin and as background in some other bin" % p)
                ret.signals = [p for p, s in ret.isSignal.items() if s]
                if len(ret.signals) == 0 and not options.allowNoSignal:
                    raise RuntimeError("You must have at least one signal process (id <= 0)")
            elif key == "rate":
                if processline == []:
                    raise RuntimeError("Missing line with process names before rate line")
                if sigline == []:
                    raise RuntimeError("Missing line with process id before rate line")
                if len(f[1:]) != len(ret.keyline):
                    raise RuntimeError(
                        "Malformed rate line: length %d, while bins and process lines have length %d" % (
                            len(f[1:]), len(ret.keyline)))
                for (b, p, s), r in zip(ret.keyline, f[1:]):
                    ret.exp[b][p] = float(r)
                break  # rate is the last line before nuisances

        # parse nuisances; numbering continues after the header so that error
        # messages give the position within the whole input
        firstNuisanceLine = 0 if lineNumber is None else lineNumber + 1
        for lineNumber, l in enumerate(lines, firstNuisanceLine):
            if l.startswith("--"):
                continue
            l = re.sub("\\s*#.*", "", l)  # drop trailing comments
            # a standalone run of dashes is rewritten as the number 0
            l = re.sub("(?<=\\s)-+(\\s|$)", " 0\\1", l)
            f = l.split()
            if len(f) <= 1:
                continue
            nofloat = False
            lsyst = f[0]
            pdf = f[1]
            args = []
            numbers = f[2:]
            if lsyst.endswith("[nofloat]"):
                lsyst = lsyst.replace("[nofloat]", "")
                nofloat = True
            if options.nuisancesToExclude and isVetoed(lsyst, options.nuisancesToExclude):
                if options.verbose > 0:
                    stderr.write(
                        "Excluding nuisance %s selected by a veto pattern among %s\n"
                        % (lsyst, options.nuisancesToExclude))
                if nuisances != -1:
                    nuisances -= 1
                continue
            if re.match("[0-9]+", lsyst):
                lsyst = "theta" + lsyst
            if pdf in ("lnN", "lnU", "gmM", "trG") or pdf.startswith("shape"):
                pass  # nothing special to do
            elif pdf == "gmN":
                args = [int(f[2])]
                numbers = f[3:]
            elif pdf == "unif":
                args = [float(f[2]), float(f[3])]
                numbers = f[4:]
            elif pdf == "dFD" or pdf == "dFD2":
                args = [float(f[2])]
                numbers = f[3:]
            elif pdf == "constr":
                args = f[2:]
                ret.systs.append([lsyst, nofloat, pdf, args, []])
                continue
            elif pdf == "param":
                # for parametric uncertainties, there's no line to account per bin/process effects
                # just assume everything else is an argument and move on
                args = f[2:]
                if len(args) <= 1:
                    raise RuntimeError(
                        "Uncertainties of type 'param' must have at least two arguments (mean and sigma)")
                ret.systs.append([lsyst, nofloat, pdf, args, []])
                continue
            elif pdf == "flatParam":
                ret.flatParamNuisances[lsyst] = True
                # for flat parametric uncertainties, code already does the right thing as long as
                # they are non-constant RooRealVars linked to the model
                continue
            elif pdf == "extArg":
                # look for additional parameters in workspaces
                ret.extArgs[lsyst] = f[:]
                continue
            elif pdf == "rateParam":
                if ("*" in f[3]) or ("*" in f[2]):  # all channels/processes
                    found = False
                    for c in ret.processes:
                        for b in ret.bins:
                            if not fnmatch.fnmatch(c, f[3]):
                                continue
                            if not fnmatch.fnmatch(b, f[2]):
                                continue
                            # expand the wildcard into one concrete (bin, process) entry
                            f_tmp = f[:]
                            f_tmp[2] = b
                            f_tmp[3] = c
                            addRateParam(lsyst, f_tmp, ret)
                            found = True
                    if not found:
                        raise RuntimeError(
                            "rateParam %s with process %r bin %r doesn't match anything." % (
                                lsyst, f[3], f[2]))
                else:
                    addRateParam(lsyst, f, ret)
                continue
            elif pdf == "discrete":
                ret.discretes.append(lsyst)
                continue
            elif pdf == "edit":
                nuisances = -1
                if options.evaluateEdits:
                    if options.verbose > 1:
                        print("Before edit: \n\t%s\n" % ("\n\t".join([str(x) for x in ret.systs])))
                        print("Edit command: %s\n" % numbers)
                    doEditNuisance(ret, numbers[0], numbers[1:])
                    if options.verbose > 1:
                        print("After edit: \n\t%s\n" % ("\n\t".join([str(x) for x in ret.systs])))
                else:
                    # only record the edit so that it can be applied later
                    if numbers[0] in ["changepdf", "freeze"]:
                        ret.nuisanceEditLines.append([numbers[0], numbers[1:]])
                    else:
                        ret.nuisanceEditLines.append(
                            [numbers[0], numbers[1], numbers[2], numbers[3:]])
                continue
            elif pdf == "group":
                # This is not really a pdf type, but a way to be able to name groups of nuisances together
                groupName = lsyst
                groupNuisances = numbers

                if not groupNuisances:
                    raise RuntimeError(
                        "Syntax error for group '%s': empty line after 'group'." % groupName)

                defToks = ('=', '+=')
                defTok = groupNuisances.pop(0)
                if defTok not in defToks:
                    raise RuntimeError(
                        "Syntax error for group '%s': first thing after 'group' is not '[+]=' but '%s'." % (
                            groupName, defTok))

                if groupName not in ret.groups:
                    if defTok == '=':
                        ret.groups[groupName] = set(groupNuisances)
                    else:
                        raise RuntimeError(
                            "Cannot append to group '%s' as it was not yet defined." % groupName)
                else:
                    if defTok == '+=':
                        ret.groups[groupName].update(set(groupNuisances))
                    else:
                        raise RuntimeError(
                            "Will not redefine group '%s'. It previously contained '%s' and you now wanted it to contain '%s'." % (
                                groupName, ret.groups[groupName], groupNuisances))

                continue
            elif pdf == "autoMCStats":
                # NOTE(review): the message below advertises hist-mode = 0 as
                # default while the code defaults to 1 -- confirm which one
                # is intended.
                if len(f) > 5:
                    raise RuntimeError(
                        "Syntax for autoMCStats should be 'channel autoMCStats threshold [include-signal = 0] [hist-mode = 0]")
                statThreshold = float(f[2])
                statIncludeSig = bool(int(f[3])) if len(f) >= 4 else False
                statHistMode = int(f[4]) if len(f) >= 5 else 1
                statFlags = (statThreshold, statIncludeSig, statHistMode)
                if "*" in lsyst:
                    for b in ret.bins:
                        if not fnmatch.fnmatch(b, lsyst):
                            continue
                        ret.binParFlags[b] = statFlags
                else:
                    if lsyst not in ret.bins:
                        raise RuntimeError(
                            " No such channel '%s', malformed line:\n   %s" % (lsyst, ' '.join(f)))
                    ret.binParFlags[lsyst] = statFlags
                continue
            else:
                raise RuntimeError("Unsupported pdf %s" % pdf)

            # Only the pdf types that fall through to here carry one effect
            # per (bin, process) column.
            if len(numbers) < len(ret.keyline):
                raise RuntimeError(
                    "Malformed systematics line %s of length %d: while bins and process lines have length %d" % (
                        lsyst, len(numbers), len(ret.keyline)))
            errline = dict([(b, {}) for b in ret.bins])
            for (b, p, s), r in zip(ret.keyline, numbers):
                if "/" in r:  # "number/number"
                    if (pdf not in ["lnN", "lnU"]) and ("?" not in pdf):
                        raise RuntimeError("Asymmetric errors are allowed only for Log-normals")
                    errline[b][p] = [float(x) for x in r.split("/")]
                    for v in errline[b][p]:
                        if v <= 0.00:
                            raise ValueError(
                                'Found "%s" in the nuisances affecting %s for %s. This would lead to NANs later on, so please fix it.'
                                % (r, p, b))
                else:
                    errline[b][p] = float(r)
                    # values of 0.0 are treated as 1.0; scrap negative values.
                    if pdf not in ["trG", "dFD", "dFD2"] and errline[b][p] < 0:
                        raise ValueError(
                            'Found "%s" in the nuisances affecting %s in %s. This would lead to NANs later on, so please fix it.'
                            % (r, p, b))
                # set the rate to epsilon for backgrounds with zero observed sideband events.
                if pdf == "gmN" and ret.exp[b][p] == 0 and float(r) != 0:
                    ret.exp[b][p] = 1e-6
            ret.systs.append([lsyst, nofloat, pdf, args, errline])
    except Exception as ex:
        if lineNumber is not None:
            msg = "Error reading line %d" % (lineNumber + 1)
            if hasattr(file, 'name'):
                msg += " of file " + file.name

            # tolerate exceptions raised without arguments or with a
            # non-string first argument
            detail = ex.args[0] if ex.args else ""
            msg += ": %s" % (detail, )
            ex.args = (msg, ) + ex.args[1:]

        raise

    return ret
示例#2
0
def parseCard(file, options):
    """Parse the lines of a datacard and fill a ``Datacard`` object.

    The input is consumed through one shared iterator in two passes: first
    the header (``imax``/``jmax``/``kmax``, ``shapes``, ``observation``,
    ``bin``, ``process`` and ``rate`` lines, stopping right after ``rate``),
    then the nuisance lines that follow it.

    Args:
        file: A file object, stream or list of lines. A plain string is
            rejected.
        options: Object providing the attributes ``bin``, ``noJMax``,
            ``allowNoSignal``, ``nuisancesToExclude`` and ``verbose``.

    Returns:
        The populated ``Datacard`` instance.

    Raises:
        RuntimeError: On a string argument or on malformed/inconsistent lines.
        ValueError: On negative (or, for asymmetric entries, non-positive)
            nuisance effects, and on tokens that cannot be converted to
            numbers.
        Exception: Any error raised while a line is being processed is
            re-raised with "Error reading line N[ of file NAME]: " prepended
            to its first argument.
    """
    if isinstance(file, str):
        raise RuntimeError(
            "You should pass as argument to parseCards a file object, stream or a list of lines, not a string"
        )
    ret = Datacard()
    ret.discretes = []
    ret.groups = {}

    nbins = -1
    nprocesses = -1
    # declared number of nuisances (kmax); it is only tracked here, this
    # function does not compare it against the parsed nuisances
    nuisances = -1
    binline = []
    processline = []
    sigline = []
    shapesUseBin = False
    lineNumber = None
    try:
        # One iterator for both passes: a list of lines is then not re-read
        # from the top by the nuisance pass, exactly like a file object.
        lines = iter(file)
        for lineNumber, l in enumerate(lines):
            f = l.split()
            if len(f) < 1:
                continue
            key = f[0]
            if key == "imax":
                nbins = int(f[1]) if f[1] != "*" else -1
            elif key == "jmax":
                nprocesses = int(f[1]) + 1 if f[1] != "*" else -1
            elif key == "kmax":
                nuisances = int(f[1]) if f[1] != "*" else -1
            elif key == "shapes":
                if not options.bin:
                    raise RuntimeError("Can use shapes only with binary output mode")
                if len(f) < 4:
                    raise RuntimeError("Malformed shapes line")
                if f[2] not in ret.shapeMap:
                    ret.shapeMap[f[2]] = {}
                if f[1] in ret.shapeMap[f[2]]:
                    raise RuntimeError(
                        "Duplicate definition for process '%s', channel '%s'" % (f[1], f[2]))
                ret.shapeMap[f[2]][f[1]] = f[3:]
                # remember whether any shapes line depends on the bin name
                if "$CHANNEL" in l:
                    shapesUseBin = True
                if f[2] != "*":
                    shapesUseBin = True
            elif key in ("Observation", "observation"):
                ret.obs = [float(x) for x in f[1:]]
                if nbins == -1:
                    nbins = len(ret.obs)
                if len(ret.obs) != nbins:
                    raise RuntimeError(
                        "Found %d observations but %d bins have been declared" % (len(ret.obs), nbins))
                if binline:
                    if len(binline) != len(ret.obs):
                        # report the bin line that was actually read;
                        # ret.bins has not been assigned from it yet
                        raise RuntimeError(
                            "Found %d bins (%s) but %d bins have been declared" % (len(binline), binline, nbins))
                    ret.bins = binline
                    # lengths were just checked to be equal
                    ret.obs = dict(zip(ret.bins, ret.obs))
                    binline = []
            elif key == "bin":
                binline = []
                for b in f[1:]:
                    if re.match("[0-9]+", b):
                        if shapesUseBin:
                            stderr.write(
                                "Warning: Bin %(b)s starts with a digit. Will call it 'bin%(b)s' but this may break shapes.\n"
                                % {"b": b})
                        b = "bin" + b
                        # TODO Here should be some patching of the shapes names in order to not get errors later.
                    binline.append(b)
            elif key == "process":
                if processline == []:  # first line contains names
                    processline = f[1:]
                    if len(binline) != len(processline):
                        raise RuntimeError("'bin' line has a different length than 'process' line.")
                    continue
                sigline = f[1:]  # second line contains ids
                # accept the two process lines in either order (ids first)
                if re.match("-?[0-9]+", processline[0]) and not re.match("-?[0-9]+", sigline[0]):
                    (processline, sigline) = (sigline, processline)
                if len(sigline) != len(processline):
                    raise RuntimeError("The two 'process' lines have different lengths.")
                hadBins = (len(ret.bins) > 0)
                for i, b in enumerate(binline):
                    p = processline[i]
                    s = (int(sigline[i]) <= 0)  # <=0 for signals, >0 for backgrounds
                    ret.keyline.append((b, p, s))
                    if hadBins:
                        if b not in ret.bins:
                            raise RuntimeError(
                                "Bin %s not among the declared bins %s" % (b, ret.bins))
                    else:
                        if b not in ret.bins:
                            ret.bins.append(b)
                    if p not in ret.processes:
                        ret.processes.append(p)
                if nprocesses == -1:
                    nprocesses = len(ret.processes)
                if nbins == -1:
                    nbins = len(ret.bins)
                if not options.noJMax:
                    if nprocesses != len(ret.processes):
                        raise RuntimeError(
                            "Found %d processes (%s), declared jmax = %d" % (
                                len(ret.processes), ret.processes, nprocesses))
                if nbins != len(ret.bins):
                    raise RuntimeError(
                        "Found %d bins (%s), declared imax = %d" % (len(ret.bins), ret.bins, nbins))
                ret.exp = dict([(b, {}) for b in ret.bins])
                ret.isSignal = dict([(p, None) for p in ret.processes])
                if ret.obs != [] and type(ret.obs) == list:
                    # still as list, must change into map with bin names
                    ret.obs = dict([(b, ret.obs[i]) for i, b in enumerate(ret.bins)])
                for (b, p, s) in ret.keyline:
                    if ret.isSignal[p] is None:
                        ret.isSignal[p] = s
                    elif ret.isSignal[p] != s:
                        raise RuntimeError(
                            "Process %s is declared as signal in some bin and as background in some other bin" % p)
                ret.signals = [p for p, s in ret.isSignal.items() if s]
                if len(ret.signals) == 0 and not options.allowNoSignal:
                    raise RuntimeError("You must have at least one signal process (id <= 0)")
            elif key == "rate":
                if processline == []:
                    raise RuntimeError("Missing line with process names before rate line")
                if sigline == []:
                    raise RuntimeError("Missing line with process id before rate line")
                if len(f[1:]) != len(ret.keyline):
                    raise RuntimeError(
                        "Malformed rate line: length %d, while bins and process lines have length %d" % (
                            len(f[1:]), len(ret.keyline)))
                for (b, p, s), r in zip(ret.keyline, f[1:]):
                    ret.exp[b][p] = float(r)
                break  # rate is the last line before nuisances

        # parse nuisances; numbering continues after the header so that error
        # messages give the position within the whole input
        firstNuisanceLine = 0 if lineNumber is None else lineNumber + 1
        for lineNumber, l in enumerate(lines, firstNuisanceLine):
            if l.startswith("--"):
                continue
            l = re.sub("\\s*#.*", "", l)  # drop trailing comments
            # a standalone run of dashes is rewritten as the number 0
            l = re.sub("(?<=\\s)-+(\\s|$)", " 0\\1", l)
            f = l.split()
            if len(f) <= 1:
                continue
            nofloat = False
            lsyst = f[0]
            pdf = f[1]
            args = []
            numbers = f[2:]
            if lsyst.endswith("[nofloat]"):
                lsyst = lsyst.replace("[nofloat]", "")
                nofloat = True
            if options.nuisancesToExclude and isVetoed(lsyst, options.nuisancesToExclude):
                if options.verbose > 0:
                    stderr.write(
                        "Excluding nuisance %s selected by a veto pattern among %s\n"
                        % (lsyst, options.nuisancesToExclude))
                if nuisances != -1:
                    nuisances -= 1
                continue
            if re.match("[0-9]+", lsyst):
                lsyst = "theta" + lsyst
            if pdf in ("lnN", "lnU", "gmM", "trG") or pdf.startswith("shape"):
                pass  # nothing special to do
            elif pdf == "gmN":
                args = [int(f[2])]
                numbers = f[3:]
            elif pdf == "unif":
                args = [float(f[2]), float(f[3])]
                numbers = f[4:]
            elif pdf == "dFD" or pdf == "dFD2":
                args = [float(f[2])]
                numbers = f[3:]
            elif pdf == "param":
                # for parametric uncertainties, there's no line to account per bin/process effects
                # just assume everything else is an argument and move on
                args = f[2:]
                if len(args) <= 1:
                    raise RuntimeError(
                        "Uncertainties of type 'param' must have at least two arguments (mean and sigma)")
                ret.systs.append([lsyst, nofloat, pdf, args, []])
                continue
            elif pdf == "flatParam":
                ret.flatParamNuisances[lsyst] = True
                # for flat parametric uncertainties, code already does the right thing as long as
                # they are non-constant RooRealVars linked to the model
                continue
            elif pdf == "discrete":
                ret.discretes.append(lsyst)
                continue
            elif pdf == "edit":
                nuisances = -1
                if options.verbose > 1:
                    print("Before edit: \n\t%s\n" % ("\n\t".join([str(x) for x in ret.systs])))
                    print("Edit command: %s\n" % numbers)
                doEditNuisance(ret, numbers[0], numbers[1:])
                if options.verbose > 1:
                    print("After edit: \n\t%s\n" % ("\n\t".join([str(x) for x in ret.systs])))
                continue
            elif pdf == "group":
                # This is not really a pdf type, but a way to be able to name groups of nuisances together
                groupName = lsyst
                groupNuisances = numbers

                if not groupNuisances:
                    raise RuntimeError(
                        "Syntax error for group '%s': empty line after 'group'." % groupName)

                defToks = ('=', '+=')
                defTok = groupNuisances.pop(0)
                if defTok not in defToks:
                    raise RuntimeError(
                        "Syntax error for group '%s': first thing after 'group' is not '[+]=' but '%s'." % (
                            groupName, defTok))

                if groupName not in ret.groups:
                    if defTok == '=':
                        ret.groups[groupName] = set(groupNuisances)
                    else:
                        raise RuntimeError(
                            "Cannot append to group '%s' as it was not yet defined." % groupName)
                else:
                    if defTok == '+=':
                        ret.groups[groupName].update(set(groupNuisances))
                    else:
                        raise RuntimeError(
                            "Will not redefine group '%s'. It previously contained '%s' and you now wanted it to contain '%s'." % (
                                groupName, ret.groups[groupName], groupNuisances))

                continue
            else:
                raise RuntimeError("Unsupported pdf %s" % pdf)

            # Only the pdf types that fall through to here carry one effect
            # per (bin, process) column.
            if len(numbers) < len(ret.keyline):
                raise RuntimeError(
                    "Malformed systematics line %s of length %d: while bins and process lines have length %d" % (
                        lsyst, len(numbers), len(ret.keyline)))
            errline = dict([(b, {}) for b in ret.bins])
            for (b, p, s), r in zip(ret.keyline, numbers):
                if "/" in r:  # "number/number"
                    if (pdf not in ["lnN", "lnU"]) and ("?" not in pdf):
                        raise RuntimeError("Asymmetric errors are allowed only for Log-normals")
                    errline[b][p] = [float(x) for x in r.split("/")]
                    for v in errline[b][p]:
                        if v <= 0.00:
                            raise ValueError(
                                'Found "%s" in the nuisances affecting %s for %s. This would lead to NANs later on, so please fix it.'
                                % (r, p, b))
                else:
                    errline[b][p] = float(r)
                    # values of 0.0 are treated as 1.0; scrap negative values.
                    if pdf not in ["trG", "dFD", "dFD2"] and errline[b][p] < 0:
                        raise ValueError(
                            'Found "%s" in the nuisances affecting %s in %s. This would lead to NANs later on, so please fix it.'
                            % (r, p, b))
                # set the rate to epsilon for backgrounds with zero observed sideband events.
                if pdf == "gmN" and ret.exp[b][p] == 0 and float(r) != 0:
                    ret.exp[b][p] = 1e-6
            ret.systs.append([lsyst, nofloat, pdf, args, errline])
    except Exception as ex:
        if lineNumber is not None:
            msg = "Error reading line %d" % (lineNumber + 1)
            if hasattr(file, 'name'):
                msg += " of file " + file.name

            # tolerate exceptions raised without arguments or with a
            # non-string first argument
            detail = ex.args[0] if ex.args else ""
            msg += ": %s" % (detail, )
            ex.args = (msg, ) + ex.args[1:]

        raise

    return ret