def loadData(self,value):
    """
    Uses the information in value to generate the data grid used for
    interpolation: stores the standard units, the data shape, the
    result column (self.y_values) and triggers the PCA/triangulation
    setup via self.computeV.

    :param value: the data grid, either as a string to be evaluated or
                  as a nested list of [massArray, result] pairs.
    :raises SModelSError: if the (unit-stripped) data is not in the
                          expected format.
    """
    # Already initialized (rotation matrix present) -> nothing to do.
    if self._V:
        return
    if isinstance(value,str):
        val = self.evaluateString(value)
    else:
        val = value
    self.units = self.getUnits(val)[0]   #Store standard units
    self.dataShape = self.getDataShape(val[0][0])  #Store the data (mass) format (useful if there are wildcards)
    values = self.removeUnits(val) #Remove units and store the normalization units
    values = self.removeWildCards(values)
    # Each entry must be a [massArray, result] pair.
    if len(values) < 1 or len(values[0]) < 2:
        raise SModelSError("input value not in correct format. expecting sth " \
                           "like [ [ [[ 300.*GeV,100.*GeV], "\
                           "[ 300.*GeV,100.*GeV] ], 10.*fb ], ... ] "\
                           "for upper limits or [ [ [[ 300.*GeV,100.*GeV],"\
                           " [ 300.*GeV,100.*GeV] ], .1 ], ... ] for "\
                           "efficiency maps. Received %s" % values[:80])
    # The last unit entry belongs to the result column: a unum (e.g. fb)
    # for upper limits or a plain float for efficiencies.
    if not isinstance(self.units[-1],unum.Unum) and not isinstance(self.units[-1],float):
        raise SModelSError("Error obtaining units from value: %s " %values[:80])
    # Column 1 holds the result (UL or efficiency) for every grid point.
    self.y_values = np.array(values)[:,1]
    self.computeV(values)
def __init__(self, path=None):
    """
    Initialize the info holder. When a path is given, the info file is
    parsed and every tag that occurs exactly once is stored via
    self.addInfo; repeated tags are reported and skipped.

    :param path: path to the info file (or None for an empty object)
    :raises SModelSError: if path is given but no such file exists.
    """
    self.path = path
    if not path:
        return
    logger.debug('Creating object based on %s' % self.path)
    #Open the info file and get the information:
    if not os.path.isfile(path):
        logger.error("Info file %s not found" % path)
        raise SModelSError()
    from smodels.tools.stringTools import concatenateLines
    with open(self.path) as infoFile:
        content = concatenateLines(infoFile.readlines())
    # One "tag: value" entry per logical line.
    tags = [entry.split(':', 1)[0].strip() for entry in content]
    for tag, entry in zip(tags, content):
        if not tag:
            continue
        value = entry.split(':', 1)[1].strip()
        if tags.count(tag) == 1:
            self.addInfo(tag, value)
        else:
            logger.info("Ignoring unknown field %s found in file %s" % (tag, self.path))
def __init__(self, data):
    """
    1D analogue of a Delaunay triangulation: sorts the data points and
    treats each consecutive interval as a "simplex", storing a trivial
    linear map to barycentric coordinates for each interval.

    :param data: list of 1D points (each point a 1-entry list); must
                 pass self.checkData.
    :raises SModelSError: if data is empty or fails checkData.
    """
    self.points = None
    self.simplices = None
    self.transform = None
    if data and self.checkData(data):
        self.points = sorted(data)
        #Create simplices as the point intervals (using the sorted data)
        # Each simplex is [index of upper edge, index of lower edge],
        # where indices refer to the *original* (unsorted) data list.
        self.simplices = np.array(
            [[data.index(self.points[i + 1]), data.index(pt)]
             for i, pt in enumerate(self.points[:-1])])
        transform = []
        #Create trivial transformation to the baryocentric coordinates:
        for simplex in self.simplices:
            xmax, xmin = data[simplex[0]][0], data[simplex[1]][0]
            transform.append([[1. / (xmax - xmin)], [xmin]])
        self.transform = np.array(transform)
        #Store convex hull (first and last point):
        self.convex_hull = np.array(
            [data.index(self.points[0]), data.index(self.points[-1])])
    else:
        raise SModelSError()
def removeUnits(self, value):
    """ Remove units from unum objects. Uses the units defined
        in physicsUnits.standard units to normalize the data.

    :param value: Object containing units (e.g. [[100*GeV,100.*GeV],3.*pb])
    :return: Object normalized to standard units (e.g. [[100,100],3000])
    """
    stdUnits = physicsUnits.standardUnits
    # Recurse through containers, mirroring the input structure.
    if isinstance(value, list):
        return [self.removeUnits(entry) for entry in value]
    if isinstance(value, dict):
        return {self.removeUnits(k): self.removeUnits(v)
                for k, v in value.items()}
    if isinstance(value, unum.Unum):
        # Dimensionless unum: just extract the bare number.
        if not value._unit:
            return value.asNumber()
        # Otherwise express it in the first standard unit that cancels
        # its dimension.
        for unit in stdUnits:
            if not (value / unit).normalize()._unit:
                return value.asNumber(unit)
        raise SModelSError("Could not normalize unit value %s using the standard units: %s" %(str(value),str(stdUnits)))
    # Plain numbers are returned untouched.
    return value
def checkForRedundancy(self,databaseParticles):
    """ In case of efficiency maps, check if any txnames have overlapping
        constraints. This would result in double counting, so we dont
        allow it.

    :param databaseParticles: particle model used to build the elements
    :raises SModelSError: if two constraint elements overlap (and the
                          module-level complaint flag is set).
    """
    # Upper-limit results cannot double count; nothing to check.
    if self.getType() == "upperLimit":
        return False
    logger.debug ( "checking for redundancy" )
    datasetElements = []
    for tx in self.txnameList:
        # Older txnames may lack these fields; fall back to defaults.
        if hasattr(tx, 'finalState'):
            finalState = tx.finalState
        else:
            finalState = ['MET','MET']
        if hasattr(tx, 'intermediateState'):
            intermediateState = tx.intermediateState
        else:
            intermediateState = None
        # Collect every element appearing in every constraint.
        for el in elementsInStr(str(tx.constraint)):
            newEl = Element(el,finalState,intermediateState,
                            model=databaseParticles)
            datasetElements.append(newEl)
    # Pairwise comparison of all elements across txnames.
    combos = itertools.combinations(datasetElements, 2)
    for x,y in combos:
        if x == y and _complainAboutOverlappingConstraints:
            errmsg ="Constraints (%s) and (%s) appearing in dataset %s:%s overlap "\
                    "(may result in double counting)." % \
                    (x,y,self.getID(),self.globalInfo.id )
            logger.error( errmsg )
            raise SModelSError ( errmsg )
def initialize(self):
    """
    Open the TCP connection to the database server, retrying up to
    self.maxtries times (default 25) with a waiting period between
    attempts. A no-op when a socket already exists.

    :raises SModelSError: if no connection could be established after
                          self.maxtries attempts.
    """
    if hasattr(self, "sock"):
        return ## already initialized
    # Create a TCP/IP socket
    self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.sock.settimeout(240)
    # Connect the socket to the port where the server is listening
    self.server_address = (self.servername, self.port)
    self.ntries = 0
    if not hasattr(self, "maxtries"):
        self.maxtries = 25
    while self.ntries < self.maxtries:
        try:
            self.sock.connect(self.server_address)
            return
        except (socket.timeout, OSError, ConnectionRefusedError,
                ConnectionResetError, BrokenPipeError,
                ConnectionAbortedError) as e:
            # Back off, count the attempt, and retry.
            dt = self.getWaitingTime()
            self.ntries += 1
            self.log ( 'could not connect to %s after %d times. trying again in %d seconds' % \
                       ( self.nameAndPort(), self.ntries, dt ) )
            time.sleep(dt)
    # All attempts exhausted: report and give up.
    self.pprint( f'could not connect to database in initialize, after trying {self.ntries} times. aborting' )
    raise SModelSError( "Could not connect to database in initialize, tried %d times" % self.ntries)
def createBinaryFile(self, filename=None):
    """ create a pcl file from the text database,
        potentially overwriting an old pcl file.

    :param filename: path of the pickle file to write; if None, the
                     path stored in self.pcl_meta is used.
    :raises SModelSError: if no txt_meta is defined to pickle.
    """
    ## make sure we have a model to pickle with the database!
    # (idiom fix: compare to None with 'is', not '==')
    if self.txt_meta is None:
        logger.error("Trying to create database pickle, but no txt_meta defined.")
        raise SModelSError()
    logger.debug( "database timestamp: %s, filecount: %s" % \
                  ( time.ctime( self.txt_meta.mtime ), self.txt_meta.filecount ) )
    binfile = filename
    if binfile is None:
        binfile = self.pcl_meta.pathname
    # Make sure the particle content is defined before serializing.
    if not hasattr(self,'databaseParticles') or \
            self.databaseParticles is None:
        self._setParticles(self._getParticles())
    logger.debug(" * create %s" % binfile)
    with open(binfile, "wb") as f:
        logger.debug(" * load text database")
        self.loadTextDatabase()
        logger.debug( " * write %s db version %s, format version %s, %s" % \
                      ( binfile, self.txt_meta.databaseVersion,
                        self.txt_meta.format_version, self.txt_meta.cTime() ) )
        # ptcl = serializer.HIGHEST_PROTOCOL
        ## 4 is default protocol in python3.8, and highest protocol in 3.7
        ptcl = min( 4, serializer.HIGHEST_PROTOCOL )
        # Order matters: meta first, then results, then particle model.
        serializer.dump(self.txt_meta, f, protocol=ptcl)
        serializer.dump(self.expResultList, f, protocol=ptcl)
        serializer.dump(self.databaseParticles, f, protocol=ptcl)
    logger.info("%s created." % (binfile))
def getUnits(self, value):
    """ Get standard units for the input object.
        Uses the units defined in physicsUnits.standardUnits.
        (e.g. [[100*GeV,100.*GeV],3.*pb] -> returns [[GeV,GeV],fb]
        [[100*GeV,3.],[200.*GeV,2.*pb]] -> returns [[GeV,1.],[GeV,fb]] )

    :param value: Object containing units (e.g. [[100*GeV,100.*GeV],3.*pb])
    :return: Object with same structure containing the standard units used to
             normalize the data.
    """
    stdUnits = physicsUnits.standardUnits
    # Recurse through containers, mirroring the input structure.
    if isinstance(value, list):
        return [self.getUnits(entry) for entry in value]
    if isinstance(value, dict):
        return {self.getUnits(k): self.getUnits(v)
                for k, v in value.items()}
    if isinstance(value, unum.Unum):
        # Dimensionless unum objects count as plain numbers.
        if not value._unit:
            return 1.
        # Return the first standard unit that cancels the dimension.
        for unit in stdUnits:
            if not (value / unit).normalize()._unit:
                return unit
        raise SModelSError("Could not find standard unit which matches %s. Using the standard units: %s" %(str(value),str(stdUnits)))
    # Plain numbers carry no unit.
    return 1.
def evaluateString(self, value):
    """
    Evaluate string.

    :param value: String expression.
    :return: the evaluated object (units resolved through unitsDict).
    :raises SModelSError: if value is not a string or cannot be evaluated.
    """
    if not isinstance(value,str):
        raise SModelSError("Data should be in string format. Format %s found" %type(value))
    try:
        # NOTE(security): eval on database text — only acceptable because
        # the database content is trusted, never user input.
        val = eval(value,unitsDict)
    except Exception as e:
        # Catch only real errors (a bare 'except:' would also swallow
        # KeyboardInterrupt/SystemExit) and keep the original cause.
        raise SModelSError("data string malformed: %s" %value) from e
    return val
def getCombinedUpperLimitFor(self, nsig, expected=False, deltas_rel=0.2):
    """
    Get combined upper limit.

    :param nsig: list of signal events in each signal region/dataset. The list
                 should obey the ordering in globalInfo.datasetOrder.
    :param expected: return expected, not observed value
    :param deltas_rel: relative uncertainty in signal (float). Default value is 20%.

    :returns: upper limit on sigma*eff
    :raises SModelSError: if no (valid) covariance matrix is available.
    """
    if not hasattr(self.globalInfo, "covariance"):
        logger.error(
            "no covariance matrix given in globalInfo.txt for %s" %
            self.globalInfo.id)
        raise SModelSError(
            "no covariance matrix given in globalInfo.txt for %s" %
            self.globalInfo.id)
    cov = self.globalInfo.covariance
    if type(cov) != list:
        raise SModelSError("covariance field has wrong type: %s" % type(cov))
    if len(cov) < 1:
        raise SModelSError("covariance matrix has length %d." % len(cov))
    # Simplified-likelihood combination over all datasets.
    computer = UpperLimitComputer(ntoys=10000)
    nobs = [x.dataInfo.observedN for x in self._datasets]
    bg = [x.dataInfo.expectedBG for x in self._datasets]
    no = nobs
    ret = computer.ulSigma(Data(observed=no, backgrounds=bg, covariance=cov,
                                third_moment=None, nsignal=nsig,
                                deltas_rel=deltas_rel),
                           marginalize=self._marginalize,
                           expected=expected)
    #Convert limit on total number of signal events to a limit on sigma*eff
    ret = ret / self.globalInfo.lumi
    return ret
def computeV(self, values):
    """
    Compute rotation matrix _V, and triangulation self.tri

    :param values: Nested array with the data values without units
    """
    # Only compute once.
    if not self._V is None:
        return
    #Convert nested mass arrays (with width tuples) to coordinates
    #(remove entries in mass corresponding to inclusive values,
    #select the required widths and combine masses and widths
    #in a flat array where the widths are the last entries)
    Morig = [self.dataToCoordinates(pt[0]) for pt in values]
    aM = np.array(Morig)
    MT = aM.T.tolist()
    # Mean of each coordinate; used to center the data before the SVD.
    self.delta_x = np.array([[sum(x) / len(Morig) for x in MT]])
    M = []
    for Mx in Morig:
        m = (np.array([Mx]) - self.delta_x).tolist()[0]
        M.append(m)
    try:
        ## we dont need thousands of points for SVD
        # Subsample with stride n so at most ~2000 points enter the SVD.
        n = int(math.ceil(len(M) / 2000.))
        Vt = svd(M[::n])[2]
    except LinAlgError as e:
        raise SModelSError(
            "exception caught when performing singular value decomposition: %s, %s"
            % (type(e), e))
    V = Vt.T
    self._V = V  ## self.round ( V )
    Mp = []
    ## the dimensionality of the whole mass space, disrespecting equal branches
    ## assumption
    self.full_dimensionality = len(Morig[0])
    self.dimensionality = 0
    # Rotate every point into the PCA frame; the effective dimensionality
    # is the largest count of non-zero rotated coordinates.
    for m in M:
        mp = np.dot(m, V)
        Mp.append(mp)
        nz = self.countNonZeros(mp)
        if nz > self.dimensionality:
            self.dimensionality = nz
    MpCut = []
    for i in Mp:
        MpCut.append(i[:self.dimensionality].tolist())
    # qhull needs >= 2 dimensions; fall back to the 1D implementation.
    if self.dimensionality > 1:
        self.tri = qhull.Delaunay(MpCut)
    else:
        self.tri = Delaunay1D(MpCut)
def sortDataSets(self):
    """
    Sort datasets according to globalInfo.datasetOrder.

    Only applies when a covariance matrix is defined (combined result);
    self._datasets is then reordered in place to match datasetOrder.

    :raises SModelSError: if datasetOrder is missing, has a different
                          length than the dataset list, or lacks one of
                          the dataset IDs.
    """
    if hasattr(self.globalInfo, "covariance"):
        # Work on a copy so the in-place reassignment below is safe.
        datasets = self._datasets[:]
        if not hasattr(self.globalInfo, "datasetOrder" ):
            raise SModelSError("datasetOrder not given in globalInfo.txt for %s" % self.globalInfo.id )
        datasetOrder = self.globalInfo.datasetOrder
        # A single string means there is only one dataset.
        if isinstance(datasetOrder,str):
            datasetOrder = [datasetOrder]
        if len(datasetOrder) != len(datasets):
            raise SModelSError("Number of datasets in the datasetOrder field does not match the number of datasets for %s" %self.globalInfo.id)
        for dataset in datasets:
            if not dataset.getID() in datasetOrder:
                raise SModelSError("Dataset ID %s not found in datasetOrder" %dataset.getID())
            # Place each dataset at the index datasetOrder dictates.
            dsIndex = datasetOrder.index(dataset.getID())
            self._datasets[dsIndex] = dataset
def checkPathName(self, path, discard_zeroes):
    """ checks the path name,
        returns the base directory and the pickle file name.
        If path starts with http or ftp, fetch the description file
        and the database.
        returns the base directory and the pickle file name

    :param path: database location (URL, pickle file, or directory)
    :param discard_zeroes: forwarded to the server fetch / Meta object
    :raises DatabaseNotFoundException: if the path does not exist
    :raises SModelSError: if a pcl path is invalid
    """
    logger.debug('Try to set the path for the database to: %s', path)
    # Remote databases are fetched from the server.
    if path.startswith(("http://", "https://", "ftp://")):
        return self.fetchFromServer(path, discard_zeroes)
    # Strip an explicit local-file scheme. (Fixed: the original wrapped
    # the literal in parentheses as if it were a tuple.)
    if path.startswith("file://"):
        path = path[7:]
    tmp = os.path.realpath(path)
    if os.path.isfile(tmp):
        base = os.path.dirname(tmp)
        return (base, tmp)
    # A .pcl path that is not an existing file: generate later, or fail.
    if tmp.endswith(".pcl"):
        self.source = "pcl"
        if not os.path.exists(tmp):
            if self.force_load == "pcl":
                logger.error("File not found: %s" % tmp)
                raise SModelSError()
            logger.info("File not found: %s. Will generate." % tmp)
            base = os.path.dirname(tmp)
            return (base, tmp)
        logger.error("Supplied a pcl filename, but %s is not a file." % tmp)
        raise SModelSError()
    # Otherwise treat the path as a text-database directory.
    path = tmp + '/'
    if not os.path.exists(path):
        logger.error('%s is no valid path!' % path)
        raise DatabaseNotFoundException("Database not found")
    m = Meta(path, discard_zeroes=discard_zeroes)
    self.source = "txt"
    return (path, path + m.getPickleFileName())
def computeV(self,values):
    """
    Compute rotation matrix _V, and triangulation self.tri

    :param values: Nested array with the data values
    """
    # Only compute once.
    if not self._V is None:
        return
    Morig= [self.flattenArray(pt[0]) for pt in values]
    aM = np.array(Morig)
    MT = aM.T.tolist()
    # Mean of each coordinate; used to center the data before the SVD.
    self.delta_x = np.array([[ sum(x)/len(Morig) for x in MT ]])
    M = []
    for Mx in Morig:
        m=(np.array([Mx]) - self.delta_x).tolist()[0]
        M.append(m)
    try:
        ## we dont need thousands of points for SVD
        # Subsample with stride n so at most ~2000 points enter the SVD.
        n = int(math.ceil(len(M)/2000.))
        Vt=svd(M[::n])[2]
    except Exception as e:
        raise SModelSError("exception caught when performing singular value decomposition: %s, %s" %(type(e), e))
    V=Vt.T
    self._V= V  ## self.round ( V )
    Mp=[]
    ## the dimensionality of the whole mass space, disrespecting equal branches
    ## assumption
    self.full_dimensionality = len(Morig[0])
    self.dimensionality=0
    # Rotate every point into the PCA frame; the effective dimensionality
    # is the largest count of non-zero rotated coordinates.
    for m in M:
        mp=np.dot(m,V)
        Mp.append ( mp )
        nz=self.countNonZeros(mp)
        if nz>self.dimensionality:
            self.dimensionality=nz
    MpCut=[]
    for i in Mp:
        MpCut.append(i[:self.dimensionality].tolist() )
    # qhull needs >= 2 dimensions; fall back to the 1D implementation.
    if self.dimensionality > 1:
        self.tri = qhull.Delaunay(MpCut)
    else:
        self.tri = Delaunay1D(MpCut)
def send(self, message, amount_expected=32):
    """ send the message.

    :param amount_expected: how many return bytes do you expect
    :return: the eval'ed server reply, or None when amount_expected <= 0
    :raises SModelSError: if the server cannot be reached after
                          self.maxtries attempts.
    """
    try:
        message = bytes(message, "UTF-8")
        # Send data
        # msg = b'query obs:ATLAS-SUSY-2017-01:SRHad-Low:TChiWH:[[500,100],[500,100]]'
        self.log('sending "%s"' % message)
        self.ntries = 0
        while self.ntries < self.maxtries:
            try:
                self.sock.sendall(message)
                # Look for the response
                amount_received = 0
                self.log('sent message')
                if amount_expected <= 0:
                    return
                while amount_received < amount_expected:
                    data = self.sock.recv(self.packetlength)
                    amount_received += len(data)
                # Strip the b'...' repr wrapping and turn unit suffixes
                # into multiplications so the reply can be evaluated.
                data = str(data)[2:-1]
                data = data.replace(" [fb]", "*fb")
                data = data.replace(" [pb]", "*pb")
                # NOTE(security): eval on data received over the network;
                # only acceptable because the database server is trusted.
                data = eval(data)
                self.log('received "%s"' % (data))
                return data
            except (ConnectionRefusedError, ConnectionResetError,
                    BrokenPipeError, ConnectionAbortedError) as e:
                # Back off, count the attempt, and retry.
                dt = self.getWaitingTime()
                self.ntries += 1
                self.log ( 'could not connect to %s. trying again in %d seconds' % \
                           ( self.nameAndPort(), dt ) )
                time.sleep(dt)
        # All attempts exhausted: report and give up.
        self.pprint( f"could not connect in send, after trying {self.ntries} times. aborting" )
        raise SModelSError( f"Could not connect to database in send, tried {self.ntries} times" )
    finally:
        # Always release the socket, even on failure.
        self.log('closing socket')
        self.sock.close()
        del self.sock
def getSRUpperLimit(self, alpha=0.05, expected=False, compute=False, deltas_rel=0.2):
    """
    Computes the 95% upper limit on the signal*efficiency for a given dataset
    (signal region). Only to be used for efficiency map type results.

    :param alpha: Can be used to change the C.L. value. The default value is 0.05
                  (= 95% C.L.)
    :param expected: Compute expected limit ( i.e. Nobserved = NexpectedBG )
    :param deltas_rel: relative uncertainty in signal (float). Default value is 20%.
    :param compute: If True, the upper limit will be computed
                    from expected and observed number of events.
                    If False, the value listed in the database will be used
                    instead.
    :return: upper limit value
    """
    if not self.getType() == 'efficiencyMap':
        logger.error(
            "getSRUpperLimit can only be used for efficiency map results!")
        raise SModelSError()
    if not compute:
        # Use the pre-computed limits stored in the database.
        if expected:
            try:
                return self.dataInfo.expectedUpperLimit
            except AttributeError:
                logger.info(
                    "expectedUpperLimit field not found. Using observed UL instead."
                )
                return self.dataInfo.upperLimit
        else:
            return self.dataInfo.upperLimit
    Nobs = self.dataInfo.observedN  #Number of observed events
    if expected:
        Nobs = self.dataInfo.expectedBG
    Nexp = self.dataInfo.expectedBG  #Number of expected BG events
    bgError = self.dataInfo.bgError  # error on BG
    # Bug fix: keyword was misspelled 'detlas_rel', so the signal
    # uncertainty was never handed to Data.
    m = Data(Nobs, Nexp, bgError, deltas_rel=deltas_rel)
    computer = UpperLimitComputer(cl=1. - alpha)
    maxSignalXsec = computer.ulSigma(m)
    # Convert the event-count limit to a limit on sigma*eff.
    maxSignalXsec = maxSignalXsec / self.globalInfo.lumi
    return maxSignalXsec
def getEfficiencyFor(self, element):
    """
    For upper limit results, checks if the input element falls inside the
    upper limit grid and has a non-zero reweigthing factor.
    If it does, returns efficiency = 1, else returns
    efficiency = 0.  For efficiency map results, returns the
    signal efficiency including the lifetime reweighting.
    If a mass array is given as input, no lifetime reweighting will be applied.

    :param element: Element object or mass array with units.
    :return: efficiency (float)
    """
    if self.txnameData.dataType == 'efficiencyMap':
        # With a database client, delegate the lookup to the server.
        if hasattr(self, "dbClient"):
            query = self.getQueryStringForElement(element)
            logger.info ( "sending em query %s to %s:%d" % \
                          ( query, self.dbClient.servername, self.dbClient.port ) )
            #print ( "query will be", query )
            #return 0.001
            eff = self.dbClient.query(query)
        else:
            eff = self.txnameData.getValueFor(element)
        if not eff or math.isnan(eff):
            eff = 0.  #Element is outside the grid or has zero efficiency
    elif self.txnameData.dataType == 'upperLimit':
        if hasattr(self, "dbClient"):
            query = self.getQueryStringForElement(element)
            logger.info ( "sending query %s to %s:%d" % \
                          ( query, self.dbClient.servername, self.dbClient.port ) )
            #print ( "query will be", query )
            #return 0.001
            ul = self.dbClient.query(query)
        else:
            ul = self.txnameData.getValueFor(element)
        if isinstance(element, Element):
            element._upperLimit = ul  #Store the upper limit for convenience
        # For UL maps the efficiency is binary: inside the grid -> 1.
        if ul is None:
            eff = 0.  #Element is outside the grid or the decays do not correspond to the txname
        else:
            eff = 1.
    else:
        logger.error("Unknown txnameData type: %s" % self.txnameData.dataType)
        raise SModelSError()
    return eff
def getSRUpperLimit(self, alpha=0.05, expected=False, compute=False):
    """
    Computes the 95% upper limit on the signal*efficiency for a given dataset
    (signal region).  Only to be used for efficiency map type results.

    :param alpha: Can be used to change the C.L. value. The default value is 0.05
                  (= 95% C.L.)
    :param expected: Compute expected limit ( i.e. Nobserved = NexpectedBG )
    :param compute: If True, the upper limit will be computed
                    from expected and observed number of events.
                    If False, the value listed in the database will be used
                    instead.
    :return: upper limit value
    """
    if not self.dataInfo.dataType == 'efficiencyMap':
        logger.error(
            "getSRUpperLimit can only be used for efficiency map results!")
        raise SModelSError()
    if not compute:
        # Use the pre-computed limits stored in the database.
        if expected:
            try:
                return self.dataInfo.expectedUpperLimit
            except AttributeError:
                logger.info(
                    "expectedUpperLimit field not found. Using observed UL instead."
                )
                return self.dataInfo.upperLimit
        else:
            return self.dataInfo.upperLimit
    Nobs = self.dataInfo.observedN  #Number of observed events
    if expected:
        Nobs = self.dataInfo.expectedBG
    Nexp = self.dataInfo.expectedBG  #Number of expected BG events
    bgError = self.dataInfo.bgError  # error on BG
    lumi = self.globalInfo.lumi
    # Sanity check: lumi * fb must be dimensionless (i.e. lumi in 1/fb).
    if (lumi * fb).normalize()._unit:
        ID = self.globalInfo.id
        logger.error("Luminosity defined with wrong units for %s" % (ID))
        return False
    maxSignalXsec = statistics.upperLimit(Nobs, Nexp, bgError, lumi, alpha)
    return maxSignalXsec
def __init__(self, value, dataType, Id, accept_errors_upto=.05,
             Leff_inner=None, Leff_outer=None):
    """
    :param value: values in string format
    :param dataType: the dataType (upperLimit or efficiencyMap)
    :param Id: an identifier, must be unique for each TxNameData!
    :param _accept_errors_upto: If None, do not allow extrapolations outside of
           convex hull.  If float value given, allow that much relative
           uncertainty on the upper limit / efficiency
           when extrapolating outside convex hull.
           This method can be used to loosen the equal branches assumption.
    :param Leff_inner: is the effective inner radius of the detector, given in meters
                       (used for reweighting prompt decays). If None, default values
                       will be used.
    :param Leff_outer: is the effective outer radius of the detector, given in meters
                       (used for reweighting decays outside the detector). If None,
                       default values will be used.
    :raises SModelSError: if dataType is neither 'efficiencyMap' nor
                          'upperLimit'.
    """
    self.dataType = dataType
    self._id = Id
    self._accept_errors_upto = accept_errors_upto
    self.Leff_inner = Leff_inner
    self.Leff_outer = Leff_outer
    # _V (the PCA rotation) must be reset before loadData computes it.
    self._V = None
    self.loadData(value)
    # NOTE(review): _keep_values is presumably a class-level flag that
    # controls whether the raw input is retained — confirm.
    if self._keep_values:
        self.origdata = value
    # Select the lifetime-reweighting function for this data type.
    if self.dataType == 'efficiencyMap':
        self.reweightF = defaultEffReweight
    elif self.dataType == 'upperLimit':
        self.reweightF = defaultULReweight
    else:
        raise SModelSError(
            "Default reweighting function not defined for data type %s"
            % self.dataType)
def getULFor(self, element, expected=False):
    """
    Returns the upper limit (or expected) for element (only for upperLimit-type).
    Includes the lifetime reweighting (ul/reweight).
    If called for efficiencyMap results raises an error.
    If a mass array is given as input, no lifetime reweighting will be applied.

    :param element: Element object or mass array (with units)
    :param expected: look in self.txnameDataExp, not self.txnameData
    :return: upper limit, or None when no expected data is available
    """
    if hasattr(self, "dbClient"):
        ## we have a databaseClient, so we send the request
        ## over the network
        # query = "obs:ATLAS-SUSY-2013-05:ul:T2bb:[[300,100],[300,100]]"
        # Query format: <obs|exp>:<analysis id>:ul:<txname>:<mass vector>
        query = "obs:"
        if expected:
            query = "exp:"
        query += self.globalInfo.id + ":ul:"
        query += self.txName + ":"
        query += self.getMassVectorFromElement(element)
        logger.info ( "sending ul query %s to %s:%d" % \
                      ( query, self.dbClient.servername, self.dbClient.port ) )
        # NOTE(review): 'fb' appears unused here; possibly a leftover —
        # confirm the client reply never needs it in this scope.
        from smodels.tools.physicsUnits import fb
        return self.dbClient.query(query)
    if not self.txnameData.dataType == 'upperLimit':
        logger.error("getULFor method can only be used in UL-type data.")
        raise SModelSError()
    if not expected:
        ul = self.txnameData.getValueFor(element)
    else:
        # Expected data is optional; return None when absent.
        if not self.txnameDataExp:
            return None
        else:
            ul = self.txnameDataExp.getValueFor(element)
    return ul
def getEfficiencyFor(self, mass): """ For upper limit results, checks if the input mass falls inside the upper limit grid. If it does, returns efficiency = 1, else returns efficiency = 0. For efficiency map results, checks if the mass falls inside the efficiency map grid. If it does, returns the corresponding efficiency value, else returns efficiency = 0. :param element: Element object :return: efficiency (float) """ #Check if the element appears in Txname: val = self.txnameData.getValueFor(mass) if type(val) == type(fb): return 1. #The element has an UL, return 1 elif val is None or math.isnan(val): return 0. #The element mass is outside the data grid elif type(val) == type(1.): return val #The element has an eff else: logger.error("Unknown txnameData value: %s" % (str(type(val)))) raise SModelSError()
def formatInput(self,value,shapeArray):
    """
    Format value according to the shape in shapeArray.
    If shapeArray contains entries = *, the corresponding entries
    in value will be ignored.

    :param value: Array to be formatted (e.g. [[200.,100.],[200.,100.]])
    :param shapeArray: Array with format info (e.g. ['*',[float,float]])

    :return: formatted array [[200.,100.]]
    """
    if shapeArray == '*':
        # Wildcard: this entry is dropped altogether.
        return None
    elif isinstance(value,list):
        if len(shapeArray) != len(value):
            raise SModelSError("Input value and data shape mismatch (%s,%s)"
                               %(len(shapeArray),len(value)))
        # Format each entry exactly once (the original evaluated the
        # recursion twice per entry — once for the filter, once for the
        # result — doubling the recursive work), then drop wildcards.
        formatted = [self.formatInput(xi, shapeArray[i])
                     for i, xi in enumerate(value)]
        return [xi for xi in formatted if xi is not None]
    else:
        # Scalar entries are returned untouched.
        return value
def __init__(self, path, globalObj, infoObj):
    """
    Represents a single txname (topology) of a dataset. Parses the
    txname file at path, builds the TxNameData grids (observed and,
    when present, expected) and collects the elements appearing in the
    constraint/condition into a TopologyList.

    :param path: path to the txname .txt file
    :param globalObj: globalInfo object of the experimental result
    :param infoObj: dataInfo object of the dataset
    :raises SModelSError: if the file does not exist
    :raises TypeError: if the file lacks a txName tag or has neither
                       upperLimits nor efficiencyMap data
    """
    self.path = path
    self.globalInfo = globalObj
    self._infoObj = infoObj
    self.txnameData = None
    self.txnameDataExp = None  ## expected Data
    self._topologyList = TopologyList()
    logger.debug('Creating object based on txname file: %s' % self.path)
    #Open the info file and get the information:
    if not os.path.isfile(path):
        logger.error("Txname file %s not found" % path)
        raise SModelSError()
    txtFile = open(path, 'r')
    txdata = txtFile.read()
    txtFile.close()
    # Cheap sanity checks on the raw text before parsing line by line.
    if not "txName" in txdata:
        raise TypeError
    if not 'upperLimits' in txdata and not 'efficiencyMap' in txdata:
        raise TypeError
    txfile = open(self.path)
    content = concatenateLines(txfile.readlines())
    txfile.close()
    #Get tags in info file:
    tags = [line.split(':', 1)[0].strip() for line in content]
    data = None
    expectedData = None
    dataType = None
    for i, tag in enumerate(tags):
        if not tag:
            continue
        line = content[i]
        value = line.split(':', 1)[1].strip()
        if tags.count(tag) == 1:
            # Semicolons separate multiple values for one tag.
            if ';' in value:
                value = value.split(';')
            if tag == 'upperLimits' or tag == 'efficiencyMap':
                data = value
                dataType = tag
            elif tag == 'expectedUpperLimits':
                expectedData = value
                dataType = 'upperLimits'
            else:
                self.addInfo(tag, value)
        else:
            logger.info("Ignoring unknown field %s found in file %s" \
                        % (tag, self.path))
            continue
    # Unique identifier for the data grid, e.g. "<analysis>:u:<dataId>:<tx>".
    # NOTE(review): if no data tag was found, dataType is still None here
    # and dataType[0] raises — presumably the TypeError checks above are
    # meant to prevent that; confirm.
    ident = self.globalInfo.id + ":" + dataType[0] + ":" + str(
        self._infoObj.dataId)
    ident += ":" + self.txName
    self.txnameData = TxNameData(data, dataType, ident)
    if expectedData:
        self.txnameDataExp = TxNameData(expectedData, dataType, ident)
    #Builds up a list of elements appearing in constraints:
    elements = []
    if hasattr(self, 'constraint'):
        elements += [Element(el) for el in elementsInStr(self.constraint)]
    if hasattr(self, 'condition') and self.condition:
        conds = self.condition
        if not isinstance(conds, list):
            conds = [conds]
        for cond in conds:
            for el in elementsInStr(cond):
                newEl = Element(el)
                if not newEl in elements:
                    elements.append(newEl)
    # Builds up TopologyList with all the elements appearing in constraints
    # and conditions:
    for el in elements:
        el.sortBranches()
        self._topologyList.addElement(el)
def coordinatesToData(self, point, rotMatrix=None, transVector=None):
    """
    A function that return the original mass and width array (including the
    widths as tuples) for a given point in PCA space (inverse of
    dataToCoordinates).

    :param point: Point in PCA space (1D list with size equal to
                  self.full_dimensionality or self.dimensionality)
    :param rotMatrix: Rotation matrix for PCA (e.g. self._V). If None,
                      no rotation is performed.
    :param transVector: Translation vector for PCA (e.g. self.delta_x).
                        If None no translation is performed

    :return: nested mass array including the widths as tuples
             (e.g. [[(200,1e-10),100],[(200,1e-10),100]])
    """
    if len(point) != self.full_dimensionality and len(
            point) != self.dimensionality:
        # NOTE(review): this branch only logs; pointFull is then unbound
        # below and a NameError follows — presumably a raise was
        # intended here; confirm.
        logger.error(
            "Wrong point dimensions (%i), it should be %i (reduced dimensions) or %i (full dimensionts)"
            % (len(point), self.dimensionality, self.full_dimensionality))
    elif len(point) != self.full_dimensionality:
        # Reduced point: pad with zeros up to full dimensionality before
        # rotating back.
        pointFull = np.array(point[:])
        pointFull = np.append(pointFull, [0.]
                              * (self.full_dimensionality - len(point)))
    else:
        pointFull = np.array(point[:])
    massAndWidths = pointFull
    # Undo the PCA rotation and the centering translation.
    if rotMatrix is not None:
        massAndWidths = np.dot(rotMatrix, massAndWidths)
    if transVector is not None:
        massAndWidths = massAndWidths + transVector
    massAndWidths = massAndWidths.tolist()
    if type(massAndWidths[0]) == list:
        massAndWidths = massAndWidths[0]
    #Extract masses and transformed widths
    masses = massAndWidths[:len(massAndWidths) - len(self.widthPosition)]
    xwidths = massAndWidths[len(massAndWidths) - len(self.widthPosition):]
    #Rescale widths and add unit:
    widths = [unscaleWidth(xw) for xw in xwidths]
    #Add units (make sure it is consistent with standardUnits)
    massUnit = [
        unit for unit in physicsUnits.standardUnits
        if not (1 * GeV / unit).normalize()._unit
    ][0]
    masses = [m * massUnit for m in masses[:]]
    #Add inclusive entries to mass
    flatShape = flattenArray(self.dataShape)
    if len([x for x in flatShape if str(x) != '*']) != len(masses):
        logger.error(
            "Error trying to add inclusive entries (%s) to flat mass array (%s)."
            % (flatShape, masses))
        raise SModelSError()
    masses = addInclusives(masses, flatShape)
    #Reshape masses according to dataShape:
    if len(masses) != len(flatShape):
        logger.error(
            "Number of elements in %s do not match the number of entries in %s"
            % (masses, self.dataShape))
        raise SModelSError()
    massArray = reshapeList(masses, self.dataShape)
    #Add widths to the mass array
    if len(widths) != len(self.widthPosition):
        logger.error(
            "The number of converted widths (%i) is not the expected (%i)"
            % (len(widths), len(self.widthPosition)))
        raise SModelSError()
    #Combine masses and widths
    massAndWidthArray = []
    for ibr, br in enumerate(massArray):
        if str(br) != '*':
            # Re-insert each width as a (mass, width) tuple at its
            # recorded position; pop keeps the widths in order.
            newBr = [(m, widths.pop(0)) if (ibr, im) in self.widthPosition
                     else m for im, m in enumerate(br)]
        else:
            newBr = br
        massAndWidthArray.append(newBr)
    return massAndWidthArray
def getCombinedUpperLimitFor(self, nsig, expected=False, deltas_rel=0.2):
    """
    Get combined upper limit. If covariances are given in globalInfo then
    simplified likelihood is used, else if json files are given pyhf
    cimbination is performed.

    :param nsig: list of signal events in each signal region/dataset. The list
                 should obey the ordering in globalInfo.datasetOrder.
    :param expected: return expected, not observed value
    :param deltas_rel: relative uncertainty in signal (float). Default value is 20%.

    :returns: upper limit on sigma*eff
    :raises SModelSError: if neither a covariance matrix nor json files
                          are available.
    """
    if hasattr(self.globalInfo, "covariance" ):
        # --- Simplified-likelihood route ---
        cov = self.globalInfo.covariance
        if type(cov) != list:
            raise SModelSError( "covariance field has wrong type: %s" % type(cov))
        if len(cov) < 1:
            raise SModelSError( "covariance matrix has length %d." % len(cov))
        computer = UpperLimitComputer(ntoys=10000)
        nobs = [x.dataInfo.observedN for x in self._datasets]
        bg = [x.dataInfo.expectedBG for x in self._datasets]
        no = nobs
        ret = computer.ulSigma(Data(observed=no, backgrounds=bg,
                                    covariance=cov, third_moment=None,
                                    nsignal=nsig, deltas_rel=deltas_rel),
                               marginalize=self._marginalize,
                               expected=expected)
        if ret != None:
            #Convert limit on total number of signal events to a limit on sigma*eff
            ret = ret/self.globalInfo.lumi
        logger.debug("SL upper limit : {}".format(ret))
        return ret
    elif hasattr(self.globalInfo, "jsonFiles" ):
        # --- pyhf route ---
        logger.debug("Using pyhf")
        if all([s == 0 for s in nsig]):
            logger.warning("All signals are empty")
            return None
        ulcomputer, combinations = self.getPyhfComputer( nsig )
        if ulcomputer.nWS == 1:
            # Single workspace: no combination search needed.
            ret = ulcomputer.ulSigma(expected=expected)
            ret = ret/self.globalInfo.lumi
            logger.debug("pyhf upper limit : {}".format(ret))
            return ret
        else:
            # Looking for the best combination
            logger.debug('self.bestCB : {}'.format(self.bestCB))
            if self.bestCB == None:
                logger.debug("Performing best expected combination")
                ulMin = float('+inf')
                for i_ws in range(ulcomputer.nWS):
                    ul = ulcomputer.ulSigma(expected=True,
                                            workspace_index=i_ws)
                    if ul == None:
                        continue
                    if ul < ulMin:
                        ulMin = ul
                        i_best = i_ws
                self.bestCB = combinations[i_best]  # Keeping the index of the best combination for later
                logger.debug('Best combination : %s' % self.bestCB)
            # Computing upper limit using best combination
            if expected:
                try:
                    # ulMin is only bound if the search above ran during
                    # this call; otherwise recompute for the cached bestCB.
                    ret = ulMin/self.globalInfo.lumi
                except NameError:
                    ret = ulcomputer.ulSigma(expected=True,
                                             workspace_index=combinations.index(self.bestCB))
                    ret = ret/self.globalInfo.lumi
            else:
                ret = ulcomputer.ulSigma(expected=False,
                                         workspace_index=combinations.index(self.bestCB))
                ret = ret/self.globalInfo.lumi
            logger.debug("pyhf upper limit : {}".format(ret))
            return ret
    else:
        logger.error ( "no covariance matrix or json file given in globalInfo.txt for %s" % self.globalInfo.id )
        raise SModelSError( "no covariance matrix or json file given in globalInfo.txt for %s" % self.globalInfo.id )
def getValueFor(self, element):
    """
    Interpolates the value and returns the UL or efficiency for the
    respective element rescaled according to the reweighting function
    self.reweightF. For UL-type data the default rescaling is
    ul -> ul/(fraction of prompt decays) and for EM-type data it is
    eff -> eff*(fraction of prompt decays).
    If a mass array is given as input, no lifetime reweighting will be applied.

    :param element: Element object or mass array (with units)
    :returns: None/zero if the reweighting factor is None/zero, otherwise the
              interpolated value multiplied by the reweighting factor (or the
              raw interpolation result when it is not a number).
    """
    #For backward compatibility: older pickled objects may lack these attributes
    if not hasattr(self, 'Leff_inner'):
        self.Leff_inner = None
    if not hasattr(self, 'Leff_outer'):
        self.Leff_outer = None
    #Compute reweight factor according to lifetime/widths
    #For the widths not used in interpolation we assume that the
    #analysis require prompt decays
    #(width=inf for intermediate particles and width=0 for the last particle)
    if isinstance(element, Element):
        #Replaced the widths to be used for interpolation
        #with "prompt" widths (inf for intermediate particles and zero for final particles).
        #This way the reweight factor is only applied for the widths not used
        #for interpolation (since inf and zero result in no reweighting).
        widths = []
        for ibr, br in enumerate(element.totalwidth):
            widths.append([])
            for iw, w in enumerate(br):
                if (ibr, iw) in self.widthPosition:
                    # Width is part of the interpolation grid -> substitute a
                    # "prompt" placeholder so reweightF leaves it untouched
                    if iw != len(br) - 1:
                        widths[ibr].append(float('inf') * GeV)
                    else:
                        widths[ibr].append(0. * GeV)
                else:
                    widths[ibr].append(w)
        reweightFactor = self.reweightF(widths,
                                        Leff_inner=self.Leff_inner,
                                        Leff_outer=self.Leff_outer)
    elif isinstance(element, list):
        # Plain mass array: no lifetime reweighting
        reweightFactor = 1.
    else:
        logger.error(
            "Input of getValueFor must be an Element object or a mass array and not %s"
            % str(type(element)))
        raise SModelSError()
    #Returns None or zero, if reweightFactor is None or zero:
    if not reweightFactor:
        return reweightFactor
    #Extract the mass and width of the element
    #and convert it to the PCA coordinates (len(point) = self.full_dimensionality):
    point = self.dataToCoordinates(element, rotMatrix=self._V,
                                   transVector=self.delta_x)
    val = self.getValueForPoint(point)
    if not isinstance(val, (float, int, unum.Unum)):
        # Non-numeric result (e.g. None outside the grid): return unchanged
        return val
    #Apply reweightFactor (if data has no width or partial width dependence)
    val *= reweightFactor
    return val
def loadBinaryFile(self, lastm_only=False): """ Load a binary database, returning last modified, file count, database. :param lastm_only: if true, the database itself is not read. :returns: database object, or None, if lastm_only == True. """ if lastm_only and self.pcl_meta.mtime: ## doesnt need to load database, and mtime is already ## loaded return None if not os.path.exists(self.pcl_meta.pathname): return None try: with open(self.pcl_meta.pathname, "rb") as f: t0 = time.time() pclfilename = self.pcl_meta.pathname self.pcl_meta = serializer.load(f) self.pcl_meta.pathname = pclfilename if self.force_load == "pcl": self.txt_meta = self.pcl_meta if not lastm_only: if not self.force_load == "pcl" and self.pcl_meta.needsUpdate( self.txt_meta): logger.warning("Something changed in the environment." "Regenerating.") self.createBinaryFile() return self logger.info( "loading binary db file %s format version %s" % (self.pcl_meta.pathname, self.pcl_meta.format_version)) if sys.version[0] == "2": self.expResultList = serializer.load(f) else: self.expResultList = serializer.load(f, encoding="latin1") t1 = time.time() - t0 logger.info( "Loaded database from %s in %.1f secs." % \ ( self.pcl_meta.pathname, t1 ) ) self.databaseParticles = None try: self.databaseParticles = serializer.load(f) except EOFError as e: pass ## a model does not *have* to be defined self.createLinksToModel() except (EOFError, ValueError) as e: os.unlink(self.pcl_meta.pathname) if lastm_only: self.pcl_meta.format_version = -1 self.pcl_meta.mtime = 0 return self logger.error( "%s is not readable (%s)." % \ ( self.pcl_meta.pathname, str(e) ) ) if self.source in ["http", "ftp", "pcl"]: logger.error( "source cannot be rebuilt. supply a different path to the database in your ini file." ) raise SModelSError() self.createBinaryFile() # self.txt_meta = self.pcl_meta return self
def fetchFromScratch(self, path, store, discard_zeroes):
    """
    Fetch database from scratch, together with description.

    :param path: URL of the json description file of the database.
    :param store: filename to store json file.
    :param discard_zeroes: unused here; kept for interface compatibility.
    :returns: tuple ("./", <path to downloaded pickle file>)
    :raises SModelSError: on any network error or unparsable json.
    """

    def sizeof_fmt(num, suffix='B'):
        # Human-readable byte size, e.g. 1536 -> '1.5KB'
        for unit in ['', 'K', 'M', 'G', 'T', 'P']:
            if abs(num) < 1024.:
                return "%3.1f%s%s" % (num, unit, suffix)
            num /= 1024.0
        return "%.1f%s%s" % (num, 'Yi', suffix)

    import requests
    try:
        r = requests.get(path, timeout=5)
    except requests.exceptions.RequestException as e:
        logger.error("Exception when trying to fetch database: %s" % e)
        logger.error("Consider supplying a different database path in the ini file (possibly a local one)")
        raise SModelSError()
    if r.status_code != 200:
        logger.error("Error %d: could not fetch %s from server." %
                     (r.status_code, path))
        raise SModelSError()
    ## its new so store the description
    with open(store, "w") as f:
        f.write(r.text)
    # Parse the json once instead of re-parsing on every access
    descr = r.json()
    if "url" not in descr:
        logger.error("cannot parse json file %s." % path)
        raise SModelSError()
    size = descr["size"]
    cDir, defused = cacheDirectory(create=True, reportIfDefault=True)
    t0 = time.time()
    r2 = requests.get(descr["url"], stream=True, timeout=5)
    filename = os.path.join(cDir, r2.url.split("/")[-1])
    msg = "caching the downloaded database in %s." % cDir
    if defused:
        msg += " If you want the pickled database file to be cached in a different location, set the environment variable SMODELS_CACHEDIR, e.g. to '/tmp'."
    logger.warning(msg)
    logger.info("need to fetch %s and store in %s. size is %s." %
                (descr["url"], filename, sizeof_fmt(size)))
    with open(filename, "wb") as dump:
        import fcntl
        fcntl.lockf(dump, fcntl.LOCK_EX)
        if not self.inNotebook():  ## \r doesnt work in notebook
            print(" " + " " * 51 + "<", end="\r")
        print("loading >", end="")
        # Guard against size < 50, which would yield an invalid chunk_size of 0
        for x in r2.iter_content(chunk_size=max(1, int(size / 50))):
            dump.write(x)
            dump.flush()
            print(".", end="")
            sys.stdout.flush()
        if self.inNotebook():
            print("done.")
        else:
            print("")
        fcntl.lockf(dump, fcntl.LOCK_UN)
        # note: the 'with' block closes the file; no explicit close needed
    logger.info("fetched %s in %d secs." % (r2.url, time.time() - t0))
    logger.debug("store as %s" % filename)
    self.force_load = "pcl"
    return ("./", "%s" % filename)
def __init__(self, base=None, force_load=None, discard_zeroes=True,
             progressbar=False, subpickle=True):
    """
    :param base: path to the database, or pickle file (string), or http
                 address. If None, "official", or "official_fastlim", use
                 the official database for your code version (including
                 fastlim results, if specified). If "latest", or
                 "latest_fastlim", check for the latest database.
    :param force_load: force loading the text database ("txt"), or binary
                 database ("pcl"), dont force anything if None
    :param discard_zeroes: discard txnames with only zeroes as entries.
    :param progressbar: show a progressbar when building pickle file
                 (needs the python-progressbar module)
    :param subpickle: produce small pickle files per exp result. Should
                 only be used when working on the database.
    """
    self.url = base
    self.source = ""
    # base may legitimately be None (-> official database), so guard before
    # calling .endswith; the old unguarded call raised AttributeError.
    if force_load is None and base is not None and base.endswith(".pcl"):
        force_load = "pcl"
    self.force_load = force_load
    self.subpickle = subpickle
    obase = base  ## keep old name for more checks for 'latest'
    # Resolve the symbolic database names to concrete paths/URLs:
    if base in [None, "official"]:
        from smodels.installation import officialDatabase
        base = officialDatabase()
    if base in ["official_fastlim"]:
        from smodels.installation import officialDatabase
        base = officialDatabase(fastlim=True)
    if base in ["latest"]:
        from smodels.installation import latestDatabase
        base = latestDatabase()
    if base in ["latest_fastlim"]:
        from smodels.installation import latestDatabase
        base = latestDatabase(fastlim=True)
    if base in ["unittest"]:
        from smodels.installation import testDatabase
        base = testDatabase()
    base, pclfile = self.checkPathName(base, discard_zeroes)
    self.pcl_meta = Meta(pclfile)
    self.expResultList = []
    self.txt_meta = self.pcl_meta
    if not self.force_load == "pcl":
        self.txt_meta = Meta(base, discard_zeroes=discard_zeroes)
    self.progressbar = None
    if progressbar:
        try:
            import progressbar as P
            self.progressbar = P.ProgressBar(widgets=[
                "Building Database ",
                P.Percentage(),
                P.Bar(marker=P.RotatingMarker()),
                P.ETA()
            ])
        except ImportError as e:
            logger.warning("progressbar requested, but python-progressbar is not installed.")
    if self.force_load == "txt":
        self._setParticles()
        self.loadTextDatabase()
        self.txt_meta.printFastlimBanner()
        return
    if self.force_load == "pcl":
        self.loadBinaryFile()
        self._setParticles()
        self.pcl_meta.printFastlimBanner()
        # obase can be None here (base=None with force_load="pcl"); guard
        # the substring test so we don't crash with a TypeError.
        if obase and "latest" in obase:
            from smodels import installation
            codeVersion = installation.version()
            pclVersion = self.pcl_meta.databaseVersion
            if codeVersion[0] != pclVersion[0]:
                logger.error(
                    "major versions of code and database differ! code=%s, database=%s"
                    % (codeVersion[0], pclVersion[0]))
        return
    if self.force_load in [None, "none", "None"]:
        self.loadDatabase()
        self._setParticles()
        self.txt_meta.printFastlimBanner()
        return
    logger.error("when initialising database: force_load=%s is not "
                 "recognized. Valid values are: pcl, txt, None." % force_load)
    raise SModelSError()
def dataToCoordinates(self, dataPoint, rotMatrix=None, transVector=None): """ Format a dataPoint to the format used for interpolation. All the units are removed, the widths are rescaled and the masses and widths are combined in a flat array. The input can be an Element object or a massAndWidth nested arrays (with tuples to store the relevant widths). :param dataPoint: Element object from which the mass and width arrays will be extracted or a nested mass array from the database, which contain tuples to include the width values :param rotMatrix: Rotation matrix for PCA (e.g. self._V). If None, no rotation is performed. :param transVector: Translation vector for PCA (e.g. self.delta_x). If None no translation is performed :return: Point (list of floats) """ #Collect the data if isinstance(dataPoint, Element): masses = dataPoint.mass widths = dataPoint.totalwidth elif isinstance(dataPoint, list): masses = [[mw[0] if isinstance(mw, tuple) else mw for mw in br] for br in dataPoint] widths = [[mw[1] if isinstance(mw, tuple) else None for mw in br] for br in dataPoint] else: logger.error( "dataPoint must be an element or a nested array including masses and widths" ) raise SModelSError() #Select the required masses (remove entries corresponding to inclusive entries in data) masses = removeInclusives(masses, self.dataShape) #Select the required widths (remove widths not used in interpolation) widths = [[ widths[ibr][im] for im, _ in enumerate(br) if (ibr, im) in self.widthPosition ] for ibr, br in enumerate(widths)] if None in removeUnits(flattenArray(widths), GeV): logger.error("Error obtaining widths from %s" % str(dataPoint)) raise SModelSError() #Remove units and flatten arrays: masses = flattenArray(masses) masses = removeUnits(masses, physicsUnits.standardUnits) widths = flattenArray(widths) widths = removeUnits(widths, physicsUnits.standardUnits) #Rescale widths: xwidths = [rescaleWidth(w) for w in widths] #Combine masses and rescaled widths in a single point point = 
masses + xwidths #Now transform to PCA coordinates (if rotMatrix and transVector are defined: if transVector is not None: point = np.array([point]) point = ((point - transVector)).tolist()[0] #Translate if rotMatrix is not None: point = np.dot(point, rotMatrix) # Rotate point = point.tolist() return point