Example #1
    def loadData(self,value):
        """
        Uses the information in value to generate the data grid used for
        interpolation.
        """

        if self._V:
            return
        
        if isinstance(value,str):
            val = self.evaluateString(value)
        else:
            val = value
            
        self.units = self.getUnits(val)[0] #Store standard units
        self.dataShape = self.getDataShape(val[0][0]) #Store the data (mass) format (useful if there are wildcards)        
        values = self.removeUnits(val) #Remove units and store the normalization units
        values = self.removeWildCards(values)


        if len(values) < 1 or len(values[0]) < 2:
            raise SModelSError("input value not in correct format. expecting something "
                               "like [ [ [[ 300.*GeV,100.*GeV], "
                               "[ 300.*GeV,100.*GeV] ], 10.*fb ], ... ] "
                               "for upper limits or [ [ [[ 300.*GeV,100.*GeV],"
                               " [ 300.*GeV,100.*GeV] ], .1 ], ... ] for "
                               "efficiency maps. Received %s" % values[:80])

        if not isinstance(self.units[-1], (unum.Unum, float)):
            raise SModelSError("Error obtaining units from value: %s" % values[:80])


        self.y_values = np.array(values)[:,1]
        self.computeV(values)
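
A toy numpy illustration of the final step above: once units and wildcards are stripped, each entry of values is a [massArray, value] pair, so the y-values are just the second column (dtype=object keeps the ragged mass arrays intact; the data below is illustrative):

import numpy as np

values = [[[[300., 100.], [300., 100.]], 10.],
          [[[400., 150.], [400., 150.]], 5.]]
y_values = np.array(values, dtype=object)[:, 1]
print(y_values)  # [10.0 5.0]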
Example #2
    def __init__(self, path=None):

        self.path = path
        if path:
            logger.debug('Creating object based on %s' % self.path)

            #Open the info file and get the information:
            if not os.path.isfile(path):
                logger.error("Info file %s not found" % path)
                raise SModelSError()
            from smodels.tools.stringTools import concatenateLines
            with open(self.path) as infoFile:
                content = concatenateLines(infoFile.readlines())

            #Get tags in info file:
            tags = [line.split(':', 1)[0].strip() for line in content]
            for i, tag in enumerate(tags):
                if not tag: continue
                line = content[i]
                value = line.split(':', 1)[1].strip()
                if tags.count(tag) == 1:
                    self.addInfo(tag, value)
                else:
                    logger.info("Ignoring duplicated field %s found in file %s" %
                                (tag, self.path))
                    continue
Example #3
    def __init__(self, data):

        self.points = None
        self.simplices = None
        self.transform = None
        if data and self.checkData(data):
            self.points = sorted(data)
            #Create simplices as the point intervals (using the sorted data)
            self.simplices = np.array(
                [[data.index(self.points[i + 1]),
                  data.index(pt)] for i, pt in enumerate(self.points[:-1])])
            transform = []
            #Create trivial transformation to the barycentric coordinates:
            for simplex in self.simplices:
                xmax, xmin = data[simplex[0]][0], data[simplex[1]][0]
                transform.append([[1. / (xmax - xmin)], [xmin]])
            self.transform = np.array(transform)

            #Store convex hull (first and last point):
            self.convex_hull = np.array(
                [data.index(self.points[0]),
                 data.index(self.points[-1])])

        else:
            raise SModelSError("Could not create 1D triangulation from data: %s" % str(data))
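
A sketch of how this 1D triangulation supports linear interpolation, assuming the input passes checkData; it mirrors scipy's barycentric transform convention reduced to one dimension, and the grid values in y are illustrative:

data = [[0.0], [1.0], [3.0]]           # 1D grid points
y = {0.0: 10.0, 1.0: 8.0, 3.0: 2.0}    # value attached to each point

tri = Delaunay1D(data)
x = 2.0
for simplex, (scale, offset) in zip(tri.simplices, tri.transform):
    xmax = data[simplex[0]][0]
    xmin = offset[0]
    if xmin <= x <= xmax:
        b = scale[0] * (x - xmin)      # barycentric coordinate in [0, 1]
        print((1. - b) * y[xmin] + b * y[xmax])   # 5.0 for x = 2.0
        break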
Example #4
 def removeUnits(self, value):
     """
     Remove units from unum objects. Uses the units defined
     in physicsUnits.standard units to normalize the data.
     
     :param value: Object containing units (e.g. [[100*GeV,100.*GeV],3.*pb])
     
     :return: Object normalized to standard units (e.g. [[100,100],3000])
     """
     
     stdUnits = physicsUnits.standardUnits
     
     if isinstance(value,list):
         return [self.removeUnits(x) for x in value]
     elif isinstance(value,dict):
         return {self.removeUnits(x): self.removeUnits(y) for x, y in value.items()}
     elif isinstance(value,unum.Unum):
         #Check if value has unit or not:
         if not value._unit:
             return value.asNumber()
         #Now try to normalize it by one of the standard pre-defined units:
         for unit in stdUnits:
             y = (value/unit).normalize()
             if not y._unit:
                 return value.asNumber(unit)
         raise SModelSError("Could not normalize unit value %s using the standard units: %s" 
                            %(str(value),str(stdUnits)))
     else:
         return value
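
The same recursive traversal as a self-contained sketch, with a toy Quantity class standing in for unum objects (the class and the conversion table are purely illustrative; standard units are taken to be GeV and fb):

STANDARD = {'GeV': 1.0, 'fb': 1.0, 'pb': 1000.0}   # conversion factors into standard units

class Quantity:
    """Toy stand-in for a unum object: a number with a unit string ('' = unitless)."""
    def __init__(self, number, unit=''):
        self.number, self.unit = number, unit

def strip_units(value):
    """Replace every Quantity leaf by its number expressed in standard units."""
    if isinstance(value, list):
        return [strip_units(x) for x in value]
    if isinstance(value, Quantity):
        if not value.unit:
            return value.number
        return value.number * STANDARD[value.unit]   # e.g. 3 pb -> 3000 fb
    return value

print(strip_units([[Quantity(100., 'GeV'), Quantity(100., 'GeV')], Quantity(3., 'pb')]))
# -> [[100.0, 100.0], 3000.0]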
Example #5
 def checkForRedundancy(self,databaseParticles):
     """ In case of efficiency maps, check if any txnames have overlapping
         constraints. This would result in double counting, so we dont
         allow it. """
     if self.getType() == "upperLimit":
         return False
     logger.debug("checking for redundancy")
     datasetElements = []
     for tx in self.txnameList:
         if hasattr(tx, 'finalState'):
             finalState = tx.finalState
         else:
             finalState = ['MET','MET']
         if hasattr(tx, 'intermediateState'):
             intermediateState = tx.intermediateState
         else:
             intermediateState = None
         for el in elementsInStr(str(tx.constraint)):
             newEl = Element(el,finalState,intermediateState,
                     model=databaseParticles)
             datasetElements.append(newEl)
     combos = itertools.combinations(datasetElements, 2)
     for x, y in combos:
         if x == y and _complainAboutOverlappingConstraints:
             errmsg = "Constraints (%s) and (%s) appearing in dataset %s:%s overlap "\
                      "(may result in double counting)." % \
                      (x, y, self.getID(), self.globalInfo.id)
             logger.error(errmsg)
             raise SModelSError(errmsg)
Example #6
    def initialize(self):
        if hasattr(self, "sock"):
            return  ## already initialized
        # Create a TCP/IP socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.settimeout(240)

        # Connect the socket to the port where the server is listening
        self.server_address = (self.servername, self.port)
        self.ntries = 0
        if not hasattr(self, "maxtries"):
            self.maxtries = 25
        while self.ntries < self.maxtries:
            try:
                self.sock.connect(self.server_address)
                return
            except (socket.timeout, OSError, ConnectionRefusedError,
                    ConnectionResetError, BrokenPipeError,
                    ConnectionAbortedError):
                dt = self.getWaitingTime()
                self.ntries += 1
                self.log('could not connect to %s (attempt %d). trying again in %d seconds' %
                         (self.nameAndPort(), self.ntries, dt))
                time.sleep(dt)
        self.pprint(
            f'could not connect to database in initialize, after trying {self.ntries} times. aborting'
        )
        raise SModelSError(
            "Could not connect to database in initialize, tried %d times" %
            self.ntries)
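
A condensed, self-contained sketch of the same connect-with-retries pattern (the fixed 5-second backoff stands in for getWaitingTime):

import socket
import time

def connect_with_retries(address, maxtries=25, timeout=240):
    """Keep trying to open a TCP connection; raise after maxtries failures."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(timeout)
    for attempt in range(maxtries):
        try:
            sock.connect(address)
            return sock
        except OSError:  # socket.timeout and all Connection* errors are OSErrors
            time.sleep(5)  # stand-in for getWaitingTime()
    raise ConnectionError("could not connect to %s:%d after %d tries" %
                          (address[0], address[1], maxtries))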
Example #7
 def createBinaryFile(self, filename=None):
     """ create a pcl file from the text database,
         potentially overwriting an old pcl file. """
     ## make sure we have a model to pickle with the database!
      if self.txt_meta is None:
         logger.error(
             "Trying to create database pickle, but no txt_meta defined.")
         raise SModelSError()
     logger.debug( "database timestamp: %s, filecount: %s" % \
                  ( time.ctime( self.txt_meta.mtime ), self.txt_meta.filecount ) )
      binfile = filename
      if binfile is None:
          binfile = self.pcl_meta.pathname
      if getattr(self, 'databaseParticles', None) is None:
          self._setParticles(self._getParticles())
     logger.debug(" * create %s" % binfile)
     with open(binfile, "wb") as f:
         logger.debug(" * load text database")
         self.loadTextDatabase()
         logger.debug(  " * write %s db version %s, format version %s, %s" % \
                 ( binfile, self.txt_meta.databaseVersion,
                   self.txt_meta.format_version, self.txt_meta.cTime() ) )
         # ptcl = serializer.HIGHEST_PROTOCOL
         ptcl = min(
             4, serializer.HIGHEST_PROTOCOL
         )  ## 4 is default protocol in python3.8, and highest protocol in 3.7
         serializer.dump(self.txt_meta, f, protocol=ptcl)
         serializer.dump(self.expResultList, f, protocol=ptcl)
         serializer.dump(self.databaseParticles, f, protocol=ptcl)
         logger.info("%s created." % (binfile))
Example #8
 def getUnits(self, value):
     """
     Get standard units for the input object.
     Uses the units defined in physicsUnits.standardUnits.
     (e.g. [[100*GeV,100.*GeV],3.*pb] -> returns [[GeV,GeV],fb]
     [[100*GeV,3.],[200.*GeV,2.*pb]] -> returns [[GeV,1.],[GeV,fb]] )
     
     :param value: Object containing units (e.g. [[100*GeV,100.*GeV],3.*pb])
     
     :return: Object with same structure containing the standard units used to
              normalize the data.
     """
     
     stdUnits = physicsUnits.standardUnits
     if isinstance(value,list):            
         return [self.getUnits(x) for x in value]
      elif isinstance(value,dict):
          return {self.getUnits(x): self.getUnits(y)
                  for x, y in value.items()}
     elif isinstance(value,unum.Unum):
         #Check if value has unit or not:
         if not value._unit:
             return 1.
          #Now try to find a standard unit which matches:
         for unit in stdUnits:
             y = (value/unit).normalize()
             if not y._unit:
                 return unit
         raise SModelSError("Could not find standard unit which matches %s. Using the standard units: %s" 
                            %(str(value),str(stdUnits)))
     else:
         return 1.    
Example #9
 def evaluateString(self, value):
     """
     Evaluate string.
     
     :param value: String expression.
     """
     
      if not isinstance(value, str):
          raise SModelSError("Data should be in string format. Format %s found" % type(value))

      try:
          val = eval(value, unitsDict)
      except Exception as e:
          raise SModelSError("data string malformed: %s (%s)" % (value, e))
     
     return val
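
A minimal sketch of such an evaluation with a restricted namespace; the unit values below are toy stand-ins for the unum objects that unitsDict would normally hold:

GeV, fb = 1.0, 1.0  # hypothetical stand-ins for unum unit objects
unitsDict = {"GeV": GeV, "fb": fb, "__builtins__": {}}  # block builtins inside eval
val = eval("[[[[300.*GeV,100.*GeV],[300.*GeV,100.*GeV]], 10.*fb]]", unitsDict)
print(val)  # [[[[300.0, 100.0], [300.0, 100.0]], 10.0]]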
Example #10
    def getCombinedUpperLimitFor(self, nsig, expected=False, deltas_rel=0.2):
        """
        Get combined upper limit.
        
        :param nsig: list of signal events in each signal region/dataset. The list
                        should obey the ordering in globalInfo.datasetOrder.
        :param expected: return expected, not observed value
        :param deltas_rel: relative uncertainty in signal (float). Default value is 20%.        
        
        :returns: upper limit on sigma*eff
        """

        if not hasattr(self.globalInfo, "covariance"):
            logger.error(
                "no covariance matrix given in globalInfo.txt for %s" %
                self.globalInfo.id)
            raise SModelSError(
                "no covariance matrix given in globalInfo.txt for %s" %
                self.globalInfo.id)
        cov = self.globalInfo.covariance
        if not isinstance(cov, list):
            raise SModelSError("covariance field has wrong type: %s" %
                               type(cov))
        if len(cov) < 1:
            raise SModelSError("covariance matrix has length %d." % len(cov))

        computer = UpperLimitComputer(ntoys=10000)

        nobs = [x.dataInfo.observedN for x in self._datasets]
        bg = [x.dataInfo.expectedBG for x in self._datasets]

        ret = computer.ulSigma(Data(observed=nobs,
                                    backgrounds=bg,
                                    covariance=cov,
                                    third_moment=None,
                                    nsignal=nsig,
                                    deltas_rel=deltas_rel),
                               marginalize=self._marginalize,
                               expected=expected)

        #Convert limit on total number of signal events to a limit on sigma*eff
        ret = ret / self.globalInfo.lumi

        return ret
Example #11
    def computeV(self, values):
        """
        Compute rotation matrix _V, and triangulation self.tri

        :param values: Nested array with the data values without units

        """

        if self._V is not None:
            return

        #Convert nested mass arrays (with width tuples) to coordinates
        #(remove entries in mass corresponding to inclusive values,
        #select the required widths and combine masses and widths
        #in a flat array where the widths are the last entries)
        Morig = [self.dataToCoordinates(pt[0]) for pt in values]

        aM = np.array(Morig)
        MT = aM.T.tolist()
        self.delta_x = np.array([[sum(x) / len(Morig) for x in MT]])
        M = []

        for Mx in Morig:
            m = (np.array([Mx]) - self.delta_x).tolist()[0]
            M.append(m)

        try:
            ## we don't need thousands of points for SVD
            n = int(math.ceil(len(M) / 2000.))
            Vt = svd(M[::n])[2]
        except LinAlgError as e:
            raise SModelSError(
                "exception caught when performing singular value decomposition: %s, %s"
                % (type(e), e))

        V = Vt.T
        self._V = V  ## self.round ( V )
        Mp = []

        ## the dimensionality of the whole mass space, disrespecting equal branches
        ## assumption
        self.full_dimensionality = len(Morig[0])
        self.dimensionality = 0
        for m in M:
            mp = np.dot(m, V)
            Mp.append(mp)
            nz = self.countNonZeros(mp)
            if nz > self.dimensionality:
                self.dimensionality = nz
        MpCut = []
        for i in Mp:
            MpCut.append(i[:self.dimensionality].tolist())

        if self.dimensionality > 1:
            self.tri = qhull.Delaunay(MpCut)
        else:
            self.tri = Delaunay1D(MpCut)
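
A compact numpy illustration of the PCA step above: mean-center the flattened points, run an SVD, and rotate into the principal-component basis (toy, collinear data, so the effective dimensionality is 1):

import numpy as np

M = np.array([[300., 100.], [400., 150.], [500., 200.]])  # flattened mass points
delta_x = M.mean(axis=0)
V = np.linalg.svd(M - delta_x)[2].T      # columns are the principal directions
Mp = (M - delta_x) @ V                   # rotated coordinates
print(np.round(Mp, 10))                  # second column is ~0: dimensionality 1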
Example #12
    def sortDataSets(self):
        """
        Sort datasets according to globalInfo.datasetOrder.
        """
        if hasattr(self.globalInfo, "covariance"):
            datasets = self._datasets[:]
            if not hasattr(self.globalInfo, "datasetOrder" ):
                raise SModelSError("datasetOrder not given in globalInfo.txt for %s" % self.globalInfo.id )
            datasetOrder = self.globalInfo.datasetOrder
            if isinstance(datasetOrder,str):
                datasetOrder = [datasetOrder]

            if len(datasetOrder) != len(datasets):
                raise SModelSError("Number of datasets in the datasetOrder field does not match the number of datasets for %s"
                                   %self.globalInfo.id)
            for dataset in datasets:
                if dataset.getID() not in datasetOrder:
                    raise SModelSError("Dataset ID %s not found in datasetOrder" % dataset.getID())
                dsIndex = datasetOrder.index(dataset.getID())
                self._datasets[dsIndex] = dataset
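
The reordering logic in miniature, with strings standing in for dataset objects (getID would return these IDs):

datasets = ["SR2", "SR3", "SR1"]           # current order (stand-ins for objects)
datasetOrder = ["SR1", "SR2", "SR3"]       # order demanded by globalInfo
ordered = [None] * len(datasets)
for ds in datasets:
    ordered[datasetOrder.index(ds)] = ds
print(ordered)  # ['SR1', 'SR2', 'SR3']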
Example #13
    def checkPathName(self, path, discard_zeroes):
        """
        checks the path name,
        returns the base directory and the pickle file name.
        If path starts with http or ftp, fetch the description file
        and the database.
        returns the base directory and the pickle file name
        """
        logger.debug('Try to set the path for the database to: %s', path)
        if path.startswith(("http://", "https://", "ftp://")):
            return self.fetchFromServer(path, discard_zeroes)
        if path.startswith(("file://")):
            path = path[7:]

        tmp = os.path.realpath(path)
        if os.path.isfile(tmp):
            base = os.path.dirname(tmp)
            return (base, tmp)

        if tmp.endswith(".pcl"):
            self.source = "pcl"
            if not os.path.exists(tmp):
                if self.force_load == "pcl":
                    logger.error("File not found: %s" % tmp)
                    raise SModelSError()
                logger.info("File not found: %s. Will generate." % tmp)
                base = os.path.dirname(tmp)
                return (base, tmp)
            logger.error("Supplied a pcl filename, but %s is not a file." %
                         tmp)
            raise SModelSError()

        path = tmp + '/'
        if not os.path.exists(path):
            logger.error('%s is not a valid path!' % path)
            raise DatabaseNotFoundException("Database not found")
        m = Meta(path, discard_zeroes=discard_zeroes)
        self.source = "txt"
        return (path, path + m.getPickleFileName())
Example #14
    def computeV(self,values):
        """
        Compute rotation matrix _V, and triangulation self.tri
        
        :param values: Nested array with the data values
        
        """
        
        if self._V is not None:
            return

        Morig = [self.flattenArray(pt[0]) for pt in values]

        aM = np.array(Morig)
        MT = aM.T.tolist()
        self.delta_x = np.array([[sum(x) / len(Morig) for x in MT]])
        M = []

        for Mx in Morig:
            m = (np.array([Mx]) - self.delta_x).tolist()[0]
            M.append(m)

        try:
            ## we don't need thousands of points for SVD
            n = int(math.ceil(len(M) / 2000.))
            Vt = svd(M[::n])[2]
        except Exception as e:
            raise SModelSError("exception caught when performing singular value decomposition: %s, %s" % (type(e), e))

        V = Vt.T
        self._V = V  ## self.round ( V )
        Mp = []

        ## the dimensionality of the whole mass space, disrespecting equal branches
        ## assumption
        self.full_dimensionality = len(Morig[0])
        self.dimensionality = 0
        for m in M:
            mp = np.dot(m, V)
            Mp.append(mp)
            nz = self.countNonZeros(mp)
            if nz > self.dimensionality:
                self.dimensionality = nz
        MpCut = []
        for i in Mp:
            MpCut.append(i[:self.dimensionality].tolist())

        if self.dimensionality > 1:
            self.tri = qhull.Delaunay(MpCut)
        else:
            self.tri = Delaunay1D(MpCut)
Example #15
    def send(self, message, amount_expected=32):
        """ send the message.
        :param amount_expected: how many return bytes do you expect
        """
        try:
            message = bytes(message, "UTF-8")
            # Send data
            # msg = b'query obs:ATLAS-SUSY-2017-01:SRHad-Low:TChiWH:[[500,100],[500,100]]'
            self.log('sending "%s"' % message)
            self.ntries = 0
            while self.ntries < self.maxtries:
                try:
                    self.sock.sendall(message)

                    # Look for the response
                    amount_received = 0

                    self.log('sent message')
                    if amount_expected <= 0:
                        return

                    while amount_received < amount_expected:
                        data = self.sock.recv(self.packetlength)
                        amount_received += len(data)
                    # note: only the last received chunk is parsed below,
                    # so the reply is assumed to arrive in a single packet
                    data = str(data)[2:-1]
                    data = data.replace(" [fb]", "*fb")
                    data = data.replace(" [pb]", "*pb")
                    data = eval(data)
                    self.log('received "%s"' % (data))
                    return data

                except (ConnectionRefusedError, ConnectionResetError,
                        BrokenPipeError, ConnectionAbortedError):
                    dt = self.getWaitingTime()
                    self.ntries += 1
                    self.log('could not connect to %s. trying again in %d seconds' %
                             (self.nameAndPort(), dt))
                    time.sleep(dt)
            self.pprint(
                f"could not connect in send, after trying {self.ntries} times. aborting"
            )
            raise SModelSError(
                f"Could not connect to database in send, tried {self.ntries} times"
            )

        finally:
            self.log('closing socket')
            self.sock.close()
            del self.sock
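
For comparison, a minimal sketch of reading an exact number of bytes that accumulates chunks instead of keeping only the last one (sock and n are the only inputs; packetlength mirrors the attribute above):

def recv_exactly(sock, n, packetlength=4096):
    """Read exactly n bytes from sock, accumulating partial chunks."""
    buf = b""
    while len(buf) < n:
        chunk = sock.recv(min(packetlength, n - len(buf)))
        if not chunk:
            break  # connection closed before n bytes arrived
        buf += chunk
    return buf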
Example #16
    def getSRUpperLimit(self,
                        alpha=0.05,
                        expected=False,
                        compute=False,
                        deltas_rel=0.2):
        """
        Computes the 95% upper limit on the signal*efficiency for a given dataset (signal region).
        Only to be used for efficiency map type results.

        :param alpha: Can be used to change the C.L. value. The default value is 0.05 (= 95% C.L.)
        :param expected: Compute expected limit ( i.e. Nobserved = NexpectedBG )
        :param deltas_rel: relative uncertainty in signal (float). Default value is 20%.        
        :param compute: If True, the upper limit will be computed
                        from expected and observed number of events. If False, the value listed
                        in the database will be used instead.
                        

        :return: upper limit value
        """

        if self.getType() != 'efficiencyMap':
            logger.error(
                "getSRUpperLimit can only be used for efficiency map results!")
            raise SModelSError()

        if not compute:
            if expected:
                try:
                    return self.dataInfo.expectedUpperLimit
                except AttributeError:
                    logger.info(
                        "expectedUpperLimit field not found. Using observed UL instead."
                    )
                    return self.dataInfo.upperLimit
            else:
                return self.dataInfo.upperLimit

        Nobs = self.dataInfo.observedN  #Number of observed events
        if expected:
            Nobs = self.dataInfo.expectedBG
        Nexp = self.dataInfo.expectedBG  #Number of expected BG events
        bgError = self.dataInfo.bgError  # error on BG

        m = Data(Nobs, Nexp, bgError, deltas_rel=deltas_rel)
        computer = UpperLimitComputer(cl=1. - alpha)
        maxSignalXsec = computer.ulSigma(m)
        maxSignalXsec = maxSignalXsec / self.globalInfo.lumi

        return maxSignalXsec
Example #17
    def getEfficiencyFor(self, element):
        """
        For upper limit results, checks if the input element falls inside the
        upper limit grid and has a non-zero reweighting factor.
        If it does, returns efficiency = 1, else returns
        efficiency = 0.  For efficiency map results, returns the
        signal efficiency including the lifetime reweighting.
        If a mass array is given as input, no lifetime reweighting will be applied.

        :param element: Element object or mass array with units.
        :return: efficiency (float)
        """

        if self.txnameData.dataType == 'efficiencyMap':
            if hasattr(self, "dbClient"):
                query = self.getQueryStringForElement(element)
                logger.info ( "sending em query %s to %s:%d" % \
                              ( query, self.dbClient.servername, self.dbClient.port ) )
                #print ( "query will be", query )
                #return 0.001
                eff = self.dbClient.query(query)
            else:
                eff = self.txnameData.getValueFor(element)

            if not eff or math.isnan(eff):
                eff = 0.  #Element is outside the grid or has zero efficiency
        elif self.txnameData.dataType == 'upperLimit':
            if hasattr(self, "dbClient"):
                query = self.getQueryStringForElement(element)
                logger.info ( "sending query %s to %s:%d" % \
                              ( query, self.dbClient.servername, self.dbClient.port ) )
                #print ( "query will be", query )
                #return 0.001
                ul = self.dbClient.query(query)
            else:
                ul = self.txnameData.getValueFor(element)
            if isinstance(element, Element):
                element._upperLimit = ul  #Store the upper limit for convenience
            if ul is None:
                eff = 0.  #Element is outside the grid or the decays do not correspond to the txname
            else:
                eff = 1.
        else:
            logger.error("Unknown txnameData type: %s" %
                         self.txnameData.dataType)
            raise SModelSError()

        return eff
Example #18
    def getSRUpperLimit(self, alpha=0.05, expected=False, compute=False):
        """
        Computes the 95% upper limit on the signal*efficiency for a given dataset (signal region).
        Only to be used for efficiency map type results.

        :param alpha: Can be used to change the C.L. value. The default value is 0.05 (= 95% C.L.)
        :param expected: Compute expected limit ( i.e. Nobserved = NexpectedBG )
        :param compute: If True, the upper limit will be computed
                        from expected and observed number of events. If False, the value listed
                        in the database will be used instead.

        :return: upper limit value
        """

        if self.dataInfo.dataType != 'efficiencyMap':
            logger.error(
                "getSRUpperLimit can only be used for efficiency map results!")
            raise SModelSError()

        if not compute:
            if expected:
                try:
                    return self.dataInfo.expectedUpperLimit
                except AttributeError:
                    logger.info(
                        "expectedUpperLimit field not found. Using observed UL instead."
                    )
                    return self.dataInfo.upperLimit
            else:
                return self.dataInfo.upperLimit

        Nobs = self.dataInfo.observedN  #Number of observed events
        if expected:
            Nobs = self.dataInfo.expectedBG
        Nexp = self.dataInfo.expectedBG  #Number of expected BG events
        bgError = self.dataInfo.bgError  # error on BG
        lumi = self.globalInfo.lumi
        if (lumi * fb).normalize()._unit:
            ID = self.globalInfo.id
            logger.error("Luminosity defined with wrong units for %s" % (ID))
            return False

        maxSignalXsec = statistics.upperLimit(Nobs, Nexp, bgError, lumi, alpha)

        return maxSignalXsec
Example #19
    def __init__(self,
                 value,
                 dataType,
                 Id,
                 accept_errors_upto=.05,
                 Leff_inner=None,
                 Leff_outer=None):
        """
        :param value: values in string format
        :param dataType: the dataType (upperLimit or efficiencyMap)
        :param Id: an identifier, must be unique for each TxNameData!
        :param accept_errors_upto: If None, do not allow extrapolations outside of
                convex hull.  If float value given, allow that much relative
                uncertainty on the upper limit / efficiency
                when extrapolating outside convex hull.
                This method can be used to loosen the equal branches assumption.
        :param Leff_inner: is the effective inner radius of the detector, given in meters (used for reweighting prompt decays). If None, default values will be used.
        :param Leff_outer: is the effective outer radius of the detector, given in meters (used for reweighting decays outside the detector). If None, default values will be used.


        """
        self.dataType = dataType
        self._id = Id
        self._accept_errors_upto = accept_errors_upto
        self.Leff_inner = Leff_inner
        self.Leff_outer = Leff_outer
        self._V = None
        self.loadData(value)
        if self._keep_values:
            self.origdata = value

        if self.dataType == 'efficiencyMap':
            self.reweightF = defaultEffReweight
        elif self.dataType == 'upperLimit':
            self.reweightF = defaultULReweight
        else:
            raise SModelSError(
                "Default reweighting function not defined for data type %s" %
                self.dataType)
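
A hypothetical instantiation, following the docstrings above (the data string and identifier are illustrative):

value = "[[[[300.*GeV,100.*GeV],[300.*GeV,100.*GeV]], 10.*fb]]"
txdata = TxNameData(value, "upperLimit", "ATLAS-SUSY-2013-05:ul:T2bb")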
Example #20
    def getULFor(self, element, expected=False):
        """
        Returns the upper limit (or expected) for element (only for upperLimit-type).
        Includes the lifetime reweighting (ul/reweight).
        If called for efficiencyMap results raises an error.
        If a mass array is given as input, no lifetime reweighting will be applied.

        :param element: Element object or mass array (with units)
        :param expected: look in self.txnameDataExp, not self.txnameData
        """
        if hasattr(self, "dbClient"):
            ## we have a databaseClient, so we send the request
            ## over the network
            # query = "obs:ATLAS-SUSY-2013-05:ul:T2bb:[[300,100],[300,100]]"
            query = "obs:"
            if expected:
                query = "exp:"
            query += self.globalInfo.id + ":ul:"
            query += self.txName + ":"
            query += self.getMassVectorFromElement(element)
            logger.info ( "sending ul query %s to %s:%d" % \
                          ( query, self.dbClient.servername, self.dbClient.port ) )
            from smodels.tools.physicsUnits import fb
            return self.dbClient.query(query)

        if self.txnameData.dataType != 'upperLimit':
            logger.error("getULFor method can only be used in UL-type data.")
            raise SModelSError()

        if not expected:
            ul = self.txnameData.getValueFor(element)
        else:
            if not self.txnameDataExp:
                return None
            else:
                ul = self.txnameDataExp.getValueFor(element)

        return ul
Example #21
    def getEfficiencyFor(self, mass):
        """
        For upper limit results, checks if the input mass falls inside the
        upper limit grid.  If it does, returns efficiency = 1, else returns
        efficiency = 0.  For efficiency map results, checks if the mass falls
        inside the efficiency map grid.  If it does, returns the corresponding
        efficiency value, else returns efficiency = 0.

        :param mass: mass array (with units)
        :return: efficiency (float)
        """

        #Check if the element appears in Txname:
        val = self.txnameData.getValueFor(mass)
        if isinstance(val, type(fb)):
            return 1.  #The element has an UL, return 1
        elif val is None or math.isnan(val):
            return 0.  #The element mass is outside the data grid
        elif isinstance(val, float):
            return val  #The element has an eff
        else:
            logger.error("Unknown txnameData value: %s" % (str(type(val))))
            raise SModelSError()
Example #22
    def formatInput(self,value,shapeArray):
        """
        Format value according to the shape in shapeArray.
        If shapeArray contains entries = *, the corresponding entries
        in value will be ignored.
        
        :param value: Array to be formatted (e.g. [[200.,100.],[200.,100.]])
        :param shapeArray: Array with format info (e.g. ['*',[float,float]])
        
        :return: formatted array [[200.,100.]]
        
        """

        if shapeArray == '*':
            return None
        elif isinstance(value,list):
            if len(shapeArray) != len(value):
                raise SModelSError("Input value and data shape mismatch (%s,%s)"
                                   % (len(shapeArray), len(value)))
            formatted = [self.formatInput(xi, shapeArray[i]) for i, xi in enumerate(value)]
            return [xi for xi in formatted if xi is not None]
        else:
            return value
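
The wildcard behaviour as a standalone sketch (format_input is a simplified stand-in without the length check):

def format_input(value, shape):
    """Drop entries of value whose shape entry is '*'; keep the rest."""
    if shape == '*':
        return None
    if isinstance(value, list):
        out = [format_input(x, s) for x, s in zip(value, shape)]
        return [x for x in out if x is not None]
    return value

print(format_input([[200., 100.], [300., 150.]], ['*', [float, float]]))
# -> [[300.0, 150.0]]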
Example #23
    def __init__(self, path, globalObj, infoObj):
        self.path = path
        self.globalInfo = globalObj
        self._infoObj = infoObj
        self.txnameData = None
        self.txnameDataExp = None  ## expected Data
        self._topologyList = TopologyList()

        logger.debug('Creating object based on txname file: %s' % self.path)
        #Open the info file and get the information:
        if not os.path.isfile(path):
            logger.error("Txname file %s not found" % path)
            raise SModelSError()
        with open(path, 'r') as txtFile:
            txdata = txtFile.read()
        if "txName" not in txdata:
            raise TypeError
        if 'upperLimits' not in txdata and 'efficiencyMap' not in txdata:
            raise TypeError
        with open(self.path) as txfile:
            content = concatenateLines(txfile.readlines())

        #Get tags in info file:
        tags = [line.split(':', 1)[0].strip() for line in content]
        data = None
        expectedData = None
        dataType = None
        for i, tag in enumerate(tags):
            if not tag: continue
            line = content[i]
            value = line.split(':', 1)[1].strip()
            if tags.count(tag) == 1:
                if ';' in value: value = value.split(';')
                if tag == 'upperLimits' or tag == 'efficiencyMap':
                    data = value
                    dataType = tag
                elif tag == 'expectedUpperLimits':
                    expectedData = value
                    dataType = 'upperLimits'
                else:
                    self.addInfo(tag, value)
            else:
                logger.info("Ignoring unknown field %s found in file %s" \
                             % (tag, self.path))
                continue
        ident = self.globalInfo.id + ":" + dataType[0] + ":" + str(
            self._infoObj.dataId)
        ident += ":" + self.txName
        self.txnameData = TxNameData(data, dataType, ident)
        if expectedData:
            self.txnameDataExp = TxNameData(expectedData, dataType, ident)

        #Builds up a list of elements appearing in constraints:
        elements = []
        if hasattr(self, 'constraint'):
            elements += [Element(el) for el in elementsInStr(self.constraint)]
        if hasattr(self, 'condition') and self.condition:
            conds = self.condition
            if not isinstance(conds, list): conds = [conds]
            for cond in conds:
                for el in elementsInStr(cond):
                    newEl = Element(el)
                    if newEl not in elements: elements.append(newEl)

        # Builds up TopologyList with all the elements appearing in constraints
        # and conditions:
        for el in elements:
            el.sortBranches()
            self._topologyList.addElement(el)
Example #24
    def coordinatesToData(self, point, rotMatrix=None, transVector=None):
        """
        A function that returns the original mass and width array (including the widths
        as tuples) for a given point in PCA space (inverse of dataToCoordinates).

        :param point: Point in PCA space (1D list with size equal
                      to self.full_dimensionality or self.dimensionality)

        :param rotMatrix: Rotation matrix for PCA (e.g. self._V).
                          If None, no rotation is performed.
        :param transVector: Translation vector for PCA (e.g. self.delta_x).
                            If None no translation is performed

        :return: nested mass array including the widths as tuples (e.g. [[(200,1e-10),100],[(200,1e-10),100]])
        """

        if len(point) != self.full_dimensionality and len(
                point) != self.dimensionality:
            logger.error(
                "Wrong point dimensions (%i), it should be %i (reduced dimensions) or %i (full dimensions)"
                % (len(point), self.dimensionality, self.full_dimensionality))
        elif len(point) != self.full_dimensionality:
            pointFull = np.array(point[:])
            pointFull = np.append(pointFull, [0.] *
                                  (self.full_dimensionality - len(point)))
        else:
            pointFull = np.array(point[:])

        massAndWidths = pointFull
        if rotMatrix is not None:
            massAndWidths = np.dot(rotMatrix, massAndWidths)
        if transVector is not None:
            massAndWidths = massAndWidths + transVector

        massAndWidths = massAndWidths.tolist()
        if isinstance(massAndWidths[0], list):
            massAndWidths = massAndWidths[0]
        #Extract masses and transformed widths
        masses = massAndWidths[:len(massAndWidths) - len(self.widthPosition)]
        xwidths = massAndWidths[len(massAndWidths) - len(self.widthPosition):]
        #Rescale widths and add unit:
        widths = [unscaleWidth(xw) for xw in xwidths]
        #Add units (make sure it is consistent with standardUnits)
        massUnit = [
            unit for unit in physicsUnits.standardUnits
            if not (1 * GeV / unit).normalize()._unit
        ][0]
        masses = [m * massUnit for m in masses[:]]
        #Add inclusive entries to mass
        flatShape = flattenArray(self.dataShape)
        if len([x for x in flatShape if str(x) != '*']) != len(masses):
            logger.error(
                "Error trying to add inclusive entries (%s) to flat mass array (%s)."
                % (flatShape, masses))
            raise SModelSError()
        masses = addInclusives(masses, flatShape)
        #Reshape masses according to dataShape:
        if len(masses) != len(flatShape):
            logger.error(
                "Number of elements in %s do not match the number of entries in %s"
                % (masses, self.dataShape))
            raise SModelSError()

        massArray = reshapeList(masses, self.dataShape)
        #Add widths to the mass array
        if len(widths) != len(self.widthPosition):
            logger.error(
                "The number of converted widths (%i) is not the expected (%i)"
                % (len(widths), len(self.widthPosition)))
            raise SModelSError()

        #Combine masses and widths
        massAndWidthArray = []
        for ibr, br in enumerate(massArray):
            if str(br) != '*':
                newBr = [(m, widths.pop(0)) if
                         (ibr, im) in self.widthPosition else m
                         for im, m in enumerate(br)]
            else:
                newBr = br
            massAndWidthArray.append(newBr)

        return massAndWidthArray
Example #25
    def getCombinedUpperLimitFor(self, nsig, expected=False, deltas_rel=0.2):
        """
        Get combined upper limit. If covariances are given in globalInfo, the
        simplified likelihood is used; if json files are given instead, a pyhf
        combination is performed.

        :param nsig: list of signal events in each signal region/dataset. The list
                        should obey the ordering in globalInfo.datasetOrder.
        :param expected: return expected, not observed value
        :param deltas_rel: relative uncertainty in signal (float). Default value is 20%.

        :returns: upper limit on sigma*eff
        """

        if hasattr(self.globalInfo, "covariance" ):
            cov = self.globalInfo.covariance
            if not isinstance(cov, list):
                raise SModelSError("covariance field has wrong type: %s" % type(cov))
            if len(cov) < 1:
                raise SModelSError( "covariance matrix has length %d." % len(cov))

            computer = UpperLimitComputer(ntoys=10000)

            nobs = [x.dataInfo.observedN for x in self._datasets]
            bg = [x.dataInfo.expectedBG for x in self._datasets]

            ret = computer.ulSigma(Data(observed=nobs, backgrounds=bg, covariance=cov,
                                        third_moment=None, nsignal=nsig, deltas_rel=deltas_rel),
                                        marginalize=self._marginalize,
                                        expected=expected)

            if ret is not None:
                #Convert limit on total number of signal events to a limit on sigma*eff
                ret = ret/self.globalInfo.lumi
            logger.debug("SL upper limit : {}".format(ret))
            return ret
        elif hasattr(self.globalInfo, "jsonFiles" ):
            logger.debug("Using pyhf")
            if all([s == 0 for s in nsig]):
                logger.warning("All signals are empty")
                return None
            ulcomputer, combinations = self.getPyhfComputer( nsig )
            if ulcomputer.nWS == 1:
                ret = ulcomputer.ulSigma(expected=expected)
                ret = ret/self.globalInfo.lumi
                logger.debug("pyhf upper limit : {}".format(ret))
                return ret
            else:
                # Looking for the best combination
                logger.debug('self.bestCB : {}'.format(self.bestCB))
                if self.bestCB is None:
                    logger.debug("Performing best expected combination")
                    ulMin = float('+inf')
                    for i_ws in range(ulcomputer.nWS):
                        ul = ulcomputer.ulSigma(expected=True, workspace_index=i_ws)
                        if ul is None:
                            continue
                        if ul < ulMin:
                            ulMin = ul
                            i_best = i_ws
                    self.bestCB = combinations[i_best] # Keeping the index of the best combination for later
                    logger.debug('Best combination : %s' % self.bestCB)
                # Computing upper limit using best combination
                if expected:
                    try:
                        ret = ulMin/self.globalInfo.lumi
                    except NameError:
                        ret = ulcomputer.ulSigma(expected=True, workspace_index=combinations.index(self.bestCB))
                        ret = ret/self.globalInfo.lumi
                else:
                    ret = ulcomputer.ulSigma(expected=False, workspace_index=combinations.index(self.bestCB))
                    ret = ret/self.globalInfo.lumi
                logger.debug("pyhf upper limit : {}".format(ret))
                return ret
        else:
            logger.error ( "no covariance matrix or json file given in globalInfo.txt for %s" % self.globalInfo.id )
            raise SModelSError( "no covariance matrix or json file given in globalInfo.txt for %s" % self.globalInfo.id )
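
A hypothetical call, assuming expResult is the corresponding experimental result and nsig follows the ordering in globalInfo.datasetOrder:

nsig = [0.3, 1.2, 0.0]   # illustrative signal yields, one per signal region
ul = expResult.getCombinedUpperLimitFor(nsig, expected=False, deltas_rel=0.2)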
Example #26
    def getValueFor(self, element):
        """
        Interpolates the value and returns the UL or efficiency for the
        respective element rescaled according to the reweighting function
        self.reweightF. For UL-type data the default rescaling is ul -> ul/(fraction of prompt decays)
        and for EM-type data it is eff -> eff*(fraction of prompt decays).
        If a mass array is given as input, no lifetime reweighting will be applied.

        :param element: Element object or mass array (with units)
        """

        #For backward compatibility:
        if not hasattr(self, 'Leff_inner'):
            self.Leff_inner = None
        if not hasattr(self, 'Leff_outer'):
            self.Leff_outer = None

        #Compute reweight factor according to lifetime/widths.
        #For the widths not used in interpolation we assume that the
        #analysis requires prompt decays
        #(width=inf for intermediate particles and width=0 for the last particle)
        if isinstance(element, Element):
            #Replace the widths used for interpolation
            #with "prompt" widths (inf for intermediate particles and zero for final particles).
            #This way the reweight factor is only applied to the widths not used
            #for interpolation (since inf and zero result in no reweighting).
            widths = []
            for ibr, br in enumerate(element.totalwidth):
                widths.append([])
                for iw, w in enumerate(br):
                    if (ibr, iw) in self.widthPosition:
                        if iw != len(br) - 1:
                            widths[ibr].append(float('inf') * GeV)
                        else:
                            widths[ibr].append(0. * GeV)
                    else:
                        widths[ibr].append(w)
            reweightFactor = self.reweightF(widths,
                                            Leff_inner=self.Leff_inner,
                                            Leff_outer=self.Leff_outer)
        elif isinstance(element, list):
            reweightFactor = 1.
        else:
            logger.error(
                "Input of getValueFor must be an Element object or a mass array and not %s"
                % str(type(element)))
            raise SModelSError()

        #Returns None or zero, if reweightFactor is None or zero:
        if not reweightFactor:
            return reweightFactor

        #Extract the mass and width of the element
        #and convert it to the PCA coordinates (len(point) = self.full_dimensionality):
        point = self.dataToCoordinates(element,
                                       rotMatrix=self._V,
                                       transVector=self.delta_x)
        val = self.getValueForPoint(point)
        if not isinstance(val, (float, int, unum.Unum)):
            return val

        #Apply reweightFactor (if data has no width or partial width dependence)
        val *= reweightFactor

        return val
Example #27
    def loadBinaryFile(self, lastm_only=False):
        """
        Load a binary database, returning last modified, file count, database.

        :param lastm_only: if true, the database itself is not read.
        :returns: database object, or None, if lastm_only == True.
        """
        if lastm_only and self.pcl_meta.mtime:
            ## doesn't need to load the database; mtime is already loaded
            return None

        if not os.path.exists(self.pcl_meta.pathname):
            return None

        try:
            with open(self.pcl_meta.pathname, "rb") as f:
                t0 = time.time()
                pclfilename = self.pcl_meta.pathname
                self.pcl_meta = serializer.load(f)
                self.pcl_meta.pathname = pclfilename
                if self.force_load == "pcl":
                    self.txt_meta = self.pcl_meta
                if not lastm_only:
                    if not self.force_load == "pcl" and self.pcl_meta.needsUpdate(
                            self.txt_meta):
                        logger.warning("Something changed in the environment."
                                       "Regenerating.")
                        self.createBinaryFile()
                        return self
                    logger.info(
                        "loading binary db file %s format version %s" %
                        (self.pcl_meta.pathname, self.pcl_meta.format_version))
                    if sys.version[0] == "2":
                        self.expResultList = serializer.load(f)
                    else:
                        self.expResultList = serializer.load(f,
                                                             encoding="latin1")
                    t1 = time.time() - t0
                    logger.info( "Loaded database from %s in %.1f secs." % \
                            ( self.pcl_meta.pathname, t1 ) )
                    self.databaseParticles = None
                    try:
                        self.databaseParticles = serializer.load(f)
                    except EOFError:
                        pass  ## a model does not *have* to be defined
                    self.createLinksToModel()
        except (EOFError, ValueError) as e:
            os.unlink(self.pcl_meta.pathname)
            if lastm_only:
                self.pcl_meta.format_version = -1
                self.pcl_meta.mtime = 0
                return self
            logger.error( "%s is not readable (%s)." % \
                            ( self.pcl_meta.pathname, str(e) ) )
            if self.source in ["http", "ftp", "pcl"]:
                logger.error(
                    "source cannot be rebuilt. supply a different path to the database in your ini file."
                )
                raise SModelSError()
            self.createBinaryFile()
        # self.txt_meta = self.pcl_meta
        return self
Example #28
    def fetchFromScratch(self, path, store, discard_zeroes):
        """ fetch database from scratch, together with
            description.
            :param store: filename to store json file.
        """
        def sizeof_fmt(num, suffix='B'):
            for unit in ['', 'K', 'M', 'G', 'T', 'P']:
                if abs(num) < 1024.:
                    return "%3.1f%s%s" % (num, unit, suffix)
                num /= 1024.0
            return "%.1f%s%s" % (num, 'Yi', suffix)

        import requests
        try:
            r = requests.get(path, timeout=5)
        except requests.exceptions.RequestException as e:
            logger.error("Exception when trying to fetch database: %s" % e)
            logger.error(
                "Consider supplying a different database path in the ini file (possibly a local one)"
            )
            raise SModelSError()
        if r.status_code != 200:
            logger.error( "Error %d: could not fetch %s from server." % \
                           ( r.status_code, path ) )
            raise SModelSError()
        ## its new so store the description
        with open(store, "w") as f:
            f.write(r.text)
        if not "url" in r.json().keys():
            logger.error("cannot parse json file %s." % path)
            raise SModelSError()
        size = r.json()["size"]
        cDir, defused = cacheDirectory(create=True, reportIfDefault=True)
        t0 = time.time()
        r2 = requests.get(r.json()["url"], stream=True, timeout=5)
        filename = os.path.join(cDir, r2.url.split("/")[-1])
        msg = "caching the downloaded database in %s." % cDir
        if defused:
            msg += " If you want the pickled database file to be cached in a different location, set the environment variable SMODELS_CACHEDIR, e.g. to '/tmp'."
        logger.warning(msg)
        logger.info ( "need to fetch %s and store in %s. size is %s." % \
                      ( r.json()["url"], filename, sizeof_fmt ( size ) ) )
        with open(filename, "wb") as dump:
            import fcntl
            fcntl.lockf(dump, fcntl.LOCK_EX)
            if not self.inNotebook():  ## \r doesn't work in notebooks
                print("         " + " " * 51 + "<", end="\r")
            print("loading >", end="")
            for x in r2.iter_content(chunk_size=int(size / 50)):
                dump.write(x)
                dump.flush()
                print(".", end="")
                sys.stdout.flush()
            if self.inNotebook():
                print("done.")
            else:
                print("")
            fcntl.lockf(dump, fcntl.LOCK_UN)
            dump.close()
        logger.info("fetched %s in %d secs." % (r2.url, time.time() - t0))
        logger.debug("store as %s" % filename)
        self.force_load = "pcl"
        return ("./", "%s" % filename)
Example #29
    def __init__(self,
                 base=None,
                 force_load=None,
                 discard_zeroes=True,
                 progressbar=False,
                 subpickle=True):
        """
        :param base: path to the database, or pickle file (string), or http
                     address. If None, "official", or "official_fastlim",
                     use the official database for your code version
                     (including fastlim results, if specified).
                     If "latest", or "latest_fastlim", check for the latest database.
        :param force_load: force loading the text database ("txt"),
                           or binary database ("pcl"), dont force anything if None
        :param discard_zeroes: discard txnames with only zeroes as entries.
        :param progressbar: show a progressbar when building pickle file
                            (needs the python-progressbar module)
        :param subpickle: produce small pickle files per exp result.
                          Should only be used when working on the database.
        """

        self.url = base
        self.source = ""
        if force_load is None and base is not None and base.endswith(".pcl"):
            force_load = "pcl"
        self.force_load = force_load
        self.subpickle = subpickle
        obase = base  ## keep old name for more checks for 'latest'
        if base in [None, "official"]:
            from smodels.installation import officialDatabase
            base = officialDatabase()
        if base in ["official_fastlim"]:
            from smodels.installation import officialDatabase
            base = officialDatabase(fastlim=True)
        if base in ["latest"]:
            from smodels.installation import latestDatabase
            base = latestDatabase()
        if base in ["latest_fastlim"]:
            from smodels.installation import latestDatabase
            base = latestDatabase(fastlim=True)
        if base in ["unittest"]:
            from smodels.installation import testDatabase
            base = testDatabase()
        base, pclfile = self.checkPathName(base, discard_zeroes)
        self.pcl_meta = Meta(pclfile)
        self.expResultList = []
        self.txt_meta = self.pcl_meta
        if not self.force_load == "pcl":
            self.txt_meta = Meta(base, discard_zeroes=discard_zeroes)
        self.progressbar = None
        if progressbar:
            try:
                import progressbar as P
                self.progressbar = P.ProgressBar(widgets=[
                    "Building Database ",
                    P.Percentage(),
                    P.Bar(marker=P.RotatingMarker()),
                    P.ETA()
                ])
            except ImportError as e:
                logger.warning(
                    "progressbar requested, but python-progressbar is not installed."
                )

        if self.force_load == "txt":
            self._setParticles()
            self.loadTextDatabase()
            self.txt_meta.printFastlimBanner()
            return
        if self.force_load == "pcl":
            self.loadBinaryFile()
            self._setParticles()
            self.pcl_meta.printFastlimBanner()
            if "latest" in obase:
                from smodels import installation
                codeVersion = installation.version()
                pclVersion = self.pcl_meta.databaseVersion
                if codeVersion[0] != pclVersion[0]:
                    logger.error(
                        "major versions of code and database differ! code=%s, database=%s"
                        % (codeVersion[0], pclVersion[0]))
            return
        if self.force_load in [None, "none", "None"]:
            self.loadDatabase()
            self._setParticles()
            self.txt_meta.printFastlimBanner()
            return
        logger.error( "when initialising database: force_load=%s is not " \
                       "recognized. Valid values are: pcl, txt, None." % force_load )
        raise SModelSError()
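
Hypothetical instantiations following the docstring above (the local path is illustrative):

db = Database("official")            # official database for this code version
# db = Database("/path/to/db.pcl")   # or load a local pickle file directly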
Example #30
    def dataToCoordinates(self, dataPoint, rotMatrix=None, transVector=None):
        """
        Format a dataPoint to the format used for interpolation.
        All the units are removed, the widths are rescaled and the masses
        and widths are combined in a flat array.
        The input can be an Element object or a nested massAndWidth array
        (with tuples to store the relevant widths).

        :param dataPoint: Element object from which the mass and width arrays will be extracted or
                          a nested mass array from the database, which contain tuples to include
                          the width values

        :param rotMatrix: Rotation matrix for PCA (e.g. self._V).
                          If None, no rotation is performed.
        :param transVector: Translation vector for PCA (e.g. self.delta_x).
                            If None no translation is performed

        :return: Point (list of floats)
        """

        #Collect the data
        if isinstance(dataPoint, Element):
            masses = dataPoint.mass
            widths = dataPoint.totalwidth
        elif isinstance(dataPoint, list):
            masses = [[mw[0] if isinstance(mw, tuple) else mw for mw in br]
                      for br in dataPoint]
            widths = [[mw[1] if isinstance(mw, tuple) else None for mw in br]
                      for br in dataPoint]
        else:
            logger.error(
                "dataPoint must be an element or a nested array including masses and widths"
            )
            raise SModelSError()

        #Select the required masses (remove entries corresponding to inclusive entries in data)
        masses = removeInclusives(masses, self.dataShape)
        #Select the required widths (remove widths not used in interpolation)
        widths = [[
            widths[ibr][im] for im, _ in enumerate(br)
            if (ibr, im) in self.widthPosition
        ] for ibr, br in enumerate(widths)]
        if None in removeUnits(flattenArray(widths), GeV):
            logger.error("Error obtaining widths from %s" % str(dataPoint))
            raise SModelSError()

        #Remove units and flatten arrays:
        masses = flattenArray(masses)
        masses = removeUnits(masses, physicsUnits.standardUnits)
        widths = flattenArray(widths)
        widths = removeUnits(widths, physicsUnits.standardUnits)
        #Rescale widths:
        xwidths = [rescaleWidth(w) for w in widths]

        #Combine masses and rescaled widths in a single point
        point = masses + xwidths

        #Now transform to PCA coordinates (if rotMatrix and transVector are defined):
        if transVector is not None:
            point = np.array([point])
            point = ((point - transVector)).tolist()[0]  #Translate
        if rotMatrix is not None:
            point = np.dot(point, rotMatrix)  # Rotate
            point = point.tolist()

        return point
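
A sketch of the intended round trip with coordinatesToData (Example #24 above), assuming txnameData already holds the PCA rotation _V and offset delta_x, and massArray is a nested mass/width array in the database format:

point = txnameData.dataToCoordinates(massArray,
                                     rotMatrix=txnameData._V,
                                     transVector=txnameData.delta_x)
recovered = txnameData.coordinatesToData(point,
                                         rotMatrix=txnameData._V,
                                         transVector=txnameData.delta_x)
# recovered should reproduce massArray up to the width rescaling round trip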