Python biogemeError примеры, biogeme.exceptions.biogemeError Python примеры использования

Пример #1

0

Показать файл

Файл: draws.py Проект: jiaodaxiaozi/biogeme

def getLatinHypercubeDraws(sampleSize,
                           numberOfDraws,
                           symmetric=False,
                           uniformNumbers=None):
    """ Implementation of the Modified Latin Hypercube Sampling proposed
        by Hess et al, 2006.

    The unit interval is divided into ``sampleSize * numberOfDraws``
    strata of equal length. One number is placed in each stratum, at a
    position defined by the corresponding uniform number, and the
    resulting sequence is randomly shuffled.

    :param sampleSize: number of observations for which draws must be
                       generated. It must be a positive integer k, and
                       k series of draws are generated.
    :type sampleSize: int
    :param numberOfDraws: number of draws to generate.
    :type numberOfDraws: int
    :param symmetric: if True, draws from [-1: 1] are generated.
       If False, draws from [0: 1] are generated.  Default: False
    :type symmetric: bool
    :param uniformNumbers: numpy with uniformly distributed numbers.
       It must contain exactly sampleSize * numberOfDraws values, in
       any shape. It is not modified by the function.
       If None, the numpy uniform number generator is used.
    :type uniformNumbers: numpy.array

    :return: numpy array with the draws, of shape
             (sampleSize, numberOfDraws)
    :rtype: numpy.array

    :raises biogemeError: if numberOfDraws or sampleSize is not
        positive, or if the number of uniform draws provided is
        incorrect.

    Example::

        latinHypercube = dr.getLatinHypercubeDraws(sampleSize=3, numberOfDraws=10)
        array([[0.43362897, 0.5275741 , 0.09215663, 0.94056236, 0.34376868,
                0.87195551, 0.41495219, 0.71736691, 0.23198736, 0.145561  ],
               [0.30520544, 0.78082964, 0.83591146, 0.2733167 , 0.53890906,
                0.61607469, 0.00699715, 0.17179441, 0.7557228 , 0.39733102],
               [0.49676864, 0.67073483, 0.9788854 , 0.5726069 , 0.11894558,
                0.05515471, 0.2640275 , 0.82093696, 0.92034628, 0.64866597]])
    """
    if numberOfDraws <= 0:
        raise excep.biogemeError(f'Invalid number of draws: {numberOfDraws}.')

    if sampleSize <= 0:
        raise excep.biogemeError(
            f'Invalid sample size: {sampleSize} when generating draws.')
    totalSize = numberOfDraws * sampleSize

    if uniformNumbers is None:
        uniformNumbers = np.random.uniform(size=totalSize)
    else:
        uniformNumbers = np.asarray(uniformNumbers)
        if uniformNumbers.size != totalSize:
            errorMsg = (f'A total of {totalSize} uniform draws '
                        f'must be provided, and not {uniformNumbers.size}.')
            raise excep.biogemeError(errorMsg)

    # reshape() returns a new array object, so the shape of the array
    # supplied by the caller is left untouched (assigning to .shape
    # would silently flatten the caller's array).
    flatUniform = uniformNumbers.reshape(totalSize)

    # Stratum i is [i / totalSize, (i + 1) / totalSize[.
    numbers = (np.arange(totalSize) + flatUniform) / totalSize
    if symmetric:
        numbers = 2.0 * numbers - 1.0

    # "numbers" is a freshly allocated array: shuffling it does not
    # affect the input.
    np.random.shuffle(numbers)
    return numbers.reshape(sampleSize, numberOfDraws)

Пример #2

0

Показать файл

Файл: database.py Проект: jiaodaxiaozi/biogeme

    def sampleWithoutReplacement(self,
                                 samplingRate,
                                 columnWithSamplingWeights=None):
        """ Replace the data set by a sample for stochastic algorithms

        The first time the function is called, the current data (or
        the individual map, for panel data) is stored as the full
        data set. At subsequent calls, the columns of the current
        data must be the same as those of the stored full data set.

        :param samplingRate: the proportion of data to include in the sample.
        :type samplingRate: float
        :param columnWithSamplingWeights: name of the column with
              the sampling weights. If None, each row has equal probability.
        :type columnWithSamplingWeights: string

        :return: None

        :raises biogemeError: if the columns of the data have changed
            since the full data set was stored.
        """

        def checkSameColumns(reference, current):
            """Raise an exception if the two frames have different columns."""
            referenceColumns = set(reference.columns)
            currentColumns = set(current.columns)
            if referenceColumns == currentColumns:
                return
            message = 'The structure of the database has been modified since last sample. '
            left = referenceColumns.difference(currentColumns)
            if left:
                message += f' Columns that disappeared: {left}'
            right = currentColumns.difference(referenceColumns)
            if right:
                message += f' Columns that were added: {right}'
            raise excep.biogemeError(message)

        if self.isPanel():
            # Panel data: the sampling is performed on the individual map
            if self.fullIndividualMap is None:
                self.fullIndividualMap = self.individualMap
            else:
                checkSameColumns(self.fullIndividualMap, self.individualMap)

            self.individualMap = self.fullIndividualMap.sample(
                frac=samplingRate,
                weights=columnWithSamplingWeights)
            self.logger.debug(
                f'Full data: {self.fullIndividualMap.shape} '
                f'Sampled data: {self.individualMap.shape}')
            return

        # Cross sectional data: the sampling is performed on the rows
        if self.fullData is None:
            self.fullData = self.data
        else:
            checkSameColumns(self.fullData, self.data)

        self.data = self.fullData.sample(frac=samplingRate,
                                         weights=columnWithSamplingWeights)
        theMsg = (f'Full data: {self.fullData.shape} '
                  f'Sampled data: {self.data.shape}')
        self.logger.debug(theMsg)

Пример #3

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def simulate(self, theBetaValues=None):
        """Applies the formulas to each row of the database.

        :param theBetaValues: values of the parameters to be used in
                the calculations. Parameters that are not in the
                dictionary keep their initial value. If None, the
                initial values are used for all parameters.
                Default: None.
        :type theBetaValues: dict(str, float)

        :return: a pandas data frame with the simulated value. Each
              row corresponds to a row in the database, and each
              column to a formula.

        :rtype: Pandas data frame

        Example::

              # Read the estimation results from a file
              results = res.bioResults(pickleFile = 'myModel.pickle')
              # Simulate the formulas using the nominal values
              simulatedValues = biogeme.simulate(betaValues)

        :raises biogemeError: if the database is organized as panel
            data, or if theBetaValues is neither None nor a dictionary.

        """
        if self.database.isPanel():
            raise excep.biogemeError(
                'Simulation for panel data is not yet implemented. '
                'Remove the "panel" statement to simulate each observation.')

        if theBetaValues is None:
            betaValues = self.betaInitValues
        elif isinstance(theBetaValues, dict):
            # Values provided by the user take precedence over the
            # initial values.
            betaValues = [
                theBetaValues[name]
                if name in theBetaValues else self.betaInitValues[position]
                for position, name in enumerate(self.freeBetaNames)
            ]
        else:
            raise excep.biogemeError(
                'Deprecated. A dictionary must be provided. '
                'It can be obtained from results.getBetaValues()')

        simulated = pd.DataFrame(index=self.database.data.index)
        for label, formula in self.formulas.items():
            simulated[label] = self.theC.simulateFormula(
                formula.getSignature(),
                betaValues,
                self.fixedBetaValues,
                self.database.data)
        return simulated

Пример #4

0

Показать файл

Файл: database.py Проект: jiaodaxiaozi/biogeme

 def useFullSample(self):
     """ Re-establish the full sample for calculation of the likelihood

     For panel data, the individual map is replaced by the stored
     full individual map. Otherwise, the data is replaced by the
     stored full data.

     :raises biogemeError: if no full data set has been stored.
     """
     panel = self.isPanel()
     saved = self.fullIndividualMap if panel else self.fullData
     if panel:
         if saved is None:
             raise excep.biogemeError('Full panel data set has not been saved.')
         self.individualMap = saved
         return
     if saved is None:
         raise excep.biogemeError('Full data set has not been saved.')
     self.data = saved

Пример #5

0

Показать файл

Файл: draws.py Проект: jiaodaxiaozi/biogeme

def getUniform(sampleSize, numberOfDraws, symmetric=False):
    """ Uniform numbers between 0 and 1, or between -1 and 1

    The numbers are generated by the numpy uniform random number
    generator.

    :param sampleSize: number of observations for which draws must be
                       generated. It must be a positive integer k, and
                       k series of draws are generated.
    :type sampleSize: int
    :param numberOfDraws: number of draws to generate.
    :type numberOfDraws: int
    :param symmetric: if True, draws from [-1: 1] are generated.
        If False, draws from [0: 1] are generated.  Default: False
    :type symmetric: bool
    :return: numpy array with the draws, of shape
             (sampleSize, numberOfDraws)
    :rtype: numpy.array

    :raises biogemeError: if numberOfDraws or sampleSize is not positive.

    Example::

        draws = dr.getUniform(sampleSize=3, numberOfDraws=10, symmetric=False)
        array([[0.13053817, 0.63892308, 0.55031567, 0.26347854, 0.16730932,
                0.77745367, 0.48283887, 0.84247501, 0.20550219, 0.02373537],
               [0.68935846, 0.03363595, 0.36006669, 0.26709364, 0.54907706,
                0.22492104, 0.2494399 , 0.17323209, 0.52370401, 0.54091257],
               [0.40310204, 0.89916711, 0.86065005, 0.94277699, 0.09077065,
                0.40107731, 0.22554722, 0.47693135, 0.14058265, 0.17397031]])

        draws = dr.getUniform(sampleSize=3, numberOfDraws=10, symmetric=True)
        array([[ 0.74403237, -0.27995692,  0.33997421, -0.89405035, -0.129761  ,
                 0.86593325,  0.30657422,  0.82435619,  0.498482  ,  0.24561616],
               [-0.48239607, -0.29257815, -0.98342034,  0.68392813, -0.25379429,
                 0.49359859, -0.26459883,  0.14569724, -0.68860467, -0.40903446],
               [ 0.93251627, -0.85166912,  0.58096917,  0.39289882, -0.65088635,
                 0.40114744, -0.61327161,  0.08900539, -0.20985417,  0.67542226]])
    """
    if numberOfDraws <= 0:
        raise excep.biogemeError(f'Invalid number of draws: {numberOfDraws}.')

    if sampleSize <= 0:
        raise excep.biogemeError(
            f'Invalid sample size: {sampleSize} when generating draws.')

    draws = np.random.uniform(size=numberOfDraws * sampleSize)
    if symmetric:
        # Map the unit interval onto an interval centered at zero
        draws = 2.0 * draws - 1.0

    return draws.reshape(sampleSize, numberOfDraws)

Пример #6

0

Показать файл

Файл: testOptimization.py Проект: jiaodaxiaozi/biogeme

 def f(self, batch=None):
     """Value of the Rosenbrock function at the current iterate self.x.

     :param batch: must be None, as the function does not depend on data.

     :return: sum over i of
         100 (x[i+1] - x[i]^2)^2 + (1 - x[i])^2
     :raises biogemeError: if batch is not None.
     """
     if batch is not None:
         raise excep.biogemeError('This function is not data driven.')
     total = 0
     for i in range(len(self.x) - 1):
         valley = self.x[i + 1] - self.x[i]**2
         distance = 1.0 - self.x[i]
         total += 100.0 * valley**2 + distance**2
     return total

Пример #7

0

Показать файл

Файл: hamabs.py Проект: jiaodaxiaozi/biogeme

 def add(self, f, g, h, batch, discount=0.95):
     """Store a new evaluation and return self.f_g_h(discount).

     :param f: value stored in self.f.
     :param g: value stored in self.g. If not None, its length must
         be consistent with the dimension self.n (which it defines
         when self.n is None).
     :param h: value stored in self.h. If not None, its shape must
         be (self.n, self.n).
     :param batch: value stored in self.batch. It must be in ]0, 1].
     :param discount: argument transferred to self.f_g_h.
         Default: 0.95.

     :return: the value returned by self.f_g_h(discount)
     :raises biogemeError: if the dimensions are incompatible or if
         the batch size is out of range.
     """
     if g is not None:
         gradientSize = len(g)
         if self.n is None:
             # The first gradient defines the dimension of the problem
             self.n = gradientSize
         elif gradientSize != self.n:
             raise excep.biogemeError(
                 f'Incompatible dimensions {gradientSize} and {self.n}')
     if h is not None and h.shape != (self.n, self.n):
         raise excep.biogemeError(
             f'Incompatible dimensions {h.shape} and ({self.n},{self.n})'
         )
     if batch <= 0.0 or batch > 1.0:
         raise excep.biogemeError(
             f'Batch size must be between 0 and 1: {batch}')
     # Each of the four histories receives one new entry
     self.f += [f]
     self.g += [g]
     self.h += [h]
     self.batch += [batch]
     return self.f_g_h(discount)

Пример #8

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def validate(self, estimationResults, slices=5):
        """Perform out-of-sample validation.

        The function performs the following tasks:

          - it asks the database to split the data set into slices
            (see the ``split`` function of the database),
          - each slice defines a validation set (the slice itself)
            and an estimation set (the rest of the data),
          - the model is re-estimated on the estimation set,
          - the estimated model is applied on the validation set,
          - the value of the log likelihood for each observation is reported.

        The database of the object is temporarily replaced by each
        estimation set. It is restored before the function
        terminates, even if an estimation or a simulation fails.

        :param estimationResults: results of the model estimation based on the full data.
        :type estimationResults: biogeme.results.bioResults

        :param slices: number of slices.
        :type slices: int

        :return: a list containing as many items as slices. Each item
                 is the result of the simulation on the validation set.
        :rtype: list(pandas.DataFrame)

        :raises biogemeError: if the database is organized as panel data.

        """
        if self.database.isPanel():
            raise excep.biogemeError(
                'Validation for panel data is not yet implemented')
        # Split the database
        validationData = self.database.split(slices)

        keepDatabase = self.database

        allSimulationResults = []
        try:
            for v in validationData:
                # v[0] is the estimation data set, v[1] the validation
                # data set
                self.database = db.Database('Estimation data', v[0])
                # The estimates obtained on the full data are used as
                # starting values.
                self.loglike.changeInitValues(
                    estimationResults.getBetaValues())
                results = self.estimate()
                simulate = {'Loglikelihood': self.loglike}
                simBiogeme = BIOGEME(db.Database('Validation data', v[1]),
                                     simulate)
                simResult = simBiogeme.simulate(results.getBetaValues())
                allSimulationResults.append(simResult)
        finally:
            # Without this, an exception raised during the estimation
            # or the simulation would leave the object attached to a
            # partial data set.
            self.database = keepDatabase
        if self.generatePickle:
            fname = f'{self.modelName}_validation'
            pickleFileName = bf.getNewFileName(fname, 'pickle')
            with open(pickleFileName, 'wb') as f:
                pickle.dump(allSimulationResults, f)
            self.logger.general(
                f'Simulation results saved in file {pickleFileName}')

        return allSimulationResults

Пример #9

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def getBoundsOnBeta(self, betaName):
        """ Returns the bounds on the parameter as defined by the user.

        The bounds are the entry of self.bounds located at the
        position of the parameter in self.freeBetaNames.

        :param betaName: name of the parameter
        :type betaName: string
        :return: lower bound, upper bound
        :rtype: tuple
        :raises biogemeError: if the name of the parameter is not found.
        """
        names = self.freeBetaNames
        if betaName in names:
            return self.bounds[names.index(betaName)]
        raise excep.biogemeError(f'Unknown parameter {betaName}')

Пример #10

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def f(self, batch=None):
        """Calculate the opposite of the value returned by self.like.

        The value is recalculated if a batch is involved (either
        requested now, or used at the previous call), if no value is
        available, or if self.recalculate is already set. In that
        case, the stored gradient, hessian and BHHH are discarded.

        :param batch: batch argument transferred to self.like.
            Default: None.

        :return: -self.fv
        :raises biogemeError: if self.x is None.
        """
        if self.x is None:
            raise excep.biogemeError('The variables must be set first.')

        batchInvolved = not (batch is None and self.batch is None)
        if batchInvolved:
            self.batch = batch

        if batchInvolved or self.fv is None:
            self.recalculate = True

        if not self.recalculate:
            return -self.fv

        self.fv = self.like(self.x, self.scaled, self.batch)
        # The derivatives are not valid anymore
        self.gv = None
        self.hv = None
        self.bhhhv = None
        return -self.fv

Пример #11

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def f_g_bhhh(self, batch=None):
        """Calculate the opposite of the function, the gradient and
        the BHHH matrix returned by self.like_deriv.

        The quantities are recalculated if a batch is involved
        (either requested now, or used at the previous call), if one
        of them is not available, or if self.recalculate is already
        set. In that case, the stored hessian is discarded.

        :param batch: batch argument transferred to self.like_deriv.
            Default: None.

        :return: -self.fv, -self.gv, -self.bhhhv
        :rtype: tuple
        :raises biogemeError: if self.x is None.
        """
        if not (batch is None and self.batch is None):
            self.batch = batch
            self.recalculate = True

        if self.x is None:
            raise excep.biogemeError('The variables must be set first.')

        incomplete = (self.fv is None or self.gv is None
                      or self.bhhhv is None)
        if incomplete:
            self.recalculate = True

        if not self.recalculate:
            return (-self.fv, -self.gv, -self.bhhhv)

        derivatives = self.like_deriv(self.x,
                                      self.scaled,
                                      hessian=False,
                                      bhhh=True,
                                      batch=batch)
        # The third entry is ignored, as the hessian is not requested
        self.fv, self.gv, _, self.bhhhv = derivatives
        self.hv = None
        return (-self.fv, -self.gv, -self.bhhhv)

Пример #12

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def _audit(self):
        """Each expression provides an audit function, that verifies its
           validity. Each formula is audited with respect to the
           database, and the errors and warnings are collected.
           Warnings and errors are both reported to the logger as
           warnings.

           :raise biogemeError: if at least one error has been
                                reported by the formulas. The message
                                contains all errors, one per line.

        """
        errorMessages = []
        warningMessages = []
        for formula in self.formulas.values():
            formulaErrors, formulaWarnings = formula.audit(self.database)
            errorMessages += formulaErrors
            warningMessages += formulaWarnings

        if warningMessages:
            self.logger.warning('\n'.join(warningMessages))
        if not errorMessages:
            return

        report = '\n'.join(errorMessages)
        self.logger.warning(report)
        raise excep.biogemeError(report)

Пример #13

0

Показать файл

Файл: database.py Проект: jiaodaxiaozi/biogeme

    def sampleIndividualMapWithReplacement(self, size=None):
        """ Extract a random sample of the individual map
            from a panel data database, with replacement.

        Useful for bootstrapping. The positions of the rows are
        generated by the numpy random integer generator.

        :param size: size of the sample. If None, a sample of
                   the same size as the individual map will be
                   generated. Default: None.
        :type size: int

        :return: pandas dataframe with the sample.
        :rtype: pandas.DataFrame

        :raises biogemeError: if the database is not panel data.

        """
        if not self.isPanel():
            raise excep.biogemeError(
                'Function sampleIndividualMapWithReplacement '
                'is available only on panel data.')

        numberOfIndividuals = len(self.individualMap)
        if size is None:
            size = numberOfIndividuals
        # The same position may be generated several times
        positions = np.random.randint(0, numberOfIndividuals, size=size)
        return self.individualMap.iloc[positions]

Пример #14

0

Показать файл

Файл: database.py Проект: jiaodaxiaozi/biogeme

    def panel(self, columnName):
        """ Defines the data as panel data

        The name of the column is stored, and the function verifies
        that the number of groups of data is the same before and
        after sorting the data with respect to that column. If so,
        the panel map is built.

        :param columnName: name of the columns that identifies individuals.
        :type columnName: string

        :raises biogemeError: if the number of groups in the data is
            different from the number of groups in the sorted data.

        """
        self.panelColumn = columnName

        # Number of groups of data, in the current order
        observedGroups = tools.countNumberOfGroups(self.data,
                                                   self.panelColumn)
        # Number of groups once the data is sorted
        individuals = tools.countNumberOfGroups(
            self.data.sort_values(by=[self.panelColumn]),
            self.panelColumn)

        if observedGroups == individuals:
            self.buildPanelMap()
            return

        raise excep.biogemeError(
            'The data must be sorted so that the data for the same '
            'individual are consecutive. '
            f'There are {individuals} individuals in the sample, '
            f'and {observedGroups} groups of data '
            f'for column {self.panelColumn}.')

Пример #15

0

Показать файл

Файл: draws.py Проект: jiaodaxiaozi/biogeme

def _haltonNumber(index, base):
    """Element of rank ``index`` of the Halton sequence in basis ``base``.

    The digits of ``index`` in the given basis are mirrored around
    the radix point. Rank 0 corresponds to the value 0.
    """
    n, denom = 0., 1.
    while index > 0:
        index, remainder = divmod(index, base)
        denom *= base
        n += remainder / denom
    return n


def getHaltonDraws(sampleSize,
                   numberOfDraws,
                   symmetric=False,
                   base=2,
                   skip=0,
                   shuffled=False):
    """ Generate Halton draws

    The elements of rank skip + 1 to skip + sampleSize * numberOfDraws
    of the Halton sequence are generated (the element of rank 0, which
    is zero, is never used).

    :param sampleSize: number of observations for which draws must be
                       generated. It must be a positive integer k, and
                       k series of draws are generated.
    :type sampleSize: int

    :param numberOfDraws: number of draws to generate.
    :type numberOfDraws: int

    :param symmetric: if True, draws from [-1: 1] are generated.
           If False, draws from [0: 1] are generated.  Default: False
    :type symmetric: bool

    :param base: generate Halton draws for a given basis.
            It must be at least 2. Ideally, it should be a prime
            number. Default: 2.
    :type base: int

    :param skip: the number of elements of the sequence to be
            discarded. It cannot be negative. Default: 0.
    :type skip: int

    :param shuffled: if True, the complete sequence is shuffled before
            being organized into series. Default: False.
    :type shuffled: bool

    :return: numpy array with the draws, of shape
             (sampleSize, numberOfDraws)
    :rtype: numpy.array

    :raises biogemeError: if numberOfDraws or sampleSize is not
        positive, if base is lower than 2, or if skip is negative.

    Example::

        halton = dr.getHaltonDraws(sampleSize=2, numberOfDraws=10, base=3)
        array([[0.33333333, 0.66666667, 0.11111111, 0.44444444, 0.77777778,
                0.22222222, 0.55555556, 0.88888889, 0.03703704, 0.37037037],
               [0.7037037 , 0.14814815, 0.48148148, 0.81481481, 0.25925926,
                0.59259259, 0.92592593, 0.07407407, 0.40740741, 0.74074074]])
    """
    if numberOfDraws <= 0:
        raise excep.biogemeError(f'Invalid number of draws: {numberOfDraws}.')

    if sampleSize <= 0:
        raise excep.biogemeError(
            f'Invalid sample size: {sampleSize} when generating draws.')

    # With base 1, the decomposition into digits never terminates, and
    # base 0 triggers a division by zero.
    if base < 2:
        raise excep.biogemeError(
            f'Invalid base for Halton draws: {base}. It must be at least 2.')

    # A negative value would generate too few numbers.
    if skip < 0:
        raise excep.biogemeError(
            f'Invalid number of elements to skip: {skip}.')

    totalSize = numberOfDraws * sampleSize

    firstRank = skip + 1
    numbers = np.array([_haltonNumber(rank, base)
                        for rank in range(firstRank, firstRank + totalSize)])
    if shuffled:
        np.random.shuffle(numbers)

    if symmetric:
        numbers = 2.0 * numbers - 1.0

    return numbers.reshape(sampleSize, numberOfDraws)

Пример #16

0

Показать файл

Файл: testOptimization.py Проект: jiaodaxiaozi/biogeme

 def f_g_bhhh(self, batch=None):
     """Not available: an exception is always raised.

     :param batch: ignored.
     :raises biogemeError: at each call.
     """
     message = 'This function is not data driven.'
     raise excep.biogemeError(message)

Пример #17

0

Показать файл

Файл: testOptimization.py Проект: jiaodaxiaozi/biogeme

 def f_g_h(self, batch=None):
     """Return the values of self.f(), self.g() and self.h().

     :param batch: must be None, as the function does not depend on data.

     :return: results of self.f(), self.g() and self.h(), in that order.
     :rtype: tuple
     :raises biogemeError: if batch is not None.
     """
     if batch is None:
         value = self.f()
         gradient = self.g()
         hessian = self.h()
         return value, gradient, hessian
     raise excep.biogemeError('This function is not data driven.')

Пример #18

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def __init__(self,
                 database,
                 formulas,
                 userNotes=None,
                 numberOfThreads=None,
                 numberOfDraws=1000,
                 seed=None,
                 skipAudit=False,
                 removeUnusedVariables=True,
                 suggestScales=True,
                 missingData=99999):
        """Constructor

        The steps are performed in the following order: audit of the
        database, storage of the formulas, removal of the unused
        variables, suggestion of scales, audit of the formulas,
        preparation of the database and of the literals, generation
        of the draws, and transfer of the expressions to the
        object that performs the calculations.

        :param database: choice data.
        :type database: biogeme.database

        :param formulas: expression or dictionary of expressions that
             define the model specification.  The concept is that each
             expression is applied to each entry of the database. The
             keys of the dictionary allow to provide a name to each
             formula.  In the estimation mode, two formulas are
             needed, with the keys 'loglike' and 'weight'. If only one
             formula is provided, it is associated with the label
             'loglike'. If no formula is labeled 'weight', the weight
             of each piece of data is supposed to be 1.0. In the
             simulation mode, the labels of each formula are used as
             labels of the resulting database.
        :type formulas: biogeme.expressions.Expression, or dict(biogeme.expressions.Expression)

        :param userNotes: these notes will be included in the report file.
        :type userNotes: str

        :param numberOfThreads: multi-threading can be used for
            estimation. This parameter defines the number of threads
            to be used. If the parameter is set to None, the number of
            available threads is calculated using
            cpu_count(). Ignored in simulation mode. Defaults: None.
        :type numberOfThreads:  int

        :param numberOfDraws: number of draws used for Monte-Carlo
            integration. Default: 1000.
        :type numberOfDraws: int

        :param seed: seed used for the pseudo-random number
            generation. It is transferred to numpy.random.seed. It is
            useful only when each run should generate the exact same
            result. If None, a new seed is used at each
            run. Default: None.
        :type seed: int

        :param skipAudit: if True, does not check the validity of the
            database and of the formulas. It may save significant
            amount of time for large models and large data
            sets. Default: False.
        :type skipAudit: bool

        :param removeUnusedVariables: if True, all variables not used
           in the expression are removed from the database. Default:
           True.
        :type removeUnusedVariables: bool

        :param suggestScales: if True, Biogeme suggests the scaling of
           the variables in the database. Default: True. See also
           :func:`biogeme.database.Database.suggestScaling`
        :type suggestScales: bool

        :param missingData: if one variable has this value, it is
           assumed that a data is missing and an exception will be
           triggered. Default: 99999.
        :type missingData: float

        :raises biogemeError: if the audit of the database or the
           audit of the formulas reports errors.

        """

        ## Logger that controls the output of messages to the screen and log file.
        self.logger = logger
        if not skipAudit:
            # Note that the boolean entries of the database are
            # replaced by 1 and 0 only when the audit is performed.
            database.data = database.data.replace({True: 1, False: 0})
            listOfErrors, listOfWarnings = database._audit()
            if listOfWarnings:
                self.logger.warning('\n'.join(listOfWarnings))
            if listOfErrors:
                # The errors are reported with the "warning" level
                # before the exception is raised.
                self.logger.warning('\n'.join(listOfErrors))
                raise excep.biogemeError('\n'.join(listOfErrors))

        ## Keyword used for the name of the loglikelihood formula. Default: 'loglike'
        self.loglikeName = 'loglike'
        ## Keyword used for the name of the weight formula. Default: 'weight'
        self.weightName = 'weight'
        ## Name of the model. Default: 'biogemeModelDefaultName'
        self.modelName = 'biogemeModelDefaultName'
        ## monteCarlo is True if one of the expression involves a
        # Monte-Carlo integration.
        self.monteCarlo = False
        # The seed applies to the global numpy random number generator
        np.random.seed(seed)
        ## If True, the values are saved on a file each time the likelihood function is calculated
        self.saveIterations = False
        if not isinstance(formulas, dict):

            ## Object of type biogeme.expressions.Expression
            ## calculating the formula for the loglikelihood
            self.loglike = formulas

            ## Object of type biogeme.expressions.Expression
            ## calculating the weight of each observation in the
            ## sample
            self.weight = None

            ## Dictionary containing Biogeme formulas of type
            ## biogeme.expressions.Expression.
            # The keys are the names of the formulas.
            self.formulas = dict({self.loglikeName: formulas})
        else:
            # Both entries are None if the corresponding key is not
            # in the dictionary.
            self.loglike = formulas.get(self.loglikeName)
            self.weight = formulas.get(self.weightName)
            self.formulas = formulas

        ## biogeme.database object
        self.database = database

        ## User notes
        self.userNotes = userNotes

        ## Missing data
        self.missingData = missingData

        ## keep track of the sample of data used to calculate the
        ## stochastic gradient / hessian
        self.lastSample = None

        ## Init value of the likelihood function
        self.initLogLike = None

        ## Set of the variables involved in at least one formula,
        ## including the column identifying the individuals for panel
        ## data.
        self.usedVariables = set()
        for k, f in self.formulas.items():
            self.usedVariables = self.usedVariables.union(f.setOfVariables())
        if self.database.isPanel():
            self.usedVariables.add(self.database.panelColumn)
        if removeUnusedVariables:
            unusedVariables = set(
                self.database.data.columns) - self.usedVariables
            # Despite its name, error_msg is an information message
            error_msg = (f'Remove {len(unusedVariables)} '
                         'unused variables from the database '
                         f'as only {len(self.usedVariables)} are used.')
            self.logger.general(error_msg)
            self.database.data = \
                self.database.data.drop(columns=list(unusedVariables))

        if suggestScales:
            suggestedScales = self.database.suggestScaling(
                columns=self.usedVariables)
            if not suggestedScales.empty:
                # "logger" is the object stored in self.logger above
                logger.detailed(
                    'It is suggested to scale the following variables.')
                for index, row in suggestedScales.iterrows():
                    error_msg = (
                        f'Multiply {row["Column"]} by\t{row["Scale"]} '
                        'because the largest (abs) value is\t'
                        f'{row["Largest"]}')
                    logger.detailed(error_msg)
                error_msg = ('To remove this feature, set the parameter '
                             'suggestScales to False when creating the '
                             'BIOGEME object.')
                logger.detailed(error_msg)

        if not skipAudit:
            # Audit of the formulas. The audit of the database has
            # been performed above.
            self._audit()

        ## Object created by cb.pyBiogeme(), that receives the draws
        ## and the expressions below.
        self.theC = cb.pyBiogeme()

        self._prepareDatabaseForFormula()
        self._prepareLiterals()

        ## Boolean variable, True if the HTML file with the results must be generated.
        self.generateHtml = True

        ## Boolean variable, True if the pickle file with the results must be generated.
        self.generatePickle = True

        ## Name of the column defining weights for batch sampling in
        ## stochastic optimization.
        self.columnForBatchSamplingWeights = None

        ## Number of threads used for parallel computing. Default: the number of CPU available.
        self.numberOfThreads = mp.cpu_count(
        ) if numberOfThreads is None else numberOfThreads
        start_time = datetime.now()
        self._generateDraws(numberOfDraws)
        # The draws are transferred only if self.monteCarlo is True
        if self.monteCarlo:
            self.theC.setDraws(self.database.theDraws)
        ## Time needed to generate the draws.
        self.drawsProcessingTime = datetime.now() - start_time
        # No expression is transferred if there is no formula for the
        # loglikelihood.
        if self.loglike is not None:

            ## Internal signature of the formula for the loglikelihood
            self.loglikeSignatures = self.loglike.getSignature()
            if self.weight is None:
                self.theC.setExpressions(self.loglikeSignatures,
                                         self.numberOfThreads)
            else:
                ## Internal signature of the formula for the weight
                self.weightSignatures = self.weight.getSignature()
                self.theC.setExpressions(self.loglikeSignatures,
                                         self.numberOfThreads,
                                         self.weightSignatures)

        ## Time needed to calculate the bootstrap standard errors
        self.bootstrap_time = None

        ## Results of the bootstrap calculation.
        self.bootstrap_results = None

        ## Information provided by the optimization algorithm after completion.
        self.optimizationMessages = None

        ## Name of the file where intermediate iterations are stored
        self.file_iterations = None

        ## Default bounds, replacing None, for the CFSQP algorithm
        self.cfsqp_default_bounds = 1000

        ## Parameters to be transferred to the optimization algorithm
        self.algoParameters = None

        ## Optimization algorithm
        self.algorithm = None

        ## Store the best iteration found so far.
        self.bestIteration = None

Пример #19

0

Показать файл

Файл: database.py Проект: jiaodaxiaozi/biogeme

    def generateDraws(self, types, names, numberOfDraws):
        """Generate the draws of every requested random variable.

        For each name, the type of draws is looked up first among the
        native generators and then among the user defined ones. The
        selected generator is called with the sample size and the
        number of draws, and the shape of its output is verified.

        :param types: A dict indexed by the names of the variables,
                      describing the types of draws. Each of them can
                      be a native type or any type defined by the
                      function database.setRandomNumberGenerators
        :type types: dict

        :param names: the list of names of the variables that require
                      draws to be generated.
        :type names: list of strings

        :param numberOfDraws: number of draws to generate.
        :type numberOfDraws: int

        :return: a 3-dimensional table with draws. The 3 dimensions are

              1. number of individuals
              2. number of draws
              3. number of variables

        :rtype: numpy.array

        :raises biogemeError: if a type of draws is unknown, or if a
            generator returns an array that does not have the shape
            (sample size, number of draws).

        Example::

              types = {'randomDraws1': 'NORMAL_MLHS_ANTI',
                       'randomDraws2': 'UNIFORM_MLHS_ANTI',
                       'randomDraws3': 'UNIFORMSYM_MLHS_ANTI'}
              theDrawsTable = myData.generateDraws(types,
                  ['randomDraws1', 'randomDraws2', 'randomDraws3'], 10)

        """
        self.numberOfDraws = numberOfDraws

        # One table per variable, each of shape (individuals, draws).
        drawsPerVariable = []
        for variable in names:
            kind = types[variable]
            self.typesOfDraws[variable] = kind

            # Native generators take precedence over user defined ones.
            generator = self.nativeRandomNumberGenerators.get(kind)
            if generator is None:
                generator = self.userRandomNumberGenerators.get(kind)
            if generator is None:
                native = self.nativeRandomNumberGenerators
                user = self.userRandomNumberGenerators
                errorMsg = (f'Unknown type of draws for '
                            f'variable {variable}: {kind}. '
                            f'Native types: {native}. '
                            f'User defined: {user}')
                raise excep.biogemeError(errorMsg)

            # Element 0 of the tuple is the function generating the draws.
            table = generator[0](self.getSampleSize(), numberOfDraws)
            expectedShape = (self.getSampleSize(), numberOfDraws)
            if table.shape != expectedShape:
                errorMsg = (f'The draw generator for {variable} must'
                            f' generate a numpy array of dimensions'
                            f' ({self.getSampleSize()}, {numberOfDraws})'
                            f' instead of {table.shape}')
                raise excep.biogemeError(errorMsg)
            drawsPerVariable.append(table)

        ## Draws as a three-dimensional numpy series. The stacked table
        # is indexed (variables, individuals, draws); the first axis is
        # moved to the end so that the dimensions become
        # 1. number of individuals
        # 2. number of draws
        # 3. number of variables
        self.theDraws = np.moveaxis(np.array(drawsPerVariable), 0, -1)
        return self.theDraws

Пример #20

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def calculateLikelihoodAndDerivatives(self,
                                          x,
                                          scaled,
                                          hessian=False,
                                          bhhh=False,
                                          batch=None):
        """Calculate the value of the log likelihood function and its derivatives.

        :param x: vector of values for the parameters.
        :type x: list(float)

        :param scaled: if True, the value of the function and of its
                       derivatives are divided by the sample size
                       before being returned.
        :type scaled: bool

        :param hessian: if True, the hessian is calculated. Default: False.
        :type hessian: bool

        :param bhhh: if True, the BHHH matrix is calculated. Default: False.
        :type bhhh: bool

        :param batch: if not None, calculates the likelihood on a
                       random sample of the data. The value of the
                       parameter must be strictly between 0 and 1, and
                       represents the share of the data that will be
                       used. Default: None
        :type batch: float


        :return: f, g, h, bh where

                - f is the value of the function (float)
                - g is the gradient (numpy.array)
                - h is the hessian (numpy.array)
                - bh is the BHHH matrix (numpy.array)

        :rtype: tuple  float, numpy.array, numpy.array, numpy.array

        :raises ValueError: if the length of the list x is incorrect
        :raises biogemeError: if scaled is True and the sample size is zero

        """

        if len(x) != len(self.betaInitValues):
            error_msg = (f'Input vector must be of length '
                         f'{len(self.betaInitValues)} and not {len(x)}')
            raise ValueError(error_msg)
        self._prepareDatabaseForFormula(batch)

        f, g, h, bh = self.theC.calculateLikelihoodAndDerivatives(
            x, self.fixedBetaValues, self.betaIds, hessian, bhhh)

        # Reporting every parameter is only readable for small models.
        if len(self.freeBetaNames) <= 30:
            for name, value in zip(self.freeBetaNames, x):
                self.logger.debug(f'{name}: {value:10.7g}')
        hmsg = ''
        if hessian:
            hmsg = f'Hessian norm:  {np.linalg.norm(h):10.1g}'
        bhhhmsg = ''
        if bhhh:
            bhhhmsg = f'BHHH norm:  {np.linalg.norm(bh):10.1g}'
        self.logger.general(
            f'Log likelihood (N = {self.database.getSampleSize()}): {f:10.7g}'
            f' Gradient norm: {np.linalg.norm(g):10.1g}'
            f' {hmsg} {bhhhmsg}')

        if self.saveIterations:
            # The saved file must always hold the parameters of the best
            # log likelihood seen so far. The reference value is
            # therefore updated each time it is matched or improved;
            # otherwise any iterate better than the very first one
            # would overwrite a better iterate saved earlier.
            if self.bestIteration is None or f >= self.bestIteration:
                self.bestIteration = f
                with open(self.file_iterations, 'w') as pf:
                    for i, v in enumerate(x):
                        print(f'{self.freeBetaNames[i]} = {v}', file=pf)

        if scaled:
            N = float(self.database.getSampleSize())
            if N == 0:
                raise excep.biogemeError(f'Sample size is {N}')

            return f / N, np.asarray(g) / N, np.asarray(h) / N, np.asarray(
                bh) / N
        return f, np.asarray(g), np.asarray(h), np.asarray(bh)

Пример #21

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def estimate(self,
                 bootstrap=0,
                 algorithm=opt.simpleBoundsNewtonAlgorithmForBiogeme,
                 algoParameters=None,
                 cfsqp_default_bounds=1000.0,
                 saveIterations=False,
                 file_iterations='__savedIterations.txt'):
        """Estimate the parameters of the model.

        :param bootstrap: number of bootstrap resampling used to
               calculate the variance-covariance matrix using
               bootstrapping. If the number is 0, bootstrapping is not
               applied. Default: 0.
        :type bootstrap: int

        :param algorithm: optimization algorithm to use for the
               maximum likelihood estimation. It is stored in
               self.algorithm before self.optimize is called.
               Default: Biogeme's Newton's algorithm with simple bounds.
        :type algorithm: function

        :param algoParameters: parameters to transfer to the optimization algorithm
        :type algoParameters: dict

        :param cfsqp_default_bounds: if the user does not provide bounds
              on the parameters, CFSQP assumes that the bounds are
              [-cfsqp_default_bounds, cfsqp_default_bounds]
        :type cfsqp_default_bounds: float

        :param saveIterations: if True, the values of the parameters
                               corresponding to the largest value of
                               the likelihood function are saved in a
                               text file (one 'name = value' line per
                               parameter) each time the likelihood is
                               evaluated. Default: False.
        :type saveIterations: bool

        :param file_iterations: name of the file where to save the
                               values of the parameters. Default:
                               '__savedIterations.txt'
        :type file_iterations: str

        :return: object containing the estimation results.
        :rtype: biogeme.bioResults

        Example::

            # Create an instance of biogeme
            biogeme  = bio.BIOGEME(database, logprob)

            # Gives a name to the model
            biogeme.modelName = 'mymodel'

            # Estimate the parameters
            results = biogeme.estimate()

        :raises biogemeError: if no expression has been provided for the
            likelihood, or if the formula contains no parameter to estimate

        """

        if self.loglike is None:
            raise excep.biogemeError(
                'No log likelihood function has been specificed')
        if len(self.freeBetaNames) == 0:
            raise excep.biogemeError(f'There is no parameter to estimate'
                                     f' in the formula: {self.loglike}.')

        self.algorithm = algorithm
        self.algoParameters = algoParameters

        self.cfsqp_default_bounds = cfsqp_default_bounds

        # The initial likelihood is calculated before the options
        # controlling the saving of the iterations are updated.
        self.calculateInitLikelihood()
        self.saveIterations = saveIterations
        self.file_iterations = f'{file_iterations}'
        self.bestIteration = None

        start_time = datetime.now()

        output = self.optimize(self.betaInitValues)
        xstar, optimizationMessages = output
        ## Running time of the optimization algorithm
        optimizationMessages['Optimization time'] = datetime.now() - start_time
        ## Information provided by the optimization algorithm after completion.
        self.optimizationMessages = optimizationMessages

        fgHb = self.calculateLikelihoodAndDerivatives(xstar,
                                                      scaled=False,
                                                      hessian=True,
                                                      bhhh=True)
        if not np.isfinite(fgHb[2]).all():
            warning_msg = ('Numerical problems in calculating '
                           'the analytical hessian. Finite differences'
                           ' is tried instead.')
            self.logger.warning(warning_msg)
            finDiffHessian = self.likelihoodFiniteDifferenceHessian(xstar)
            # The test must be performed on the finite difference
            # hessian itself: the analytical one is already known to
            # be invalid at this point.
            if not np.isfinite(finDiffHessian).all():
                self.logger.warning(
                    'Numerical problems with finite difference hessian as well.'
                )
            else:
                fgHb = fgHb[0], fgHb[1], finDiffHessian, fgHb[3]
        ## numpy array, of size B x K,
        # where
        #        - B is the number of bootstrap iterations
        #        - K is the number of parameters to estimate
        self.bootstrap_results = None
        if bootstrap > 0:
            start_time = datetime.now()

            self.logger.general(
                f'Re-estimate the model {bootstrap} times for bootstrapping')
            self.bootstrap_results = np.empty(shape=[bootstrap, len(xstar)])
            self.logger.temporarySilence()
            try:
                for b in range(bootstrap):
                    if self.database.isPanel():
                        sample = (
                            self.database.sampleIndividualMapWithReplacement())
                        self.theC.setDataMap(sample)
                    else:
                        sample = self.database.sampleWithReplacement()
                        self.theC.setData(sample)
                    # Each re-estimation starts from the solution xstar.
                    x_br, _ = self.optimize(xstar)
                    self.bootstrap_results[b] = x_br

                ## Time needed to generate the bootstrap results
                self.bootstrap_time = datetime.now() - start_time
            finally:
                # The logger must not stay silenced if a re-estimation
                # fails.
                self.logger.resume()
        rawResults = res.rawResults(self,
                                    xstar,
                                    fgHb,
                                    bootstrap=self.bootstrap_results)
        r = res.bioResults(rawResults)
        if self.generateHtml:
            r.writeHtml()
        if self.generatePickle:
            r.writePickle()
        return r

Пример #22

0

Показать файл

Файл: biogeme.py Проект: jiaodaxiaozi/biogeme

    def quickEstimate(self,
                      algorithm=opt.simpleBoundsNewtonAlgorithmForBiogeme,
                      algoParameters=None):
        """Estimate the parameters of the model, skipping extra calculations.

        Only the optimization and one evaluation of the log likelihood
        at the solution are performed. The gradient, the hessian and
        the BHHH matrix are not calculated and are reported as None.

        :param algorithm: optimization algorithm to use for the
               maximum likelihood estimation. It is stored in
               self.algorithm before self.optimize is called.
               Default: Biogeme's Newton's algorithm with simple bounds.
        :type algorithm: function

        :param algoParameters: parameters to transfer to the optimization algorithm
        :type algoParameters: dict

        :return: object containing the estimation results.
        :rtype: biogeme.results.bioResults

        Example::

            # Create an instance of biogeme
            biogeme  = bio.BIOGEME(database, logprob)

            # Gives a name to the model
            biogeme.modelName = 'mymodel'

            # Estimate the parameters
            results = biogeme.quickEstimate()

        :raises biogemeError: if no expression has been provided for the
            likelihood, or if the formula contains no parameter to estimate

        """
        if self.loglike is None:
            raise excep.biogemeError(
                'No log likelihood function has been specificed')
        if not len(self.freeBetaNames):
            raise excep.biogemeError(
                f'There is no parameter to estimate in the formula: '
                f'{self.loglike}.')

        self.algorithm = algorithm
        self.algoParameters = algoParameters

        tic = datetime.now()
        xstar, messages = self.optimize(self.betaInitValues)
        ## Running time of the optimization algorithm
        messages['Optimization time'] = datetime.now() - tic
        ## Information provided by the optimization algorithm after completion.
        self.optimizationMessages = messages

        # Only the value of the function is available: no derivatives.
        finalLogLike = self.calculateLikelihood(xstar, scaled=False)
        rawResults = res.rawResults(self,
                                    xstar,
                                    (finalLogLike, None, None, None),
                                    bootstrap=self.bootstrap_results)
        return res.bioResults(rawResults)

Пример #23

0

Показать файл

Файл: draws.py Проект: jiaodaxiaozi/biogeme

def getNormalWichuraDraws(sampleSize,
                          numberOfDraws,
                          uniformNumbers=None,
                          antithetic=False):
    """Generate pseudo-random numbers from a normal distribution N(0, 1)

    It uses the Algorithm AS241 Appl. Statist. (1988) Vol. 37, No. 3,
    which produces the normal deviate z corresponding to a given lower
    tail area of p; z is accurate to about 1 part in :math:`10^{16}`.

    With q = p - 0.5, the algorithm uses three rational approximations:
    one for the central region |q| <= 0.425, and two for the tails,
    depending on whether sqrt(-log(min(p, 1 - p))) exceeds 5 or not.

    :param sampleSize: number of observations for which draws must be
                       generated. It must be strictly positive.
    :type sampleSize: int

    :param numberOfDraws: number of draws to generate.
    :type numberOfDraws: int

    :param uniformNumbers: numpy with uniformly distributed numbers.
               If None, the numpy uniform number generator is used.
               It must contain as many values as the draws actually
               generated (half of them in the antithetic case). The
               array is not modified.
    :type uniformNumbers: numpy.array
    :param antithetic: if True, only half of the draws are
                       actually generated, and the series are completed
                       with their antithetic version.
    :type antithetic: bool

    :return: numpy array with the draws, of dimension
             sampleSize x numberOfDraws. A probability that is not
             strictly between 0 and 1 generates the value 0.
    :rtype: numpy.array

    :raises biogemeError: if the number of draws or the sample size is
        not strictly positive, if an odd number of antithetic draws is
        requested, or if the number of uniform draws provided is
        incorrect.

    Example::

        draws = dr.getNormalWichuraDraws(sampleSize=3, numberOfDraws=10)
        array([[ 0.52418458, -1.04344204, -2.11642482,  0.48257162, -2.67188279,
                -1.89993283,  0.28251041, -0.38424425,  1.53182226,  0.30651874],
               [-0.7937038 , -0.07884121, -0.91005616, -0.98855175,  1.09405753,
                -0.5997651 , -1.70785113,  1.57571384, -0.33208723, -1.03510102],
               [-0.13853654,  0.92595498, -0.80136586,  1.68454196,  0.9955927 ,
                -0.28615154,  2.10635541,  0.0436191 , -0.25417774,  0.01026933]])

    """
    if numberOfDraws <= 0:
        raise excep.biogemeError(f'Invalid number of draws: {numberOfDraws}.')

    if antithetic:
        if 2 * int(numberOfDraws / 2) != numberOfDraws:
            errorMsg = (f'Please specify an even number of draws for '
                        f'antithetic draws. Requested number of '
                        f'{numberOfDraws}.')
            raise excep.biogemeError(errorMsg)
        numberOfDraws = int(numberOfDraws / 2)

    if sampleSize <= 0:
        raise excep.biogemeError(
            f'Invalid sample size: {sampleSize} when generating draws.')
    totalSize = numberOfDraws * sampleSize

    # Thresholds and shifts of the algorithm AS241 (routine PPND16).
    split1 = 0.425e+00
    split2 = 5.e+00
    const1 = 0.180625e+00
    const2 = 1.6e+00

    # Coefficients of the rational approximations. Each tuple is
    # ordered from the highest degree to the constant term, as expected
    # by np.polyval, which evaluates the polynomial with Horner's scheme.

    # Central region: coefficients a7, ..., a0 and b7, ..., b1, 1
    centralNum = (2.5090809287301226727e+03,
                  3.3430575583588128105e+04,
                  6.7265770927008700853e+04,
                  4.5921953931549871457e+04,
                  1.3731693765509461125e+04,
                  1.9715909503065514427e+03,
                  1.3314166789178437745e+02,
                  3.3871328727963666080e+00)
    centralDen = (5.2264952788528545610e+03,
                  2.8729085735721942674e+04,
                  3.9307895800092710610e+04,
                  2.1213794301586595867e+04,
                  5.3941960214247511077e+03,
                  6.8718700749205790830e+02,
                  4.2313330701600911252e+01,
                  1.0)
    # Tail, not too far from the center: c7, ..., c0 and d7, ..., d1, 1
    nearTailNum = (7.74545014278341407640e-04,
                   2.27238449892691845833e-02,
                   2.41780725177450611770e-01,
                   1.27045825245236838258e+00,
                   3.64784832476320460504e+00,
                   5.76949722146069140550e+00,
                   4.63033784615654529590e+00,
                   1.42343711074968357734e+00)
    nearTailDen = (1.05075007164441684324e-09,
                   5.47593808499534494600e-04,
                   1.51986665636164571966e-02,
                   1.48103976427480074590e-01,
                   6.89767334985100004550e-01,
                   1.67638483018380384940e+00,
                   2.05319162663775882187e+00,
                   1.0)
    # Tail, far from the center: e7, ..., e0 and f7, ..., f1, 1
    farTailNum = (2.01033439929228813265e-07,
                  2.71155556874348757815e-05,
                  1.24266094738807843860e-03,
                  2.65321895265761230930e-02,
                  2.96560571828504891230e-01,
                  1.78482653991729133580e+00,
                  5.46378491116411436990e+00,
                  6.65790464350110377720e+00)
    farTailDen = (2.04426310338993978564e-15,
                  1.42151175831644588870e-07,
                  1.84631831751005468180e-05,
                  7.86869131145613259100e-04,
                  1.48753612908506148525e-02,
                  1.36929880922735805310e-01,
                  5.99832206555887937690e-01,
                  1.0)

    if uniformNumbers is None:
        uniformNumbers = np.random.uniform(size=totalSize)
    else:
        uniformNumbers = np.asarray(uniformNumbers)
        if uniformNumbers.size != totalSize:
            errorMsg = (f'A total of {totalSize} uniform draws must be '
                        f'provided, and not {uniformNumbers.size}.')
            raise excep.biogemeError(errorMsg)
    # reshape does not alter the shape of the array owned by the caller.
    uniformNumbers = uniformNumbers.reshape((totalSize, ))

    q = uniformNumbers - 0.5
    draws = np.zeros(uniformNumbers.shape)

    # Central region. The test is performed on q = p - 0.5, not on p.
    central = np.abs(q) <= split1
    rCentral = const1 - q[central] * q[central]
    draws[central] = q[central] *\
        np.polyval(centralNum, rCentral) /\
        np.polyval(centralDen, rCentral)

    # Tails: work with the probability of the closest tail.
    tail = np.logical_not(central)
    lowerTail = np.logical_and(tail, q < 0.0)
    tailProba = np.where(q < 0.0, uniformNumbers, 1 - uniformNumbers)
    # If the tail probability is not positive, the draw is left at 0.
    valid = np.logical_and(tail, tailProba > 0)
    r = np.zeros(uniformNumbers.shape)
    r[valid] = np.sqrt(-np.log(tailProba[valid]))

    nearTail = np.logical_and(valid, r <= split2)
    farTail = np.logical_and(valid, r > split2)

    rNear = r[nearTail] - const2
    draws[nearTail] = np.polyval(nearTailNum, rNear) /\
        np.polyval(nearTailDen, rNear)

    rFar = r[farTail] - split2
    draws[farTail] = np.polyval(farTailNum, rFar) /\
        np.polyval(farTailDen, rFar)

    # The approximations return a positive deviate: the sign is
    # changed for the lower tail.
    draws[lowerTail] = -draws[lowerTail]

    draws = draws.reshape((sampleSize, numberOfDraws))

    if antithetic:
        draws = np.concatenate((draws, -draws), axis=1)

    return draws

Пример #24

0

Показать файл

Файл: database.py Проект: jiaodaxiaozi/biogeme

    def __init__(self, name, pandasDatabase):
        """Constructor

        Stores the data frame, initializes the attributes of the
        object, and audits the database.

        :param name: name of the database.
        :type name: string

        :param pandasDatabase: data stored in a pandas data frame.
        :type pandasDatabase: pandas.DataFrame

        :raises biogemeError: if the audit of the database reports errors.

        """
        self.logger = msg.bioMessage()
        ## Name of the database. Used mainly for the file name when dumping data.
        self.name = name

        ## Pandas data frame containing the data.
        self.data = pandasDatabase
        self.fullData = pandasDatabase

        ## self.variables is initialized by _generateHeaders()
        self.variables = None
        self._generateHeaders()

        ## Number of observations removed by the function Database.remove
        self.excludedData = 0

        ## Name of the column identifying the individuals in a panel
        ## data context. None if data is not panel.
        self.panelColumn = None

        ## map identifying the range of observations for each
        ## individual in a panel data context. None if data is not
        ## panel.
        self.individualMap = None
        self.fullIndividualMap = None

        ## Initialize the dictionary containing random number
        ## generators with a series of native generators.
        self._initNativeRandomNumberGenerators()

        ## Dictionary containing user defined random number
        ## generators. Defined by the function
        ## Database.setRandomNumberGenerators that checks that
        ## reserved keywords are not used. The element of the
        ## dictionary is a tuple with two elements: (0) the function
        ## generating the draws, and (1) a string describing the type of draws
        self.userRandomNumberGenerators = dict()

        ## Number of draws generated by the function Database.generateDraws.
        ## Value 0 if this function is not called.
        self.numberOfDraws = 0
        ## Types of draws for Monte Carlo integration
        self.typesOfDraws = {}

        self._auditDone = False

        ## Draws for Monte-Carlo integration
        self.theDraws = None

        ## Availability expression to check
        self._avail = None

        ## Choice expression to check
        self._choice = None

        ## Expression to check
        self._expression = None

        # The audit is performed last, once all attributes exist.
        # Warnings are only logged; errors are logged and then raised.
        listOfErrors, listOfWarnings = self._audit()
        if listOfWarnings:
            self.logger.warning('\n'.join(listOfWarnings))
        if listOfErrors:
            self.logger.warning('\n'.join(listOfErrors))
            raise excep.biogemeError('\n'.join(listOfErrors))

Пример #25

0

Показать файл

Файл: hamabs.py Проект: jiaodaxiaozi/biogeme

def hamabs(fct, initBetas, fixedBetas, betaIds, bounds, parameters=None):
    """
    Algorithm inspired by `Lederrey et al. (2019)`

    .. _`Lederrey et al. (2019)`: https://transp-or.epfl.ch/documents/technicalReports/LedLurHilBie19.pdf

    The objective function is first evaluated on a share of the data
    (the batch). Each time no successful candidate can be generated,
    the size of the batch is doubled, until the full data set is used.
    Convergence is tested only when the full data set is used.

    :param fct: object to calculate the objective function and its derivatives.
    :type fct: optimization.functionToMinimize

    :param initBetas: initial value of the parameters.
    :type initBetas: numpy.array

    :param fixedBetas: betas that stay fixed during the optimization.
                       Not used by this function.
    :type fixedBetas: numpy.array

    :param betaIds: internal identifiers of the non fixed betas.
                    Not used by this function.
    :type betaIds: numpy.array

    :param bounds: list of tuples (ell,u) containing the lower and upper bounds for each free parameter. Note that this algorithm does not support bound constraints. Therefore, all the bounds must be None.
    :type bounds: list(tuples)

    :param parameters: dict of parameters to be transmitted to the  optimization routine:
         - tolerance: when the relative gradient is below that threshold, the algorithm has reached convergence (default:  :math:`\\varepsilon^{\\frac{1}{3}}`);
         - maxiter: the maximum number of iterations (default: 1000);
         - firstBatch: share of the data used for the first batch (default: 1/16);
         - firstRadius: initial radius of the trust region (default: 1.0);
         - hybrid: second order candidates are generated as long as the batch size does not exceed this share, first order candidates afterwards (default: 1/4);
         - maxFailure: transmitted to the generation of the candidates (default: 2);
         - dogleg: transmitted to the generation of the candidates (default: False);
         - eta1, eta2: transmitted to the generation of the candidates (default: 0.01 and 0.9);
         - scaleEps: accepted for compatibility, but ignored.

    :type parameters: dict(string:float or int)

    :return: tuple x, messages, where

            - x is the solution found,
            - messages is a dictionary reporting various aspects related to the run of the algorithm. The entry 'Relative gradient' is None if the algorithm stopped before the full data set was used.
    :rtype: numpy.array, dict(str:object)

    :raises biogemeError: if at least one bound is not None.

    """

    for l, u in bounds:
        if l is not None or u is not None:
            raise excep.biogemeError(
                'This algorithm does not handle bound constraints. Remove the bounds, or select another algorithm.'
            )

    options = {} if parameters is None else parameters

    tol = options.get('tolerance', np.finfo(np.float64).eps**0.3333)
    maxiter = options.get('maxiter', 1000)
    # The size of the first batch is such that it can be doubled 4
    # times before reaching the full data set
    firstBatch = options.get('firstBatch', 1.0 / 2.0**4)
    # Critical size of the batch: second order candidates are
    # generated up to that size, first order candidates beyond
    hybrid = options.get('hybrid', 1.0 / 2.0**2)
    firstRadius = options.get('firstRadius', 1.0)
    # Transmitted to the generation of the candidates
    maxFailure = options.get('maxFailure', 2)
    dogleg = options.get('dogleg', False)
    eta1 = options.get('eta1', 0.01)
    eta2 = options.get('eta2', 0.9)

    def report(cause, iterations, relativeGradient):
        """Build the dictionary of messages returned to the caller."""
        return {
            'Algorithm': 'HAMABS prototype',
            'Relative gradient': relativeGradient,
            'Cause of termination': cause,
            'Number of iterations': iterations
        }

    logger.detailed("** Optimization: HAMABS")

    avging = smoothing()

    k = 0
    xk = initBetas
    batch = firstBatch
    # The relative gradient is calculated only with the full data set.
    # It stays None if the algorithm stops before that.
    relgrad = None

    fct.setVariables(xk)
    f, g, h = fct.f_g_h(batch=batch)
    avgf, avgg, avgh = avging.add(f, g, h, batch)

    typx = np.ones(np.asarray(xk).shape)
    typf = max(np.abs(f), 1.0)

    if batch == 1.0:
        relgrad = opt.relativeGradient(xk, f, g, typx, typf)
        if relgrad <= tol:
            # The starting point is already a solution. The same
            # format (x, messages) as the regular exit is returned.
            message = f"Relative gradient = {relgrad} <= {tol}"
            logger.detailed(message)
            return xk, report(message, 0, relgrad)

    delta = firstRadius
    cont = True

    maxDelta = np.finfo(float).max
    minDelta = np.finfo(float).eps

    while cont:
        logger.debug(f'***************** Iteration {k} **************')
        logger.debug(
            f'N={avging.numberOfValues()} xk={xk} avgf={avgf} delta={delta}')
        k += 1
        if batch <= hybrid:
            success, xc, fc, gc, hc, delta = generateCandidateSecondOrder(
                fct, xk, avgf, avgg, avgh, batch, delta, dogleg, maxFailure,
                maxDelta, eta1, eta2)
        else:
            success, xc, fc, gc, hc, delta = generateCandidateFirstOrder(
                fct, xk, avgf, avgg, avgh, batch, delta, dogleg, maxFailure,
                maxDelta, eta1, eta2)

        if success:
            xk = xc
            avgf, avgg, avgh = avging.add(fc, gc, hc, batch)
            if batch == 1.0:
                relgrad = opt.relativeGradient(xk, avgf, avgg, typx, typf)
                if relgrad <= tol:
                    message = f"Relative gradient = {relgrad} <= {tol}"
                    cont = False
        else:
            if batch < 1.0:
                # No progress with the current batch: double its size
                # and restart from the initial trust region.
                batch = min(2.0 * batch, 1.0)
                delta = firstRadius
                if batch <= hybrid:
                    fct.setVariables(xk)
                    f, g, h = fct.f_g_h(batch=batch)
                    avgf, avgg, avgh = avging.add(f, g, h, batch)
                else:
                    fct.setVariables(xk)
                    f, g = fct.f_g(batch=batch)
                    avgf, avgg, _ = avging.add(f, g, None, batch)

        if cont and delta <= minDelta:
            if batch == 1.0:
                message = f"Trust region is too small: {delta}"
                cont = False

        # '>=' guarantees termination even if maxiter is not positive.
        # The test is skipped if the algorithm has already stopped, so
        # that the actual cause of termination is reported.
        if cont and k >= maxiter:
            message = f"Maximum number of iterations reached: {maxiter}"
            cont = False
        logger.detailed(
            f"{k} f={avgf:10.7g} delta={delta:6.2g} batch={100*batch:6.2g}%")

    logger.detailed(message)
    return xk, report(message, k, relgrad)

Python biogemeError примеры использования