示例#1
0
class BootstrapRunner(AbstractRunner):
    """
    Performs bootstrap iterations for a fitter.

    Each call to run() synthesizes new observations and refits, trying at
    most ITERATION_MULTIPLIER times to obtain one acceptable fit.
    """

    # Timing output from report() is produced only when this is True.
    _IS_TIMING_REPORT = False

    def __init__(self, runnerArgument):
        """
        Parameters
        ----------
        runnerArgument: RunnerArgument
            Supplies fitter, numIteration, kwargs and synthesizerClass.

        Notes
        -----
        1. Uses METHOD_LEASTSQ for fitModel iterations.
        """
        super().__init__()
        #
        self.lastErr = ""
        self.fitter = runnerArgument.fitter
        self.numIteration = runnerArgument.numIteration
        self.kwargs = runnerArgument.kwargs
        self.synthesizerClass = runnerArgument.synthesizerClass
        # Reuse the fitter's logger when the fitter instance carries one
        if "logger" in self.fitter.__dict__:
            self.logger = self.fitter.logger
        else:
            self.logger = Logger()
        # The runner is done immediately if the initial fit fails
        self._isDone = not self._fitInitial()
        self.columns = self.fitter.selectedColumns
        # Initializations for bootstrap loop
        if not self.isDone:
            fittedTS = self.fitter.fittedTS.subsetColumns(self.columns,
                                                          isCopy=False)
            observedTS = self.fitter.observedTS.subsetColumns(self.columns,
                                                              isCopy=False)
            self.synthesizer = self.synthesizerClass(observedTS=observedTS,
                                                     fittedTS=fittedTS,
                                                     **self.kwargs)
            self.numSuccessIteration = 0
            if self.fitter.minimizerResult is None:
                self.fitter.fitModel()
            # Reference quality used to reject poor bootstrap fits in run()
            self.baseChisq = self.fitter.minimizerResult.redchi
            self.curIteration = 0
            self.fd = self.logger.getFileDescriptor()
            self.baseFittedStatistic = TimeseriesStatistic(
                self.fitter.observedTS.subsetColumns(
                    self.fitter.selectedColumns, isCopy=False))

    def report(self, id=None):
        """
        Debugging aid that prints the time elapsed since the last call
        made without an id. Does nothing unless _IS_TIMING_REPORT is True.

        Parameters
        ----------
        id: str/None
            None: (re)start the timer
            otherwise: label printed with the elapsed time
        """
        if not self._IS_TIMING_REPORT:
            return
        if id is None:
            self._startTime = time.time()
        else:
            elapsed = time.time() - self._startTime
            print("%s: %2.3f" % (id, elapsed))

    @property
    def numWorkUnit(self):
        """Number of bootstrap iterations requested."""
        return self.numIteration

    @property
    def isDone(self):
        """True when no further calls to run() are needed."""
        return self._isDone

    def run(self):
        """
        Runs the bootstrap.

        Returns
        -------
        BootstrapResult
            None if the runner is already done.
        """
        def mkNullResult():
            fittedStatistic = TimeseriesStatistic(
                self.fitter.observedTS[self.fitter.selectedColumns])
            return BootstrapResult(self.fitter, 0, {}, fittedStatistic)

        #
        if self.isDone:
            return None
        # Set up logging for this run. The descriptor is closed at the end
        # of every run, so never redirect to an already closed descriptor.
        isRedirect = (self.fd is not None) and (not self.fd.closed)
        savedStdout, savedStderr = sys.stdout, sys.stderr
        if isRedirect:
            sys.stderr = self.fd
            sys.stdout = self.fd
        try:
            isSuccess = False
            bootstrapError = 0
            self.report()
            for iteration in range(ITERATION_MULTIPLIER):
                newObservedTS = self.synthesizer.calculate()
                self.report("newObservedTS")
                # Update fitter to use the new observed data
                _ = self.fitter._updateObservedTS(newObservedTS,
                                                  isCheck=False)
                self.report("updated fitter")
                # Try fitting
                try:
                    self.fitter.fitModel(params=self.fitter.params)
                    self.report("fitter.fit")
                except Exception as err:
                    # Problem with the fit.
                    msg = "modelFitterBootstrap. Fit failed on iteration %d."  \
                          % iteration
                    self.logger.error(msg, err)
                    bootstrapError += 1
                    continue
                # Verify that there is a result
                if self.fitter.minimizerResult is None:
                    continue
                # Check if the fit is of sufficient quality
                maxChisq = MAX_CHISQ_MULT * self.baseChisq
                if self.fitter.minimizerResult.redchi > maxChisq:
                    continue
                if self.fitter.params is None:
                    continue
                isSuccess = True
                self.report("break")
                break
            # Create the result
            if isSuccess:
                self.numSuccessIteration += 1
                parameterDct = {
                    k: [v]
                    for k, v in self.fitter.params.valuesdict().items()
                }
                fittedStatistic = self.baseFittedStatistic.copy()
                fittedStatistic.accumulate(
                    self.fitter.fittedTS.subsetColumns(
                        self.fitter.selectedColumns, isCopy=False))
                bootstrapResult = BootstrapResult(
                    self.fitter,
                    self.numSuccessIteration,
                    parameterDct,
                    fittedStatistic,
                    bootstrapError=bootstrapError)
            else:
                # No acceptable fit was found; stop this runner
                bootstrapResult = mkNullResult()
                self._isDone = True
        finally:
            # Restore the standard streams before closing the log file so
            # that the process is not left writing to a closed descriptor.
            if isRedirect:
                sys.stdout, sys.stderr = savedStdout, savedStderr
            # Close the logging file
            if (self.fd is not None) and (not self.fd.closed):
                self.fd.close()
        # See if completed work
        if self.numSuccessIteration >= self.numIteration:
            self._isDone = True
        return bootstrapResult

    def _fitInitial(self):
        """
        Do the initial fit, retrying up to MAX_TRIES times.

        Returns
        -------
        bool
            successful fit
        """
        isSuccess = False
        for _ in range(MAX_TRIES):
            try:
                self.fitter.fitModel()  # Initialize model
                isSuccess = True
                break
            except Exception as err:
                # Remember the most recent failure and try again
                self.lastErr = err
                msg = "Could not do initial fit"
                self.logger.error(msg, err)
        return isSuccess
示例#2
0
class Runner(object):
    """Runs tests on biomodels."""
    def __init__(self,
                 firstModel: int = 210,
                 numModel: int = 2,
                 pclPath=PCL_FILE,
                 figPath=FIG_PATH,
                 useExistingData: bool = False,
                 isPlot=IS_PLOT,
                 **kwargDct):
        """
        Parameters
        ----------
        firstModel: first model to use
        numModel: number of models to use
        pclPath: file to which results are saved
        figPath: file to which the figure is saved
        useExistingData: use data in existing PCL file
        isPlot: produce the plots at the end of run()
        kwargDct: keyword arguments passed to TestHarness
        """
        # Existing results can only be recovered if the file is present
        isRestore = (useExistingData and (pclPath is not None)
                     and os.path.isfile(pclPath))
        if isRestore:
            self.restore(pclPath=pclPath)
        else:
            # Initialize based on type of context variable
            for name in CONTEXT:
                if name.endswith("s"):
                    value = []
                elif name.endswith("Dct"):
                    value = {}
                elif name.endswith("Path"):
                    value = None
                elif name.startswith("is"):
                    value = False
                else:
                    value = 0
                setattr(self, name, value)
        # Initialize to parameters for this instantiation
        self.firstModel = firstModel
        self.numModel = numModel
        self.pclPath = pclPath
        self.figPath = figPath
        self.kwargDct = kwargDct
        self.isPlot = isPlot
        self.useExistingData = useExistingData
        #
        if LOGGER in kwargDct:
            self.logger = kwargDct[LOGGER]
        else:
            # Share one logger between this runner and the test harness
            self.logger = Logger()
            kwargDct[LOGGER] = self.logger
        self.save()

    def _isListSame(self, list1, list2):
        """True if the two lists contain the same set of elements."""
        diff = set(list1).symmetric_difference(list2)
        return len(diff) == 0

    def equals(self, other):
        """
        Compares instance attributes. Lists are compared as sets; int, str,
        float and bool are compared by value; other types are ignored.

        Parameters
        ----------
        other: Runner

        Returns
        -------
        bool
        """
        selfKeys = list(self.__dict__.keys())
        otherKeys = list(other.__dict__.keys())
        if not self._isListSame(selfKeys, otherKeys):
            return False
        #
        for key, value in self.__dict__.items():
            otherValue = getattr(other, key)
            if isinstance(value, list):
                if not self._isListSame(value, otherValue):
                    return False
            elif isinstance(value, (int, str, float, bool)):
                if value != otherValue:
                    return False
        #
        return True

    def run(self):
        """
        Runs the tests. Saves state after each tests.
        """
        # Processing models
        modelNums = self.firstModel + np.array(range(self.numModel))
        for modelNum in modelNums:
            if (modelNum in self.processedModels) and self.useExistingData:
                continue
            self.processedModels.append(modelNum)
            input_path = PATH_PAT % modelNum
            msg = "Model %s" % input_path
            self.logger.activity(msg)
            try:
                harness = TestHarness(input_path, **self.kwargDct)
                if len(harness.parametersToFit) == 0:
                    self.logger.result("No fitable parameters in model.")
                    self.save()
                    continue
                harness.evaluate(stdResiduals=1.0,
                                 fractionParameterDeviation=1.0,
                                 relError=2.0)
            except Exception as err:
                self.erroredModels.append(modelNum)
                self.logger.error("TestHarness failed", err)
                self.save()
                continue
            fitErrorDct = harness.fitModelResult.parameterRelErrorDct
            # Parameters for model
            self.modelParameterDct[modelNum] = list(fitErrorDct.keys())
            # Relative error in initial fit
            self.fitModelRelerrors.extend(list(fitErrorDct.values()))
            # Relative error in bootstrap
            self.bootstrapRelerrors.extend(
                list(harness.bootstrapResult.parameterRelErrorDct.values()))
            # Count models without exceptions
            self.nonErroredModels.append(modelNum)
            self.numNoError = len(self.nonErroredModels)
            self.save()
        # Check for plot
        if self.isPlot:
            self.plot()

    def save(self):
        """
        Saves state. Maintain in sync with self.restore().
        Nothing is written if pclPath is None.
        """
        if self.pclPath is not None:
            data = [getattr(self, n) for n in CONTEXT]
            with open(self.pclPath, "wb") as fd:
                pickle.dump(data, fd)

    def restore(self, pclPath=None):
        """
        Restores state. Maintain in sync with self.save().

        Parameters
        ----------
        pclPath: str/None
            file written by save(); defaults to self.pclPath

        Raises
        ------
        ValueError: the file does not exist
        """
        if pclPath is None:
            pclPath = self.pclPath
        if not os.path.isfile(pclPath):
            # Report the path actually checked; self.pclPath may not be
            # set yet when this is called from the constructor.
            raise ValueError("***Restart file %s does not exist" % pclPath)
        # NOTE: pickle.load executes arbitrary code from the file. Only
        # restore from files written by save() on a trusted machine.
        with open(pclPath, "rb") as fd:
            data = pickle.load(fd)
        for name, value in zip(CONTEXT, data):
            setattr(self, name, value)

    @staticmethod
    def _pruneRelativeErrors(relativeErrors, maxError=MAX_RELATIVE_ERROR):
        """
        Deletes Nans. Removes very large values.

        Parameters
        ----------
        list: relative errors
        maxError: maximum relative error considered
        
        Returns
        -------
        list: pruned errors
        float: fraction pruned from non-nan values
               (0.0 if there are no non-nan values)
        """
        noNanErrors = [v for v in relativeErrors if not np.isnan(v)]
        prunedErrors = [v for v in noNanErrors if v <= maxError]
        if len(noNanErrors) == 0:
            # Nothing to prune; avoid dividing by zero
            prunedFrc = 0.0
        else:
            prunedFrc = 1 - len(prunedErrors) / len(noNanErrors)
        return prunedErrors, prunedFrc

    def plot(self):
        """
        Does all plots. The figure is written to figPath (if not None)
        and then displayed.
        """
        _, axes = plt.subplots(1, 2)
        prunedModelErrors, modelPrunedFrc =  \
            self._pruneRelativeErrors(self.fitModelRelerrors)
        prunedBootstrapErrors, bootstrapPrunedFrc =  \
            self._pruneRelativeErrors(self.bootstrapRelerrors)
        maxBin1 = self._plotRelativeErrors(axes[0], prunedModelErrors,
                                           FIT_MODEL, modelPrunedFrc)
        maxBin2 = self._plotRelativeErrors(axes[1],
                                           prunedBootstrapErrors,
                                           BOOTSTRAP,
                                           bootstrapPrunedFrc,
                                           isYLabel=False)
        # Use a common y-scale so the two histograms are comparable
        maxBin = max(maxBin1, maxBin2)
        if maxBin > 0:
            axes[0].set_ylim([0, maxBin])
            axes[1].set_ylim([0, maxBin])
        #
        if len(self.processedModels) == 0:
            frac = 0.0
        else:
            frac = 1.0 * self.numNoError / len(self.processedModels)
        suptitle = "Models %d-%d. Fraction non-errored: %2.3f"
        lastModel = self.firstModel + len(self.processedModels) - 1
        suptitle = suptitle % (self.firstModel, lastModel, frac)
        plt.suptitle(suptitle)
        # Save before showing: the figure may be released once the
        # window opened by show() is closed, producing an empty file.
        if self.figPath is not None:
            plt.savefig(self.figPath)
        plt.show()

    def _plotRelativeErrors(self,
                            ax,
                            relErrors,
                            title,
                            prunedFrc,
                            isYLabel=True):
        """
        Plots histogram of relative errors.

        Parameters
        ----------
        ax: Matplotlib.axes
        relErrors: list-float
        title: str
        prunedFrc: float
        isYlabel: bool

        Returns
        -------
        float: maximum number in a bin
        """
        histResult = ax.hist(relErrors)
        fullTitle = "%s. Frc Pruned: %2.2f" % (title, prunedFrc)
        ax.set_title(fullTitle)
        ax.set_xlabel("relative error")
        if isYLabel:
            ax.set_ylabel("number parameters")
        ax.set_xlim([0, 1])
        # First element of the hist() result holds the bin counts
        return max(histResult[0])
示例#3
0
class ModelFitterCore(rpickle.RPickler):
    def __init__(
        self,
        modelSpecification,
        observedData,
        parametersToFit=None,
        selectedColumns=None,
        fitterMethods=METHOD_FITTER_DEFAULTS,
        numFitRepeat=1,
        bootstrapMethods=METHOD_BOOTSTRAP_DEFAULTS,
        parameterLowerBound=PARAMETER_LOWER_BOUND,
        parameterUpperBound=PARAMETER_UPPER_BOUND,
        parameterDct={},
        fittedDataTransformDct={},
        logger=Logger(),
        isPlot=True,
        _loggerPrefix="",
        # The following must be kept in sync with ModelFitterBootstrap.bootstrap
        numIteration: int = 10,
        reportInterval: int = 1000,
        synthesizerClass=ObservationSynthesizerRandomizedResiduals,
        maxProcess: int = None,
        serializePath: str = None,
    ):
        """
        Constructs estimates of parameter values.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            roadrunner model or antimony model
            None: default construction; no instance state is set
        observedData: NamedTimeseries/str
            str: path to CSV file
        parametersToFit: list-str/None
            parameters in the model that you want to fit
            if None, the keys of parameterDct are used
        selectedColumns: list-str
            species names you wish use to fit the model
            default: all columns in observedData
        parameterLowerBound: float
            lower bound for the fitting parameters
        parameterUpperBound: float
            upper bound for the fitting parameters
        parameterDct: dict
            key: parameter name
            value: ParameterSpecification or tuple
            NOTE(review): tuples are unpacked by _updateParameterDct as
            (lower, upper, value), while the usage example below reads
            as (low, initial, high). Confirm the intended order.
        fittedDataTransformDct: dict
            key: column in selectedColumns
            value: function of the data in selectedColumns;
                   input: NamedTimeseries
                   output: array for the values of the column
        logger: Logger
        fitterMethods: str/list-str
            method used for minimization in fitModel
        numFitRepeat: int
            number of times fitting is repeated for a method
        bootstrapMethods: str/list-str
            method used for minimization in bootstrap
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        synthesizerClass: object that synthesizes new observations
            Must subclass ObservationSynthesizer
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap

        Usage
        -----
        parameterDct = {
            "k1": (1, 5, 10),  # name of parameter: low value, initial, high
            "k2": (2, 3, 6)}
        fitter = ModelFitter(roadrunnerModel, "observed.csv",
            parameterDct=parameterDct)
        fitter.fitModel()  # Do the fit
        fitter.bootstrap()  # Estimate parameter variance with bootstrap
        """
        if modelSpecification is None:
            # Default constructor (see rpConstruct): nothing is initialized
            return
        self._loggerPrefix = _loggerPrefix
        self.modelSpecification = modelSpecification
        self.parametersToFit = parametersToFit
        self.lowerBound = parameterLowerBound
        self.upperBound = parameterUpperBound
        self.bootstrapKwargs = dict(
            numIteration=numIteration,
            reportInterval=reportInterval,
            maxProcess=maxProcess,
            serializePath=serializePath,
        )
        self.parameterDct = self._updateParameterDct(parameterDct)
        self._numFitRepeat = numFitRepeat
        if self.parametersToFit is None:
            self.parametersToFit = list(self.parameterDct.keys())
        self.observedTS = observedData
        if self.observedTS is not None:
            self.observedTS = mkNamedTimeseries(observedData)
        # Keep a private copy so that the shared default dictionary (and
        # the caller's dictionary) is never mutated through this instance.
        if fittedDataTransformDct is not None:
            fittedDataTransformDct = dict(fittedDataTransformDct)
        self.fittedDataTransformDct = fittedDataTransformDct
        #
        if (selectedColumns is None) and (self.observedTS is not None):
            selectedColumns = self.observedTS.colnames
        self.selectedColumns = selectedColumns
        # Construct array of non-nan observed values
        if self.observedTS is not None:
            self._observedArr =  \
                self.observedTS[self.selectedColumns].flatten()
        else:
            self._observedArr = None
        # Other internal state
        self._fitterMethods = fitterMethods
        if isinstance(self._fitterMethods, str):
            if self._fitterMethods == METHOD_BOTH:
                self._fitterMethods = METHOD_FITTER_DEFAULTS
            else:
                self._fitterMethods = [self._fitterMethods]
        self._bootstrapMethods = bootstrapMethods
        if isinstance(self._bootstrapMethods, str):
            self._bootstrapMethods = [self._bootstrapMethods]
        self._isPlot = isPlot
        self._plotter = tp.TimeseriesPlotter(isPlot=self._isPlot)
        self._plotFittedTS = None  # Timeseries that is plotted
        self.logger = logger
        # The following are calculated during fitting
        self.roadrunnerModel = None
        self.minimizer = None  # lmfit.minimizer
        self.minimizerResult = None  # Results of minimization
        self.params = None  # params property in lmfit.minimizer
        if self.observedTS is not None:
            self.fittedTS = self.observedTS.copy(
                isInitialize=True)  # Initialize
        else:
            self.fittedTS = None
        self.residualsTS = None  # Residuals for selectedColumns
        self.bootstrapResult = None  # Result from bootstrapping
        # Validation checks
        self._validateFittedDataTransformDct()

    @classmethod
    def rpConstruct(cls):
        """
        Overrides rpickler.rpConstruct. Builds an instance by passing
        None for the three leading constructor arguments, which selects
        the constructor path that sets no instance state.

        Returns
        -------
        Instance of cls
        """
        nullArguments = (None, None, None)
        return cls(*nullArguments)

    def rpRevise(self):
        """
        Overrides rpickler. Supplies a new Logger when the instance
        has no logger attribute of its own.
        """
        if "logger" in self.__dict__:
            return
        self.logger = Logger()

    def _validateFittedDataTransformDct(self):
        """
        Checks that every key of fittedDataTransformDct is one of the
        selected columns. No check is made if either is None.

        Raises
        ------
        ValueError: a key is not in selectedColumns
        """
        if self.fittedDataTransformDct is None:
            return
        keySet = set(self.fittedDataTransformDct.keys())
        if self.selectedColumns is None:
            return
        excess = keySet.difference(self.selectedColumns)
        if len(excess) > 0:
            msg = "Columns not in selectedColumns: %s" % str(excess)
            # Raise with the descriptive message, not the bare set
            raise ValueError(msg)

    def _transformFittedTS(self, data):
        """
        Builds a timeseries from simulation output and applies the
        column transformations in fittedDataTransformDct.

        Parameters
        ----------
        data: np.ndarray
            first column is time, followed by the selected columns

        Returns
        -------
        NamedTimeseries
        """
        columnNames = [TIME] + list(self.selectedColumns)
        transformedTS = NamedTimeseries(array=data[:, :],
                                        colnames=columnNames)
        transformDct = self.fittedDataTransformDct
        if transformDct is None:
            return transformedTS
        for column, func in transformDct.items():
            # A None entry means the column is left untransformed
            if func is None:
                continue
            transformedTS[column] = func(transformedTS)
        return transformedTS

    def _updateParameterDct(self, parameterDct):
        """
        Returns a copy of the dictionary in which every tuple value is
        replaced by a ParameterSpecification. Other values are unchanged.

        Parameters
        ----------
        parameterDct: dict
            key: parameter name
            value: ParameterSpecification or tuple

        Returns
        -------
        dict
        """
        # NOTE(review): tuple elements are taken as (lower, upper, value),
        # but the constructor's usage example reads as
        # (low, initial, high). Confirm which order is intended.
        newDct = dict(parameterDct)
        tupleItems = [(name, spec) for name, spec in parameterDct.items()
                      if isinstance(spec, tuple)]
        for name, spec in tupleItems:
            lower = spec[0]
            upper = spec[1]
            value = spec[2]
            newDct[name] = ParameterSpecification(lower=lower,
                                                  upper=upper,
                                                  value=value)
        return newDct

    @staticmethod
    def addParameter(parameterDct: dict, name: str, lower: float, upper: float,
                     value: float):
        """
        Adds a parameter to a dictionary of parameters. The dictionary is
        updated in place and also returned.

        Parameters
        ----------
        parameterDct: parameter dictionary to augment
        name: parameter name
        lower: lower range of parameter value
        upper: upper range of parameter value
        value: initial value

        Returns
        -------
        dict
            the same dictionary that was passed in
        """
        parameterDct[name] = ParameterSpecification(lower=lower,
                                                    upper=upper,
                                                    value=value)
        # Return the dictionary, as the documented contract promises
        return parameterDct

    def _adjustNames(self, antimonyModel:str, observedTS:NamedTimeseries)  \
          ->typing.Tuple[NamedTimeseries, list]:
        """
        Antimony exports can change the names of floating species
        by adding a "_" at the end. Check for this and adjust
        the names in observedTS.

        Parameters
        ----------
        antimonyModel: str
            model loaded to find the names produced by a simulation
        observedTS: NamedTimeseries

        Return
        ------
        NamedTimeseries: newObservedTS
            observedTS itself if no name is missing; otherwise a copy
            with the missing columns renamed
        list: newSelectedColumns
        """
        rr = te.loada(antimonyModel)
        dataNames = rr.simulate().colnames
        # Simulation columns are compared in the bracketed form "[name]"
        names = ["[%s]" % n for n in observedTS.colnames]
        missingNames = [n[1:-1] for n in set(names).difference(dataNames)]
        newSelectedColumns = list(self.selectedColumns)
        if len(missingNames) == 0:
            return observedTS, newSelectedColumns
        newObservedTS = observedTS.copy()
        self.logger.exception("Missing names in antimony export: %s" %
                              str(missingNames))
        for name in observedTS.colnames:
            if name not in missingNames:
                continue
            missingName = "%s_" % name
            newObservedTS = newObservedTS.rename(name, missingName)
            # An observed column need not be a selected column; only
            # selected columns are replaced.
            if name in newSelectedColumns:
                newSelectedColumns.remove(name)
                newSelectedColumns.append(missingName)
        return newObservedTS, newSelectedColumns

    def copy(self, isKeepLogger=False):
        """
        Creates a new fitter of the same class from this fitter's model,
        observed data, parameters, methods, bounds, transforms and plot
        setting. Bootstrap results and params are carried over.

        Parameters
        ----------
        isKeepLogger: bool
            True: the new fitter shares this fitter's logger
            False: the new fitter gets a copy of the logger (or None)

        Returns
        -------
        instance of the same class as self

        Raises
        ------
        ValueError: the model cannot be converted to Antimony
        """
        # NOTE(review): numFitRepeat, the logger prefix and the bootstrap
        # keyword settings are not forwarded to the new fitter. Confirm
        # whether that is intended.
        if isinstance(self.modelSpecification, str):
            modelSpecification = self.modelSpecification
            observedTS = self.observedTS.copy()
            selectedColumns = self.selectedColumns
        else:
            # A model object is converted to its Antimony text
            try:
                modelSpecification = self.modelSpecification.getAntimony()
            except Exception as err:
                self.logger.error(
                    "Problem wth conversion to Antimony. Details:", err)
                raise ValueError("Cannot proceed.")
            observedTS, selectedColumns = self._adjustNames(
                modelSpecification, self.observedTS)
        #
        if isKeepLogger:
            logger = self.logger
        else:
            logger = None if self.logger is None else self.logger.copy()
        #
        specificationCopy = copy.deepcopy(modelSpecification)
        parametersToFit = copy.deepcopy(self.parametersToFit)
        parameterDct = copy.deepcopy(self.parameterDct)
        transformDct = copy.deepcopy(self.fittedDataTransformDct)
        newModelFitter = self.__class__(
            specificationCopy,
            observedTS,
            parametersToFit,
            selectedColumns=selectedColumns,
            fitterMethods=self._fitterMethods,
            bootstrapMethods=self._bootstrapMethods,
            parameterLowerBound=self.lowerBound,
            parameterUpperBound=self.upperBound,
            parameterDct=parameterDct,
            fittedDataTransformDct=transformDct,
            logger=logger,
            isPlot=self._isPlot)
        if self.bootstrapResult is None:
            newModelFitter.bootstrapResult = None
            newModelFitter.params = self.params
        else:
            # Parameters come from the copied bootstrap result
            newModelFitter.bootstrapResult = self.bootstrapResult.copy()
            newModelFitter.params = newModelFitter.bootstrapResult.params
        return newModelFitter

    def _initializeRoadrunnerModel(self):
        """
        Sets self.roadrunnerModel from self.modelSpecification: an
        ExtendedRoadRunner is used as is; a string is loaded with
        te.loada.

        Raises
        ------
        ValueError: modelSpecification is neither of the above
        """
        specification = self.modelSpecification
        roadrunnerClass =  \
            te.roadrunner.extended_roadrunner.ExtendedRoadRunner
        if isinstance(specification, roadrunnerClass):
            self.roadrunnerModel = specification
            return
        if isinstance(specification, str):
            self.roadrunnerModel = te.loada(specification)
            return
        msg = ('Invalid model.'
               "\nA model must either be a Roadrunner model "
               "an Antimony model.")
        raise ValueError(msg)

    def getDefaultParameterValues(self):
        """
        Reads the values of the parameters to fit from the roadrunner
        model after (re)initializing and resetting it.

        Returns
        -------
        dict:
            key: parameter name
            value: value of parameter
        """
        self._initializeRoadrunnerModel()
        self.roadrunnerModel.reset()
        return {
            parameterName: self.roadrunnerModel.model[parameterName]
            for parameterName in self.parametersToFit
        }

    def simulate(self,
                 params=None,
                 startTime=None,
                 endTime=None,
                 numPoint=None):
        """
        Runs a simulation of the roadrunner model for the selected
        columns (plus time). Unspecified times and number of points are
        taken from observedTS. Each stage is bracketed by logger blocks.

        Parameters
        ----------
        params: lmfit.Parameters
            if not None, applied to the model with _setupModel
        startTime: float
        endTime: float
        numPoint: int

        Return
        ------
        NamedTimeseries
        """
        def valueOrDefault(default, value):
            # The default is used only when no value was supplied
            return default if value is None else value

        logger = self.logger
        # Outermost block: whole simulation
        topBlock = Logger.join(self._loggerPrefix, "fitModel.simulate")
        topGuid = logger.startBlock(topBlock)
        # Block sub1: preparation before the simulation
        prepBlock = Logger.join(topBlock, "sub1")
        prepGuid = logger.startBlock(prepBlock)
        startTime = valueOrDefault(self.observedTS.start, startTime)
        endTime = valueOrDefault(self.observedTS.end, endTime)
        numPoint = valueOrDefault(len(self.observedTS), numPoint)
        # Block sub1a: obtain and reset the model
        resetBlock = Logger.join(prepBlock, "sub1a")
        resetGuid = logger.startBlock(resetBlock)
        if self.roadrunnerModel is None:
            self._initializeRoadrunnerModel()
        self.roadrunnerModel.reset()
        logger.endBlock(resetGuid)
        # Block sub1b: apply parameters, if any were specified
        setupBlock = Logger.join(prepBlock, "sub1b")
        setupGuid = logger.startBlock(setupBlock)
        if params is not None:
            self._setupModel(params)
        logger.endBlock(setupGuid)
        # Columns to simulate; time is always the first column
        simulationColumns = list(self.selectedColumns)
        if TIME not in simulationColumns:
            simulationColumns.insert(0, TIME)
        logger.endBlock(prepGuid)
        # Block roadrunner: the simulation itself
        runBlock = Logger.join(topBlock, "roadrunner")
        runGuid = logger.startBlock(runBlock)
        data = self.roadrunnerModel.simulate(startTime, endTime, numPoint,
                                             simulationColumns)
        logger.endBlock(runGuid)
        # Block sub2: convert the result
        convertBlock = Logger.join(topBlock, "sub2")
        convertGuid = logger.startBlock(convertBlock)
        simulatedTS = NamedTimeseries(namedArray=data)
        logger.endBlock(convertGuid)
        logger.endBlock(topGuid)
        return simulatedTS

    def updateFittedAndResiduals(self, **kwargs) -> np.ndarray:
        """
        Updates values of self.fittedTS and self.residualsTS
        from a new simulation.

        Parameters
        ----------
        kwargs: dict
            arguments for simulation (passed to self.simulate)

        Instance Variables Updated
        --------------------------
        self.fittedTS
        self.residualsTS

        Returns
        -------
        1-d ndarray of residuals
        """
        self.fittedTS = self.simulate(**kwargs)  # Updates self.fittedTS
        cols = self.selectedColumns
        if self.residualsTS is None:
            # First use: start from the observed values for the columns
            self.residualsTS = self.observedTS.subsetColumns(cols)
        self.residualsTS[cols] = self.observedTS[cols] - self.fittedTS[cols]
        # Residuals that are nan are replaced column by column
        for col in cols:
            self.residualsTS[col] = np.nan_to_num(self.residualsTS[col])
        # Return the residuals, as the annotation and docstring promise
        return self.residualsTS[cols].flatten()

    def _residuals(self, params) -> np.ndarray:
        """
        Computes the difference between the observed values and a
        simulation run with the given parameters. nan differences are
        replaced using np.nan_to_num. This internal-only method is
        implemented to maximize efficiency; it expects
        self.roadrunnerModel to have been set already.

        Parameters
        ----------
        params: lmfit.Parameters
            parameter values applied to the model with _setupModel

        Returns
        -------
        1-d ndarray of residuals
        """
        logger = self.logger
        # Outermost block: whole residual calculation
        topBlock = Logger.join(self._loggerPrefix, "fitModel._residuals")
        topGuid = logger.startBlock(topBlock)
        self.roadrunnerModel.reset()
        self._setupModel(params)
        # Block roadrunner: simulate over the observed time range
        simulateBlock = Logger.join(topBlock, "roadrunner")
        simulateGuid = logger.startBlock(simulateBlock)
        observedTS = self.observedTS
        simulated = self.roadrunnerModel.simulate(observedTS.start,
                                                  observedTS.end,
                                                  len(observedTS),
                                                  self.selectedColumns)
        logger.endBlock(simulateGuid)
        # Block tail: observed minus simulated
        differenceBlock = Logger.join(topBlock, "tail")
        differenceGuid = logger.startBlock(differenceBlock)
        differences = np.nan_to_num(self._observedArr - simulated.flatten())
        logger.endBlock(differenceGuid)
        logger.endBlock(topGuid)
        return differences

    def fitModel(self, params: lmfit.Parameters = None, max_nfev: int = 100):
        """
        Estimates parameter values by minimizing the residuals between
        the observed data and the simulated values.

        Every method in self._fitterMethods is run self._numFitRepeat
        times; each run starts from the parameters produced by the
        previous successful run. The result with the smallest standard
        deviation of residuals is kept.

        Parameters
        ----------
        params: starting values of parameters
            (default: the result of self.mkParams())
        max_nfev: maximum number of function evaluations

        Instance Variables Updated
        --------------------------
        self.params, self.minimizer, self.minimizerResult
        self.fittedTS and self.residualsTS
            (through self.updateFittedAndResiduals)

        Raises
        ------
        ValueError: no method produced a result

        Example
        -------
        f.fitModel()
        """
        ParameterDescriptor = collections.namedtuple(
            "ParameterDescriptor",
            "params method std minimizer minimizerResult")
        block = Logger.join(self._loggerPrefix, "fitModel")
        guid = self.logger.startBlock(block)
        self._initializeRoadrunnerModel()
        if self.parametersToFit is None:
            # Nothing to fit: use the base model for fit and residuals
            self.params = None
        else:
            if params is None:
                params = self.mkParams()
            # Best descriptor found so far, keyed by method
            bestByMethod = {}
            for method in self._fitterMethods:
                for _ in range(self._numFitRepeat):
                    minimizer = lmfit.Minimizer(self._residuals,
                                                params,
                                                max_nfev=max_nfev)
                    try:
                        minimizerResult = minimizer.minimize(method=method,
                                                             max_nfev=max_nfev)
                    except Exception as excp:
                        msg = "Error minimizing for method: %s" % method
                        self.logger.error(msg, excp)
                        continue
                    # The next run starts from this result
                    params = minimizerResult.params
                    std = np.std(self._residuals(params))
                    previous = bestByMethod.get(method)
                    if (previous is not None) and (std >= previous.std):
                        # Not an improvement for this method
                        continue
                    bestByMethod[method] = ParameterDescriptor(
                        params=params.copy(),
                        method=method,
                        std=std,
                        minimizer=minimizer,
                        minimizerResult=minimizerResult,
                    )
            if not bestByMethod:
                msg = "*** Minimizer failed for this model and data."
                raise ValueError(msg)
            # Keep the descriptor with the smallest residual deviation
            ranked = sorted(bestByMethod.values(),
                            key=lambda descriptor: descriptor.std)
            best = ranked[0]
            self.params = best.params
            self.minimizer = best.minimizer
            self.minimizerResult = best.minimizerResult
        # Ensure that residualsTS and fittedTS match the parameters
        self.updateFittedAndResiduals(params=self.params)
        self.logger.endBlock(guid)

    def getFittedModel(self):
        """
        Returns the roadrunner model after it has been reset and the
        fitted parameter values have been loaded into it.

        Raises
        ------
        ValueError: raised by self._checkFit when no fit is available

        Returns
        -------
        ExtendedRoadrunner
        """
        self._checkFit()
        model = self.roadrunnerModel
        model.reset()
        self._setupModel(self.params)
        return self.roadrunnerModel

    def _setupModel(self, params):
        """
        Copies the value of every parameter in self.parametersToFit
        into the roadrunner model. A value that cannot be set is
        reported to the logger and the remaining parameters are
        still processed.

        Parameters
        ----------
        params: lmfit.Parameters

        """
        valueDct = params.valuesdict()
        for name in self.parametersToFit:
            try:
                self.roadrunnerModel.model[name] = valueDct[name]
            except Exception as err:
                self.logger.error(
                    "_modelFitterCore/_setupModel: Could not set value for %s"
                    % name, err)

    def mkParams(self, parameterDct: dict = None) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        A parameter in self.parametersToFit that has a specification
        uses the specification's value; a bound that is None in the
        specification is derived by scaling the value. A parameter
        without a specification is bounded by self.lowerBound and
        self.upperBound and starts at their mean.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
            (default: self.parameterDct)

        Returns
        -------
        lmfit.Parameters
            A parameter that lmfit rejects is reported to the logger
            and omitted from the result.
        """
        def get(value, base_value, multiplier):
            # Explicit value if there is one; otherwise a scaled base value
            if value is not None:
                return value
            return base_value * multiplier

        #
        if parameterDct is None:
            parameterDct = self.parameterDct
        params = lmfit.Parameters()
        for parameterName in self.parametersToFit:
            if parameterName in parameterDct:
                specification = parameterDct[parameterName]
                value = get(specification.value, specification.value, 1.0)
                if value > 0:
                    lower_factor = LOWER_PARAMETER_MULT
                    upper_factor = UPPER_PARAMETER_MULT
                else:
                    # Scaling a non-positive value by the larger factor
                    # gives the smaller number, so the factors are swapped
                    # to keep lower <= upper.
                    # NOTE(review): assumes
                    # LOWER_PARAMETER_MULT <= UPPER_PARAMETER_MULT - confirm.
                    lower_factor = UPPER_PARAMETER_MULT
                    upper_factor = LOWER_PARAMETER_MULT
                lower = get(specification.lower, specification.value,
                            lower_factor)
                upper = get(specification.upper, specification.value,
                            upper_factor)
                if np.isclose(lower - upper, 0):
                    # Avoid a zero-width range
                    upper = 0.0001
                try:
                    params.add(parameterName,
                               value=value,
                               min=lower,
                               max=upper)
                except Exception as err:
                    msg = "modelFitterCore/mkParams parameterName %s" \
                          % parameterName
                    self.logger.error(msg, err)
            else:
                # No specification: use the fitter-wide bounds
                value = np.mean([self.lowerBound, self.upperBound])
                params.add(parameterName,
                           value=value,
                           min=self.lowerBound,
                           max=self.upperBound)
        return params

    def _checkFit(self):
        """
        Raises ValueError if self.params is None, i.e. no fitted
        parameters are available.
        """
        if self.params is not None:
            return
        raise ValueError("Must use fitModel before using this method.")

    def serialize(self, path):
        """
        Writes a copy of this fitter (made by self.copy()) to a file
        using rpickle. The copy is made before the file is opened.

        Parameters
        ----------
        path: str
            File path
        """
        fitterCopy = self.copy()
        with open(path, "wb") as fileHandle:
            rpickle.dump(fitterCopy, fileHandle)

    @classmethod
    def deserialize(cls, path):
        """
        Reads a fitter from a file written by serialize and
        initializes its roadrunner model.

        Parameters
        ----------
        path: str
            File path

        Return
        ------
        ModelFitter
            Model is initialized.
        """
        # NOTE(review): if rpickle is built on pickle, only files from a
        # trusted source should be loaded here - confirm.
        with open(path, "rb") as fileHandle:
            restored = rpickle.load(fileHandle)
        restored._initializeRoadrunnerModel()
        return restored
示例#4
0
class ModelFitterCore(rpickle.RPickler):

    # Subclasses used in interface
    class OptimizerMethod:
        """
        Pairs the name of a minimization method with the keyword
        arguments that are used with it.
        """

        def __init__(self, method, kwargs):
            self.method, self.kwargs = method, kwargs

    def __init__(
        self,
        modelSpecification,
        observedData,
        parametersToFit=None,
        selectedColumns=None,
        fitterMethods=None,
        numFitRepeat=1,
        bootstrapMethods=None,
        parameterLowerBound=PARAMETER_LOWER_BOUND,
        parameterUpperBound=PARAMETER_UPPER_BOUND,
        parameterDct=None,
        fittedDataTransformDct=None,
        logger=Logger(),
        isPlot=True,
        _loggerPrefix="",
        # The following must be kept in sync with ModelFitterBootstrap.bootstrap
        numIteration: int = 10,
        reportInterval: int = 1000,
        maxProcess: int = None,
        serializePath: str = None,
    ):
        """
        Constructs estimates of parameter values.

        If modelSpecification is None, no instance variables are set;
        this is how rpConstruct creates an empty instance.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            roadrunner model or antimony model
        observedData: NamedTimeseries/str
            str: path to CSV file
        parametersToFit: list-str/None
            parameters in the model that you want to fit
            if None, the keys of parameterDct are used
        selectedColumns: list-str
            species names you wish use to fit the model
            default: all columns in observedData
        parameterLowerBound: float
            lower bound for the fitting parameters
        parameterUpperBound: float
            upper bound for the fitting parameters
        parameterDct: dict
            key: parameter name
            value: ParameterSpecification, or a
                   triple - (lowerBound, upperBound, startingValue)
        fittedDataTransformDct: dict
            key: column in selectedColumns
            value: function of the data in selectedColumns;
                   input: NamedTimeseries
                   output: array for the values of the column
        logger: Logger
            NOTE: the default Logger is created once, when the class is
            defined, and is shared by every instance that uses the default.
        isPlot: bool
            passed to the TimeseriesPlotter used by the fitter
        _loggerPrefix: str
            prefix for the names of logger blocks
        fitterMethods: str/list-str/list-OptimizerMethod
            method used for minimization in fitModel
        numFitRepeat: int
            number of times fitting is repeated for a method
        bootstrapMethods: str/list-str/list-OptimizerMethod
            method used for minimization in bootstrap
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap

        Usage
        -----
        parameterDct = {
            "k1": (1, 10, 5),  # name of parameter: low value, high, initial
            "k2": (2, 6, 3)}
        fitter = ModelFitter(roadrunnerModel, "observed.csv",
            parameterDct=parameterDct)
        fitter.fitModel()  # Do the fit
        fitter.bootstrap()  # Estimate parameter variance with bootstrap
        """
        if modelSpecification is not None:
            # Not the default constructor
            self._loggerPrefix = _loggerPrefix
            self.modelSpecification = modelSpecification
            self.parametersToFit = parametersToFit
            self.lowerBound = parameterLowerBound
            self.upperBound = parameterUpperBound
            self.bootstrapKwargs = dict(
                numIteration=numIteration,
                reportInterval=reportInterval,
                maxProcess=maxProcess,
                serializePath=serializePath,
            )
            # Tuples in parameterDct become ParameterSpecification
            self.parameterDct = ModelFitterCore._updateParameterDct(
                parameterDct)
            self._numFitRepeat = numFitRepeat
            if self.parametersToFit is None:
                self.parametersToFit = list(self.parameterDct.keys())
            self.observedTS = observedData
            if self.observedTS is not None:
                self.observedTS = mkNamedTimeseries(observedData)
            #
            self.fittedDataTransformDct = fittedDataTransformDct
            #
            if (selectedColumns is None) and (self.observedTS is not None):
                selectedColumns = self.observedTS.colnames
            self.selectedColumns = selectedColumns
            # Flattened observed values; used to calculate residuals
            if self.observedTS is not None:
                self._observedArr = self.observedTS[
                    self.selectedColumns].flatten()
            else:
                self._observedArr = None
            # Other internal state
            self._fitterMethods = self._makeMethods(fitterMethods,
                                                    METHOD_FITTER_DEFAULTS)
            self._bootstrapMethods = self._makeMethods(
                bootstrapMethods, METHOD_BOOTSTRAP_DEFAULTS)
            if isinstance(self._bootstrapMethods, str):
                self._bootstrapMethods = [self._bootstrapMethods]
            self._isPlot = isPlot
            self._plotter = tp.TimeseriesPlotter(isPlot=self._isPlot)
            self._plotFittedTS = None  # Timeseries that is plotted
            self.logger = logger
            # The following are calculated during fitting
            self.roadrunnerModel = None
            self.minimizer = None  # lmfit.minimizer
            self.minimizerResult = None  # Results of minimization
            self.params = None  # params property in lmfit.minimizer
            # Observed data may be absent, as handled above; without
            # this guard the constructor fails for observedData=None.
            if self.observedTS is not None:
                self.fittedTS = self.observedTS.copy(
                    isInitialize=True)  # Initialize
            else:
                self.fittedTS = None
            self.residualsTS = None  # Residuals for selectedColumns
            self.bootstrapResult = None  # Result from bootstrapping
            # Validation checks
            self._validateFittedDataTransformDct()
            self._bestParameters = _BestParameters(rssq=None, params=None)

    def _makeMethods(self, methods, default):
        """
        Creates a list of OptimizerMethod from a method specification.

        Parameters
        ----------
        methods: None/str/list-str/list-OptimizerMethod
            method used for minimization
            None: use default
            str: a method name; METHOD_BOTH selects METHOD_FITTER_DEFAULTS
            list: each str is converted to an OptimizerMethod that has
                  no keyword arguments; OptimizerMethod elements are
                  kept as they are
        default: value used if methods is None

        Raises
        ------
        RuntimeError: the specification is not a list after defaults
                      have been applied
        ValueError: the list is empty or has an element that is neither
                    a str nor an OptimizerMethod

        Returns
        -------
        list-OptimizerMethod
        """
        if methods is None:
            methods = default
        if isinstance(methods, str):
            if methods == METHOD_BOTH:
                methods = METHOD_FITTER_DEFAULTS
            else:
                methods = [methods]
        if not isinstance(methods, list):
            raise RuntimeError("Must be a list")
        if not methods:
            # Previously an IndexError from inspecting methods[0]
            raise ValueError("Invalid methods: %s" % str(methods))
        # Convert element by element so that a list may mix names and
        # OptimizerMethod objects
        results = [
            ModelFitterCore.OptimizerMethod(method=m, kwargs={})
            if isinstance(m, str) else m
            for m in methods
        ]
        isValid = all(
            isinstance(m, ModelFitterCore.OptimizerMethod) for m in results)
        if not isValid:
            raise ValueError("Invalid methods: %s" % str(methods))
        return results

    @classmethod
    def mkParameters(
            cls,
            parameterDct: dict = None,
            parametersToFit: list = None,
            logger: Logger = Logger(),
            lowerBound: float = PARAMETER_LOWER_BOUND,
            upperBound: float = PARAMETER_UPPER_BOUND) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        A parameter that has a specification uses the specification's
        value; a bound that is None in the specification is derived by
        scaling the value. A parameter without a specification is
        bounded by lowerBound and upperBound and starts at their mean.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
        parametersToFit: list of parameters to fit
            (default: the keys of parameterDct)
        logger: error logger (None: a new Logger is created)
        lowerBound: lower value of range for parameters
        upperBound: upper value of range for parameters

        Raises
        ------
        RuntimeError: both parameterDct and parametersToFit are None

        Returns
        -------
        lmfit.Parameters
            A parameter that lmfit rejects is reported to the logger
            and omitted from the result.
        """
        def get(value, base_value, multiplier):
            # Explicit value if there is one; otherwise a scaled base value
            if value is not None:
                return value
            return base_value * multiplier

        #
        if (parametersToFit is None) and (parameterDct is None):
            raise RuntimeError("Must specify one of these parameters.")
        if parameterDct is None:
            parameterDct = {}
        if parametersToFit is None:
            parametersToFit = parameterDct.keys()
        if logger is None:
            # Was "logger()", which calls None
            logger = Logger()
        params = lmfit.Parameters()
        for parameterName in parametersToFit:
            if parameterName in parameterDct:
                specification = parameterDct[parameterName]
                value = get(specification.value, specification.value, 1.0)
                if value > 0:
                    lower_factor = LOWER_PARAMETER_MULT
                    upper_factor = UPPER_PARAMETER_MULT
                else:
                    # Scaling a non-positive value by the larger factor
                    # gives the smaller number, so the factors are swapped
                    # to keep lower <= upper.
                    # NOTE(review): assumes
                    # LOWER_PARAMETER_MULT <= UPPER_PARAMETER_MULT - confirm.
                    lower_factor = UPPER_PARAMETER_MULT
                    upper_factor = LOWER_PARAMETER_MULT
                lower = get(specification.lower, specification.value,
                            lower_factor)
                upper = get(specification.upper, specification.value,
                            upper_factor)
                if np.isclose(lower - upper, 0):
                    # Avoid a zero-width range
                    upper = 0.0001
                try:
                    params.add(parameterName,
                               value=value,
                               min=lower,
                               max=upper)
                except Exception as err:
                    msg = "modelFitterCore/mkParameters parameterName %s" \
                          % parameterName
                    logger.error(msg, err)
            else:
                # No specification: use the supplied bounds
                value = np.mean([lowerBound, upperBound])
                params.add(parameterName,
                           value=value,
                           min=lowerBound,
                           max=upperBound)
        return params

    @classmethod
    def initializeRoadrunnerModel(cls, modelSpecification):
        """
        Provides a roadrunner model for a model specification.
        An ExtendedRoadRunner is returned as it is; a str is loaded
        as an Antimony model using te.loada.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str

        Raises
        ------
        ValueError: the specification is neither of the above

        Returns
        -------
        ExtendedRoadRunner
        """
        if isinstance(modelSpecification,
                      te.roadrunner.extended_roadrunner.ExtendedRoadRunner):
            return modelSpecification
        if isinstance(modelSpecification, str):
            return te.loada(modelSpecification)
        msg = 'Invalid model.'
        msg = msg + "\nA model must either be a Roadrunner model "
        msg = msg + "or an Antimony model."
        raise ValueError(msg)

    @classmethod
    def setupModel(cls, roadrunner, parameters, logger=Logger()):
        """
        Copies the value of every parameter into the roadrunner model.
        A value that cannot be set is reported to the logger and the
        remaining parameters are still processed.

        Parameters
        ----------
        roadrunner: ExtendedRoadRunner
        parameters: lmfit.Parameters
        logger Logger
        """
        for name, value in parameters.valuesdict().items():
            try:
                roadrunner.model[name] = value
            except Exception as err:
                logger.error(
                    "_modelFitterCore.setupModel: Could not set value for %s"
                    % name, err)

    @classmethod
    def runSimulation(
            cls,
            parameters=None,
            roadrunner=None,
            startTime=0,
            endTime=5,
            numPoint=30,
            selectedColumns=None,
            returnDataFrame=True,
            _logger=Logger(),
            _loggerPrefix="",
    ):
        """
        Runs a simulation. Defaults to parameter values in the simulation.

        Parameters
        ----------
        roadrunner: ExtendedRoadRunner/str
            Roadrunner model. A str is loaded as a new model;
            otherwise the model is reset before it is used.
        parameters: lmfit.Parameters
            lmfit parameters; if None, the model's values are unchanged
        startTime: float
            start time for the simulation
        endTime: float
            end time for the simulation
        numPoint: int
            number of points in the simulation
        selectedColumns: list-str
            output columns in simulation; the time column is added
            at the front if it is absent
        returnDataFrame: bool
            return a DataFrame
        _logger: Logger
        _loggerPrefix: str
            not used by this method

        Return
        ------
        DataFrame if returnDataFrame is True; otherwise NamedTimeseries.
        None if the simulation raises an exception (which is logged).
        """
        if isinstance(roadrunner, str):
            roadrunner = cls.initializeRoadrunnerModel(roadrunner)
        else:
            roadrunner.reset()
        if parameters is not None:
            # Parameters have been specified
            cls.setupModel(roadrunner, parameters, logger=_logger)
        # Assemble the arguments so that there is one simulate call
        # and one way of reporting a failure
        simulateArgs = [startTime, endTime, numPoint]
        if selectedColumns is not None:
            newSelectedColumns = list(selectedColumns)
            if TIME not in newSelectedColumns:
                newSelectedColumns.insert(0, TIME)
            simulateArgs.append(newSelectedColumns)
        # Do the simulation
        try:
            data = roadrunner.simulate(*simulateArgs)
        except Exception as err:
            # Same (message, exception) logging convention as the rest
            # of the class
            _logger.error("Roadrunner exception: ", err)
            data = None
        if data is None:
            return None
        fittedTS = NamedTimeseries(namedArray=data)
        if returnDataFrame:
            return fittedTS.to_dataframe()
        return fittedTS

    @classmethod
    def rpConstruct(cls):
        """
        Overrides rpickler.rpConstruct to provide an instance that is
        built without real arguments: the constructor is called with
        None for its first three positional parameters.

        Returns
        -------
        Instance of cls
        """
        instance = cls(None, None, None)
        return instance

    def rpRevise(self):
        """
        Overrides rpickler. Supplies a new Logger if the instance
        does not have a "logger" entry of its own.
        """
        if "logger" in self.__dict__:
            return
        self.logger = Logger()

    def _validateFittedDataTransformDct(self):
        """
        Verifies that every key of self.fittedDataTransformDct is in
        self.selectedColumns. Nothing is checked if either is None.

        Raises
        ------
        ValueError: a key is not in selectedColumns
        """
        if self.fittedDataTransformDct is None:
            return
        transformColumns = set(self.fittedDataTransformDct.keys())
        if self.selectedColumns is None:
            return
        excess = transformColumns.difference(self.selectedColumns)
        if excess:
            msg = "Columns not in selectedColumns: %s" % str(excess)
            raise ValueError(msg)

    def _transformFittedTS(self, data):
        """
        Builds a NamedTimeseries from simulation output and applies
        the column transformations in self.fittedDataTransformDct.

        Parameters
        ----------
        data: np.ndarray
            columns are taken to be time followed by
            self.selectedColumns

        Results
        ----------
        NamedTimeseries
        """
        colnames = [TIME] + list(self.selectedColumns)
        fittedTS = NamedTimeseries(array=data[:, :], colnames=colnames)
        transformDct = self.fittedDataTransformDct
        if transformDct is None:
            return fittedTS
        for column, func in transformDct.items():
            if func is None:
                continue
            # Each function receives the timeseries as updated so far
            fittedTS[column] = func(fittedTS)
        return fittedTS

    @staticmethod
    def _updateParameterDct(parameterDct):
        """
        Provides a new dictionary in which every tuple value has been
        replaced by a ParameterSpecification. A tuple is read as
        (lower, upper, value). Other values are kept as they are.
        None is treated as an empty dictionary.
        """
        if parameterDct is None:
            return {}
        converted = {}
        for name, specification in parameterDct.items():
            if isinstance(specification, tuple):
                specification = ParameterSpecification(
                    lower=specification[0],
                    upper=specification[1],
                    value=specification[2])
            converted[name] = specification
        return converted

    @staticmethod
    def addParameter(parameterDct: dict, name: str, lower: float, upper: float,
                     value: float):
        """
        Adds a parameter to a dictionary of parameters.
        The dictionary is updated in place; an existing entry with
        the same name is replaced.

        Parameters
        ----------
        parameterDct: parameter dictionary to augment
        name: parameter name
        lower: lower range of parameter value
        upper: upper range of parameter value
        value: initial value

        Returns
        -------
        dict
            the dictionary that was passed in
        """
        parameterDct[name] = ParameterSpecification(lower=lower,
                                                    upper=upper,
                                                    value=value)
        # The documented return value was previously missing
        return parameterDct

    def _adjustNames(self, antimonyModel:str, observedTS:NamedTimeseries)  \
          ->typing.Tuple[NamedTimeseries, list]:
        """
        Antimony exports can change the names of floating species
        by adding a "_" at the end. Check for this and adjust
        the names in observedTS.

        An observed column whose name (in the form "[name]") is not
        among the columns of a simulation of the model is renamed to
        "name_". The same change is made to the selected columns if the
        column is selected; the new name is placed at the end of the list.

        Parameters
        ----------
        antimonyModel: Antimony model that is loaded and simulated
        observedTS: observed data whose column names are checked

        Return
        ------
        NamedTimeseries: newObservedTS
            a renamed copy, or observedTS itself if no name is missing
        list: newSelectedColumns
        """
        rr = te.loada(antimonyModel)
        dataNames = rr.simulate().colnames
        names = ["[%s]" % n for n in observedTS.colnames]
        missingNames = [n[1:-1] for n in set(names).difference(dataNames)]
        newSelectedColumns = list(self.selectedColumns)
        if len(missingNames) == 0:
            return observedTS, newSelectedColumns
        #
        newObservedTS = observedTS.copy()
        self.logger.exception("Missing names in antimony export: %s" %
                              str(missingNames))
        missingNameSet = set(missingNames)
        for name in observedTS.colnames:
            if name not in missingNameSet:
                continue
            adjustedName = "%s_" % name
            newObservedTS = newObservedTS.rename(name, adjustedName)
            # An observed column need not be a selected column;
            # list.remove raises ValueError for an absent element.
            if name in newSelectedColumns:
                newSelectedColumns.remove(name)
                newSelectedColumns.append(adjustedName)
        return newObservedTS, newSelectedColumns

    def copy(self, isKeepLogger=False):
        """
        Creates a copy of the model fitter.
        Preserves the user-specified settings and the results
        of bootstrapping.

        A model that is not a str is converted to Antimony, and the
        names of the observed and selected columns are adjusted to
        match the export (see _adjustNames).

        Parameters
        ----------
        isKeepLogger: bool
            True: the copy uses the same logger object
            False: the copy uses a copy of the logger
                   (None if this fitter has no logger)

        Raises
        ------
        ValueError: the model cannot be converted to Antimony

        Returns
        -------
        instance of the same class as this fitter
            params is taken from the bootstrap result if there is one,
            otherwise from this fitter
        """
        if not isinstance(self.modelSpecification, str):
            try:
                modelSpecification = self.modelSpecification.getAntimony()
            except Exception as err:
                self.logger.error(
                    "Problem wth conversion to Antimony. Details:", err)
                # Keep the original exception as the cause
                raise ValueError("Cannot proceed.") from err
            observedTS, selectedColumns = self._adjustNames(
                modelSpecification, self.observedTS)
        else:
            modelSpecification = self.modelSpecification
            observedTS = self.observedTS.copy()
            selectedColumns = self.selectedColumns
        #
        if isKeepLogger:
            logger = self.logger
        elif self.logger is not None:
            logger = self.logger.copy()
        else:
            logger = None
        # numFitRepeat and the bootstrap settings are user-specified
        # constructor arguments; without them the copy silently
        # reverts to the constructor defaults.
        newModelFitter = self.__class__(
            copy.deepcopy(modelSpecification),
            observedTS,
            copy.deepcopy(self.parametersToFit),
            selectedColumns=selectedColumns,
            fitterMethods=self._fitterMethods,
            numFitRepeat=self._numFitRepeat,
            bootstrapMethods=self._bootstrapMethods,
            parameterLowerBound=self.lowerBound,
            parameterUpperBound=self.upperBound,
            parameterDct=copy.deepcopy(self.parameterDct),
            fittedDataTransformDct=copy.deepcopy(self.fittedDataTransformDct),
            logger=logger,
            isPlot=self._isPlot,
            **self.bootstrapKwargs)
        if self.bootstrapResult is not None:
            newModelFitter.bootstrapResult = self.bootstrapResult.copy()
            newModelFitter.params = newModelFitter.bootstrapResult.params
        else:
            newModelFitter.bootstrapResult = None
            newModelFitter.params = self.params
        return newModelFitter

    def initializeRoadRunnerModel(self):
        """
        Builds the roadrunner model for self.modelSpecification and
        stores it in self.roadrunnerModel.
        """
        model = ModelFitterCore.initializeRoadrunnerModel(
            self.modelSpecification)
        self.roadrunnerModel = model

    def getDefaultParameterValues(self):
        """
        Obtain the original values of parameters: the roadrunner model
        is re-initialized and reset, and the value of each parameter
        in self.parametersToFit is read from it.

        Returns
        -------
        dict:
            key: parameter name
            value: value of parameter
        """
        self.initializeRoadRunnerModel()
        self.roadrunnerModel.reset()
        return {
            name: self.roadrunnerModel.model[name]
            for name in self.parametersToFit
        }

    def simulate(self,
                 params=None,
                 startTime=None,
                 endTime=None,
                 numPoint=None):
        """
        Runs a simulation of the fitter's roadrunner model, creating
        the model first if it does not exist yet. Only the columns in
        self.selectedColumns are requested.

        Parameters
        ----------
        params: lmfit.Parameters
            default: the values in the model are used
        startTime: float
            default: start of the observed data
        endTime: float
            default: end of the observed data
        numPoint: int
            default: number of rows in the observed data

        Return
        ------
        NamedTimeseries
        """
        # Defaults come from the observed data; they are read
        # unconditionally
        defaultStartTime = self.observedTS.start
        if startTime is None:
            startTime = defaultStartTime
        defaultEndTime = self.observedTS.end
        if endTime is None:
            endTime = defaultEndTime
        defaultNumPoint = len(self.observedTS)
        if numPoint is None:
            numPoint = defaultNumPoint
        #
        if self.roadrunnerModel is None:
            self.initializeRoadRunnerModel()
        #
        fittedTS = ModelFitterCore.runSimulation(
            parameters=params,
            roadrunner=self.roadrunnerModel,
            startTime=startTime,
            endTime=endTime,
            numPoint=numPoint,
            selectedColumns=self.selectedColumns,
            _logger=self.logger,
            _loggerPrefix=self._loggerPrefix,
            returnDataFrame=False)
        return fittedTS

    def updateFittedAndResiduals(self, **kwargs) -> np.ndarray:
        """
        Updates values of self.fittedTS and self.residualsTS.
        self.fittedTS is the result of self.simulate(**kwargs);
        the residuals are calculated by self._residuals(self.params).

        Parameters
        ----------
        kwargs: dict
            arguments for simulation

        Instance Variables Updated
        --------------------------
        self.fittedTS
        self.residualsTS

        Returns
        -------
        1-d ndarray of residuals
        """
        self.fittedTS = self.simulate(**kwargs)  # Updates self.fittedTS
        residualsArr = self._residuals(self.params)
        # The residuals are flat: reshape to one row per time point
        numRow = len(self.fittedTS)
        numCol = len(residualsArr) // numRow
        cols = self.selectedColumns
        if self.residualsTS is None:
            self.residualsTS = self.observedTS.subsetColumns(cols)
        self.residualsTS[cols] = np.reshape(residualsArr, (numRow, numCol))
        # The documented return value was previously missing
        return residualsArr

    def _residuals(self, params) -> np.ndarray:
        """
        Compute the residuals between the observed data and a
        simulation of the model. NaN values in the difference are
        replaced by np.nan_to_num. If the simulation fails, every
        residual is LARGE_RESIDUAL.
        This internal-only method is implemented to maximize efficiency.

        Parameters
        ----------
        params: lmfit.Parameters/None
            parameter values for the simulation
            None: the values in the model are used

        Instance Variables Updated
        --------------------------
        self._bestParameters
            replaced if this call has the smallest sum of squared
            residuals since it was last reset

        Returns
        -------
        1-d ndarray of residuals
        """
        data = ModelFitterCore.runSimulation(
            parameters=params,
            roadrunner=self.roadrunnerModel,
            startTime=self.observedTS.start,
            endTime=self.observedTS.end,
            numPoint=len(self.observedTS),
            selectedColumns=self.selectedColumns,
            _logger=self.logger,
            _loggerPrefix=self._loggerPrefix,
            returnDataFrame=False)
        if data is None:
            # Simulation failed
            residualsArr = np.repeat(LARGE_RESIDUAL, len(self._observedArr))
        else:
            residualsArr = self._observedArr - data.flatten()
            residualsArr = np.nan_to_num(residualsArr)
        # np.sum, as in fitModel, instead of a Python-level loop
        rssq = np.sum(residualsArr**2)
        if (self._bestParameters.rssq is None)  \
              or (rssq < self._bestParameters.rssq):
            # params may be None, in which case there is nothing to copy
            bestParams = params.copy() if params is not None else None
            self._bestParameters = _BestParameters(params=bestParams,
                                                   rssq=rssq)
        return residualsArr

    def fitModel(self, params: lmfit.Parameters = None, max_nfev=100):
        """
        Fits the model by adjusting values of parameters based on
        differences between simulated and provided values of
        floating species.

        Every method in self._fitterMethods is run self._numFitRepeat
        times. The parameters with the smallest residual sum of squares
        are stored in self.params, along with the corresponding
        self.minimizer and self.minimizerResult. If parametersToFit is
        None, or if no minimization succeeds, self.params stays None.

        Parameters
        ----------
        params: starting values of parameters
            (default: result of self.mkParams())
        max_nfev: limit on function evaluations used for any method
            whose kwargs do not already contain "max_nfev"

        Example
        -------
        f.fitModel()
        """
        ParameterDescriptor = collections.namedtuple(
            "ParameterDescriptor",
            "params method rssq kwargs minimizer minimizerResult")
        MAX_NFEV = "max_nfev"
        block = Logger.join(self._loggerPrefix, "fitModel")
        guid = self.logger.startBlock(block)
        self.initializeRoadRunnerModel()
        self.params = None
        if self.parametersToFit is not None:
            if params is None:
                params = self.mkParams()
            # Fit the model to the data using one or more methods.
            # Choose the result with the lowest residual sum of squares
            paramResults = []
            lastExcp = None
            for idx, optimizerMethod in enumerate(self._fitterMethods):
                method = optimizerMethod.method
                # Work on a copy: writing the default into the shared
                # OptimizerMethod kwargs would make it stick, so that
                # max_nfev passed to later calls is silently ignored.
                kwargs = dict(optimizerMethod.kwargs)
                kwargs.setdefault(MAX_NFEV, max_nfev)
                for _ in range(self._numFitRepeat):
                    # Reset the tracker that self._residuals updates with
                    # the best parameters seen during this minimization
                    self._bestParameters = _BestParameters(params=None,
                                                           rssq=None)
                    minimizer = lmfit.Minimizer(self._residuals, params)
                    try:
                        minimizerResult = minimizer.minimize(method=method,
                                                             **kwargs)
                    except Exception as excp:
                        lastExcp = excp
                        msg = "Error minimizing for method: %s" % method
                        self.logger.error(msg, excp)
                        continue
                    # Continue from the best parameters seen, which need
                    # not be the final parameters of the minimizer
                    params = self._bestParameters.params.copy()
                    rssq = np.sum(self._residuals(params)**2)
                    # Skip a result that does not improve on the entry
                    # stored at position idx; the selection below takes
                    # the minimum over all stored results
                    if len(paramResults) > idx:
                        if rssq >= paramResults[idx].rssq:
                            continue
                    parameterDescriptor = ParameterDescriptor(
                        params=params,
                        method=method,
                        rssq=rssq,
                        kwargs=dict(kwargs),
                        minimizer=minimizer,
                        minimizerResult=minimizerResult,
                    )
                    paramResults.append(parameterDescriptor)
            if len(paramResults) == 0:
                msg = "*** Minimizer failed for this model and data."
                self.logger.error(msg, lastExcp)
            else:
                # Select the result that has the smallest residuals
                sortedMethods = sorted(paramResults, key=lambda r: r.rssq)
                bestMethod = sortedMethods[0]
                self.params = bestMethod.params
                self.minimizer = bestMethod.minimizer
                self.minimizerResult = bestMethod.minimizerResult
        # Ensure that residualsTS and fittedTS match the parameters
        self.updateFittedAndResiduals(params=self.params)
        self.logger.endBlock(guid)

    def getFittedModel(self):
        """
        Resets the roadrunner model and sets it up with the fitted
        parameters (self.params).

        Returns
        -------
        ExtendedRoadrunner

        Raises
        ------
        ValueError: no fitted parameters are available (see _checkFit)
        """
        self._checkFit()
        model = self.roadrunnerModel
        model.reset()
        fittedParams = self.params
        self._setupModel(fittedParams)
        return self.roadrunnerModel

    def _setupModel(self, parameters):
        """
        Delegates to ModelFitterCore.setupModel, passing this fitter's
        roadrunner model, the given parameters and this fitter's logger.

        Parameters
        ----------
        parameters: lmfit.Parameters

        """
        model = self.roadrunnerModel
        ModelFitterCore.setupModel(model, parameters, logger=self.logger)

    def mkParams(self, parameterDct: dict = None) -> lmfit.Parameters:
        """
        Builds lmfit parameters from parameter specifications using this
        fitter's parametersToFit, logger and bounds.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
            (default: self.parameterDct)

        Returns
        -------
        lmfit.Parameters
        """
        specifications = parameterDct
        if specifications is None:
            specifications = self.parameterDct
        options = dict(parametersToFit=self.parametersToFit,
                       logger=self.logger,
                       lowerBound=self.lowerBound,
                       upperBound=self.upperBound)
        return ModelFitterCore.mkParameters(specifications, **options)

    def _checkFit(self):
        if self.params is None:
            raise ValueError("Must use fitModel before using this method.")

    def serialize(self, path):
        """
        Writes a copy of this fitter (self.copy()) to a file with
        rpickle.dump.

        Parameters
        ----------
        path: str
            File path
        """
        # Copy before opening so the file is untouched if the copy fails
        snapshot = self.copy()
        with open(path, "wb") as stream:
            rpickle.dump(snapshot, stream)

    @classmethod
    def deserialize(cls, path):
        """
        Reads a fitter from a file with rpickle.load and initializes
        its roadrunner model.

        Parameters
        ----------
        path: str
            File path

        Return
        ------
        ModelFitter
            Model is initialized.
        """
        # NOTE(review): rpickle looks like a pickle-style loader; if so,
        # only files from trusted sources should be read - confirm.
        with open(path, "rb") as stream:
            restored = rpickle.load(stream)
        restored.initializeRoadRunnerModel()
        return restored
示例#5
0
    def mkParameters(
            cls,
            parameterDct: dict = None,
            parametersToFit: list = None,
            logger: Logger = Logger(),
            lowerBound: float = PARAMETER_LOWER_BOUND,
            upperBound: float = PARAMETER_UPPER_BOUND) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        A name in parametersToFit that has an entry in parameterDct takes
        its value and bounds from that specification; a bound that is None
        is computed as the specification value times LOWER_PARAMETER_MULT
        or UPPER_PARAMETER_MULT. Any other name is given the range
        [lowerBound, upperBound] and starts at the mean of the two bounds.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
        parametersToFit: list of parameters to fit
            (default: the keys of parameterDct)
        logger: error logger (None creates a new Logger)
        lowerBound: lower value of range for parameters
        upperBound: upper value of range for parameters

        Returns
        -------
        lmfit.Parameters

        Raises
        ------
        RuntimeError: neither parameterDct nor parametersToFit is given
        """
        def get(value, base_value, multiplier):
            # Use the explicit value when present; else scale the base
            if value is not None:
                return value
            return base_value * multiplier

        #
        if (parametersToFit is None) and (parameterDct is None):
            raise RuntimeError("Must specify one of these parameters.")
        if parameterDct is None:
            parameterDct = {}
        if parametersToFit is None:
            parametersToFit = parameterDct.keys()
        if logger is None:
            # Construct the class; calling the None-valued argument
            # itself raises TypeError
            logger = Logger()
        params = lmfit.Parameters()
        for parameterName in parametersToFit:
            if parameterName in parameterDct:
                specification = parameterDct[parameterName]
                value = get(specification.value, specification.value, 1.0)
                # NOTE(review): the multipliers were selected in an if/else
                # on the sign of value, but both branches assigned the same
                # pair; sign-dependent factors may have been intended.
                lower = get(specification.lower, specification.value,
                            LOWER_PARAMETER_MULT)
                upper = get(specification.upper, specification.value,
                            UPPER_PARAMETER_MULT)
                if np.isclose(lower - upper, 0):
                    # Avoid a range whose bounds coincide
                    upper = 0.0001
                try:
                    params.add(parameterName,
                               value=value,
                               min=lower,
                               max=upper)
                except Exception as err:
                    # Report the problem and go on to the next parameter
                    msg = "modelFitterCore/mkParameters parameterName %s" \
                          % parameterName
                    logger.error(msg, err)
            else:
                value = np.mean([lowerBound, upperBound])
                params.add(parameterName,
                           value=value,
                           min=lowerBound,
                           max=upperBound)
        return params
示例#6
0
class Optimizer():
    """
    Implements an interface to optimizers with abstractions
    for multiple methods and performance reporting.
    The class also handles an oddity with lmfit that the final parameters
    returned may not be the best.

    Usage
    -----
    optimizer = Optimizer(calcResiduals, params, [cn.METHOD_LEASTSQ])
    optimizer.execute()
    """
    def __init__(self,
                 function,
                 initialParams,
                 methods,
                 logger=None,
                 isCollect=False):
        """
        Parameters
        ----------
        function: Function
           Arguments
            lmfit.parameters
            isInitialize (bool). True on first call
            isGetBest (bool). True to retrieve best parameters
           returns residuals (if bool arguments are false)
        initialParams: lmfit.parameters
        methods: list-_helpers.OptimizerMethod
        logger: Logger
           A new Logger is created if None
        isCollect: bool
           Collects performance statistics
        """
        # Inputs
        self._function = function
        self._initialParams = initialParams
        self._methods = methods
        self._isCollect = isCollect
        self.logger = Logger() if logger is None else logger
        # Outputs
        self.params = None
        self.minimizerResult = None
        self.rssq = None
        self.performanceStats = []  # list of performance results
        self.qualityStats = []  # relative rssq

    def copyResults(self):
        """
        Creates a new Optimizer holding copies of the results of this
        optimization. The copy does not keep the objective function.

        Returns
        -------
        Optimizer
        """
        duplicate = Optimizer(self._function,
                              self._initialParams.copy(),
                              self._methods,
                              isCollect=self._isCollect,
                              logger=self.logger)
        duplicate._function = None  # Not serializable
        # Results
        duplicate.rssq = self.rssq
        duplicate.params = None if self.params is None  \
              else self.params.copy()
        duplicate.minimizerResult = copy.deepcopy(self.minimizerResult)
        duplicate.qualityStats = copy.deepcopy(self.qualityStats)
        duplicate.performanceStats = copy.deepcopy(self.performanceStats)
        return duplicate

    @staticmethod
    def _setRandomValue(params):
        """
        Builds new parameters with the same names and bounds, each with a
        value drawn uniformly at random between its min and max.
        Only name, min, max and value are set on the new parameters.

        Parameters
        ----------
        params: lmfit.Parameters

        Returns
        -------
        lmfit.Parameters
        """
        randomized = lmfit.Parameters()
        for key, entry in params.items():
            low, high = entry.min, entry.max
            randomized.add(key,
                           value=np.random.uniform(low, high),
                           min=low,
                           max=high)
        return randomized

    def execute(self):
        """
        Performs the optimization on the function.
        Result is self.params

        The methods are applied in order, each starting from the
        parameters left by the previous one. A method that raises is
        logged and skipped. An error is logged if no method succeeds,
        which includes the case of an empty list of methods.
        """
        lastExcp = None
        isSuccess = False
        self.params = self._initialParams.copy()
        for optimizerMethod in self._methods:
            method = optimizerMethod.method
            kwargs = optimizerMethod.kwargs
            # A fresh wrapper per method keeps statistics separate
            wrapperFunction = _FunctionWrapper(self._function,
                                               isCollect=self._isCollect)
            minimizer = lmfit.Minimizer(wrapperFunction.execute, self.params)
            try:
                self.minimizerResult = minimizer.minimize(method=method,
                                                          **kwargs)
            except Exception as excp:
                lastExcp = excp
                msg = "Error minimizing for method: %s" % method
                self.logger.error(msg, excp)
                continue
            isSuccess = True
            # Update the parameters
            if wrapperFunction.bestParamDct is not None:
                _helpers.updateParameterValues(self.params,
                                               wrapperFunction.bestParamDct)
            # Update other statistics
            self.rssq = wrapperFunction.rssq
            self.performanceStats.append(list(wrapperFunction.perfStatistics))
            self.qualityStats.append(list(wrapperFunction.rssqStatistics))
        # Testing the minimizer object is not sufficient: it is created
        # before minimize() is attempted, so it exists even if all fail.
        if not isSuccess:
            msg = "*** Optimization failed."
            self.logger.error(msg, lastExcp)

    def report(self):
        """
        Reports the result of an optimization. The variables section of
        the lmfit fit report is replaced by a listing of self.params.

        Returns
        -------
        str

        Raises
        ------
        ValueError: no minimizer result is available
        """
        VARIABLE_STG = "[[Variables]]"
        CORRELATION_STG = "[[Correlations]]"
        if self.minimizerResult is None:
            raise ValueError("Must do fitModel before reportFit.")
        valuesStg = _helpers.ppDict(dict(self.params.valuesdict()), indent=4)
        fitReport = str(lmfit.fit_report(self.minimizerResult))
        # Drop the lines from the variables header up to (but excluding)
        # the correlations header
        keptLines = []
        isSkipping = False
        for text in fitReport.split("\n"):
            if CORRELATION_STG in text:
                isSkipping = False
            elif VARIABLE_STG in text:
                isSkipping = True
            if not isSkipping:
                keptLines.append(text)
        # Construct the report
        reportLines = [VARIABLE_STG] + valuesStg.split("\n") + keptLines
        return "\n".join(reportLines)

    def plotPerformance(self, isPlot=True):
        """
        Draws three bar plots of self.performanceStats, one bar per
        entry: total, average, and number of recorded values.

        Parameters
        ----------
        isPlot: bool
            Show the plot

        Raises
        ------
        ValueError: the optimizer was constructed with isCollect False
        """
        if not self._isCollect:
            raise ValueError("Must construct with isCollect = True "
                             "to get performance plot.")
        # Compute statistics
        TOT, CNT, AVG, IDX = "Tot", "Cnt", "Avg", "Idx"
        stats = self.performanceStats
        df = pd.DataFrame({
            IDX: range(len(stats)),
            TOT: [sum(times) for times in stats],
            CNT: [len(times) for times in stats],
            AVG: [np.mean(times) for times in stats],
        })
        # One panel per statistic, left to right
        _, axes = plt.subplots(1, 3)
        panels = [(TOT, "Total time"),
                  (AVG, "Average time"),
                  (CNT, "Number calls")]
        for ax, (column, title) in zip(axes, panels):
            df.plot.bar(x=IDX, y=column, ax=ax, title=title,
                        xlabel="method")
        if isPlot:
            plt.show()

    def plotQuality(self, isPlot=True):
        """
        Plots the quality results: one line plot per method of the
        values in self.qualityStats, truncated to the shortest series.

        Parameters
        ----------
        isPlot: bool
            Show the plot

        Raises
        ------
        ValueError: the optimizer was constructed with isCollect False
        """
        if not self._isCollect:
            msg = "Must construct with isCollect = True "
            msg += "to get quality plots."
            raise ValueError(msg)
        ITERATION = "iteration"
        numMethod = len(self._methods)
        # squeeze=False always yields a 2-d array of Axes, so a single
        # method needs no special case. Detecting the lone Axes by its
        # class name ("AxesSubplot") breaks when the class is just "Axes".
        _, axesArr = plt.subplots(numMethod, squeeze=False)
        axes = axesArr.flatten()
        minLength = min(len(v) for v in self.qualityStats)
        # Compute statistics
        dct = {
            method.method: self.qualityStats[i][:minLength]
            for i, method in enumerate(self._methods)
        }
        df = pd.DataFrame(dct)
        df[ITERATION] = range(minLength)
        #
        for idx, method in enumerate(self._methods):
            ax = axes[idx]
            df.plot.line(x=ITERATION, y=method.method, ax=ax, xlabel="")
            ax.set_ylabel("SSQ")
            # Only the bottom plot labels the shared x axis
            if idx == numMethod - 1:
                ax.set_xlabel(ITERATION)
        if isPlot:
            plt.show()

    @staticmethod
    def mkOptimizerMethod(methodNames=None,
                          methodKwargs=None,
                          maxFev=cn.MAX_NFEV_DFT):
        """
        Constructs a list of OptimizerMethod, one per method name.
        Each OptimizerMethod gets its own kwargs dict.

        Parameters
        ----------
        methodNames: list-str/str
            (default: [cn.METHOD_LEASTSQ])
        methodKwargs: list-dict/dict
            A dict applies to all methods. A non-empty list of dict
            provides one dict per method, in the order of methodNames.
        maxFev: int
            Value used for cn.MAX_NFEV in kwargs that lack this key.
            If None, the key is removed from kwargs that have it.

        Returns
        -------
        list-OptimizerMethod

        Raises
        ------
        ValueError: list of kwargs differs in length from methodNames
        """
        def mkKwargs(kwargs):
            # Ensure that there is a limit of function evaluations.
            # Works on a copy so that no dict is shared.
            newKwargs = dict(kwargs)
            if cn.MAX_NFEV not in newKwargs:
                newKwargs[cn.MAX_NFEV] = maxFev
            elif maxFev is None:
                del newKwargs[cn.MAX_NFEV]
            return newKwargs

        if methodNames is None:
            methodNames = [cn.METHOD_LEASTSQ]
        if isinstance(methodNames, str):
            methodNames = [methodNames]
        if methodKwargs is None:
            methodKwargs = {}
        # Any other value is passed to dict(), as before
        isKwargsList = isinstance(methodKwargs, (list, tuple))  \
              and (len(methodKwargs) > 0)  \
              and all(isinstance(k, dict) for k in methodKwargs)
        if isKwargsList:
            if len(methodKwargs) != len(methodNames):
                msg = "Must have one kwargs dict for each method name."
                raise ValueError(msg)
            kwargsLst = [mkKwargs(k) for k in methodKwargs]
        else:
            kwargsLst = [mkKwargs(methodKwargs) for _ in methodNames]
        #
        result = [_helpers.OptimizerMethod(n, k) for n, k  \
              in zip(methodNames, kwargsLst)]
        return result

    @classmethod
    def optimize(cls,
                 function,
                 initialParams,
                 methods,
                 numRestart=0,
                 **kwargs):
        """
        Runs an optimization from initialParams and then numRestart more
        from random initial values, and returns the optimizer with the
        smallest rssq.

        Parameters
        ----------
        function: Function
           Arguments
            lmfit.parameters
            isInitialize (bool). True on first call
            isGetBest (bool). True to retrieve best parameters
           returns residuals (if bool arguments are false)
        initialParams: lmfit.parameters
        methods: list-_helpers.OptimizerMethod
        numRestart: int
            Number of restarts with randomly chosen initial values
        kwargs: dict
            Keyword arguments passed to the constructor

        Returns
        -------
        Optimizer
        """
        bestOptimizer = cls(function, initialParams, methods, **kwargs)
        bestOptimizer.execute()
        #
        for _ in range(numRestart):
            newInitialParams = Optimizer._setRandomValue(initialParams)
            newOptimizer = cls(function, newInitialParams, methods, **kwargs)
            newOptimizer.execute()
            # rssq stays None if no method succeeded in execute();
            # None cannot be ordered against a number.
            if newOptimizer.rssq is None:
                continue
            if (bestOptimizer.rssq is None)  \
                  or (newOptimizer.rssq < bestOptimizer.rssq):
                bestOptimizer = newOptimizer
        return bestOptimizer