class BootstrapRunner(AbstractRunner):
    """Executes bootstrap iterations of model fitting for one work unit."""

    def __init__(self, runnerArgument):
        """
        Parameters
        ----------
        runnerArgument: RunnerArgument

        Notes
        -----
        1. Uses METHOD_LEASTSQ for fitModel iterations.
        """
        super().__init__()
        #
        self.lastErr = ""
        self.fitter = runnerArgument.fitter
        self.numIteration = runnerArgument.numIteration
        self.kwargs = runnerArgument.kwargs
        self.synthesizerClass = runnerArgument.synthesizerClass
        # Reuse the fitter's logger when it has one
        if "logger" in self.fitter.__dict__.keys():
            self.logger = self.fitter.logger
        else:
            self.logger = Logger()
        # A failed initial fit means there is no bootstrap work to do
        self._isDone = not self._fitInitial()
        self.columns = self.fitter.selectedColumns
        # Initializations for bootstrap loop
        if not self.isDone:
            fittedTS = self.fitter.fittedTS.subsetColumns(self.columns,
                                                          isCopy=False)
            self.synthesizer = self.synthesizerClass(
                observedTS=self.fitter.observedTS.subsetColumns(
                    self.columns, isCopy=False),
                fittedTS=fittedTS,
                **self.kwargs)
            self.numSuccessIteration = 0
            if self.fitter.minimizerResult is None:
                self.fitter.fitModel()
            # Reduced chi-square of the base fit; used as a quality bar below
            self.baseChisq = self.fitter.minimizerResult.redchi
            self.curIteration = 0
            self.fd = self.logger.getFileDescriptor()
            self.baseFittedStatistic = TimeseriesStatistic(
                self.fitter.observedTS.subsetColumns(
                    self.fitter.selectedColumns, isCopy=False))

    def report(self, id=None):
        """
        Prints elapsed time since the last timer reset.

        Parameters
        ----------
        id: str/None
            None: reset the timer
            str: print "<id>: <elapsed seconds>"

        Notes
        -----
        Debug instrumentation. Currently disabled by the guard below;
        remove the guard to re-enable timing reports.
        """
        if True:  # disabled debug timing
            return
        if id is None:
            self._startTime = time.time()
        else:
            elapsed = time.time() - self._startTime
            print("%s: %2.3f" % (id, elapsed))

    @property
    def numWorkUnit(self):
        # Number of bootstrap iterations requested of this runner
        return self.numIteration

    @property
    def isDone(self):
        # True when no further iterations should be run
        return self._isDone

    def run(self):
        """
        Runs the bootstrap.

        Returns
        -------
        BootstrapResult
            None when self.isDone was already True on entry.
        """
        def mkNullResult():
            # Result used when no iteration produced an acceptable fit
            fittedStatistic = TimeseriesStatistic(
                self.fitter.observedTS[self.fitter.selectedColumns])
            return BootstrapResult(self.fitter, 0, {}, fittedStatistic)
        #
        if self.isDone:
            return
        # Set up logging for this run
        if self.fd is not None:
            sys.stderr = self.fd
            sys.stdout = self.fd
        isSuccess = False
        bootstrapError = 0
        self.report()
        # FIX: the loop variable was "_" but the except clause below
        # formatted "% iteration", raising NameError whenever a fit failed.
        for iteration in range(ITERATION_MULTIPLIER):
            newObservedTS = self.synthesizer.calculate()
            self.report("newObservedTS")
            # Update fitter to use the new observed data
            _ = self.fitter._updateObservedTS(newObservedTS, isCheck=False)
            self.report("updated fitter")
            # Try fitting
            try:
                self.fitter.fitModel(params=self.fitter.params)
                self.report("fitter.fit")
            except Exception as err:
                # Problem with the fit.
                msg = "modelFitterBootstrap. Fit failed on iteration %d." \
                      % iteration
                self.logger.error(msg, err)
                bootstrapError += 1
                continue
            # Verify that there is a result
            if self.fitter.minimizerResult is None:
                continue
            # Check if the fit is of sufficient quality
            if self.fitter.minimizerResult.redchi > MAX_CHISQ_MULT * self.baseChisq:
                continue
            if self.fitter.params is None:
                continue
            isSuccess = True
            self.report("break")
            break
        # Create the result
        if isSuccess:
            self.numSuccessIteration += 1
            parameterDct = {k: [v]
                            for k, v in self.fitter.params.valuesdict().items()}
            fittedStatistic = self.baseFittedStatistic.copy()
            fittedStatistic.accumulate(
                self.fitter.fittedTS.subsetColumns(
                    self.fitter.selectedColumns, isCopy=False))
            bootstrapResult = BootstrapResult(self.fitter,
                                              self.numSuccessIteration,
                                              parameterDct,
                                              fittedStatistic,
                                              bootstrapError=bootstrapError)
        else:
            bootstrapResult = mkNullResult()
            self._isDone = True
        # Close the logging file
        if self.fd is not None:
            if not self.fd.closed:
                self.fd.close()
        # See if completed work
        if self.numSuccessIteration >= self.numIteration:
            self._isDone = True
        return bootstrapResult

    def _fitInitial(self):
        """
        Does the initial fit, retrying up to MAX_TRIES times.

        Returns
        -------
        bool
            successful fit
        """
        isSuccess = False
        for _ in range(MAX_TRIES):
            try:
                self.fitter.fitModel()  # Initialize model
                isSuccess = True
                break
            except Exception as err:
                self.lastErr = err
                msg = "Could not do initial fit"
                self.logger.error(msg, err)
        return isSuccess
class Runner(object):
    """Runs tests on biomodels."""

    def __init__(self, firstModel: int = 210, numModel: int = 2,
                 pclPath=PCL_FILE, figPath=FIG_PATH,
                 useExistingData: bool = False, isPlot=IS_PLOT,
                 **kwargDct):
        """
        Parameters
        ----------
        firstModel: first model to use
        numModel: number of models to use
        pclPath: file to which results are saved
        figPath: file to which the summary figure is saved
        useExistingData: use data in existing PCL file
        isPlot: produce plots after the run
        kwargDct: passed through to TestHarness (may carry a Logger)
        """
        self.useExistingData = useExistingData and os.path.isfile(pclPath)
        # Recover previously saved results if desired
        if self.useExistingData:
            self.restore(pclPath=pclPath)
        else:
            # Initialize based on type of context variable,
            # inferred from the name's suffix/prefix convention
            for name in CONTEXT:
                if name[-1:] == "s":
                    self.__setattr__(name, [])
                elif name[-3:] == "Dct":
                    self.__setattr__(name, {})
                elif name[-4:] == "Path":
                    self.__setattr__(name, None)
                elif name[0:2] == "is":
                    self.__setattr__(name, False)
                else:
                    self.__setattr__(name, 0)
        # Initialize to parameters for this instantiation
        self.firstModel = firstModel
        self.numModel = numModel
        self.pclPath = pclPath
        self.figPath = figPath
        self.kwargDct = kwargDct
        self.isPlot = isPlot
        self.useExistingData = useExistingData
        #
        if LOGGER in kwargDct.keys():
            self.logger = kwargDct[LOGGER]
        else:
            self.logger = Logger()
            kwargDct[LOGGER] = self.logger
        self.save()

    def _isListSame(self, list1, list2):
        # True if the two lists hold the same elements (order ignored)
        diff = set(list1).symmetric_difference(list2)
        return len(diff) == 0

    def equals(self, other):
        """
        Compares this Runner with another: same attributes, with list
        attributes compared as sets and primitive attributes by value.
        Attributes of other types are ignored.

        Returns
        -------
        bool
        """
        selfKeys = list(self.__dict__.keys())
        otherKeys = list(other.__dict__.keys())
        if not self._isListSame(selfKeys, otherKeys):
            return False
        #
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                isEqual = self._isListSame(value, other.__getattribute__(key))
                if not isEqual:
                    return False
            elif any([isinstance(value, t) for t in [int, str, float, bool]]):
                if self.__getattribute__(key) != other.__getattribute__(key):
                    return False
            else:
                # Non-primitive, non-list attributes are not compared
                pass
        #
        return True

    def run(self):
        """
        Runs the tests. Saves state after each test.
        """
        # Processing models
        modelNums = self.firstModel + np.array(range(self.numModel))
        for modelNum in modelNums:
            if (modelNum in self.processedModels) and self.useExistingData:
                continue
            else:
                self.processedModels.append(modelNum)
            input_path = PATH_PAT % modelNum
            msg = "Model %s" % input_path
            self.logger.activity(msg)
            try:
                harness = TestHarness(input_path, **self.kwargDct)
                if len(harness.parametersToFit) == 0:
                    self.logger.result("No fitable parameters in model.")
                    self.save()
                    continue
                harness.evaluate(stdResiduals=1.0,
                                 fractionParameterDeviation=1.0,
                                 relError=2.0)
            except Exception as err:
                self.erroredModels.append(modelNum)
                self.logger.error("TestHarness failed", err)
                self.save()
                continue
            # Parameters for model
            self.modelParameterDct[modelNum] = \
                list(harness.fitModelResult.parameterRelErrorDct.keys())
            # Relative error in initial fit
            values = list(harness.fitModelResult.parameterRelErrorDct.values())
            self.fitModelRelerrors.extend(values)
            # Relative error in bootstrap
            values = list(harness.bootstrapResult.parameterRelErrorDct.values())
            self.bootstrapRelerrors.extend(values)
            # Count models without exceptions
            self.nonErroredModels.append(modelNum)
            self.numNoError = len(self.nonErroredModels)
            self.save()
        # Check for plot
        if self.isPlot:
            self.plot()

    def save(self):
        """
        Saves state. Maintain in sync with self.restore().
        """
        if self.pclPath is not None:
            data = [self.__getattribute__(n) for n in CONTEXT]
            with open(self.pclPath, "wb") as fd:
                pickle.dump(data, fd)

    def restore(self, pclPath=None):
        """
        Restores state. Maintain in sync with self.save().

        Raises
        ------
        ValueError: restart file does not exist
        """
        if pclPath is None:
            pclPath = self.pclPath
        if os.path.isfile(pclPath):
            with open(pclPath, "rb") as fd:
                data = pickle.load(fd)
            [self.__setattr__(n, v) for n, v in zip(CONTEXT, data)]
        else:
            raise ValueError("***Restart file %s does not exist"
                             % self.pclPath)

    @staticmethod
    def _pruneRelativeErrors(relativeErrors, maxError=MAX_RELATIVE_ERROR):
        """
        Deletes NaNs. Removes very large values.

        Parameters
        ----------
        relativeErrors: list of relative errors
        maxError: maximum relative error considered

        Returns
        -------
        list: pruned errors
        float: fraction pruned from non-nan values
        """
        noNanErrors = [v for v in relativeErrors if not np.isnan(v)]
        prunedErrors = [v for v in noNanErrors if v <= maxError]
        # FIX: guard against an empty/all-NaN input, which previously
        # raised ZeroDivisionError.
        if len(noNanErrors) == 0:
            return prunedErrors, 0.0
        prunedFrc = 1 - len(prunedErrors) / len(noNanErrors)
        return prunedErrors, prunedFrc

    def plot(self):
        """
        Does all plots.
        """
        _, axes = plt.subplots(1, 2)
        prunedModelErrors, modelPrunedFrc = \
            self._pruneRelativeErrors(self.fitModelRelerrors)
        prunedBootstrapErrors, bootstrapPrunedFrc = \
            self._pruneRelativeErrors(self.bootstrapRelerrors)
        maxBin1 = self._plotRelativeErrors(axes[0], prunedModelErrors,
                                           FIT_MODEL, modelPrunedFrc)
        maxBin2 = self._plotRelativeErrors(axes[1], prunedBootstrapErrors,
                                           BOOTSTRAP, bootstrapPrunedFrc,
                                           isYLabel=False)
        # Use a common y-scale for the two histograms
        maxBin = max(maxBin1, maxBin2)
        if maxBin > 0:
            axes[0].set_ylim([0, maxBin])
            axes[1].set_ylim([0, maxBin])
        #
        if len(self.processedModels) == 0:
            frac = 0.0
        else:
            frac = 1.0 * self.numNoError / len(self.processedModels)
        suptitle = "Models %d-%d. Fraction non-errored: %2.3f"
        lastModel = self.firstModel + len(self.processedModels) - 1
        suptitle = suptitle % (self.firstModel, lastModel, frac)
        plt.suptitle(suptitle)
        # FIX: save before show. Calling savefig after show saved an
        # empty figure since show() may destroy the interactive figure.
        plt.savefig(self.figPath)
        plt.show()

    def _plotRelativeErrors(self, ax, relErrors, title, prunedFrc,
                            isYLabel=True):
        """
        Plots histogram of relative errors.

        Parameters
        ----------
        ax: Matplotlib.axes
        relErrors: list-float
        title: str
        prunedFrc: float
        isYLabel: bool

        Returns
        -------
        float: maximum number in a bin
        """
        rr = ax.hist(relErrors)
        fullTitle = "%s. Frc Pruned: %2.2f" % (title, prunedFrc)
        ax.set_title(fullTitle)
        ax.set_xlabel("relative error")
        if isYLabel:
            ax.set_ylabel("number parameters")
        ax.set_xlim([0, 1])
        return max(rr[0])
class ModelFitterCore(rpickle.RPickler):
    """Core logic for fitting model parameters to observed timeseries."""

    def __init__(self,
                 modelSpecification,
                 observedData,
                 parametersToFit=None,
                 selectedColumns=None,
                 fitterMethods=METHOD_FITTER_DEFAULTS,
                 numFitRepeat=1,
                 bootstrapMethods=METHOD_BOOTSTRAP_DEFAULTS,
                 parameterLowerBound=PARAMETER_LOWER_BOUND,
                 parameterUpperBound=PARAMETER_UPPER_BOUND,
                 parameterDct=None,
                 fittedDataTransformDct=None,
                 logger=Logger(),
                 isPlot=True,
                 _loggerPrefix="",
                 # The following must be kept in sync with
                 # ModelFitterBootstrap.bootstrap
                 numIteration: int = 10,
                 reportInterval: int = 1000,
                 synthesizerClass=ObservationSynthesizerRandomizedResiduals,
                 maxProcess: int = None,
                 serializePath: str = None,
                 ):
        """
        Constructs estimates of parameter values.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            roadrunner model or antimony model
        observedData: NamedTimeseries/str
            str: path to CSV file
        parametersToFit: list-str/None
            parameters in the model that you want to fit
            if None, no parameters are fit
        selectedColumns: list-str
            species names you wish use to fit the model
            default: all columns in observedData
        parameterLowerBound: float
            lower bound for the fitting parameters
        parameterUpperBound: float
            upper bound for the fitting parameters
        parameterDct: dict
            key: parameter name
            value: triple - (lowerRange, startingValue, upperRange)
            default None is treated as an empty dict
            (FIX: was a mutable default argument)
        fittedDataTransformDct: dict
            key: column in selectedColumns
            value: function of the data in selectedColumns;
                   input: NamedTimeseries
                   output: array for the values of the column
            default None means no transformation
            (FIX: was a mutable default argument)
        logger: Logger
        fitterMethods: str/list-str
            method used for minimization in fitModel
        numFitRepeat: int
            number of times fitting is repeated for a method
        bootstrapMethods: str/list-str
            method used for minimization in bootstrap
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        synthesizerClass: object that synthesizes new observations
            Must subclass ObservationSynthesizer
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap

        Usage
        -----
        parameterDct = {
            "k1": (1, 5, 10),  # name of parameter: low value, initial, high
            "k2": (2, 3, 6)}
        fitter = ModelFitter(roadrunnerModel, "observed.csv",
                             parameterDct=parameterDct)
        fitter.fitModel()  # Do the fit
        fitter.bootstrap()  # Estimate parameter variance with bootstrap
        """
        if modelSpecification is not None:
            # Not the default constructor
            self._loggerPrefix = _loggerPrefix
            self.modelSpecification = modelSpecification
            self.parametersToFit = parametersToFit
            self.lowerBound = parameterLowerBound
            self.upperBound = parameterUpperBound
            self.bootstrapKwargs = dict(
                numIteration=numIteration,
                reportInterval=reportInterval,
                maxProcess=maxProcess,
                serializePath=serializePath,
            )
            self.parameterDct = self._updateParameterDct(parameterDct)
            self._numFitRepeat = numFitRepeat
            if self.parametersToFit is None:
                self.parametersToFit = [p for p in self.parameterDct.keys()]
            self.observedTS = observedData
            if self.observedTS is not None:
                self.observedTS = mkNamedTimeseries(observedData)
            #
            self.fittedDataTransformDct = fittedDataTransformDct
            #
            if (selectedColumns is None) and (self.observedTS is not None):
                selectedColumns = self.observedTS.colnames
            self.selectedColumns = selectedColumns
            # Construct array of non-nan observed values
            self._observedArr = self.observedTS[self.selectedColumns].flatten()
            # Other internal state
            self._fitterMethods = fitterMethods
            if isinstance(self._fitterMethods, str):
                if self._fitterMethods == METHOD_BOTH:
                    self._fitterMethods = METHOD_FITTER_DEFAULTS
                else:
                    self._fitterMethods = [self._fitterMethods]
            self._bootstrapMethods = bootstrapMethods
            if isinstance(self._bootstrapMethods, str):
                self._bootstrapMethods = [self._bootstrapMethods]
            self._isPlot = isPlot
            self._plotter = tp.TimeseriesPlotter(isPlot=self._isPlot)
            self._plotFittedTS = None  # Timeseries that is plotted
            self.logger = logger
            # The following are calculated during fitting
            self.roadrunnerModel = None
            self.minimizer = None  # lmfit.minimizer
            self.minimizerResult = None  # Results of minimization
            self.params = None  # params property in lmfit.minimizer
            self.fittedTS = self.observedTS.copy(
                isInitialize=True)  # Initialize
            self.residualsTS = None  # Residuals for selectedColumns
            self.bootstrapResult = None  # Result from bootstrapping
            # Validation checks
            self._validateFittedDataTransformDct()
        else:
            # Default construction path used by rpConstruct
            pass

    @classmethod
    def rpConstruct(cls):
        """
        Overrides rpickler.rpConstruct to create a method that
        constructs an instance without arguments.

        Returns
        -------
        Instance of cls
        """
        return cls(None, None, None)

    def rpRevise(self):
        """
        Overrides rpickler. Ensures deserialized instances have a logger.
        """
        if not "logger" in self.__dict__.keys():
            self.logger = Logger()

    def _validateFittedDataTransformDct(self):
        """
        Verifies that the transform keys are a subset of selectedColumns.

        Raises
        ------
        ValueError: a transform key is not a selected column
        """
        if self.fittedDataTransformDct is not None:
            keySet = set(self.fittedDataTransformDct.keys())
            selectedColumnsSet = self.selectedColumns
            if (keySet is not None) and (selectedColumnsSet is not None):
                excess = set(keySet).difference(selectedColumnsSet)
                if len(excess) > 0:
                    msg = "Columns not in selectedColumns: %s" % str(excess)
                    # FIX: previously raised ValueError(excess), discarding
                    # the explanatory message that was just constructed.
                    raise ValueError(msg)

    def _transformFittedTS(self, data):
        """
        Updates the fittedTS taking into account required transformations.

        Parameters
        ----------
        data: np.ndarray

        Returns
        -------
        NamedTimeseries
        """
        colnames = list(self.selectedColumns)
        colnames.insert(0, TIME)
        fittedTS = NamedTimeseries(array=data[:, :], colnames=colnames)
        if self.fittedDataTransformDct is not None:
            for column, func in self.fittedDataTransformDct.items():
                if func is not None:
                    fittedTS[column] = func(fittedTS)
        return fittedTS

    def _updateParameterDct(self, parameterDct):
        """
        Handles values that are tuples instead of ParameterSpecification.

        Parameters
        ----------
        parameterDct: dict/None
            None is treated as an empty dict

        Returns
        -------
        dict: key parameter name, value ParameterSpecification
        """
        # FIX: accept None so the constructor default need not be a
        # mutable dict.
        if parameterDct is None:
            return {}
        dct = dict(parameterDct)
        for name, value in parameterDct.items():
            if isinstance(value, tuple):
                dct[name] = ParameterSpecification(lower=value[0],
                                                   upper=value[1],
                                                   value=value[2])
        return dct

    @staticmethod
    def addParameter(parameterDct: dict, name: str, lower: float,
                     upper: float, value: float):
        """
        Adds a parameter to a dictionary of parameters, in place.

        Parameters
        ----------
        parameterDct: parameter dictionary to augment (mutated in place)
        name: parameter name
        lower: lower range of parameter value
        upper: upper range of parameter value
        value: initial value
        """
        parameterDct[name] = ParameterSpecification(lower=lower,
                                                    upper=upper,
                                                    value=value)

    def _adjustNames(self, antimonyModel: str,
                     observedTS: NamedTimeseries) \
            -> typing.Tuple[NamedTimeseries, list]:
        """
        Antimony exports can change the names of floating species
        by adding a "_" at the end. Check for this and adjust the
        names in observedTS.

        Return
        ------
        NamedTimeseries: newObservedTS
        list: newSelectedColumns
        """
        rr = te.loada(antimonyModel)
        dataNames = rr.simulate().colnames
        names = ["[%s]" % n for n in observedTS.colnames]
        missingNames = [n[1:-1] for n in set(names).difference(dataNames)]
        newSelectedColumns = list(self.selectedColumns)
        if len(missingNames) > 0:
            newObservedTS = observedTS.copy()
            self.logger.exception("Missing names in antimony export: %s"
                                  % str(missingNames))
            for name in observedTS.colnames:
                missingName = "%s_" % name
                if name in missingNames:
                    newObservedTS = newObservedTS.rename(name, missingName)
                    newSelectedColumns.remove(name)
                    newSelectedColumns.append(missingName)
        else:
            newObservedTS = observedTS
        return newObservedTS, newSelectedColumns

    def copy(self, isKeepLogger=False):
        """
        Creates a copy of the model fitter. Preserves the user-specified
        settings and the results of bootstrapping.

        Parameters
        ----------
        isKeepLogger: bool
            share this fitter's logger instead of copying it

        Returns
        -------
        ModelFitterCore

        Raises
        ------
        ValueError: model cannot be converted to Antimony
        """
        if not isinstance(self.modelSpecification, str):
            try:
                modelSpecification = self.modelSpecification.getAntimony()
            except Exception as err:
                # FIX: typo "wth" -> "with" in the error message
                self.logger.error(
                    "Problem with conversion to Antimony. Details:", err)
                raise ValueError("Cannot proceed.") from err
            observedTS, selectedColumns = self._adjustNames(
                modelSpecification, self.observedTS)
        else:
            modelSpecification = self.modelSpecification
            observedTS = self.observedTS.copy()
            selectedColumns = self.selectedColumns
        #
        if isKeepLogger:
            logger = self.logger
        elif self.logger is not None:
            logger = self.logger.copy()
        else:
            logger = None
        newModelFitter = self.__class__(
            copy.deepcopy(modelSpecification),
            observedTS,
            copy.deepcopy(self.parametersToFit),
            selectedColumns=selectedColumns,
            fitterMethods=self._fitterMethods,
            bootstrapMethods=self._bootstrapMethods,
            parameterLowerBound=self.lowerBound,
            parameterUpperBound=self.upperBound,
            parameterDct=copy.deepcopy(self.parameterDct),
            fittedDataTransformDct=copy.deepcopy(self.fittedDataTransformDct),
            logger=logger,
            isPlot=self._isPlot)
        if self.bootstrapResult is not None:
            newModelFitter.bootstrapResult = self.bootstrapResult.copy()
            newModelFitter.params = newModelFitter.bootstrapResult.params
        else:
            newModelFitter.bootstrapResult = None
            newModelFitter.params = self.params
        return newModelFitter

    def _initializeRoadrunnerModel(self):
        """
        Sets self.roadrunnerModel.

        Raises
        ------
        ValueError: modelSpecification is neither a Roadrunner model
            nor an Antimony string
        """
        if isinstance(self.modelSpecification,
                      te.roadrunner.extended_roadrunner.ExtendedRoadRunner):
            self.roadrunnerModel = self.modelSpecification
        elif isinstance(self.modelSpecification, str):
            self.roadrunnerModel = te.loada(self.modelSpecification)
        else:
            # FIX: message previously read "...a Roadrunner model an
            # Antimony model." -- missing "or".
            msg = 'Invalid model.'
            msg = msg + "\nA model must either be a Roadrunner model "
            msg = msg + "or an Antimony model."
            raise ValueError(msg)

    def getDefaultParameterValues(self):
        """
        Obtain the original values of parameters.

        Returns
        -------
        dict:
            key: parameter name
            value: value of parameter
        """
        dct = {}
        self._initializeRoadrunnerModel()
        self.roadrunnerModel.reset()
        for parameterName in self.parametersToFit:
            dct[parameterName] = self.roadrunnerModel.model[parameterName]
        return dct

    def simulate(self, params=None, startTime=None, endTime=None,
                 numPoint=None):
        """
        Runs a simulation. Defaults to parameter values in the simulation.

        Parameters
        ----------
        params: lmfit.Parameters
        startTime: float
        endTime: float
        numPoint: int

        Return
        ------
        NamedTimeseries
        """
        # FIX: helper was named "set", shadowing the builtin
        def deflt(default, parameter):
            # Sets to default if parameter unspecified
            if parameter is None:
                return default
            else:
                return parameter
        ##V
        block = Logger.join(self._loggerPrefix, "fitModel.simulate")
        guid = self.logger.startBlock(block)
        ## V
        sub1Block = Logger.join(block, "sub1")
        sub1Guid = self.logger.startBlock(sub1Block)
        startTime = deflt(self.observedTS.start, startTime)
        endTime = deflt(self.observedTS.end, endTime)
        numPoint = deflt(len(self.observedTS), numPoint)
        ## V
        sub1aBlock = Logger.join(sub1Block, "sub1a")
        sub1aGuid = self.logger.startBlock(sub1aBlock)
        if self.roadrunnerModel is None:
            self._initializeRoadrunnerModel()
        self.roadrunnerModel.reset()
        ## ^
        self.logger.endBlock(sub1aGuid)
        ## V
        sub1bBlock = Logger.join(sub1Block, "sub1b")
        sub1bGuid = self.logger.startBlock(sub1bBlock)
        if params is not None:
            # Parameters have been specified
            self._setupModel(params)
        ## ^
        self.logger.endBlock(sub1bGuid)
        # Do the simulation
        selectedColumns = list(self.selectedColumns)
        if not TIME in selectedColumns:
            selectedColumns.insert(0, TIME)
        ## ^
        self.logger.endBlock(sub1Guid)
        ## V
        roadrunnerBlock = Logger.join(block, "roadrunner")
        roadrunnerGuid = self.logger.startBlock(roadrunnerBlock)
        data = self.roadrunnerModel.simulate(startTime, endTime, numPoint,
                                             selectedColumns)
        self.logger.endBlock(roadrunnerGuid)
        ## ^
        # Select the required columns
        ## V
        sub2Block = Logger.join(block, "sub2")
        sub2Guid = self.logger.startBlock(sub2Block)
        fittedTS = NamedTimeseries(namedArray=data)
        self.logger.endBlock(sub2Guid)
        ## ^
        self.logger.endBlock(guid)
        ##^
        return fittedTS

    def updateFittedAndResiduals(self, **kwargs) -> np.ndarray:
        """
        Updates values of self.fittedTS and self.residualsTS
        based on self.params.

        Parameters
        ----------
        kwargs: dict
            arguments for simulation

        Instance Variables Updated
        --------------------------
        self.fittedTS
        self.residualsTS

        Notes
        -----
        Returns None; residuals are left in self.residualsTS.
        (The original docstring claimed an ndarray return; no value
        is returned.)
        """
        self.fittedTS = self.simulate(**kwargs)  # Updates self.fittedTS
        cols = self.selectedColumns
        if self.residualsTS is None:
            self.residualsTS = self.observedTS.subsetColumns(cols)
        self.residualsTS[cols] = self.observedTS[cols] - self.fittedTS[cols]
        # NaNs in observed data produce NaN residuals; zero them
        for col in cols:
            self.residualsTS[col] = np.nan_to_num(self.residualsTS[col])

    def _residuals(self, params) -> np.ndarray:
        """
        Compute the residuals between objective and experimental data.
        Handle nan values in observedTS. This internal-only method is
        implemented to maximize efficiency.

        Parameters
        ----------
        params: lmfit.Parameters
            parameter values applied to the model before simulating

        Returns
        -------
        1-d ndarray of residuals
        """
        block = Logger.join(self._loggerPrefix, "fitModel._residuals")
        guid = self.logger.startBlock(block)  ##V
        self.roadrunnerModel.reset()
        self._setupModel(params)
        #
        roadrunnerBlock = Logger.join(block, "roadrunner")
        roadrunnerGuid = self.logger.startBlock(roadrunnerBlock)  ## V
        #
        data = self.roadrunnerModel.simulate(self.observedTS.start,
                                             self.observedTS.end,
                                             len(self.observedTS),
                                             self.selectedColumns)
        ## ^
        self.logger.endBlock(roadrunnerGuid)
        #
        tailBlock = Logger.join(block, "tail")
        tailGuid = self.logger.startBlock(tailBlock)  ## V
        residualsArr = self._observedArr - data.flatten()
        # NaNs (from missing observations) must not propagate to lmfit
        residualsArr = np.nan_to_num(residualsArr)
        ## ^
        self.logger.endBlock(tailGuid)
        ##^
        self.logger.endBlock(guid)
        #
        # Used for detailed debugging
        if False:
            self.logger.details("_residuals/std(residuals): %f"
                                % np.std(residualsArr))
            self.logger.details("_residuals/params: %s" % str(params))
        return residualsArr

    def fitModel(self, params: lmfit.Parameters = None,
                 max_nfev: int = 100):
        """
        Fits the model by adjusting values of parameters based on
        differences between simulated and provided values of
        floating species.

        Parameters
        ----------
        params: starting values of parameters
        max_nfev: maximum number of function evaluations

        Raises
        ------
        ValueError: every minimization method failed

        Example
        -------
        f.fitModel()
        """
        ParameterDescriptor = collections.namedtuple(
            "ParameterDescriptor",
            "params method std minimizer minimizerResult")
        block = Logger.join(self._loggerPrefix, "fitModel")
        guid = self.logger.startBlock(block)
        self._initializeRoadrunnerModel()
        if self.parametersToFit is None:
            # Compute fit and residuals for base model
            self.params = None
        else:
            if params is None:
                params = self.mkParams()
            # Fit the model to the data using one or more methods.
            # Choose the result with the lowest residual standard deviation
            paramDct = {}
            for method in self._fitterMethods:
                for _ in range(self._numFitRepeat):
                    minimizer = lmfit.Minimizer(self._residuals, params,
                                                max_nfev=max_nfev)
                    try:
                        minimizerResult = minimizer.minimize(
                            method=method, max_nfev=max_nfev)
                    except Exception as excp:
                        msg = "Error minimizing for method: %s" % method
                        self.logger.error(msg, excp)
                        continue
                    # Seed the next repeat with the best params so far
                    params = minimizerResult.params
                    std = np.std(self._residuals(params))
                    if method in paramDct.keys():
                        # Keep only the best repeat for this method
                        if std >= paramDct[method].std:
                            continue
                    paramDct[method] = ParameterDescriptor(
                        params=params.copy(),
                        method=method,
                        std=std,
                        minimizer=minimizer,
                        minimizerResult=minimizerResult,
                    )
            if len(paramDct) == 0:
                msg = "*** Minimizer failed for this model and data."
                raise ValueError(msg)
            # Select the result that has the smallest residuals
            sortedMethods = sorted(paramDct.keys(),
                                   key=lambda m: paramDct[m].std)
            bestMethod = sortedMethods[0]
            self.params = paramDct[bestMethod].params
            self.minimizer = paramDct[bestMethod].minimizer
            self.minimizerResult = paramDct[bestMethod].minimizerResult
        # Ensure that residualsTS and fittedTS match the parameters
        self.updateFittedAndResiduals(params=self.params)
        self.logger.endBlock(guid)

    def getFittedModel(self):
        """
        Provides the roadrunner model with fitted parameters

        Returns
        -------
        ExtendedRoadrunner
        """
        self._checkFit()
        self.roadrunnerModel.reset()
        self._setupModel(self.params)
        return self.roadrunnerModel

    def _setupModel(self, params):
        """
        Sets up the model for use based on the parameter parameters

        Parameters
        ----------
        params: lmfit.Parameters
        """
        pp = params.valuesdict()
        for parameter in self.parametersToFit:
            try:
                self.roadrunnerModel.model[parameter] = pp[parameter]
            except Exception as err:
                msg = "_modelFitterCore/_setupModel: Could not set value for %s" \
                      % parameter
                self.logger.error(msg, err)

    def mkParams(self, parameterDct: dict = None) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
            default: self.parameterDct

        Returns
        -------
        lmfit.Parameters
        """
        def get(value, base_value, multiplier):
            # Value if specified; otherwise scale the base value
            if value is not None:
                return value
            return base_value * multiplier
        #
        if parameterDct is None:
            parameterDct = self.parameterDct
        params = lmfit.Parameters()
        for parameterName in self.parametersToFit:
            if parameterName in parameterDct.keys():
                specification = parameterDct[parameterName]
                value = get(specification.value, specification.value, 1.0)
                # NOTE(review): both branches assign identical factors,
                # so the sign test has no effect. Presumably the factors
                # were meant to be swapped for negative values -- confirm
                # intended behavior before changing.
                if value > 0:
                    lower_factor = LOWER_PARAMETER_MULT
                    upper_factor = UPPER_PARAMETER_MULT
                else:
                    upper_factor = UPPER_PARAMETER_MULT
                    lower_factor = LOWER_PARAMETER_MULT
                lower = get(specification.lower, specification.value,
                            lower_factor)
                upper = get(specification.upper, specification.value,
                            upper_factor)
                # Avoid a degenerate (zero-width) range
                if np.isclose(lower - upper, 0):
                    upper = 0.0001
                try:
                    params.add(parameterName, value=value,
                               min=lower, max=upper)
                except Exception as err:
                    msg = "modelFitterCore/mkParams parameterName %s" \
                          % parameterName
                    self.logger.error(msg, err)
            else:
                # No specification: start midway between the global bounds
                value = np.mean([self.lowerBound, self.upperBound])
                params.add(parameterName, value=value,
                           min=self.lowerBound, max=self.upperBound)
        return params

    def _checkFit(self):
        # Guards methods that require a completed fit
        if self.params is None:
            raise ValueError("Must use fitModel before using this method.")

    def serialize(self, path):
        """
        Serialize the model to a path.

        Parameters
        ----------
        path: str
            File path
        """
        newModelFitter = self.copy()
        with open(path, "wb") as fd:
            rpickle.dump(newModelFitter, fd)

    @classmethod
    def deserialize(cls, path):
        """
        Deserialize the model from a path.

        Parameters
        ----------
        path: str
            File path

        Return
        ------
        ModelFitter
            Model is initialized.
        """
        with open(path, "rb") as fd:
            fitter = rpickle.load(fd)
        fitter._initializeRoadrunnerModel()
        return fitter
class ModelFitterCore(rpickle.RPickler):
    """
    Estimates parameter values of a roadrunner (SBML/Antimony) model by
    fitting simulated output to observed timeseries data, using one or
    more lmfit minimization methods.
    """

    # Subclasses used in interface
    class OptimizerMethod():
        """Pairs a minimization method name with its keyword arguments."""

        def __init__(self, method, kwargs):
            self.method = method
            self.kwargs = kwargs

    def __init__(self,
                 modelSpecification,
                 observedData,
                 parametersToFit=None,
                 selectedColumns=None,
                 fitterMethods=None,
                 numFitRepeat=1,
                 bootstrapMethods=None,
                 parameterLowerBound=PARAMETER_LOWER_BOUND,
                 parameterUpperBound=PARAMETER_UPPER_BOUND,
                 parameterDct=None,
                 fittedDataTransformDct=None,
                 logger=Logger(),
                 isPlot=True,
                 _loggerPrefix="",
                 # The following must be kept in sync with
                 # ModelFitterBootstrap.bootstrap
                 numIteration: int = 10,
                 reportInterval: int = 1000,
                 maxProcess: int = None,
                 serializePath: str = None,
                 ):
        """
        Constructs estimates of parameter values.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str
            roadrunner model or antimony model
        observedData: NamedTimeseries/str
            str: path to CSV file
        parametersToFit: list-str/None
            parameters in the model that you want to fit
            if None, no parameters are fit
        selectedColumns: list-str
            species names you wish use to fit the model
            default: all columns in observedData
        parameterLowerBound: float
            lower bound for the fitting parameters
        parameterUpperBound: float
            upper bound for the fitting parameters
        parameterDct: dict
            key: parameter name
            value: triple - (lowerRange, startingValue, upperRange)
        fittedDataTransformDct: dict
            key: column in selectedColumns
            value: function of the data in selectedColumns;
                   input: NamedTimeseries; output: array for the column
        logger: Logger
        fitterMethods: str/list-str/list-OptimizerMethod
            method used for minimization in fitModel
        numFitRepeat: int
            number of times fitting is repeated for a method
        bootstrapMethods: str/list-str/list-OptimizerMethod
            method used for minimization in bootstrap
        numIteration: number of bootstrap iterations
        reportInterval: number of iterations between progress reports
        maxProcess: Maximum number of processes to use. Default: numCPU
        serializePath: Where to serialize the fitter after bootstrap

        Usage
        -----
        parameterDct = {
            "k1": (1, 5, 10),  # name of parameter: low value, initial, high
            "k2": (2, 3, 6)}
        ftter = ModelFitter(roadrunnerModel, "observed.csv",
              parameterDct=parameterDct)
        fitter.fitModel()  # Do the fit
        fitter.bootstrap()  # Estimate parameter variance with bootstrap
        """
        if modelSpecification is not None:
            # Not the default constructor
            self._loggerPrefix = _loggerPrefix
            self.modelSpecification = modelSpecification
            self.parametersToFit = parametersToFit
            self.lowerBound = parameterLowerBound
            self.upperBound = parameterUpperBound
            self.bootstrapKwargs = dict(
                numIteration=numIteration,
                reportInterval=reportInterval,
                maxProcess=maxProcess,
                serializePath=serializePath,
            )
            self.parameterDct = ModelFitterCore._updateParameterDct(
                parameterDct)
            self._numFitRepeat = numFitRepeat
            if self.parametersToFit is None:
                self.parametersToFit = list(self.parameterDct.keys())
            self.observedTS = observedData
            if self.observedTS is not None:
                self.observedTS = mkNamedTimeseries(observedData)
            #
            self.fittedDataTransformDct = fittedDataTransformDct
            #
            if (selectedColumns is None) and (self.observedTS is not None):
                selectedColumns = self.observedTS.colnames
            self.selectedColumns = selectedColumns
            if self.observedTS is not None:
                self._observedArr = self.observedTS[
                    self.selectedColumns].flatten()
            else:
                self._observedArr = None
            # Other internal state
            self._fitterMethods = self._makeMethods(fitterMethods,
                                                    METHOD_FITTER_DEFAULTS)
            self._bootstrapMethods = self._makeMethods(
                bootstrapMethods, METHOD_BOOTSTRAP_DEFAULTS)
            if isinstance(self._bootstrapMethods, str):
                self._bootstrapMethods = [self._bootstrapMethods]
            self._isPlot = isPlot
            self._plotter = tp.TimeseriesPlotter(isPlot=self._isPlot)
            self._plotFittedTS = None  # Timeseries that is plotted
            self.logger = logger
            # The following are calculated during fitting
            self.roadrunnerModel = None
            self.minimizer = None  # lmfit.minimizer
            self.minimizerResult = None  # Results of minimization
            self.params = None  # params property in lmfit.minimizer
            self.fittedTS = self.observedTS.copy(
                isInitialize=True)  # Initialize
            self.residualsTS = None  # Residuals for selectedColumns
            self.bootstrapResult = None  # Result from bootstrapping
            # Validation checks
            self._validateFittedDataTransformDct()
            self._bestParameters = _BestParameters(rssq=None, params=None)
        else:
            # Default constructor used by rpickle deserialization;
            # attributes are restored afterwards.
            pass

    def _makeMethods(self, methods, default):
        """
        Creates a method dictionary.

        Parameters
        ----------
        methods: str/list-str/dict
            method used for minimization in fitModel
            dict: key-method, value-optional parameters

        Returns
        -------
        list-OptimizerMethod
            key: method name
            value: dict of optional parameters
        """
        if methods is None:
            methods = default
        if isinstance(methods, str):
            if methods == METHOD_BOTH:
                methods = METHOD_FITTER_DEFAULTS
            else:
                methods = [methods]
        if isinstance(methods, list):
            if isinstance(methods[0], str):
                results = [
                    ModelFitterCore.OptimizerMethod(method=m, kwargs={})
                    for m in methods
                ]
            else:
                results = methods
        else:
            raise RuntimeError("Must be a list")
        trues = [
            isinstance(m, ModelFitterCore.OptimizerMethod) for m in results
        ]
        if not all(trues):
            raise ValueError("Invalid methods: %s" % str(methods))
        return results

    @classmethod
    def mkParameters(
            cls,
            parameterDct: dict = None,
            parametersToFit: list = None,
            logger: Logger = Logger(),
            lowerBound: float = PARAMETER_LOWER_BOUND,
            upperBound: float = PARAMETER_UPPER_BOUND) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification
        parametersToFit: list of parameters to fit
        logger: error logger
        lowerBound: lower value of range for parameters
        upperBound: upper value of range for parameters

        Returns
        -------
        lmfit.Parameters

        Raises
        ------
        RuntimeError: if neither parameterDct nor parametersToFit is given
        """
        def get(value, base_value, multiplier):
            # Returns value if specified; otherwise base_value scaled.
            if value is not None:
                return value
            return base_value * multiplier

        #
        if (parametersToFit is None) and (parameterDct is None):
            raise RuntimeError("Must specify one of these parameters.")
        if parameterDct is None:
            parameterDct = {}
        if parametersToFit is None:
            parametersToFit = parameterDct.keys()
        if logger is None:
            # BUG FIX: was "logger = logger()", which calls None.
            logger = Logger()
        params = lmfit.Parameters()
        for parameterName in parametersToFit:
            if parameterName in parameterDct.keys():
                specification = parameterDct[parameterName]
                # NOTE(review): with base == value this always yields
                # specification.value; verify a None value is not expected.
                value = get(specification.value, specification.value, 1.0)
                if value > 0:
                    lower_factor = LOWER_PARAMETER_MULT
                    upper_factor = UPPER_PARAMETER_MULT
                else:
                    # BUG FIX: for non-positive values the factors must be
                    # swapped so that lower <= value <= upper; the original
                    # assigned the same factors as the positive branch.
                    lower_factor = UPPER_PARAMETER_MULT
                    upper_factor = LOWER_PARAMETER_MULT
                lower = get(specification.lower, specification.value,
                            lower_factor)
                upper = get(specification.upper, specification.value,
                            upper_factor)
                if np.isclose(lower - upper, 0):
                    # NOTE(review): degenerate range is widened by forcing
                    # a small positive upper bound — confirm intent.
                    upper = 0.0001
                try:
                    params.add(parameterName, value=value, min=lower,
                               max=upper)
                except Exception as err:
                    msg = "modelFitterCore/mkParameters parameterName %s" \
                        % parameterName
                    logger.error(msg, err)
            else:
                # No specification: start at the midpoint of the bounds.
                value = np.mean([lowerBound, upperBound])
                params.add(parameterName, value=value, min=lowerBound,
                           max=upperBound)
        return params

    @classmethod
    def initializeRoadrunnerModel(cls, modelSpecification):
        """
        Sets self.roadrunnerModel.

        Parameters
        ----------
        modelSpecification: ExtendedRoadRunner/str

        Returns
        -------
        ExtendedRoadRunner

        Raises
        ------
        ValueError: if the specification is neither a roadrunner model
            nor an Antimony string
        """
        if isinstance(modelSpecification,
                      te.roadrunner.extended_roadrunner.ExtendedRoadRunner):
            roadrunnerModel = modelSpecification
        elif isinstance(modelSpecification, str):
            roadrunnerModel = te.loada(modelSpecification)
        else:
            # MESSAGE FIX: original omitted "or" between the alternatives.
            msg = 'Invalid model.'
            msg = msg + "\nA model must either be a Roadrunner model "
            msg = msg + "or an Antimony model."
            raise ValueError(msg)
        return roadrunnerModel

    @classmethod
    def setupModel(cls, roadrunner, parameters, logger=Logger()):
        """
        Sets up the model for use based on the parameter parameters.

        Parameters
        ----------
        roadrunner: ExtendedRoadRunner
        parameters: lmfit.Parameters
        logger: Logger
        """
        pp = parameters.valuesdict()
        for parameter in pp.keys():
            try:
                roadrunner.model[parameter] = pp[parameter]
            except Exception as err:
                msg = "_modelFitterCore.setupModel: Could not set value for %s" \
                    % parameter
                logger.error(msg, err)

    @classmethod
    def runSimulation(
            cls,
            parameters=None,
            roadrunner=None,
            startTime=0,
            endTime=5,
            numPoint=30,
            selectedColumns=None,
            returnDataFrame=True,
            _logger=Logger(),
            _loggerPrefix="",
    ):
        """
        Runs a simulation. Defaults to parameter values in the simulation.

        Parameters
        ----------
        roadrunner: ExtendedRoadRunner/str
            Roadrunner model
        parameters: lmfit.Parameters
            lmfit parameters
        startTime: float
            start time for the simulation
        endTime: float
            end time for the simulation
        numPoint: int
            number of points in the simulation
        selectedColumns: list-str
            output columns in simulation
        returnDataFrame: bool
            return a DataFrame
        _logger: Logger
        _loggerPrefix: str

        Return
        ------
        NamedTimeseries (or None if fail to converge)
        """
        if isinstance(roadrunner, str):
            roadrunner = cls.initializeRoadrunnerModel(roadrunner)
        else:
            roadrunner.reset()
        if parameters is not None:
            # Parameters have been specified
            cls.setupModel(roadrunner, parameters, logger=_logger)
        # Do the simulation
        if selectedColumns is not None:
            newSelectedColumns = list(selectedColumns)
            if TIME not in newSelectedColumns:
                newSelectedColumns.insert(0, TIME)
            try:
                data = roadrunner.simulate(startTime, endTime, numPoint,
                                           newSelectedColumns)
            except Exception as err:
                _logger.error("Roadrunner exception: ", err)
                data = None
        else:
            try:
                data = roadrunner.simulate(startTime, endTime, numPoint)
            except Exception as err:
                # CONSISTENCY FIX: original called
                # _logger.exception("Roadrunner exception: %s", err), which
                # does not match the Logger.error(msg, err) API used in the
                # parallel branch above.
                _logger.error("Roadrunner exception: ", err)
                data = None
        if data is None:
            return data
        fittedTS = NamedTimeseries(namedArray=data)
        if returnDataFrame:
            result = fittedTS.to_dataframe()
        else:
            result = fittedTS
        return result

    @classmethod
    def rpConstruct(cls):
        """
        Overrides rpickler.rpConstruct to create a method that
        constructs an instance without arguments.

        Returns
        -------
        Instance of cls
        """
        return cls(None, None, None)

    def rpRevise(self):
        """
        Overrides rpickler: backfills a logger on deserialized instances.
        """
        if "logger" not in self.__dict__.keys():
            self.logger = Logger()

    def _validateFittedDataTransformDct(self):
        # Every transform key must name a selected column.
        if self.fittedDataTransformDct is not None:
            keySet = set(self.fittedDataTransformDct.keys())
            selectedColumnsSet = self.selectedColumns
            if (keySet is not None) and (selectedColumnsSet is not None):
                excess = set(keySet).difference(selectedColumnsSet)
                if len(excess) > 0:
                    msg = "Columns not in selectedColumns: %s" % str(excess)
                    raise ValueError(msg)

    def _transformFittedTS(self, data):
        """
        Updates the fittedTS taking into account required transformations.

        Parameters
        ----------
        data: np.ndarray

        Results
        ----------
        NamedTimeseries
        """
        colnames = list(self.selectedColumns)
        colnames.insert(0, TIME)
        fittedTS = NamedTimeseries(array=data[:, :], colnames=colnames)
        if self.fittedDataTransformDct is not None:
            for column, func in self.fittedDataTransformDct.items():
                if func is not None:
                    fittedTS[column] = func(fittedTS)
        return fittedTS

    @staticmethod
    def _updateParameterDct(parameterDct):
        """
        Handles values that are tuples instead of ParameterSpecification.
        """
        if parameterDct is None:
            parameterDct = {}
        dct = dict(parameterDct)
        for name, value in parameterDct.items():
            if isinstance(value, tuple):
                # Tuple order is (lower, upper, value).
                dct[name] = ParameterSpecification(lower=value[0],
                                                   upper=value[1],
                                                   value=value[2])
        return dct

    @staticmethod
    def addParameter(parameterDct: dict, name: str, lower: float,
                     upper: float, value: float):
        """
        Adds a parameter to a list of parameters.

        Parameters
        ----------
        parameterDct: parameter dictionary to agument
        name: parameter name
        lower: lower range of parameter value
        upper: upper range of parameter value
        value: initial value

        Returns
        -------
        dict
        """
        parameterDct[name] = ParameterSpecification(lower=lower,
                                                    upper=upper,
                                                    value=value)
        # FIX: docstring promised the dict but nothing was returned.
        # Returning it is backward compatible (mutation still happens).
        return parameterDct

    def _adjustNames(self, antimonyModel:str, observedTS:NamedTimeseries) \
          ->typing.Tuple[NamedTimeseries, list]:
        """
        Antimony exports can change the names of floating species by
        adding a "_" at the end. Check for this and adjust the names in
        observedTS.

        Return
        ------
        NamedTimeseries: newObservedTS
        list: newSelectedColumns
        """
        rr = te.loada(antimonyModel)
        dataNames = rr.simulate().colnames
        names = ["[%s]" % n for n in observedTS.colnames]
        missingNames = [n[1:-1] for n in set(names).difference(dataNames)]
        newSelectedColumns = list(self.selectedColumns)
        if len(missingNames) > 0:
            newObservedTS = observedTS.copy()
            self.logger.exception("Missing names in antimony export: %s"
                                  % str(missingNames))
            for name in observedTS.colnames:
                missingName = "%s_" % name
                if name in missingNames:
                    newObservedTS = newObservedTS.rename(name, missingName)
                    newSelectedColumns.remove(name)
                    newSelectedColumns.append(missingName)
        else:
            newObservedTS = observedTS
        return newObservedTS, newSelectedColumns

    def copy(self, isKeepLogger=False):
        """
        Creates a copy of the model fitter. Preserves the user-specified
        settings and the results of bootstrapping.
        """
        if not isinstance(self.modelSpecification, str):
            try:
                modelSpecification = self.modelSpecification.getAntimony()
            except Exception as err:
                # TYPO FIX in log message: "wth" -> "with".
                self.logger.error(
                    "Problem with conversion to Antimony. Details:", err)
                raise ValueError("Cannot proceed.")
            observedTS, selectedColumns = self._adjustNames(
                modelSpecification, self.observedTS)
        else:
            modelSpecification = self.modelSpecification
            observedTS = self.observedTS.copy()
            selectedColumns = self.selectedColumns
        #
        if isKeepLogger:
            logger = self.logger
        elif self.logger is not None:
            logger = self.logger.copy()
        else:
            logger = None
        newModelFitter = self.__class__(
            copy.deepcopy(modelSpecification),
            observedTS,
            copy.deepcopy(self.parametersToFit),
            selectedColumns=selectedColumns,
            fitterMethods=self._fitterMethods,
            bootstrapMethods=self._bootstrapMethods,
            parameterLowerBound=self.lowerBound,
            parameterUpperBound=self.upperBound,
            parameterDct=copy.deepcopy(self.parameterDct),
            fittedDataTransformDct=copy.deepcopy(self.fittedDataTransformDct),
            logger=logger,
            isPlot=self._isPlot)
        if self.bootstrapResult is not None:
            newModelFitter.bootstrapResult = self.bootstrapResult.copy()
            newModelFitter.params = newModelFitter.bootstrapResult.params
        else:
            newModelFitter.bootstrapResult = None
            newModelFitter.params = self.params
        return newModelFitter

    def initializeRoadRunnerModel(self):
        """
        Sets self.roadrunnerModel.
        """
        self.roadrunnerModel = ModelFitterCore.initializeRoadrunnerModel(
            self.modelSpecification)

    def getDefaultParameterValues(self):
        """
        Obtain the original values of parameters.

        Returns
        -------
        dict:
            key: parameter name
            value: value of parameter
        """
        dct = {}
        self.initializeRoadRunnerModel()
        self.roadrunnerModel.reset()
        for parameterName in self.parametersToFit:
            dct[parameterName] = self.roadrunnerModel.model[parameterName]
        return dct

    def simulate(self, params=None, startTime=None, endTime=None,
                 numPoint=None):
        """
        Runs a simulation. Defaults to parameter values in the simulation.

        Parameters
        ----------
        params: lmfit.Parameters
        startTime: float
        endTime: float
        numPoint: int

        Return
        ------
        NamedTimeseries
        """
        def setValue(default, parameter):
            # Sets to default if parameter unspecified
            if parameter is None:
                return default
            return parameter

        #
        startTime = setValue(self.observedTS.start, startTime)
        endTime = setValue(self.observedTS.end, endTime)
        numPoint = setValue(len(self.observedTS), numPoint)
        #
        if self.roadrunnerModel is None:
            self.initializeRoadRunnerModel()
        #
        return ModelFitterCore.runSimulation(
            parameters=params,
            roadrunner=self.roadrunnerModel,
            startTime=startTime,
            endTime=endTime,
            numPoint=numPoint,
            selectedColumns=self.selectedColumns,
            _logger=self.logger,
            _loggerPrefix=self._loggerPrefix,
            returnDataFrame=False)

    def updateFittedAndResiduals(self, **kwargs) -> np.ndarray:
        """
        Updates values of self.fittedTS and self.residualsTS based on
        self.params.

        Parameters
        ----------
        kwargs: dict
            arguments for simulation

        Instance Variables Updated
        --------------------------
        self.fittedTS
        self.residualsTS

        Notes
        -----
        NOTE(review): despite the original docstring ("Returns 1-d ndarray
        of residuals"), this method returns None; callers rely only on the
        instance-variable updates, so that behavior is preserved.
        """
        self.fittedTS = self.simulate(**kwargs)  # Updates self.fittedTS
        residualsArr = self._residuals(self.params)
        numRow = len(self.fittedTS)
        numCol = len(residualsArr) // numRow
        residualsArr = np.reshape(residualsArr, (numRow, numCol))
        cols = self.selectedColumns
        if self.residualsTS is None:
            self.residualsTS = self.observedTS.subsetColumns(cols)
        self.residualsTS[cols] = residualsArr

    def _residuals(self, params) -> np.ndarray:
        """
        Compute the residuals between objective and experimental data.
        Handle nan values in observedTS. This internal-only method is
        implemented to maximize efficieency.

        Parameters
        ----------
        params: lmfit.Parameters

        Returns
        -------
        1-d ndarray of residuals
        """
        data = ModelFitterCore.runSimulation(
            parameters=params,
            roadrunner=self.roadrunnerModel,
            startTime=self.observedTS.start,
            endTime=self.observedTS.end,
            numPoint=len(self.observedTS),
            selectedColumns=self.selectedColumns,
            _logger=self.logger,
            _loggerPrefix=self._loggerPrefix,
            returnDataFrame=False)
        if data is None:
            # Simulation failed: penalize with large residuals.
            residualsArr = np.repeat(LARGE_RESIDUAL, len(self._observedArr))
        else:
            residualsArr = self._observedArr - data.flatten()
            residualsArr = np.nan_to_num(residualsArr)
        # Track the best parameters seen so far (lmfit may not return them).
        rssq = np.sum(residualsArr**2)
        if (self._bestParameters.rssq is None) \
              or (rssq < self._bestParameters.rssq):
            self._bestParameters = _BestParameters(params=params.copy(),
                                                   rssq=rssq)
        return residualsArr

    def fitModel(self, params: lmfit.Parameters = None, max_nfev=100):
        """
        Fits the model by adjusting values of parameters based on
        differences between simulated and provided values of floating
        species.

        Parameters
        ----------
        params: starting values of parameters
        max_nfev: maximum number of function evaluations per method

        Example
        -------
        f.fitModel()
        """
        ParameterDescriptor = collections.namedtuple(
            "ParameterDescriptor",
            "params method rssq kwargs minimizer minimizerResult")
        MAX_NFEV = "max_nfev"
        block = Logger.join(self._loggerPrefix, "fitModel")
        guid = self.logger.startBlock(block)
        self.initializeRoadRunnerModel()
        self.params = None
        if self.parametersToFit is not None:
            if params is None:
                params = self.mkParams()
            # Fit the model to the data using one or more methods.
            # Choose the result with the lowest residual standard deviation
            paramResults = []
            lastExcp = None
            for idx, optimizerMethod in enumerate(self._fitterMethods):
                method = optimizerMethod.method
                kwargs = optimizerMethod.kwargs
                if MAX_NFEV not in kwargs:
                    kwargs[MAX_NFEV] = max_nfev
                for _ in range(self._numFitRepeat):
                    self._bestParameters = _BestParameters(params=None,
                                                           rssq=None)
                    minimizer = lmfit.Minimizer(self._residuals, params)
                    try:
                        minimizerResult = minimizer.minimize(method=method,
                                                             **kwargs)
                    except Exception as excp:
                        lastExcp = excp
                        msg = "Error minimizing for method: %s" % method
                        self.logger.error(msg, excp)
                        continue
                    params = self._bestParameters.params.copy()
                    rssq = np.sum(self._residuals(params)**2)
                    # NOTE(review): this repeat-improvement check appends
                    # rather than replaces, so paramResults can hold more
                    # than one entry per method; the sort below still picks
                    # the overall best — confirm this is intended.
                    if len(paramResults) > idx:
                        if rssq >= paramResults[idx].rssq:
                            continue
                    parameterDescriptor = ParameterDescriptor(
                        params=params,
                        method=method,
                        rssq=rssq,
                        kwargs=dict(kwargs),
                        minimizer=minimizer,
                        minimizerResult=minimizerResult,
                    )
                    paramResults.append(parameterDescriptor)
            if len(paramResults) == 0:
                msg = "*** Minimizer failed for this model and data."
                self.logger.error(msg, lastExcp)
            else:
                # Select the result that has the smallest residuals
                sortedMethods = sorted(paramResults, key=lambda r: r.rssq)
                bestMethod = sortedMethods[0]
                self.params = bestMethod.params
                self.minimizer = bestMethod.minimizer
                self.minimizerResult = bestMethod.minimizerResult
                # Ensure that residualsTS and fittedTS match the parameters
                self.updateFittedAndResiduals(params=self.params)
        self.logger.endBlock(guid)

    def getFittedModel(self):
        """
        Provides the roadrunner model with fitted parameters

        Returns
        -------
        ExtendedRoadrunner
        """
        self._checkFit()
        self.roadrunnerModel.reset()
        self._setupModel(self.params)
        return self.roadrunnerModel

    def _setupModel(self, parameters):
        """
        Sets up the model for use based on the parameter parameters

        Parameters
        ----------
        parameters: lmfit.Parameters
        """
        ModelFitterCore.setupModel(self.roadrunnerModel, parameters,
                                   logger=self.logger)

    def mkParams(self, parameterDct: dict = None) -> lmfit.Parameters:
        """
        Constructs lmfit parameters based on specifications.

        Parameters
        ----------
        parameterDct: key=name, value=ParameterSpecification

        Returns
        -------
        lmfit.Parameters
        """
        if parameterDct is None:
            parameterDct = self.parameterDct
        return ModelFitterCore.mkParameters(
            parameterDct,
            parametersToFit=self.parametersToFit,
            logger=self.logger,
            lowerBound=self.lowerBound,
            upperBound=self.upperBound)

    def _checkFit(self):
        # Guard: methods that need a fit must run after fitModel.
        if self.params is None:
            raise ValueError("Must use fitModel before using this method.")

    def serialize(self, path):
        """
        Serialize the model to a path.

        Parameters
        ----------
        path: str
            File path
        """
        newModelFitter = self.copy()
        with open(path, "wb") as fd:
            rpickle.dump(newModelFitter, fd)

    @classmethod
    def deserialize(cls, path):
        """
        Deserialize the model from a path.

        Parameters
        ----------
        path: str
            File path

        Return
        ------
        ModelFitter
            Model is initialized.
        """
        with open(path, "rb") as fd:
            fitter = rpickle.load(fd)
        fitter.initializeRoadRunnerModel()
        return fitter
def mkParameters(
        cls,
        parameterDct: dict = None,
        parametersToFit: list = None,
        logger: Logger = Logger(),
        lowerBound: float = PARAMETER_LOWER_BOUND,
        upperBound: float = PARAMETER_UPPER_BOUND) -> lmfit.Parameters:
    """
    Constructs lmfit parameters based on specifications.

    Parameters
    ----------
    parameterDct: key=name, value=ParameterSpecification
    parametersToFit: list of parameters to fit
    logger: error logger
    lowerBound: lower value of range for parameters
    upperBound: upper value of range for parameters

    Returns
    -------
    lmfit.Parameters

    Raises
    ------
    RuntimeError: if neither parameterDct nor parametersToFit is given
    """
    def get(value, base_value, multiplier):
        # Returns value if specified; otherwise base_value scaled.
        if value is not None:
            return value
        return base_value * multiplier

    #
    if (parametersToFit is None) and (parameterDct is None):
        raise RuntimeError("Must specify one of these parameters.")
    if parameterDct is None:
        parameterDct = {}
    if parametersToFit is None:
        parametersToFit = parameterDct.keys()
    if logger is None:
        # BUG FIX: was "logger = logger()", which calls None.
        logger = Logger()
    params = lmfit.Parameters()
    for parameterName in parametersToFit:
        if parameterName in parameterDct.keys():
            specification = parameterDct[parameterName]
            value = get(specification.value, specification.value, 1.0)
            if value > 0:
                lower_factor = LOWER_PARAMETER_MULT
                upper_factor = UPPER_PARAMETER_MULT
            else:
                # BUG FIX: for non-positive values the factors must be
                # swapped so that lower <= value <= upper; the original
                # assigned the same factors as the positive branch.
                lower_factor = UPPER_PARAMETER_MULT
                upper_factor = LOWER_PARAMETER_MULT
            lower = get(specification.lower, specification.value,
                        lower_factor)
            upper = get(specification.upper, specification.value,
                        upper_factor)
            if np.isclose(lower - upper, 0):
                # NOTE(review): degenerate range is widened by forcing a
                # small positive upper bound — confirm intent.
                upper = 0.0001
            try:
                params.add(parameterName, value=value, min=lower, max=upper)
            except Exception as err:
                msg = "modelFitterCore/mkParameters parameterName %s" \
                    % parameterName
                logger.error(msg, err)
        else:
            # No specification: start at the midpoint of the bounds.
            value = np.mean([lowerBound, upperBound])
            params.add(parameterName, value=value, min=lowerBound,
                       max=upperBound)
    return params
class Optimizer():
    """
    Implements an interface to optimizers with abstractions for multiple
    methods and performance reporting. The class also handles an oddity
    with lmfit that the final parameters returned may not be the best.

    Usage
    -----
    optimizer = Optimizer(calcResiduals, params, [cn.METHOD_LEASTSQ])
    optimizer.execute()
    """

    def __init__(self, function, initialParams, methods, logger=None,
                 isCollect=False):
        """
        Parameters
        ----------
        function: Funtion
            Arguments
                lmfit.parameters
                isInitialze (bool). True on first call the
                isGetBest (bool). True to retrieve best parameters
            returns residuals (if bool arguments are false)
        initialParams: lmfit.parameters
        methods: list-_helpers.OptimizerMethod
        isCollect: bool
            Collects performance statistcs
        """
        self._function = function
        self._methods = methods
        self._initialParams = initialParams
        self._isCollect = isCollect
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()
        # Outputs
        self.performanceStats = []  # list of performance results
        self.qualityStats = []  # relative rssq
        self.params = None
        self.minimizerResult = None
        self.rssq = None

    def copyResults(self):
        """
        Copies of the results of the optimization.

        Returns
        -------
        Optimizer
        """
        newOptimizer = Optimizer(self._function,
                                 self._initialParams.copy(),
                                 self._methods,
                                 logger=self.logger,
                                 isCollect=self._isCollect)
        newOptimizer._function = None  # Not serializable
        #
        newOptimizer.performanceStats = copy.deepcopy(self.performanceStats)
        newOptimizer.qualityStats = copy.deepcopy(self.qualityStats)
        newOptimizer.minimizerResult = copy.deepcopy(self.minimizerResult)
        newOptimizer.params = None
        if self.params is not None:
            newOptimizer.params = self.params.copy()
        newOptimizer.rssq = self.rssq
        return newOptimizer

    @staticmethod
    def _setRandomValue(params):
        """
        Sets value to a uniformly distributed random number between
        min and max.

        Parameters
        ----------
        params: lmfit.Parameters

        Returns
        -------
        lmfit.Parameters
        """
        newParameters = lmfit.Parameters()
        for name, parameter in params.items():
            newValue = np.random.uniform(parameter.min, parameter.max)
            newParameters.add(name,
                              min=parameter.min,
                              max=parameter.max,
                              value=newValue)
        return newParameters

    def execute(self):
        """
        Performs the optimization on the function. Result is self.params.
        """
        lastExcp = None
        self.params = self._initialParams.copy()
        minimizer = None
        for optimizerMethod in self._methods:
            method = optimizerMethod.method
            kwargs = optimizerMethod.kwargs
            wrapperFunction = _FunctionWrapper(self._function,
                                               isCollect=self._isCollect)
            minimizer = lmfit.Minimizer(wrapperFunction.execute, self.params)
            try:
                self.minimizerResult = minimizer.minimize(method=method,
                                                          **kwargs)
            except Exception as excp:
                lastExcp = excp
                msg = "Error minimizing for method: %s" % method
                self.logger.error(msg, excp)
                continue
            # Update the parameters
            if wrapperFunction.bestParamDct is not None:
                _helpers.updateParameterValues(self.params,
                                               wrapperFunction.bestParamDct)
            # Update other statistics
            self.rssq = wrapperFunction.rssq
            self.performanceStats.append(list(wrapperFunction.perfStatistics))
            self.qualityStats.append(list(wrapperFunction.rssqStatistics))
        if minimizer is None:
            msg = "*** Optimization failed."
            self.logger.error(msg, lastExcp)

    def report(self):
        """
        Reports the result of an optimization.

        Returns
        -------
        str
        """
        VARIABLE_STG = "[[Variables]]"
        CORRELATION_STG = "[[Correlations]]"
        if self.minimizerResult is None:
            raise ValueError("Must do fitModel before reportFit.")
        valuesDct = self.params.valuesdict()
        valuesStg = _helpers.ppDict(dict(valuesDct), indent=4)
        reportSplit = str(lmfit.fit_report(self.minimizerResult)).split("\n")
        # Eliminate Variables section
        inVariableSection = False
        trimmedReportSplit = []
        for line in reportSplit:
            if VARIABLE_STG in line:
                inVariableSection = True
            if CORRELATION_STG in line:
                inVariableSection = False
            if inVariableSection:
                continue
            trimmedReportSplit.append(line)
        # Construct the report
        newReportSplit = [VARIABLE_STG]
        newReportSplit.extend(valuesStg.split("\n"))
        newReportSplit.extend(trimmedReportSplit)
        return "\n".join(newReportSplit)

    def plotPerformance(self, isPlot=True):
        """
        Plots the statistics for running the objective function.
        """
        if not self._isCollect:
            msg = "Must construct with isCollect = True "
            msg += "to get performance plot."
            raise ValueError(msg)
        # Compute statistics
        TOT = "Tot"
        CNT = "Cnt"
        AVG = "Avg"
        IDX = "Idx"
        totalTimes = [sum(v) for v in self.performanceStats]
        counts = [len(v) for v in self.performanceStats]
        averages = [np.mean(v) for v in self.performanceStats]
        df = pd.DataFrame({
            IDX: range(len(self.performanceStats)),
            TOT: totalTimes,
            CNT: counts,
            AVG: averages,
        })
        #
        _, axes = plt.subplots(1, 3)
        df.plot.bar(x=IDX, y=TOT, ax=axes[0], title="Total time",
                    xlabel="method")
        df.plot.bar(x=IDX, y=AVG, ax=axes[1], title="Average time",
                    xlabel="method")
        df.plot.bar(x=IDX, y=CNT, ax=axes[2], title="Number calls",
                    xlabel="method")
        if isPlot:
            plt.show()

    def plotQuality(self, isPlot=True):
        """
        Plots the quality results
        """
        if not self._isCollect:
            msg = "Must construct with isCollect = True "
            msg += "to get quality plots."
            raise ValueError(msg)
        ITERATION = "iteration"
        _, axes = plt.subplots(len(self._methods))
        minLength = min([len(v) for v in self.qualityStats])
        # Compute statistics
        dct = {
            self._methods[i].method: self.qualityStats[i][:minLength]
            for i in range(len(self._methods))
        }
        df = pd.DataFrame(dct)
        df[ITERATION] = range(minLength)
        #
        for idx, method in enumerate(self._methods):
            # NOTE(review): string-based Axes detection distinguishes a
            # single Axes from an array of Axes — fragile across matplotlib
            # versions; confirm before upgrading matplotlib.
            if "AxesSubplot" in str(type(axes)):
                ax = axes
            else:
                ax = axes[idx]
            df.plot.line(x=ITERATION, y=method.method, ax=ax, xlabel="")
            ax.set_ylabel("SSQ")
            if idx == len(self._methods) - 1:
                ax.set_xlabel(ITERATION)
        if isPlot:
            plt.show()

    @staticmethod
    def mkOptimizerMethod(methodNames=None,
                          methodKwargs=None,
                          maxFev=cn.MAX_NFEV_DFT):
        """
        Constructs an OptimizerMethod

        Parameters
        ----------
        methodNames: list-str/str
        methodKwargs: list-dict/dict

        Returns
        -------
        list-OptimizerMethod
        """
        if methodNames is None:
            methodNames = [cn.METHOD_LEASTSQ]
        if isinstance(methodNames, str):
            methodNames = [methodNames]
        if methodKwargs is None:
            methodKwargs = {}
        # Ensure that there is a limit of function evaluations
        newMethodKwargs = dict(methodKwargs)
        if cn.MAX_NFEV not in newMethodKwargs.keys():
            newMethodKwargs[cn.MAX_NFEV] = maxFev
        elif maxFev is None:
            del newMethodKwargs[cn.MAX_NFEV]
        # BUG FIX: np.repeat(dict, n) replicated references to ONE mutable
        # dict, so every OptimizerMethod shared (and could corrupt) the
        # same kwargs. Give each method its own copy.
        methodKwargs = [dict(newMethodKwargs) for _ in methodNames]
        #
        result = [_helpers.OptimizerMethod(n, k) for n, k \
              in zip(methodNames, methodKwargs)]
        return result

    @classmethod
    def optimize(cls, function, initialParams, methods, numRestart=0,
                 **kwargs):
        """
        Parameters
        ----------
        function: Funtion
            Arguments
                lmfit.parameters
                isInitialze (bool). True on first call the
                isGetBest (bool). True to retrieve best parameters
            returns residuals (if bool arguments are false)
        initialParams: lmfit.parameters
        methods: list-_helpers.OptimizerMethod
        numRestart: int
            Number of restarts with randomly chosen initial values

        Returns
        -------
        Optimizer
        """
        bestOptimizer = cls(function, initialParams, methods, **kwargs)
        bestOptimizer.execute()
        #
        for _ in range(numRestart):
            newInitialParams = Optimizer._setRandomValue(initialParams)
            newOptimizer = cls(function, newInitialParams, methods, **kwargs)
            newOptimizer.execute()
            # ROBUSTNESS FIX: rssq stays None when every method fails;
            # comparing None with "<" raises TypeError in Python 3.
            if newOptimizer.rssq is not None:
                if (bestOptimizer.rssq is None) \
                      or (newOptimizer.rssq < bestOptimizer.rssq):
                    bestOptimizer = newOptimizer
        return bestOptimizer