def fit_with_minuit(cost_func: Union[cost_function.CostFunctionBase, cost_function.SimultaneousFit],
                    minuit_args: T_FitArguments,
                    x: np.ndarray,
                    use_minos: Optional[bool] = False) -> Tuple[base.FitResult, iminuit.Minuit]:
    """ Perform a fit using the given cost function with Minuit.

    Args:
        cost_func: Cost function to be used with Minuit.
        minuit_args: Arguments for minuit. Need to set the initial value, limits, and error (step)
            of each parameter.
        x: x value(s) where the fit is evaluated, which will be stored in the fit result.
        use_minos: Calculate MINOS errors. They have to be accessed through the Minuit object. Default: False.
    Returns:
        (fit_result, Minuit object): The fit result extracts values from the Minuit object, but
            the Minuit object is also returned for good measure.
    """
    # Validation
    # Will raise an exception if there are invalid arguments.
    _validate_minuit_args(cost_func = cost_func, minuit_args = minuit_args)

    # Set the error definition.
    # We only set it if it isn't already present, to allow the user to override it if they are so inclined.
    # (Overriding it should be pretty rare).
    if "errordef" not in minuit_args:
        # Log likelihood cost functions need an errordef of 0.5 to scale the errors properly, while 1 should
        # be used for chi squared cost functions.
        error_def = 1.0
        if isinstance(cost_func, (cost_function.LogLikelihood, cost_function.BinnedLogLikelihood)):
            error_def = 0.5
        # Store the value.
        minuit_args["errordef"] = error_def

    # Perform the fit
    minuit = iminuit.Minuit(cost_func, **minuit_args)
    minuit.migrad()
    # Just in case (doesn't hurt anything, but may help in a few cases).
    minuit.hesse()
    if use_minos:
        minuit.minos()

    # Check that the fit is actually good
    if not minuit.migrad_ok():
        raise base.FitFailed("Minimization failed! The fit is invalid!")

    # Create the fit result and calculate the errors.
    fit_result = base.FitResult.from_minuit(minuit, cost_func, x)
    # We can calculate the fit errors if the cost function has a single function.
    # If it's a simultaneous fit, it's unclear how best this should be handled. Perhaps it could
    # be unraveled and summed, but it's not obvious that that's the best approach. More likely,
    # one only wants the errors for an individual cost function, so we leave that to the user.
    # Use isinstance to narrow the type of cost_func, which also helps out mypy.
    if isinstance(cost_func, cost_function.CostFunctionBase):
        errors = base.calculate_function_errors(cost_func.f, fit_result, x)
    else:
        errors = []
    fit_result.errors = errors

    return fit_result, minuit
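
# Illustrative sketch (not part of the original source): one way fit_with_minuit might be called for the
# one-parameter parabola used in the tests below. The "scale" parameter name and the "error_scale" /
# "limit_scale" prefixed arguments mirror the test setup; `h` is assumed to be a binned, histogram-like
# object with `x` values, as in the tests.
def _example_fit_parabola(h: Any) -> Tuple[base.FitResult, iminuit.Minuit]:
    """ Hypothetical usage example for fit_with_minuit; not from the original code. """
    cost = cost_function.BinnedLogLikelihood(f = parabola, data = h)
    minuit_args: T_FitArguments = {
        "scale": 1.0,                   # Initial value of the "scale" parameter.
        "error_scale": 0.1,             # Initial step size.
        "limit_scale": (-1000, 1000),   # Parameter limits.
    }
    # MINOS errors (if requested via use_minos = True) are only available through the returned Minuit object.
    fit_result, minuit = fit_with_minuit(cost, minuit_args, x = h.x)
    return fit_result, minuit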
def calculate_errors(self, x: Optional[np.ndarray] = None) -> np.ndarray:
    """ Calculate the errors on the fit function for the given x values.

    Args:
        x: x values where the fit function error should be evaluated. If not specified,
            the x values over which the fit was performed will be used.
    Returns:
        The fit function error calculated at each x value.
    """
    if x is None:
        x = self.fit_result.x
    return base.calculate_function_errors(
        func = self.fit_function,
        fit_result = self.fit_result,
        x = x,
    )
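
# Illustrative sketch (not part of the original source): evaluating the fit error band after a fit,
# assuming `fit_object` is an instance of the class that defines calculate_errors above (i.e. it has
# a `fit_function` and a populated `fit_result`).
def _example_error_band(fit_object: Any) -> Tuple[np.ndarray, np.ndarray]:
    """ Hypothetical usage example for calculate_errors; not from the original code. """
    # Default: errors at the x values over which the fit was performed.
    errors_at_fit_x = fit_object.calculate_errors()
    # Or evaluate on a finer grid, for example to plot a smooth error band.
    x_fine = np.linspace(fit_object.fit_result.x[0], fit_object.fit_result.x[-1], 100)
    errors_fine = fit_object.calculate_errors(x = x_fine)
    return errors_at_fit_x, errors_fine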
def test_binned_cost_functions_against_ROOT(logging_mixin: Any, cost_func: Any, fit_option: Any,
                                            setup_parabola: Any) -> None:
    """ Test the binned cost function implementations against ROOT. """
    # Setup
    h, h_ROOT = setup_parabola
    ROOT = pytest.importorskip("ROOT")
    minuit_args: Dict[str, Union[float, Tuple[float, float]]] = {
        "scale": 1, "error_scale": 0.1, "limit_scale": (-1000, 1000),
    }
    log_likelihood = "L" in fit_option
    if cost_func == "probfit":
        probfit = pytest.importorskip("probfit")
        cost_func = probfit.Chi2Regression

    # Fit with ROOT
    fit_ROOT = ROOT.TF1("parabola", "[0] * TMath::Power(x, 2)", -10.5, 10.5)
    # Expect it to be around 1.
    fit_ROOT.SetParameter(0, minuit_args["scale"])
    fit_result_ROOT = h_ROOT.Fit(fit_ROOT, fit_option + "0")
    logger.debug(f"ROOT: chi_2: {fit_result_ROOT.Chi2()}, ndf: {fit_result_ROOT.Ndf()}")

    # Fit with the defined cost function
    args: Dict[str, Any] = {"f": parabola}
    if issubclass(cost_func, cost_function.CostFunctionBase):
        args.update({"data": h})
        # Test for weighted likelihood
        if "W" in fit_option:
            args.update({"use_weights": True})
    else:
        args.update({"x": h.x, "y": h.y, "error": h.errors})
    cost = cost_func(**args)
    fit_result, minuit = fit_integration.fit_with_minuit(cost, minuit_args, h.x)

    # Check the minimized value.
    # There is still something a bit different between ROOT's log likelihood calculation and mine.
    # However, the other parameters appear to agree, so it seems okay.
    if not log_likelihood:
        assert np.isclose(fit_result.minimum_val, fit_result_ROOT.MinFcnValue(), rtol=0.03)
    if cost_func is cost_function.BinnedLogLikelihood:
        # Calculate the chi squared equivalent and set that to be the minimum value for comparison.
        binned_chi_squared = cost_function._binned_chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *list(fit_result.values_at_minimum.values())
        )
        unbinned_chi_squared = cost_function._chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *list(fit_result.values_at_minimum.values())
        )
        logger.debug(
            f"minimum_val before changing: {fit_result.minimum_val}, ROOT func min: {fit_result_ROOT.MinFcnValue()}"
        )
        logger.debug(f"binned chi_squared: {binned_chi_squared}, unbinned chi_squared: {unbinned_chi_squared}")
        fit_result.minimum_val = binned_chi_squared

    # Calculate errors.
    fit_result.errors = fit_base.calculate_function_errors(func=parabola, fit_result=fit_result, x=fit_result.x)

    # Check the result
    logger.debug(f"Fit chi_2: {fit_result.minimum_val}, ndf: {fit_result.nDOF}")
    # It won't agree exactly because ROOT appears to use the unbinned chi squared to calculate this value.
    # This can be seen because probfit agrees with ROOT.
    assert np.isclose(fit_result.minimum_val, fit_result_ROOT.Chi2(), rtol=0.035)
    assert np.isclose(fit_result.nDOF, fit_result_ROOT.Ndf())

    # Check the parameters
    # Value
    assert np.isclose(fit_result.values_at_minimum["scale"], fit_result_ROOT.Parameter(0), rtol=0.05)
    # Error
    assert np.isclose(fit_result.errors_on_parameters["scale"], fit_result_ROOT.ParError(0), rtol=0.005)

    # Covariance matrix
    if issubclass(cost_func, cost_function.CostFunctionBase):
        covariance_ROOT = fit_result_ROOT.GetCovarianceMatrix()
        # Print the fit result, alongside the covariance
        fit_result_ROOT.Print("V")
        logger.debug(f"Covariance: {fit_result.covariance_matrix}")
        for i_name in fit_result.free_parameters:
            for j_name in fit_result.free_parameters:
                i_index = fit_result.free_parameters.index(i_name)
                j_index = fit_result.free_parameters.index(j_name)
                logger.debug(f"Checking covariance matrix parameters: ({i_name}:{i_index}, {j_name}:{j_index})")
                assert np.isclose(
                    fit_result.covariance_matrix[(i_name, j_name)], covariance_ROOT(i_index, j_index), rtol=0.01
                )

    # Estimated distance to minimum
    assert np.isclose(minuit.fmin.edm, fit_result_ROOT.Edm(), atol=1e-3)

    # Check the effective chi squared. This won't work in the probfit case because we don't recognize
    # the type properly (and it's not worth the effort).
    if issubclass(cost_func, cost_function.CostFunctionBase):
        assert fit_result.effective_chi_squared(cost) == (
            cost_function._binned_chi_squared(
                cost.data.x, cost.data.y, cost.data.errors, cost.data.bin_edges, cost.f,
                *fit_result.values_at_minimum.values(),
            )
            if log_likelihood
            else fit_result.minimum_val
        )
def test_binned_cost_functions_against_ROOT(logging_mixin: Any, cost_func: Any, fit_option: Any,
                                            setup_parabola: Any) -> None:
    """ Test the binned cost function implementations against ROOT. """
    # Setup
    h, h_ROOT = setup_parabola
    ROOT = pytest.importorskip("ROOT")
    minuit_args: Dict[str, Union[float, Tuple[float, float]]] = {
        "scale": 1, "error_scale": 0.1, "limit_scale": (-1000, 1000),
    }
    log_likelihood = "L" in fit_option
    if cost_func == "probfit":
        probfit = pytest.importorskip("probfit")
        cost_func = probfit.Chi2Regression

    # Fit with ROOT
    fit_ROOT = ROOT.TF1("parabola", "[0] * TMath::Power(x, 2)", -10.5, 10.5)
    # Expect it to be around 1.
    fit_ROOT.SetParameter(0, minuit_args["scale"])
    fit_result_ROOT = h_ROOT.Fit(fit_ROOT, fit_option + "0")
    logger.debug(f"ROOT: chi_2: {fit_result_ROOT.Chi2()}, ndf: {fit_result_ROOT.Ndf()}")

    # Fit with the defined cost function
    args = {"f": parabola}
    if issubclass(cost_func, cost_function.CostFunctionBase):
        args.update({"data": h})
    else:
        args.update({"x": h.x, "y": h.y, "error": h.errors})
    cost = cost_func(**args)
    fit_result, _ = fit_integration.fit_with_minuit(cost, minuit_args, h.x)

    # Check the minimized value.
    # It doesn't appear that it will agree for log likelihood.
    if not log_likelihood:
        assert np.isclose(fit_result.minimum_val, fit_result_ROOT.MinFcnValue(), rtol=0.03)
    if cost_func is cost_function.BinnedLogLikelihood:
        # Calculate the chi squared equivalent and set that to be the minimum value for comparison.
        binned_chi_squared = cost_function._binned_chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *list(fit_result.values_at_minimum.values())
        )
        unbinned_chi_squared = cost_function._chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *list(fit_result.values_at_minimum.values())
        )
        logger.debug(
            f"minimum_val before changing: {fit_result.minimum_val}, ROOT func min: {fit_result_ROOT.MinFcnValue()}"
        )
        logger.debug(f"binned chi_squared: {binned_chi_squared}, unbinned chi_squared: {unbinned_chi_squared}")
        fit_result.minimum_val = binned_chi_squared

    # Calculate errors.
    fit_result.errors = fit_base.calculate_function_errors(func=parabola, fit_result=fit_result, x=fit_result.x)

    # Check the result
    logger.debug(f"Fit chi_2: {fit_result.minimum_val}, ndf: {fit_result.nDOF}")
    # It won't agree exactly because ROOT appears to use the unbinned chi squared to calculate this value.
    # This can be seen because probfit agrees with ROOT.
    assert np.isclose(fit_result.minimum_val, fit_result_ROOT.Chi2(), rtol=0.035)
    assert np.isclose(fit_result.nDOF, fit_result_ROOT.Ndf())

    # Check the parameters
    # Value
    assert np.isclose(fit_result.values_at_minimum["scale"], fit_result_ROOT.Parameter(0), rtol=0.05)
    # Error
    # TODO: For some reason, the error is substantially larger for the log likelihood cost function compared
    #       to ROOT. This requires more investigation, but shouldn't totally derail progress at the moment.
    if not log_likelihood:
        assert np.isclose(fit_result.errors_on_parameters["scale"], fit_result_ROOT.ParError(0), rtol=0.005)

    # Check the effective chi squared. This won't work in the probfit case because we don't recognize
    # the type properly (and it's not worth the effort).
    if issubclass(cost_func, cost_function.CostFunctionBase):
        assert fit_result.effective_chi_squared(cost) == (
            cost_function._binned_chi_squared(
                cost.data.x, cost.data.y, cost.data.errors, cost.data.bin_edges, cost.f,
                *fit_result.values_at_minimum.values(),
            )
            if log_likelihood
            else fit_result.minimum_val
        )
def fit_with_minuit(
    cost_func: Union[cost_function.CostFunctionBase, cost_function.SimultaneousFit],
    minuit_args: T_FitArguments,
    x: npt.NDArray[Any],
    use_minos: Optional[bool] = False,
) -> Tuple[base.FitResult, iminuit.Minuit]:
    """Perform a fit using the given cost function with Minuit.

    Args:
        cost_func: Cost function to be used with Minuit.
        minuit_args: Arguments for minuit. Need to set the initial value, limits, and error (step)
            of each parameter.
        x: x value(s) where the fit is evaluated, which will be stored in the fit result.
        use_minos: Calculate MINOS errors. They have to be accessed through the Minuit object. Default: False.
    Returns:
        (fit_result, Minuit object): The fit result extracts values from the Minuit object, but
            the Minuit object is also returned for good measure.
    """
    # Validation
    # Will raise an exception if there are invalid arguments.
    _validate_minuit_args(cost_func=cost_func, minuit_args=minuit_args)

    # Copy the minuit_args so we don't cause issues elsewhere when we pop values.
    minuit_args = dict(minuit_args)

    # Set the error definition.
    # We only set it if it isn't already present, to allow the user to override it if they are so inclined.
    # (Overriding it should be pretty rare).
    if "errordef" not in minuit_args:
        # Log likelihood cost functions need an errordef of 0.5 to scale the errors properly, while 1 should
        # be used for chi squared cost functions.
        error_def = 1.0
        if isinstance(cost_func, (cost_function.LogLikelihood, cost_function.BinnedLogLikelihood)):
            error_def = 0.5
        # Store the value.
        minuit_args["errordef"] = error_def

    # Transform into iminuit 2 args.
    # This isn't the cleanest thing to do, but it avoids having to change interfaces for now (July 2021).
    # Errors
    error_args_names = [k for k in minuit_args if "error_" in k]
    error_args = {k.replace("error_", ""): minuit_args.pop(k) for k in error_args_names}
    # Limits
    limit_args_names = [k for k in minuit_args if "limit_" in k]
    limit_args = {k.replace("limit_", ""): minuit_args.pop(k) for k in limit_args_names}
    # Fixed
    fixed_args_names = [k for k in minuit_args if "fix_" in k]
    fixed_args = {k.replace("fix_", ""): minuit_args.pop(k) for k in fixed_args_names}
    # errordef
    error_def_arg = minuit_args.pop("errordef")

    # Perform the fit
    minuit = iminuit.Minuit(cost_func, **minuit_args)
    # Set the iminuit 2 interface args.
    # NOTE: Can't assign the values directly - need to loop parameter by parameter.
    for k, v in limit_args.items():
        minuit.limits[k] = v
    for k, v in fixed_args.items():
        minuit.fixed[k] = v
    for k, v in error_args.items():
        minuit.errors[k] = v
    minuit.errordef = error_def_arg
    # Improve minimization reliability.
    minuit.strategy = 2
    minuit.migrad()
    # Just in case (doesn't hurt anything, but may help in a few cases).
    minuit.hesse()
    if use_minos:
        minuit.minos()

    # Check that the fit is actually good.
    if not minuit.valid:
        raise base.FitFailed("Minimization failed! The fit is invalid!")
    # Check the covariance matrix accuracy. We need to check it explicitly because it appears that it is not
    # included in the validity check above.
    if not minuit.accurate:
        raise base.FitFailed("Covariance matrix is inaccurate! The fit is invalid!")

    # Create the fit result and calculate the errors.
    fit_result = base.FitResult.from_minuit(minuit, cost_func, x)
    # We can calculate the fit errors if the cost function has a single function.
    # If it's a simultaneous fit, it's unclear how best this should be handled. Perhaps it could
    # be unraveled and summed, but it's not obvious that that's the best approach. More likely,
    # one only wants the errors for an individual cost function, so we leave that to the user.
    # Use isinstance to narrow the type of cost_func, which also helps out mypy.
    if isinstance(cost_func, cost_function.CostFunctionBase):
        errors = base.calculate_function_errors(cost_func.f, fit_result, x)
    else:
        errors = np.array([])
    fit_result.errors = errors

    return fit_result, minuit
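
# Illustrative sketch (not part of the original source): what the prefixed-argument transform above amounts
# to for a single hypothetical "scale" parameter, written directly against the iminuit 2 interface. The
# values follow the parabola tests; the function and its name are purely for illustration.
def _example_iminuit2_equivalent(cost_func: Any) -> iminuit.Minuit:
    """ Hypothetical example of the iminuit 2 calls performed by fit_with_minuit; not from the original code. """
    # fit_with_minuit accepts {"scale": 1.0, "error_scale": 0.1, "limit_scale": (-1000, 1000), "fix_scale": False}.
    # Under iminuit 2, only the initial value is passed to the constructor...
    minuit = iminuit.Minuit(cost_func, scale=1.0)
    # ...while the step size, limits, and fixed state are set per parameter through indexable attributes.
    minuit.errors["scale"] = 0.1
    minuit.limits["scale"] = (-1000, 1000)
    minuit.fixed["scale"] = False
    # errordef: 0.5 for (binned) log likelihood cost functions, 1.0 for chi squared.
    minuit.errordef = 1.0
    return minuit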