# Imports assumed by the snippets below (aliases match zfit's own codebase).
import warnings
from collections import OrderedDict

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp

import zfit
import zfit.z.numpy as znp
from zfit import z

# zfit-internal names used below (e.g. NotExtendedPDFError,
# SpecificFunctionNotImplemented, ZfitSpace, FitResult, assign_values,
# print_gradient, run) come from zfit's own modules and are assumed in scope.


def _loss_func(self, model, data, fit_range, constraints, log_offset):
    # The base class computes the shape-only binned NLL; the extended term
    # for the total yield is added below.
    nll = super()._loss_func(
        model=model,
        data=data,
        fit_range=fit_range,
        constraints=constraints,
        log_offset=log_offset,
    )
    yields = []
    nevents_collected = []
    for mod, dat in zip(model, data):
        if not mod.is_extended:
            raise NotExtendedPDFError(
                f"The pdf {mod} is not extended but has to be (for an extended fit)"
            )
        # Use the sum of weights if the data is weighted, else the raw count.
        nevents = dat.n_events if dat.weights is None else z.reduce_sum(dat.weights)
        nevents = tf.cast(nevents, tf.float64)
        nevents_collected.append(nevents)
        yields.append(mod.get_yield())
    yields = znp.stack(yields, axis=0)
    nevents_collected = znp.stack(nevents_collected, axis=0)

    # Poisson term on the total yield: yield - n * log(yield) (up to constants).
    term_new = tf.nn.log_poisson_loss(nevents_collected, znp.log(yields))
    if log_offset is not None:
        term_new += log_offset
    nll += znp.sum(term_new, axis=0)
    return nll
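# A minimal, standalone check (hypothetical numbers) of the Poisson yield term
# used above: tf.nn.log_poisson_loss(n, log_lambda) returns lambda - n * log(lambda)
# (the n-dependent Stirling constant is only added with compute_full_loss=True),
# i.e. the negative log Poisson likelihood up to a term constant in lambda.
import tensorflow as tf

n = tf.constant([100.0, 250.0], dtype=tf.float64)    # observed event counts
lam = tf.constant([110.0, 240.0], dtype=tf.float64)  # fitted yields
term = tf.nn.log_poisson_loss(n, tf.math.log(lam))
manual = lam - n * tf.math.log(lam)
tf.debugging.assert_near(term, manual)  # both give the same Poisson NLL term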
def _rel_counts(self, x, norm):
    pdf = self.pdfs[0]
    edges = [znp.array(edge) for edge in self.axes.edges]
    edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
    lowers = [edge[:-1] for edge in edges_flat]
    uppers = [edge[1:] for edge in edges_flat]
    lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
    uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
    shape = tf.shape(lowers_meshed[0])
    lowers_meshed_flat = [znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed]
    uppers_meshed_flat = [znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed]
    lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
    upper_flat = znp.stack(uppers_meshed_flat, axis=-1)
    options = {"type": "bins"}

    @z.function
    def integrate_one(limits):
        l, u = tf.unstack(limits)
        limits_space = zfit.Space(obs=self.obs, limits=[l, u])
        return pdf.integrate(limits_space, norm=False, options=options)

    limits = znp.stack([lower_flat, upper_flat], axis=1)
    values = tf.vectorized_map(integrate_one, limits)
    values = znp.reshape(values, shape)
    if norm:
        values /= pdf.normalization(norm)
    return values
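# A standalone numpy sketch (hypothetical edge values) of the edge-meshing
# pattern above: turn per-axis bin edges into one (n_bins, n_dim) array of
# lower and upper bin corners, one row per bin of the product grid.
import numpy as np

edges_x = np.array([0.0, 1.0, 2.0])            # 2 bins in x
edges_y = np.array([10.0, 20.0, 30.0, 40.0])   # 3 bins in y
lowers = [edges_x[:-1], edges_y[:-1]]
uppers = [edges_x[1:], edges_y[1:]]
lowers_meshed = np.meshgrid(*lowers, indexing="ij")
uppers_meshed = np.meshgrid(*uppers, indexing="ij")
lower_flat = np.stack([m.reshape(-1) for m in lowers_meshed], axis=-1)
upper_flat = np.stack([m.reshape(-1) for m in uppers_meshed], axis=-1)
print(lower_flat.shape)              # (6, 2): one (x_low, y_low) pair per bin
print(lower_flat[0], upper_flat[0])  # [ 0. 10.] [ 1. 20.]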
def _sample(self, n, limits: ZfitSpace):
    pdf = self.pdfs[0]
    # TODO: use real limits, currently not supported in binned sample
    sample = pdf.sample(n=n)
    edges = sample.space.binning.edges
    ndim = len(edges)
    edges = [znp.array(edge) for edge in edges]
    edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
    lowers = [edge[:-1] for edge in edges_flat]
    uppers = [edge[1:] for edge in edges_flat]
    lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
    uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
    lowers_meshed_flat = [znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed]
    uppers_meshed_flat = [znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed]
    lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
    upper_flat = znp.stack(uppers_meshed_flat, axis=-1)
    counts_flat = znp.reshape(sample.values(), (-1,))
    counts_flat = tf.cast(counts_flat, znp.int32)  # TODO: what if we have fractions?
    # Repeat each bin's corners once per sampled event in that bin, then smear
    # the events uniformly within their bin.
    lower_flat_repeated = tf.repeat(lower_flat, counts_flat, axis=0)
    upper_flat_repeated = tf.repeat(upper_flat, counts_flat, axis=0)
    sample_unbinned = tf.random.uniform(
        (znp.sum(counts_flat), ndim),
        minval=lower_flat_repeated,
        maxval=upper_flat_repeated,
        dtype=self.dtype,
    )
    return sample_unbinned
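# A hypothetical standalone sketch of the smearing step above: repeat each
# bin's corners by its count, then draw one uniform point per event inside
# its own bin.
import numpy as np

lower_flat = np.array([[0.0, 10.0], [1.0, 10.0]])
upper_flat = np.array([[1.0, 20.0], [2.0, 20.0]])
counts = np.array([3, 2])                     # events per bin
low = np.repeat(lower_flat, counts, axis=0)   # (5, 2)
high = np.repeat(upper_flat, counts, axis=0)  # (5, 2)
u = np.random.uniform(size=low.shape)
sample_unbinned = low + u * (high - low)      # uniform within each bin
print(sample_unbinned.shape)                  # (5, 2)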
def _counts(self, x, norm):
    pdf = self.pdfs[0]
    edges = [znp.array(edge) for edge in self.axes.edges]
    edges_flat = [znp.reshape(edge, [-1]) for edge in edges]
    lowers = [edge[:-1] for edge in edges_flat]
    uppers = [edge[1:] for edge in edges_flat]
    lowers_meshed = znp.meshgrid(*lowers, indexing="ij")
    uppers_meshed = znp.meshgrid(*uppers, indexing="ij")
    shape = tf.shape(lowers_meshed[0])
    lowers_meshed_flat = [znp.reshape(lower_mesh, [-1]) for lower_mesh in lowers_meshed]
    uppers_meshed_flat = [znp.reshape(upper_mesh, [-1]) for upper_mesh in uppers_meshed]
    lower_flat = znp.stack(lowers_meshed_flat, axis=-1)
    upper_flat = znp.stack(uppers_meshed_flat, axis=-1)
    options = {"type": "bins"}

    if pdf.is_extended:

        @z.function
        def integrate_one(limits):
            l, u = tf.unstack(limits)
            limits_space = zfit.Space(obs=self.obs, limits=[l, u])
            return pdf.ext_integrate(limits_space, norm=False, options=options)

        missing_yield = False
    else:

        @z.function
        def integrate_one(limits):
            l, u = tf.unstack(limits)
            limits_space = zfit.Space(obs=self.obs, limits=[l, u])
            return pdf.integrate(limits_space, norm=False, options=options)

        missing_yield = True

    limits = znp.stack([lower_flat, upper_flat], axis=1)
    try:
        values = tf.vectorized_map(integrate_one, limits)[:, 0]
    except ValueError:  # not all ops are supported by vectorized_map
        values = tf.map_fn(integrate_one, limits)
    values = znp.reshape(values, shape)
    if missing_yield:
        values *= self.get_yield()
    if norm:
        values /= pdf.normalization(norm)
    return values
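# Design note as a minimal sketch: tf.vectorized_map auto-vectorizes the mapped
# function and is usually much faster, but it does not support all ops, so the
# code above falls back to tf.map_fn (a sequential while_loop) when it fails.
import tensorflow as tf

xs = tf.constant([[1.0, 2.0], [3.0, 4.0]])
f = lambda row: tf.reduce_sum(row ** 2)
fast = tf.vectorized_map(f, xs)  # vectorized across rows
slow = tf.map_fn(f, xs)          # row-by-row loop, same result
tf.debugging.assert_near(fast, slow)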
def to_unbinned(self):
    meshed_center = znp.meshgrid(*self.axes.centers, indexing="ij")
    flat_centers = [znp.reshape(center, (-1,)) for center in meshed_center]
    centers = znp.stack(flat_centers, axis=-1)
    flat_weights = znp.reshape(self.values(), (-1,))  # TODO: flow?
    space = self.space.copy(binning=None)
    from zfit import Data

    return Data.from_tensor(obs=space, tensor=centers, weights=flat_weights)
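# Design note for to_unbinned above: each bin becomes a single weighted entry
# at the bin center, so the total weight equals the total bin content (flow
# bins excluded, see the TODO). A hypothetical usage sketch:
#
#   data_unbinned = binned_data.to_unbinned()  # binned_data: a zfit binned dataset
#   znp.sum(data_unbinned.weights)             # ~ znp.sum(binned_data.values())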
def spline_interpolator(alpha, alphas, densities):
    # Shape the knots as [batch=1, n_alphas, 1] and interpolate every bin's
    # density across alpha in one batched spline call.
    alphas = alphas[None, :, None]
    shape = tf.shape(densities[0])
    densities_flat = [znp.reshape(density, [-1]) for density in densities]
    densities_flat = znp.stack(densities_flat, axis=0)
    alpha_shaped = znp.reshape(alpha, [1, -1, 1])
    y_flat = tfa.image.interpolate_spline(
        train_points=alphas,
        train_values=densities_flat[None, ...],
        query_points=alpha_shaped,
        order=2,
    )
    y_flat = y_flat[0, 0]
    y = tf.reshape(y_flat, shape)
    return y
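# A minimal sketch of the interpolate_spline shape conventions assumed above:
# train_points [batch, n, d], train_values [batch, n, k], query_points
# [batch, m, d] -> output [batch, m, k]; order=2 is a polyharmonic spline.
import tensorflow as tf
import tensorflow_addons as tfa

train_points = tf.constant([[[0.0], [1.0], [2.0]]])  # [1, 3, 1]
train_values = tf.constant([[[0.0], [1.0], [4.0]]])  # [1, 3, 1]
query_points = tf.constant([[[0.5], [1.5]]])         # [1, 2, 1]
y = tfa.image.interpolate_spline(train_points, train_values, query_points, order=2)
print(y.shape)  # (1, 2, 1)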
def test_unbinned_data2D():
    n = 751
    gauss, gauss_binned, obs, obs_binned = create_gauss2d_binned(n, 50)
    data = znp.random.uniform([-5, 50], [10, 600], size=(1000, 2))
    y_binned = gauss_binned.pdf(data)
    y_true = gauss.pdf(data)
    max_error = np.max(y_true) / 10
    np.testing.assert_allclose(y_true, y_binned, atol=max_error)

    # Agreement should be tighter when evaluating exactly at the bin centers.
    centers = obs_binned.binning.centers
    X, Y = znp.meshgrid(*centers, indexing="ij")
    centers = znp.stack([znp.reshape(t, (-1,)) for t in (X, Y)], axis=-1)
    ycenter_binned = gauss_binned.pdf(centers)
    ycenter_true = gauss.pdf(centers)
    np.testing.assert_allclose(ycenter_binned, ycenter_true, atol=max_error / 10)

    # for the extended case
    y_binned_ext = gauss_binned.ext_pdf(data)
    y_true_ext = gauss.ext_pdf(data)
    max_error_ext = np.max(y_true_ext) / 10
    np.testing.assert_allclose(y_true_ext, y_binned_ext, atol=max_error_ext)

    ycenter_binned_ext = gauss_binned.ext_pdf(centers)
    ycenter_true_ext = gauss.ext_pdf(centers)
    np.testing.assert_allclose(ycenter_binned_ext, ycenter_true_ext, atol=max_error_ext / 10)

    # Points outside the binned space evaluate to zero; only the third point
    # lies inside the observable range.
    x_outside = znp.array([[-7.0, 55], [3.0, 13], [2, 150], [12, 30], [14, 1000]])
    y_outside = gauss_binned.pdf(x_outside)
    assert y_outside[0] == 0
    assert y_outside[1] == 0
    assert y_outside[2] > 0
    assert y_outside[3] == 0
    assert y_outside[4] == 0

    y_outside_ext = gauss_binned.ext_pdf(x_outside)
    assert y_outside_ext[0] == 0
    assert y_outside_ext[1] == 0
    assert y_outside_ext[2] > 0
    assert y_outside_ext[3] == 0
    assert y_outside_ext[4] == 0
def unbinned_to_binindex(data, space, flow=False):
    if flow:
        warnings.warn(
            "Flow currently not fully supported. Values outside the edges are all 0."
        )
    values = [znp.reshape(data.value(ob), (-1,)) for ob in space.obs]
    edges = [znp.reshape(edge, (-1,)) for edge in space.binning.edges]
    bins = [tfp.stats.find_bins(x=val, edges=edge) for val, edge in zip(values, edges)]
    stacked_bins = znp.stack(bins, axis=-1)
    if flow:
        stacked_bins += 1  # shift by one to make room for the underflow bin
    # find_bins returns NaN for values outside the edges; map those to bin 0.
    bin_is_nan = tf.math.is_nan(stacked_bins)
    zeros = znp.zeros_like(stacked_bins)
    binindices = znp.where(bin_is_nan, zeros, stacked_bins)
    stacked_bins = znp.asarray(binindices, dtype=znp.int32)
    return stacked_bins
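# A minimal sketch of tfp.stats.find_bins, which backs the lookup above: it
# returns the bin index per value (as a float tensor) and NaN for values
# outside the edges, which unbinned_to_binindex then maps to bin 0.
import tensorflow as tf
import tensorflow_probability as tfp

edges = tf.constant([0.0, 1.0, 2.0, 3.0])
x = tf.constant([0.5, 2.5, -1.0])
print(tfp.stats.find_bins(x=x, edges=edges))  # [0., 2., nan]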
def _ext_pdf(self, x, norm):
    if not self._automatically_extended:
        raise SpecificFunctionNotImplemented
    pdf = self.pdfs[0]
    # Evaluate the underlying binned pdf on its own grid, then spline-interpolate
    # to the requested points.
    density = pdf.ext_pdf(x.space, norm=norm)
    density_flat = znp.reshape(density, (-1,))
    centers_list = znp.meshgrid(*pdf.space.binning.centers, indexing="ij")
    centers_list_flat = [znp.reshape(cent, (-1,)) for cent in centers_list]
    centers = znp.stack(centers_list_flat, axis=-1)
    # TODO: only 1 dim now
    probs = tfa.image.interpolate_spline(
        train_points=centers[None, ...],
        train_values=density_flat[None, :, None],
        query_points=x.value()[None, ...],
        order=self.order,
    )
    return probs[0, ..., 0]
def _minimize(self, loss, params):
    from .. import run

    minimizer_fn = tfp.optimizer.bfgs_minimize
    params = tuple(params)
    do_print = self.verbosity > 8
    current_loss = None
    nan_counter = 0

    # @z.function
    def update_params_value_grad(loss, params, values):
        for param, value in zip(params, tf.unstack(values, axis=0)):
            param.set_value(value)
        value, gradients = loss.value_gradient(params=params)
        return gradients, value

    def to_minimize_func(values):
        nonlocal current_loss, nan_counter

        do_print = self.verbosity > 8
        is_nan = False
        gradient = None
        value = None
        try:
            gradient, value = update_params_value_grad(loss, params, values)
        except tf.errors.InvalidArgumentError:
            err = "NaNs"
            is_nan = True
        except:
            err = "unknown error"
            raise
        finally:
            if value is None:
                value = f"invalid, {err}"
            if gradient is None:
                gradient = [f"invalid, {err}"] * len(params)
            if do_print:
                print_gradient(
                    params,
                    run(values),
                    [float(run(g)) for g in gradient],
                    loss=run(value),
                )

        loss_evaluated = run(value)
        is_nan = is_nan or np.isnan(loss_evaluated)
        if is_nan:
            nan_counter += 1
            info_values = {
                "loss": run(value),
                "old_loss": current_loss,
                "nan_counter": nan_counter,
            }
            # Delegate to the strategy (e.g. push the loss up) when NaNs appear.
            value = self.strategy.minimize_nan(
                loss=loss, params=params, minimizer=self, values=info_values
            )
        else:
            nan_counter = 0
            current_loss = value
        gradient = znp.stack(gradient)
        return value, gradient

    initial_inv_hessian_est = tf.linalg.tensor_diag([p.step_size for p in params])

    minimizer_kwargs = dict(
        initial_position=znp.stack(params),
        x_tolerance=self.tol,
        # f_relative_tolerance=self.tol * 1e-5,  # TODO: use edm for stopping criteria
        initial_inverse_hessian_estimate=initial_inv_hessian_est,
        parallel_iterations=1,
        max_iterations=self.max_calls,
    )
    minimizer_kwargs.update(self.options)
    result = minimizer_fn(to_minimize_func, **minimizer_kwargs)

    # save result
    params_result = run(result.position)
    assign_values(params, values=params_result)

    info = {
        "n_eval": run(result.num_objective_evaluations),
        "n_iter": run(result.num_iterations),
        "grad": run(result.objective_gradient),
        "original": result,
    }
    edm = -999  # not available from the BFGS result
    fmin = run(result.objective_value)
    status = -999  # not provided by tfp's optimizer
    converged = run(result.converged)
    params = OrderedDict((p, val) for p, val in zip(params, params_result))
    result = FitResult(
        params=params,
        edm=edm,
        fmin=fmin,
        info=info,
        loss=loss,
        status=status,
        converged=converged,
        minimizer=self.copy(),
    )
    return result
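# A standalone sketch of the tfp BFGS driver used above, on a simple quadratic;
# the value-and-gradient contract matches to_minimize_func's (return the loss
# value and its gradient for a given position).
import tensorflow as tf
import tensorflow_probability as tfp

def quadratic(x):
    # Returns (value, gradient) of sum((x - 2)^2) at x.
    return tfp.math.value_and_gradient(lambda x: tf.reduce_sum((x - 2.0) ** 2), x)

result = tfp.optimizer.bfgs_minimize(quadratic, initial_position=tf.zeros(3))
print(result.converged.numpy(), result.position.numpy())  # True [2. 2. 2.]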