Example #1
    def from_hist(cls, h: hist.NamedHist) -> BinnedData:
        """Create a binned dataset from a `hist` histogram.

        Args:
            h: A NamedHist. The axes will be used as the binning in zfit.
        """
        from zfit import Space

        space = Space(binning=histaxes_to_binning(h.axes))
        values = znp.asarray(h.values(flow=False))  # under-/overflow (flow) bins are not used
        variances = h.variances(flow=False)
        if variances is not None:
            variances = znp.asarray(variances)
        holder = BinnedHolder(space=space, values=values, variances=variances)
        return cls(holder=holder)
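
A minimal usage sketch for `from_hist`; the import path `zfit.data.BinnedData` and the toy histogram are illustrative assumptions, not taken from the snippet above:

import hist
import numpy as np
import zfit

# a 1D named histogram filled with toy data
h = hist.NamedHist.new.Reg(50, -3, 3, name="x").Double()
h.fill(x=np.random.normal(size=1_000))

# the axes of the histogram become the binning of the zfit space
binned = zfit.data.BinnedData.from_hist(h)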
Example #2
def hessian(func, params, hess):
    """Compute the Hessian of `func` at `params`: the full matrix, or only its diagonal if `hess == 'diag'`."""
    params = tf.stack(params)
    with tf.GradientTape(persistent=True,
                         watch_accessed_variables=False) as tape:
        tape.watch(params)
        with tf.GradientTape(persistent=True,
                             watch_accessed_variables=False) as tape2:
            tape2.watch(params)
            y = func(params)
        gradients = tape2.gradient(y, params)
        # pair each gradient entry with its parameter -> shape (n_params, 2)
        gradients_params = tf.stack([gradients, params], axis=-1)

    if hess == 'diag':
        def gradfunc(par_grad):
            pars = par_grad[1]
            grads = par_grad[0]
            return tape.gradient(grads, sources=pars)

        computed_hessian = tf.map_fn(gradfunc, gradients_params)
    else:
        computed_hessian = znp.asarray(
            tape.jacobian(
                gradients,
                sources=params,
                experimental_use_pfor=True  # causes TF bug? Slow..
            ))
    return computed_hessian
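
For comparison, a self-contained sketch of the full-Hessian branch in plain TensorFlow on a toy scalar function (all names here are illustrative):

import tensorflow as tf

def toy_func(p):
    # scalar function with known Hessian diag(2, 4)
    return p[0] ** 2 + 2.0 * p[1] ** 2

params = tf.constant([1.0, 2.0])
with tf.GradientTape() as tape:
    tape.watch(params)
    with tf.GradientTape() as tape2:
        tape2.watch(params)
        y = toy_func(params)
    grad = tape2.gradient(y, params)
hess_full = tape.jacobian(grad, params)     # shape (2, 2)
hess_diag = tf.linalg.diag_part(hess_full)  # [2., 4.]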
Example #3
    def ext_pdf(
        self, x: ztyping.XType, norm: ztyping.LimitsType = None, *, norm_range=None
    ) -> ztyping.XType:
        """Extended probability density function: the density scaled by the yield."""
        if norm_range is not None:  # legacy alias for norm
            norm = norm_range
        if not self.is_extended:
            raise NotExtendedPDFError
        # convert the input argument to a standardized form
        x = self._convert_input_binned_x(x, none_is_space=True)
        norm = self._check_convert_norm(norm, none_is_error=True)
        # sort it and remember the original sorting
        original_space = x if isinstance(x, ZfitSpace) else x.space
        x = x.with_obs(self.space)

        # if it is unbinned, we get the binned version and gather the corresponding values
        is_unbinned = isinstance(x, ZfitUnbinnedData)
        binindices = None
        if is_unbinned:
            binindices = unbinned_to_binindex(x, self.space, flow=True)
            x = self.space

        values = self._call_ext_pdf(x, norm=norm)

        if binindices is not None:
            # the bin indices were computed with flow (shifted by one), so pad
            # the values with an under- and overflow bin on each axis
            padded_values = znp.pad(
                values,
                znp.ones((z._get_ndims(values), 2), dtype=znp.float64),
                mode="constant",
            )
            ordered_values = tf.gather_nd(padded_values, indices=binindices)
        else:
            ordered_values = move_axis_obs(self.space, original_space, values)
        return znp.asarray(ordered_values)
Example #4
File: basepdf.py Project: zfit/zfit
    def pdf(
        self,
        x: ztyping.XTypeInput,
        norm: ztyping.LimitsTypeInput = None,
        *,
        norm_range=None,
    ) -> ztyping.XType:
        """Probability density function, normalized over `norm`.

        Args:
          x: `float` or `double` `Tensor`.
          norm: :py:class:`~zfit.Space` to normalize over.

        Returns:
          :py:class:`tf.Tensor` of type `self.dtype`.
        """
        assert norm_range is None
        norm = self._check_input_norm(norm, none_is_error=True)
        with self._convert_sort_x(x) as x:
            value = self._single_hook_pdf(x=x, norm=norm)
            if run.numeric_checks:
                z.check_numerics(
                    value,
                    message="Check if pdf output contains any NaNs of Infs")
            return znp.asarray(z.to_real(value))
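
A usage sketch for `pdf` with a standard zfit Gaussian (parameter values are illustrative):

import numpy as np
import zfit

obs = zfit.Space("x", limits=(-5, 5))
mu = zfit.Parameter("mu", 0.0)
sigma = zfit.Parameter("sigma", 1.0)
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)

# density normalized over the space of the PDF
probs = gauss.pdf(np.array([-1.0, 0.0, 1.0]))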
Example #5
    def from_tensor(cls,
                    space: ZfitSpace,
                    values: znp.array,
                    variances: znp.array | None = None) -> BinnedData:
        """Create a binned dataset defined in *space* where values are considered to be the counts.

        Args:
            space: The space of the data. Variables need to match the values dimensions. The space has to be binned
                and carry the information about the edges.
            values: Actual counts of the histogram.
            variances: Uncertainties of the histogram values. If `True`, the uncertainties are taken to be
                Poissonian distributed.
        """
        values = znp.asarray(values, znp.float64)
        if variances is True:
            variances = znp.sqrt(values)
        elif variances is not None:
            variances = znp.asarray(variances)
        return cls(holder=BinnedHolder(
            space=space, values=values, variances=variances))
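
A usage sketch for `from_tensor`; the binned-space construction follows the current zfit binning API and should be treated as an assumption:

import numpy as np
import zfit

# a binned space: 10 regular bins on [0, 10]
binning = zfit.binned.RegularBinning(10, 0, 10, name="x")
obs = zfit.Space("x", binning=binning)

counts = np.random.poisson(50, size=10).astype(np.float64)
# variances=True treats the counts as Poissonian
data = zfit.data.BinnedData.from_tensor(space=obs, values=counts, variances=True)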
Example #6
def _unbinned_nll_tf(
    model: ztyping.PDFInputType,
    data: ztyping.DataInputType,
    fit_range: ZfitSpace,
    log_offset=None,
):
    """Return the unbinned negative log likelihood for a PDF.

    Args:
        model: |@doc:loss.init.model| PDFs that return the normalized probability for
               *data* under the given parameters.
               If multiple model and data are given, they will be used
               in the same order to do a simultaneous fit. |@docend:loss.init.model|
        data: |@doc:loss.init.data| Dataset that will be given to the *model*.
               If multiple model and data are given, they will be used
               in the same order to do a simultaneous fit. |@docend:loss.init.data|
        fit_range: If given, the data range is set to *fit_range* and the model is normalized over it.

    Returns:
        The unbinned nll
    """

    if is_container(model):
        nlls = [
            _unbinned_nll_tf(model=p,
                             data=d,
                             fit_range=r,
                             log_offset=log_offset)
            for p, d, r in zip(model, data, fit_range)
        ]
        nll_finished = znp.sum(nlls, axis=0)
    else:
        if fit_range is not None:
            with data.set_data_range(fit_range):
                probs = model.pdf(data, norm_range=fit_range)
        else:
            probs = model.pdf(data)
        log_probs = znp.log(probs + znp.asarray(1e-307, dtype=znp.float64)
                            )  # minor offset to avoid NaNs from log(0)
        nll = _nll_calc_unbinned_tf(
            log_probs=log_probs,
            weights=data.weights if data.weights is not None else None,
            log_offset=log_offset,
        )
        nll_finished = nll
    return nll_finished
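
Stripped of the container handling and the offset, the per-dataset term is the weighted negative log likelihood; a plain-numpy sketch of the arithmetic:

import numpy as np

probs = np.array([0.2, 0.5, 0.1])    # model.pdf(data) for three events
weights = np.array([1.0, 2.0, 1.0])  # event weights, if any
nll = -np.sum(weights * np.log(probs + 1e-307))  # same tiny offset guards log(0)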
Example #7
    def pdf(
        self, x: ztyping.XType, norm: ztyping.LimitsType = None, *, norm_range=None
    ) -> ztyping.XType:
        """Probability density function, evaluated at `x` or in the bins of `x`

        Args:
            x: values to evaluate the PDF at. If this is a `ZfitBinnedData`-like object, a histogram of *densities*
                will be returned. If x is a `ZfitUnbinnedData`-like object, the densities will be evaluated at the
                points of `x`.
            norm: |@doc:pdf.pdf.norm| Normalization of the function.
               By default, this is the `norm` of the PDF (which by default is the same as
               the space of the PDF). |@docend:pdf.pdf.norm|

        Returns:
            `Array-like`: probability density
        """
        if norm_range is not None:  # legacy alias for norm
            norm = norm_range

        # convert the input argument to a standardized form
        x = self._convert_input_binned_x(x, none_is_space=True)
        norm = self._check_convert_norm(norm, none_is_error=True)

        # sort it and remember the original sorting
        original_space = x if isinstance(x, ZfitSpace) else x.space
        x = x.with_obs(self.space)

        # if it is unbinned, we get the binned version and gather the corresponding values
        is_unbinned = isinstance(x, ZfitUnbinnedData)
        binindices = None
        if is_unbinned:
            binindices = unbinned_to_binindex(x, self.space, flow=True)
            x = self.space

        values = self._call_pdf(x, norm=norm)

        if binindices is not None:
            # the bin indices were computed with flow (shifted by one), so pad
            # the values with an under- and overflow bin on each axis
            padded_values = znp.pad(
                values,
                znp.ones((z._get_ndims(values), 2), dtype=znp.float64),
                mode="constant",
            )
            ordered_values = tf.gather_nd(padded_values, indices=binindices)
        else:
            ordered_values = move_axis_obs(self.space, original_space, values)
        return znp.asarray(ordered_values)
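
A usage sketch for the binned `pdf`; `HistogramPDF` and the binning constructor follow current zfit naming and are assumptions here:

import numpy as np
import zfit

binning = zfit.binned.RegularBinning(10, -3, 3, name="x")
obs = zfit.Space("x", binning=binning)
counts = np.random.poisson(100, size=10).astype(np.float64)
data = zfit.data.BinnedData.from_tensor(space=obs, values=counts)
pdf = zfit.pdf.HistogramPDF(data)

densities = pdf.pdf(data)                      # binned input: one density per bin
at_points = pdf.pdf(np.array([[0.5], [1.2]]))  # unbinned input: density at each point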
Example #8
def unbinned_to_binindex(data, space, flow=False):
    """Convert unbinned *data* to the bin indices of *space*.

    With ``flow=True``, the indices are shifted by one (to account for the
    underflow bin) and values outside the edges are mapped to index 0.
    """
    if flow:
        warnings.warn(
            "Flow currently not fully supported. Values outside the edges are all 0."
        )
    values = [znp.reshape(data.value(ob), (-1, )) for ob in space.obs]
    edges = [znp.reshape(edge, (-1, )) for edge in space.binning.edges]
    bins = [
        tfp.stats.find_bins(x=val, edges=edge)
        for val, edge in zip(values, edges)
    ]
    stacked_bins = znp.stack(bins, axis=-1)
    if flow:
        stacked_bins += 1
        bin_is_nan = tf.math.is_nan(stacked_bins)
        zeros = znp.zeros_like(stacked_bins)
        binindices = znp.where(bin_is_nan, zeros, stacked_bins)
        stacked_bins = znp.asarray(binindices, dtype=znp.int32)
    return stacked_bins
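
The index logic relies on `tfp.stats.find_bins` returning float indices, with NaN for values outside the edges (which is what the `is_nan` check above handles); a minimal demonstration:

import tensorflow as tf
import tensorflow_probability as tfp

edges = tf.constant([0.0, 1.0, 2.0, 3.0])
vals = tf.constant([0.5, 2.5, -1.0])
bins = tfp.stats.find_bins(x=vals, edges=edges)
# -> [0., 2., nan]; the NaN marks the out-of-range value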
Example #9
File: basepdf.py Project: zfit/zfit
    def log_pdf(self,
                x: ztyping.XType,
                norm: ztyping.LimitsType = None,
                *,
                norm_range=None) -> ztyping.XType:
        """Log probability density function normalized over `norm_range`.

        Args:
          x: `float` or `double` `Tensor`.
          norm: :py:class:`~zfit.Space` to normalize over

        Returns:
          A `Tensor` of type `self.dtype`.
        """
        assert norm_range is None
        norm = self._check_input_norm(norm)
        with self._convert_sort_x(x) as x:
            return znp.asarray(
                z.to_real(self._single_hook_log_pdf(x=x, norm=norm)))
Example #10
    def _pdf(self, x, norm_range):
        # lazily create the cache variable on the first call
        valcache = self.valcache
        if valcache is None:
            valcache = tf.Variable(znp.zeros(shape=tf.shape(x)[0]),
                                   trainable=False,
                                   validate_shape=False,
                                   dtype=tf.float64)
            self.valcache = valcache

        # the cache stays valid as long as all parameters are within tolerance
        # of the values they had when the cache was filled
        params = list(self.pdfs[0].get_params())
        values = tf.stack(params)
        params_same = tf.math.reduce_all(
            tf.math.abs(values - self.param_values) < self.cache_tolerance)
        self.valcache_valid.assign(tf.math.logical_and(params_same,
                                                       self.do_caching),
                                   read_value=False)
        self.param_values.assign(values, read_value=False)
        value = cache_value(valcache, self.valcache_valid,
                            lambda: self.pdfs[0].pdf(x, norm_range))
        return znp.asarray(value)
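
`cache_value` is not shown in this snippet; a plausible minimal implementation (hypothetical, for illustration only) returns the cached tensor when the validity flag is set and otherwise recomputes and stores it:

def cache_value(cache, valid, func):
    # hypothetical helper: recompute and refill the cache when invalid
    def recompute():
        value = func()
        cache.assign(value, read_value=False)
        return value

    return tf.cond(valid, lambda: tf.identity(cache), recompute)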
Example #11
File: data.py Project: zfit/zfit
    def value(self, obs: ztyping.ObsTypeInput = None):
        return znp.asarray(self._value_internal(obs=obs))
Example #12
def to_real(x, dtype=ztypes.float):
    """Cast *x* to a real `dtype` and return it as a znp array."""
    return znp.asarray(tf.cast(x, dtype=dtype))
Example #13
def cut_edges_and_bins(
    edges: Iterable[znp.array], limits: ZfitSpace, axis=None, unscaled=None
) -> tuple[list[znp.array], tuple[znp.array, znp.array], list | None]:
    """Cut the *edges* according to *limits* and calculate the bins inside.

    The edges within limits are calculated and returned together with the corresponding bin indices. The indices
    mark the lowest and the highest index of the edges that are returned. Additionally, the unscaled edges are returned.

    If a limit falls between two edges, the limit is used as the new edge. If a limit lies outside the edges,
    all edges in that direction are returned (but not extended to the limit). For example:

    [0, 0.5, 1., 1.5, 2.] and the limits (0.8, 3.) will return [0.8, 1., 1.5, 2.], ([1], [4])

    .. code-block:: python

        cut_edges_and_bins([[0., 0.5, 1., 1.5, 2.]], ([[0.8]], [[3]]))

    Args:
        edges: Iterable of tensor-like objects that describe the edges of a histogram. Every object should have rank n
            (where n is the length of *edges*) but only have the dimension i filled out. These are
            tensors that are ready to be broadcasted together.
        limits: The limits that will be used to confine the edges.
        axis: Which axes to cut; if None, all axes are used.
        unscaled: If True, additionally return the unscaled edges of the cut region.

    Returns:
        edges, (lower bins, upper bins), unscaled_edges: The edges and the bins are returned.
            The upper bin number corresponds to
            the highest bin that was still (partially) inside the limits **plus one** (so it is the index of the
            edge right outside). The unscaled edges are like *edges*, except that the last edge is the first
            edge lying outside the limits, i.e. the actual edge of the last returned bin.
            This can be used to determine the fraction cut away.
    """
    if axis is not None:
        axis = convert_to_container(axis)
    if unscaled is None:
        unscaled = False
    if unscaled:
        cut_unscaled_edges = []
    else:
        cut_unscaled_edges = None
    cut_scaled_edges = []

    all_lower_bins = []
    all_upper_bins = []
    if isinstance(limits, ZfitSpace):
        lower, upper = limits.limits
    else:
        lower, upper = limits
        lower = znp.asarray(lower)
        upper = znp.asarray(upper)
    lower_all = lower[0]
    upper_all = upper[0]
    rank = len(edges)
    current_axis = 0
    for i, edge in enumerate(edges):
        edge = znp.asarray(edge)
        edge = znp.reshape(edge, (-1,))
        if axis is None or i in axis:

            lower_i = lower_all[current_axis, None]
            edge_minimum = edge[0]
            # edge_minimum = tf.gather(edge, indices=0, axis=i)
            lower_i = znp.maximum(lower_i, edge_minimum)
            upper_i = upper_all[current_axis, None]
            edge_maximum = edge[-1]
            # edge_maximum = tf.gather(edge, indices=tf.shape(edge)[i] - 1, axis=i)
            upper_i = znp.minimum(upper_i, edge_maximum)
            # we get the bins that are just one too far. Then we update this whole bin tensor with the actual edge.
            # The bins index is the index below the value.
            lower_bin_float = tfp.stats.find_bins(
                lower_i, edge, extend_lower_interval=True, extend_upper_interval=True
            )
            lower_bin = tf.reshape(tf.cast(lower_bin_float, dtype=znp.int32), [-1])
            # lower_bins = tf.tensor_scatter_nd_update(zero_bins, [[i]], lower_bin)
            # +1 below because the outer bin is searched, meaning the one that is higher than the value

            upper_bin_float = tfp.stats.find_bins(
                upper_i, edge, extend_lower_interval=True, extend_upper_interval=True
            )
            upper_bin = tf.reshape(tf.cast(upper_bin_float, dtype=znp.int32), [-1]) + 1
            size = upper_bin - lower_bin
            new_edge = tf.slice(
                edge, lower_bin, size + 1
            )  # +1 because stop is exclusive
            new_edge = tf.tensor_scatter_nd_update(
                new_edge, [tf.constant([0]), size], [lower_i[0], upper_i[0]]
            )

            if unscaled:
                new_edge_unscaled = tf.slice(
                    edge, lower_bin, size + 1
                )  # +1 because stop is exclusive

            current_axis += 1
        else:
            lower_bin = [0]
            upper_bin = znp.asarray([edge.shape[0] - 1], dtype=znp.int32)
            new_edge = edge
            if unscaled:
                new_edge_unscaled = edge
        new_shape = [1] * rank
        new_shape[i] = -1
        new_edge = znp.reshape(new_edge, new_shape)
        all_lower_bins.append(lower_bin)
        all_upper_bins.append(upper_bin)
        cut_scaled_edges.append(new_edge)
        if unscaled:
            new_edge_unscaled = znp.reshape(new_edge_unscaled, new_shape)
            cut_unscaled_edges.append(new_edge_unscaled)

    all_lower_bins = tf.concat(all_lower_bins, axis=0)
    all_upper_bins = tf.concat(all_upper_bins, axis=0)
    return cut_scaled_edges, (all_lower_bins, all_upper_bins), cut_unscaled_edges
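
A worked call of `cut_edges_and_bins`, following the example from the docstring:

import tensorflow as tf

edges = [tf.constant([[0.0, 0.5, 1.0, 1.5, 2.0]])]
limits = (tf.constant([[0.8]]), tf.constant([[3.0]]))
cut, (low, up), _ = cut_edges_and_bins(edges=edges, limits=limits)
# cut[0] -> [0.8, 1.0, 1.5, 2.0]: the first edge is replaced by the lower limit
# low -> [1], up -> [4]: lowest edge index and one past the last bin inside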
Example #14
def binned_rect_integration(
    *,
    limits: ZfitSpace,
    edges: Iterable[znp.array] | znp.array,
    counts: znp.array | None = None,
    density: znp.array | None = None,
    axis: Iterable[int] | int | None = None,
) -> znp.array:
    """Integrate a histogram over *limits*.

    This integrator takes into account that the limits may not coincide with the bin edges.

    Args:
        limits: Limits to integrate over. A possible binning is ignored.
        edges: The edges per axis. They should have the shape `(1, ..., 1, n, 1, ..., 1)`, with the n edges at the
            position of the *ith* axis. `ZfitBinning` provides this format on the `edges` attribute.
        counts: Counts of the histogram. This is what most histograms have and is equal to the density multiplied by
            the binwidth.
            Exactly one of counts or density has to be provided.
        density: The density of a histogram is the bincount divided by the binwidth.
            Exactly one of counts or density has to be provided.
        axis: Which axes to integrate over. Defaults to all.

    Returns:
        Integral with shape corresponding to the non-integrated axes (or a scalar in case of all axes integrated).
    """
    edges = convert_to_container(edges)
    if not isinstance(limits, ZfitSpace):
        raise TypeError(f"limits has to be a ZfitSpace, not {limits}.")
    if counts is not None:
        if density is not None:
            raise ValueError("Either specify 'counts' or 'density' but not both.")
        is_density = False
        values = counts
    elif density is not None:
        is_density = True
        values = density
    else:
        raise ValueError("Need to specify either 'counts' or 'density', not None.")
    ndims = z._get_ndims(values)
    if axis is not None:
        axis = convert_to_container(axis)
        if len(axis) > ndims:
            raise ValueError(
                f"axis {axis} has more entries than values has dimensions: {values.shape}."
            )
    else:
        axis = list(range(ndims))

    scaled_edges, (lower_bins, upper_bins), unscaled_edges = cut_edges_and_bins(
        edges=edges, limits=limits, axis=axis, unscaled=True
    )

    values_cut = tf.slice(
        values, lower_bins, (upper_bins - lower_bins)
    )  # upper_bins is already one past the last bin inside the limits

    rank = values.shape.rank
    binwidths = []
    if not is_density:
        binwidths_unscaled = []
    # calculate the binwidth in each dimension
    for i, edge in enumerate(scaled_edges):
        edge_lower_index = [0] * rank
        # int32 is needed! Otherwise the gradient will fail
        edge_lowest_index = znp.array(edge_lower_index, dtype=znp.int32)

        edge_lower_index[i] = 1
        edge_lower_index = znp.array(edge_lower_index, dtype=znp.int32)
        edge_upper_index = [1] * rank
        edge_highest_index = edge_upper_index.copy()
        len_edge = tf.shape(edge)[i]
        edge_highest_index[i] = len_edge
        edge_highest_index = znp.asarray(edge_highest_index, dtype=znp.int32)
        edge_upper_index[i] = len_edge - 1  # len n -> index max is n - 1

        edge_upper_index = znp.asarray(edge_upper_index, dtype=znp.int32)
        lower_edge = tf.slice(
            edge, edge_lowest_index, (edge_upper_index - edge_lowest_index)
        )
        upper_edge = tf.slice(
            edge, edge_lower_index, (edge_highest_index - edge_lower_index)
        )
        binwidths.append(upper_edge - lower_edge)

        if not is_density:
            # unscaled edges to get the ratio
            lower_edge_unscaled = tf.slice(
                unscaled_edges[i],
                edge_lowest_index,
                (edge_upper_index - edge_lowest_index),
            )
            upper_edge_unscaled = tf.slice(
                unscaled_edges[i],
                edge_lower_index,
                (edge_highest_index - edge_lower_index),
            )
            binwidths_unscaled.append(upper_edge_unscaled - lower_edge_unscaled)

    binareas = reduce(
        operator.mul, binwidths
    )  # operator.mul broadcasts the per-axis widths into the grid of bin areas
    if not is_density:  # scale the counts by the fraction. This is mostly one.
        binareas_uncut = np.prod(binwidths_unscaled, axis=0)
        binareas /= binareas_uncut
    values_cut *= binareas
    integral = tf.reduce_sum(values_cut, axis=axis)
    return integral
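
A usage sketch of `binned_rect_integration` on a 1D histogram; the space construction follows current zfit naming (an assumption):

import numpy as np
import zfit

binning = zfit.binned.RegularBinning(4, 0, 2, name="x")
space = zfit.Space("x", binning=binning)
counts = np.array([1.0, 2.0, 3.0, 4.0])

# integrate over [0.8, 2.0]; the partially covered bin [0.5, 1.0] contributes
# only the fraction of its width inside the limits
limits = zfit.Space("x", limits=(0.8, 2.0))
integral = binned_rect_integration(
    limits=limits, edges=space.binning.edges, counts=counts
)
# expected: 2.0 * 0.4 + 3.0 + 4.0 = 7.8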