Пример #1
0
def test_onedim_sampling(interpolation):
    """Compare ``FFTConvPDFV1`` sampling with the ``FFTConvPDFV1NoSampling`` variant.

    There is a sampling shortcut, so this checks that sampling also works
    without it: both PDFs convolve the same sum of uniforms with the same
    kernel, and their samples are compared with a two-sample
    Kolmogorov-Smirnov test.
    """
    kernel_space = zfit.Space("obs1", limits=(-3, 1))
    func_space = zfit.Space("obs1", limits=(5, 15))

    # Function to be convolved: equal-fraction sum of two uniforms.
    wide_uniform = zfit.pdf.Uniform(6, 12, obs=func_space)
    narrow_uniform = zfit.pdf.Uniform(11, 11.5, obs=func_space)
    func = zfit.pdf.SumPDF([wide_uniform, narrow_uniform], 0.5)

    # Kernel: equal-fraction sum of two uniforms in the kernel space.
    wide_kernel = zfit.pdf.Uniform(-2, 1, obs=kernel_space)
    narrow_kernel = zfit.pdf.Uniform(-0.5, 1.0, obs=kernel_space)
    funck = zfit.pdf.SumPDF([wide_kernel, narrow_kernel], 0.5)

    conv_kwargs = dict(func=func, kernel=funck, n=200, interpolation=interpolation)
    conv = zfit.pdf.FFTConvPDFV1(**conv_kwargs)
    conv_nosample = FFTConvPDFV1NoSampling(**conv_kwargs)

    n_draws = 10000
    drawn = conv.sample(n_draws)
    drawn_nosample = conv_nosample.sample(n_draws)
    x = z.unstack_x(drawn)
    xns = z.unstack_x(drawn_nosample)

    # The p-value can vary a lot, but passing this threshold still means close.
    pvalue = scipy.stats.ks_2samp(x, xns).pvalue
    assert pvalue > 1e-3
Пример #2
0
 def _analytic_integrate(self, limits, norm_range):
     """Integrate analytically as the difference of the CDF at the limits.

     Args:
         limits: Object whose ``_rect_limits_tf`` unpacks into a lower and an
             upper limit.
         norm_range: Not used by this method.

     Returns:
         ``self.distribution.cdf(upper) - self.distribution.cdf(lower)``.
     """
     raw_lower, raw_upper = limits._rect_limits_tf
     low = z.unstack_x(raw_lower)
     high = z.unstack_x(raw_upper)
     # Fail early on non-finite limits instead of propagating NaNs.
     tf.debugging.assert_all_finite(
         (low, high),
         "Are infinite limits needed? Causes troubles with NaNs")
     cdf_high = self.distribution.cdf(high)
     cdf_low = self.distribution.cdf(low)
     return cdf_high - cdf_low
Пример #3
0
def func4_3deps(x):
    """Evaluate ``a**2 + b**3 + 0.5 * c`` for the three components of ``x``.

    A NumPy array is unpacked directly along its first axis; any other input
    is split with ``z.unstack_x``.
    """
    components = x if isinstance(x, np.ndarray) else z.unstack_x(x)
    first, second, third = components
    return first**2 + second**3 + 0.5 * third
Пример #4
0
    def _unnormalized_pdf(self, x):
        """Evaluate the unnormalized angular density from ``FL``, ``AT2`` and ``P4p``.

        Args:
            x: Data that ``z.unstack_x`` splits into ``costheta_k``,
                ``costheta_l`` and ``phi``, in that order.

        Returns:
            The unnormalized density evaluated at ``x``.
        """
        FL = self.parameters["FL"]
        AT2 = self.parameters["AT2"]
        P4p = self.parameters["P4p"]
        costheta_k, costheta_l, phi = z.unstack_x(x)

        sintheta_k = tf.sqrt(1.0 - costheta_k * costheta_k)
        sintheta_l = tf.sqrt(1.0 - costheta_l * costheta_l)

        sintheta_2k = 1.0 - costheta_k * costheta_k
        sintheta_2l = 1.0 - costheta_l * costheta_l

        # Double-angle terms: sin(2t) = 2 sin(t) cos(t), cos(2t) = 2 cos(t)^2 - 1.
        sin2theta_k = 2.0 * sintheta_k * costheta_k
        # Required by the P4p term below; without it the method raises NameError.
        sin2theta_l = 2.0 * sintheta_l * costheta_l
        cos2theta_l = 2.0 * costheta_l * costheta_l - 1.0

        pdf = (
            (3.0 / 4.0) * (1.0 - FL) * sintheta_2k
            + FL * costheta_k * costheta_k
            + (1.0 / 4.0) * (1.0 - FL) * sintheta_2k * cos2theta_l
            + -1.0 * FL * costheta_k * costheta_k * cos2theta_l
            + (1.0 / 2.0)
            * (1.0 - FL)
            * AT2
            * sintheta_2k
            * sintheta_2l
            * tf.cos(2.0 * phi)
            + tf.sqrt(FL * (1 - FL)) * P4p * sin2theta_k * sin2theta_l * tf.cos(phi)
        )

        return pdf
Пример #5
0
    def _unnormalized_pdf(self, x, norm_range=False):
        """Interpolate the stored grid estimations at the positions in ``x``.

        The reference interval is spanned by the minimum and maximum of
        ``self._grid``; ``norm_range`` is not used.
        """
        positions = z.unstack_x(x)
        grid_lower = tf.reduce_min(self._grid)
        grid_upper = tf.reduce_max(self._grid)
        interpolated = tfp.math.interp_regular_1d_grid(
            positions, grid_lower, grid_upper, self._grid_estimations)
        return interpolated
Пример #6
0
    def __init__(self,
                 obs: ztyping.ObsTypeInput,
                 data: ztyping.ParamTypeInput,
                 num_grid_points=1024,
                 binning_method='linear',
                 weights: Union[None, np.ndarray, tf.Tensor] = None,
                 name: str = "KernelDensityEstimationISJ"):
        r"""Kernel density estimation with bandwidth and density computed by ``isj_helper``.

        Kernel Density Estimation is a non-parametric method to approximate
        the density of given points.

        .. math::
            f_h(x) =  \frac{1}{nh} \sum_{i=1}^n K\Big(\frac{x-x_i}{h}\Big)

        It is computed by using a trick described in a paper by Botev et al.
        that uses the fact that the Kernel Density Estimation with a Gaussian
        kernel is a solution to the heat equation.

        Args:
            obs: Observables
            data: 1-D Tensor-like, or a `ZfitData` with a single observable.
            num_grid_points: Upper bound on the number of grid points; the
                value used is the minimum of this and the size of the first
                dimension of `data`.
            binning_method: Forwarded to
                ``isj_helper.calculate_bandwidth_and_density``.
            weights: Weights of each `data`, can be None or Tensor-like with shape compatible with `data`
            name: Name of the PDF

        Raises:
            OverdefinedError: If `weights` is given and `data` is a `ZfitData`
                that already carries weights.
            ShapeIncompatibleError: If `data` is a `ZfitData` with more than
                one observable.
        """
        if isinstance(data, ZfitData):
            data_weights = data.weights
            if data_weights is not None:
                if weights is not None:
                    raise OverdefinedError(
                        "Cannot specify weights and use a `ZfitData` with weights."
                    )
                weights = data_weights

            if data.n_obs > 1:
                raise ShapeIncompatibleError(
                    f"KDE is 1 dimensional, but data {data} has {data.n_obs} observables."
                )
            data = z.unstack_x(data)

        n_events = tf.cast(tf.shape(data)[0], ztypes.float)

        # Never use more grid points than there are data points.
        n_events_int = tf.cast(n_events, ztypes.int)
        grid_cap = tf.constant(num_grid_points, ztypes.int)
        self._num_grid_points = tf.minimum(n_events_int, grid_cap)
        self._binning_method = binning_method
        self._data = tf.convert_to_tensor(data, ztypes.float)
        self._weights = weights
        self._grid = None
        self._grid_data = None

        isj_result = isj_helper.calculate_bandwidth_and_density(
            self._data, self._num_grid_points, self._binning_method,
            self._weights)
        self._bandwidth, self._grid_estimations, self._grid = isj_result

        super().__init__(obs=obs, name=name, params={})
Пример #7
0
def test_onedim_sampling():
    """Compare ``FFTConvPDFV1`` sampling with the ``FFTConvPDFV1NoSampling`` variant.

    Both PDFs convolve the same sum of uniforms with the same kernel; their
    samples are compared with a two-sample Kolmogorov-Smirnov test.
    """
    kernel_space = zfit.Space("obs1", limits=(-3, 3))
    func_space = zfit.Space("obs1", limits=(5, 15))

    # Function to be convolved: equal-fraction sum of two uniforms.
    wide_uniform = zfit.pdf.Uniform(6, 12, obs=func_space)
    narrow_uniform = zfit.pdf.Uniform(11, 11.5, obs=func_space)
    func = zfit.pdf.SumPDF([wide_uniform, narrow_uniform], 0.5)

    # Kernel: equal-fraction sum of two uniforms in the kernel space.
    wide_kernel = zfit.pdf.Uniform(-2, 1, obs=kernel_space)
    narrow_kernel = zfit.pdf.Uniform(-0.5, 1., obs=kernel_space)
    funck = zfit.pdf.SumPDF([wide_kernel, narrow_kernel], 0.5)

    conv_kwargs = dict(func=func, kernel=funck, n=200)
    conv = zfit.pdf.FFTConvPDFV1(**conv_kwargs)
    conv_nosample = FFTConvPDFV1NoSampling(**conv_kwargs)

    n_draws = 10000
    drawn = conv.sample(n_draws)
    drawn_nosample = conv_nosample.sample(n_draws)
    x = z.unstack_x(drawn)
    xns = z.unstack_x(drawn_nosample)

    # The p-value can vary a lot, but passing this threshold still means close.
    pvalue = scipy.stats.ks_2samp(x, xns).pvalue
    assert pvalue > 1e-6
Пример #8
0
    def unstack_x(self,
                  obs: ztyping.ObsTypeInput = None,
                  always_list: bool = False):
        """Return the unstacked data: a list of tensors or a single Tensor.

        Args:
            obs: which observables to return
            always_list: If True, always return a list (also if length 1)

        Returns:
            List(tf.Tensor), or a single tf.Tensor
        """
        # Forward ``always_list`` so the documented flag actually takes effect.
        return z.unstack_x(self.value(obs=obs), always_list=always_list)
Пример #9
0
    def _unnormalized_pdf(self, x):
        """Evaluate the unnormalized density from ``mu``, ``sigma`` and ``theta``.

        Where ``data <= -theta`` the value is
        ``exp(-(log(-data - theta) - mu)**2 / (2 * sigma**2))
        / ((-data - theta) * sigma * sqrt(2 * pi))``; elsewhere it is 0.

        Args:
            x: Data that is unstacked with ``z.unstack_x``.

        Returns:
            The unnormalized density evaluated at ``x``.
        """
        data = z.unstack_x(x)
        mu = self.params['mu']
        sigma = self.params['sigma']
        theta = self.params['theta']

        cond = tf.less_equal(data, -theta)
        # NOTE(review): both expressions below are also evaluated where ``cond``
        # is False (log of a non-positive number); confirm this is acceptable.
        exp_power = -(znp.power(znp.log(-data - theta) - mu, 2) / (2 * znp.power(sigma, 2)))
        outer_factor = (-data - theta) * sigma * znp.sqrt(2 * math.pi)
        # Use ``znp.exp`` so the whole expression stays in the same backend as
        # the surrounding ``znp`` calls.
        func = tf.where(cond,
                        znp.exp(exp_power) / outer_factor,
                        0.)
        return func
Пример #10
0
    def gradients(self,
                  x: ztyping.XType,
                  norm_range: ztyping.LimitsType,
                  params: ztyping.ParamsTypeOpt = None):
        """Return the gradients of the pdf values with respect to ``params``.

        A warning is always emitted, since taking the gradient this way is
        inefficient.

        Args:
            x: Data passed to ``self.pdf``.
            norm_range: Normalization range passed to ``self.pdf``.
            params: Parameters to differentiate with respect to. If None, or if
                the first entry is a string, they are looked up with
                ``self.get_params(only_floating=False, names=params)``.

        Returns:
            The stacked result of ``tf.gradients`` for each unstacked pdf value.
        """
        # The trailing space keeps the two literals from fusing into "withrespect".
        warnings.warn(
            "Taking the gradient *this way* in TensorFlow is inefficient! Consider taking it with "
            "respect to the loss function.")
        if params is not None:
            params = convert_to_container(params)
        if params is None or isinstance(params[0], str):
            params = self.get_params(only_floating=False, names=params)

        probs = self.pdf(x, norm_range=norm_range)
        gradients = [
            tf.gradients(ys=prob, xs=params)
            for prob in z.unstack_x(probs, always_list=True)
        ]
        return tf.stack(gradients)
Пример #11
0
    def _sort_value(self, value, obs: Tuple[str]):
        """Reorder the last axis of ``value`` according to the space or ``obs``.

        Args:
            value: Tensor whose last axis is reordered.
            obs: Observables defining the requested order; if empty, the axes
                of ``self.space`` decide whether a reordering is needed.

        Returns:
            ``value``, re-stacked in the new order if a permutation applies.

        Raises:
            ValueError: If ``obs`` contains observables that are not in
                ``self.obs``.
        """
        obs = convert_to_container(value=obs, container=tuple)

        # Without explicit obs, permute only if the axes are not already 0..n-1.
        space_axes = self.space.axes
        identity_axes = tuple(range(value.shape[-1]))
        if space_axes != identity_axes:
            order = space_axes
        else:
            order = False

        if obs:
            unknown = frozenset(obs) - frozenset(self.obs)
            if unknown:
                raise ValueError(
                    "The observable(s) {} are not contained in the dataset. "
                    "Only the following are: {}".format(unknown, self.obs))
            order = self.space.get_reorder_indices(obs=obs)

        if order:
            columns = z.unstack_x(value, always_list=True)
            value = z.stack_x([columns[i] for i in order])

        return value
Пример #12
0
def exp_icdf(x, params, model):
    """Return ``log(lambda * x') / lambda`` with ``x'`` the shifted, unstacked ``x``.

    ``lambda`` is read from ``params['lambda']`` and the shift is applied with
    ``model._shift_x``.
    """
    rate = params['lambda']
    shifted = model._shift_x(z.unstack_x(x))
    return zfit.z.math.log(rate * shifted) / rate
Пример #13
0
def exp_icdf(x, params, model):
    """Return ``log(lambda * x') / lambda`` with ``x'`` the shifted, unstacked ``x``.

    ``lambda`` is read from ``params["lambda"]`` and the shift is applied with
    ``model._shift_x``.
    """
    rate = params["lambda"]
    shifted = model._shift_x(z.unstack_x(x))
    return znp.log(rate * shifted) / rate
Пример #14
0
    def __init__(self,
                 obs: ztyping.ObsTypeInput,
                 data: ztyping.ParamTypeInput,
                 bandwidth: ztyping.ParamTypeInput = None,
                 num_grid_points=1024,
                 binning_method='linear',
                 implementation='cpp',
                 weights: Union[None, np.ndarray, tf.Tensor] = None,
                 betas=[0.25, 0.25],
                 name: str = "KernelDensityEstimationK1"):
        r"""Kernel density estimation evaluated on a grid via ``hofmeyr_helper``.

        Kernel Density Estimation is a non-parametric method to approximate
        the density of given points.

        .. math::
            f_h(x) =  \frac{1}{nh} \sum_{i=1}^n K\Big(\frac{x-x_i}{h}\Big)

        It is computed by using a convolution of the data with the kernels
        evaluated at fixed grid points and then interpolating between these
        points to get an estimate for x.

        Args:
            obs: Observables
            data: 1-D Tensor-like, or a `ZfitData` with a single observable.
            bandwidth: Bandwidth of the kernel; converted to a float tensor and
                registered as the `bandwidth` parameter.
                NOTE(review): the default of None is passed straight to
                ``tf.convert_to_tensor`` - confirm a value is always required.
            num_grid_points: Upper bound on the number of grid points; the
                value used is the minimum of this and the size of the first
                dimension of `data`.
            binning_method: Forwarded to ``binning_helper.bin``.
            implementation: ``'numpy'`` or ``'cpp'`` select the corresponding
                ``hofmeyr_helper`` estimator; any other value uses
                ``hofmeyr_helper.calculate_estimate``.
            weights: Weights of each `data`, can be None or Tensor-like with shape compatible with `data`
            betas: Converted to a float tensor and passed to the estimator.
            name: Name of the PDF

        Raises:
            OverdefinedError: If `weights` is given and `data` is a `ZfitData`
                that already carries weights.
            ShapeIncompatibleError: If `data` is a `ZfitData` with more than
                one observable.
        """
        if isinstance(data, ZfitData):
            data_weights = data.weights
            if data_weights is not None:
                if weights is not None:
                    raise OverdefinedError(
                        "Cannot specify weights and use a `ZfitData` with weights."
                    )
                weights = data_weights

            if data.n_obs > 1:
                raise ShapeIncompatibleError(
                    f"KDE is 1 dimensional, but data {data} has {data.n_obs} observables."
                )
            data = z.unstack_x(data)

        n_events = tf.cast(tf.shape(data)[0], ztypes.float)

        # Never use more grid points than there are data points.
        n_events_int = tf.cast(n_events, ztypes.int)
        grid_cap = tf.constant(num_grid_points, ztypes.int)
        self._num_grid_points = tf.minimum(n_events_int, grid_cap)
        self._binning_method = binning_method
        self._data = tf.convert_to_tensor(data, ztypes.float)
        self._bandwidth = tf.convert_to_tensor(bandwidth, ztypes.float)
        self._weights = weights
        self._betas = tf.convert_to_tensor(betas, ztypes.float)
        self._implementation = implementation
        self._grid = None
        self._grid_data = None

        self._grid = binning_helper.generate_grid(
            self._data, num_grid_points=self._num_grid_points)
        self._grid_data = binning_helper.bin(
            self._binning_method, self._data, self._grid, self._weights)

        # Pick the estimator lazily so only the selected helper is looked up.
        if self._implementation == 'numpy':
            estimator = hofmeyr_helper.calculate_estimate_numpy
        elif self._implementation == 'cpp':
            estimator = hofmeyr_helper.calculate_estimate_cpp
        else:
            estimator = hofmeyr_helper.calculate_estimate
        self._grid_estimations = estimator(
            self._grid, self._grid_data, self._betas, self._bandwidth)

        super().__init__(obs=obs, name=name,
                         params={'bandwidth': self._bandwidth})
Пример #15
0
 def _unnormalized_pdf(self, x: "zfit.Data", norm_range=False):
     """Return ``self.distribution.prob`` of the unstacked ``x``.

     ``norm_range`` is not used.
     """
     # TODO: use this? change shaping below?
     unstacked = z.unstack_x(x)
     probability = self.distribution.prob(value=unstacked,
                                          name="unnormalized_pdf")
     return probability
Пример #16
0
def test_conv_2D_simple():
    """Check ``FFTConvPDFV1`` on a two-dimensional convolution (currently disabled).

    The test raises ``WorkInProgressError`` right after switching off graph
    mode, so nothing below the ``raise`` is executed. The remaining body
    builds a 2D function and kernel, evaluates the convolution on random and
    grid points, checks the integral and the number of returned values, and
    plots the pdf and the samples of the sampling and no-sampling variants.
    """
    zfit.run.set_graph_mode(False)  # TODO: remove, just for debugging
    # Everything after this line is unreachable until 2D convolution is implemented.
    raise WorkInProgressError("2D convolution not yet implemented, re-activate if so")
    n_points = 1000
    # Kernel space: product of the two one-dimensional observables.
    obs1 = zfit.Space("obs1", limits=(-2, 4))
    obs2 = zfit.Space("obs2", limits=(-6, 4))
    obskernel = obs1 * obs2

    param2 = zfit.Parameter('param2', 0.4)
    gauss1 = zfit.pdf.Gauss(1., 0.5, obs=obs1)
    gauss22 = zfit.pdf.CrystalBall(0.0, param2, -0.2, 3, obs=obs2)

    # Space of the function that is convolved with the kernel.
    obs1func = zfit.Space("obs1", limits=(4, 10))
    obs2func = zfit.Space("obs2", limits=(-6, 4))
    obs_func = obs1func * obs2func

    gauss21 = zfit.pdf.Gauss(-0.5, param2, obs=obs2func)
    func1 = zfit.pdf.Uniform(5, 8, obs=obs1func)
    func2 = zfit.pdf.Uniform(6, 7, obs=obs1func)
    func = zfit.pdf.SumPDF([func1, func2], 0.5)
    func = func * gauss21
    gauss = gauss1 * gauss22
    conv = zfit.pdf.FFTConvPDFV1(func=func, kernel=gauss)

    # Random points and a regular mesh inside the function space.
    start = obs_func.rect_lower
    stop = obs_func.rect_upper
    x_tensor = tf.random.uniform((n_points, 2), start, stop)
    x_tensor = tf.reshape(x_tensor, (-1, 2))
    linspace = tf.linspace(start, stop, num=n_points)
    linspace = tf.transpose(tf.meshgrid(*tf.unstack(linspace, axis=-1)))
    linspace_func = tf.reshape(linspace, (-1, 2))

    # linspace_full = tf.linspace((-8, -8), (12, 12), num=n_points)
    # linspace_full = tf.transpose(tf.meshgrid(*tf.unstack(linspace_full, axis=-1)))
    # linspace_full = tf.reshape(linspace_full, (-1, 2))

    # Regular mesh inside the kernel space.
    linspace_kernel = tf.linspace(obskernel.rect_lower,
                                  obskernel.rect_upper, num=n_points)
    linspace_kernel = tf.transpose(tf.meshgrid(*tf.unstack(linspace_kernel, axis=-1)))
    linspace_kernel = tf.reshape(linspace_kernel, (-1, 2))
    # linspace_kernel = obskernel.filter(linspace_full)
    # linspace_func = obs_func.filter(linspace_full)

    x = zfit.Data.from_tensor(obs=obs_func, tensor=x_tensor)
    linspace_data = zfit.Data.from_tensor(obs=obs_func, tensor=linspace)
    probs_rnd = conv.pdf(x=x)
    probs = conv.pdf(x=linspace_data)

    # Numpy doesn't support ndim convolution?
    true_probs = true_conv_2d_np(func, gauss, obsfunc=obs_func,
                                 xfunc=linspace_func, xkernel=linspace_kernel)
    import matplotlib.pyplot as plt
    # np.testing.assert_allclose(probs, true_probs, rtol=0.2, atol=0.1)
    # The convolution is expected to integrate to 1 over the function space.
    integral = conv.integrate(limits=obs_func)
    assert pytest.approx(1, rel=1e-3) == integral.numpy()
    probs_np = probs_rnd.numpy()
    assert len(probs_np) == n_points
    # probs_plot = np.reshape(probs_np, (-1, n_points))
    # x_plot = linspace[0:, ]
    # probs_plot_projx = np.sum(probs_plot, axis=0)
    # plt.plot(x_plot, probs_np)
    # probs_plot = np.reshape(probs_np, (n_points, n_points))
    # plt.imshow(probs_plot)
    # plt.show()

    # Show the reference and the zfit convolution as images.
    true_probsr = tf.reshape(true_probs, (n_points, n_points))
    probsr = tf.reshape(probs, (n_points, n_points))
    plt.figure()
    plt.imshow(true_probsr, label='true probs')
    plt.title('true probs')

    plt.figure()
    plt.imshow(probsr, label='zfit conv')
    plt.title('zfit conv')

    plt.show(block=False)

    # test the sampling
    conv_nosample = FFTConvPDFV1NoSampling(func=func, kernel=gauss)

    npoints_sample = 10000
    sample = conv.sample(npoints_sample)
    sample_nosample = conv_nosample.sample(npoints_sample)
    x, y = z.unstack_x(sample)
    xns, yns = z.unstack_x(sample_nosample)

    # 2D histograms of both sampling variants.
    plt.figure()
    plt.hist2d(x, y, bins=30)
    plt.title('custom sampling, addition')

    plt.figure()
    plt.hist2d(xns, yns, bins=30)
    plt.title('fallback sampling, accept-reject')

    # Overlaid 1D projections of both variants, first in x, then in y.
    plt.figure()
    plt.hist(x.numpy(), bins=50, label='custom', alpha=0.5)
    plt.hist(xns.numpy(), bins=50, label='fallback', alpha=0.5)
    plt.legend()
    plt.title('x')
    plt.show()

    plt.figure()
    plt.hist(y.numpy(), bins=50, label='custom', alpha=0.5)
    plt.hist(yns.numpy(), bins=50, label='fallback', alpha=0.5)
    plt.title('y')
    plt.legend()
    plt.show()
Пример #17
0
    def __init__(self,
                 obs: ztyping.ObsTypeInput,
                 data: ztyping.ParamTypeInput,
                 bandwidth: ztyping.ParamTypeInput = None,
                 kernel=tfd.Normal,
                 support=None,
                 use_grid=False,
                 num_grid_points=1024,
                 binning_method='linear',
                 weights: Union[None, np.ndarray, tf.Tensor] = None,
                 name: str = "KernelDensityEstimation"):
        r"""Kernel density estimation built as a ``tfd.MixtureSameFamily``.

        Kernel Density Estimation is a non-parametric method to approximate
        the density of given points.

        .. math::
            f_h(x) =  \frac{1}{nh} \sum_{i=1}^n K\Big(\frac{x-x_i}{h}\Big)

        Args:
            obs: Observables
            data: 1-D Tensor-like, or a `ZfitData` with a single observable.
            bandwidth: Bandwidth of the kernel; converted to a float tensor,
                used as the `scale` of every kernel and registered as the
                `bandwidth` parameter.
            kernel: Distribution class called with `loc` and `scale` to create
                the mixture components.
            support: Not used in this constructor.
            use_grid: If True, kernels are placed on a grid generated from the
                data and mixed with the binned data; otherwise one kernel is
                placed on every data point.
            num_grid_points: Upper bound on the number of grid points; the
                value used is the minimum of this and the size of the first
                dimension of `data`.
            binning_method: Forwarded to ``binning_helper.bin``.
            weights: Weights of each `data`, can be None or Tensor-like with shape compatible with `data`
            name: Name of the PDF

        Raises:
            OverdefinedError: If `weights` is given and `data` is a `ZfitData`
                that already carries weights.
            ShapeIncompatibleError: If `data` is a `ZfitData` with more than
                one observable.
        """
        if isinstance(data, ZfitData):
            data_weights = data.weights
            if data_weights is not None:
                if weights is not None:
                    raise OverdefinedError(
                        "Cannot specify weights and use a `ZfitData` with weights."
                    )
                weights = data_weights

            if data.n_obs > 1:
                raise ShapeIncompatibleError(
                    f"KDE is 1 dimensional, but data {data} has {data.n_obs} observables."
                )
            data = z.unstack_x(data)

        n_events = tf.cast(tf.shape(data)[0], ztypes.float)

        def make_components(loc, scale):
            # One kernel per location, wrapped in ``tfd.Independent``.
            return tfd.Independent(kernel(loc=loc, scale=scale))

        # Never use more grid points than there are data points.
        n_events_int = tf.cast(n_events, ztypes.int)
        grid_cap = tf.constant(num_grid_points, ztypes.int)
        self._num_grid_points = tf.minimum(n_events_int, grid_cap)
        self._binning_method = binning_method
        self._data = tf.convert_to_tensor(data, ztypes.float)
        self._bandwidth = tf.convert_to_tensor(bandwidth, ztypes.float)
        self._kernel = kernel
        self._weights = weights
        self._grid = None
        self._grid_data = None

        if use_grid:
            self._grid = binning_helper.generate_grid(
                self._data, num_grid_points=self._num_grid_points)
            self._grid_data = binning_helper.bin(
                self._binning_method, self._data, self._grid, self._weights)
            mixture_probs = self._grid_data
            locations = self._grid
        else:
            if weights is not None:
                mixture_probs = weights / tf.reduce_sum(weights)
            else:
                # Uniform mixture weight of 1/size for every data point.
                mixture_probs = tf.broadcast_to(
                    1 / n_events, shape=(tf.cast(n_events, ztypes.int), ))
            locations = self._data

        mixture_distribution = tfd.Categorical(probs=mixture_probs)
        components_distribution = make_components(loc=locations,
                                                  scale=self._bandwidth)

        def dist_kwargs():
            # Handed over as a callable that returns the mixture arguments.
            return dict(mixture_distribution=mixture_distribution,
                        components_distribution=components_distribution)

        super().__init__(obs=obs,
                         params={'bandwidth': self._bandwidth},
                         dist_params={},
                         dist_kwargs=dist_kwargs,
                         distribution=tfd.MixtureSameFamily,
                         name=name)
Пример #18
0
 def func2_pure(self, x):
     """Return ``param2 * x + param3`` for the unstacked ``x``."""
     unstacked = z.unstack_x(x)
     scaled = param2 * unstacked
     return scaled + param3
Пример #19
0
 def func1_pure(self, x):
     """Return ``param1 * x`` for the unstacked ``x``."""
     unstacked = z.unstack_x(x)
     return param1 * unstacked
Пример #20
0
 def _func(self, x):
     """Return ``exp(-((x - mu) / sigma)**2)`` for the unstacked ``x``."""
     mu, sigma = self.params["mu"], self.params["sigma"]
     pull = (z.unstack_x(x) - mu) / sigma
     return z.exp(-z.square(pull))
Пример #21
0
def func3_2deps(x):
    """Return ``a**2 + b**2`` for the two unstacked components of ``x``."""
    first, second = z.unstack_x(x)
    return first**2 + second**2
Пример #22
0
 def _unnormalized_pdf(self, x):
     """Return ``exp(-((x - mu) / sigma)**2)`` for the unstacked ``x``."""
     mu, sigma = self.params['mu'], self.params['sigma']
     unstacked = z.unstack_x(x)
     pull = (unstacked - mu) / sigma
     return z.exp(-z.square(pull))
Пример #23
0
def func1_5deps(x):
    """Return ``a + b * c**2 + d**2 * e**3`` for the five components of ``x``."""
    a, b, c, d, e = z.unstack_x(x)
    second_term = b * c**2
    third_term = d**2 * e**3
    return a + second_term + third_term
Пример #24
0
 def _func(self, x):
     """Return ``exp(-((x - mu) / sigma)**2)`` for the unstacked ``x``."""
     mu, sigma = self.params['mu'], self.params['sigma']
     centered = z.unstack_x(x) - mu
     return z.exp(-z.square(centered / sigma))
Пример #25
0
    def _unnormalized_pdf(self, x):  # implement function
        """Return ``exp(alpha * x)`` for the unstacked ``x``."""
        unstacked = z.unstack_x(x)
        alpha = self.params['alpha']
        exponent = alpha * unstacked
        return z.exp(exponent)
Пример #26
0
 def _unnormalized_pdf(self, x):
     """Call ``func`` with the unstacked ``x`` and the parameters in ``_PARAMS``.

     Each name in ``self._PARAMS`` is looked up in ``self.params`` and passed
     to ``func`` as a keyword argument.
     """
     keyword_params = {name: self.params[name] for name in self._PARAMS}
     unstacked = z.unstack_x(x)
     return func(unstacked, **keyword_params)