def test_rmh(self):
        """Recursive maxima hunting must select the kinks of the class mean.

        Two classes of Gaussian-process trajectories are simulated: one
        without an explicit mean and one whose mean is piecewise linear with
        slope changes at 0.25, 0.5 and 0.75. The selected grid points are
        compared with those three locations using a relative tolerance of
        0.1.
        """
        total_samples = 10000
        grid_size = 100
        half = total_samples // 2

        def kinked_mean(t):
            # Sum of absolute values: the slope changes at 0.25, 0.5, 0.75.
            outer = np.abs(t - 0.25) + np.abs(t - 0.75)
            return (np.abs(t - 0.25) - 2 * np.abs(t - 0.5)
                    + np.abs(t - 0.75)) if outer is None else (
                np.abs(t - 0.25) - 2 * np.abs(t - 0.5) + np.abs(t - 0.75))

        class_0 = make_gaussian_process(
            n_samples=half,
            n_features=grid_size,
            random_state=0,
        )
        class_1 = make_gaussian_process(
            n_samples=half,
            n_features=grid_size,
            mean=kinked_mean,
            random_state=1,
        )
        X = skfda.concatenate((class_0, class_1))

        # First half of the samples is labelled 0, second half 1.
        y = np.zeros(total_samples)
        y[half:] = 1

        rmh_module = vs.recursive_maxima_hunting
        selector = vs.RecursiveMaximaHunting(
            correction=rmh_module.GaussianSampleCorrection(),
            stopping_condition=rmh_module.ScoreThresholdStop(threshold=0.05),
        )
        selector.fit(X, y)

        selected_points = X.grid_points[0][selector.get_support()]
        np.testing.assert_allclose(
            selected_points, [0.25, 0.5, 0.75], rtol=1e-1)
示例#2
0
    def test_concatenate(self):
        """Concatenating basis objects must stack their coefficients."""
        basis_objects = []
        for first_value in (0, 10):
            values = np.arange(first_value, first_value + 10)
            grid = FDataGrid([values])
            basis_objects.append(grid.to_basis(Fourier(n_basis=5)))
        left, right = basis_objects

        joined = concatenate([left, right])

        # One sample per input, scalar-valued functions of one variable.
        np.testing.assert_equal(joined.n_samples, 2)
        np.testing.assert_equal(joined.dim_codomain, 1)
        np.testing.assert_equal(joined.dim_domain, 1)
        np.testing.assert_array_equal(
            joined.coefficients,
            np.concatenate([left.coefficients, right.coefficients]),
        )
示例#3
0
    def test_concatenate2(self):
        """Concatenating grids keeps the data and the first operand's names."""
        values_a, values_b = np.arange(0, 10), np.arange(10, 20)
        grid_a = FDataGrid([values_a])
        grid_b = FDataGrid([values_b])

        # Only the first operand gets custom axis names.
        grid_a.argument_names = ["x"]
        grid_a.coordinate_names = ["y"]

        joined = concatenate([grid_a, grid_b])

        np.testing.assert_equal(joined.n_samples, 2)
        np.testing.assert_equal(joined.dim_codomain, 1)
        np.testing.assert_equal(joined.dim_domain, 1)
        np.testing.assert_array_equal(
            joined.data_matrix[..., 0],
            [values_a, values_b],
        )
        # The names of the result must be those set on the first operand.
        np.testing.assert_array_equal(
            grid_a.argument_names, joined.argument_names)
        np.testing.assert_array_equal(
            grid_a.coordinate_names, joined.coordinate_names)
示例#4
0
def _anova_bootstrap(fd_grouped, n_reps, random_state=None, p=2,
                     equal_var=True):
    """Simulate the sampling distribution of the asymptotic ANOVA statistic.

    For every group a gaussian process is simulated ``n_reps`` times using an
    estimated covariance, and ``v_asymptotic_stat`` is evaluated once per
    repetition on the simulated trajectories of all groups.

    Args:
        fd_grouped: Sequence with the functional data of each group. At
            least two groups are required, all with the same domain range.
        n_reps (int): Number of simulated repetitions.
        random_state (optional): Seed or random state; it is passed through
            ``check_random_state`` before use.
        p (int, optional): Forwarded to ``v_asymptotic_stat``. Defaults to 2.
        equal_var (bool, optional): If True (default) a single covariance
            estimated from all the groups together is used for every group;
            otherwise each group uses its own estimated covariance.

    Returns:
        numpy.array of length ``n_reps`` with the simulated statistic values.

    Raises:
        ValueError: If fewer than two groups are given or their domain
            ranges differ.
    """
    n_groups = len(fd_grouped)
    if n_groups < 2:
        raise ValueError("At least two groups must be passed in fd_grouped.")

    reference = fd_grouped[0]
    if any(not np.array_equal(fd.domain_range, reference.domain_range)
           for fd in fd_grouped[1:]):
        raise ValueError("Domain range must match for every FData in "
                         "fd_grouped.")

    start, stop = reference.domain_range[0]

    # Number of samples of each group.
    group_sizes = [fd.n_samples for fd in fd_grouped]

    # Build a random state object in case random_state is an int.
    rng = check_random_state(random_state)

    if equal_var:
        # Single covariance estimated from all the samples, shared by all
        # the groups.
        pooled_cov = concatenate(fd_grouped).cov().data_matrix[0, ..., 0]
        covariances = [pooled_cov] * n_groups
    else:
        # One covariance estimate per group.
        covariances = [fd.cov().data_matrix[0, ..., 0] for fd in fd_grouped]

    # The simulated processes must have as many points as the covariances
    # have features.
    n_features = covariances[0].shape[0]

    # n_reps simulated observations for each group's gaussian process. The
    # same random state object is shared by all the simulations.
    simulated = [
        make_gaussian_process(n_reps, n_features=n_features, start=start,
                              stop=stop, cov=group_cov, random_state=rng)
        for group_cov in covariances
    ]

    v_samples = np.empty(n_reps)
    for rep in range(n_reps):
        # NOTE(review): no grid points are passed here, so the grid
        # presumably falls back to the FDataGrid default rather than
        # [start, stop] -- confirm this is intended.
        rep_curves = FDataGrid([s.data_matrix[rep, ..., 0]
                                for s in simulated])
        v_samples[rep] = v_asymptotic_stat(rep_curves, group_sizes, p=p)
    return v_samples
示例#5
0
def oneway_anova(*args, n_reps=2000, return_dist=False, random_state=None,
                 p=2, equal_var=True):
    r"""
    Perform a one-way functional ANOVA.

    This function implements an asymptotic method to test the following
    null hypothesis:

    Let :math:`\{X_i\}_{i=1}^k` be a set of :math:`k` independent samples,
    each one with :math:`n_i` trajectories, and let :math:`E(X_i) = m_i(t)`.
    The null hypothesis is defined as:

    .. math::
        H_0: m_1(t) = \dots = m_k(t)

    The value of the statistic
    :func:`~skfda.inference.anova.v_sample_stat` :math:`V_n` is calculated
    with the means of the given samples. Under the null hypothesis this
    statistic is asymptotically equivalent to
    :func:`~skfda.inference.anova.v_asymptotic_stat`, where each sample
    is replaced by a gaussian process with mean zero and the same
    covariance function as the original.

    The distribution of the asymptotic statistic :math:`V` is simulated
    using a bootstrap procedure: one observation of each of the :math:`k`
    gaussian processes defined above is simulated and the value of
    :func:`~skfda.inference.anova.v_asymptotic_stat` is calculated. This
    is repeated `n_reps` times, creating a sampling distribution of the
    statistic. The p-value is the proportion of simulated values that are
    greater than :math:`V_n`.

    This procedure is from Cuevas[1].

    Args:
        fd1, fd2, ... (FData): The sample measurements for each group. All
            of them must have the same type and domain range, and the same
            sample points (FDataGrid) or the same basis (otherwise).

        n_reps (int, optional): Number of simulations for the bootstrap
            procedure. Defaults to 2000 (this value may change in future
            versions).

        return_dist (bool, optional): Flag to indicate if the function should
            also return a numpy.array with the simulated sampling
            distribution. Defaults to False.

        random_state (optional): Random state.

        p (int, optional): p of the lp norm. Must be greater than or equal
            to 1. If p='inf' or p=np.inf the L infinity metric is used.
            Defaults to 2.

        equal_var (bool, optional): If True (default), perform a one-way
            ANOVA assuming the same covariance operator for all the groups,
            else consider an independent covariance operator for each group.

    Returns:
        Value of the sample statistic and p-value. If `return_dist` is True,
        the sampling distribution of the simulated asymptotic statistic is
        returned as a third element.

    Return type:
        (float, float) or (float, float, numpy.array)

    Raises:
        ValueError: If fewer than two groups are passed, an argument is not
            an FData, `n_reps` is less than 1, the domain ranges differ or
            the FDataGrid sample points differ.
        TypeError: If the arguments mix different FData types.
        NotImplementedError: If the groups do not share the same basis.

    Examples:
        >>> from skfda.inference.anova import oneway_anova
        >>> from skfda.datasets import fetch_gait
        >>> from numpy.random import RandomState
        >>> from numpy import printoptions

        >>> fd = fetch_gait()["data"].coordinates[1]
        >>> fd1, fd2, fd3 = fd[:13], fd[13:26], fd[26:]
        >>> oneway_anova(fd1, fd2, fd3, random_state=RandomState(42))
        (179.52499999999998, 0.5945)
        >>> _, _, dist = oneway_anova(fd1, fd2, fd3, n_reps=3,
        ...     random_state=RandomState(42),
        ...     return_dist=True)
        >>> with printoptions(precision=4):
        ...     print(dist)
        [ 184.0698 212.7395  195.3663]

    References:
        [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An
        anova test for functional data". *Computational Statistics & Data
        Analysis*, 47:111-112, 02 2004
    """

    # --- Argument validation -------------------------------------------
    if len(args) < 2:
        raise ValueError("At least two groups must be passed as parameter.")
    if any(not isinstance(fd, FData) for fd in args):
        raise ValueError("Argument type must inherit FData.")
    if n_reps < 1:
        raise ValueError("Number of simulations must be positive.")

    first_group, other_groups = args[0], args[1:]

    if any(not isinstance(fd, type(first_group)) for fd in other_groups):
        raise TypeError('Found mixed FData types in arguments.')

    if any(not np.array_equal(fd.domain_range, first_group.domain_range)
           for fd in other_groups):
        raise ValueError("Domain range must match for every FData passed.")

    if isinstance(first_group, FDataGrid):
        # Every group must be discretized on exactly the same points.
        grids = [fd.sample_points[0].tolist() for fd in args]
        if grids.count(grids[0]) != len(grids):
            raise ValueError("All FDataGrid passed must have the same sample "
                             "points.")
    else:
        # Otherwise the groups are expected to expose a common basis.
        bases = [fd.basis for fd in args]
        if bases.count(bases[0]) != len(bases):
            raise NotImplementedError("Not implemented for FDataBasis with "
                                      "different basis.")

    # --- Sample statistic ----------------------------------------------
    # Functional object whose samples are the means of the groups.
    group_means = concatenate([fd.mean() for fd in args])
    group_sizes = [fd.n_samples for fd in args]
    vn = v_sample_stat(group_means, group_sizes, p=p)

    # --- Simulated sampling distribution and p-value ---------------------
    simulation = _anova_bootstrap(args, n_reps,
                                  random_state=random_state, p=p,
                                  equal_var=equal_var)

    n_exceeding = np.sum(simulation > vn)
    p_value = n_exceeding / len(simulation)

    return (vn, p_value, simulation) if return_dist else (vn, p_value)
示例#6
0
# consists in 39 different trajectories, each representing the movement of the
# hip of each of the boys studied.
fig = fd_hip.plot()

###############################################################################
# The sample is going to be divided into three different groups. Then we are
# going to apply the ANOVA procedure to these groups to test whether the means
# of the three groups are equal or not.

fd_hip1 = fd_hip[0:13]
fd_hip2 = fd_hip[13:26]
fd_hip3 = fd_hip[26:39]
# Plot all the curves, passing one group label per sample.
# NOTE(review): the third group is labelled 39 rather than 2 -- presumably any
# distinct value is accepted as a label; confirm.
fd_hip.plot(group=[0 if i < 13 else 1 if i < 26 else 39 for i in range(39)])

# Plot the mean curve of each of the three groups.
means = [fd_hip1.mean(), fd_hip2.mean(), fd_hip3.mean()]
fd_means = skfda.concatenate(means)
fig = fd_means.plot()

###############################################################################
# At this point it is time to perform the *ANOVA* test. This functionality is
# implemented in the function :func:`~skfda.inference.anova.oneway_anova`. As
# it is an asymptotic method, it is possible to set the number of simulations
# used to approximate the result of the statistic. It is also possible to set
# the :math:`p` of the :math:`L_p` norm used in the calculations (defaults to
# 2).

v_n, p_val = oneway_anova(fd_hip1, fd_hip2, fd_hip3)

################################################################################
# The function returns first the statistic :func:`~skfda.inference.anova
# .v_sample_stat` used to measure the variability between groups,