def test_rmh(self):
        """Check that recursive maxima hunting selects the expected points.

        Two equally sized groups of trajectories are generated with
        ``make_gaussian_process``. The second group uses a piecewise linear
        mean whose slope changes at 0.25, 0.5 and 0.75. After fitting, the
        selected grid points are compared with those three locations using
        a relative tolerance of 0.1.
        """
        n_samples = 10000
        n_features = 100
        half = n_samples // 2

        def kinked_mean(t):
            # Piecewise linear function with kinks at 0.25, 0.5 and 0.75.
            return np.abs(t - 0.25) - 2 * np.abs(t - 0.5) + np.abs(t - 0.75)

        # First group: default mean. Second group: kinked mean.
        group_0 = make_gaussian_process(
            n_samples=half,
            n_features=n_features,
            random_state=0,
        )
        group_1 = make_gaussian_process(
            n_samples=half,
            n_features=n_features,
            mean=kinked_mean,
            random_state=1,
        )
        X = skfda.concatenate((group_0, group_1))

        # Labels follow the concatenation order: zeros first, then ones.
        y = np.zeros(n_samples)
        y[half:] = 1

        rmh = vs.RecursiveMaximaHunting(
            correction=vs.recursive_maxima_hunting.GaussianSampleCorrection(),
            stopping_condition=vs.recursive_maxima_hunting.ScoreThresholdStop(
                threshold=0.05,
            ),
        )
        rmh.fit(X, y)

        support_mask = rmh.get_support()
        selected_points = X.grid_points[0][support_mask]
        np.testing.assert_allclose(
            selected_points,
            [0.25, 0.5, 0.75],
            rtol=1e-1,
        )
	def get_XY_gaussian_process(self, n):
		"""Generate n Gaussian process paths with a random linear trend added.

		Parameters
		----------
		n: int
			Number of samples.

		Returns
		-------
		X: array, shape (n, self.npoints, self.d)
			Sample paths. Each path is the output of ``make_gaussian_process``
			plus ``slope * t``, where ``t`` runs over ``self.npoints`` evenly
			spaced values in [0, 1].
		Y: array, shape (n,)
			Euclidean norm of the slope vector of each sample.
		"""
		grid = np.linspace(0, 1, self.npoints)

		# One slope per sample and coordinate, uniform in [-3, 3). The slopes
		# are drawn before the paths so the order of random draws is fixed.
		slope = 3 * (2 * np.random.random((n, self.d)) - 1)
		Y = np.sqrt(np.sum(slope ** 2, axis=1))

		X = np.zeros((n, self.npoints, self.d))
		for i in range(n):
			gp = make_gaussian_process(n_features=self.npoints, n_samples=self.d, cov=Exponential())
			# assumes gp.data_matrix has shape (self.d, self.npoints, 1), so
			# that .T[0] is (self.npoints, self.d) -- TODO confirm
			X[i] = gp.data_matrix.T[0]

		# Broadcast to (n, npoints, d): entry [i, j, k] is slope[i, k] * grid[j].
		trend = slope[:, np.newaxis, :] * grid[np.newaxis, :, np.newaxis]

		return X + trend, Y
Пример #3
0
def _anova_bootstrap(fd_grouped, n_reps, random_state=None, p=2,
                     equal_var=True):

    n_groups = len(fd_grouped)
    if n_groups < 2:
        raise ValueError("At least two groups must be passed in fd_grouped.")

    for fd in fd_grouped[1:]:
        if not np.array_equal(fd.domain_range, fd_grouped[0].domain_range):
            raise ValueError("Domain range must match for every FData in "
                             "fd_grouped.")

    start, stop = fd_grouped[0].domain_range[0]

    sizes = [fd.n_samples for fd in fd_grouped]  # List with sizes of each group

    # Instance a random state object in case random_state is an int
    random_state = check_random_state(random_state)

    if equal_var:
        k_est = concatenate(fd_grouped).cov().data_matrix[0, ..., 0]
        k_est = [k_est] * len(fd_grouped)
    else:
        # Estimating covariances for each group
        k_est = [fd.cov().data_matrix[0, ..., 0] for fd in fd_grouped]

    # Number of sample points for gaussian processes have to match
    # the features of the covariances.
    n_features = k_est[0].shape[0]

    # Simulating n_reps observations for each of the n_groups gaussian
    # processes
    sim = [make_gaussian_process(n_reps, n_features=n_features, start=start,
                                 stop=stop, cov=k_est[i],
                                 random_state=random_state)
           for i in range(n_groups)]

    v_samples = np.empty(n_reps)
    for i in range(n_reps):
        fd = FDataGrid([s.data_matrix[i, ..., 0] for s in sim])
        v_samples[i] = v_asymptotic_stat(fd, sizes, p=p)
    return v_samples
Пример #4
0
# Label of the group each observation belongs to.
# NOTE(review): the slice bounds 10 and 20 are hard-coded; presumably
# n_samples == 10 and n_groups == 3 -- confirm against the definitions above.
groups = np.full(n_samples * n_groups, 'Sample 1')
groups[10:20] = 'Sample 2'
groups[20:] = 'Sample 3'

###############################################################################
# The first simulation uses a low variance, :math:`\sigma^2 = 0.01`. In this
# case the differences between the group means should be clear, and the
# p-value of the test should be close to zero.

sigma2 = 0.01
cov = WhiteNoise(variance=sigma2)

fd1 = make_gaussian_process(
    n_samples,
    n_features=n_features,
    start=start,
    stop=stop,
    mean=m1,
    cov=cov,
    random_state=1,
)
fd2 = make_gaussian_process(
    n_samples,
    n_features=n_features,
    start=start,
    stop=stop,
    mean=m2,
    cov=cov,
    random_state=2,
)
fd3 = make_gaussian_process(n_samples,
                            mean=m3,
                            cov=cov,
                            n_features=n_features,
                            random_state=3,
Пример #5
0
# To instantiate a
# :class:`~skfda.exploratory.visualization.SurfaceBoxplot`, a functional data
# object with a two-dimensional domain must be generated. In this example, an
# FDataGrid representing a function
# :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}` is constructed, using as an
# example a Brownian process extruded into another dimension.
#
# The values of the Brownian process are generated using
# :func:`~skfda.datasets.make_gaussian_process`, which returns an FDataGrid
# object whose ``data_matrix`` stores the values needed.
n_samples = 10
n_features = 10

fd = make_gaussian_process(
    n_samples=n_samples,
    n_features=n_features,
    random_state=1,
)
fd.dataset_name = "Brownian process"

##############################################################################
# Next, the values generated for one dimension of the domain are extruded
# along another dimension, obtaining a three-dimensional matrix or cube
# (two-dimensional domain and one-dimensional image).

cube = np.reshape(
    np.repeat(fd.data_matrix, n_features),
    (n_samples, n_features, n_features),
)

##############################################################################
# We can now plot the extruded trajectories.

fd_2 = FDataGrid(data_matrix=cube,