Пример #1
0
def load(n_samples=10000, n_features=10, n_manifolds=10, seed=0):
    """Generate a multi spiral bands dataset randomly placed in many dims

    Each manifold is produced by ``load_one`` with randomly drawn number of
    turns, radius and hole flag, then randomly shifted along the first three
    dimensions, randomly rotated and has its feature columns shuffled. All
    the manifolds are finally stacked and rotated once more together.

    Parameters
    ----------

    n_samples : total number of samples to request; they are split as evenly
                as possible between the manifolds (the first
                ``n_samples % n_manifolds`` manifolds get one extra sample)

    n_features : total number of dimensions (must be at least 3)

    n_manifolds : number of manifolds to generate

    seed : seed of the pseudo random generator, for reproducible datasets.
           The same seed is also forwarded to every ``load_one`` call.


    Returns
    -------

    data : a 2D array with one row per generated sample and ``n_features``
           columns, embedding all the generated spiral bands
           (NOTE(review): ``load_one`` is not visible here; presumably it
           drops samples when ``hole`` is true, in which case there are
           fewer rows than ``n_samples`` -- confirm)

    manifolds : a list of ``n_manifolds`` arrays, one per manifold, as
                returned by ``load_one`` (the unrolled manifolds)

    t : 1D array made of the first column of every unrolled manifold,
        concatenated in generation order

    """

    assert n_features >= 3
    rng = np.random.RandomState(seed)

    data = []
    manifolds = []
    # ``range`` and floor division behave the same on Python 2 and 3, while
    # ``xrange`` and ``/`` only gave an integer sample count on Python 2.
    for i in range(n_manifolds):
        n_samples_m = n_samples // n_manifolds
        if i < n_samples % n_manifolds:
            # spread the remainder over the first manifolds
            n_samples_m += 1

        # NOTE: the order of the rng draws below (n_turns, radius, hole,
        # three shifts, rotation, permutation) determines the generated
        # dataset for a given seed and must not be changed.
        data_m, manifold_m = load_one(
            n_samples=n_samples_m,
            n_features=n_features,
            n_turns=rng.uniform(0.2, 2),
            radius=rng.uniform(0.5, 2),
            hole=rng.uniform() > 0.5,
            rotate=False,
            seed=seed,
        )

        # random translation along the first three dimensions
        data_m[:, 0] += rng.uniform(-2, 2)
        data_m[:, 1] += rng.uniform(-2, 2)
        data_m[:, 2] += rng.uniform(-2, 2)
        data_m = random_rotate(data_m, rng)

        # shuffle the feature columns of this manifold
        data.append(data_m[:, rng.permutation(n_features)])
        manifolds.append(manifold_m)

    # global rotation applied to all the stacked manifolds at once
    data = random_rotate(np.vstack(data), rng)

    t_without_holes = np.hstack([m[:, 0] for m in manifolds])

    return data, manifolds, t_without_holes
Пример #2
0
def load(n_samples=1000, n_features=3, rotate=True, n_turns=1.5, seed=0,
         radius=1.0, hole=False):
    """Generate a single spiral band dataset on the first 2 dims

    The third dim is filled uniformly. The remaining dims are left to zeros
    (before random rotation).

    The resulting dataset hence holds an intrinsically 2 dim manifold (a
    spiralling band) embedded in an arbitrarily higher dimensional space.

    Parameters
    ----------

    n_samples : number of samples to generate

    n_features : total number of dimensions including the first two that
                 hold the actual spiral data (when not rotated); must be at
                 least 3

    rotate : boolean flag to randomly rotate the data with
             ``random_rotate`` before returning it

    n_turns : number of rotations (times 2 pi) for the spiral manifold

    seed : seed of the pseudo random generator, for reproducible datasets

    radius : scaling factor applied to the spiral; a sample of parameter
             ``t`` lies at distance ``radius * t`` from the spiral axis

    hole : boolean flag to dig a rectangular hole in the middle of the roll
           band

    Returns
    -------

    data : an array of shape (n_samples, n_features) embedding the generated
           hyper-swissroll

    manifold : an array of shape (n_samples, 2) that contains the unrolled
               manifold: the first column is the spiral parameter rescaled
               to [-1, 1] (``2 * t - 1``), the second column is the
               position across the band (third dim of the unrotated data)

    NB: if hole is True, the samples in the hole are removed hence the
    first dimension of the results will be smaller than n_samples

    """

    assert n_features >= 3
    rng = np.random.RandomState(seed)

    # NOTE: t must be drawn before the band coordinate to keep the generated
    # dataset stable for a given seed.
    t = rng.uniform(low=0, high=1, size=n_samples)
    data = np.zeros((n_samples, n_features))

    # generate the 2D spiral data driven by a 1d parameter t; the distance
    # to the axis grows linearly with t and is scaled by ``radius``
    max_rot = n_turns * 2 * np.pi
    data[:, 0] = radius * t * np.cos(t * max_rot)
    data[:, 1] = radius * t * np.sin(t * max_rot)

    # fill the third dim with the uniform band of width [-1, 1]
    data[:, 2] = rng.uniform(-1, 1.0, n_samples)

    # copy the manifold data before performing the rotation
    manifold = np.vstack((t * 2 - 1, data[:, 2])).T.copy()

    if hole:
        # keep only the samples lying outside of the rectangle
        # 0.3 <= t <= 0.7 and -0.3 <= z <= 0.3
        z = data[:, 2]
        outside_hole = (t < 0.3) | (t > 0.7) | (z < -0.3) | (z > 0.3)
        data = data[outside_hole]
        manifold = manifold[outside_hole]

    if rotate:
        data = random_rotate(data, rng)
    return data, manifold