def load(n_samples=10000, n_features=10, n_manifolds=10, seed=0):
    """Generate a multi spiral bands dataset randomly in many dims

    The resulting dataset holds a set of intrinsically 2 dim manifolds
    (spiralling bands) embedded in an arbitrarily higher dimensional space.

    Parameters
    ----------
    n_samples : total number of samples to generate; they are split as
        evenly as possible between the manifolds, the first
        ``n_samples % n_manifolds`` manifolds receiving one extra sample

    n_features : total number of dimensions (must be at least 3)

    n_manifolds : number of manifolds to generate

    seed : reproducible pseudo random dataset generation

    Returns
    -------
    data : the vertically stacked samples of all the manifolds, passed
        through ``random_rotate``; it has ``n_features`` columns

    manifolds : a list of ``n_manifolds`` arrays with 2 columns that
        contain the unrolled manifolds, one array per manifold

    t : the concatenation of the first column of every unrolled manifold,
        i.e. the value of the intrinsic spiral parameter for each sample

    NB: manifolds generated with a hole have fewer samples than requested,
    hence the number of rows of the results can be smaller than n_samples.
    """
    assert n_features >= 3

    rng = np.random.RandomState(seed)

    # Integer split of the samples between the manifolds. ``divmod`` keeps
    # the counts integral on both Python 2 and Python 3 (a plain ``/`` would
    # yield a float sample count on Python 3).
    base_count, n_extra = divmod(n_samples, n_manifolds)

    data = []
    manifolds = []
    for i in range(n_manifolds):
        # the first ``n_extra`` manifolds absorb the remainder of the split
        n_samples_m = base_count + 1 if i < n_extra else base_count

        # NOTE(review): ``load_one`` is not defined in the visible part of
        # this file, while a later definition that is also named ``load``
        # has exactly this signature -- confirm which name is intended.
        # NOTE(review): every manifold is generated with the same ``seed``;
        # presumably intentional so they share parameter values -- confirm.
        data_m, manifold_m = load_one(
            n_samples=n_samples_m,
            n_features=n_features,
            n_turns=rng.uniform(0.2, 2),
            radius=rng.uniform(0.5, 2),
            hole=rng.uniform() > 0.5,
            rotate=False,
            seed=seed,
        )

        # random translation of the first three coordinates
        data_m[:, 0] += rng.uniform(-2, 2)
        data_m[:, 1] += rng.uniform(-2, 2)
        data_m[:, 2] += rng.uniform(-2, 2)

        data_m = random_rotate(data_m, rng)

        # shuffle the feature columns so that each manifold ends up spread
        # over a different set of dimensions
        data.append(data_m[:, rng.permutation(n_features)])
        manifolds.append(manifold_m)

    data = random_rotate(np.vstack(data), rng)
    t_without_holes = np.hstack([m[:, 0] for m in manifolds])
    return data, manifolds, t_without_holes
def load(n_samples=1000, n_features=3, rotate=True, n_turns=1.5, seed=0,
         radius=1.0, hole=False):
    """Generate a single spiral band dataset on the first 2 dims

    The third dim is filled uniformly. The remaining dims are left to zeros
    (before random rotation).

    The resulting dataset hence holds an intrinsically 2 dim manifold
    (a spiralling band) embedded in an arbitrarily higher dimensional space.

    Parameters
    ----------
    n_samples : number of samples to generate

    n_features : total number of dimensions including the first two that
        include the actual spiral data (when not rotated); must be at
        least 3

    rotate : boolean flag; when set, the generated data is passed through
        ``random_rotate`` before being returned

    n_turns : number of rotations (times 2 pi) for the spiral manifold

    seed : seed of the pseudo random generator, for reproducible datasets

    radius : scaling factor applied to the two spiral coordinates, i.e.
        the distance from the origin reached at the outer end of the spiral

    hole : boolean flag to dig a rectangular hole in the middle of the roll
        band

    Returns
    -------
    data : an array of shape (n_samples, n_features) embedding the
        generated hyper-swissroll

    manifold : an array of size (n_samples, 2) that contains the unrolled
        manifold

    NB: if hole is True, the samples in the hole are removed hence the
    dimensions of the results will be smaller than n_samples
    """
    assert n_features >= 3

    rng = np.random.RandomState(seed)

    t = rng.uniform(low=0, high=1, size=n_samples)
    data = np.zeros((n_samples, n_features))

    # generate the 2D spiral data driven by a 1d parameter t: the angle and
    # the distance to the origin both grow linearly with t
    max_rot = n_turns * 2 * np.pi
    angle = t * max_rot
    # ``radius`` scales the spiral; it used to be overwritten by a chained
    # assignment and therefore had no effect on the output
    data[:, 0] = radius * t * np.cos(angle)
    data[:, 1] = radius * t * np.sin(angle)

    # fill the third dim with the uniform band of width [-1, 1]
    data[:, 2] = rng.uniform(-1, 1.0, n_samples)

    # copy the manifold data before performing the rotation; the spiral
    # parameter is rescaled from [0, 1] to [-1, 1]
    manifold = np.vstack((t * 2 - 1, data[:, 2])).T.copy()

    if hole:
        z = data[:, 2]
        # keep the samples lying outside of the central rectangle
        # 0.3 <= t <= 0.7 and -0.3 <= z <= 0.3
        keep = (t < 0.3) | (t > 0.7) | (z < -0.3) | (z > 0.3)
        data = data[keep]
        manifold = manifold[keep]

    if rotate:
        data = random_rotate(data, rng)

    return data, manifold