def circles(n_samples=100, noise=None, seed=None, factor=0.8, n_classes=2, *args, **kwargs):
    """Create circles separated by some value

    Class `i` is placed on a circle of radius `factor ** i` centred on the
    origin, so class 0 is the outermost (unit) circle. Each class receives
    `n_samples // n_classes` points evenly spaced in angle over [0, 2*pi].

    Args:
      n_samples: int, number of datapoints to generate
      noise: float or None, standard deviation of the Gaussian noise added
      seed: int or None, seed for the noise (seeds the global NumPy RNG, so it
        also fixes the shuffle order and the leftover points)
      factor: float, size factor of the inner circles with respect to the
        outer ones
      n_classes: int, number of classes to generate
      *args, **kwargs: accepted but unused.

    Returns:
      Shuffled features and labels for 'circles' synthetic dataset of type
      `base.Dataset`

    Note:
      The multi-class support might not work as expected if `noise` is enabled

    TODO:
      - Generation of unbalanced data

    Credit goes to (under BSD 3 clause):
      B. Thirion,
      G. Varoquaux,
      A. Gramfort,
      V. Michel,
      O. Grisel,
      G. Louppe,
      J. Nothman
    """
    if seed is not None:
        np.random.seed(seed)

    # Algo: 1) Generate the initial circle,
    #       2) For every class generate a circle with a smaller radius
    per_class = n_samples // n_classes
    angles = np.linspace(0, 2 * np.pi, per_class)
    ring_x = np.cos(angles)
    ring_y = np.sin(angles)

    xs, ys, labels = [], [], []
    for label in range(n_classes):
        xs.append(ring_x)
        ys.append(ring_y)
        labels.append(np.full(per_class, label, dtype=np.int32))
        # Shrink by repeated multiplication; rebinding (rather than `*=`)
        # keeps the arrays already stored in xs/ys untouched.
        ring_x = ring_x * factor
        ring_y = ring_y * factor

    # Add more points if n_samples is not divisible by n_classes (unbalanced!)
    extras = n_samples % n_classes
    if extras > 0:
        # One angle per extra point, shared by cos and sin, so the point
        # actually lies on the outer circle (class 0).
        extra_angles = np.random.rand(extras) * 2 * np.pi
        xs.append(np.cos(extra_angles))
        ys.append(np.sin(extra_angles))
        labels.append(np.zeros(extras, dtype=np.int32))

    # Reshape the features/labels
    X = np.vstack((np.concatenate(xs), np.concatenate(ys))).T
    y = np.concatenate(labels)

    # Shuffle the data; the permutation is drawn before the noise so the
    # order of RNG draws stays the same as before.
    indices = np.random.permutation(n_samples)
    if noise is not None:
        X += np.random.normal(scale=noise, size=X.shape)
    return Dataset(data=X[indices], target=y[indices])
def circlesAndRects(n_samples=100):
    """Create a shuffled dataset of repeated rectangle and circle samples.

    One representative per class is obtained from `rect()` and `circle()` and
    repeated to fill that class's share of the dataset. Labels are one-hot
    vectors of width 10 with the class index (0 for rect, 1 for circle) set
    to 1.

    Args:
      n_samples: int, total number of datapoints to generate. When it is not
        divisible by the number of classes, the leftover datapoints go to the
        first class(es) so that exactly `n_samples` rows are returned.

    Returns:
      `Dataset` whose `data` holds the shuffled samples and whose `target`
      holds the matching one-hot labels.
    """
    klass_reps = [rect(), circle()]
    per_class, leftover = divmod(n_samples, len(klass_reps))

    X = []
    Y = []
    for i, klass in enumerate(klass_reps):
        # Spread the remainder over the first classes; without this an odd
        # n_samples built fewer rows than the permutation below indexes.
        count = per_class + (1 if i < leftover else 0)
        one_hot = np.zeros(10)
        one_hot[i] = 1
        X.extend([klass] * count)
        Y.extend([one_hot] * count)

    X = np.array(X)
    Y = np.array(Y)

    # Permute over the rows actually built so indexing can never overrun.
    indices = np.random.permutation(len(X))
    return Dataset(data=X[indices], target=Y[indices])
def black_white(n_samples=100, noise=None, seed=None, factor=0.8, n_classes=2, *args, **kwargs):
    """Create flat constant-valued images with one-hot labels.

    The first `n_samples // 2` rows are filled with 255 and labelled with a
    one-hot vector whose index 0 is set; the remaining rows are filled with 0
    and labelled with index 1 set. Every row has 28 * 28 `uint8` values and
    every label has width 10.

    Args:
      n_samples: int, number of datapoints to generate
      seed: int or None, seed for the global NumPy RNG that drives the shuffle
      noise, factor, n_classes, *args, **kwargs: accepted but unused by this
        generator.

    Returns:
      `Dataset` with the shuffled images in `data` and the matching one-hot
      labels in `target`.
    """
    if seed is not None:
        np.random.seed(seed)

    n_white = n_samples // 2

    X = np.zeros([n_samples, 28 * 28], np.uint8)
    X[:n_white] = 255

    # Build the labels directly as a 2-D array; concatenating Python lists
    # failed when one half was empty (e.g. n_samples == 1).
    Y = np.zeros([n_samples, 10], dtype=int)
    Y[:n_white, 0] = 1
    Y[n_white:, 1] = 1

    indices = np.random.permutation(n_samples)
    return Dataset(data=X[indices], target=Y[indices])
def spirals(n_samples=100, noise=None, seed=None, mode='archimedes', n_loops=2, *args, **kwargs):
    """Create spirals

    Currently only binary classification is supported for spiral generation

    Args:
      n_samples: int, number of datapoints to generate
      noise: float or None, standard deviation of the Gaussian noise added
      seed: int or None, seed for the noise (seeds the global NumPy RNG, so it
        also fixes the shuffle order and the leftover points)
      mode: str, how the spiral should be generated. Current implementations:
        'archimedes': a spiral with equal distances between branches
        'bernoulli': logarithmic spiral with branch distances increasing
        'fermat': a spiral with branch distances decreasing (sqrt)
      n_loops: int, number of spiral loops, doesn't play well with 'bernoulli'
      *args, **kwargs: forwarded to the spiral generator selected by `mode`.

    Returns:
      Shuffled features and labels for 'spirals' synthetic dataset of type
      `base.Dataset`

    Raises:
      ValueError: If the generation `mode` is not valid

    TODO:
      - Generation of unbalanced data
    """
    n_classes = 2  # I am not sure how to make it multiclass

    _modes = {
        'archimedes': _archimedes_spiral,
        'bernoulli': _bernoulli_spiral,
        'fermat': _fermat_spiral
    }

    if mode not in _modes:
        # Wrap in a tuple so a tuple-valued `mode` still formats cleanly.
        raise ValueError('Cannot generate spiral with mode %s' % (mode,))
    make_spiral = _modes[mode]

    if seed is not None:
        np.random.seed(seed)

    per_class = n_samples // n_classes
    linspace = np.linspace(0, 2 * n_loops * np.pi, per_class)

    xs, ys, labels = [], [], []
    for label in range(n_classes):
        # Each class is the same spiral with its phase shifted by label * pi.
        base_cos, base_sin = make_spiral(linspace, label * np.pi, *args, **kwargs)
        xs.append(np.ravel(base_cos))
        ys.append(np.ravel(base_sin))
        labels.append(np.full(per_class, label, dtype=np.int32))

    # Add more points if n_samples is not divisible by n_classes (unbalanced!)
    extras = n_samples % n_classes
    if extras > 0:
        # The extras are labelled 0, so pass class 0's phase offset explicitly;
        # otherwise the first of *args would be taken as the offset here.
        x_extra, y_extra = make_spiral(np.random.rand(extras) * 2 * np.pi,
                                       0 * np.pi, *args, **kwargs)
        xs.append(np.ravel(x_extra))
        ys.append(np.ravel(y_extra))
        labels.append(np.zeros(extras, dtype=np.int32))

    # Reshape the features/labels
    X = np.vstack((np.concatenate(xs), np.concatenate(ys))).T
    y = np.concatenate(labels)

    # Shuffle the data; the permutation is drawn before the noise so the
    # order of RNG draws stays the same as before.
    indices = np.random.permutation(n_samples)
    if noise is not None:
        X += np.random.normal(scale=noise, size=X.shape)
    return Dataset(data=X[indices], target=y[indices])