示例#1
0
def circles(n_samples=100, noise=None, seed=None, factor=0.8, n_classes=2, *args, **kwargs):
  """Create concentric circles, one circle per class.

  Class 0 lies on the unit circle; each following class lies on a circle whose
  radius is `factor` times the radius of the previous class.

  Args:
    n_samples: int, number of datapoints to generate
    noise: float or None, standard deviation of the Gaussian noise added
    seed: int or None, seed given to `np.random.seed` (the global NumPy RNG),
      which drives the extra points, the shuffle and the noise
    factor: float, size factor of the inner circles with respect to the outer ones
    n_classes: int, number of classes to generate
    *args: unused
    **kwargs: unused

  Returns:
    Shuffled features and labels for 'circles' synthetic dataset of type
    `base.Dataset`: `data` has shape (n_samples, 2), `target` holds int32 labels

  Note:
    The multi-class support might not work as expected if `noise` is enabled

  TODO:
    - Generation of unbalanced data

  Credit goes to (under BSD 3 clause):
    B. Thirion,
    G. Varoquaux,
    A. Gramfort,
    V. Michel,
    O. Grisel,
    G. Louppe,
    J. Nothman
  """
  if seed is not None:
    np.random.seed(seed)

  # Algo: 1) generate the unit circle, 2) for every further class shrink the
  # previous circle by `factor`.
  per_class = n_samples // n_classes
  # np.linspace includes both endpoints, so the first and last angle (0 and
  # 2*pi) map to the same point.
  angles = np.linspace(0, 2 * np.pi, per_class)
  ring_x = np.cos(angles)
  ring_y = np.sin(angles)

  xs, ys, labels = [], [], []
  for label in range(n_classes):
    xs.append(ring_x)
    ys.append(ring_y)
    labels.append(np.full(per_class, label, dtype=np.int32))
    # Build new arrays instead of scaling in place: the lists above still
    # reference the current ones.
    ring_x = ring_x * factor
    ring_y = ring_y * factor

  # Add more points if n_samples is not divisible by n_classes (unbalanced!).
  # They are labelled 0, so they must lie on the unit circle: x and y have to
  # share the same random angle.
  extras = n_samples % n_classes
  if extras > 0:
    extra_angles = np.random.rand(extras) * 2 * np.pi
    xs.append(np.cos(extra_angles))
    ys.append(np.sin(extra_angles))
    labels.append(np.zeros(extras, dtype=np.int32))

  # Assemble the (n_samples, 2) feature matrix and the label vector
  X = np.vstack((np.concatenate(xs), np.concatenate(ys))).T
  y = np.concatenate(labels)

  # Shuffle the data (the permutation is drawn before the noise, as before)
  indices = np.random.permutation(n_samples)
  if noise is not None:
    X += np.random.normal(scale=noise, size=X.shape)
  return Dataset(data=X[indices], target=y[indices])
示例#2
0
def circlesAndRects(n_samples=100):
    """Create a shuffled dataset made of repeated rectangle and circle samples.

    `rect()` supplies the sample used for class 0 and `circle()` the sample
    used for class 1; each sample is repeated to fill its share of the
    dataset. Labels are one-hot vectors of length 10.

    Args:
        n_samples: int, total number of datapoints to generate. When it is not
            divisible by the number of classes, the leading classes receive
            one extra sample each so that exactly `n_samples` rows are built.

    Returns:
        `Dataset` whose `data` is the shuffled stack of samples and whose
        `target` is the matching stack of one-hot labels.
    """
    # NOTE(review): rect() and circle() are defined elsewhere; presumably they
    # return arrays of identical shape so np.array can stack them -- confirm.
    klass_reps = [rect(), circle()]
    per_class, remainder = divmod(n_samples, len(klass_reps))

    X = []
    Y = []
    for i, klass in enumerate(klass_reps):
        # Spread the remainder over the first classes; previously those rows
        # were never generated and the shuffle below indexed past the end.
        count = per_class + (1 if i < remainder else 0)
        one_hot = np.zeros(10)
        one_hot[i] = 1
        X.extend([klass] * count)
        Y.extend([one_hot] * count)
    X = np.array(X)
    Y = np.array(Y)

    # Shuffle samples and labels with the same permutation
    indices = np.random.permutation(n_samples)
    return Dataset(data=X[indices], target=Y[indices])
示例#3
0
def black_white(n_samples=100,
                noise=None,
                seed=None,
                factor=0.8,
                n_classes=2,
                *args,
                **kwargs):
    """Create a shuffled dataset of constant all-255 and all-zero rows.

    The first `n_samples // 2` rows (before shuffling) are filled with 255 and
    labelled with a one-hot vector whose index 0 is set; the remaining rows
    are filled with 0 and labelled with a one-hot vector whose index 1 is set.

    Args:
        n_samples: int, number of datapoints to generate
        noise: unused
        seed: int or None, seed given to `np.random.seed` (the global NumPy
            RNG) before the shuffle
        factor: unused
        n_classes: unused; labels are always one-hot vectors of length 10
        *args: unused
        **kwargs: unused

    Returns:
        `Dataset` whose `data` is a uint8 array of shape (n_samples, 28 * 28)
        and whose `target` is an integer array of shape (n_samples, 10).
    """
    # Honor `seed` the same way the other generators in this file do.
    if seed is not None:
        np.random.seed(seed)

    n_white = n_samples // 2
    n_black = n_samples - n_white

    # 28 * 28 values per row -- presumably a flattened 28x28 image; confirm
    # against the consumer.
    X = np.concatenate((
        np.full((n_white, 28 * 28), 255, dtype=np.uint8),
        np.zeros((n_black, 28 * 28), dtype=np.uint8),
    ))

    # Preallocate the labels as a 2-D array so an empty half (n_samples < 2)
    # still has the right number of columns.
    Y = np.zeros((n_samples, 10), dtype=int)
    Y[:n_white, 0] = 1
    Y[n_white:, 1] = 1

    # Shuffle samples and labels with the same permutation
    indices = np.random.permutation(n_samples)
    return Dataset(data=X[indices], target=Y[indices])
示例#4
0
def spirals(n_samples=100,
            noise=None,
            seed=None,
            mode='archimedes',
            n_loops=2,
            *args,
            **kwargs):
    """Create spirals

    Currently only binary classification is supported for spiral generation

    Args:
        n_samples: int, number of datapoints to generate
        noise: float or None, standard deviation of the Gaussian noise added
        seed: int or None, seed given to `np.random.seed` (the global NumPy
            RNG), which drives the extra point, the shuffle and the noise
        mode: str, how the spiral should be generated. Current implementations:
            'archimedes': a spiral with equal distances between branches
            'bernoulli': logarithmic spiral with branch distances increasing
            'fermat': a spiral with branch distances decreasing (sqrt)
        n_loops: int, number of spiral loops, doesn't play well with 'bernoulli'
        *args: forwarded to the spiral generator selected by `mode`
        **kwargs: forwarded to the spiral generator selected by `mode`

    Returns:
        Shuffled features and labels for 'spirals' synthetic dataset of type
        `base.Dataset`

    Raises:
        ValueError: If the generation `mode` is not valid

    TODO:
        - Generation of unbalanced data
    """
    n_classes = 2  # I am not sure how to make it multiclass

    _modes = {
        'archimedes': _archimedes_spiral,
        'bernoulli': _bernoulli_spiral,
        'fermat': _fermat_spiral
    }

    # `None` is never a key, so this single test also rejects mode=None.
    if mode not in _modes:
        raise ValueError('Cannot generate spiral with mode %s' % mode)
    generate = _modes[mode]

    if seed is not None:
        np.random.seed(seed)

    per_class = n_samples // n_classes
    linspace = np.linspace(0, 2 * n_loops * np.pi, per_class)

    # np.append is kept on purpose: it flattens and promotes whatever the
    # generator returns, and the generators are not visible from here.
    spir_x = np.empty(0, dtype=np.int32)
    spir_y = np.empty(0, dtype=np.int32)
    y = np.empty(0, dtype=np.int32)
    for label in range(n_classes):
        # The second positional argument is label * pi (0 for the first arm,
        # pi for the second).
        arm_x, arm_y = generate(linspace, label * np.pi, *args, **kwargs)
        spir_x = np.append(spir_x, arm_x)
        spir_y = np.append(spir_y, arm_y)
        y = np.append(y, label * np.ones(per_class, dtype=np.int32))

    # Add more points if n_samples is not divisible by n_classes (unbalanced!)
    extras = n_samples % n_classes
    if extras > 0:
        # The extra points are labelled 0, so pass the same second argument
        # the loop uses for label 0. Omitting it would let the first entry of
        # *args slide into that position.
        # Angles are drawn from [0, 2*pi), i.e. independently of n_loops.
        x_extra, y_extra = generate(np.random.rand(extras) * 2 * np.pi,
                                    0 * np.pi, *args, **kwargs)
        spir_x = np.append(spir_x, x_extra)
        spir_y = np.append(spir_y, y_extra)
        y = np.append(y, np.zeros(extras, dtype=np.int32))

    # Stack the coordinates into one row per sample; y is already 1-D
    X = np.vstack((spir_x, spir_y)).T

    # Shuffle the data (the permutation is drawn before the noise, as before)
    indices = np.random.permutation(n_samples)
    if noise is not None:
        X += np.random.normal(scale=noise, size=X.shape)
    return Dataset(data=X[indices], target=y[indices])