Пример #1
0
def make_reber_classification(n_samples, invalid_size=0.5):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar, but
    they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least ``2``.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to `0.5`. Value
        must be strictly between 0 and 1. The number of invalid words
        is ``n_samples * invalid_size`` rounded up, the remaining
        samples are valid words.

    Returns
    -------
    tuple
        Words and labels for them, in the form returned by ``shuffle``.
        Label ``1`` marks a valid word and ``0`` marks an invalid one.

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is not
        strictly between zero and one.

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples")

    if invalid_size <= 0 or invalid_size >= 1:
        raise ValueError("`invalid_size` property must be "
                         "between zero and one")

    # ``invalid_size`` describes the share of *invalid* words, so it
    # defines the invalid count; valid words fill the rest.
    n_invalid_words = int(math.ceil(n_samples * invalid_size))
    n_valid_words = n_samples - n_invalid_words

    valid_words = make_reber(n_valid_words)
    valid_labels = [1] * n_valid_words

    # Invalid words are random letter sequences; the length is drawn
    # first and then that many letters are picked one by one.
    invalid_words = [
        ''.join(choice(avaliable_letters) for _ in range(randint(3, 14)))
        for _ in range(n_invalid_words)
    ]
    # One label per invalid word, so that words and labels always have
    # the same length.
    invalid_labels = [0] * n_invalid_words

    return shuffle(
        np.array(valid_words + invalid_words),
        np.array(valid_labels + invalid_labels)
    )
Пример #2
0
def make_reber_classification(n_samples, invalid_size=0.5):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar, but
    they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least ``2``.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to `0.5`. Value
        must be strictly between 0 and 1. The number of invalid words
        is ``n_samples * invalid_size`` rounded up, the remaining
        samples are valid words.

    Returns
    -------
    tuple
        Words and labels for them, in the form returned by ``shuffle``.
        Label ``1`` marks a valid word and ``0`` marks an invalid one.

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is not
        strictly between zero and one.

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples")

    if invalid_size <= 0 or invalid_size >= 1:
        raise ValueError("`invalid_size` property must be "
                         "between zero and one")

    # The proportion argument controls the invalid words; whatever is
    # left over from ``n_samples`` becomes valid grammar words.
    n_invalid_words = int(math.ceil(n_samples * invalid_size))
    n_valid_words = n_samples - n_invalid_words

    valid_words = make_reber(n_valid_words)
    valid_labels = [1] * n_valid_words

    invalid_words = []
    for _ in range(n_invalid_words):
        word_length = randint(3, 14)
        letters = [choice(avaliable_letters) for _ in range(word_length)]
        invalid_words.append(''.join(letters))

    # Label count has to follow the number of invalid words, otherwise
    # the words and labels arrays end up with different lengths.
    invalid_labels = [0] * n_invalid_words

    return shuffle(np.array(valid_words + invalid_words),
                   np.array(valid_labels + invalid_labels))
Пример #3
0
 def test_shuffle_with_nones(self):
     # When every input is ``None`` the output is expected to be
     # equal to the tuple of inputs.
     nones = (None, None)
     self.assertEqual(nones, shuffle(*nones))
Пример #4
0
 def test_shuffle_invalid_shapes_exception(self):
     # Inputs with 10 and 9 elements must be rejected with a
     # ``ValueError`` whose message mentions both shapes.
     input_data = np.arange(10)
     # ``assertRaisesRegexp`` is a deprecated alias that no longer
     # exists in Python 3.12+, ``assertRaisesRegex`` is the
     # supported name.
     with self.assertRaisesRegex(ValueError, r'\(10,\), \(9,\)'):
         shuffle(input_data, input_data[:len(input_data) - 1])
Пример #5
0
 def test_shuffle_single_input(self):
     # All values are identical, so any permutation of the array
     # compares equal to the original one.
     ones = np.ones(10)
     result = shuffle(ones)
     # A single input should come back as an array, not as a
     # one-element tuple wrapping that array.
     np.testing.assert_array_equal(ones, result)
Пример #6
0
 def test_shuffle_empty_input(self):
     # Calling ``shuffle`` without arguments is expected to produce
     # an empty result.
     expected = tuple()
     actual = shuffle()
     np.testing.assert_array_equal(expected, actual)
Пример #7
0
 def test_shuffle_basic(self):
     # The same array is passed twice; both outputs are expected to
     # be permuted identically and therefore stay equal to each other.
     values = np.arange(10)
     outputs = shuffle(values, values)
     np.testing.assert_array_equal(*outputs)
Пример #8
0
 def test_shuffle_with_nones(self):
     # ``None`` inputs are expected to pass through ``shuffle``
     # and come back as an equal tuple.
     expected = (None, None)
     result = shuffle(*expected)
     self.assertEqual(expected, result)
Пример #9
0
 def test_shuffle_invalid_shapes_exception(self):
     # Arrays with 10 and 9 elements are passed together, the call
     # is expected to fail with ``ValueError``.
     full = np.arange(10)
     truncated = full[:-1]
     with self.assertRaises(ValueError):
         shuffle(full, truncated)
Пример #10
0
 def test_shuffle_single_input(self):
     # An array of identical values equals any permutation of itself,
     # which makes the comparison independent of the shuffle order.
     data = np.ones(10)
     # The result for a single input should be the array itself
     # rather than a tuple that contains it.
     np.testing.assert_array_equal(data, shuffle(data))
Пример #11
0
 def test_shuffle_empty_input(self):
     # With no inputs at all the result is compared against
     # an empty tuple.
     nothing = tuple()
     shuffled = shuffle()
     np.testing.assert_array_equal(nothing, shuffled)
Пример #12
0
 def test_shuffle_basic(self):
     # Passing one array as both inputs checks that the outputs
     # remain equal to each other after shuffling.
     sequence = np.arange(10)
     shuffled = shuffle(sequence, sequence)
     np.testing.assert_array_equal(*shuffled)
Пример #13
0
def make_reber_classification(n_samples,
                              invalid_size=0.5,
                              return_indeces=False):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar, but
    they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least ``2``.

    invalid_size : float
        Proportion of invalid words in dataset, defaults to ``0.5``.
        Value must be strictly between ``0`` and ``1``. The number of
        invalid words is ``n_samples * invalid_size`` rounded up, the
        remaining samples are valid words.

    return_indeces : bool
        If ``True``, each word will be converted to array where each
        letter converted to the index. Defaults to ``False``.

    Returns
    -------
    tuple
        Samples and labels for them. Label ``1`` marks a valid word
        and ``0`` marks an invalid one.

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is not
        strictly between zero and one.

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    >>>
    >>> data, labels = make_reber_classification(
    ...     4, invalid_size=0.5, return_indeces=True)
    >>> data
    array([array([1, 3, 1, 4]),
           array([0, 3, 0, 3, 0, 4, 3, 0, 4, 4]),
           array([1, 3, 1, 2, 3, 1, 2, 4]),
           array([0, 3, 0, 0, 3, 0, 4, 2, 4, 1, 0, 4, 0])], dtype=object)
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples")

    if not 0 < invalid_size < 1:
        raise ValueError("`invalid_size` argument value must be between "
                         "zero and one, got {}".format(invalid_size))

    # ``invalid_size`` is the share of *invalid* words, so it sets the
    # invalid count; the valid words take the remaining samples.
    n_invalid_words = int(math.ceil(n_samples * invalid_size))
    n_valid_words = n_samples - n_invalid_words

    valid_words = make_reber(n_valid_words)
    valid_labels = [1] * n_valid_words

    # Invalid words are random letter sequences; the length is drawn
    # first and then that many letters are picked one by one.
    invalid_words = [
        ''.join(choice(avaliable_letters) for _ in range(randint(3, 14)))
        for _ in range(n_invalid_words)
    ]
    # One label per invalid word keeps samples and labels aligned.
    invalid_labels = [0] * n_invalid_words

    samples, labels = shuffle(np.array(valid_words + invalid_words),
                              np.array(valid_labels + invalid_labels))

    if return_indeces:
        samples = convert_letters_to_indeces(samples)

    return samples, labels