def test_shuffle(self):
    """Exercise ``shuffle`` with paired, empty and mismatched inputs."""
    samples = np.arange(10)

    # The same array is passed twice, so both shuffled outputs must stay
    # aligned element by element.
    shuffled_pair = shuffle(samples, samples)
    np.testing.assert_array_equal(*shuffled_pair)

    # Calling ``shuffle`` without arguments is compared to an empty tuple.
    nothing_shuffled = shuffle()
    np.testing.assert_array_equal(tuple(), nothing_shuffled)

    # Inputs of different lengths are expected to be rejected.
    truncated = samples[:len(samples) - 1]
    with self.assertRaises(ValueError):
        shuffle(samples, truncated)
def make_reber_classification(n_samples, invalid_size=0.5):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar,
    but they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least 2.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to `0.5`.
        Value must be between 0 and 1, but not equal to them.
        The number of invalid words is ``ceil(n_samples * invalid_size)``,
        capped at ``n_samples - 1`` so that at least one valid word is
        always generated.

    Returns
    -------
    tuple
        Result of ``shuffle`` applied to two arrays of equal length.
        First contains words and second - labels for them
        (``1`` for valid words, ``0`` for invalid ones).

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is
        outside of the open interval (0, 1).

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples.")

    if invalid_size <= 0 or invalid_size >= 1:
        raise ValueError("`invalid_size` property must be between zero and "
                         "one, but not equal.")

    # `invalid_size` describes the share of *invalid* words, so it drives
    # the invalid count; the remainder is filled with valid words. The cap
    # keeps at least one valid word (the ceil already guarantees at least
    # one invalid word for a positive proportion).
    n_invalid_words = min(
        int(math.ceil(n_samples * invalid_size)),
        n_samples - 1,
    )
    n_valid_words = n_samples - n_invalid_words

    valid_words = list(make_reber(n_valid_words))
    valid_labels = [1] * n_valid_words

    # Invalid words are random letter sequences of random length (3 to 14)
    # that ignore the grammar rules.
    invalid_words = [
        ''.join(choice(avaliable_letters) for _ in range(randint(3, 14)))
        for _ in range(n_invalid_words)
    ]
    # Every invalid word needs exactly one label, otherwise words and
    # labels would have different lengths.
    invalid_labels = [0] * n_invalid_words

    return shuffle(np.array(valid_words + invalid_words),
                   np.array(valid_labels + invalid_labels))
def make_reber_classification(n_samples, invalid_size=0.5):
    """
    Generate random dataset for Reber grammar classification.
    Invalid words contain the same letters as the Reber grammar,
    but they are built without the grammar rules.

    Parameters
    ----------
    n_samples : int
        Number of samples in dataset. Must be at least 2.
    invalid_size : float
        Proportion of invalid words in dataset, defaults to `0.5`.
        Value must be between 0 and 1, but not equal to them.
        The number of invalid words is ``ceil(n_samples * invalid_size)``,
        capped at ``n_samples - 1`` so that at least one valid word is
        always generated.

    Returns
    -------
    tuple
        Result of ``shuffle`` applied to two arrays of equal length.
        First contains words and second - labels for them
        (``1`` for valid words, ``0`` for invalid ones).

    Raises
    ------
    ValueError
        If ``n_samples`` is less than 2 or ``invalid_size`` is
        outside of the open interval (0, 1).

    Examples
    --------
    >>> from neupy.datasets import make_reber_classification
    >>>
    >>> data, labels = make_reber_classification(10, invalid_size=0.5)
    >>> data
    array(['SXSXVSXXVX', 'VVPS', 'VVPSXTTS', 'VVS', 'VXVS', 'VVS',
           'PPTTTXPSPTV', 'VTTSXVPTXVXT', 'VSSXSTX', 'TTXVS'],
          dtype='<U12')
    >>> labels
    array([0, 1, 0, 1, 1, 1, 0, 0, 0, 1])
    """
    if n_samples < 2:
        raise ValueError("There are must be at least 2 samples.")

    if invalid_size <= 0 or invalid_size >= 1:
        raise ValueError("`invalid_size` property must be between zero and "
                         "one, but not equal.")

    # The proportion applies to the invalid words; valid words take up
    # whatever is left. Capping at `n_samples - 1` guarantees that at
    # least one valid word is requested from `make_reber`.
    requested_invalid = int(math.ceil(n_samples * invalid_size))
    n_invalid_words = min(requested_invalid, n_samples - 1)
    n_valid_words = n_samples - n_invalid_words

    valid_words = list(make_reber(n_valid_words))
    valid_labels = [1] * n_valid_words

    # Random-length (3 to 14) sequences of grammar letters that are
    # assembled without following the grammar rules.
    invalid_words = [
        "".join(choice(avaliable_letters) for _ in range(randint(3, 14)))
        for _ in range(n_invalid_words)
    ]
    # One label per invalid word keeps words and labels the same length.
    invalid_labels = [0] * n_invalid_words

    return shuffle(np.array(valid_words + invalid_words),
                   np.array(valid_labels + invalid_labels))