import random

from neupy.datasets import make_reber


def generate_continuous_reberwords(number, minimumlength):
    """Build `number` continuous strings, each a concatenation of embedded
    Reber words ('B' + T/P + word + T/P + 'E') of at least `minimumlength`
    characters."""
    D = []
    for i in range(number):
        dataset = ''
        while len(dataset) < minimumlength:
            embeddedStep = random.choice('TP')
            dataset += 'B' + embeddedStep + make_reber(1)[0] + embeddedStep + 'E'
        D.append(dataset)
    return D
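# Usage sketch (my addition, not part of the original snippet): generate
# three continuous strings of at least 30 characters each.
if __name__ == '__main__':
    for w in generate_continuous_reberwords(3, 30):
        print(len(w), w)  # each string starts with 'B' and ends with 'E'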
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from neupy.datasets import make_reber

# Apply the font size before any figure is created; updating rcParams
# after plotting has no effect on the existing figure.
matplotlib.rcParams.update({'font.size': 12})

n = [100, 200, 500, 1000, 2000, 5000, 10000, 20000,
     50000, 100000, 500000, 1000000, 5000000, 10000000]
x = np.zeros(len(n))
for j, u in enumerate(n):
    data = make_reber(u)
    for word in data:
        x[j] += len(word)
    x[j] /= u  # average word length over u samples

plt.semilogx(n, abs(x - 6))
plt.xlabel('N reber words')
plt.ylabel('err = |average - 6|')
plt.title('Convergence of average reber word length')
plt.show()
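# A tighter, behaviour-equivalent way to fill x (my rewrite, not the
# original author's): let NumPy average the word lengths directly.
x_alt = np.array([np.mean([len(word) for word in make_reber(u)]) for u in n])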
import numpy as np

from neupy.datasets import make_reber

# Notes:
# - zero padding would require a mask so the model ignores the padded
#   zeros; we will not pad
# - use to_categorical from keras.utils (np_utils) for one-hot encoding
# - use np.roll to shift y relative to x
# - check how stateful models fit: they must forget between batches but
#   must not forget between steps

batch_size = 20
n_batch = 50  # redefine as needed; batch_size * n_batch = 1000 Reber words
iteration = 10

# Learning dataset: concatenate the generated words into one stream.
dataset = make_reber(batch_size * n_batch)
dataset = ''.join(dataset)
chars = sorted(list(set(dataset)))
char_indices = dict((c, i) for i, c in enumerate(chars))
print('Dataset concatenated length:', len(dataset))

# Slide a window of batch_size characters over the stream; the character
# right after each window is the prediction target.
batches = []
next_char = []
step = 1
for i in range(0, len(dataset) - batch_size, step):
    batches.append(dataset[i:i + batch_size])
    next_char.append(dataset[i + batch_size])
print('nb batches of size {}:'.format(batch_size), len(batches))

print('Vectorization...')
X = np.zeros((len(batches), batch_size, len(chars)), dtype=bool)
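# The excerpt cuts off after allocating X. A minimal sketch of how the
# one-hot fill usually continues (my assumption, following the standard
# Keras char-RNN recipe; the y array is not in the original snippet):
y = np.zeros((len(batches), len(chars)), dtype=bool)
for i, batch in enumerate(batches):
    for t, char in enumerate(batch):
        X[i, t, char_indices[char]] = 1   # one-hot input character
    y[i, char_indices[next_char[i]]] = 1  # one-hot next-character target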
import random

import numpy as np

from neupy.datasets import make_reber

# step_size, n_step, batch_size and n_batch are defined earlier in the
# full script; this excerpt starts mid-file.
N_Epoch = 80
print('Making dataset of {} chars * {} steps, with {} times batches of {}'
      .format(step_size, n_step, 100, batch_size))

D = []
for i in range(n_batch * batch_size):
    dataset = ''
    # Grow the string until it reaches about 90% of the target length
    # step_size * n_step + 1.
    while len(dataset) < step_size * n_step + 1 - 0.1 * step_size * n_step:
        embeddedStep = random.choice('TP')
        custom = 'P'
        if embeddedStep == 'P':
            custom = 'X'
        # dataset += 'B' + embeddedStep + 'B' + make_reber(1)[0] + custom + 'E' + embeddedStep + 'E'
        dataset += 'B' + embeddedStep + 'B' + make_reber(1)[0] + 'E' + embeddedStep + 'E'
    D.append(dataset)

# print(len(list(zip(*D))[0]))
chars = sorted(list(set(D[0])))
char_indices = dict((c, i) for i, c in enumerate(chars))
print(char_indices)

batches = []
next_char = []
print('Vectorization...')
# X = np.zeros((n_batch * batch_size, step_size * n_step + 1, len(chars)), dtype=bool)
# for j, word in enumerate(D):
#     for i in range(step_size * n_step + 1):
#         if i < len(word):
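# One plausible completion of the commented-out vectorization above (my
# assumption, not the original code): one-hot encode each word, leaving
# all-zero rows past the end of shorter words, exactly the padding the
# notes in the other script warn would need a mask.
X = np.zeros((n_batch * batch_size, step_size * n_step + 1, len(chars)), dtype=bool)
for j, word in enumerate(D):
    for i in range(step_size * n_step + 1):
        if i < len(word):
            X[j, i, char_indices[word[i]]] = 1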
def test_reber_word_generation(self):
    words = make_reber(50)
    self.assertEqual(50, len(words))
    for word in words:
        self.assertTrue(is_valid_by_reber(word))
def test_reber_exceptions(self):
    with self.assertRaises(ValueError):
        make_reber(n_words=0)

    with self.assertRaises(ValueError):
        make_reber(n_words=-1)
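# For context, these test methods live inside a unittest.TestCase
# subclass. A minimal self-contained harness sketch; the class name, the
# extra test, and the is_valid_by_reber import path are my assumptions.
import unittest

from neupy.datasets import make_reber
from neupy.datasets.reber import is_valid_by_reber  # assumed import path


class ReberDatasetTestCase(unittest.TestCase):
    def test_make_reber_returns_requested_count(self):
        self.assertEqual(10, len(make_reber(10)))


if __name__ == '__main__':
    unittest.main()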
import numpy as np

from keras.optimizers import RMSprop
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, TimeDistributed
from keras.layers import LSTM
from keras.utils import np_utils, plot_model
import pydot
import graphviz

from neupy.datasets import make_reber

samples = make_reber(1000)
# Rebuild one continuous stream, restoring the 'B'...'E' start/end
# markers around every word.
concatenated_samples = 'B' + 'EB'.join(samples) + 'E'
# Y is X shifted one character ahead: predict the next character.
X = concatenated_samples[:-1]
Y = concatenated_samples[1:]

chars = sorted(list(set(X)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))


def batchify(X, Y, num_batches, batch_size, batch_length):
    retX = np.ndarray(
        shape=np.append([num_batches, batch_size, batch_length], X.shape[1:]))
    retY = np.ndarray(
        shape=np.append([num_batches, batch_size, batch_length], Y.shape[1:]))
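    # Speculative completion of batchify (the index arithmetic is my
    # assumption, not the original code): lay the stream out so that
    # sequence s of batch b + 1 continues sequence s of batch b, the
    # order a stateful LSTM expects.
    for b in range(num_batches):
        for s in range(batch_size):
            start = (s * num_batches + b) * batch_length
            retX[b, s] = X[start:start + batch_length]
            retY[b, s] = Y[start:start + batch_length]
    return retX, retY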
import numpy as np

from keras.optimizers import RMSprop
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, TimeDistributed
from keras.layers import LSTM
from keras.utils import np_utils, plot_model

from neupy.datasets import make_reber

samples = make_reber(1000)
concatenated_samples = 'B' + 'EB'.join(samples) + 'E'
X = concatenated_samples[:-1]
Y = concatenated_samples[1:]

chars = sorted(list(set(X)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# One-hot encode the input and target character streams.
# Array = np.asarray([char_indices[c] for c in X])
categorized = np_utils.to_categorical([char_indices[c] for c in X])
categorizedY = np_utils.to_categorical([char_indices[c] for c in Y])

# Round-trip check: decode the one-hot matrix back into a string.
decategorized = np.argmax(categorized, axis=1)
decoded = ''.join([indices_char[i] for i in decategorized])


def batchify(X, Y, num_batches, batch_size, batch_length):
    # (truncated in this excerpt)
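# None of these excerpts shows the model itself. A minimal sketch of a
# next-character LSTM that fits the imports above; the layer size,
# optimizer settings and learning rate are my assumptions, not the
# original's.
model = Sequential()
model.add(LSTM(32, input_shape=(None, len(chars)), return_sequences=True))
model.add(TimeDistributed(Dense(len(chars))))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.01))
model.summary()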