class RNN:
    def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos) #EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes)-1):
            name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i], layer_sizes[i+1], name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params
        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    def genHiddens(self,batch_size,layer):
        return np.zeros((batch_size,layer.y),dtype='float32'),np.zeros((batch_size,layer.y),dtype='float32')

    # Pass the word into the network to set all the hidden states.
    def set_hiddens(self,X,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(X)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        return S1,H1,S2,H2,S3,H3

    # Make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input we use to kick off the prediction. We don't
    # actually need a meaningful starting value, just a sequence of the same
    # length as our input word so we know how many letters to predict.
    def calc_preds(self,INIT_PRED,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(INIT_PRED)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        pred = self.output_layer.forward_prop(H3)
        INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX) # argmax returns an int; we need to keep everything floatX
        return pred,INIT_PRED,S1,H1,S2,H2,S3,H3

    def calc_cost(self,pred,Y):
        return T.mean(T.nnet.categorical_crossentropy(pred,Y))
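To make the moving pieces concrete, here is a minimal sketch of how this class might be driven with theano.scan: the hidden states are zero-initialised with genHiddens, set_hiddens is stepped over the input word, and calc_preds is then stepped to generate letters. Everything below (the variable names, the sizes, batch_size=1, and the zero seed for INIT_PRED) is an illustrative assumption, not the original training script.

# Illustrative only -- a sketch of driving the RNN class above with theano.scan.
import numpy as np
import theano
import theano.tensor as T

X = T.vector('X')   # one character id per timestep (batch_size = 1 assumed)

rnn = RNN(vocab_size=128, embed_size=64, hidden_layer_sizes=[256,256,256], batch_size=1)

# Zero cell states / outputs for the three LSTM layers
s1,h1 = rnn.genHiddens(rnn.batch_size, rnn.hidden_layer_1)
s2,h2 = rnn.genHiddens(rnn.batch_size, rnn.hidden_layer_2)
s3,h3 = rnn.genHiddens(rnn.batch_size, rnn.hidden_layer_3)

# Read the input word one letter at a time, threading the hidden state through
states,_ = theano.scan(fn=rnn.set_hiddens,
                       sequences=X,
                       outputs_info=[s1,h1,s2,h2,s3,h3])
final_states = [s[-1] for s in states]   # hidden state after the last letter

# Generate letters: pred is collected, INIT_PRED and the states recur
init_pred = np.asarray(0, dtype=theano.config.floatX)  # seed id; an end-of-word marker could be used instead
outputs,_ = theano.scan(fn=rnn.calc_preds,
                        outputs_info=[None, init_pred] + final_states,
                        n_steps=X.shape[0])
preds = outputs[0]   # stacked softmax distributions, one per generated letter
# preds can then be compared against the target character ids with rnn.calc_cost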
def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    # Input Layer
    self.input_layer = OneHot(vocab_size,batch_size,wh.eos) #EmbedLayer(vocab_size,embed_size,batch_size)
    # Init update parameters
    self.update_params = self.input_layer.update_params
    # Init memory parameters for Adagrad
    self.memory_params = self.input_layer.memory_params
    # Hidden layers
    layer_sizes = [self.input_layer.y] + hidden_layer_sizes
    self.hidden_layer_names = []
    for i in range(len(layer_sizes)-1):
        name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
        self.hidden_layer_names.append(name)
        hl = LSTMLayer(layer_sizes[i], layer_sizes[i+1], batch_size, name)
        setattr(self,name,hl)
        # Add the update parameters to the rnn class
        self.update_params += hl.update_params
        self.memory_params += hl.memory_params
    # Output Layer
    self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
    # Update Parameters - Backprop
    self.update_params += self.output_layer.update_params
    # Memory Parameters for Adagrad
    self.memory_params += self.output_layer.memory_params
class RNN:
    def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos) #EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes)-1):
            name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i], layer_sizes[i+1], batch_size, name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params
        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # Pass the word into the network to set all the hidden states.
    def set_hiddens(self,X,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(X)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        return S1,H1,S2,H2,S3,H3

    # Make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input we use to kick off the prediction. We don't
    # actually need a meaningful starting value, just a sequence of the same
    # length as our input word so we know how many letters to predict.
    def calc_preds(self,INIT_PRED,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(INIT_PRED)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        pred = self.output_layer.forward_prop(H3)
        INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX) # argmax returns an int; we need to keep everything floatX
        return pred,INIT_PRED,S1,H1,S2,H2,S3,H3

    def calc_cost(self,pred,Y):
        return T.mean(T.nnet.categorical_crossentropy(pred,Y))
def __init__(self, vocab_size, embed_size, encoder_layer_sizes, decoder_layer_sizes, batch_size, dropout=None):
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    # Input Layer
    self.input_layer = OneHot(vocab_size, batch_size, wh.eos)  #EmbedLayer(vocab_size,embed_size,batch_size)
    # Init update parameters
    self.update_params = self.input_layer.update_params
    # Init memory parameters for Adagrad
    self.memory_params = self.input_layer.memory_params
    self.current_loss = 0
    self.trained_iterations = 0
    # Encoder
    encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
    self.encoder_layer_names = []
    for i in range(len(encoder_layer_sizes) - 1):
        name = 'encoder_layer_{}'.format(i + 1)  # begin names at 1, not 0
        self.encoder_layer_names.append(name)
        hl = EncoderDecoderLayer(encoder_layer_sizes[i], encoder_layer_sizes[i + 1], batch_size, name)
        setattr(self, name, hl)
        # Add the update parameters to the rnn class
        self.update_params += hl.update_params
        self.memory_params += hl.memory_params
    # Decoder
    decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
    self.decoder_layer_names = []
    for i in range(len(decoder_layer_sizes) - 1):
        name = 'decoder_layer_{}'.format(i + 1)  # begin names at 1, not 0
        self.decoder_layer_names.append(name)
        hl = EncoderDecoderLayer(decoder_layer_sizes[i], decoder_layer_sizes[i + 1], batch_size, name)
        setattr(self, name, hl)
        # Add the update parameters to the rnn class
        self.update_params += hl.update_params
        self.memory_params += hl.memory_params
    # Output Layer
    self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1], vocab_size)
    # Update Parameters - Backprop
    self.update_params += self.output_layer.update_params
    # Memory Parameters for Adagrad
    self.memory_params += self.output_layer.memory_params
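Because the decoder's first layer takes its input width from the encoder's last, a concrete instantiation may make the size chaining easier to follow. The class name and every number below are hypothetical, chosen purely for illustration:

# Hypothetical instantiation -- the class name RNN and all sizes are illustrative assumptions
rnn = RNN(vocab_size=128,
          embed_size=64,                 # unused while the OneHot input layer is active
          encoder_layer_sizes=[256, 256],
          decoder_layer_sizes=[256, 256],
          batch_size=1)
# encoder_layer_1 : input_layer.y -> 256
# encoder_layer_2 : 256 -> 256
# decoder_layer_1 : 256 -> 256   (its input size comes from encoder_layer_sizes[-1])
# decoder_layer_2 : 256 -> 256
# output_layer    : 256 -> vocab_size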
def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    # Input Layer
    self.input_layer = OneHot(vocab_size,batch_size,wh.eos) #EmbedLayer(vocab_size,embed_size,batch_size)
    # Init update parameters
    self.update_params = self.input_layer.update_params
    # Init memory parameters for Adagrad
    self.memory_params = self.input_layer.memory_params
    # Hidden layers
    layer_sizes = [self.input_layer.y] + hidden_layer_sizes
    self.hidden_layer_names = []
    for i in range(len(layer_sizes)-1):
        name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
        self.hidden_layer_names.append(name)
        hl = LSTMLayer(layer_sizes[i], layer_sizes[i+1], name)
        setattr(self,name,hl)
        # Add the update parameters to the rnn class
        self.update_params += hl.update_params
        self.memory_params += hl.memory_params
    # Output Layer
    self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
    # Update Parameters - Backprop
    self.update_params += self.output_layer.update_params
    # Memory Parameters for Adagrad
    self.memory_params += self.output_layer.memory_params
class RNN:
    def __init__(self, vocab_size, embed_size, hidden_layer_sizes, batch_size, dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size, batch_size, -1)  #EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        # Create a value to hold the current cost when training
        self.current_cost = None
        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes) - 1):
            name = 'hidden_layer_{}'.format(i + 1)  # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i], layer_sizes[i + 1], name, dropout=0.5)
            setattr(self, name, hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params
        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1], vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    def genHiddens(self, batch_size, layer):
        return np.zeros((batch_size, layer.y), dtype='float32'), np.zeros((batch_size, layer.y), dtype='float32')

    def calc_cost(self, X, Y, S1, H1, S2, H2):
        e = self.input_layer.forward_prop(X)
        S1, H1 = self.hidden_layer_1.forward_prop(e, S1, H1)
        S2, H2 = self.hidden_layer_2.forward_prop(H1, S2, H2)
        pred = self.output_layer.forward_prop(H2)
        cost = T.nnet.categorical_crossentropy(pred, Y).mean()
        return cost, pred, S1, H1, S2, H2
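The update_params and memory_params lists that every constructor keeps appending to are paired up by the Adagrad step. The original training function isn't shown here, so the following is only a minimal sketch of what that step typically looks like, assuming each parameter and memory entry is a Theano shared variable; lr and eps are free choices, not values from the original.

# Sketch of an Adagrad update built from the update_params / memory_params pairs.
# Assumes each entry is a theano shared variable; lr and eps are illustrative.
import theano
import theano.tensor as T

def adagrad_updates(cost, params, memories, lr=0.01, eps=1e-8):
    updates = []
    for p, m, g in zip(params, memories, T.grad(cost, params)):
        m_new = m + g ** 2                                # accumulate squared gradients
        updates.append((m, m_new))                        # store the running sum
        updates.append((p, p - lr * g / T.sqrt(m_new + eps)))  # scaled gradient step
    return updates

# e.g. theano.function(..., updates=adagrad_updates(cost, rnn.update_params, rnn.memory_params))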
def __init__(self,vocab_size,embed_size,encoder_layer_sizes,decoder_layer_sizes,batch_size,dropout=None):
    self.batch_size = batch_size
    self.vocab_size = vocab_size
    # Input Layer
    self.input_layer = OneHot(vocab_size,batch_size,wh.eos) #EmbedLayer(vocab_size,embed_size,batch_size)
    # Init update parameters
    self.update_params = self.input_layer.update_params
    # Init memory parameters for Adagrad
    self.memory_params = self.input_layer.memory_params
    self.current_loss = 0
    self.trained_iterations = 0
    # Encoder
    encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
    self.encoder_layer_names = []
    for i in range(len(encoder_layer_sizes)-1):
        name = 'encoder_layer_{}'.format(i+1) # begin names at 1, not 0
        self.encoder_layer_names.append(name)
        hl = LSTMLayer(encoder_layer_sizes[i], encoder_layer_sizes[i+1], batch_size, name)
        setattr(self,name,hl)
        # Add the update parameters to the rnn class
        self.update_params += hl.update_params
        self.memory_params += hl.memory_params
    # Decoder
    decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
    self.decoder_layer_names = []
    for i in range(len(decoder_layer_sizes)-1):
        name = 'decoder_layer_{}'.format(i+1) # begin names at 1, not 0
        self.decoder_layer_names.append(name)
        hl = LSTMLayer(decoder_layer_sizes[i], decoder_layer_sizes[i+1], batch_size, name)
        setattr(self,name,hl)
        # Add the update parameters to the rnn class
        self.update_params += hl.update_params
        self.memory_params += hl.memory_params
    # Output Layer
    self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1],vocab_size)
    # Update Parameters - Backprop
    self.update_params += self.output_layer.update_params
    # Memory Parameters for Adagrad
    self.memory_params += self.output_layer.memory_params
class RNN:
    def __init__(self,vocab_size,embed_size,encoder_layer_sizes,decoder_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos) #EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        self.current_loss = 0
        self.trained_iterations = 0
        # Encoder
        encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
        self.encoder_layer_names = []
        for i in range(len(encoder_layer_sizes)-1):
            name = 'encoder_layer_{}'.format(i+1) # begin names at 1, not 0
            self.encoder_layer_names.append(name)
            hl = LSTMLayer(encoder_layer_sizes[i], encoder_layer_sizes[i+1], batch_size, name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params
        # Decoder
        decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
        self.decoder_layer_names = []
        for i in range(len(decoder_layer_sizes)-1):
            name = 'decoder_layer_{}'.format(i+1) # begin names at 1, not 0
            self.decoder_layer_names.append(name)
            hl = LSTMLayer(decoder_layer_sizes[i], decoder_layer_sizes[i+1], batch_size, name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params
        # Output Layer
        self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # Pass the word into the network to set all the hidden states.
    def encode(self,X,*hiddens):
        hiddens = list(hiddens)
        o = self.input_layer.forward_prop(X)
        # len(hiddens) will always be an even number
        # because it contains the hidden state and hidden
        # output of each layer
        for i in range(len(self.encoder_layer_names)):
            n = self.encoder_layer_names[i]
            # Get the encoder layer
            encoder_layer = getattr(self,n)
            # Determine the indices of the corresponding hidden states.
            # They will always be passed in order of layer (encoder 1, encoder 2, decoder 1, decoder 2, ...)
            # with state, then output.
            state = 2 * i      # there are 2 elements in the hidden list for every layer, so we double i
            output = state + 1 # the element after the state is always the hidden output
            # Forward Propagate
            hiddens[state],hiddens[output] = encoder_layer.forward_prop(o,hiddens[state],hiddens[output])
            o = hiddens[output]
        return hiddens

    # Make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input we use to kick off the prediction. We don't
    # actually need a meaningful starting value, just a sequence of the same
    # length as our input word so we know how many letters to predict.
    def decode(self,INIT_PRED,*hiddens):
        hiddens = list(hiddens)
        for i in range(len(self.decoder_layer_names)):
            n = self.decoder_layer_names[i]
            # Get the decoder layer
            decoder_layer = getattr(self,n)
            # Determine the indices of the corresponding hidden states.
            # They will always be passed in order of layer (encoder 1, encoder 2, decoder 1, decoder 2, ...)
            # with state, then output.
            state = 2 * i      # there are 2 elements in the hidden list for every layer, so we double i
            output = state + 1 # the element after the state is always the hidden output
            # Forward Propagate
            hiddens[state],hiddens[output] = decoder_layer.forward_prop(INIT_PRED,hiddens[state],hiddens[output])
        # Get prediction
        INIT_PRED = self.output_layer.forward_prop(hiddens[output])
        pred = T.cast(T.argmax(INIT_PRED),theano.config.floatX)
        # Put all returns into a list so the scan function
        # doesn't have to decompile multiple lists
        return_list = [pred,INIT_PRED] + hiddens
        return return_list

    def calc_cost(self,pred,Y):
        return T.mean(T.nnet.categorical_crossentropy(pred,Y))
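The encode/decode pair is written so that theano.scan can thread the flat list of (state, output) pairs through each step. Below is a rough sketch of that wiring for the encoder side only; the variable names, the zero initial states, the instantiation, and the assumption that X holds one character id per timestep are all illustrative, not the original training script.

# Illustrative only -- stepping encode over the input word with theano.scan.
import numpy as np
import theano
import theano.tensor as T

X = T.vector('X')   # one character id per timestep (batch_size = 1 assumed)

rnn = RNN(vocab_size=128, embed_size=64,
          encoder_layer_sizes=[256, 256], decoder_layer_sizes=[256, 256],
          batch_size=1)

# One zero (state, output) pair per layer, in the order the comments describe:
# encoder 1, encoder 2, ..., decoder 1, decoder 2, ...
hiddens0 = []
for name in rnn.encoder_layer_names + rnn.decoder_layer_names:
    layer = getattr(rnn, name)
    hiddens0 += [np.zeros((rnn.batch_size, layer.y), dtype='float32'),
                 np.zeros((rnn.batch_size, layer.y), dtype='float32')]

# Read the input word; encode returns the full hiddens list each step,
# so outputs_info and the return value line up one-to-one for scan.
enc_states, _ = theano.scan(fn=rnn.encode,
                            sequences=X,
                            outputs_info=hiddens0)
final_hiddens = [h[-1] for h in enc_states]   # states after the last letter
# decode would be scanned the same way, seeded with final_hiddens and an initial
# prediction, collecting the per-step outputs from the list it returns.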