Example #1
import numpy as np
import theano
import theano.tensor as T

# OneHot, LSTMLayer, SoftmaxLayer and wh (the word-helper module that
# supplies the end-of-sequence id wh.eos) are project-local and assumed
# to be in scope.

class RNN:
    def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params

        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes)-1):
            name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i],
                           layer_sizes[i+1],
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # Allocate zeroed (cell state, hidden output) arrays for one layer.
    def genHiddens(self,batch_size,layer):
        return (np.zeros((batch_size,layer.y),dtype='float32'),
                np.zeros((batch_size,layer.y),dtype='float32'))

    # pass the word into the network to set all the hidden states.
    def set_hiddens(self,X,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(X)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        return S1,H1,S2,H2,S3,H3

    # make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input from the sequence we use to kick
    # off the prediction.  We don't actually need its value, just a
    # sequence of the same length as our input word so we know how many
    # letters to predict.
    def calc_preds(self,INIT_PRED,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(INIT_PRED)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        pred = self.output_layer.forward_prop(H3)
        INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX)  # argmax returns an int; cast back so everything stays floatX
        return pred,INIT_PRED,S1,H1,S2,H2,S3,H3

    def calc_cost(self,pred,Y):
        return T.mean(T.nnet.categorical_crossentropy(pred,Y))
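
A hedged usage sketch for this class (the instance, variable names and sizes are assumptions, not from the original source): seed the hidden state with genHiddens, consume a word with set_hiddens under theano.scan, then generate the same number of letters with calc_preds, feeding each argmax back in as the next input.

# Hypothetical driver; assumes the project-local layer classes are importable.
rnn = RNN(vocab_size=64, embed_size=32,
          hidden_layer_sizes=[128, 128, 128], batch_size=1)
S1, H1 = rnn.genHiddens(rnn.batch_size, rnn.hidden_layer_1)
S2, H2 = rnn.genHiddens(rnn.batch_size, rnn.hidden_layer_2)
S3, H3 = rnn.genHiddens(rnn.batch_size, rnn.hidden_layer_3)

X = T.fvector('X')  # character ids of one word, cast to floatX
init = [theano.shared(v) for v in (S1, H1, S2, H2, S3, H3)]
# Pass 1: thread the six hidden tensors across the input letters.
hiddens, _ = theano.scan(rnn.set_hiddens, sequences=X, outputs_info=init)
last = [h[-1] for h in hiddens]  # hidden state after the final letter
# Pass 2: one step per input letter. outputs_info[0] is None because the
# softmax row is emitted but not recurrent; the argmax (INIT_PRED) is.
results, _ = theano.scan(rnn.calc_preds,
                         outputs_info=[None, X[-1]] + last,
                         n_steps=X.shape[0])
preds = results[0]  # one softmax distribution per generated letter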
Example #2
    def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        
        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes)-1):
            name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i],
                           layer_sizes[i+1],
                           batch_size,
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params
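
Every layer contributes to update_params (the trainable parameters) and memory_params (their Adagrad accumulators). A minimal sketch of the update rule those two lists support; the helper name and hyperparameters are assumptions, not part of the original source:

# Hedged Adagrad sketch: each parameter keeps a running sum of squared
# gradients in its memory twin and scales its step by that sum's root.
def adagrad_updates(cost, update_params, memory_params, lr=0.01, eps=1e-8):
    updates = []
    for p, m, g in zip(update_params, memory_params,
                       T.grad(cost, update_params)):
        m_new = m + g ** 2                       # accumulate g^2
        updates.append((m, m_new))
        updates.append((p, p - lr * g / T.sqrt(m_new + eps)))
    return updates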
Example #3
class RNN:
    def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        
        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes)-1):
            name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i],
                           layer_sizes[i+1],
                           batch_size,
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # pass the word into the network to set all the hidden states.
    def set_hiddens(self,X,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(X)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        return S1,H1,S2,H2,S3,H3

    # make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input from the sequence we use to kick
    # off the prediction.  We don't actually need its value, just a
    # sequence of the same length as our input word so we know how many
    # letters to predict.
    def calc_preds(self,INIT_PRED,S1,H1,S2,H2,S3,H3):
        e = self.input_layer.forward_prop(INIT_PRED)
        S1,H1 = self.hidden_layer_1.forward_prop(e,S1,H1)
        S2,H2 = self.hidden_layer_2.forward_prop(H1,S2,H2)
        S3,H3 = self.hidden_layer_3.forward_prop(H2,S3,H3)
        pred = self.output_layer.forward_prop(H3)
        INIT_PRED = T.cast(T.argmax(pred),theano.config.floatX)  # argmax returns an int; cast back so everything stays floatX
        return pred,INIT_PRED,S1,H1,S2,H2,S3,H3

    def calc_cost(self,pred,Y):
        return T.mean(T.nnet.categorical_crossentropy(pred,Y))
Example #4
    def __init__(self,
                 vocab_size,
                 embed_size,
                 encoder_layer_sizes,
                 decoder_layer_sizes,
                 batch_size,
                 dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(
            vocab_size, batch_size,
            wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        self.current_loss = 0
        self.trained_iterations = 0

        # Encoder
        encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
        self.encoder_layer_names = []
        for i in range(len(encoder_layer_sizes) - 1):
            name = 'encoder_layer_{}'.format(i + 1)  # begin names at 1, not 0
            self.encoder_layer_names.append(name)
            hl = EncoderDecoderLayer(encoder_layer_sizes[i],
                                     encoder_layer_sizes[i + 1], batch_size,
                                     name)
            setattr(self, name, hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Decoder
        decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
        self.decoder_layer_names = []
        for i in range(len(decoder_layer_sizes) - 1):
            name = 'decoder_layer_{}'.format(i + 1)  # begin names at 1, not 0
            self.decoder_layer_names.append(name)
            hl = EncoderDecoderLayer(decoder_layer_sizes[i],
                                     decoder_layer_sizes[i + 1], batch_size,
                                     name)
            setattr(self, name, hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1], vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params
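
The two size-prepending lines chain the stacks together: the first encoder layer reads the input layer's one-hot width, and the first decoder layer reads the last encoder layer's width. A worked example with hypothetical widths:

encoder_layer_sizes = [64] + [128, 96]   # input width 64 prepended
decoder_layer_sizes = [encoder_layer_sizes[-1]] + [96, 128]
print(list(zip(encoder_layer_sizes, encoder_layer_sizes[1:])))
# [(64, 128), (128, 96)]  -> encoder layers 64->128, 128->96
print(list(zip(decoder_layer_sizes, decoder_layer_sizes[1:])))
# [(96, 96), (96, 128)]   -> decoder layers 96->96, 96->128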
Example #5
    def __init__(self,vocab_size,embed_size,hidden_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params

        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes)-1):
            name = 'hidden_layer_{}'.format(i+1) # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i],
                           layer_sizes[i+1],
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params
Example #6
class RNN:
    def __init__(self,
                 vocab_size,
                 embed_size,
                 hidden_layer_sizes,
                 batch_size,
                 dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(
            vocab_size, batch_size,
            -1)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        # create a value to hold the current cost when training
        self.current_cost = None

        # Hidden layers
        layer_sizes = [self.input_layer.y] + hidden_layer_sizes
        self.hidden_layer_names = []
        for i in range(len(layer_sizes) - 1):
            name = 'hidden_layer_{}'.format(i + 1)  # begin names at 1, not 0
            self.hidden_layer_names.append(name)
            hl = LSTMLayer(layer_sizes[i],
                           layer_sizes[i + 1],
                           name,
                           dropout=0.5)
            setattr(self, name, hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(hidden_layer_sizes[-1], vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # Allocate zeroed (cell state, hidden output) arrays for one layer.
    def genHiddens(self, batch_size, layer):
        return np.zeros((batch_size, layer.y), dtype='float32'), np.zeros(
            (batch_size, layer.y), dtype='float32')

    def calc_cost(self, X, Y, S1, H1, S2, H2):
        e = self.input_layer.forward_prop(X)
        S1, H1 = self.hidden_layer_1.forward_prop(e, S1, H1)
        S2, H2 = self.hidden_layer_2.forward_prop(H1, S2, H2)
        pred = self.output_layer.forward_prop(H2)
        cost = T.nnet.categorical_crossentropy(pred, Y).mean()
        return cost, pred, S1, H1, S2, H2
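
A hedged sketch of compiling calc_cost into one training step with theano.function, reusing the adagrad_updates helper sketched after Example #2. The symbolic input types are guesses about what the project-local layer classes expect, and rnn is an assumed instance of this class:

# Hypothetical compile step (types and names are assumptions).
X = T.fvector('X')   # one input step, cast to floatX
Y = T.fmatrix('Y')   # one-hot targets
S1 = T.fmatrix('S1')
H1 = T.fmatrix('H1')
S2 = T.fmatrix('S2')
H2 = T.fmatrix('H2')
cost, pred, S1n, H1n, S2n, H2n = rnn.calc_cost(X, Y, S1, H1, S2, H2)
train_step = theano.function(
    [X, Y, S1, H1, S2, H2],
    [cost, S1n, H1n, S2n, H2n],
    updates=adagrad_updates(cost, rnn.update_params, rnn.memory_params))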
Example #7
    def __init__(self,vocab_size,embed_size,encoder_layer_sizes,decoder_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        self.current_loss = 0
        self.trained_iterations = 0
        # Encoder
        encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
        self.encoder_layer_names = []
        for i in range(len(encoder_layer_sizes)-1):
            name = 'encoder_layer_{}'.format(i+1) # begin names at 1, not 0
            self.encoder_layer_names.append(name)
            hl = LSTMLayer(encoder_layer_sizes[i],
                           encoder_layer_sizes[i+1],
                           batch_size,
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Decoder
        decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
        self.decoder_layer_names = []
        for i in range(len(decoder_layer_sizes)-1):
            name = 'decoder_layer_{}'.format(i+1) # begin names at 1, not 0
            self.decoder_layer_names.append(name)
            hl = LSTMLayer(decoder_layer_sizes[i],
                           decoder_layer_sizes[i+1],
                           batch_size,
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params
Example #8
class RNN:
    def __init__(self,vocab_size,embed_size,encoder_layer_sizes,decoder_layer_sizes,batch_size,dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(vocab_size,batch_size,wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        self.current_loss = 0
        self.trained_iterations = 0
        # Encoder
        encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
        self.encoder_layer_names = []
        for i in range(len(encoder_layer_sizes)-1):
            name = 'encoder_layer_{}'.format(i+1) # begin names at 1, not 0
            self.encoder_layer_names.append(name)
            hl = LSTMLayer(encoder_layer_sizes[i],
                           encoder_layer_sizes[i+1],
                           batch_size,
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Decoder
        decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
        self.decoder_layer_names = []
        for i in range(len(decoder_layer_sizes)-1):
            name = 'decoder_layer_{}'.format(i+1) # begin names at 1, not 0
            self.decoder_layer_names.append(name)
            hl = LSTMLayer(decoder_layer_sizes[i],
                           decoder_layer_sizes[i+1],
                           batch_size,
                           name)
            setattr(self,name,hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1],vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # pass the word into the network to set all the hidden states.
    def encode(self,X,*hiddens):
        hiddens = list(hiddens)
        o = self.input_layer.forward_prop(X)
        # len(hiddens) will always be an even number
        # because it contains the hidden state and hidden
        # output of each layer
        for i in range(len(self.encoder_layer_names)):
            n = self.encoder_layer_names[i]
            # Get the encoder layer
            encoder_layer = getattr(self,n)
            # Determine the indices of the corresponding hidden states.
            # They are always passed in layer order (encoder1, encoder2, decoder1, decoder2, ...)
            # with state, then output.
            state = 2 * i # the hidden list holds 2 elements per layer, so we double i
            output = state + 1 # the element right after the state is always the hidden output
            # Forward Propagate
            hiddens[state],hiddens[output] = encoder_layer.forward_prop(o,hiddens[state],hiddens[output])
            o = hiddens[output]
        return hiddens

    # make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input from the sequence we use to kick
    # off the prediction.  We don't actually need its value, just a
    # sequence of the same length as our input word so we know how many
    # letters to predict.
    def decode(self,INIT_PRED,*hiddens):
        hiddens = list(hiddens)
        for i in range(len(self.decoder_layer_names)):
            n = self.decoder_layer_names[i]
            # Get the decoder layer
            decoder_layer = getattr(self,n)
            # Determine the indicies of the corresponding hidden states.
            # They will always be passed in order of layer (encoder1, encoder2, decoder 1, decoder 2, ...)
            # with state, then output.
            state = 2 * i # Because there are 2 elements in the hidden list for every 1 layer we double i
            output = state + 1 # By adding 1 we get the element after k, which is always the hidden output
            # Forward Propagate
            hiddens[state],hiddens[output] = decoder_layer.forward_prop(INIT_PRED,hiddens[state],hiddens[output])
        # Get prediction
        INIT_PRED = self.output_layer.forward_prop(hiddens[output])
        pred = T.cast(T.argmax(INIT_PRED),theano.config.floatX)
        # Put all returns into a single list so the scan function
        # doesn't have to unpack multiple lists
        return_list = [pred,INIT_PRED] + hiddens
        return return_list

    def calc_cost(self,pred,Y):
        return T.mean(T.nnet.categorical_crossentropy(pred,Y))
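
encode and decode receive the hidden tensors as one flat *hiddens argument, so the caller has to build that list in the order the comments describe: a (state, output) pair per layer, encoder layers first. A hedged helper sketch (the helper name is an assumption):

# Hypothetical helper: build the flat list [enc1_S, enc1_H, enc2_S,
# enc2_H, ..., dec1_S, dec1_H, ...] in the documented order.
def init_flat_hiddens(rnn, batch_size):
    hiddens = []
    for name in rnn.encoder_layer_names + rnn.decoder_layer_names:
        layer = getattr(rnn, name)
        hiddens.append(np.zeros((batch_size, layer.y), dtype='float32'))
        hiddens.append(np.zeros((batch_size, layer.y), dtype='float32'))
    return hiddens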
Example #9
class RNN:
    def __init__(self,
                 vocab_size,
                 embed_size,
                 encoder_layer_sizes,
                 decoder_layer_sizes,
                 batch_size,
                 dropout=None):
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        # Input Layer
        self.input_layer = OneHot(
            vocab_size, batch_size,
            wh.eos)  # alternative: EmbedLayer(vocab_size,embed_size,batch_size)
        # Init update parameters
        self.update_params = self.input_layer.update_params
        # Init memory parameters for Adagrad
        self.memory_params = self.input_layer.memory_params
        self.current_loss = 0
        self.trained_iterations = 0
        # Encoder
        encoder_layer_sizes = [self.input_layer.y] + encoder_layer_sizes
        self.encoder_layer_names = []
        for i in range(len(encoder_layer_sizes) - 1):
            name = 'encoder_layer_{}'.format(i + 1)  # begin names at 1, not 0
            self.encoder_layer_names.append(name)
            hl = LSTMLayer(encoder_layer_sizes[i], encoder_layer_sizes[i + 1],
                           batch_size, name)
            setattr(self, name, hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Decoder
        decoder_layer_sizes = [encoder_layer_sizes[-1]] + decoder_layer_sizes
        self.decoder_layer_names = []
        for i in range(len(decoder_layer_sizes) - 1):
            name = 'decoder_layer_{}'.format(i + 1)  # begin names at 1, not 0
            self.decoder_layer_names.append(name)
            hl = LSTMLayer(decoder_layer_sizes[i], decoder_layer_sizes[i + 1],
                           batch_size, name)
            setattr(self, name, hl)
            # Add the update parameters to the rnn class
            self.update_params += hl.update_params
            self.memory_params += hl.memory_params

        # Output Layer
        self.output_layer = SoftmaxLayer(decoder_layer_sizes[-1], vocab_size)
        # Update Parameters - Backprop
        self.update_params += self.output_layer.update_params
        # Memory Parameters for Adagrad
        self.memory_params += self.output_layer.memory_params

    # pass the word into the network to set all the hidden states.
    def encode(self, X, *hiddens):
        hiddens = list(hiddens)
        o = self.input_layer.forward_prop(X)
        # len(hiddens) will always be an even number
        # because it contains the hidden state and hidden
        # output of each layer
        for i in range(len(self.encoder_layer_names)):
            n = self.encoder_layer_names[i]
            # Get the encoder layer
            encoder_layer = getattr(self, n)
            # Determine the indices of the corresponding hidden states.
            # They are always passed in layer order (encoder1, encoder2, decoder1, decoder2, ...)
            # with state, then output.
            state = 2 * i  # the hidden list holds 2 elements per layer, so we double i
            output = state + 1  # the element right after the state is always the hidden output
            # Forward Propagate
            hiddens[state], hiddens[output] = encoder_layer.forward_prop(
                o, hiddens[state], hiddens[output])
            o = hiddens[output]
        return hiddens

    # make predictions after the word has been sent through the
    # entire network.
    # INIT_PRED is the input from the sequence we use to kick
    # off the prediction.  We don't actually need its value, just a
    # sequence of the same length as our input word so we know how many
    # letters to predict.
    def decode(self, INIT_PRED, *hiddens):
        hiddens = list(hiddens)
        for i in range(len(self.decoder_layer_names)):
            n = self.decoder_layer_names[i]
            # Get the decoder layer
            decoder_layer = getattr(self, n)
            # Determine the indices of the corresponding hidden states.
            # They are always passed in layer order (encoder1, encoder2, decoder1, decoder2, ...)
            # with state, then output.
            state = 2 * i  # the hidden list holds 2 elements per layer, so we double i
            output = state + 1  # the element right after the state is always the hidden output
            # Forward Propagate
            hiddens[state], hiddens[output] = decoder_layer.forward_prop(
                INIT_PRED, hiddens[state], hiddens[output])
        # Get prediction
        INIT_PRED = self.output_layer.forward_prop(hiddens[output])
        pred = T.cast(T.argmax(INIT_PRED), theano.config.floatX)
        # Put all returns into a single list so the scan function
        # doesn't have to unpack multiple lists
        return_list = [pred, INIT_PRED] + hiddens
        return return_list

    def calc_cost(self, pred, Y):
        return T.mean(T.nnet.categorical_crossentropy(pred, Y))