Example #1
File: SdA.py  Project: nnkoushik/tetris
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        corruption_levels=[0.1, 0.1]
    ):

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = theano.shared(
            value=numpy.zeros((1,), dtype=theano.config.floatX),
            name='y',
            borrow=True
        )  # the labels are presented as a 1D vector of doubles
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        # start-snippet-2
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are only going to declare the parameters of the
            # sigmoid_layers to be parameters of the SdA;
            # the visible biases in the dA are parameters of those
            # dAs, but not of the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now add a value function layer on top of the output of the
        # last hidden layer
        self.valueLayer = ValueFunction(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
        )

        self.params.extend(self.valueLayer.params)
        # construct a function that implements one step of finetuning

        # calculate the squared error for the value function
        self.finetune_cost = self.valueLayer.cost(self.y)

        self.error = self.valueLayer.cost(self.y)
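
The constructor above only wires the network together. As a quick orientation, here is a minimal, hypothetical instantiation sketch; the seed and layer sizes are illustrative, and HiddenLayer, dA and ValueFunction are assumed to be defined elsewhere in the project:

# Hypothetical instantiation sketch (not part of the project code)
import numpy

numpy_rng = numpy.random.RandomState(1234)   # illustrative seed
sda = SdA(
    numpy_rng=numpy_rng,
    n_ins=784,                               # size of the flattened input
    hidden_layers_sizes=[500, 500],          # one entry per hidden layer
    corruption_levels=[0.1, 0.1],            # one entry per dA layer
)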
Example #2
File: SdA.py  Project: nnkoushik/tetris
class SdA(object):

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        corruption_levels=[0.1, 0.1]
    ):

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = theano.shared(
            value=numpy.zeros((1,), dtype=theano.config.floatX),
            name='y',
            borrow=True
        )  # the labels are presented as a 1D vector of doubles
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        # start-snippet-2
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are only going to declare the parameters of the
            # sigmoid_layers to be parameters of the SdA;
            # the visible biases in the dA are parameters of those
            # dAs, but not of the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now add a value function layer on top of the output of the
        # last hidden layer
        self.valueLayer = ValueFunction(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
        )

        self.params.extend(self.valueLayer.params)
        # construct a function that implements one step of finetuning

        # calculate the squared error for the value function
        self.finetune_cost = self.valueLayer.cost(self.y)

        self.error = self.valueLayer.cost(self.y)

#    def __getstate__(self):
#        state_list = []
#        for i in xrange(self.n_layers):
#            state_list.append(self.sigmoid_layers[i].__getstate__())
#        state_list.append(self.valueLayer.__getstate__())
#        return state_list    
#
#    def __setstate__(self, state_list):
#        self.params = []
#        for i in xrange(self.n_layers):
#            self.sigmoid_layers[i].__setstate__(state_list[i])
#            self.dA_layers[i].__setstate__(state_list[i])
#            self.params.extend(sigmoid_layers[i].params)
#        self.valueLayer.__setstate__(state_list[-1])
#        self.params.extend(self.valueLayer.params)  
        
    def compute_val(self, inp):
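        # Forward-pass a concrete (non-symbolic) input: feed it through each
        # hidden layer's compute_val and then through the value layer's
        # compute_val, returning the resulting value estimate.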
        curr = numpy.copy(inp)
        for level in self.sigmoid_layers:
            curr = level.compute_val(curr)
        curr = self.valueLayer.compute_val(curr)
        return curr

    def pretraining_functions(self, train_set_x, batch_size):
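        '''Generates a list of Theano functions, one per dA layer, each
        implementing one step of pretraining on the minibatch selected by
        `index`. The corruption level and learning rate can be overridden
        at call time via the `corruption` and `lr` arguments (defaults
        0.2 and 0.1).

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: shared variable that contains all the
                            datapoints used for training the dA layers

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''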

        index = T.lscalar('index')  # index to a [mini]batch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(corruption_level, default=0.2),
                    theano.Param(learning_rate, default=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning. The `validate` and `test` functions of the usual
        interface are disabled here (their code is commented out below),
        so placeholders are returned in their place.

        :type datasets: pair of theano.tensor.TensorType
        :param datasets: a pair `(train_set_x, train_set_y)` of Theano
                         shared variables, one for the datapoints and
                         one for the target values

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        '''

        (train_set_x, train_set_y) = datasets
        #(valid_set_x, valid_set_y) = datasets[1]
        #(test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        #n_valid_batches /= batch_size
        #n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        #n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='train',
        )
#
#        test_score_i = theano.function(
#            [index],
#            self.errors,
#            givens={
#                self.x: test_set_x[
#                    index * batch_size: (index + 1) * batch_size
#                ],
#                self.y: test_set_y[
#                    index * batch_size: (index + 1) * batch_size
#                ]
#            },
#            name='test'
#        )
#
#        valid_score_i = theano.function(
#            [index],
#            self.errors,
#            givens={
#                self.x: valid_set_x[
#                    index * batch_size: (index + 1) * batch_size
#                ],
#                self.y: valid_set_y[
#                    index * batch_size: (index + 1) * batch_size
#                ]
#            },
#            name='valid'
#        )
#
#        # Create a function that scans the entire validation set
#        def valid_score():
#            return [valid_score_i(i) for i in xrange(n_valid_batches)]
#
#        # Create a function that scans the entire test set
#        def test_score():
#            return [test_score_i(i) for i in xrange(n_test_batches)]
#
        return train_fn, 0, 0
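
Taken together, the pretraining and finetuning functions are meant to be driven by an outer training loop. Below is a hedged, self-contained sketch of such a driver in the same legacy Theano / Python 2 style as the class above (matching its xrange and theano.Param usage); the random data, batch size, epoch counts and learning rates are illustrative assumptions, not values taken from the project:

# Hypothetical driver loop; random data stands in for the real dataset
import numpy
import theano

floatX = theano.config.floatX
numpy_rng = numpy.random.RandomState(1234)
train_set_x = theano.shared(numpy_rng.rand(1000, 784).astype(floatX), borrow=True)
train_set_y = theano.shared(numpy_rng.rand(1000).astype(floatX), borrow=True)

sda = SdA(numpy_rng=numpy_rng, n_ins=784,
          hidden_layers_sizes=[500, 500], corruption_levels=[0.1, 0.1])

batch_size = 20
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

# unsupervised, layer-wise pretraining: one compiled function per dA layer
pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                            batch_size=batch_size)
for i in xrange(sda.n_layers):
    for epoch in xrange(15):
        c = [pretraining_fns[i](index=b, corruption=0.1, lr=0.001)
             for b in xrange(n_train_batches)]
        print 'Pretraining layer %d, epoch %d, cost %f' % (i, epoch, numpy.mean(c))

# supervised finetuning of the whole stack against the value targets
train_fn, _, _ = sda.build_finetune_functions(
    datasets=(train_set_x, train_set_y),
    batch_size=batch_size,
    learning_rate=0.1)
for epoch in xrange(100):
    c = [train_fn(b) for b in xrange(n_train_batches)]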