Example #1
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=1,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.matrix('y')  # the targets are presented as a matrix of
                                # real values (squared-error regression)

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question, but we are going to
            # only declare that the parameters of the sigmoid_layers are
            # parameters of the StackedDAA; the visible biases in the dA
            # are parameters of those dAs, but not of the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.get_prediction = theano.function(inputs=[self.x],
                                              outputs=[self.logLayer.y_pred])
        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for the second phase of training; here it is the
        # squared error of the output layer rather than the negative log
        # likelihood kept commented out below
        # self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.finetune_cost = self.logLayer.squared_error(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
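
Only `__init__` is listed above, so here is a minimal usage sketch. It assumes the enclosing class is the `SdA` from the Theano deep-learning tutorials and that `theano`, `HiddenLayer`, `dA` and `LogisticRegression` are importable; the class name and the call below are illustrative, not part of the listing.

import numpy
import theano

numpy_rng = numpy.random.RandomState(89677)
sda = SdA(numpy_rng=numpy_rng,              # SdA is the assumed class name
          n_ins=784,                        # e.g. 28x28 rasterized images
          hidden_layers_sizes=[500, 500],   # two hidden layers
          n_outs=1,                         # single real-valued output
          corruption_levels=[0.1, 0.1])     # one corruption level per layer

# one denoising autoencoder is built per hidden layer, sharing W and the
# hidden bias with the corresponding sigmoid layer
assert len(sda.dA_layers) == sda.n_layers

# get_prediction is an ordinary Theano function compiled in __init__
y_pred, = sda.get_prediction(numpy.zeros((5, 784), dtype=theano.config.floatX))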
Example #2
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500], hidden_recurrent=150, n_outs=1):
        self.sigmoid_layers = []
        self.rnnrbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector
                                 # of [int] labels

        # This network is an MLP whose first hidden layer shares its weights
        # with an RNN-RBM.  We will first construct the network as a deep
        # multilayer perceptron, and when constructing the first sigmoidal
        # layer we also construct an RNN-RBM that shares weights with it.
        # During pretraining we will train the RNN-RBM (which will lead to
        # changing the weights of the MLP as well).
        # During finetuning we will finish training the network by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the network if we are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            # it's arguably a philosophical question, but we are going to
            # only declare that the parameters of the sigmoid_layers are
            # parameters of the stacked model; the visible biases of the
            # RNN-RBM are parameters of that RNN-RBM, but not of the stack

            # The first layer shares weights with an RNN-RBM; the remaining
            # layers are plain sigmoid layers
            if i == 0:
                rnnrbm_layer = RnnRbm(n_visible=input_size,
                                      input=layer_input,
                                      n_hidden=hidden_layers_sizes[i],
                                      n_hidden_recurrent=hidden_recurrent,
                                      lr=0.001)
                self.rnnrbm_layers.append(rnnrbm_layer)

                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in=input_size,
                                            n_out=hidden_layers_sizes[i],
                                            activation=T.nnet.sigmoid,
                                            W=rnnrbm_layer.W,
                                            b=rnnrbm_layer.bh_t)
                # add the layer to our list of layers
                self.sigmoid_layers.append(sigmoid_layer)
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in=input_size,
                                            n_out=hidden_layers_sizes[i],
                                            activation=T.nnet.sigmoid)
                # add the layer to our list of layers
                self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.get_prediction = theano.function(
            inputs=[self.x],
            outputs=[self.logLayer.y_pred])
        self.get_py = theano.function(
            inputs=[self.x],
            outputs=[self.logLayer.p_y_given_x])
        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for the second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
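
As with the first example, only `__init__` is shown. The sketch below assumes an enclosing class, called `RnnRbmSdA` here purely for illustration, and assumes `RnnRbm`, `HiddenLayer` and `LogisticRegression` are importable.

import numpy
import theano

numpy_rng = numpy.random.RandomState(1234)
net = RnnRbmSdA(numpy_rng=numpy_rng,        # RnnRbmSdA is a placeholder name
                n_ins=784,
                hidden_layers_sizes=[500],  # only layer 0 gets an RNN-RBM
                hidden_recurrent=150,
                n_outs=2)                   # e.g. two output classes

# class probabilities and hard predictions for a batch of inputs
batch = numpy.zeros((10, 784), dtype=theano.config.floatX)
p_y, = net.get_py(batch)
y_hat, = net.get_prediction(batch)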
Example #3
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=1):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        
        self.get_prediction = theano.function(
            inputs=[self.x],
            outputs=[self.logLayer.y_pred])

        self.get_py = theano.function(
            inputs=[self.x],
            outputs=[self.logLayer.p_y_given_x])
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
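
Again only `__init__` is listed. A minimal usage sketch follows, assuming the enclosing class is the `DBN` from the Theano deep-learning tutorials and that `RBM`, `HiddenLayer` and `LogisticRegression` are importable; the class name and the call are illustrative.

import numpy
import theano

numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng,              # DBN is the assumed class name
          n_ins=784,
          hidden_layers_sizes=[500, 500],
          n_outs=2)                         # e.g. two output classes

# one RBM per hidden layer, each sharing W and its hidden bias with the
# corresponding sigmoid layer
assert len(dbn.rbm_layers) == dbn.n_layers
p_y, = dbn.get_py(numpy.zeros((10, 784), dtype=theano.config.floatX))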
Example #4
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=1,
                 y_type=1,
                 gbrbm=False,
                 dropout=False,
                 activation_function=None):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: sizes of the intermediate layers; must
                                    contain at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type y_type: int
        :param y_type: 0 for real-valued targets (squared-error finetuning
                       cost), any other value for integer class labels
                       (negative-log-likelihood finetuning cost)

        :type gbrbm: bool
        :param gbrbm: passed on to each RBM layer

        :type dropout: bool
        :param dropout: if True, build dropout twins of the hidden and
                        output layers and train through them

        :type activation_function: str or None
        :param activation_function: 'ReLU' selects rectified linear units;
                                    any other value selects the sigmoid
        """

        self.dropout = dropout
        self.sigmoid_layers = []
        self.dropout_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        rectified_linear_activation = lambda x: T.maximum(0.0, x)
        # activation_function = T.nnet.sigmoid
        if not activation_function:
            print 'Sigmoid'
            self.activation_function = T.nnet.sigmoid
        else:
            if activation_function == 'ReLU':
                print 'ReLU'
                self.activation_function = rectified_linear_activation
            else:
                print 'Sigmoid'
                self.activation_function = T.nnet.sigmoid

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        if y_type == 0:
            self.y = T.matrix('y')   # real-valued targets as a matrix
        else:
            self.y = T.ivector('y')  # the labels are presented as a 1D
                                     # vector of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
                if dropout:
                    dropout_layer_input = _dropout_from_layer(numpy_rng,
                                                              self.x,
                                                              p=0.2)
            else:
                layer_input = self.sigmoid_layers[-1].output
                if dropout:
                    dropout_layer_input = self.dropout_layers[-1].output
            if dropout:
                print 'Dropout'
                dropout_layer = DropoutHiddenLayer(
                    rng=numpy_rng,
                    input=dropout_layer_input,
                    n_in=input_size,
                    n_out=hidden_layers_sizes[i],
                    activation=self.activation_function,
                )
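                # The evaluation-time layer below reuses the dropout layer's
                # parameters, with W scaled by the presumed retain
                # probability of its input (0.8 after 20% input dropout,
                # 0.5 for hidden layers) so that expected activations match
                # at test time.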
                sigmoid_layer = HiddenLayer(
                    rng=numpy_rng,
                    input=layer_input,
                    n_in=input_size,
                    n_out=hidden_layers_sizes[i],
                    activation=self.activation_function,
                    W=dropout_layer.W * (0.8 if i == 0 else 0.5),
                    b=dropout_layer.b)
                self.dropout_layers.append(dropout_layer)
                self.params.extend(dropout_layer.params)

            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                            input=layer_input,
                                            n_in=input_size,
                                            n_out=hidden_layers_sizes[i],
                                            activation=T.nnet.sigmoid)
                self.params.extend(sigmoid_layer.params)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # Construct an RBM that shares weights with this layer
            if dropout:
                layer = dropout_layer
            else:
                layer = sigmoid_layer

            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=layer.W,
                            hbias=layer.b,
                            y_type=y_type,
                            gbrbm=gbrbm)

            self.rbm_layers.append(rbm_layer)
        if dropout:
            self.dropout_output_layer = LogisticRegression(
                input=self.dropout_layers[-1].output,
                n_in=hidden_layers_sizes[-1],
                n_out=n_outs,
                y_type=y_type)

            self.logLayer = LogisticRegression(
                input=self.sigmoid_layers[-1].output,
                n_in=hidden_layers_sizes[-1],
                n_out=n_outs,
                y_type=y_type,
                W=self.dropout_output_layer.W * 0.5,
                b=self.dropout_output_layer.b)
        else:
            self.logLayer = LogisticRegression(
                input=self.sigmoid_layers[-1].output,
                n_in=hidden_layers_sizes[-1],
                n_out=n_outs,
                y_type=y_type)

        self.get_prediction = theano.function(inputs=[self.x],
                                              outputs=[self.logLayer.y_pred])
        self.get_py = theano.function(inputs=[self.x],
                                      outputs=[self.logLayer.p_y_given_x])

        if dropout:
            self.get_prediction_dropout = theano.function(
                inputs=[self.x], outputs=[self.dropout_output_layer.y_pred])
            self.get_py = theano.function(
                inputs=[self.x],
                outputs=[self.dropout_output_layer.p_y_given_x])
            self.params.extend(self.dropout_output_layer.params)
        else:
            self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training: the squared
        # error of the output layer for real-valued targets (y_type == 0),
        # its negative log likelihood otherwise
        if y_type == 0:
            if dropout:
                self.dropout_finetune_cost = self.dropout_output_layer.squared_error(
                    self.y)
            self.finetune_cost = self.logLayer.squared_error(self.y)
        else:
            if dropout:
                self.dropout_finetune_cost = self.dropout_output_layer.negative_log_likelihood(
                    self.y)
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        if dropout:
            self.dropout_errors = self.dropout_output_layer.errors(self.y)
        self.errors = self.logLayer.errors(self.y)
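
A minimal usage sketch for this dropout/ReLU variant, again assuming the enclosing class is called `DBN` (only `__init__` is shown) and that `RBM`, `HiddenLayer`, `DropoutHiddenLayer`, `_dropout_from_layer` and `LogisticRegression` are importable.

import numpy
import theano

numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng,
          n_ins=784,
          hidden_layers_sizes=[500, 500],
          n_outs=10,                   # e.g. ten classes
          y_type=1,                    # integer labels -> NLL finetuning cost
          gbrbm=False,
          dropout=True,                # build dropout twins of every layer
          activation_function='ReLU')

# the dropout twins are trained; the rescaled layers are used for evaluation
assert len(dbn.dropout_layers) == dbn.n_layers
y_hat, = dbn.get_prediction(numpy.zeros((10, 784), dtype=theano.config.floatX))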