Example #1
class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)

    def train(self):
        layer_input = self.hidden_layer.forward()
        self.log_layer.train(input=layer_input)
        self.hidden_layer.backward(prev_layer=self.log_layer)
        

    def predict(self, x):
        x = self.hidden_layer.output(x)
        return self.log_layer.predict(x)
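
A minimal usage sketch for the MLP class above. It assumes the companion HiddenLayer and LogisticRegression modules from the same project are importable and that labels are one-hot encoded; the toy data and epoch count are illustrative, not from the original source.

import numpy

# toy data: 4 samples, 2 features, 2 one-hot classes (hypothetical)
x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = numpy.array([[1, 0], [0, 1], [0, 1], [1, 0]])

mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2,
          rng=numpy.random.RandomState(1234))

for epoch in range(500):    # alternate forward and backward passes
    mlp.train()

print(mlp.predict(x))       # class probabilities for the training inputs
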
def tuneThreshold():
    """
        Explore different values of threshold to see which one fits best
    """
    thresholds = np.linspace(0.4,0.6, 10)
    
    bestAcc = 0.0
    bestModel = None
    X_tr, y_tr, w_tr = loadData()
    m, n = X_tr.shape
    for th in thresholds:
        model = LogisticRegression(features=['PRI_tau_eta',
                                            'PRI_lep_eta',
                                            'DER_deltar_tau_lep',
                                            'PRI_met_sumet',
                                            'DER_mass_transverse_met_lep'],
                                    threshold=th)
        model.train(X_tr, y_tr, w_tr)
        p, r = model.predict(X_tr)
        #calculate some accuracy on the same train set
        acc =  100.0*(p.flatten() == y_tr.flatten()).sum()/m
        print "%s %s%%"%(th, acc)
        if acc > bestAcc:
            bestAcc = acc
            bestModel = model
    
    #save the best model
    bestModel.save('data/logisticRegression%.2f.txt' % bestAcc)  # name the file after the best accuracy, not the last one
Example #3
    def __init__(self, image_shape = [28, 12], filter_shape = [5, 5],
                nkerns = [20, 50], batch_size = 500):
        self.layers = []
        rng = np.random.RandomState(23455)

        # generate symbolic variables for input (x and y represent a
        # minibatch)
        self.x = T.matrix('x')  # data, presented as rasterized images
        self.y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

        layer0_input = self.x.reshape((batch_size, 1, image_shape[0], image_shape[0]))
        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
        layer0 = Layer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 1, image_shape[0], image_shape[0]),
            filter_shape=(nkerns[0], 1, filter_shape[0], filter_shape[0]),
            poolsize=(2, 2)
        )
        self.layers.append(layer0)

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
        layer1 = Layer(
            rng,
            input=layer0.output,
            image_shape=(batch_size, nkerns[0], image_shape[1], image_shape[1]),
            filter_shape=(nkerns[1], nkerns[0], filter_shape[1], filter_shape[1]),
            poolsize=(2, 2)
        )
        self.layers.append(layer1)

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
        # or (500, 50 * 4 * 4) = (500, 800) with the default values.
        layer2_input = layer1.output.flatten(2)

        # construct a fully-connected sigmoidal layer
        layer2 = HiddenLayer(
            input=layer2_input,
            rng = rng,
            n_in=nkerns[1] * 4 * 4,
            n_out=500,
            activ=T.tanh
        )
        self.layers.append(layer2)

        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
        self.layers.append(layer3)

        # the cost we minimize during training is the NLL of the model
        self.cost = layer3.negative_log_likelihood(self.y)
    def test_classification(self, X, y):
        logreg = LogisticRegression(lam_2=0.5)
        logreg.train(X, y)
        print("predict", logreg.predict(X[0]))
        print("error:", sum((np.array([logreg.predict(x)
                                       for x in X]) - np.array(y))**2))
        print("F:", logreg.F(logreg.w, X, y))
        print("w:", logreg.w)

        print(logreg.fevals, logreg.gevals, logreg.adp)
Example #5
def cross_validation(X,y,bsize, fold, eta, solver="SGD", wdecay=0):
    from sklearn.cross_validation import StratifiedKFold
    from LogisticRegression import LogisticRegression
    scores=[]
    skf = StratifiedKFold( y, fold) 
    for train_index, test_index in skf:
        X_train, X_test, y_train, y_test = X[train_index,:], X[test_index,:], y[train_index], y[test_index]
        lr = LogisticRegression(learning=solver,weight_decay=wdecay,eta_0=eta, batch_size=bsize)
        lr.fit(X_train,y_train)
        scores.append(lr.score(X_test,y_test))
    return np.mean(scores)
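
A hedged call example for cross_validation; the synthetic data, the hyperparameter values, and the availability of the custom LogisticRegression module are assumptions for illustration only.

import numpy as np

# synthetic two-class data (illustrative only)
X = np.random.rand(200, 5)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)

# mean accuracy over 5 stratified folds of the custom SGD logistic regression
print(cross_validation(X, y, bsize=32, fold=5, eta=0.01, solver="SGD", wdecay=0.001))
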
Example #6
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 rng=None):
        
        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        
        assert self.n_layers > 0


        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
                
            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)


            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,     # W, b are shared
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)


        # layer for output using Logistic Regression
        #print self.sigmoid_layers[-1].sample_h_given_v().shape
        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()
Example #7
def main():
    #model = FakeModel() #TODO model parameters
    model = LogisticRegression(features=['PRI_tau_eta',
                                        'PRI_lep_eta',
                                        'DER_deltar_tau_lep',
                                        'PRI_met_sumet',
                                        'DER_mass_transverse_met_lep'])
    #load some previously saved model parameters                                    
    model.load('data/logisticRegression69.61.txt')
    
    #load test data
    data = np.loadtxt("data/test.csv", delimiter=',', skiprows=1)
    ids = data[:,0].astype(int) #first column is id
    X = data[:,1:31] #30 features
    
    #make predictions (ranking and label)
    r, p = model.predict(X)
    
    #save to file
    save(ids, r, p)
Example #8
class NeuralNetwork:
    def __init__(self, rng, n_in, n_out, hl):

        #  will contain a list of HiddenLayer objects.
        self.layers = []

        inp_size = n_in
        for i in range(len(hl)):
            HL = HiddenLayer(rng, inp_size, hl[i])
            self.layers.append(HL)
            inp_size = hl[i]

        self.op = LogisticRegression(inp_size, n_out)
        self.params = []
        for l in self.layers:
            self.params = self.params + l.params

        self.params = self.params + self.op.params
        # self.params = [l.params for l in self.layers]

        #  forward pass is here

    def forward(self, x):
        act = [x]

        for i, l in enumerate(self.layers):
            act.append(l.output(act[i]))

        return act

    def cost(self, x, y):
        act = self.forward(x)
        estimate = act[-1]
        return self.op.cost(estimate, y)

    def calcAccuracy(self, x, y):
        act = self.forward(x)
        ll = act[-1]
        return self.op.calcAccuracy(ll, y)
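
A brief usage sketch for NeuralNetwork, assuming the companion HiddenLayer and LogisticRegression classes operate directly on NumPy arrays (they may instead expect Theano tensors); the shapes and values below are illustrative.

import numpy as np

rng = np.random.RandomState(0)
net = NeuralNetwork(rng, n_in=784, n_out=10, hl=[256, 128])   # two hidden layers

X_batch = rng.rand(32, 784)                # a fake minibatch of 32 inputs
y_batch = rng.randint(0, 10, size=32)      # fake integer labels

print(net.cost(X_batch, y_batch))          # training cost on the batch
print(net.calcAccuracy(X_batch, y_batch))  # accuracy on the batch
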
Example #9
    def __init__(self, numpy_rng, theano_rng = None, n_ins=784,
                hidden_layers_sizes = [500, 500], n_outs = 10, mode = 'dA'):
        self.sigmoid_layers = []
        self.ae_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        self.x = T.matrix('x')
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data

        for i in range(self.n_layers):
            if i==0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i-1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng = numpy_rng, input = layer_input,
                                        n_in = input_size,
                                        n_out = hidden_layers_sizes[i],
                                        activ = T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            #initialize dA or sA
            if mode == 'sA':
                ae_layer = SparseAE(numpy_rng = numpy_rng, theano_rng = theano_rng,
                                    input = layer_input, n_visible = input_size,
                                    n_hidden = hidden_layers_sizes[i],
                                    W = sigmoid_layer.W, bhid = sigmoid_layer.b)
            else:
                ae_layer = DenoiseAE(numpy_rng = numpy_rng, theano_rng = theano_rng,
                                    input = layer_input, n_visible = input_size,
                                    n_hidden = hidden_layers_sizes[i],
                                    W = sigmoid_layer.W, bhid = sigmoid_layer.b)
            self.ae_layers.append(ae_layer)

        self.logLayer = LogisticRegression(input = self.sigmoid_layers[-1].output,
                                            n_in = hidden_layers_sizes[-1],
                                            n_out = n_outs)
        self.params.extend(self.logLayer.params)
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
Example #10
    def __init__(self, input, label,\
                 n_in, hidden_layer_sizes, n_out,\
                 rng=None, activation=ReLU):

        self.x = input
        self.y = label

        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)
        
        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0


        # construct multi-layer 
        for i in xrange(self.n_layers):

            # layer_size
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i-1]

            # layer_input
            if i == 0:
                layer_input = self.x

            else:
                layer_input = self.hidden_layers[-1].output()

            # construct hidden_layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            
            self.hidden_layers.append(hidden_layer)


        # layer for output using Logistic Regression (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_out)
    def testF(self, X, y):
        logreg = LogisticRegression(lam_2=0.5)
        logreg.train(X, y)
        print("f complete")
        print(logreg.f(logreg.w, X[0], y[0]))
        print("f for first entry")
        print(logreg.f(logreg.w, X[0], y[0]))
        print("F")
        print(logreg.F(logreg.w, X, y))
        print("g ")
        print(logreg.g(logreg.w, X[0], y[0]))
Example #12
    def __init__(self, rng, n_in, n_out, hl):

        #  will contain a list of HiddenLayer objects.
        self.layers = []

        inp_size = n_in
        for i in range(len(hl)):
            HL = HiddenLayer(rng, inp_size, hl[i])
            self.layers.append(HL)
            inp_size = hl[i]

        self.op = LogisticRegression(inp_size, n_out)
        self.params = []
        for l in self.layers:
            self.params = self.params + l.params

        self.params = self.params + self.op.params
Example #13
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):

        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)
    def __init__(self, np_rng, theano_rng=None, n_ins=784, hidden_layer_sizes=[500, 500], n_outs=10):
        
        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layer_sizes)
        
        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(np_rng.randint(2 ** 30))
     
        self.x = T.matrix('x') 
        self.y = T.ivector('y') 
        
        for i in xrange(self.n_layers):
            if i == 0:
                n_in = n_ins
                layer_input = self.x
            else:
                n_in = hidden_layer_sizes[i-1]
                layer_input = self.sigmoid_layers[-1].output

            n_out = hidden_layer_sizes[i]            
            
            sigmoid_layer = HiddenLayer(np_rng, layer_input, n_in, n_out, activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            
            self.params.extend(sigmoid_layer.params)
            
            dA_layer = AutoEncoder(np_rng, n_in, n_out, theano_rng=theano_rng, input=layer_input, 
                                   W=sigmoid_layer.W, b_hid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
            
        self.log_layer = LogisticRegression(self.sigmoid_layers[-1].output, self.y, hidden_layer_sizes[-1], n_outs)
        self.params.extend(self.log_layer.params)

        self.finetune_cost = self.log_layer.negative_log_likelihood()
        self.errors = self.log_layer.errors()        
Example #15
    def modifyModel(self, batch_size, dataset):
        print("Start to modify the model---------------------------")
        dataset = dataset
        self.batch_size = batch_size

        """
            create Model
        """

        datasets = YiWenData.load_data(dataset)

        test_set_x, test_set_y = datasets[2]

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = T.matrix('x')  # the data is presented as rasterized images

        # [int] labels

        print('... building the model')

        self.layer0_input = x.reshape((self.batch_size, 1, 28, 28))

        self.layer0.modify(self.layer0_input, (self.batch_size, 1, 28, 28))

        self.layer1.modify(self.layer0.output, (self.batch_size, self.nkerns[0], 12, 12))

        self.layer2_input = self.layer1.output.flatten(2)

        # construct a fully-connected sigmoidal layer
        self.layer2 = self.layer2

        # classify the values of the fully-connected sigmoidal layer
        self.layer3 = LogisticRegression(input=self.layer2.output, n_in=500, n_out=7)

        print("-------batch_size---------batch_size---------batch_size------------",self.layer0.image_shape)
Example #16
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=3600,
        hidden_layers_sizes=[500, 500],
        n_outs=6,
        corruption_levels=[0.1, 0.1]
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
            
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoders
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well)
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
                                        
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shared weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
                          
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
def LogisticRegression_demo(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600):
    datasets = load_multi()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
    
    print '... building the model'

    
    index = T.lscalar()  
    x = T.matrix('x') 
    y = T.ivector('y')  

    classifier = LogisticRegression(x, y, n_in=103, n_out=9)
    
    test_model = theano.function(inputs=[index],
                                 outputs=classifier.errors(),
                                 givens={x: test_set_x[index * batch_size: (index + 1) * batch_size],
                                         y: test_set_y[index * batch_size: (index + 1) * batch_size]})
                                         
    validate_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(),
                                     givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                             y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
                                             
    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate)
    
    train_model = theano.function(inputs=[index],
                                  outputs=cost,
                                  updates=updates,
                                  givens={x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                          y: train_set_y[index * batch_size:(index + 1) * batch_size]})
                                          
    print '... training the model'
    
    patience = 5000  
    patience_increase = 2  
                                  
    improvement_threshold = 0.995  
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                       ' model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
                 (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
Example #18
## These are the hyperparameters to the classifiers. You may need to
# adjust these as you try to find the best fit for each classifier.

# Logistic Regression parameters
eta = .001
lambda_parameter = .1


# Do not change anything below this line!!
# -----------------------------------------------------------------

# Read from file and extract X and Y
df = pd.read_csv("fruit.csv")
X = df[['width', 'height']].values
Y = (df['fruit'] - 1).values

nb1 = GaussianGenerativeModel(isSharedCovariance=False)
nb1.fit(X,Y)
nb1.visualize("generative_result_separate_covariances.png")

nb2 = GaussianGenerativeModel(isSharedCovariance=True)
nb2.fit(X,Y)
nb2.visualize("generative_result_shared_covariances.png")

lr = LogisticRegression(eta=eta, lambda_parameter=lambda_parameter)
lr.fit(X,Y)
lr.visualize('logistic_regression_result.png')



    def __init__(self, rng, input, n_in, n_hidden1, n_hidden2, n_hidden3,
                 n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden1: int
        :param n_hidden1: number of hidden units for first layer

        :type n_hidden2: int
        :param n_hidden2: number of hidden units for second layer

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # This MLP stacks three HiddenLayer instances with ReLU activations,
        # connected to a LogisticRegression output layer; the activation
        # function can be replaced by tanh, sigmoid or any other nonlinear
        # function
        self.First_hiddenLayer = HiddenLayer(rng=rng,
                                             input=input,
                                             n_in=n_in,
                                             n_out=n_hidden1,
                                             activation=relu)

        self.Second_hiddenLayer = HiddenLayer(
            rng=rng,
            input=self.First_hiddenLayer.output,
            n_in=n_hidden1,
            n_out=n_hidden2,
            activation=relu)

        self.Third_hiddenLayer = HiddenLayer(
            rng=rng,
            input=self.Second_hiddenLayer.output,
            n_in=n_hidden2,
            n_out=n_hidden3,
            activation=relu)

        # The logistic regression layer gets as input the hidden units
        # of the last hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.Third_hiddenLayer.output, n_in=n_hidden3, n_out=n_out)
        # end-snippet-2 start-snippet-3
        # L1 norm ; one regularization option is to enforce L1 norm to
        # be small
        self.L1 = (abs(self.First_hiddenLayer.W).sum() +
                   abs(self.Second_hiddenLayer.W).sum() +
                   abs(self.Third_hiddenLayer.W).sum() +
                   abs(self.logRegressionLayer.W).sum())

        # square of L2 norm ; one regularization option is to enforce
        # square of L2 norm to be small
        self.L2_sqr = ((self.First_hiddenLayer.W**2).sum() +
                       (self.Second_hiddenLayer.W**2).sum() +
                       (self.Third_hiddenLayer.W**2).sum() +
                       (self.logRegressionLayer.W**2).sum())

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood)
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the layers it is
        # made out of
        self.params = self.First_hiddenLayer.params + self.Second_hiddenLayer.params + self.Third_hiddenLayer.params + self.logRegressionLayer.params
        # end-snippet-3

        # keep track of model input
        self.input = input
Example #20
class SdA(object):
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct dA_layers
            dA_layer = dA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layer_sizes[i],
                          W=sigmoid_layer.W,
                          hbias=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100):
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(
                    layer_input)

            da = self.dA_layers[i]

            for epoch in xrange(epochs):
                da.train(lr=lr,
                         corruption_level=corruption_level,
                         input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0

        while epoch < epochs:
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost

            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        return self.log_layer.predict(layer_input)
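
A compact usage sketch for the SdA class above, assuming binary NumPy inputs, one-hot labels, and the companion HiddenLayer/dA/LogisticRegression modules from the same project; the toy data and epoch counts are illustrative.

import numpy

x = numpy.array([[1, 1], [1, 0], [0, 1], [0, 0]])
y = numpy.array([[1, 0], [1, 0], [0, 1], [0, 1]])

sda = SdA(input=x, label=y, n_ins=2, hidden_layer_sizes=[4, 3], n_outs=2,
          rng=numpy.random.RandomState(123))

sda.pretrain(lr=0.1, corruption_level=0.3, epochs=50)   # layer-wise dA training
sda.finetune(lr=0.1, epochs=50)                         # train the softmax layer

print(sda.predict(x))
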
Example #21
class SdA(object):
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 rng=None):
        
        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        
        assert self.n_layers > 0


        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
                
            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)


            # construct dA_layers
            dA_layer = dA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layer_sizes[i],
                          W=sigmoid_layer.W,
                          hbias=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)


        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()


    def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100):
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input)

            da = self.dA_layers[i]

            for epoch in xrange(epochs):
                da.train(lr=lr, corruption_level=corruption_level, input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0

        while epoch < epochs:
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost
            
            lr *= 0.95
            epoch += 1


    def predict(self, x):
        layer_input = x
        
        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        return self.log_layer.predict(layer_input)
from sklearn.model_selection import LeaveOneOut
#importing the logistic regression module from logisticRegression.py
from LogisticRegression import LogisticRegression

import numpy as np

irisdata = datasets.load_iris()
data = np.delete(irisdata['data'][50:], [0, 1], axis=1)

lv = LeaveOneOut()
lv.get_n_splits(data)

labels = irisdata.target[50:].ravel()

error = []

for train_index, test_index in lv.split(data):

    #creating object for logistic regression
    model = LogisticRegression()

    train_data = data[train_index]
    train_labels = labels[train_index]
    test_data = data[test_index]
    test_labels = labels[test_index]
    #training the logistic regression object
    model.train(train_data, train_labels)

    error.append(1 - (test_labels == model.test(test_data, test_labels)[0]))

print(np.mean(error))
Example #23
    plt.yticks([0, 1], ytick, fontsize=14)  # show labels on the y-axis ticks
    plt.xlabel('ある製品試験に不合格だった個数[個] ', fontsize=14)  # x-axis label ("number of items that failed the product test")
    plt.title('製品試験と工場(正常・異常)の関係', fontsize=16)  # plot title ("relationship between product testing and factory status (normal/abnormal)")
    plt.scatter(X[:11], y[:11], c='b')  # scatter plot
    plt.scatter(X[11:], y[11:], c='r')  # scatter plot
    plt.show()  # display the plot


# Create the dataset X for the classification problem
m = 22  # maximum number of failed items
X = np.arange(0, m, 1)  # generate failure counts from 0 to m-1

# Create the labels (ground truth) for the classification problem
t = np.where(X > 10, 1, 0)

model = LogisticRegression()
iteration = 10000
cost = []

for i in range(iteration):
    # compute the hypothesis
    y = model.predict(X)
    # compute the cost function
    J = model.cost_function(y, t)
    cost.append(J)
    # update the parameters
    model.gradient_descent(X, y, t)

# plot the training progress
plt.plot(cost, c='b')
plt.xlabel('学習回数')  # x-axis label ("number of training iterations")
    print('SEED:' + str(SEED))

    # marginals_rand = np.random.multinomial(N, true_marginals.flatten(), size=1) / N
    # marginals_rand = np.reshape(marginals_rand, [2, 2])
    # while not (marginals_rand[1, 1] * marginals_rand[0, 1]):
    #     marginals_rand = np.random.multinomial(N, true_marginals.flatten(), size=1) / N
    #     marginals_rand = np.reshape(marginals_rand, [2, 2])
    # marginals_rand = np.reshape(marginals_rand, [2, 2])

    X_train, y_train, a_train = upload_data(ds=ds,
                                            n_samples=n_train,
                                            marginals=true_marginals)
    X_test, y_test, a_test = upload_data(ds=ds,
                                         n_samples=n_test,
                                         marginals=true_marginals)
    clf = LogisticRegression(fit_intercept=False, reg=0)
    clf.fit(X_train, y_train)

    # if ds == 'toy_correlated_for_most_fav_dist':
    #     clf.coef_ = np.array([0.25, 0.80])
    # else:
    #     clf.coef_ = np.array([-.25, .8])

    data_tuple = [X_test, a_test, y_test]
    clf.coef_ = np.array([0.4, 1.12])

    dist, optimum_ks, gamma_opt = calculate_distance_prob_eqopp_with_opt_params(
        data_tuple=data_tuple,
        function_of_gamma=f_gamma,
        range_gamma=range_gamma,
        k_opt_fcn=k_opt,
Example #25
from LogisticRegression import LogisticRegression
import numpy as np

u = np.array([[0.5, 0, -0.3, -0.7, 0.5]])
v = np.array([[0.7, -0.4, 0.9, -0.5, 0.6]])

Y = np.random.randint(0, 2, (100, 1))
R = np.tile(Y, (1, 5))
E = np.random.rand(100, 1)

X = u * R + (np.ones((100, 5)) - R) * v + E

model1 = LogisticRegression()
model1.fit(X, Y)
print(model1.evaluate(X, Y))

model2 = LogisticRegression(stochastic=True, batch_size=50)
model2.fit(X, Y)
print(model2.evaluate(X, Y))

from sklearn.model_selection import train_test_split
X, X_val, Y, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

model3 = LogisticRegression(stochastic=True, batch_size=50, epochs=10000)
model3.fit(X, Y, X_val=X_val, Y_val=Y_val)
print(model3.evaluate(X, Y))
Example #26
# Do not change anything below this line!!
# -----------------------------------------------------------------

# Read from file and extract X and Y
df = pd.read_csv("fruit.csv")
X = df[['width', 'height']].values
Y = (df['fruit'] - 1).values

nb1 = GaussianGenerativeModel(isSharedCovariance=False)
nb1.fit(X,Y)
nb1.test_insample()
print "Gaussian model with separate covariance: in-sample error rate %.1f%%" % (nb1.insample_err * 100.0 )
nb1.visualize("generative_result_separate_covariances.png",show_charts=True)

nb2 = GaussianGenerativeModel(isSharedCovariance=True)
nb2.fit(X,Y)
nb2.test_insample()
print "Gaussian model with shared covariance: in-sample error rate %.1f%%" % (nb2.insample_err * 100.0 )
nb2.visualize("generative_result_shared_covariances.png",show_charts=True)

for idx,lamda in enumerate([ 0.0001, 0.001, 0.01, 0.1, 1, 0 ]):
    lr = LogisticRegression(eta=eta, lambda_parameter=lamda)
    lr.fit(X,Y)
    lr.test_insample()
    print "Logistic regression with lambda %.5f, in-sample error rate %.1f%%" % ( lamda, lr.insample_err * 100.0 )
    lr.visualize('logistic_regression_result_%d.png' % idx, show_charts=True)



Example #27
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 numpy_rng=None):
        """
            documentation copied from:
            http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py
        This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type n_layers_sizes: list of ints
        :param n_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """



        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)


        assert self.n_layers > 0


        # construct multi-layer
        #ORIG# for i in xrange(self.n_layers):
        for i in range(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        numpy_rng=numpy_rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)


            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,     # W, b are shared
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)


        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()
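
For symmetry with the SdA examples, a construction-only sketch of the class above (only its __init__ is shown here). The class name DBN, the toy NumPy data, and the availability of the companion HiddenLayer/RBM/LogisticRegression modules are all assumptions.

import numpy

x = numpy.array([[1, 1, 0], [1, 0, 0], [0, 0, 1], [0, 1, 1]])   # toy binary inputs
y = numpy.array([[1, 0], [1, 0], [0, 1], [0, 1]])               # one-hot labels

dbn = DBN(input=x, label=y, n_ins=3, hidden_layer_sizes=[3],
          n_outs=2, numpy_rng=numpy.random.RandomState(123))

# the finetune cost computed in __init__ is the NLL of the softmax layer
print(dbn.finetune_cost)
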
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from LogisticRegression import LogisticRegression

#from regression import LogisticRegression


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)

regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)

print("LR classification accuracy:", accuracy(y_test, predictions))
Example #29
def LogisticRegression_demo(learning_rate=0.13,
                            n_epochs=1000,
                            dataset='mnist.pkl.gz',
                            batch_size=600):
    datasets = load_multi()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building the model'

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    classifier = LogisticRegression(x, y, n_in=103, n_out=9)

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate)

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print '... training the model'

    patience = 5000
    patience_increase = 2

    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                           ' model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
Example #30
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as an MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=3600,
                 hidden_layers_sizes=[500, 500], n_outs=6):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well) During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
                            
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, each performing one step of
        gradient descent for a given layer. Each function takes the
        minibatch index as input, so to train an RBM you just need to
        iterate, calling the corresponding function on all minibatch
        indices.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            # compile the theano function
            fn = theano.function(
                inputs=[index,
                        theano.In(learning_rate, value=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns



    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        
        return train_fn, valid_score, test_score
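
A minimal driver sketch for the two methods above (assuming the enclosing class is the DBN whose constructor appears further below, and that `datasets` is the usual list of three `(x, y)` shared-variable pairs; names and hyper-parameters here are illustrative, not part of the snippet):

import numpy

def run_dbn_sketch(datasets, n_ins, n_outs, batch_size=10, k=1,
                   pretrain_epochs=10, pretrain_lr=0.01,
                   finetune_epochs=10, finetune_lr=0.1):
    # hypothetical driver wiring together pretraining_functions and
    # build_finetune_functions
    numpy_rng = numpy.random.RandomState(123)
    dbn = DBN(numpy_rng=numpy_rng, n_ins=n_ins,
              hidden_layers_sizes=[500, 500], n_outs=n_outs)

    train_set_x, _ = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # layer-wise pretraining: one compiled function per RBM
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size, k=k)
    for fn in pretraining_fns:
        for epoch in range(pretrain_epochs):
            costs = [fn(index=i, lr=pretrain_lr) for i in range(n_train_batches)]
            print('pretraining cost %.4f' % numpy.mean(costs))

    # supervised finetuning of the whole MLP
    train_fn, valid_score, test_score = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)
    for epoch in range(finetune_epochs):
        for i in range(n_train_batches):
            train_fn(i)
        print('validation error %.4f' % numpy.mean(valid_score()))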
Пример #31
0
    simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
    simulated_labels = np.hstack((np.zeros(data_cnt), np.ones(data_cnt)))

    return simulated_separableish_features, simulated_labels


train_data_cnt = 5000
train_x, train_y = generateData(train_data_cnt)

print train_x.shape, train_y.shape

test_data_cnt = 1000
test_x, test_y = generateData(test_data_cnt)

lr = LogisticRegression()
lr.fit(train_x, train_y)

model_name = 'lr.model'
lr.saveModel(model_name)

new_lr = LogisticRegression()
new_lr.loadModel(model_name)

pred_res = new_lr.predict(test_x)

print pred_res[:10]

correct_cnt = np.sum((pred_res == test_y).astype(int))

print correct_cnt
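
The top of generateData is cut off above; a plausible sketch of such a helper (two 2-D Gaussian clusters, one per class — the means and covariance are illustrative guesses, not the original values):

import numpy as np

def generateData(data_cnt):
    # two roughly separable 2-D Gaussian clusters, one per class
    cov = [[1.0, 0.75], [0.75, 1.0]]
    x1 = np.random.multivariate_normal([0.0, 0.0], cov, data_cnt)
    x2 = np.random.multivariate_normal([1.0, 4.0], cov, data_cnt)

    simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
    simulated_labels = np.hstack((np.zeros(data_cnt), np.ones(data_cnt)))

    return simulated_separableish_features, simulated_labels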
Пример #32
0
 def __init__(self, rnn_size=[800], layer=[10]):
     self.bidirectional_rnn = BidirectionalRNN('BidirectionalRNN', rnn_size)
     self.logistic_regression = LogisticRegression('LogisticRegression',
                                                   rnn_size[-1], layer)
    ax1.plot(smooth(Adamax_train_costs), color='#895CCC', label='Adamax')
    ax1.legend()
    ax2.plot(Adamax_val_errors, 'o-', color='#895CCC', label='Adamax')
    ax2.legend()
    plt.pause(1)

    plt.show(block=False)


if __name__ == '__main__':
    image_size = 28 * 28
    classes = 10
    # test on logistic regression
    shapes = [(image_size, classes), (classes, )]
    # construct the logistic regression class
    classifier = lambda x: LogisticRegression(
        input=x, n_in=image_size, n_out=classes)
    print('start test on logistic regression......')
    test_model(classifier, shapes, fig_num=1)

    # test on mlp
    n_hidden = 500
    shapes = [(image_size, n_hidden), (n_hidden, ), (n_hidden, classes),
              (classes, )]
    rng = numpy.random.RandomState(1234)
    # construct the mlp
    classifier = lambda x: MLP(
        rng=rng, input=x, n_in=image_size, n_hidden=n_hidden, n_out=classes)
    print()
    print()
    print('start test on mlp......')
    test_model(classifier, shapes, fig_num=2)
Пример #34
0
import pandas as pd
import numpy as np
from LogisticRegression import LogisticRegression
from Multinomial import Multinomial


def loadData(file):
    data = pd.read_csv(file, header=None).fillna(0)
    return data.drop_duplicates()


if __name__ == '__main__':

    print("1 Spambase Logistic Regression")
    LogisticRegression(loadData('./data/spambase.csv')).validate()

    print("1 Breast Cancer Logistic Regression")
    LogisticRegression(loadData('./data/breastcancer.csv')).validate()

    print("1 Diabetes Logistic Regression")
    LogisticRegression(loadData('./data/diabetes.csv')).validate()

    print("2 Multivariate Bernoulli")
    Multinomial(True).validate('./data/20NG_data/train_data.csv',
                               './data/20NG_data/train_label.csv',
                               './data/20NG_data/test_data.csv',
                               './data/20NG_data/test_label.csv')

    print("2 Multinomial")
    Multinomial().validate('./data/20NG_data/train_data.csv',
                           './data/20NG_data/train_label.csv',
                           './data/20NG_data/test_data.csv',
                           './data/20NG_data/test_label.csv')
filename = 'LogisticRegressionData1.txt'
def load_data(filename):
    data = pd.read_csv(filename)
    data=data.values
    return data


data = load_data(filename)
dimensions = data.shape
x_train = data[0:50,0:dimensions[1]-1]
y_train = data[0:50,dimensions[1]-1:]
m = x_train.shape[0]
n = x_train.shape[1] 
x_test = data[50:,0:dimensions[1]-1]
y_test = data[50:,dimensions[1]-1:]
tester = LogisticRegression()
tester.logistic_regression(x_train,y_train)
pred_probability = tester.predict_probability(x_test)
print "Probability of y being 1 for given testing samples:" + str(pred_probability)
pred_y = tester.predict_y(x_test)
print "Predicted value of y for given testing samples : " + str(pred_y)
plot_train = tester.plot_train(x_train,y_train)
plot_test = tester.plot_test(x_test,y_test)
acc = tester.accuracy(x_test,y_test)
print "Accuracy of regression algorithm = " + str(acc)
                             



Пример #36
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target

X = X[y < 2, :2]
y = y[y < 2]

from model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

from LogisticRegression import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# print(log_reg.score(X_test,y_test))#1.0
#
# print(log_reg.predict_proba(X_test))
# #[0.92972035 0.98664939 0.14852024 0.01685947 0.0369836  0.0186637
#  # 0.04936918 0.99669244 0.97993941 0.74524655 0.04473194 0.00339285
#  # 0.26131273 0.0369836  0.84192923 0.79892262 0.82890209 0.32358166
#  # 0.06535323 0.20735334]
#
# print(log_reg.coef_) #[ 3.01796521 -5.04447145]
# print(log_reg.intercept_) #-0.6937719272911225


# Since only two features were chosen, we can observe how x and y (the two features) affect the final prediction.
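
With only two features the boundary theta0 + theta1*x1 + theta2*x2 = 0 can be drawn directly; a sketch, assuming this custom class exposes coef_ and intercept_ as in the commented-out prints above:

def x2_on_boundary(x1_vals, coef, intercept):
    # solve coef[0]*x1 + coef[1]*x2 + intercept = 0 for x2
    return -(coef[0] * x1_vals + intercept) / coef[1]

x1_plot = np.linspace(4, 8, 100)
x2_plot = x2_on_boundary(x1_plot, log_reg.coef_, log_reg.intercept_)

plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], color='red')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], color='blue')
plt.plot(x1_plot, x2_plot)
plt.show()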
Пример #37
0
class CNN(object):
    def __init__(self,
                 N,
                 label,
                 n_hidden,
                 n_out,
                 image_size,
                 channel,
                 n_kernels,
                 kernel_sizes,
                 pool_sizes,
                 rng=None,
                 activation=ReLU):

        if rng is None:
            rng = numpy.random.RandomState(1234)

        self.N = N
        self.n_hidden = n_hidden

        self.n_kernels = n_kernels

        self.pool_sizes = pool_sizes

        self.conv_layers = []
        self.conv_sizes = []

        # construct 1st conv_layer
        conv_layer0 = ConvPoolLayer(N, image_size, channel, n_kernels[0],
                                    kernel_sizes[0], pool_sizes[0], rng,
                                    activation)
        self.conv_layers.append(conv_layer0)

        conv_size = [
            (image_size[0] - kernel_sizes[0][0] + 1) / pool_sizes[0][0],
            (image_size[1] - kernel_sizes[0][1] + 1) / pool_sizes[0][1]
        ]
        self.conv_sizes.append(conv_size)

        # construct 2nd conv_layer
        conv_layer1 = ConvPoolLayer(N, conv_size, n_kernels[0], n_kernels[1],
                                    kernel_sizes[1], pool_sizes[1], rng,
                                    activation)
        self.conv_layers.append(conv_layer1)

        conv_size = [
            (conv_size[0] - kernel_sizes[1][0] + 1) / pool_sizes[1][0],
            (conv_size[1] - kernel_sizes[1][1] + 1) / pool_sizes[1][1]
        ]
        self.conv_sizes.append(conv_size)

        # construct hidden_layer
        self.hidden_layer = HiddenLayer(
            None, n_kernels[-1] * conv_size[0] * conv_size[1], n_hidden, None,
            None, rng, activation)

        # construct log_layer
        self.log_layer = LogisticRegression(None, label, n_hidden, n_out)

    # def train(self, epochs, learning_rate, input=None):
    def train(self, epochs, learning_rate, input, test_input=None):

        for epoch in xrange(epochs):

            if (epoch + 1) % 5 == 0:
                print 'iter = %d/%d' % (epoch + 1, epochs)
                print

                if (test_input != None):
                    print '------------------'
                    print 'TEST PROCESSING...'

                    print self.predict(test_input)
                    print '------------------'
                print

            # forward first conv layer
            pooled_X = self.conv_layers[0].forward(input=input)

            # forward second conv layer
            pooled_X = self.conv_layers[1].forward(input=pooled_X)

            # flatten input
            layer_input = self.flatten(pooled_X)

            # forward hidden layer
            layer_input = self.hidden_layer.forward(input=layer_input)

            # forward & backward logistic layer
            self.log_layer.train(lr=learning_rate, input=layer_input)

            # backward hidden layer
            self.hidden_layer.backward(prev_layer=self.log_layer,
                                       lr=learning_rate)

            flatten_size = self.n_kernels[-1] * self.conv_sizes[-1][
                0] * self.conv_sizes[-1][1]
            delta_flatten = numpy.zeros((self.N, flatten_size))

            for n in xrange(self.N):
                for i in xrange(flatten_size):

                    for j in xrange(self.n_hidden):
                        delta_flatten[n][i] += self.hidden_layer.W[i][
                            j] * self.hidden_layer.d_y[n][j]

            # unflatten delta
            delta = numpy.zeros(
                (len(delta_flatten), self.n_kernels[-1],
                 self.conv_sizes[-1][0], self.conv_sizes[-1][1]))

            for n in xrange(len(delta)):
                index = 0
                for k in xrange(self.n_kernels[-1]):
                    for i in xrange(self.conv_sizes[-1][0]):
                        for j in xrange(self.conv_sizes[-1][1]):
                            delta[n][k][i][j] = delta_flatten[n][index]
                            index += 1

            # backward second conv layer
            delta = self.conv_layers[1].backward(delta, self.conv_sizes[1],
                                                 learning_rate)

            # backward first conv layer
            self.conv_layers[0].backward(delta, self.conv_sizes[0],
                                         learning_rate)

    def flatten(self, input):

        flatten_size = self.n_kernels[-1] * self.conv_sizes[-1][
            0] * self.conv_sizes[-1][1]
        flattened_input = numpy.zeros((len(input), flatten_size))

        for n in xrange(len(flattened_input)):
            index = 0

            for k in xrange(self.n_kernels[-1]):
                for i in xrange(self.conv_sizes[-1][0]):
                    for j in xrange(self.conv_sizes[-1][1]):
                        flattened_input[n][index] = input[n][k][i][j]
                        index += 1

        # print flattened_input

        return flattened_input

    def predict(self, x):

        pooled_X = self.conv_layers[0].forward(input=x)

        pooled_X = self.conv_layers[1].forward(input=pooled_X)

        layer_input = self.flatten(pooled_X)

        x = self.hidden_layer.output(input=layer_input)

        return self.log_layer.predict(x)
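
As an aside, the nested k/i/j loops in CNN.flatten enumerate elements in C order, so the same flattening can be done with a single numpy reshape (a sketch of the equivalence, assuming the input is an array of shape (N, n_kernels, h, w)):

import numpy

def flatten_vectorized(pooled):
    # (N, n_kernels, h, w) -> (N, n_kernels * h * w), same element order
    # as the nested loops in CNN.flatten above
    pooled = numpy.asarray(pooled)
    return pooled.reshape(len(pooled), -1)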
Пример #38
0
def PolynomialLogisticRegression(degree, C, penalty='l2'):
    return Pipeline([('poly', PolynomialFeatures(degree=degree)),
                     ('std_scaler', StandardScaler()),
                     ('log_reg', LogisticRegression(C=C, penalty=penalty))])
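
A usage sketch for the factory above (the moons dataset, degree and C are illustrative; the imports are the standard scikit-learn ones this snippet assumes):

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression

X, y = make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# degree-2 features with fairly strong regularisation (small C)
poly_log_reg = PolynomialLogisticRegression(degree=2, C=0.1)
poly_log_reg.fit(X_train, y_train)
print(poly_log_reg.score(X_test, y_test))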
Пример #39
0
            reg.fit(X_train, Y_train)
            predictions = reg.predict(X_test)
            acc = accuracy(Y_test, predictions)
            if acc > acc_max:
                acc_max = acc
                lr_max = lr_val
                n_iter_max = iteration
    return (lr_max, n_iter_max)


best_lr, best_n_iter = best_params()

print("Best Learning Rate:", best_lr)
print("Best Number Of Iterations:", best_n_iter)

# Best LR = 0.0001
# Best N_Iter = 1000

reg = LogisticRegression(lr=best_lr, n_iters=best_n_iter)
reg.fit(X_train, Y_train)
predictions = reg.predict(X_test)

print("Best Classification Accuracy", accuracy(Y_test, predictions))

# On running the code, the following warning will be shown:
#  RuntimeWarning: overflow encountered in exp
#  return 1 / (1 + np.exp(-X))

# This happens because some inputs X are large negative numbers, so exp(-X) overflows the range of a 64-bit float;
# the division still returns 0, the correct limit, so the warning is mostly harmless.
# Hence only a small, hand-picked set of 'lr' and 'n_iter' values is searched, to keep the number of comparisons manageable.
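
One common way to avoid the warning is a numerically stable sigmoid that only exponentiates non-positive arguments (a sketch; scipy.special.expit is an equivalent ready-made alternative):

import numpy as np

def stable_sigmoid(x):
    # evaluate exp only where its argument is <= 0, so it never overflows
    x = np.asarray(x, dtype=np.float64)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    exp_x = np.exp(x[~pos])
    out[~pos] = exp_x / (1.0 + exp_x)   # sigmoid(x) = e^x / (1 + e^x) for x < 0
    return out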
Пример #40
0
                X[i][j] = (X[i][j] - MIN[j]) / (MAX[j] - MIN[j])

        X = np.hstack((np.ones((len(X), 1)), X))

    testing_X = np.array(X)

    return training_X, training_Y, testing_X


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


if __name__ == "__main__":

    training_X, training_y, testing_X = extract_features(normalization=True)

    model = LogisticRegression(training_X, training_y)

    weight = model.GradientDescent(optimizer="Adagrad")

    testing_y = sigmoid(np.dot(testing_X, weight))

    with open("output.csv", "w") as f:
        print("id,label", file=f)
        for i in range(testing_y.shape[0]):
            if testing_y[i] >= 0.5:
                print("{},1".format(i + 1), file=f)
            else:
                print("{},0".format(i + 1), file=f)
Пример #41
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=3600,
                 hidden_layers_sizes=[500, 500], n_outs=6):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
                            
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from LogisticRegression import LogisticRegression

bc = datasets.load_breast_cancer()

X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)


def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)


regresor = LogisticRegression(lr=0.01, n_iters=10000)
regresor.fit(X_train, y_train)
predikcije = regresor.predict(X_test)

print("Preciznost: ", accuracy(y_test, predikcije))
Пример #43
0
print('Data sorted ...')

# Split data into training and test data - ignoring the critical data for now
X = np.concatenate((X_ordered, X_disordered))
Y = np.concatenate((Y_ordered, Y_disordered))
print(np.shape(X))
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.20)
saveData = np.c_[X_train, Y_train]
np.save('test_set', saveData)
print('saved test set')


# Illustrate ordered vs disordered data
# L = 40
# fig = plt.figure()
# plt.subplot(121)
# plt.imshow(X_ordered[20000].reshape(L, L), cmap='plasma_r')
# plt.title('Ordered')
#
# plt.subplot(122)
# plt.imshow(X_disordered[20000].reshape(L, L), cmap='plasma_r')
# plt.title('Disordered')
# plt.show()

# Train on the training data
logreg = LogisticRegression(X_train, Y_train.reshape(len(Y_train), 1), X_test, Y_test.reshape(len(Y_test), 1))
logreg.fit_standard()
print(logreg.accuracy())
weight = logreg.getWeights()
np.save('weights_logreg.npy', weight)
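
The saved weights can later be reused without refitting; a sketch, assuming the stored vector has one entry per feature column of X (if the class above keeps a separate bias term, a column of ones has to be appended first):

import numpy as np

def predict_from_saved(weight_file, X):
    w = np.load(weight_file)
    probs = 1.0 / (1.0 + np.exp(-X.dot(w)))   # sigmoid of the linear score
    return (probs >= 0.5).astype(int)

# e.g. predictions = predict_from_saved('weights_logreg.npy', X_test)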
Пример #44
0
class DBN(object):
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 numpy_rng=None):
        """
            documentation copied from:
            http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py
        This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type n_layers_sizes: list of ints
        :param n_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        #ORIG# for i in xrange(self.n_layers):
        for i in range(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        numpy_rng=numpy_rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                W=sigmoid_layer.W,  # W, b are shared
                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(
                    layer_input)
            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
                # cost = rbm.get_reconstruction_cross_entropy()
                # print >> sys.stderr, \
                #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost

    # def pretrain(self, lr=0.1, k=1, epochs=100):
    #     # pre-train layer-wise
    #     for i in xrange(self.n_layers):
    #         rbm = self.rbm_layers[i]

    #         for epoch in xrange(epochs):
    #             layer_input = self.x
    #             for j in xrange(i):
    #                 layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input)

    #             rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
    #             # cost = rbm.get_reconstruction_cross_entropy()
    #             # print >> sys.stderr, \
    #             #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        done_looping = False
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost

            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)
        return out
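
A small end-to-end usage sketch for this numpy DBN (an XOR-like toy task with one-hot labels; sizes and epoch counts are illustrative):

import numpy

x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1],
                 [0, 0], [0, 1], [1, 0], [1, 1]])
# one-hot labels: class 1 for the two "mixed" inputs
y = numpy.array([[1, 0], [0, 1], [0, 1], [1, 0],
                 [1, 0], [0, 1], [0, 1], [1, 0]])

rng = numpy.random.RandomState(123)
dbn = DBN(input=x, label=y, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
          numpy_rng=rng)
dbn.pretrain(lr=0.1, k=1, epochs=100)
dbn.finetune(lr=0.1, epochs=200)
print(dbn.predict(x))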
Пример #45
0
class DBN(object):


    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 numpy_rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        numpy_rng=numpy_rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                W=sigmoid_layer.W,  # W, b are shared
                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

#         print 'self.finetune_cost: ', self.finetune_cost

    def pretrain(self, lr=0.1, k=1, epochs=1000, batch_size=-1):

        pretraining_start_time = time.clock()
        # pre-train layer-wise

        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(
                    layer_input)

            rbm = self.rbm_layers[i]
            #             print 'layer_input', layer_input

            for epoch in xrange(epochs):
                batch_start = time.clock()
                #                 rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
                # cost = rbm.get_reconstruction_cross_entropy()
                # print >> sys.stderr, \
                #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost

                cost = 0.0
                if batch_size == -1:
                    cost = rbm.contrastive_divergence(input=layer_input,
                                                      lr=lr,
                                                      k=k,
                                                      batch_size=-1)
                else:
                    n_train_batches = len(
                        layer_input
                    ) / batch_size  # compute number of minibatches for training, validation and testing
                    mean_cost = []
                    for batch_index in xrange(n_train_batches):
                        mean_cost += [
                            rbm.contrastive_divergence(
                                input=layer_input[batch_index *
                                                  batch_size:(batch_index +
                                                              1) * batch_size],
                                lr=lr,
                                k=k,
                                batch_size=batch_size)
                        ]
                    cost = numpy.mean(mean_cost)

                batch_stop = time.clock()
                batch_time = (batch_stop - batch_start)

                print '\tPre-training layer [%d: %d X %d], epoch %d, cost %.7f, entropy: %.2f, time is %.2f seconds' % (
                    i, rbm.n_visible, rbm.n_hidden, epoch, cost,
                    rbm.get_reconstruction_cross_entropy(), (batch_time))

            # synchronous betwen rbm and sigmoid layer
            self.sigmoid_layers[i].W = rbm.W
            self.sigmoid_layers[i].b = rbm.hbias


#                 # Plot filters after each training epoch
#                 # Construct image from the weight matrix
#                 if layer == 0:
#                     if (epoch % 20 == 0):
#                         image = PIL.Image.fromarray(tile_raster_images(
#                                  X = numpy.array(rbm.get_w().T),
#                                  img_shape=(28, 28),
#                                  tile_shape=(10, 10),
#                                  tile_spacing=(1, 1)))
#                         image.save('result/filters_at_layer_%d_epoch_%d.png' % (layer, epoch))

#             numpy.array(rbm.get_w().T).dump(('result/weight_at_layer_%d.txt' % layer))
#             if layer == 0:
#                 image = PIL.Image.fromarray(tile_raster_images(
#                              X = numpy.array(rbm.get_w().T),
#                              img_shape=(28, rbm.n_visible / 28),
#                              tile_shape=(10, 10),
#                              tile_spacing=(1, 1)))
#             image.save('result/filters_at_layer_%d.png' % layer)

        pretraining_end_time = time.clock()
        print('Total time for pretraining: ' + '%.2f seconds' %
              ((pretraining_end_time - pretraining_start_time)))
        print self.sigmoid_layers[0].W

    def finetune(self, lr=0.1, epochs=100):

        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        #         self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
        #                                     label=self.y,
        #                                     n_in=hidden_layer_sizes[-1],
        #                                     n_out=n_outs)

        # train log_layer
        epoch = 0
        done_looping = False
        start_time = time.clock()
        print layer_input
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            if (epoch % 20 == 0):
                print('\tFine-tuning epoch %d, cost is ' % epoch
                      )  #self.log_layer.negative_log_likelihood()

            lr *= 0.95
            epoch += 1
        end_time = time.clock()
        print('Total time for fine-tuning: ' + '%.2f seconds' %
              ((end_time - start_time)))

    def predict(self, x=None, y=None):

        input_x = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            input_x = sigmoid_layer.output(input=input_x)

        out = self.log_layer.predict(input_x, y)
        return out
Пример #46
0
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 numpy_rng=None):
        """
            documentation copied from:
            http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py
        This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type n_layers_sizes: list of ints
        :param n_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        #ORIG# for i in xrange(self.n_layers):
        for i in range(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        numpy_rng=numpy_rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                W=sigmoid_layer.W,  # W, b are shared
                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()
Пример #47
0
    def __init__(self, stateIn, deepOut = False):
        global pickle

        print("  Loading previous state ...")
        if stateIn.endswith('gz'):
            f = gzip.open(stateIn,'rb')
        else:
            f = open(stateIn, 'r')
        state_name = pickle.load(f)
        state = state_name[0]
        self.names = state_name[1]
        convValues = state.convValues
        w0 = convValues[0][0]
        b0 = convValues[0][1]
        w1 = convValues[1][0]
        b1 = convValues[1][1]
        hiddenVals = state.hiddenValues
        wHidden = hiddenVals[0]
        bHidden = hiddenVals[1]
        logRegValues = state.logRegValues
        wLogReg = logRegValues[0]
        bLogReg = logRegValues[1]
        topo = state.topoplogy
        nkerns = topo.nkerns
        n_out = len(self.names)
        assert(n_out == np.shape(wLogReg)[1])

        print("  Some Values ...")
        print("     Number of Kernels : " + str(nkerns))
        print("     First Kernel w0[0][0] :\n" + str(w0[0][0]))
        print("     bHidden :\n" + str(bHidden))
        print("     bLogReg :\n" + str(bLogReg))
        print("  Building the theano model")
        batch_size = 1

        x = T.matrix('x')   # the data is presented as rasterized images
        layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1]))
        rng = np.random.RandomState(23455)

        layer0 = LeNetConvPoolLayer(None, input=layer0_input,
                                image_shape=(batch_size, 1, topo.ishape[0],  topo.ishape[0]),
                                filter_shape=(nkerns[0], 1, topo.filter_1, topo.filter_1),
                                poolsize=(topo.pool_1, topo.pool_1), wOld=w0, bOld=b0, deepOut=deepOut)


        layer1 = LeNetConvPoolLayer(None, input=layer0.output,
                                    image_shape=(batch_size, nkerns[0], topo.in_2, topo.in_2),
                                    filter_shape=(nkerns[1], nkerns[0], topo.filter_2, topo.filter_2),
                                    poolsize=(topo.pool_2, topo.pool_2), wOld=w1, bOld=b1, deepOut=deepOut)

        layer2_input = layer1.output.flatten(2)

        layer2 = HiddenLayer(None, input=layer2_input, n_in=nkerns[1] * topo.hidden_input,
                             n_out=topo.numLogisticInput, activation=T.tanh, Wold = wHidden, bOld = bHidden)

        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output, n_in=topo.numLogisticInput, n_out=n_out, Wold = wLogReg, bOld=bLogReg )

        # create a function to compute the mistakes that are made by the model
        # index = T.lscalar()
        # test_model = theano.function([index], layer3.getProbs(),
        #                              givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]})

        self.predict_model = theano.function([x], layer3.getProbs())

        if (deepOut):
            self.layer0_out = theano.function([x], layer0.output)
            self.layer0_conv= theano.function([x], layer0.conv_out)
            self.layer1_conv= theano.function([x], layer1.conv_out)
            self.layer1_out = theano.function([x], layer1.output)
            self.b0 = b0
            self.b1 = b1
            self.w0 = w0
            self.w1 = w1
def evaluate_lenet5(topo,
                    loadPics,
                    learning_rate=0.005,
                    n_epochs=500,
                    stateIn=None,
                    stateOut=None):

    rng = numpy.random.RandomState(23455)
    theano_rng = RandomStreams(numpy.random.randint(2**30))

    print "Loading the datasets for testing and validation..."
    # Images
    valid_set_x, valid_set_y = loadPics.getValidationData()
    test_set_x, test_set_y = loadPics.getTestData()
    print "... Loading the datasets"

    n_out = loadPics.numberOfClassed
    batch_size = 200  #TODO Check
    print("       Learning rate " + str(learning_rate))

    # compute number of minibatches for training, validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    print 'Number of Kernels' + str(topo.nkerns)

    in_2 = 14  #Input in second layer (layer1)

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1]))

    # Using persistent state from last run
    w0 = w1 = b0 = b1 = wHidden = bHidden = wLogReg = bLogReg = None
    if stateIn is not None:
        print("  Loading previous state ...")
        state_names = pickle.load(open(stateIn, "r"))
        state = state_names[0]
        convValues = state.convValues
        w0 = convValues[0][0]
        b0 = convValues[0][1]
        w1 = convValues[1][0]
        b1 = convValues[1][1]
        hiddenVals = state.hiddenValues
        wHidden = hiddenVals[0]
        bHidden = hiddenVals[1]
        logRegValues = state.logRegValues
        wLogReg = logRegValues[0]
        bLogReg = logRegValues[1]
        print("Hallo Gallo")

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, topo.ishape[0],
                                             topo.ishape[0]),
                                filter_shape=(topo.nkerns[0], 1, topo.filter_1,
                                              topo.filter_1),
                                poolsize=(topo.pool_1, topo.pool_1),
                                wOld=w0,
                                bOld=b0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, topo.nkerns[0],
                                             topo.in_2, topo.in_2),
                                filter_shape=(topo.nkerns[1], topo.nkerns[0],
                                              topo.filter_2, topo.filter_2),
                                poolsize=(topo.pool_2, topo.pool_2),
                                wOld=w1,
                                bOld=b1)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (20,32*4*4) = (20,512)
    layer2_input = layer1.output.flatten(2)

    # Optionally some dropout for the fully connected layer.
    # Note: p=1 corresponds to no dropout.
    # layer2_input = theano_rng.binomial(size=layer2_input.shape, n=1, p=1 - 0.02) * layer2_input
    # paper_6 no dropout
    # paper_14 again 0.02 dropout
    # paper_15 again no dropout

    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=topo.nkerns[1] * topo.hidden_input,
                         n_out=topo.numLogisticInput,
                         activation=T.tanh,
                         Wold=wHidden,
                         bOld=bHidden)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=topo.numLogisticInput,
                                n_out=n_out,
                                Wold=wLogReg,
                                bOld=bLogReg)

    # Some regularisation (not for the conv-Kernels)
    L2_sqr = (layer2.W**2).sum() + (layer3.W**2).sum()

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y) + 0.001 * L2_sqr

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Functions for statistics
    test_logloss = theano.function(
        [index],
        cost,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_logloss = theano.function(
        [index],
        cost,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    test_probs_fct = theano.function(
        [index],
        layer3.getProbs(),
        givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    epoch_fraction = 0.0
    while (epoch < n_epochs) and (not done_looping):
        # New epoch the training set is disturbed again
        print("  Starting new training epoch")
        print("  Manipulating the training set")
        train_set_x, train_set_y = loadPics.giveMeNewTraining()
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        validation_frequency = min(n_train_batches, patience / 2)
        print("  Compiling new function")
        learning_rate *= 0.993  #See Paper from Cican

        train_logloss = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            })
        print("  Finished compiling the training set")
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):  # touch every minibatch once
            iter = (epoch - 1) * n_train_batches + minibatch_index
            epoch_fraction += 1.0 / float(n_train_batches)
            cost_ij = train_logloss(minibatch_index)
            if iter % 100 == 0:
                print 'training @ iter = ', iter, ' epoch_fraction ', epoch_fraction, ' costs ' + str(
                    cost_ij)
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                # compute zero-one loss on validation set
                validation_log_loss = numpy.mean(
                    [validate_logloss(i) for i in xrange(n_valid_batches)])
                test_log_loss = numpy.mean(
                    [test_logloss(i) for i in xrange(n_test_batches)])

                # test it on the test set
                test_start = time.clock()
                test_losses = [test_model(i) for i in xrange(n_test_batches)]
                train_costs = [
                    train_logloss(i) for i in xrange(n_test_batches)
                ]
                dt = time.clock() - test_start
                print 'Testing %i faces in %f sec, %f sec / image' % (
                    batch_size * n_test_batches, dt, dt / (n_test_batches * batch_size))
                test_score = numpy.mean(test_losses)
                train_log_loss = numpy.mean(train_costs)

                test_probs1 = [
                    test_probs_fct(i) for i in xrange(n_test_batches)
                ]

                print('%i, %f, %f, %f, %f, %f, %f, 0.424242' %
                      (epoch, this_validation_loss * 100., test_score * 100.,
                       learning_rate, train_log_loss, validation_log_loss,
                       test_log_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # # test it on the test set
                    # test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    # test_score = numpy.mean(test_losses)
                    # print(('     epoch %i, minibatch %i/%i, test error of best '
                    #        'model %f %%') %
                    #       (epoch, minibatch_index + 1, n_train_batches,
                    #        test_score * 100.))

                    # if (this_validation_loss < 0.02):
                    #     learning_rate /= 2
                    #     print("Decreased learning rate due to low xval error to " + str(learning_rate))

            if patience <= iter:
                print("--------- Finished Looping ----- earlier ")
                done_looping = True
                break

    end_time = time.clock()
    print('----------  Optimization complete -------------------------')
    print('Res: ', str(topo.nkerns))
    print('Res: ', learning_rate)
    print('Res: Best validation score of %f %% obtained at iteration %i,' \
          ' with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('Res: The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # Oliver
    if not os.path.isdir("conv_images"):
        os.makedirs("conv_images")
        os.chdir("conv_images")

    # d = layer0.W.get_value() #e.g.  (20, 1, 5, 5) number of filter, num of incomming filters, dim filter
    # for i in range(0, numpy.shape(d)[0]):
    #     dd = d[i][0]
    #     rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8)
    #     img = Image.fromarray(rescaled)
    #     img.save('filter_l0' + str(i) + '.png')
    #
    # d = layer1.W.get_value() #e.g.  (20, 1, 5, 5) number of filter, num of incomming filters, dim filter
    # for i in range(0, numpy.shape(d)[0]):
    #     dd = d[i][0]
    #     rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8)
    #     img = Image.fromarray(rescaled)
    #     img.save('filter_l1' + str(i) + '.png')

    state = LeNet5State(topology=topo,
                        convValues=[
                            layer0.getParametersAsValues(),
                            layer1.getParametersAsValues()
                        ],
                        hiddenValues=layer2.getParametersAsValues(),
                        logRegValues=layer3.getParametersAsValues())
    print
    if stateOut is not None:
        pickle.dump([state, loadPics.getClasses()], open(stateOut, 'wb'))
        print("Saved the pickeled data-set")

    return learning_rate
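
For reference, the compiled `updates` above implement plain SGD on cost = NLL + 0.001 * L2_sqr; one such step written in plain numpy looks roughly like this (here `grads` are taken as gradients of the unregularised NLL, whereas T.grad(cost, params) above already folds the penalty into the gradient):

import numpy as np

def sgd_l2_step(params, grads, dense_weight_ids, learning_rate, l2=0.001):
    # params/grads: lists of numpy arrays with matching shapes;
    # dense_weight_ids: indices of the hidden/logistic weight matrices that
    # carry the L2 penalty (the conv kernels are not regularised above)
    new_params = []
    for i, (p, g) in enumerate(zip(params, grads)):
        if i in dense_weight_ids:
            g = g + 2.0 * l2 * p          # gradient of l2 * sum(p ** 2)
        new_params.append(p - learning_rate * g)
    return new_params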
Пример #49
0
class SdA(object):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=3600,
        hidden_layers_sizes=[500, 500],
        n_outs=6,
        corruption_levels=[0.1, 0.1]
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes, must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
            
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
                                        
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the SdA;

            # the visible biases in the dA are parameters of that
            # dA, but not of the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
                          
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
        

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes (see the usage sketch after this class).

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        Note: the corruption level and the learning rate are not arguments of
        this method; they are inputs of the compiled Theano functions and are
        supplied (with defaults of 0.2 and 0.1) each time such a function is
        called.
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.In(corruption_level, value=0.2),
                    theano.In(learning_rate, value=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
        

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        a batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                         it has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='train'
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='test'
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='valid'
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score
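# A minimal usage sketch for the SdA class above (illustrative, not part of
# the original snippet). It assumes numpy is already imported, that `datasets`
# is the usual list of three (x, y) shared-variable pairs, and that each
# datapoint has n_ins=3600 features.
def sketch_train_sda(datasets, batch_size=20, pretraining_epochs=15,
                     pretrain_lr=0.001, finetune_epochs=10, finetune_lr=0.1):
    numpy_rng = numpy.random.RandomState(89677)
    train_set_x, train_set_y = datasets[0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    sda = SdA(numpy_rng=numpy_rng, n_ins=3600,
              hidden_layers_sizes=[500, 500], n_outs=6,
              corruption_levels=[0.1, 0.1])

    # layer-wise pretraining: call each layer's function on every minibatch
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)
    for i in range(sda.n_layers):
        for epoch in range(pretraining_epochs):
            c = [pretraining_fns[i](index=batch_index, corruption=0.1,
                                    lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost %f' %
                  (i, epoch, float(numpy.mean(c))))

    # supervised fine-tuning of the whole stack
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)
    for epoch in range(finetune_epochs):
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)
        print('epoch %i, validation error %f %%' %
              (epoch, float(numpy.mean(validate_model())) * 100.))
    return sda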
Пример #50
0
# -----------------------------------------------------------------

# Read from file and extract X and Y
df = pd.read_csv("fruit.csv")
X = df[['width', 'height']].values
Y = (df['fruit'] - 1).values

nb1 = GaussianGenerativeModel(isSharedCovariance=False)
nb1.fit(X, Y)
nb1.visualize("generative_result_separate_covariances.png")

nb2 = GaussianGenerativeModel(isSharedCovariance=True)
nb2.fit(X, Y)
nb2.visualize("generative_result_shared_covariances.png")

lr = LogisticRegression(eta=eta, lambda_parameter=lambda_parameter)
lr.fit(X, Y)
lr.visualize('logistic_regression_result.png')

X_test = np.array([[4, 11], [8.5, 7]])
Y_nb1 = nb1.predict(X_test)
Y_nb2 = nb2.predict(X_test)
Y_lr = lr.predict(X_test)

print("Test fruit predictions for Gaussian Model:")
print("width 4 cm and height 11 cm: " + str(Y_nb1[0]))
print("width 8.5 cm and height 7 cm: " + str(Y_nb1[1]))

print("Test fruit predictions for Shared Covariance Gaussian Model:")
print("width 4 cm and height 11 cm: " + str(Y_nb2[0]))
print("width 8.5 cm and height 7 cm: " + str(Y_nb2[1]))
Пример #51
0
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input_data=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)
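    # For reference (a sketch, not taken from this file's LogisticRegression
    # implementation): the quantity above is the mean negative log-likelihood
    # of the minibatch,
    #   NLL(theta, D) = -(1/|D|) * sum_i log P(Y = y_i | x_i, theta)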

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print('The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
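# The early-stopping bookkeeping above can be isolated into a small helper.
# This is a sketch (not part of the original code) of the same patience /
# patience_increase / improvement_threshold rule, replayed over any sequence
# of validation losses; iters_per_validation=83 mirrors n_train_batches for
# MNIST with batch_size=600 (50000 // 600 = 83).
def sketch_early_stopping(validation_losses, patience=5000,
                          patience_increase=2, improvement_threshold=0.995,
                          iters_per_validation=83):
    """Replay the patience-based early-stopping rule on a list of losses."""
    best = float('inf')
    for k, loss in enumerate(validation_losses):
        it = (k + 1) * iters_per_validation - 1  # iteration of this check
        if loss < best:
            if loss < best * improvement_threshold:
                # significant improvement: allow training to run longer
                patience = max(patience, it * patience_increase)
            best = loss
        if patience <= it:
            return k, best  # ran out of patience at the k-th validation check
    return len(validation_losses) - 1, best

# e.g. sketch_early_stopping([0.12, 0.10, 0.09, 0.091, 0.09])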
Пример #52
0
# in order to run these tests, put this file into the directory that contains
# the LogisticRegression directory

import pandas as pd
from LogisticRegression import LogisticRegression

df = pd.read_csv('iris.csv')
df[df.columns[4]] = df[df.columns[4]].map({
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2
})
print(df.head())

clf = LogisticRegression.LogisticRegression(n_iter=500)
print('\n Fitting...\n')
clf.fit(df[df.columns[:-1]].values, df[df.columns[-1]].values)
print('Predicting... \n\n')
prd = clf.predict(df[df.columns[:-1]].values)

for r, p in zip(df[df.columns[-1]].values, prd):
    print(r, p)
Пример #53
0
class DBN(object):
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 numpy_rng=None):
        """
            documentation copied from:
            http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py
        This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layer_sizes: list of ints
        :param hidden_layer_sizes: intermediate layer sizes, must contain
                                   at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """



        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)


        assert self.n_layers > 0


        # construct multi-layer
        #ORIG# for i in xrange(self.n_layers):
        for i in range(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        numpy_rng=numpy_rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)


            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,     # W, b are shared
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)


        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()



    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input)
            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
                # cost = rbm.get_reconstruction_cross_entropy()
                # print >> sys.stderr, \
                #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost

    # def pretrain(self, lr=0.1, k=1, epochs=100):
    #     # pre-train layer-wise
    #     for i in xrange(self.n_layers):
    #         rbm = self.rbm_layers[i]

    #         for epoch in xrange(epochs):
    #             layer_input = self.x
    #             for j in xrange(i):
    #                 layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input)

    #             rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
    #             # cost = rbm.get_reconstruction_cross_entropy()
    #             # print >> sys.stderr, \
    #             #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost


    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        done_looping = False
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost

            lr *= 0.95
            epoch += 1


    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)
        return out
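# A minimal usage sketch for the DBN class above (illustrative, not part of
# the original snippet). It assumes numpy is imported and that the labels are
# one-hot encoded, as expected by the LogisticRegression output layer.
def sketch_test_dbn():
    x = numpy.array([[1, 1, 1, 0, 0, 0],
                     [1, 0, 1, 0, 0, 0],
                     [0, 0, 0, 1, 1, 0],
                     [0, 0, 0, 1, 1, 1]])
    y = numpy.array([[1, 0], [1, 0], [0, 1], [0, 1]])  # one-hot labels
    rng = numpy.random.RandomState(123)

    dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[4, 4],
              n_outs=2, numpy_rng=rng)
    dbn.pretrain(lr=0.1, k=1, epochs=100)   # greedy layer-wise CD-k
    dbn.finetune(lr=0.1, epochs=200)        # train the logistic output layer
    print(dbn.predict(x))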
Пример #54
0
    val_siamese_dataset = SiamesDataset(val_csv,
                                            training_dir,
                                            transform=transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor()]))
    
    val_dataloader = DataLoader(val_siamese_dataset,
                        shuffle=True,
                        num_workers=2,
                        batch_size=32)
    

    model = models.vgg16(pretrained=True)
    model.classifier[6] = nn.Linear(4096, 128)
    print(model)
    net = SiamesNet(model)
    regresion = LogisticRegression(128,1)

    if torch.cuda.is_available():
        print('CUDA available')
        net.cuda()
        regresion.cuda()
    
    #criterion = ContrastiveLoss()
    criterion = torch.nn.BCELoss()
    #criterion =torch.nn.CrossEntropyLoss()
    
    #optimizer = optim.RMSprop(regresion.parameters(), lr=1e-4, alpha=0.99, eps=1e-8, weight_decay=0.0005, momentum=0.9)
    optimizer = torch.optim.Adam(regresion.parameters(), lr=0.0001)
    #wandb.watch(regresion)

    for epoch in range(0,1000):
Пример #55
0
class DBN(object):
    def __init__(self, input=None, label=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(
                input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], rng=rng, activation=sigmoid
            )
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                W=sigmoid_layer.W,  # W, b are shared
                hbias=sigmoid_layer.b,
            )
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs
        )

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(layer_input)
            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
                # log the pre-training cost to stderr every 100 epochs
                if (epoch + 1) % 100 == 0:  # REMOVE this block for faster training
                    cost = rbm.get_reconstruction_cross_entropy()
                    print >> sys.stderr, "Pre-training layer %d, epoch %d, cost " % (i, epoch + 1), cost

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        done_looping = False
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            # log the finetuning cost to stderr every 25 epochs
            if (epoch + 1) % 25 == 0:  # REMOVE this block for faster training
                self.finetune_cost = self.log_layer.negative_log_likelihood()
                print >> sys.stderr, "Training epoch %d, cost is " % (epoch + 1), self.finetune_cost

            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)
        return out
Пример #56
0
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

from LogisticRegression import LogisticRegression

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1234)


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


regressor = LogisticRegression(lr=0.001, nb_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)

print("Logistic Regression Accuracy : ", accuracy(y_test, predictions))
def evaluate_lenet5(learning_rate=0.15, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 20], batch_size=500):
    """ Demonstrates lenet on CIFAR-10 dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):

        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        it is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    data_batch_1 = unpickle('cifar-10-batches-py/data_batch_1')
    data_batch_2 = unpickle('cifar-10-batches-py/data_batch_2')
    data_batch_3 = unpickle('cifar-10-batches-py/data_batch_3')
    data_batch_4 = unpickle('cifar-10-batches-py/data_batch_4')
    data_batch_5 = unpickle('cifar-10-batches-py/data_batch_5')
    test = unpickle('cifar-10-batches-py/test_batch')

    train_set_1 = data_batch_1["data"]
    train_set_2 = data_batch_2["data"]
    train_set_3 = data_batch_3["data"]
    train_set_4 = data_batch_4["data"]
    train_set_5 = data_batch_5["data"]
    X_train = numpy.concatenate((train_set_1, train_set_2, train_set_3, train_set_4, train_set_5), axis=0)

    y_train = numpy.concatenate((data_batch_1["labels"], data_batch_2["labels"], data_batch_3["labels"],
                                 data_batch_4["labels"], data_batch_5["labels"]))

    test_set = test["data"]
    Xte_rows = test_set.reshape(train_set_1.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # take the first 7,500 examples for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # keep the remaining 42,500 for training
    Ytr = y_train[7500:50000]

    # standardize every split with statistics computed on the training set
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train
    learning_rate = theano.shared(learning_rate)

    """whitening"""

    """
    Xtr_rows -= numpy.mean(Xtr_rows, axis=0) # zero-center the data (important)
    cov = numpy.dot(Xtr_rows.T, Xtr_rows) / Xtr_rows.shape[0]
    U,S,V = numpy.linalg.svd(cov)

    Xrot = numpy.dot(Xtr_rows, U)# decorrelate the data
    Xrot_reduced = numpy.dot(Xtr_rows, U[:,:100])

    # whiten the data:
    # divide by the eigenvalues (which are square roots of the singular values)
    Xwhite = Xrot / numpy.sqrt(S + 1e-5)"""

    """whitening"""

    #Xtr_rows = whiten(Xtr_rows)
    # zero-center the data (important)
    """cov = numpy.dot(Xtr_rows.T, Xtr_rows) / Xtr_rows.shape[0]
    U,S,V = numpy.linalg.svd(cov)

    Xrot = numpy.dot(Xtr_rows, U)

    Xtr_rows = Xrot / numpy.sqrt(S + 1e-5)

    Xval_rot = numpy.dot(Xval_rows,U)
    Xval_rows = Xval_rot / numpy.sqrt(S + 1e-5)

    Xte_rot = numpy.dot(Xte_rows,U)
    Xte_rows = Xte_rot / numpy.sqrt(S + 1e-5)
    """

    train_set = (Xtr_rows, Ytr)
    valid_set = (Xval_rows, Yval)
    test_set = (Xte_rows, Yte)

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    datasets = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
                (test_set_x, test_set_y)]

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 32 * 32 * 3)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (32, 32) is the size of CIFAR-10 images, with 3 colour channels.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32+4-5+1 , 32+4-5+1) = (32, 32)
    # maxpooling reduces this further to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (16+4-5+1, 16+4-5+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 16, 16),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_features) (i.e. a matrix of flattened feature maps).
    # Flattening generates a matrix of shape (batch_size, nkerns[1] * 8 * 8),
    # or (500, 20 * 8 * 8) = (500, 1280) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with ReLU activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 8 * 8,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected ReLU layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    L2_reg = 0.001
    L2_sqr = (
            (layer2.W ** 2).sum()
            + (layer3.W ** 2).sum()
        )

    cost = layer3.negative_log_likelihood(y)  + L2_reg * L2_sqr

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    epoch_loss_list = []
    epoch_val_list = []

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        if epoch == 10:
            learning_rate.set_value(0.1)
        # if epoch > 30:
        #    learning_rate.set_value(learning_rate.get_value()*0.9995)
        if epoch > 3:
            epoch_loss_np = numpy.reshape(epoch_loss_list, newshape=(len(epoch_loss_list), 3))
            epoch_val_np = numpy.reshape(epoch_val_list, newshape=(len(epoch_val_list), 3))
            numpy.savetxt(fname='epoc_cost.csv', X=epoch_loss_np,
                          fmt='%1.3f')
            numpy.savetxt(fname='epoc_val_error.csv', X=epoch_val_np,
                          fmt='%1.3f')

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            epoch_loss_entry = [iter, epoch, float(cost_ij)]
            epoch_loss_list.append(epoch_loss_entry)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                epoch_val_entry = [iter, epoch, this_validation_loss]
                epoch_val_list.append(epoch_val_entry)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

    epoch_loss_np = numpy.reshape(epoch_loss_list, newshape=(len(epoch_loss_list), 3))
    epoch_val_np = numpy.reshape(epoch_val_list, newshape=(len(epoch_val_list), 3))

    epoch_loss = pandas.DataFrame({"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1],
                                   "cost": epoch_loss_np[:, 2]})
    epoch_vall = pandas.DataFrame({"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1],
                                   "val_error": epoch_val_np[:, 2]})
    epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(['epoch']).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(['epoch']).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]})
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error")
    plt.show()
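# The commented-out "whitening" blocks inside evaluate_lenet5 sketch PCA
# whitening but were left disabled. Below is a small, self-contained numpy
# version of the same idea (an illustrative sketch, not part of the original
# training code): centre on the training set, rotate into the eigenbasis of
# the covariance, and divide by the square root of the eigenvalues.
import numpy

def pca_whiten(X_train, X_other=None, eps=1e-5):
    """Whiten the training data; optionally whiten another split with the
    training-set statistics."""
    mean = X_train.mean(axis=0)
    Xc = X_train - mean                       # zero-center the data
    cov = numpy.dot(Xc.T, Xc) / Xc.shape[0]   # data covariance matrix
    U, S, V = numpy.linalg.svd(cov)           # S holds the eigenvalues of cov
    Xw = numpy.dot(Xc, U) / numpy.sqrt(S + eps)
    if X_other is None:
        return Xw
    Xow = numpy.dot(X_other - mean, U) / numpy.sqrt(S + eps)
    return Xw, Xow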
Пример #58
0
class DBN(object):
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 numpy_rng=None, learning_rate=0.1):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if numpy_rng is None:
            numpy_rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        numpy_rng=numpy_rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(
                input=layer_input,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                W=sigmoid_layer.W,  # W, b are shared
                hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(
                    layer_input)
            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
                # cost = rbm.get_reconstruction_cross_entropy()
                # print >> sys.stderr, \
                #        'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        done_looping = False
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost

            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)
        return out
Пример #59
0
class Dropout(object):
    def __init__(self, input, label,\
                 n_in, hidden_layer_sizes, n_out,\
                 rng=None, activation=ReLU):

        self.x = input
        self.y = label

        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)
        
        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0


        # construct multi-layer 
        for i in xrange(self.n_layers):

            # layer_size
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i-1]

            # layer_input
            if i == 0:
                layer_input = self.x

            else:
                layer_input = self.hidden_layers[-1].output()

            # construct hidden_layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            
            self.hidden_layers.append(hidden_layer)


        # layer for output using Logistic Regression (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_out)


    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):

        for epoch in xrange(epochs):
            dropout_masks = []  # create different masks in each training epoch

            # forward hidden_layers
            for i in xrange(self.n_layers):
                if i == 0:
                    layer_input = self.x

                layer_input = self.hidden_layers[i].forward(input=layer_input)

                if dropout == True:
                    mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng)
                    layer_input *= mask

                    dropout_masks.append(mask)


            # forward & backward log_layer
            self.log_layer.train(input=layer_input)


            # backward hidden_layers
            for i in reversed(xrange(0, self.n_layers)):
                if i == self.n_layers-1:
                    prev_layer = self.log_layer
                else:
                    prev_layer = self.hidden_layers[i+1]

                if dropout == True:
                    self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i])
                else:
                    self.hidden_layers[i].backward(prev_layer=prev_layer)
                


    def predict(self, x, dropout=True, p_dropout=0.5):
        layer_input = x

        for i in xrange(self.n_layers):
            if dropout == True:
                # scale the weights by the keep probability to approximate the
                # ensemble of dropout networks at test time
                # (note: this rescales W in place, so call predict only once)
                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W
            
            layer_input = self.hidden_layers[i].output(input=layer_input)

        return self.log_layer.predict(layer_input)
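# A minimal usage sketch for the Dropout class above (illustrative, not part
# of the original snippet). It assumes numpy is imported and that the labels
# are one-hot encoded, as expected by the softmax LogisticRegression output
# layer used inside the class.
def sketch_test_dropout():
    x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = numpy.array([[1, 0], [0, 1], [0, 1], [1, 0]])  # one-hot XOR labels
    rng = numpy.random.RandomState(123)

    classifier = Dropout(input=x, label=y,
                         n_in=2, hidden_layer_sizes=[10, 10], n_out=2,
                         rng=rng)
    # train with fresh dropout masks sampled every epoch
    classifier.train(epochs=5000, dropout=True, p_dropout=0.5, rng=rng)
    # at test time the weights are scaled by (1 - p_dropout) instead
    print(classifier.predict(x, dropout=True, p_dropout=0.5))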
Пример #60
0
    def __init__(self, input=None, label=None,\
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\
                 rng=None):
        
        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        
        assert self.n_layers > 0


        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()
                
            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            if i == 0:
                rbm_layer = CRBM(input=layer_input,     # continuous-valued inputs
                                 n_visible=input_size,
                                 n_hidden=hidden_layer_sizes[i],
                                 W=sigmoid_layer.W,     # W, b are shared
                                 hbias=sigmoid_layer.b)
            else:
                rbm_layer = RBM(input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layer_sizes[i],
                                W=sigmoid_layer.W,     # W, b are shared
                                hbias=sigmoid_layer.b)
                
            self.rbm_layers.append(rbm_layer)


        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()