class MLP(object):
    def __init__(self, input, label, n_in, n_hidden, n_out, rng=None):
        self.x = input
        self.y = label

        if rng is None:
            rng = numpy.random.RandomState(1234)

        # construct hidden_layer (tanh or sigmoid so far)
        self.hidden_layer = HiddenLayer(input=self.x,
                                        n_in=n_in,
                                        n_out=n_hidden,
                                        rng=rng,
                                        activation=numpy.tanh)

        # construct log_layer (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layer.output,
                                            label=self.y,
                                            n_in=n_hidden,
                                            n_out=n_out)

    def train(self):
        layer_input = self.hidden_layer.forward()
        self.log_layer.train(input=layer_input)
        self.hidden_layer.backward(prev_layer=self.log_layer)

    def predict(self, x):
        x = self.hidden_layer.output(x)
        return self.log_layer.predict(x)
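# A minimal usage sketch for the MLP class above (illustrative only): it assumes the
# accompanying NumPy-based HiddenLayer and LogisticRegression implementations are importable
# from the same module, and that labels are one-hot encoded; the toy data and epoch count
# below are assumptions, not part of the original snippet.
import numpy

x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])   # toy XOR-style inputs
y = numpy.array([[1, 0], [0, 1], [0, 1], [1, 0]])   # one-hot labels

mlp = MLP(input=x, label=y, n_in=2, n_hidden=3, n_out=2)
for epoch in range(500):        # arbitrary number of epochs for illustration
    mlp.train()
print(mlp.predict(x))           # rows of softmax class probabilities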
def tuneThreshold():
    """ Explore different values of the threshold to see which one fits best """
    thresholds = np.linspace(0.4, 0.6, 10)
    bestAcc = 0.0
    bestModel = None

    X_tr, y_tr, w_tr = loadData()
    m, n = X_tr.shape

    for th in thresholds:
        model = LogisticRegression(features=['PRI_tau_eta', 'PRI_lep_eta', 'DER_deltar_tau_lep',
                                             'PRI_met_sumet', 'DER_mass_transverse_met_lep'],
                                   threshold=th)
        model.train(X_tr, y_tr, w_tr)
        p, r = model.predict(X_tr)

        # calculate accuracy on the same train set
        acc = 100.0 * (p.flatten() == y_tr.flatten()).sum() / m
        print "%s %s%%" % (th, acc)

        if acc > bestAcc:
            bestAcc = acc
            bestModel = model

    # save the best model, named after the best accuracy found
    bestModel.save('data/logisticRegression%.2f.txt' % bestAcc)
def __init__(self, image_shape=[28, 12], filter_shape=[5, 5], nkerns=[20, 50], batch_size=500):
    self.layers = []
    rng = np.random.RandomState(23455)

    # generate symbolic variables for input (x and y represent a minibatch)
    self.x = T.matrix('x')    # data, presented as rasterized images
    self.y = T.ivector('y')   # labels, presented as 1D vector of [int] labels

    layer0_input = self.x.reshape((batch_size, 1, image_shape[0], image_shape[0]))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = Layer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, image_shape[0], image_shape[0]),
        filter_shape=(nkerns[0], 1, filter_shape[0], filter_shape[0]),
        poolsize=(2, 2)
    )
    self.layers.append(layer0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = Layer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], image_shape[1], image_shape[1]),
        filter_shape=(nkerns[1], nkerns[0], filter_shape[1], filter_shape[1]),
        poolsize=(2, 2)
    )
    self.layers.append(layer1)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        input=layer2_input,
        rng=rng,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activ=T.tanh
    )
    self.layers.append(layer2)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
    self.layers.append(layer3)

    # the cost we minimize during training is the NLL of the model
    self.cost = layer3.negative_log_likelihood(self.y)
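# The size arithmetic in the comments above (28 -> 24 -> 12 -> 8 -> 4) can be checked with a
# small helper; this is just an illustrative sketch, not part of the original class.
def conv_pool_output_size(size, filter_size, pool_size=2):
    # 'valid' convolution followed by non-overlapping max-pooling
    return (size - filter_size + 1) // pool_size

s = 28
for f in (5, 5):                      # two conv/pool stages with 5x5 filters
    s = conv_pool_output_size(s, f)   # 28 -> 12, then 12 -> 4
print(s)                              # 4, matching the nkerns[1] * 4 * 4 inputs to the hidden layer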
def test_classification(self, X, y):
    logreg = LogisticRegression(lam_2=0.5)
    logreg.train(X, y)
    print("predict", logreg.predict(X[0]))
    print("error:", sum((np.array([logreg.predict(x) for x in X]) - np.array(y)) ** 2))
    print("F:", logreg.F(logreg.w, X, y))
    print("w:", logreg.w)
    print(logreg.fevals, logreg.gevals, logreg.adp)
def cross_validation(X, y, bsize, fold, eta, solver="SGD", wdecay=0):
    from sklearn.cross_validation import StratifiedKFold
    from LogisticRegression import LogisticRegression

    scores = []
    skf = StratifiedKFold(y, fold)
    for train_index, test_index in skf:
        X_train, X_test, y_train, y_test = X[train_index, :], X[test_index, :], y[train_index], y[test_index]
        lr = LogisticRegression(learning=solver, weight_decay=wdecay, eta_0=eta, batch_size=bsize)
        lr.fit(X_train, y_train)
        scores.append(lr.score(X_test, y_test))
    return np.mean(scores)
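# A minimal call sketch for cross_validation above, assuming a NumPy feature matrix and integer
# labels; the custom LogisticRegression class and its SGD solver are taken as given from the
# snippet, and the data below is synthetic and purely illustrative.
import numpy as np

X = np.random.randn(200, 10)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

mean_score = cross_validation(X, y, bsize=32, fold=5, eta=0.01, solver="SGD", wdecay=1e-4)
print("mean CV accuracy:", mean_score)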
def __init__(self, input=None, label=None,
             n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
             rng=None):

    self.x = input
    self.y = label

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for i in xrange(self.n_layers):
        # layer_size
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layer_sizes[i - 1]

        # layer_input
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # construct sigmoid_layer
        sigmoid_layer = HiddenLayer(input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layer_sizes[i],
                                    rng=rng,
                                    activation=sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        # construct rbm_layer
        rbm_layer = RBM(input=layer_input,
                        n_visible=input_size,
                        n_hidden=hidden_layer_sizes[i],
                        W=sigmoid_layer.W,  # W, b are shared
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # layer for output using Logistic Regression
    # print self.sigmoid_layers[-1].sample_h_given_v().shape
    self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                        label=self.y,
                                        n_in=hidden_layer_sizes[-1],
                                        n_out=n_outs)

    # finetune cost: the negative log likelihood of the logistic regression layer
    self.finetune_cost = self.log_layer.negative_log_likelihood()
def main():
    # model = FakeModel()

    # TODO model parameters
    model = LogisticRegression(features=['PRI_tau_eta', 'PRI_lep_eta', 'DER_deltar_tau_lep',
                                         'PRI_met_sumet', 'DER_mass_transverse_met_lep'])
    # load some previously saved model parameters
    model.load('data/logisticRegression69.61.txt')

    # load test data
    data = np.loadtxt("data/test.csv", delimiter=',', skiprows=1)
    ids = data[:, 0].astype(int)  # first column is id
    X = data[:, 1:31]             # 30 features

    # make predictions (ranking and label)
    r, p = model.predict(X)

    # save to file
    save(ids, r, p)
class NeuralNetwork:
    def __init__(self, rng, n_in, n_out, hl):
        # will contain basically a list of HiddenLayer objects.
        self.layers = []
        inp_size = n_in
        for i in range(len(hl)):
            HL = HiddenLayer(rng, inp_size, hl[i])
            self.layers.append(HL)
            inp_size = hl[i]
        self.op = LogisticRegression(inp_size, n_out)

        self.params = []
        for l in self.layers:
            self.params = self.params + l.params
        self.params = self.params + self.op.params
        # self.params = [l.params for l in self.layers]

    # forward pass is here
    def forward(self, x):
        act = [x]
        for i, l in enumerate(self.layers):
            act.append(l.output(act[i]))
        return act

    def cost(self, x, y):
        act = self.forward(x)
        estimate = act[-1]
        return self.op.cost(estimate, y)

    def calcAccuracy(self, x, y):
        act = self.forward(x)
        ll = act[-1]
        return self.op.calcAccuracy(ll, y)
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10, mode='dA'):

    self.sigmoid_layers = []
    self.ae_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins
            layer_input = self.x
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activ=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        # initialize dA or sA
        if mode == 'sA':
            ae_layer = SparseAE(numpy_rng=numpy_rng,
                                theano_rng=theano_rng,
                                input=layer_input,
                                n_visible=input_size,
                                n_hidden=hidden_layers_sizes[i],
                                W=sigmoid_layer.W,
                                bhid=sigmoid_layer.b)
        else:
            ae_layer = DenoiseAE(numpy_rng=numpy_rng,
                                 theano_rng=theano_rng,
                                 input=layer_input,
                                 n_visible=input_size,
                                 n_hidden=hidden_layers_sizes[i],
                                 W=sigmoid_layer.W,
                                 bhid=sigmoid_layer.b)
        self.ae_layers.append(ae_layer)

    self.logLayer = LogisticRegression(input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    self.params.extend(self.logLayer.params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
def __init__(self, input, label,
             n_in, hidden_layer_sizes, n_out,
             rng=None, activation=ReLU):

    self.x = input
    self.y = label

    self.hidden_layers = []
    self.n_layers = len(hidden_layer_sizes)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for i in xrange(self.n_layers):
        # layer_size
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_sizes[i - 1]

        # layer_input
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.hidden_layers[-1].output()

        # construct hidden_layer
        hidden_layer = HiddenLayer(input=layer_input,
                                   n_in=input_size,
                                   n_out=hidden_layer_sizes[i],
                                   rng=rng,
                                   activation=activation)
        self.hidden_layers.append(hidden_layer)

    # layer for output using Logistic Regression (softmax)
    self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                        label=self.y,
                                        n_in=hidden_layer_sizes[-1],
                                        n_out=n_out)
def testF(self, X, y):
    logreg = LogisticRegression(lam_2=0.5)
    logreg.train(X, y)
    print("f complete")
    print(logreg.f(logreg.w, X[0], y[0]))
    print("f for first entry")
    print(logreg.f(logreg.w, X[0], y[0]))
    print("F")
    print(logreg.F(logreg.w, X, y))
    print("g ")
    print(logreg.g(logreg.w, X[0], y[0]))
def __init__(self, np_rng, theano_rng=None, n_ins=784,
             hidden_layer_sizes=[500, 500], n_outs=10):

    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layer_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(np_rng.randint(2 ** 30))

    self.x = T.matrix('x')
    self.y = T.ivector('y')

    for i in xrange(self.n_layers):
        if i == 0:
            n_in = n_ins
            layer_input = self.x
        else:
            n_in = hidden_layer_sizes[i - 1]
            layer_input = self.sigmoid_layers[-1].output
        n_out = hidden_layer_sizes[i]

        sigmoid_layer = HiddenLayer(np_rng, layer_input, n_in, n_out,
                                    activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        dA_layer = AutoEncoder(np_rng, n_in, n_out,
                               theano_rng=theano_rng,
                               input=layer_input,
                               W=sigmoid_layer.W,
                               b_hid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    self.log_layer = LogisticRegression(self.sigmoid_layers[-1].output,
                                        self.y,
                                        hidden_layer_sizes[-1],
                                        n_outs)
    self.params.extend(self.log_layer.params)

    self.finetune_cost = self.log_layer.negative_log_likelihood()
    self.errors = self.log_layer.errors()
def modifyModel(self, batch_size, dataset):
    print("Start to modify the model---------------------------")
    self.batch_size = batch_size

    """ create Model """
    datasets = YiWenData.load_data(dataset)
    test_set_x, test_set_y = datasets[2]

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
                       # [int] labels

    print('... building the model')

    self.layer0_input = x.reshape((self.batch_size, 1, 28, 28))
    self.layer0.modify(self.layer0_input, (self.batch_size, 1, 28, 28))
    self.layer1.modify(self.layer0.output, (self.batch_size, self.nkerns[0], 12, 12))

    self.layer2_input = self.layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    self.layer2 = self.layer2

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticRegression(input=self.layer2.output, n_in=500, n_out=7)

    print("-------batch_size---------batch_size---------batch_size------------", self.layer0.image_shape)
def __init__(
    self,
    numpy_rng,
    theano_rng=None,
    n_ins=3600,
    hidden_layers_sizes=[500, 500],
    n_outs=6,
    corruption_levels=[0.1, 0.1]
):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the sdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers size, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """

    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as 1D vector of
                             # [int] labels

    # The SdA is an MLP, for which all weights of intermediate layers
    # are shared with a different denoising autoencoder.
    # We will first construct the SdA as a deep multilayer perceptron,
    # and when constructing each sigmoidal layer we also construct a
    # denoising autoencoder that shares weights with that layer.
    # During pretraining we will train these autoencoders (which will
    # lead to changing the weights of the MLP as well).
    # During finetuning we will finish training the SdA by doing
    # stochastic gradient descent on the MLP.
    for i in range(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the SdA if you are on the first
        # layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # it's arguably a philosophical question...
        # but we are going to only declare that the parameters of the
        # sigmoid_layers are parameters of the StackedDAA;
        # the visible biases in the dA are parameters of those
        # dAs, but not of the SdA
        self.params.extend(sigmoid_layer.params)

        # Construct a denoising autoencoder that shares weights with this
        # layer
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=hidden_layers_sizes[i],
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs
    )
    self.params.extend(self.logLayer.params)

    # construct a function that implements one step of finetuning

    # compute the cost for the second phase of training,
    # defined as the negative log likelihood
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

    # compute the gradients with respect to the model parameters
    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logLayer.errors(self.y)
def LogisticRegression_demo(learning_rate=0.13, n_epochs=1000,
                            dataset='mnist.pkl.gz', batch_size=600):
    datasets = load_multi()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building the model'

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    classifier = LogisticRegression(x, y, n_in=103, n_out=9)

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(),
        givens={x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    cost, updates = classifier.get_cost_updates(learning_rate=learning_rate)

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    print '... training the model'

    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                           ' model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           ' with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
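# The early-stopping bookkeeping above (patience, patience_increase, improvement_threshold) is
# easier to see in isolation. The following is only an illustrative sketch of the same logic,
# independent of Theano; stream_of_validation_losses is a hypothetical iterable of validation
# losses, one per evaluation.
patience, patience_increase, improvement_threshold = 5000, 2, 0.995
best = float('inf')
for it, val_loss in enumerate(stream_of_validation_losses):
    if val_loss < best * improvement_threshold:
        # a significant improvement: be willing to wait proportionally longer
        patience = max(patience, it * patience_increase)
    best = min(best, val_loss)
    if patience <= it:
        break   # no significant improvement for too long: stop training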
## These are the hyperparameters to the classifiers. You may need to
# adjust these as you try to find the best fit for each classifier.

# Logistic Regression parameters
eta = .001
lambda_parameter = .1

# Do not change anything below this line!!
# -----------------------------------------------------------------

# Read from file and extract X and Y
df = pd.read_csv("fruit.csv")
X = df[['width', 'height']].values
Y = (df['fruit'] - 1).values

nb1 = GaussianGenerativeModel(isSharedCovariance=False)
nb1.fit(X, Y)
nb1.visualize("generative_result_separate_covariances.png")

nb2 = GaussianGenerativeModel(isSharedCovariance=True)
nb2.fit(X, Y)
nb2.visualize("generative_result_shared_covariances.png")

lr = LogisticRegression(eta=eta, lambda_parameter=lambda_parameter)
lr.fit(X, Y)
lr.visualize('logistic_regression_result.png')
def __init__(self, rng, input, n_in, n_hidden1, n_hidden2, n_hidden3, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
    architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
    which the datapoints lie

    :type n_hidden1: int
    :param n_hidden1: number of hidden units for first layer

    :type n_hidden2: int
    :param n_hidden2: number of hidden units for second layer

    :type n_hidden3: int
    :param n_hidden3: number of hidden units for third layer

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
    which the labels lie
    """

    # Three HiddenLayers with ReLU activations feed into the
    # LogisticRegression layer; the activation function can be replaced by
    # tanh, sigmoid or any other nonlinear function
    self.First_hiddenLayer = HiddenLayer(rng=rng, input=input, n_in=n_in,
                                         n_out=n_hidden1, activation=relu)

    self.Second_hiddenLayer = HiddenLayer(
        rng=rng,
        input=self.First_hiddenLayer.output,
        n_in=n_hidden1,
        n_out=n_hidden2,
        activation=relu)

    self.Third_hiddenLayer = HiddenLayer(
        rng=rng,
        input=self.Second_hiddenLayer.output,
        n_in=n_hidden2,
        n_out=n_hidden3,
        activation=relu)

    # The logistic regression layer gets as input the hidden units
    # of the last hidden layer
    self.logRegressionLayer = LogisticRegression(
        input=self.Third_hiddenLayer.output,
        n_in=n_hidden3,
        n_out=n_out)
    # end-snippet-2 start-snippet-3
    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small
    self.L1 = (abs(self.First_hiddenLayer.W).sum()
               + abs(self.Second_hiddenLayer.W).sum()
               + abs(self.Third_hiddenLayer.W).sum()
               + abs(self.logRegressionLayer.W).sum())

    # square of L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = ((self.First_hiddenLayer.W ** 2).sum()
                   + (self.Second_hiddenLayer.W ** 2).sum()
                   + (self.Third_hiddenLayer.W ** 2).sum()
                   + (self.logRegressionLayer.W ** 2).sum())

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = (
        self.logRegressionLayer.negative_log_likelihood)

    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the layers it is
    # made out of
    self.params = (self.First_hiddenLayer.params
                   + self.Second_hiddenLayer.params
                   + self.Third_hiddenLayer.params
                   + self.logRegressionLayer.params)
    # end-snippet-3

    # keep track of model input
    self.input = input
class SdA(object):
    def __init__(self, input=None, label=None,
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
                 rng=None):

        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.dA_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct dA_layer
            dA_layer = dA(input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layer_sizes[i],
                          W=sigmoid_layer.W,
                          hbias=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, corruption_level=0.3, epochs=100):
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(layer_input)

            da = self.dA_layers[i]

            for epoch in xrange(epochs):
                da.train(lr=lr, corruption_level=corruption_level, input=layer_input)

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        while epoch < epochs:
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost

            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        return self.log_layer.predict(layer_input)
from sklearn.model_selection import LeaveOneOut
from sklearn import datasets
# import the logistic regression module from LogisticRegression.py
from LogisticRegression import LogisticRegression
import numpy as np

irisdata = datasets.load_iris()
data = np.delete(irisdata['data'][50:], [0, 1], axis=1)

lv = LeaveOneOut()
lv.get_n_splits(data)
labels = irisdata.target[50:].ravel()

error = []
for train_index, test_index in lv.split(data):
    # create an object for logistic regression
    model = LogisticRegression()
    train_data = data[train_index]
    train_labels = labels[train_index]
    test_data = data[test_index]
    test_labels = labels[test_index]
    # train the logistic regression object
    model.train(train_data, train_labels)
    error.append(1 - (test_labels == model.test(test_data, test_labels)[0]))

print(np.mean(error))
plt.yticks([0, 1], ytick, fontsize=14)  # show labels on the y-axis ticks
plt.xlabel('ある製品試験に不合格だった個数[個] ', fontsize=14)  # x-axis label
plt.title('製品試験と工場(正常・異常)の関係', fontsize=16)  # chart title
plt.scatter(X[:11], y[:11], c='b')  # scatter plot
plt.scatter(X[11:], y[11:], c='r')  # scatter plot
plt.show()  # display the chart

# Build the feature set X for the classification problem
m = 22                   # maximum number of failed items
X = np.arange(0, m, 1)   # counts of failed items from 0 to m-1

# Build the target labels for the classification problem
t = np.where(X > 10, 1, 0)

model = LogisticRegression()

iteration = 10000
cost = []
for i in range(iteration):
    # compute the hypothesis
    y = model.predict(X)
    # compute the cost function
    J = model.cost_function(y, t)
    cost.append(J)
    # update the parameters
    model.gradient_descent(X, y, t)

# plot the training progress
plt.plot(cost, c='b')
plt.xlabel('学習回数')
print('SEED:' + str(SEED))

# marginals_rand = np.random.multinomial(N, true_marginals.flatten(), size=1) / N
# marginals_rand = np.reshape(marginals_rand, [2, 2])
# while not (marginals_rand[1, 1] * marginals_rand[0, 1]):
#     marginals_rand = np.random.multinomial(N, true_marginals.flatten(), size=1) / N
#     marginals_rand = np.reshape(marginals_rand, [2, 2])
# marginals_rand = np.reshape(marginals_rand, [2, 2])

X_train, y_train, a_train = upload_data(ds=ds, n_samples=n_train, marginals=true_marginals)
X_test, y_test, a_test = upload_data(ds=ds, n_samples=n_test, marginals=true_marginals)

clf = LogisticRegression(fit_intercept=False, reg=0)
clf.fit(X_train, y_train)

# if ds == 'toy_correlated_for_most_fav_dist':
#     clf.coef_ = np.array([0.25, 0.80])
# else:
#     clf.coef_ = np.array([-.25, .8])

data_tuple = [X_test, a_test, y_test]
clf.coef_ = np.array([0.4, 1.12])

dist, optimum_ks, gamma_opt = calculate_distance_prob_eqopp_with_opt_params(
    data_tuple=data_tuple,
    function_of_gamma=f_gamma,
    range_gamma=range_gamma,
    k_opt_fcn=k_opt,
from LogisticRegression import LogisticRegression
import numpy as np

u = np.array([[0.5, 0, -0.3, -0.7, 0.5]])
v = np.array([[0.7, -0.4, 0.9, -0.5, 0.6]])
Y = np.random.randint(0, 2, (100, 1))
R = np.tile(Y, (1, 5))
E = np.random.rand(100, 1)
X = u * R + (np.ones((100, 5)) - R) * v + E

model1 = LogisticRegression()
model1.fit(X, Y)
print(model1.evaluate(X, Y))

model2 = LogisticRegression(stochastic=True, batch_size=50)
model2.fit(X, Y)
print(model2.evaluate(X, Y))

from sklearn.model_selection import train_test_split
X, X_val, Y, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

model3 = LogisticRegression(stochastic=True, batch_size=50, epochs=10000)
model3.fit(X, Y, X_val=X_val, Y_val=Y_val)
print(model3.evaluate(X, Y))
# Do not change anything below this line!!
# -----------------------------------------------------------------

# Read from file and extract X and Y
df = pd.read_csv("fruit.csv")
X = df[['width', 'height']].values
Y = (df['fruit'] - 1).values

nb1 = GaussianGenerativeModel(isSharedCovariance=False)
nb1.fit(X, Y)
nb1.test_insample()
print "Gaussian model with separate covariance: in-sample error rate %.1f%%" % (nb1.insample_err * 100.0)
nb1.visualize("generative_result_separate_covariances.png", show_charts=True)

nb2 = GaussianGenerativeModel(isSharedCovariance=True)
nb2.fit(X, Y)
nb2.test_insample()
print "Gaussian model with shared covariance: in-sample error rate %.1f%%" % (nb2.insample_err * 100.0)
nb2.visualize("generative_result_shared_covariances.png", show_charts=True)

for idx, lamda in enumerate([0.0001, 0.001, 0.01, 0.1, 1, 0]):
    lr = LogisticRegression(eta=eta, lambda_parameter=lamda)
    lr.fit(X, Y)
    lr.test_insample()
    print "Logistic regression with lambda %.5f, in-sample error rate %.1f%%" % (lamda, lr.insample_err * 100.0)
    lr.visualize('logistic_regression_result_%d.png' % idx, show_charts=True)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt
from LogisticRegression import LogisticRegression
# from regression import LogisticRegression


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

regressor = LogisticRegression(learning_rate=0.0001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)

print("LR classification accuracy:", accuracy(y_test, predictions))
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as an MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=3600,
                 hidden_layers_sizes=[500, 500], n_outs=6):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                          weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM. We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.
        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k
        '''

        index = T.lscalar('index')      # index to a [mini]batch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch, given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.In(learning_rate, value=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                         the list has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for training, validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score, test_score
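# A rough usage sketch for the DBN class above. The `load_data` helper, its dataset path, and
# the hyperparameters are assumptions in the style of the standard Theano tutorial setup, not
# part of the class itself.
numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng, n_ins=3600, hidden_layers_sizes=[500, 500], n_outs=6)

datasets = load_data('some_dataset.pkl.gz')   # hypothetical loader returning shared variables
train_set_x, train_set_y = datasets[0]

# greedy layer-wise pretraining with CD-1: one compiled function per RBM layer
pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=10, k=1)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // 10
for layer_fn in pretrain_fns:
    for epoch in range(10):
        for batch_index in range(n_train_batches):
            layer_fn(index=batch_index, lr=0.01)

# supervised finetuning of the whole stack
train_fn, valid_score, test_score = dbn.build_finetune_functions(
    datasets=datasets, batch_size=10, learning_rate=0.1)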
    simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
    simulated_labels = np.hstack((np.zeros(data_cnt), np.ones(data_cnt)))
    return simulated_separableish_features, simulated_labels


train_data_cnt = 5000
train_x, train_y = generateData(train_data_cnt)
print train_x.shape, train_y.shape

test_data_cnt = 1000
test_x, test_y = generateData(test_data_cnt)

lr = LogisticRegression()
lr.fit(train_x, train_y)

model_name = 'lr.model'
lr.saveModel(model_name)

new_lr = LogisticRegression()
new_lr.loadModel(model_name)
pred_res = new_lr.predict(test_x)
print pred_res[:10]

correct_cnt = np.sum((pred_res == test_y).astype(int))
print correct_cnt
def __init__(self, rnn_size=[800], layer=[10]):
    self.bidirectional_rnn = BidirectionalRNN('BidirectionalRNN', rnn_size)
    self.logistic_regression = LogisticRegression('LogisticRegression', rnn_size[-1], layer)
    ax1.plot(smooth(Adamax_train_costs), color='#895CCC', label='Adamax')
    ax1.legend()
    ax2.plot(Adamax_val_errors, 'o-', color='#895CCC', label='Adamax')
    ax2.legend()

    plt.pause(1)
    plt.show(block=False)


if __name__ == '__main__':
    image_size = 28 * 28
    classes = 10

    # test on logistic regression
    shapes = [(image_size, classes), (classes, )]
    # construct the logistic regression class
    classifier = lambda x: LogisticRegression(
        input=x, n_in=image_size, n_out=classes)
    print('start test on logistic regression......')
    test_model(classifier, shapes, fig_num=1)

    # test on mlp
    n_hidden = 500
    shapes = [(image_size, n_hidden), (n_hidden, ),
              (n_hidden, classes), (classes, )]
    rng = numpy.random.RandomState(1234)
    # construct the mlp
    classifier = lambda x: MLP(
        rng=rng, input=x, n_in=image_size, n_hidden=n_hidden, n_out=classes)
    print()
    print()
    print('start test on mlp......')
    test_model(classifier, shapes, fig_num=2)
import pandas as pd
import numpy as np
from LogisticRegression import LogisticRegression
from Multinomial import Multinomial


def loadData(file):
    data = pd.read_csv(file, header=None).fillna(0)
    return data.drop_duplicates()


if __name__ == '__main__':
    print("1 Spambase Logistic Regression")
    LogisticRegression(loadData('./data/spambase.csv')).validate()

    print("1 Breast Cancer Logistic Regression")
    LogisticRegression(loadData('./data/breastcancer.csv')).validate()

    print("1 Diabetes Logistic Regression")
    LogisticRegression(loadData('./data/diabetes.csv')).validate()

    print("2 Multivariate Bernoulli")
    Multinomial(True).validate('./data/20NG_data/train_data.csv',
                               './data/20NG_data/train_label.csv',
                               './data/20NG_data/test_data.csv',
                               './data/20NG_data/test_label.csv')

    print("2 Multinomial")
    Multinomial().validate('./data/20NG_data/train_data.csv',
                           './data/20NG_data/train_label.csv',
filename = 'LogisticRegressionData1.txt'


def load_data(filename):
    data = pd.read_csv(filename)
    data = data.values
    return data


data = load_data(filename)
dimensions = data.shape

x_train = data[0:50, 0:dimensions[1] - 1]
y_train = data[0:50, dimensions[1] - 1:]
m = x_train.shape[0]
n = x_train.shape[1]
x_test = data[51:, 0:dimensions[1] - 1]
y_test = data[51:, dimensions[1] - 1:]

tester = LogisticRegression()
tester.logistic_regression(x_train, y_train)

pred_probability = tester.predict_probability(x_test)
print "Probability of y being 1 for given testing samples:" + str(pred_probability)

pred_y = tester.predict_y(x_test)
print "Predicted value of y for given testing samples : " + str(pred_y)

plot_train = tester.plot_train(x_train, y_train)
plot_test = tester.plot_test(x_test, y_test)

acc = tester.accuracy(x_test, y_test)
print "Accuracy of regression algorithm = " + str(acc)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target
X = X[y < 2, :2]
y = y[y < 2]

from model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

from LogisticRegression import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# print(log_reg.score(X_test, y_test))  # 1.0
#
# print(log_reg.predict_proba(X_test))
# [0.92972035 0.98664939 0.14852024 0.01685947 0.0369836  0.0186637
#  0.04936918 0.99669244 0.97993941 0.74524655 0.04473194 0.00339285
#  0.26131273 0.0369836  0.84192923 0.79892262 0.82890209 0.32358166
#  0.06535323 0.20735334]
#
# print(log_reg.coef_)       # [ 3.01796521 -5.04447145]
# print(log_reg.intercept_)  # -0.6937719272911225
# Since only two features were selected, we can observe how the relationship
# between x and y affects the final result.
class CNN(object):
    def __init__(self, N, label, n_hidden, n_out, image_size, channel,
                 n_kernels, kernel_sizes, pool_sizes, rng=None, activation=ReLU):

        if rng is None:
            rng = numpy.random.RandomState(1234)

        self.N = N
        self.n_hidden = n_hidden
        self.n_kernels = n_kernels
        self.pool_sizes = pool_sizes

        self.conv_layers = []
        self.conv_sizes = []

        # construct 1st conv_layer
        conv_layer0 = ConvPoolLayer(N, image_size, channel, n_kernels[0],
                                    kernel_sizes[0], pool_sizes[0], rng, activation)
        self.conv_layers.append(conv_layer0)

        conv_size = [(image_size[0] - kernel_sizes[0][0] + 1) / pool_sizes[0][0],
                     (image_size[1] - kernel_sizes[0][1] + 1) / pool_sizes[0][1]]
        self.conv_sizes.append(conv_size)

        # construct 2nd conv_layer
        conv_layer1 = ConvPoolLayer(N, conv_size, n_kernels[0], n_kernels[1],
                                    kernel_sizes[1], pool_sizes[1], rng, activation)
        self.conv_layers.append(conv_layer1)

        conv_size = [(conv_size[0] - kernel_sizes[1][0] + 1) / pool_sizes[1][0],
                     (conv_size[1] - kernel_sizes[1][1] + 1) / pool_sizes[1][1]]
        self.conv_sizes.append(conv_size)

        # construct hidden_layer
        self.hidden_layer = HiddenLayer(None,
                                        n_kernels[-1] * conv_size[0] * conv_size[1],
                                        n_hidden,
                                        None, None,
                                        rng,
                                        activation)

        # construct log_layer
        self.log_layer = LogisticRegression(None, label, n_hidden, n_out)

    # def train(self, epochs, learning_rate, input=None):
    def train(self, epochs, learning_rate, input, test_input=None):
        for epoch in xrange(epochs):
            if (epoch + 1) % 5 == 0:
                print 'iter = %d/%d' % (epoch + 1, epochs)
                print

                if test_input is not None:
                    print '------------------'
                    print 'TEST PROCESSING...'
                    print self.predict(test_input)
                    print '------------------'
                    print

            # forward first conv layer
            pooled_X = self.conv_layers[0].forward(input=input)

            # forward second conv layer
            pooled_X = self.conv_layers[1].forward(input=pooled_X)

            # flatten input
            layer_input = self.flatten(pooled_X)

            # forward hidden layer
            layer_input = self.hidden_layer.forward(input=layer_input)

            # forward & backward logistic layer
            self.log_layer.train(lr=learning_rate, input=layer_input)

            # backward hidden layer
            self.hidden_layer.backward(prev_layer=self.log_layer, lr=learning_rate)

            flatten_size = self.n_kernels[-1] * self.conv_sizes[-1][0] * self.conv_sizes[-1][1]
            delta_flatten = numpy.zeros((self.N, flatten_size))

            for n in xrange(self.N):
                for i in xrange(flatten_size):
                    for j in xrange(self.n_hidden):
                        delta_flatten[n][i] += self.hidden_layer.W[i][j] * self.hidden_layer.d_y[n][j]

            # unflatten delta
            delta = numpy.zeros((len(delta_flatten), self.n_kernels[-1],
                                 self.conv_sizes[-1][0], self.conv_sizes[-1][1]))

            for n in xrange(len(delta)):
                index = 0
                for k in xrange(self.n_kernels[-1]):
                    for i in xrange(self.conv_sizes[-1][0]):
                        for j in xrange(self.conv_sizes[-1][1]):
                            delta[n][k][i][j] = delta_flatten[n][index]
                            index += 1

            # backward second conv layer
            delta = self.conv_layers[1].backward(delta, self.conv_sizes[1], learning_rate)

            # backward first conv layer
            self.conv_layers[0].backward(delta, self.conv_sizes[0], learning_rate)

    def flatten(self, input):
        flatten_size = self.n_kernels[-1] * self.conv_sizes[-1][0] * self.conv_sizes[-1][1]
        flattened_input = numpy.zeros((len(input), flatten_size))

        for n in xrange(len(flattened_input)):
            index = 0
            for k in xrange(self.n_kernels[-1]):
                for i in xrange(self.conv_sizes[-1][0]):
                    for j in xrange(self.conv_sizes[-1][1]):
                        flattened_input[n][index] = input[n][k][i][j]
                        index += 1

        # print flattened_input
        return flattened_input

    def predict(self, x):
        pooled_X = self.conv_layers[0].forward(input=x)
        pooled_X = self.conv_layers[1].forward(input=pooled_X)
        layer_input = self.flatten(pooled_X)
        x = self.hidden_layer.output(input=layer_input)
        return self.log_layer.predict(x)
def PolynomialLogisticRegression(degree, C, penalty='l2'):
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('log_reg', LogisticRegression(C=C, penalty=penalty))
    ])
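# A short usage sketch for the pipeline factory above, with the scikit-learn imports it relies
# on (Pipeline, PolynomialFeatures, StandardScaler, LogisticRegression); the make_moons data is
# an illustrative assumption.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=200, noise=0.2, random_state=0)

poly_log_reg = PolynomialLogisticRegression(degree=2, C=0.1)
poly_log_reg.fit(X, y)
print(poly_log_reg.score(X, y))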
        reg.fit(X_train, Y_train)
        predictions = reg.predict(X_test)
        acc = accuracy(Y_test, predictions)
        if acc > acc_max:
            acc_max = acc
            lr_max = lr_val
            n_iter_max = iteration
    return (lr_max, n_iter_max)


best_lr, best_n_iter = best_params()
print("Best Learning Rate:", best_lr)
print("Best Number Of Iterations:", best_n_iter)
# Best LR = 0.0001
# Best N_Iter = 1000

reg = LogisticRegression(lr=best_lr, n_iters=best_n_iter)
reg.fit(X_train, Y_train)
predictions = reg.predict(X_test)
print("Best Classification Accuracy", accuracy(Y_test, predictions))

# On running the code, the following warning may be shown:
#   RuntimeWarning: overflow encountered in exp
#   return 1 / (1 + np.exp(-X))
# This happens when X is a large negative number, so np.exp(-X) overflows the range of
# 64-bit floats. Hence only a selected set of 'lr' and 'n_iter' values is searched, to keep
# the number of comparisons small.
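# One common remedy for that overflow, shown here only as an illustrative sketch (the snippet's
# own LogisticRegression class is not modified), is a numerically stable sigmoid that evaluates
# exp only on the side where it cannot overflow.
import numpy as np

def stable_sigmoid(x):
    out = np.empty_like(x, dtype=float)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))   # safe: exponent is <= 0 here
    exp_x = np.exp(x[~pos])                    # safe: exponent is < 0 here
    out[~pos] = exp_x / (1.0 + exp_x)
    return out

print(stable_sigmoid(np.array([-1000.0, 0.0, 1000.0])))  # [0.  0.5 1. ] with no overflow warning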
            X[i][j] = (X[i][j] - MIN[j]) / (MAX[j] - MIN[j])

    X = np.hstack((np.ones((len(X), 1)), X))
    testing_X = np.array(X)

    return training_X, training_Y, testing_X


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


if __name__ == "__main__":
    training_X, training_y, testing_X = extract_features(normalization=True)

    model = LogisticRegression(training_X, training_y)
    weight = model.GradientDescent(optimizer="Adagrad")

    testing_y = sigmoid(np.dot(testing_X, weight))

    with open("output.csv", "w") as f:
        print("id,label", file=f)
        for i in range(testing_y.shape[0]):
            if testing_y[i] >= 0.5:
                print("{},1".format(i + 1), file=f)
            else:
                print("{},0".format(i + 1), file=f)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt
from LogisticRegression import LogisticRegression

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

regresor = LogisticRegression(lr=0.01, n_iters=10000)
regresor.fit(X_train, y_train)
predikcije = regresor.predict(X_test)

print("Accuracy: ", accuracy(y_test, predikcije))
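# The LogisticRegression class imported above is not included in this snippet; this is a
# minimal NumPy sketch of what an implementation with that interface (lr, n_iters, fit,
# predict) commonly looks like. It is an illustration, not the author's actual class.
import numpy as np

class LogisticRegression:
    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = 0.0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        for _ in range(self.n_iters):
            p = 1.0 / (1.0 + np.exp(-(X.dot(self.weights) + self.bias)))  # sigmoid
            dw = X.T.dot(p - y) / n_samples     # gradient w.r.t. weights
            db = np.sum(p - y) / n_samples      # gradient w.r.t. bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        p = 1.0 / (1.0 + np.exp(-(X.dot(self.weights) + self.bias)))
        return np.where(p >= 0.5, 1, 0)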
print('Data sorted ...') # Split data into training and test data - ignoring the critical data for now X = np.concatenate((X_ordered, X_disordered)) Y = np.concatenate((Y_ordered, Y_disordered)) print(np.shape(X)) X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.20) saveData = np.c_[X_train, Y_train] np.save('test_set', saveData) print('saved test set') # Illustrate ordered vs disordered data # L = 40 # fig = plt.figure() # plt.subplot(121) # plt.imshow(X_ordered[20000].reshape(L, L), cmap='plasma_r') # plt.title('Ordered') # # plt.subplot(122) # plt.imshow(X_disordered[20000].reshape(L, L), cmap='plasma_r') # plt.title('Disordered') # plt.show() # Train on the training data logreg = LogisticRegression(X_train, Y_train.reshape(len(Y_train), 1), X_test, Y_test.reshape(len(Y_test), 1)) logreg.fit_standard() print(logreg.accuracy()) weight = logreg.getWeights() np.save('weights_logreg.npy', weight)
class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ numpy_rng=None): """ documentation copied from: http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.x = input self.y = label self.sigmoid_layers = [] self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) if numpy_rng is None: numpy_rng = numpy.random.RandomState(1234) assert self.n_layers > 0 # construct multi-layer #ORIG# for i in xrange(self.n_layers): for i in range(self.n_layers): # layer_size if i == 0: input_size = n_ins else: input_size = hidden_layer_sizes[i - 1] # layer_input if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].sample_h_given_v() # construct sigmoid_layer sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], numpy_rng=numpy_rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) # construct rbm_layer rbm_layer = RBM( input=layer_input, n_visible=input_size, n_hidden=hidden_layer_sizes[i], W=sigmoid_layer.W, # W, b are shared hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # layer for output using Logistic Regression self.log_layer = LogisticRegression( input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs) # finetune cost: the negative log likelihood of the logistic regression layer self.finetune_cost = self.log_layer.negative_log_likelihood() def pretrain(self, lr=0.1, k=1, epochs=100): # pre-train layer-wise for i in xrange(self.n_layers): if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[i - 1].sample_h_given_v( layer_input) rbm = self.rbm_layers[i] for epoch in xrange(epochs): rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) # cost = rbm.get_reconstruction_cross_entropy() # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost # def pretrain(self, lr=0.1, k=1, epochs=100): # # pre-train layer-wise # for i in xrange(self.n_layers): # rbm = self.rbm_layers[i] # for epoch in xrange(epochs): # layer_input = self.x # for j in xrange(i): # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) # # cost = rbm.get_reconstruction_cross_entropy() # # print >> sys.stderr, \ # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() # train log_layer epoch = 0 done_looping = False while (epoch < epochs) and (not done_looping): self.log_layer.train(lr=lr, input=layer_input) # self.finetune_cost = self.log_layer.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost lr *= 0.95 epoch += 1 def predict(self, x): layer_input = x for i in xrange(self.n_layers): sigmoid_layer = self.sigmoid_layers[i] layer_input = 
sigmoid_layer.output(input=layer_input) out = self.log_layer.predict(layer_input) return out
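# A short usage sketch for the NumPy DBN class above. It still relies on the HiddenLayer, RBM
# and LogisticRegression helpers from the same project, so this only illustrates the intended
# call order (greedy layer-wise pretraining, then supervised finetuning); the toy data and
# one-hot labels are illustrative.
import numpy

x = numpy.array([[1, 1, 1, 0, 0, 0],
                 [1, 0, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0],
                 [0, 0, 1, 1, 1, 0],
                 [0, 0, 1, 1, 0, 0],
                 [0, 0, 1, 1, 1, 0]])
y = numpy.array([[1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1]])

rng = numpy.random.RandomState(123)
dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, numpy_rng=rng)
dbn.pretrain(lr=0.1, k=1, epochs=100)   # CD-k on each RBM, layer by layer
dbn.finetune(lr=0.1, epochs=200)        # train the softmax output layer
print(dbn.predict(x))                   # class probabilities for the training inputs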
class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ numpy_rng=None): self.x = input self.y = label self.sigmoid_layers = [] self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) if numpy_rng is None: numpy_rng = numpy.random.RandomState(1234) assert self.n_layers > 0 # construct multi-layer for i in xrange(self.n_layers): # layer_size if i == 0: input_size = n_ins else: input_size = hidden_layer_sizes[i - 1] # layer_input if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].sample_h_given_v() # construct sigmoid_layer sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], numpy_rng=numpy_rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) # construct rbm_layer rbm_layer = RBM( input=layer_input, n_visible=input_size, n_hidden=hidden_layer_sizes[i], W=sigmoid_layer.W, # W, b are shared hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # layer for output using Logistic Regression self.log_layer = LogisticRegression( input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs) # finetune cost: the negative log likelihood of the logistic regression layer self.finetune_cost = self.log_layer.negative_log_likelihood() # print 'self.finetune_cost: ', self.finetune_cost def pretrain(self, lr=0.1, k=1, epochs=1000, batch_size=-1): pretaining_start_time = time.clock() # pre-train layer-wise for i in xrange(self.n_layers): if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[i - 1].sample_h_given_v( layer_input) rbm = self.rbm_layers[i] # print 'layer_input', layer_input for epoch in xrange(epochs): batch_start = time.clock() # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) # cost = rbm.get_reconstruction_cross_entropy() # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost cost = 0.0 if batch_size == -1: cost = rbm.contrastive_divergence(input=layer_input, lr=lr, k=k, batch_size=-1) else: n_train_batches = len( layer_input ) / batch_size # compute number of minibatches for training, validation and testing mean_cost = [] for batch_index in xrange(n_train_batches): mean_cost += [ rbm.contrastive_divergence( input=layer_input[batch_index * batch_size:(batch_index + 1) * batch_size], lr=lr, k=k, batch_size=batch_size) ] cost = numpy.mean(mean_cost) batch_stop = time.clock() batch_time = (batch_stop - batch_start) print '\tPre-training layer [%d: %d X %d], epoch %d, cost %.7fm, entropy: %.2f, time is %.2f seconds' % ( i, rbm.n_visible, rbm.n_hidden, epoch, cost, rbm.get_reconstruction_cross_entropy(), (batch_time)) # synchronous betwen rbm and sigmoid layer self.sigmoid_layers[i].W = rbm.W self.sigmoid_layers[i].b = rbm.hbias # # Plot filters after each training epoch # # Construct image from the weight matrix # if layer == 0: # if (epoch % 20 == 0): # image = PIL.Image.fromarray(tile_raster_images( # X = numpy.array(rbm.get_w().T), # img_shape=(28, 28), # tile_shape=(10, 10), # tile_spacing=(1, 1))) # image.save('result/filters_at_layer_%d_epoch_%d.png' % (layer, epoch)) # numpy.array(rbm.get_w().T).dump(('result/weight_at_layer_%d.txt' % layer)) # if layer == 0: # image = PIL.Image.fromarray(tile_raster_images( # X = numpy.array(rbm.get_w().T), # img_shape=(28, rbm.n_visible / 28), # tile_shape=(10, 10), # tile_spacing=(1, 1))) # image.save('result/filters_at_layer_%d.png' % layer) pretaining_end_time = time.clock() 
print('Total time for pretraining: ' + '%.2f seconds' % ((pretaining_end_time - pretaining_start_time))) print self.sigmoid_layers[0].W def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() # self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(), # label=self.y, # n_in=hidden_layer_sizes[-1], # n_out=n_outs) # train log_layer epoch = 0 done_looping = False start_time = time.clock() print layer_input while (epoch < epochs) and (not done_looping): self.log_layer.train(lr=lr, input=layer_input) if (epoch % 20 == 0): print('\tFine-tuning epoch %d, cost is ' % epoch ) #self.log_layer.negative_log_likelihood() lr *= 0.95 epoch += 1 end_time = time.clock() print('Total time for fine-tuning: ' + '%.2f seconds' % ((end_time - start_time))) def predict(self, x=None, y=None): input_x = x for i in xrange(self.n_layers): sigmoid_layer = self.sigmoid_layers[i] input_x = sigmoid_layer.output(input=input_x) out = self.log_layer.predict(input_x, y) return out
def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ numpy_rng=None): """ documentation copied from: http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.x = input self.y = label self.sigmoid_layers = [] self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) if numpy_rng is None: numpy_rng = numpy.random.RandomState(1234) assert self.n_layers > 0 # construct multi-layer #ORIG# for i in xrange(self.n_layers): for i in range(self.n_layers): # layer_size if i == 0: input_size = n_ins else: input_size = hidden_layer_sizes[i - 1] # layer_input if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].sample_h_given_v() # construct sigmoid_layer sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], numpy_rng=numpy_rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) # construct rbm_layer rbm_layer = RBM( input=layer_input, n_visible=input_size, n_hidden=hidden_layer_sizes[i], W=sigmoid_layer.W, # W, b are shared hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # layer for output using Logistic Regression self.log_layer = LogisticRegression( input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs) # finetune cost: the negative log likelihood of the logistic regression layer self.finetune_cost = self.log_layer.negative_log_likelihood()
def __init__(self, stateIn, deepOut = False): global pickle print(" Loading previous state ...") if stateIn.endswith('gz'): f = gzip.open(stateIn,'rb') else: f = open(stateIn, 'r') state_name = pickle.load(f) state = state_name[0] self.names = state_name[1] convValues = state.convValues w0 = convValues[0][0] b0 = convValues[0][1] w1 = convValues[1][0] b1 = convValues[1][1] hiddenVals = state.hiddenValues wHidden = hiddenVals[0] bHidden = hiddenVals[1] logRegValues = state.logRegValues wLogReg = logRegValues[0] bLogReg = logRegValues[1] topo = state.topoplogy nkerns = topo.nkerns n_out = len(self.names) assert(n_out == np.shape(wLogReg)[1]) print(" Some Values ...") print(" Number of Kernels : " + str(nkerns)) print(" First Kernel w0[0][0] :\n" + str(w0[0][0])) print(" bHidden :\n" + str(bHidden)) print(" bLogReg :\n" + str(bLogReg)) print(" Building the theano model") batch_size = 1 x = T.matrix('x') # the data is presented as rasterized images layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1])) rng = np.random.RandomState(23455) layer0 = LeNetConvPoolLayer(None, input=layer0_input, image_shape=(batch_size, 1, topo.ishape[0], topo.ishape[0]), filter_shape=(nkerns[0], 1, topo.filter_1, topo.filter_1), poolsize=(topo.pool_1, topo.pool_1), wOld=w0, bOld=b0, deepOut=deepOut) layer1 = LeNetConvPoolLayer(None, input=layer0.output, image_shape=(batch_size, nkerns[0], topo.in_2, topo.in_2), filter_shape=(nkerns[1], nkerns[0], topo.filter_2, topo.filter_2), poolsize=(topo.pool_2, topo.pool_2), wOld=w1, bOld=b1, deepOut=deepOut) layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(None, input=layer2_input, n_in=nkerns[1] * topo.hidden_input, n_out=topo.numLogisticInput, activation=T.tanh, Wold = wHidden, bOld = bHidden) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=topo.numLogisticInput, n_out=n_out, Wold = wLogReg, bOld=bLogReg ) # create a function to compute the mistakes that are made by the model # index = T.lscalar() # test_model = theano.function([index], layer3.getProbs(), # givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]}) self.predict_model = theano.function([x], layer3.getProbs()) if (deepOut): self.layer0_out = theano.function([x], layer0.output) self.layer0_conv= theano.function([x], layer0.conv_out) self.layer1_conv= theano.function([x], layer1.conv_out) self.layer1_out = theano.function([x], layer1.output) self.b0 = b0 self.b1 = b1 self.w0 = w0 self.w1 = w1
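# A hedged usage sketch for the pickled-state predictor above. The class name
# `StatePredictor`, the file name and the 46x46 input size are placeholders, since none of
# them appear in the snippet itself; only the predict_model / names attributes are taken
# from the code above.
import numpy as np

h = w = 46                                          # replace with the real topo.ishape
predictor = StatePredictor('state.pkl.gz')          # hypothetical pickled-state file
img = np.random.rand(1, h * w).astype('float32')    # one rasterized grayscale image
probs = predictor.predict_model(img)                # softmax probabilities from layer3.getProbs()
best = int(np.argmax(probs))
print(predictor.names[best], float(probs[0][best]))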
def evaluate_lenet5(topo, loadPics, learning_rate=0.005, n_epochs=500, stateIn=None, stateOut=None): rng = numpy.random.RandomState(23455) theano_rng = RandomStreams(numpy.random.randint(2**30)) print "Loading the datasets for testing and validation..." # Images valid_set_x, valid_set_y = loadPics.getValidationData() test_set_x, test_set_y = loadPics.getTestData() print "... Loading the datasets" n_out = loadPics.numberOfClassed batch_size = 200 #TODO Check print(" Learning rate " + str(learning_rate)) # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_valid_batches /= batch_size n_test_batches /= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print '... building the model' print 'Number of Kernels' + str(topo.nkerns) in_2 = 14 #Input in second layer (layer1) # Reshape matrix of rasterized images of shape (batch_size,28*28) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, topo.ishape[0], topo.ishape[1])) # Using presistent state from last run w0 = w1 = b0 = b1 = wHidden = bHidden = wLogReg = bLogReg = None if stateIn is not None: print(" Loading previous state ...") state_names = pickle.load(open(stateIn, "r")) state = state_names[0] convValues = state.convValues w0 = convValues[0][0] b0 = convValues[0][1] w1 = convValues[1][0] b1 = convValues[1][1] hiddenVals = state.hiddenValues wHidden = hiddenVals[0] bHidden = hiddenVals[1] logRegValues = state.logRegValues wLogReg = logRegValues[0] bLogReg = logRegValues[1] print("Hallo Gallo") # Construct the first convolutional pooling layer: # filtering reduces the image size to (28-5+1,28-5+1)=(24,24) # maxpooling reduces this further to (24/2,24/2) = (12,12) # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 1, topo.ishape[0], topo.ishape[0]), filter_shape=(topo.nkerns[0], 1, topo.filter_1, topo.filter_1), poolsize=(topo.pool_1, topo.pool_1), wOld=w0, bOld=b0) # Construct the second convolutional pooling layer # filtering reduces the image size to (12-5+1,12-5+1)=(8,8) # maxpooling reduces this further to (8/2,8/2) = (4,4) # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, topo.nkerns[0], topo.in_2, topo.in_2), filter_shape=(topo.nkerns[1], topo.nkerns[0], topo.filter_2, topo.filter_2), poolsize=(topo.pool_2, topo.pool_2), wOld=w1, bOld=b1) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size,num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (20,32*4*4) = (20,512) layer2_input = layer1.output.flatten(2) # Evt. some drop out for the fully connected layer # Achtung p=1 entspricht keinem Dropout. 
# layer2_input = theano_rng.binomial(size=layer2_input.shape, n=1, p=1 - 0.02) * layer2_input # paper_6 no dropout # paper_14 again 0.02 dropout # paper_15 again no dropout layer2 = HiddenLayer(rng, input=layer2_input, n_in=topo.nkerns[1] * topo.hidden_input, n_out=topo.numLogisticInput, activation=T.tanh, Wold=wHidden, bOld=bHidden) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=topo.numLogisticInput, n_out=n_out, Wold=wLogReg, bOld=bLogReg) # Some regularisation (not for the conv-Kernels) L2_sqr = (layer2.W**2).sum() + (layer3.W**2).sum() # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) + 0.001 * L2_sqr # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) # Functions for statistics test_logloss = theano.function( [index], cost, givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_logloss = theano.function( [index], cost, givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) test_probs_fct = theano.function( [index], layer3.getProbs(), givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]}) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i],grads[i]) pairs. updates = [] for param_i, grad_i in zip(params, grads): updates.append((param_i, param_i - learning_rate * grad_i)) ############### # TRAIN MODEL # ############### print '... training' # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = time.clock() epoch = 0 done_looping = False epoch_fraction = 0.0 while (epoch < n_epochs) and (not done_looping): # New epoch the training set is disturbed again print(" Starting new training epoch") print(" Manipulating the training set") train_set_x, train_set_y = loadPics.giveMeNewTraining() n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size validation_frequency = min(n_train_batches, patience / 2) print(" Compiling new function") learning_rate *= 0.993 #See Paper from Cican train_logloss = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) print(" Finished compiling the training set") epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): #Alle einmal anfassen iter = (epoch - 1) * n_train_batches + minibatch_index epoch_fraction += 1.0 / float(n_train_batches) cost_ij = train_logloss(minibatch_index) if iter % 100 == 0: print 'training @ iter = ', iter, ' epoch_fraction ', epoch_fraction, ' costs ' + str( cost_ij) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in xrange(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) # compute zero-one loss on validation set validation_log_loss = numpy.mean( [validate_logloss(i) for i in xrange(n_valid_batches)]) test_log_loss = numpy.mean( [test_logloss(i) for i in xrange(n_test_batches)]) # test it on the test set test_start = time.clock() test_losses = [test_model(i) for i in xrange(n_test_batches)] train_costs = [ train_logloss(i) for i in xrange(n_test_batches) ] dt = time.clock() - test_start print 'Testing %i faces in %f msec image / sec %f', batch_size * n_test_batches, dt, dt / ( n_test_batches * batch_size) test_score = numpy.mean(test_losses) train_log_loss = numpy.mean(train_costs) test_probs1 = [ test_probs_fct(i) for i in xrange(n_test_batches) ] print('%i, %f, %f, %f, %f, %f, %f, 0.424242' % (epoch, this_validation_loss * 100., test_score * 100., learning_rate, train_log_loss, validation_log_loss, test_log_loss)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # # test it on the test set # test_losses = [test_model(i) for i in xrange(n_test_batches)] # test_score = numpy.mean(test_losses) # print((' epoch %i, minibatch %i/%i, test error of best ' # 'model %f %%') % # (epoch, minibatch_index + 1, n_train_batches, # test_score * 100.)) # if (this_validation_loss < 0.02): # learning_rate /= 2 # print("Decreased learning rate due to low xval error to " + str(learning_rate)) if patience <= iter: print("--------- Finished Looping ----- earlier ") done_looping = True break end_time = time.clock() print('---------- Optimization complete -------------------------') print('Res: ', str(topo.nkerns)) print('Res: ', learning_rate) print('Res: Best validation score of %f %% obtained at iteration %i,' \ 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('Res: The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) # Oliver if 
not os.path.isdir("conv_images"): os.makedirs("conv_images") os.chdir("conv_images") # d = layer0.W.get_value() #e.g. (20, 1, 5, 5) number of filter, num of incomming filters, dim filter # for i in range(0, numpy.shape(d)[0]): # dd = d[i][0] # rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8) # img = Image.fromarray(rescaled) # img.save('filter_l0' + str(i) + '.png') # # d = layer1.W.get_value() #e.g. (20, 1, 5, 5) number of filter, num of incomming filters, dim filter # for i in range(0, numpy.shape(d)[0]): # dd = d[i][0] # rescaled = (255.0 / dd.max() * (dd - dd.min())).astype(numpy.uint8) # img = Image.fromarray(rescaled) # img.save('filter_l1' + str(i) + '.png') state = LeNet5State(topology=topo, convValues=[ layer0.getParametersAsValues(), layer1.getParametersAsValues() ], hiddenValues=layer2.getParametersAsValues(), logRegValues=layer3.getParametersAsValues()) print if stateOut is not None: pickle.dump([state, loadPics.getClasses()], open(stateOut, 'wb')) print("Saved the pickeled data-set") return learning_rate
class SdA(object): """Stacked denoising auto-encoder class (SdA) A stacked denoising autoencoder model is obtained by stacking several dAs. The hidden layer of the dA at layer `i` becomes the input of the dA at layer `i+1`. The first layer dA gets as input the input of the SdA, and the hidden layer of the last dA represents the output. Note that after pretraining, the SdA is dealt with as a normal MLP, the dAs are only used to initialize the weights. """ def __init__( self, numpy_rng, theano_rng=None, n_ins=3600, hidden_layers_sizes=[500, 500], n_outs=6, corruption_levels=[0.1, 0.1] ): """ This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network :type corruption_levels: list of float :param corruption_levels: amount of corruption to use for each layer """ self.sigmoid_layers = [] self.dA_layers = [] self.params = [] self.n_layers = len(hidden_layers_sizes) assert self.n_layers > 0 if not theano_rng: theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # The SdA is an MLP, for which all weights of intermediate layers # are shared with a different denoising autoencoders # We will first construct the SdA as a deep multilayer perceptron, # and when constructing each sigmoidal layer we also construct a # denoising autoencoder that shares weights with that layer # During pretraining we will train these autoencoders (which will # lead to chainging the weights of the MLP as well) # During finetunining we will finish training the SdA by doing # stochastich gradient descent on the MLP for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_ins else: input_size = hidden_layers_sizes[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the SdA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].output sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.sigmoid_layers.append(sigmoid_layer) # its arguably a philosophical question... 
# but we are going to only declare that the parameters of the # sigmoid_layers are parameters of the StackedDAA # the visible biases in the dA are parameters of those # dA, but not the SdA self.params.extend(sigmoid_layer.params) # Construct a denoising autoencoder that shared weights with this # layer dA_layer = dA(numpy_rng=numpy_rng, theano_rng=theano_rng, input=layer_input, n_visible=input_size, n_hidden=hidden_layers_sizes[i], W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.dA_layers.append(dA_layer) # We now need to add a logistic layer on top of the MLP self.logLayer = LogisticRegression( input=self.sigmoid_layers[-1].output, n_in=hidden_layers_sizes[-1], n_out=n_outs ) self.params.extend(self.logLayer.params) # construct a function that implements one step of finetunining # compute the cost for second phase of training, # defined as the negative log likelihood self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made on the # minibatch given by self.x and self.y self.errors = self.logLayer.errors(self.y) def pretraining_functions(self, train_set_x, batch_size): ''' Generates a list of functions, each of them implementing one step in trainnig the dA corresponding to the layer with same index. The function will require as input the minibatch index, and to train a dA you just need to iterate, calling the corresponding function on all minibatch indexes. :type train_set_x: theano.tensor.TensorType :param train_set_x: Shared variable that contains all datapoints used for training the dA :type batch_size: int :param batch_size: size of a [mini]batch :type learning_rate: float :param learning_rate: learning rate used during training for any of the dA layers ''' # index to a [mini]batch index = T.lscalar('index') # index to a minibatch corruption_level = T.scalar('corruption') # % of corruption to use learning_rate = T.scalar('lr') # learning rate to use # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size pretrain_fns = [] for dA in self.dA_layers: # get the cost and the updates list cost, updates = dA.get_cost_updates(corruption_level, learning_rate) # compile the theano function fn = theano.function( inputs=[ index, theano.In(corruption_level, value=0.2), theano.In(learning_rate, value=0.1) ], outputs=cost, updates=updates, givens={ self.x: train_set_x[batch_begin: batch_end] } ) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, datasets, batch_size, learning_rate): '''Generates a function `train` that implements one step of finetuning, a function `validate` that computes the error on a batch from the validation set, and a function `test` that computes the error on a batch from the testing set :type datasets: list of pairs of theano.tensor.TensorType :param datasets: It is a list that contain all the datasets; the has to contain three pairs, `train`, `valid`, `test` in this order, where each pair is formed of two Theano variables, one for the datapoints, the other for the labels :type batch_size: int :param batch_size: size of a minibatch :type learning_rate: float :param learning_rate: learning rate used during finetune stage ''' (train_set_x, train_set_y) = datasets[0] (valid_set_x, valid_set_y) = datasets[1] (test_set_x, test_set_y) = datasets[2] # compute number of minibatches for training, validation 
and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_test_batches //= batch_size index = T.lscalar('index') # index to a [mini]batch # compute the gradients with respect to the model parameters gparams = T.grad(self.finetune_cost, self.params) # compute list of fine-tuning updates updates = [ (param, param - gparam * learning_rate) for param, gparam in zip(self.params, gparams) ] train_fn = theano.function( inputs=[index], outputs=self.finetune_cost, updates=updates, givens={ self.x: train_set_x[ index * batch_size: (index + 1) * batch_size ], self.y: train_set_y[ index * batch_size: (index + 1) * batch_size ] }, name='train' ) test_score_i = theano.function( [index], self.errors, givens={ self.x: test_set_x[ index * batch_size: (index + 1) * batch_size ], self.y: test_set_y[ index * batch_size: (index + 1) * batch_size ] }, name='test' ) valid_score_i = theano.function( [index], self.errors, givens={ self.x: valid_set_x[ index * batch_size: (index + 1) * batch_size ], self.y: valid_set_y[ index * batch_size: (index + 1) * batch_size ] }, name='valid' ) # Create a function that scans the entire validation set def valid_score(): return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score
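# A condensed usage sketch for the SdA class above, following the usual layer-wise
# pretraining / finetuning pattern; `datasets` is assumed to be the familiar
# [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)] list of Theano shared variables,
# and the epoch counts and corruption/learning rates are illustrative.
import numpy

numpy_rng = numpy.random.RandomState(89677)
sda = SdA(numpy_rng=numpy_rng, n_ins=3600, hidden_layers_sizes=[500, 500], n_outs=6)

# layer-wise pretraining of the denoising autoencoders
pretrain_fns = sda.pretraining_functions(train_set_x=datasets[0][0], batch_size=20)
n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] // 20
for i in range(sda.n_layers):
    for epoch in range(15):
        costs = [pretrain_fns[i](index=batch, corruption=0.1, lr=0.001)
                 for batch in range(n_train_batches)]
        print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(costs)))

# supervised fine-tuning of the whole stack
train_fn, valid_score, test_score = sda.build_finetune_functions(
    datasets=datasets, batch_size=20, learning_rate=0.1)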
# ----------------------------------------------------------------- # Read from file and extract X and Y df = pd.read_csv("fruit.csv") X = df[['width', 'height']].values Y = (df['fruit'] - 1).values nb1 = GaussianGenerativeModel(isSharedCovariance=False) nb1.fit(X, Y) nb1.visualize("generative_result_separate_covariances.png") nb2 = GaussianGenerativeModel(isSharedCovariance=True) nb2.fit(X, Y) nb2.visualize("generative_result_shared_covariances.png") lr = LogisticRegression(eta=eta, lambda_parameter=lambda_parameter) lr.fit(X, Y) lr.visualize('logistic_regression_result.png') X_test = np.array([[4, 11], [8.5, 7]]) Y_nb1 = nb1.predict(X_test) Y_nb2 = nb2.predict(X_test) Y_lr = lr.predict(X_test) print("Test fruit predictions for Gaussian Model:") print("width 4 cm and height 11 cm: " + str(Y_nb1[0])) print("width 8.5 cm and height 7 cm: " + str(Y_nb1[1])) print("Test fruit predictions for Shared Covariance Gaussian Model:") print("width 4 cm and height 11 cm: " + str(Y_nb2[0])) print("width 8.5 cm and height 7 cm: " + str(Y_nb2[1]))
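# Y_lr is computed above but never reported; a symmetric print block, mirroring the format
# used for the two Gaussian models, would be:
print("Test fruit predictions for Logistic Regression:")
print("width 4 cm and height 11 cm: " + str(Y_lr[0]))
print("width 8.5 cm and height 7 cm: " + str(Y_lr[1]))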
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600): """ Demonstrate stochastic gradient descent optimization of a log-linear model This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a # minibatch) x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels # construct the logistic regression class # Each MNIST image has size 28*28 classifier = LogisticRegression(input_data=x, n_in=28 * 28, n_out=10) # the cost we minimize during training is the negative log likelihood of # the model in symbolic format cost = classifier.negative_log_likelihood(y) # compiling a Theano function that computes the mistakes that are made by # the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # compute the gradient of cost with respect to theta = (W,b) g_W = T.grad(cost=cost, wrt=classifier.W) g_b = T.grad(cost=cost, wrt=classifier.b) # start-snippet-3 # specify how to update the parameters of the model as a list of # (variable, update expression) pairs. updates = [(classifier.W, classifier.W - learning_rate * g_W), (classifier.b, classifier.b - learning_rate * g_b)] # compiling a Theano function `train_model` that returns the cost, but in # the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) # end-snippet-3 ############### # TRAIN MODEL # ############### print('... training the model') # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print( 'epoch %i, minibatch %i/%i, validation error %f %%' % ( epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100. ) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss # test it on the test set test_losses = [test_model(i) for i in range(n_test_batches)] test_score = numpy.mean(test_losses) print( ( ' epoch %i, minibatch %i/%i, test error of' ' best model %f %%' ) % ( epoch, minibatch_index + 1, n_train_batches, test_score * 100. ) ) # save the best model with open('best_model.pkl', 'wb') as f: pickle.dump(classifier, f) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print( ( 'Optimization complete with best validation score of %f %%,' 'with test performance %f %%' ) % (best_validation_loss * 100., test_score * 100.) ) print('The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time)))
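# A hedged sketch of how the pickled best_model.pkl saved above is typically reused for
# prediction (following the standard Theano tutorial pattern). The attribute names
# `input_data` and `y_pred` depend on this particular LogisticRegression implementation, and
# load_data is the helper already used in the training function.
import pickle
import theano

def predict(dataset='mnist.pkl.gz', n_examples=10):
    with open('best_model.pkl', 'rb') as f:
        classifier = pickle.load(f)
    predict_model = theano.function(
        inputs=[classifier.input_data],   # the symbolic input the classifier was built with
        outputs=classifier.y_pred)        # predicted labels (argmax of the softmax)
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()
    predicted_values = predict_model(test_set_x[:n_examples])
    print("Predicted values for the first %d examples in the test set:" % n_examples)
    print(predicted_values)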
# in order to run the tests, put this file into the directory above the LogisticRegression
# directory
import pandas as pd
from LogisticRegression import LogisticRegression

df = pd.read_csv('iris.csv')
df[df.columns[4]] = df[df.columns[4]].map({
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2
})
print(df.head())

clf = LogisticRegression.LogisticRegression(n_iter=500)
print('\n Fitting...\n')
clf.fit(df[df.columns[:-1]].values, df[df.columns[-1]].values)
print('Predicting... \n\n')
prd = clf.predict(df[df.columns[:-1]].values)
for r, p in zip(df[df.columns[-1]].values, prd):
    print(r, p)
class DBN(object): def __init__(self, input=None, label=None,\ n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,\ numpy_rng=None): """ documentation copied from: http://www.cse.unsw.edu.au/~cs9444/Notes13/demo/DBN.py This class is made to support a variable number of layers. :type numpy_rng: numpy.random.RandomState :param numpy_rng: numpy random number generator used to draw initial weights :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng` :type n_ins: int :param n_ins: dimension of the input to the DBN :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int :param n_outs: dimension of the output of the network """ self.x = input self.y = label self.sigmoid_layers = [] self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) if numpy_rng is None: numpy_rng = numpy.random.RandomState(1234) assert self.n_layers > 0 # construct multi-layer #ORIG# for i in xrange(self.n_layers): for i in range(self.n_layers): # layer_size if i == 0: input_size = n_ins else: input_size = hidden_layer_sizes[i - 1] # layer_input if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].sample_h_given_v() # construct sigmoid_layer sigmoid_layer = HiddenLayer(input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], numpy_rng=numpy_rng, activation=sigmoid) self.sigmoid_layers.append(sigmoid_layer) # construct rbm_layer rbm_layer = RBM(input=layer_input, n_visible=input_size, n_hidden=hidden_layer_sizes[i], W=sigmoid_layer.W, # W, b are shared hbias=sigmoid_layer.b) self.rbm_layers.append(rbm_layer) # layer for output using Logistic Regression self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs) # finetune cost: the negative log likelihood of the logistic regression layer self.finetune_cost = self.log_layer.negative_log_likelihood() def pretrain(self, lr=0.1, k=1, epochs=100): # pre-train layer-wise for i in xrange(self.n_layers): if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[i-1].sample_h_given_v(layer_input) rbm = self.rbm_layers[i] for epoch in xrange(epochs): rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) # cost = rbm.get_reconstruction_cross_entropy() # print >> sys.stderr, \ # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost # def pretrain(self, lr=0.1, k=1, epochs=100): # # pre-train layer-wise # for i in xrange(self.n_layers): # rbm = self.rbm_layers[i] # for epoch in xrange(epochs): # layer_input = self.x # for j in xrange(i): # layer_input = self.sigmoid_layers[j].sample_h_given_v(layer_input) # rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) # # cost = rbm.get_reconstruction_cross_entropy() # # print >> sys.stderr, \ # # 'Pre-training layer %d, epoch %d, cost ' %(i, epoch), cost def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() # train log_layer epoch = 0 done_looping = False while (epoch < epochs) and (not done_looping): self.log_layer.train(lr=lr, input=layer_input) # self.finetune_cost = self.log_layer.negative_log_likelihood() # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost lr *= 0.95 epoch += 1 def predict(self, x): layer_input = x for i in xrange(self.n_layers): sigmoid_layer = self.sigmoid_layers[i] layer_input = 
sigmoid_layer.output(input=layer_input) out = self.log_layer.predict(layer_input) return out
val_siamese_dataset = SiamesDataset(val_csv,
                                    training_dir,
                                    transform=transforms.Compose([transforms.Resize((224, 224)),
                                                                  transforms.ToTensor()]))
val_dataloader = DataLoader(val_siamese_dataset, shuffle=True, num_workers=2, batch_size=32)

model = models.vgg16(pretrained=True)
model.classifier[6] = nn.Linear(4096, 128)
print(model)

net = SiamesNet(model)
regresion = LogisticRegression(128, 1)

if torch.cuda.is_available():
    print('CUDA available')
    net.cuda()
    regresion.cuda()

#criterion = ContrastiveLoss()
criterion = torch.nn.BCELoss()
#criterion = torch.nn.CrossEntropyLoss()

#optimizer = optim.RMSprop(regresion.parameters(), lr=1e-4, alpha=0.99, eps=1e-8, weight_decay=0.0005, momentum=0.9)
optimizer = torch.optim.Adam(regresion.parameters(), lr=0.0001)

#wandb.watch(regresion)

for epoch in range(0, 1000):
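# The training loop above is truncated in this excerpt, and the LogisticRegression module it
# uses is not shown. Given that torch.nn.BCELoss is the criterion, it is presumably a single
# linear layer followed by a sigmoid; this minimal PyTorch sketch is one plausible version.
import torch
import torch.nn as nn

class LogisticRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        # probabilities in (0, 1), as required by torch.nn.BCELoss
        return torch.sigmoid(self.linear(x))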
class DBN(object): def __init__(self, input=None, label=None, n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2, rng=None): self.x = input self.y = label self.sigmoid_layers = [] self.rbm_layers = [] self.n_layers = len(hidden_layer_sizes) # = len(self.rbm_layers) if rng is None: rng = numpy.random.RandomState(1234) assert self.n_layers > 0 # construct multi-layer for i in xrange(self.n_layers): # layer_size if i == 0: input_size = n_ins else: input_size = hidden_layer_sizes[i - 1] # layer_input if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[-1].sample_h_given_v() # construct sigmoid_layer sigmoid_layer = HiddenLayer( input=layer_input, n_in=input_size, n_out=hidden_layer_sizes[i], rng=rng, activation=sigmoid ) self.sigmoid_layers.append(sigmoid_layer) # construct rbm_layer rbm_layer = RBM( input=layer_input, n_visible=input_size, n_hidden=hidden_layer_sizes[i], W=sigmoid_layer.W, # W, b are shared hbias=sigmoid_layer.b, ) self.rbm_layers.append(rbm_layer) # layer for output using Logistic Regression self.log_layer = LogisticRegression( input=self.sigmoid_layers[-1].sample_h_given_v(), label=self.y, n_in=hidden_layer_sizes[-1], n_out=n_outs ) # finetune cost: the negative log likelihood of the logistic regression layer self.finetune_cost = self.log_layer.negative_log_likelihood() def pretrain(self, lr=0.1, k=1, epochs=100): # pre-train layer-wise for i in xrange(self.n_layers): if i == 0: layer_input = self.x else: layer_input = self.sigmoid_layers[i - 1].sample_h_given_v(layer_input) rbm = self.rbm_layers[i] for epoch in xrange(epochs): rbm.contrastive_divergence(lr=lr, k=k, input=layer_input) # log pretraining as stderr every after 100 epochs if (epoch + 1) % 100 == 0: # REMOVE this block for faster training cost = rbm.get_reconstruction_cross_entropy() print >> sys.stderr, "Pre-training layer %d, epoch %d, cost " % (i, epoch + 1), cost def finetune(self, lr=0.1, epochs=100): layer_input = self.sigmoid_layers[-1].sample_h_given_v() # train log_layer epoch = 0 done_looping = False while (epoch < epochs) and (not done_looping): self.log_layer.train(lr=lr, input=layer_input) # log finetune training as stderr every 25 epochs if (epoch + 1) % 25 == 0: # REMOVE this block for faster training self.finetune_cost = self.log_layer.negative_log_likelihood() print >> sys.stderr, "Training epoch %d, cost is " % (epoch + 1), self.finetune_cost lr *= 0.95 epoch += 1 def predict(self, x): layer_input = x for i in xrange(self.n_layers): sigmoid_layer = self.sigmoid_layers[i] layer_input = sigmoid_layer.output(input=layer_input) out = self.log_layer.predict(layer_input) return out
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt
from LogisticRegression import LogisticRegression

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

regressor = LogisticRegression(lr=0.001, nb_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)

print("Logistic Regression Accuracy : ", accuracy(y_test, predictions))
def evaluate_lenet5(learning_rate=0.15, n_epochs=200, dataset='mnist.pkl.gz', nkerns=[20, 20], batch_size=500): """ Demonstrates lenet on CIFAR-10 dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer """ rng = numpy.random.RandomState(23455) def shared_dataset(data_xy, borrow=True): """ Function that loads the dataset into shared variables The reason we store our dataset in shared variables is to allow Theano to copy it into the GPU memory (when code is run on GPU). Since copying data into the GPU is slow, copying a minibatch everytime is needed (the default behaviour if the data is not in a shared variable) would lead to a large decrease in performance. """ data_x, data_y = data_xy shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow) shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow) # When storing data on the GPU it has to be stored as floats # therefore we will store the labels as ``floatX`` as well # (``shared_y`` does exactly that). But during our computations # we need them as ints (we use labels as index, and if they are # floats it doesn't make sense) therefore instead of returning # ``shared_y`` we will have to cast it to int. This little hack # lets ous get around this issue return shared_x, T.cast(shared_y, 'int32') data_batch_1 = unpickle('cifar-10-batches-py/data_batch_1') data_batch_2 = unpickle('cifar-10-batches-py/data_batch_2') data_batch_3 = unpickle('cifar-10-batches-py/data_batch_3') data_batch_4 = unpickle('cifar-10-batches-py/data_batch_4') data_batch_5 = unpickle('cifar-10-batches-py/data_batch_5') test = unpickle('cifar-10-batches-py/test_batch') train_set_1 = data_batch_1["data"] train_set_2 = data_batch_2["data"] train_set_3 = data_batch_3["data"] train_set_4 = data_batch_4["data"] train_set_5 = data_batch_5["data"] X_train = numpy.concatenate((train_set_1, train_set_2, train_set_3, train_set_4, train_set_5), axis=0) y_train = numpy.concatenate((data_batch_1["labels"], data_batch_2["labels"], data_batch_3["labels"], data_batch_4["labels"], data_batch_5["labels"])) test_set = test["data"] Xte_rows = test_set.reshape(train_set_1.shape[0], 32 * 32 * 3) Yte = numpy.asarray(test["labels"]) Xval_rows = X_train[:7500, :] # take first 1000 for validation Yval = y_train[:7500] Xtr_rows = X_train[7500:50000, :] # keep last 49,000 for train Ytr = y_train[7500:50000] mean_train = Xtr_rows.mean(axis=0) stdv_train = Xte_rows.std(axis=0) Xtr_rows = (Xtr_rows - mean_train) / stdv_train Xval_rows = (Xval_rows - mean_train) / stdv_train Xte_rows = (Xte_rows - mean_train) / stdv_train learning_rate = theano.shared(learning_rate) """whitening""" """ Xtr_rows -= numpy.mean(Xtr_rows, axis=0) # zero-center the data (important) cov = numpy.dot(Xtr_rows.T, Xtr_rows) / Xtr_rows.shape[0] U,S,V = numpy.linalg.svd(cov) Xrot = numpy.dot(Xtr_rows, U)# decorrelate the data Xrot_reduced = numpy.dot(Xtr_rows, U[:,:100]) # whiten the data: # divide by the eigenvalues (which are square roots of the singular values) Xwhite = Xrot / numpy.sqrt(S + 1e-5)""" """whitening""" #Xtr_rows = whiten(Xtr_rows) # zero-center the data (important) """cov = numpy.dot(Xtr_rows.T, Xtr_rows) / Xtr_rows.shape[0] U,S,V = numpy.linalg.svd(cov) Xrot = numpy.dot(Xtr_rows, U) Xtr_rows = Xrot / numpy.sqrt(S + 1e-5) Xval_rot = 
numpy.dot(Xval_rows,U) Xval_rows = Xval_rot / numpy.sqrt(S + 1e-5) Xte_rot = numpy.dot(Xte_rows,U) Xte_rows = Xte_rot / numpy.sqrt(S + 1e-5) """ train_set = (Xtr_rows, Ytr) valid_set = (Xval_rows, Yval) test_set = (Xte_rows, Yte) test_set_x, test_set_y = shared_dataset(test_set) valid_set_x, valid_set_y = shared_dataset(valid_set) train_set_x, train_set_y = shared_dataset(train_set) datasets = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)] train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer # (28, 28) is the size of MNIST images. layer0_input = x.reshape((batch_size, 3, 32, 32)) # Construct the first convolutional pooling layer: # filtering reduces the image size to (32+4-5+1 , 32+4-5+1) = (32, 32) # maxpooling reduces this further to (32/2, 32/2) = (16, 16) # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2) ) # Construct the second convolutional pooling layer # filtering reduces the image size to (16+4-5+1, 16+4-5+1) = (16, 16) # maxpooling reduces this further to (16/2, 16/2) = (8, 8) # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 16, 16), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4), # or (500, 50 * 4 * 4) = (500, 800) with the default values. 
layer2_input = layer1.output.flatten(2) # construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * 8 * 8, n_out=500, activation=relu ) # classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model L2_reg = 0.001 L2_sqr = ( (layer2.W ** 2).sum() + (layer3.W ** 2).sum() ) cost = layer3.negative_log_likelihood(y) + L2_reg * L2_sqr # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) # end-snippet-1 ############### # TRAIN MODEL # ############### print('... training') # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() epoch = 0 done_looping = False epoch_loss_list = [] epoch_val_list = [] while (epoch < n_epochs) and (not done_looping): epoch += 1 if epoch == 10: learning_rate.set_value(0.1) # if epoch > 30: # learning_rate.set_value(learning_rate.get_value()*0.9995) if epoch > 3: epoch_loss_np = numpy.reshape(epoch_loss_list, newshape=(len(epoch_loss_list), 3)) epoch_val_np = numpy.reshape(epoch_val_list, newshape=(len(epoch_val_list), 3)) numpy.savetxt(fname='epoc_cost.csv', X=epoch_loss_np, fmt='%1.3f') numpy.savetxt(fname='epoc_val_error.csv', X=epoch_val_np, fmt='%1.3f') for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) epoch_loss_entry = [iter, epoch, float(cost_ij)] epoch_loss_list.append(epoch_loss_entry) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) epoch_val_entry = [iter, epoch, this_validation_loss] epoch_val_list.append(epoch_val_entry) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print('Optimization complete.') print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) epoch_loss_np = numpy.reshape(epoch_loss_list, newshape=(len(epoch_loss_list), 3)) epoch_val_np = numpy.reshape(epoch_val_list, newshape=(len(epoch_val_list), 3)) epoch_loss = pandas.DataFrame({"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1], "cost": epoch_loss_np[:, 2]}) epoch_vall = pandas.DataFrame({"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1], "val_error": epoch_val_np[:, 2]}) epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(['epoch']).mean()["cost"]) epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(['epoch']).mean()["val_error"]) epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]}) epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]}) epoc_avg_loss.plot(kind="line", x="epoch", y="cost") plt.show() epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error") plt.show()
class DBN(object):
    def __init__(self, input=None, label=None,
                 n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
                 rng=None, learning_rate=0.1):
        self.x = input
        self.y = label

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layer_sizes[i - 1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].sample_h_given_v()

            # construct sigmoid_layer
            sigmoid_layer = HiddenLayer(input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layer_sizes[i],
                                        rng=rng,
                                        activation=sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # construct rbm_layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,  # W, b are shared
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # layer for output using Logistic Regression
        self.log_layer = LogisticRegression(
            input=self.sigmoid_layers[-1].sample_h_given_v(),
            label=self.y,
            n_in=hidden_layer_sizes[-1],
            n_out=n_outs)

        # finetune cost: the negative log likelihood of the logistic regression layer
        self.finetune_cost = self.log_layer.negative_log_likelihood()

    def pretrain(self, lr=0.1, k=1, epochs=100):
        # pre-train layer-wise
        for i in xrange(self.n_layers):
            if i == 0:
                layer_input = self.x
            else:
                layer_input = \
                    self.sigmoid_layers[i - 1].sample_h_given_v(layer_input)

            rbm = self.rbm_layers[i]

            for epoch in xrange(epochs):
                rbm.contrastive_divergence(lr=lr, k=k, input=layer_input)
                # cost = rbm.get_reconstruction_cross_entropy()
                # print >> sys.stderr, \
                #     'Pre-training layer %d, epoch %d, cost ' % (i, epoch), cost

    def finetune(self, lr=0.1, epochs=100):
        layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # train log_layer
        epoch = 0
        done_looping = False
        while (epoch < epochs) and (not done_looping):
            self.log_layer.train(lr=lr, input=layer_input)
            # self.finetune_cost = self.log_layer.negative_log_likelihood()
            # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, self.finetune_cost
            lr *= 0.95
            epoch += 1

    def predict(self, x):
        layer_input = x

        for i in xrange(self.n_layers):
            sigmoid_layer = self.sigmoid_layers[i]
            layer_input = sigmoid_layer.output(input=layer_input)

        out = self.log_layer.predict(layer_input)
        return out
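# --- Usage sketch (illustrative only) ------------------------------------
# A minimal example of driving the DBN above on a tiny toy dataset; the data
# values and layer sizes are made up for illustration, and HiddenLayer /
# RBM / LogisticRegression are the helper classes used elsewhere in this
# document.
import numpy

x = numpy.array([[1, 1, 1, 0, 0, 0],
                 [1, 0, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0],
                 [0, 0, 1, 1, 1, 0],
                 [0, 0, 1, 1, 0, 0],
                 [0, 0, 1, 1, 1, 0]])
y = numpy.array([[1, 0],
                 [1, 0],
                 [1, 0],
                 [0, 1],
                 [0, 1],
                 [0, 1]])

rng = numpy.random.RandomState(123)

# construct, greedily pre-train each RBM, fine-tune the softmax layer,
# then predict on the training inputs
dbn = DBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[3, 3], n_outs=2, rng=rng)
dbn.pretrain(lr=0.1, k=1, epochs=100)
dbn.finetune(lr=0.1, epochs=200)
print(dbn.predict(x))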
class Dropout(object):
    def __init__(self, input, label,
                 n_in, hidden_layer_sizes, n_out,
                 rng=None, activation=ReLU):
        self.x = input
        self.y = label

        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)

        if rng is None:
            rng = numpy.random.RandomState(1234)

        assert self.n_layers > 0

        # construct multi-layer
        for i in xrange(self.n_layers):
            # layer_size
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i-1]

            # layer_input
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output()

            # construct hidden_layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            self.hidden_layers.append(hidden_layer)

        # layer for output using Logistic Regression (softmax)
        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_out)

    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
        for epoch in xrange(epochs):
            dropout_masks = []  # create different masks in each training epoch

            # forward hidden_layers
            for i in xrange(self.n_layers):
                if i == 0:
                    layer_input = self.x

                layer_input = self.hidden_layers[i].forward(input=layer_input)

                if dropout == True:
                    mask = self.hidden_layers[i].dropout(input=layer_input,
                                                         p=p_dropout, rng=rng)
                    layer_input *= mask
                    dropout_masks.append(mask)

            # forward & backward log_layer
            self.log_layer.train(input=layer_input)

            # backward hidden_layers
            for i in reversed(xrange(0, self.n_layers)):
                if i == self.n_layers - 1:
                    prev_layer = self.log_layer
                else:
                    prev_layer = self.hidden_layers[i+1]

                if dropout == True:
                    self.hidden_layers[i].backward(prev_layer=prev_layer,
                                                   dropout=True,
                                                   mask=dropout_masks[i])
                else:
                    self.hidden_layers[i].backward(prev_layer=prev_layer)

    def predict(self, x, dropout=True, p_dropout=0.5):
        layer_input = x

        for i in xrange(self.n_layers):
            if dropout == True:
                # scale the weights by (1 - p_dropout) at test time
                self.hidden_layers[i].W = (1 - p_dropout) * self.hidden_layers[i].W

            layer_input = self.hidden_layers[i].output(input=layer_input)

        return self.log_layer.predict(layer_input)
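# --- Usage sketch (illustrative only) ------------------------------------
# A minimal example of training the Dropout network above on XOR-style data;
# the dataset and layer sizes are made up, and ReLU / HiddenLayer /
# LogisticRegression are assumed to be the helpers used elsewhere here.
import numpy

x = numpy.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = numpy.array([[0, 1], [1, 0], [1, 0], [0, 1]])   # XOR, one-hot labels

rng = numpy.random.RandomState(1234)

classifier = Dropout(input=x, label=y,
                     n_in=2, hidden_layer_sizes=[8, 8], n_out=2,
                     rng=rng, activation=ReLU)
classifier.train(epochs=5000, dropout=True, p_dropout=0.5, rng=rng)

# note: predict() rescales the stored weights by (1 - p_dropout) in place,
# so it is intended to be called once, after training has finished
print(classifier.predict(x))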
def __init__(self, input=None, label=None,
             n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
             rng=None):
    self.x = input
    self.y = label

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

    if rng is None:
        rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for i in xrange(self.n_layers):
        # layer_size
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layer_sizes[i - 1]

        # layer_input
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].sample_h_given_v()

        # construct sigmoid_layer
        sigmoid_layer = HiddenLayer(input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layer_sizes[i],
                                    rng=rng,
                                    activation=sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        # construct rbm_layer
        if i == 0:
            rbm_layer = CRBM(input=layer_input,    # continuous-valued inputs
                             n_visible=input_size,
                             n_hidden=hidden_layer_sizes[i],
                             W=sigmoid_layer.W,    # W, b are shared
                             hbias=sigmoid_layer.b)
        else:
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layer_sizes[i],
                            W=sigmoid_layer.W,     # W, b are shared
                            hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # layer for output using Logistic Regression
    self.log_layer = LogisticRegression(input=self.sigmoid_layers[-1].sample_h_given_v(),
                                        label=self.y,
                                        n_in=hidden_layer_sizes[-1],
                                        n_out=n_outs)

    # finetune cost: the negative log likelihood of the logistic regression layer
    self.finetune_cost = self.log_layer.negative_log_likelihood()
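# --- Usage sketch (illustrative only) ------------------------------------
# The constructor above swaps a CRBM in for the first layer so the visible
# units can take continuous values. Assuming it belongs to a CDBN-style
# class (the class name and the pretrain/finetune/predict methods are taken
# to match the DBN above), usage might look like this with made-up data:
import numpy

x = numpy.array([[0.4, 0.5, 0.5, 0.0, 0.0, 0.0],
                 [0.5, 0.3, 0.5, 0.0, 0.0, 0.0],
                 [0.0, 0.0, 0.5, 0.3, 0.5, 0.0],
                 [0.0, 0.0, 0.5, 0.4, 0.5, 0.0]])
y = numpy.array([[1, 0],
                 [1, 0],
                 [0, 1],
                 [0, 1]])

rng = numpy.random.RandomState(123)

cdbn = CDBN(input=x, label=y, n_ins=6, hidden_layer_sizes=[5, 5], n_outs=2, rng=rng)
cdbn.pretrain(lr=0.1, k=1, epochs=200)
cdbn.finetune(lr=0.1, epochs=200)
print(cdbn.predict(x))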