def fit(self, X, Y, learning_rate=10e-6, regularisation=10e-1, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    # print("X.shape"+str(X.shape))
    # print("Y.shape"+str(Y.shape))
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    # Tvalid = y2indicator(Yvalid)  # not needed because cost2 takes integer targets
    X, Y = X[:-1000], Y[:-1000]
    # print("X.shape"+str(X.shape))
    # print("Y.shape"+str(Y.shape))

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)  # needed for gradient descent
    self.W1, self.b1 = init_weight_and_bias(D, self.M)
    self.W2, self.b2 = init_weight_and_bias(self.M, K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent
        pY_T = pY - T
        self.W2 -= learning_rate * (Z.T.dot(pY_T) + regularisation * self.W2)
        self.b2 -= learning_rate * (pY_T.sum(axis=0) + regularisation * self.b2)
        # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(dZ) + regularisation * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + regularisation * self.b1)

        if i % 10 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = cost2(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i : " + str(i) + "; Cost : " + str(c) + "; Error : " + str(e))
            if e < best_validation_error:
                best_validation_error = e
    print("Best Validation error : " + str(best_validation_error))

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=5e-7, regularisation=1.0, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Y = np.reshape(Y, (len(Y), 1))  #s
    # print("X.shape"+str(X.shape))
    # print("Y.shape"+str(Y.shape))
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]
    # print("X.shape"+str(X.shape))
    # print("Y.shape"+str(Y.shape))

    N, D = X.shape
    self.W1, self.b1 = init_weight_and_bias(D, self.M)  #s
    self.W2, self.b2 = init_weight_and_bias(self.M, 1)  #s
    # self.W1 = np.random.randn(D, self.M) / np.sqrt(D)  #lp
    # self.b1 = np.zeros(self.M)  #lp
    # self.W2 = np.random.randn(self.M) / np.sqrt(self.M)  #lp
    # self.b2 = 0  #lp

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent
        pY_Y = pY - Y
        # print("pY.shape"+str(pY.shape))
        # print("Z.shape"+str(Z.shape))
        # print("W2.shape"+str(self.W2.shape))
        # print("pY_Y.shape"+str(pY_Y.shape))
        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + regularisation * self.W2)
        self.b2 -= learning_rate * (pY_Y.sum() + regularisation * self.b2)
        # dZ = pY_Y.dot(self.W2.T) * (Z > 0)  # relu
        dZ = pY_Y.dot(self.W2.T) * (1 - Z * Z)  # tanh (the forward pass uses tanh, not relu)
        # dZ = np.outer(pY_Y, self.W2) * (Z > 0)  #lp
        self.W1 -= learning_rate * (X.T.dot(dZ) + regularisation * self.W1)
        self.b1 -= learning_rate * (np.sum(dZ, axis=0) + regularisation * self.b1)

        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            # print("Yvalid.shape"+str(Yvalid.shape))
            # print("pYvalid.shape"+str(pYvalid.shape))
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print("i : " + str(i) + "; Cost : " + str(c) + "; Error : " + str(e))
            if e < best_validation_error:
                best_validation_error = e
    print("Best Validation error : " + str(best_validation_error))

    if show_fig:
        plt.plot(costs)
        plt.show()
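The two NumPy fit methods above lean on helpers (init_weight_and_bias, y2indicator, error_rate, sigmoid_cost, cost2) that are imported from a shared util module rather than defined in this section. A minimal sketch of what they are assumed to do, consistent with how they are called here; the exact implementations in the original util module may differ slightly:

import numpy as np

def init_weight_and_bias(M1, M2):
    # Gaussian weights scaled by fan-in plus fan-out, zero bias (assumed scheme).
    W = np.random.randn(M1, M2) / np.sqrt(M1 + M2)
    b = np.zeros(M2)
    return W.astype(np.float32), b.astype(np.float32)

def y2indicator(y):
    # One-hot encode integer labels of shape (N,) into an (N, K) indicator matrix.
    N = len(y)
    K = len(set(y))
    ind = np.zeros((N, K))
    ind[np.arange(N), y] = 1
    return ind

def error_rate(targets, predictions):
    # Fraction of misclassified samples.
    return np.mean(targets != predictions)

def sigmoid_cost(T, Y):
    # Binary cross-entropy for sigmoid outputs.
    return -(T * np.log(Y) + (1 - T) * np.log(1 - Y)).sum()

def cost2(T, Y):
    # Multiclass cross-entropy using integer targets and softmax outputs.
    N = len(T)
    return -np.log(Y[np.arange(N), T]).mean()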
def __init__(self, M1, M2):
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))
    self.params = [self.W, self.b]
def __init__(self, M1, M2, an_id):
    self.id = an_id
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W.astype(np.float32), name='W%s' % self.id)
    self.b = tf.Variable(b.astype(np.float32), name='b%s' % self.id)
    self.parameters = [self.W, self.b]
def __init__(self, M1, M2, an_id):
    self.id = an_id
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))
    self.params = [self.W, self.b]
def __init__(self, M1, M2, an_id):
    self.id = an_id
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W)
    self.b = tf.Variable(b)
    self.params = [self.W, self.b]
def __init__(self, M1, M2, an_id):
    self.id = an_id
    self.M1 = M1
    self.M2 = M2
    W0, b0 = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W0, name='W%s' % self.id)
    self.b = tf.Variable(b0, name='b%s' % self.id)
    self.params = [self.W, self.b]
def __init__(self, M1, M2, an_id):
    self.id = an_id
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = theano.shared(W, 'W_%s' % self.id)
    self.b = theano.shared(b, 'b_%s' % self.id)
    self.params = [self.W, self.b]
def __init__(self, M1, M2, activation):
    self.activation = activation
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))
    self.parameters = [self.W, self.b]
def __init__(self, M1, M2, an_id, f):
    self.id = an_id
    self.f = f  # activation function
    self.M1 = M1
    self.M2 = M2
    W, b = init_weight_and_bias(M1, M2)
    self.W = theano.shared(W, 'W%s' % self.id)
    self.b = theano.shared(b, 'b%s' % self.id)
    self.params = [self.W, self.b]
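These __init__ variants only create the layer parameters; the enclosing HiddenLayer class is presumably paired with a forward method. A hedged sketch for the TensorFlow 1.x variants follows (the relu nonlinearity is an assumption; the Theano variant directly above would instead apply its stored self.f, e.g. return self.f(X.dot(self.W) + self.b)):

def forward(self, X):
    # affine transform followed by the layer's nonlinearity
    return tf.nn.relu(tf.matmul(X, self.W) + self.b)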
def neural_network(D, K, tfX):
    W1, b1 = init_weight_and_bias(D, n_nodes_h1)
    hidden_1_layer = {
        'w': tf.Variable(W1.astype(np.float32)),
        'b': tf.Variable(b1.astype(np.float32))
    }
    W2, b2 = init_weight_and_bias(n_nodes_h1, n_nodes_h2)
    hidden_2_layer = {
        'w': tf.Variable(W2.astype(np.float32)),
        'b': tf.Variable(b2.astype(np.float32))
    }
    W3, b3 = init_weight_and_bias(n_nodes_h2, n_nodes_h3)
    hidden_3_layer = {
        'w': tf.Variable(W3.astype(np.float32)),
        'b': tf.Variable(b3.astype(np.float32))
    }
    W4, b4 = init_weight_and_bias(n_nodes_h3, K)
    output_layer = {
        'w': tf.Variable(W4.astype(np.float32)),
        'b': tf.Variable(b4.astype(np.float32))
    }

    # forward pass
    l1 = tf.add(tf.matmul(tfX, hidden_1_layer['w']), hidden_1_layer['b'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['w']), hidden_2_layer['b'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['w']), hidden_3_layer['b'])
    l3 = tf.nn.relu(l3)
    # the output bias must be added after the matmul, not inside it
    output = tf.matmul(l3, output_layer['w']) + output_layer['b']

    params.extend([W1, b1, W2, b2, W3, b3, W4, b4])
    return output
def fit(self, X, Y, learning_rate=10e-8, regularisation=10e-12, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    # print("X.shape"+str(X.shape))
    # print("Y.shape"+str(Y.shape))
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    Tvalid = y2indicator(Yvalid)
    X, Y = X[:-1000], Y[:-1000]
    # print("X.shape"+str(X.shape))
    # print("Y.shape"+str(Y.shape))

    N, D = X.shape
    K = len(set(Y))
    T = y2indicator(Y)
    self.W, self.b = init_weight_and_bias(D, K)

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY = self.forward(X)

        # gradient descent
        self.W -= learning_rate * (X.T.dot(pY - T) + regularisation * self.W)
        self.b -= learning_rate * ((pY - T).sum(axis=0) + regularisation * self.b)

        if i % 10 == 0:
            pYvalid = self.forward(Xvalid)
            c = cost(Tvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print("i : " + str(i) + "; Cost : " + str(c) + "; Error : " + str(e))
            if e < best_validation_error:
                best_validation_error = e
    print("Best Validation error : " + str(best_validation_error))

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = Y.astype(np.int32) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] # initialize convpool layers N, c, width, height = X.shape mi = c outw = width outh = height self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = (outw - fw + 1) // 2 outh = (outh - fh + 1) // 2 mi = mo # initialize mlp layers K = len(set(Y)) self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for c in self.convpool_layers: self.params += c.params for h in self.hidden_layers: self.params += h.params # set up theano functions and variables thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') pY = self.forward(thX) rcost = reg*T.sum([(p*p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = rmsprop(cost, self.params, lr, mu, decay, eps) train_op = theano.function( inputs=[thX, thY], outputs=cost, updates=updates ) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] train_c = train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print( "i:", i, "j:", j, "nb:", n_batches, "train cost:", train_c, "cost:", c, "error rate:", e ) if show_fig: plt.plot(costs) plt.show()
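Several of the Theano fit methods, including the one above, build their parameter updates with a rmsprop(cost, params, lr, mu, decay, eps) helper that is not shown in this section. Below is a sketch consistent with how it is called, assuming RMSProp with an added momentum term; the real helper may differ in details such as how the cache is initialized:

def rmsprop(cost, params, lr, mu, decay, eps):
    # Build a Theano update list: one RMSProp cache and one momentum velocity per parameter.
    grads = T.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # running average of squared gradients
        c = theano.shared(np.ones_like(p.get_value(), dtype=np.float32))
        new_c = decay * c + (np.float32(1.0) - decay) * g * g

        # momentum velocity
        v = theano.shared(np.zeros_like(p.get_value(), dtype=np.float32))
        new_v = mu * v - lr * g / T.sqrt(new_c + eps)

        updates.append((c, new_c))
        updates.append((v, new_v))
        updates.append((p, p + new_v))
    return updates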
def fit(self, X, Y, lr=1e-6, mu=0.99, decay=0.999, reg=1e-11, eps=1e-9, epochs=300, batch_sz=100, show_fig=False): K = len(set(Y)) X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.int32) Xvalid = X[-1000:] # last 1000 Yvalid = Y[-1000:] # last 1000 Yvalid_flat = np.argmax(Yvalid, axis=1) X = X[:-1000] # all but the last 1000 Y = Y[:-1000] # all but the last 1000 N, D = X.shape self.hidden_layers = [] M1 = D for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2) self.hidden_layers.append(h) M1 = M2 W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') act = self.forward(tfX) rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( act, tfT)) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) n_batches = int(N / batch_sz) costs = [] init = tf.initialize_all_variables() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j + 1) * batch_sz] Ybatch = Y[j * batch_sz:(j + 1) * batch_sz] session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={ tfX: Xvalid, tfT: Yvalid }) costs.append(c) p = session.run(prediction, feed_dict={ tfX: Xvalid, tfT: Yvalid }) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, lr=10e-5, mu=0.99, reg=10e-7, decay=0.99999, eps=10e-3, batch_sz=30, epochs=100, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) # ============= Prep Data ============= # Validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = Y.astype(np.int32) # Valid set - last 1000 entries Xvalid, Yvalid = X[-1000:], Y[-1000:] # Training set - Everything except last 1000 entries X, Y = X[:-1000], Y[:-1000] # ============= Prep ConvPool layers ============= # initialize convpool layers N, c, width, height = X.shape mi = c outw = width outh = height self.convpool_layers = [] # For each parameterised convpool layer conv_layer_count = 0 for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh, self.pool_sz[conv_layer_count]) # Add layer self.convpool_layers.append(layer) # Output W after convolution layer outw = (outw - fw + 1) // self.pool_sz[conv_layer_count][0] outh = (outh - fh + 1) // self.pool_sz[conv_layer_count][1] # Set feature input to previous feature output # for the next loop mi = mo conv_layer_count += 1 # ============= Prep ANN layers ============= # K = length of all the unique values of Y K = len(set(Y)) # list to store all the hidden layers self.hidden_layers = [] # Output of last convpool layer feature output # This is to flatten the last convpool feature output as an input to the ANN M1 = self.convpool_layer_sizes[-1][ 0] * outw * outh # size must be same as output of last convpool layer count = 0 # Loop through the hidden layers in hidden_layer_sizes for M2 in self.hidden_layer_sizes: # Create hidden layer h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) # Set feature input to previous feature output # for the next loop M1 = M2 count += 1 # ============= Prep Log Regression layer ============= W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # ============= Collect parameters for SGD ============= self.params = [self.W, self.b] for c in self.convpool_layers: self.params += c.params for h in self.hidden_layers: self.params += h.params # momentum dparams = [ theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params ] # rmsprop cache = [ theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params ] # define theano variables - X and Y thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') # Probability of Y pY = self.forward(thX) # regularisation cost # rcost = reg_parameter*sum(each_parameter^2) rcost = reg * T.sum([(p * p).sum() for p in self.params]) # cost = mean*log(all the relevant targets) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost # prediction prediction = self.th_predict(thX) # function to calculate the prediction cost without updates # used to calculate cost of prediction for the validation set cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) # momentum updates # momentum only. 
    # Update params and dparams
    updates = [
        (p, p + mu * dp - lr * T.grad(cost, p))
        for p, dp in zip(self.params, dparams)
    ] + [
        (dp, mu * dp - lr * T.grad(cost, p))
        for p, dp in zip(self.params, dparams)
    ]

    train_op = theano.function(inputs=[thX, thY], updates=updates)

    n_batches = N // batch_sz
    costs = []
    for i in range(epochs):
        X, Y = shuffle(X, Y)
        for j in range(n_batches):
            Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
            Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

            train_op(Xbatch, Ybatch)

            if j % 20 == 0:
                c, p = cost_predict_op(Xvalid, Yvalid)
                costs.append(c)
                e = error_rate(Yvalid, p)
                print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

    if show_fig:
        plt.plot(costs)
        plt.savefig("cost.png")
def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True): # downcast lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) X = X.astype(np.float32) Xvalid = Xvalid.astype(np.float32) Y = Y.astype(np.int32) Yvalid = Yvalid.astype(np.int32) # initialize convpool layers N, c, width, height = X.shape mi = c outw = width outh = height self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = (outw - fw + 1) // 2 outh = (outh - fh + 1) // 2 mi = mo # initialize mlp layers K = len(set(Y)) self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for c in self.convpool_layers: self.params += c.params for h in self.hidden_layers: self.params += h.params # set up theano functions and variables thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') pY = self.forward(thX) rcost = reg*T.sum([(p*p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = rmsprop(cost, self.params, lr, mu, decay, eps) train_op = theano.function( inputs=[thX, thY], outputs=cost, updates=updates ) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] train_c = train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print( "i:", i, "j:", j, "nb:", n_batches, "train cost:", train_c, "cost:", c, "error rate:", e ) if show_fig: plt.plot(costs) plt.show()
def fit( self, X, Y, lr=10e-4, mu=0.99, reg=10e-4, decay=0.99999, eps=10e-3, batch_sz=30, epochs=100, show_fig=True ): # convert all of the params to float32 lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) # K are the unique values of Y (number of classes) K = len(set(Y)) # ============= Prep Data ============= # Validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) # Valid set - last 1000 entries Xvalid, Yvalid = X[-1000:], Y[-1000:] # Training set - Everything except last 1000 entries X, Y = X[:-1000], Y[:-1000] # Flat version required, so that error can be calculated. Yvalid_flat = np.argmax(Yvalid, axis=1) # ============= Prep ConvPool layers ============= # initialise convpool layers N, width, height, c_number = X.shape # input feature maps mi = c_number outw = width outh = height self.convpool_layers = [] convpool_layer_count = 0 # create convpool layers for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer( mi, mo, fw, fh, self.strides[convpool_layer_count], self.pool_sz[convpool_layer_count], self.pool_strides[convpool_layer_count] ) self.convpool_layers.append(layer) outw = outw // self.pool_sz[convpool_layer_count][1] outh = outh // self.pool_sz[convpool_layer_count][2] mi = mo convpool_layer_count += 1 # ============= Prep ANN layers ============= # Hidden layers self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh hidden_layer_count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, hidden_layer_count) self.hidden_layers.append(h) M1 = M2 hidden_layer_count += 1 # ============= prep log regression layer ============= W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W, 'W_logreg') self.b = tf.Variable(b, 'b_logreg') # ============= collect params ============= self.params = [self.W, self.b] # collect convpool for h in self.convpool_layers: self.params += h.params # collect hidden for h in self.hidden_layers: self.params += h.params # ============= init tensorflow variables ============= tfX = tf.placeholder(tf.float32, shape=(None, width, height, c_number), name='X') tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') # not doing softmax, calculating our own activation function act = self.forward(tfX) # reg cost - regularisation*sum of L2 loss for every parameter rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=act, labels=tfY ) ) + rcost prediction = self.predict(tfX) # ============= init train function ============= train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) # calculate number of batches n_batches = N // batch_sz # initialise costs array costs = [] init = tf.global_variables_initializer() # ============= init tf session ============= with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) if j % 20 == 0: # calculate costs c_out = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid}) costs.append(c_out) # calculate prediction p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid}) # calculcate error rate e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c_out, "error rate:", e) if show_fig: plt.plot(costs) plt.savefig("cost.png")
def fit(self, X, Y, lr=10e-5, mu=0.99, reg=10e-7, decay=0.99999, eps=10e-3, batch_sz=30, epochs=100, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = Y.astype(np.int32) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] # initialize convpool layers N, c, width, height = X.shape mi = c outw = width outh = height self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = (outw - fw + 1) // 2 outh = (outh - fh + 1) // 2 mi = mo # initialize mlp layers K = len(set(Y)) self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][ 0] * outw * outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for c in self.convpool_layers: self.params += c.params for h in self.hidden_layers: self.params += h.params # for momentum dparams = [ theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params ] # for rmsprop cache = [ theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params ] # set up theano functions and variables thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') pY = self.forward(thX) rcost = reg * T.sum([(p * p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) # updates = [ # (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache) # ] + [ # (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams) # ] + [ # (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams) # ] # momentum only updates = [(p, p + mu * dp - lr * T.grad(cost, p)) for p, dp in zip(self.params, dparams) ] + [(dp, mu * dp - lr * T.grad(cost, p)) for p, dp in zip(self.params, dparams)] train_op = theano.function(inputs=[thX, thY], updates=updates) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show()
def train(self, X, Y, learning_rate=10e-4, mu=.99, reg=10e-4, decay=0.9999, eps=10e-3, batch_sz=100, epochs=3, dispFig=True): print('Training model...') # tensorflow expects inputs to be of same data format learning_rate = np.float32(learning_rate) mu = np.float32(mu) decay = np.float32(decay) eps = np.float32(eps) # input data should have shape (N, im_W, im_H, color_channels) X, Y = shuffle(X, Y) N, im_W, im_H, color_channels = X.shape K = len(np.unique(Y)) # number of classes # check if input truths are vector or one hot encoded if len(Y.shape) == 1 or Y.shape[1] != K: Y_ind = y2indicator(Y).astype(np.float32) else: Y_ind = Y X = X.astype(np.float32) # just a precaution... # use 80% of data for test, 20% for validation set # initialize tensorflow var X with shape (NONE, w,h,color) numTrain = round(N * .8) numTest = round(N * .2) trainIdx = makeDiv(numTrain, batch_sz) validIdx = makeDiv(numTest, batch_sz) Xtrain = X[:trainIdx, ] Ytrain = Y_ind[:trainIdx, ] Xvalid = X[-validIdx:, ] Yvalid = Y_ind[-validIdx:, ] # init Convpool layers inputMap_sz = X.shape[-1] self.convpoolLayers = [] outW = im_W outH = im_H for outMap, filter_W, filter_H in self.convpool_sz: self.convpoolLayers.append( Convpool(inputMap_sz, outMap, filter_W, filter_H)) inputMap_sz = outMap outW = outW // 2 outH = outH // 2 # init MLP layers self.hiddenLayers = [] hiddenInput_shp = inputMap_sz * outW * outH for m in self.hidden_sz: self.hiddenLayers.append(HiddenLayer(hiddenInput_shp, m)) hiddenInput_shp = m V, c = init_weight_and_bias(hiddenInput_shp, K) self.V = tf.Variable(V) self.c = tf.Variable(c) # collect params for use in updates self.params = [self.V, self.c] for h in self.convpoolLayers: self.params += h.params for h in self.hiddenLayers: self.params += h.params tfX = tf.placeholder(tf.float32, shape=(None, im_W, im_H, color_channels), name='X') tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') Z_logreg = self.forward(tfX) rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params]) # calculate l2 penalty cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=Z_logreg, labels=tfY)) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) n_batches = len(Xtrain) // batch_sz costs = [] init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) for i in range(epochs): Xtrain, Ytrain = shuffle(Xtrain, Ytrain) for j in range(n_batches): Xbatch = Xtrain[j * batch_sz:(j * batch_sz + batch_sz), ] Ybatch = Ytrain[j * batch_sz:(j * batch_sz + batch_sz), ] sess.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) if j % 10 == 0: c = sess.run(cost, feed_dict={ tfX: Xvalid, tfY: Yvalid }) costs.append(c) p = sess.run(prediction, feed_dict={ tfX: Xvalid, tfY: Yvalid }) e = error_rate(np.argmax(Yvalid, axis=1), p) print('Epoch: {}\t batch: {}\t cost: {}\t error: {}'. format(i, j, c, e)) print('Final Accuracy: {}'.format(1 - e)) if dispFig: plt.plot(costs) plt.xlabel('Epochs') plt.ylabel('Cost') plt.show() return costs, (1 - e)
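train() above also depends on a makeDiv helper that this section never defines. Judging from how its result is used to slice the training and validation splits, it presumably rounds a count down to the nearest multiple of the batch size; a minimal sketch under that assumption:

def makeDiv(n, batch_sz):
    # Round n down so the resulting slice length is an exact multiple of batch_sz (assumed behavior).
    return (n // batch_sz) * batch_sz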
def fit(self, X, Y, learning_rate=10e-4, mu=0.99, decay=0.999, reg=10e-3, epochs=400, batch_sz=128, show_fig=False): K = len(set(Y)) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) Xvalid, Yvalid = X[-1000:], Y[-1000:] Yvalid_flat = np.argmax(Yvalid, axis=1) X, Y = X[:-1000], Y[:-1000] # intialize hidden layers N, D = X.shape self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 #output of last layer is input of next count += 1 # initaliz params of output layers W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') act = self.forward(tfX) rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfT)) + rcost predction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) n_batches = int(N / batch_sz) costs = [] init = tf.initialize_all_variables() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={ tfX: Xvalid, tfT: Yvalid }) costs.append(c) p = session.run(predction, feed_dict={ tfX: Xvalid, tfT: Yvalid }) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error_rate", e) if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, reg=10e-12, eps=10e-10, epochs=400, batch_sz=100, show_fig=False):
    learning_rate = np.float32(learning_rate)
    mu = np.float32(mu)
    decay = np.float32(decay)
    reg = np.float32(reg)
    eps = np.float32(eps)

    # make a validation set
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    # initialize hidden layers
    N, D = X.shape
    K = len(set(Y))
    self.hidden_layers = []
    M1 = D
    count = 0
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1
    W, b = init_weight_and_bias(M1, K)
    self.W = theano.shared(W, 'W_logreg')
    self.b = theano.shared(b, 'b_logreg')

    # collect params for later use
    self.params = [self.W, self.b]
    for h in self.hidden_layers:
        self.params += h.params

    # for momentum
    dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

    # for rmsprop
    cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

    # set up theano functions and variables
    thX = T.fmatrix('X')
    thY = T.ivector('Y')
    pY = self.forward(thX)

    rcost = reg * T.sum([(p * p).sum() for p in self.params])
    cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
    prediction = self.predict(thX)

    cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

    updates = [
        (c, decay * c + (np.float32(1) - decay) * T.grad(cost, p) * T.grad(cost, p))
        for p, c in zip(self.params, cache)
    ] + [
        (p, p + mu * dp - learning_rate * T.grad(cost, p) / T.sqrt(c + eps))
        for p, c, dp in zip(self.params, cache, dparams)
    ] + [
        (dp, mu * dp - learning_rate * T.grad(cost, p) / T.sqrt(c + eps))
        for p, c, dp in zip(self.params, cache, dparams)
    ]

    # momentum only
    # updates = [
    #     (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
    # ] + [
    #     (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams)
    # ]

    train_op = theano.function(
        inputs=[thX, thY],
        updates=updates
    )

    # integer division so range() below receives an int (Python 3)
    n_batches = N // batch_sz
    costs = []
    for i in range(epochs):
        X, Y = shuffle(X, Y)
        for j in range(n_batches):
            Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
            Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

            train_op(Xbatch, Ybatch)

            if j % 20 == 0:
                c, p = cost_predict_op(Xvalid, Yvalid)
                costs.append(c)
                e = error_rate(Yvalid, p)
                print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-3, mu=0.99, decay=0.999, reg=1e-3, epoches=10, batch_sz=100, show_fig=False):
    # step 1. get the data
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = y2indicator(Y).astype(np.int32)

    Xvalid = Xvalid.astype(np.float32)
    Yvalid_vector = Yvalid.astype(np.int32)
    Yvalid = y2indicator(Yvalid).astype(np.int32)

    # step 1.1 initialize each layer and its parameters (tf.Variable) and keep them in a list
    N, D = X.shape
    M1 = D
    K = Y.shape[1]
    self.hidden_layers = []  # for saving HiddenLayer objects
    count = 0
    for M2 in self.hidden_layer_size:
        # build the HiddenLayer objects for the first through second-to-last layers
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1
    W, b = init_weight_and_bias(M1, K)  # weights of the final output layer
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))

    # collect all the parameters that we are going to update with gradient descent
    self.params = [self.W, self.b]
    for layer in self.hidden_layers:
        self.params += layer.params

    # step 1.2 tf.placeholder
    tfX = tf.placeholder(tf.float32, shape=(None, D), name="X")
    tfT = tf.placeholder(tf.float32, shape=(None, K), name="T")

    # step 2. model
    # no softmax (or any other activation) on the output here, because that is
    # what tf.nn.softmax_cross_entropy_with_logits_v2 expects
    act = self.forward(tfX)

    # step 3. cost function
    rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=act, labels=tfT)) + rcost
    prediction_op = self.predict(tfX)

    # step 4. solver
    train_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                         momentum=mu,
                                         decay=decay).minimize(cost)

    init = tf.global_variables_initializer()
    n_batches = N // batch_sz
    costs = []
    with tf.Session() as sess:
        sess.run(init)
        for i in range(epoches):
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j + 1) * batch_sz, ]
                Ybatch = Y[j * batch_sz:(j + 1) * batch_sz, ]
                sess.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                if j % 50 == 0:
                    cost_val = sess.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                    costs.append(cost_val)
                    preds = sess.run(prediction_op, feed_dict={tfX: Xvalid})
                    err = error_rate(Yvalid_vector, preds)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", cost_val, "error rate:", err)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False): K = len(set(Y)) # won't work later b/c we turn it into indicator # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) # Y = Y.astype(np.int32) Xvalid, Yvalid = X[-1000:], Y[-1000:] Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate X, Y = X[:-1000], Y[:-1000] # initialize hidden layers N, D = X.shape self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) # collect params for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params # set up theano functions and variables tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') act = self.forward(tfX) rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfT)) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) n_batches = N // batch_sz costs = [] init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={ tfX: Xvalid, tfT: Yvalid }) costs.append(c) p = session.run(prediction, feed_dict={ tfX: Xvalid, tfT: Yvalid }) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) # TODO: ask lazy programmer how to make a score function. # For this lecture: https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow/learn/v4/t/lecture/5228492?start=0 if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, lr=10e-4, mu=0.99, reg=10e-4, decay=0.99999, eps=10e-3, batch_sz=30, epochs=3, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) K = len(set(Y)) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate # initialize convpool layers N, d, d, c = X.shape mi = c outw = d outh = d self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = outw / 2 outh = outh / 2 mi = mo # initialize mlp layers self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W, 'W_logreg') self.b = tf.Variable(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for h in self.convpool_layers: self.params += h.params for h in self.hidden_layers: self.params += h.params # set up tensorflow functions and variables tfX = tf.placeholder(tf.float32, shape=(None, d, d, c), name='X') tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') act = self.forward(tfX) rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(act, tfY)) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) n_batches = N / batch_sz costs = [] init = tf.initialize_all_variables() with tf.Session() as session: session.run(init) for i in xrange(epochs): X, Y = shuffle(X, Y) for j in xrange(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid}) costs.append(c) p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid}) e = error_rate(Yvalid_flat, p) print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, lr=10e-4, mu=0.99, reg=10e-4, decay=0.99999, eps=10e-3, batch_sz=30, epochs=3, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) K = len(set(Y)) X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] Yvalid_flat = np.argmax(Yvalid, axis=1) N, d, d, c = X.shape mi = c outw = d outh = d self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = outw / 2 outh = outh / 2 mi = mo self.hidden_layers = [] M1 = int(self.convpool_layer_sizes[-1][0] * outw * outh) count = 0 for M2 in self.hidden_layer_sizes: print(M1, M2) h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W, 'W_logreg') self.b = tf.Variable(b, 'b_log') self.params = [self.W, self.b] for h in self.convpool_layers: self.params += h.params for h in self.hidden_layers: self.params += h.params tfX = tf.placeholder(tf.float32, shape=(None, d, d, c), name='X') tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') act = self.forward(tfX) rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfY)) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) n_batches = N // batch_sz costs = [] init = tf.initialize_all_variables() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={ tfX: Xvalid, tfY: Yvalid }) costs.append(c) p = session.run(prediction, feed_dict={ tfX: Xvalid, tfY: Yvalid }) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error_rate:", e) if show_fig: plt.plot(costs) plt.show()
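The TensorFlow ConvPoolLayer constructed by the two CNN fit methods above is not defined in this section. Since both halve outw and outh once per layer, it is presumably a 'SAME'-padded convolution followed by 2x2 max pooling; a hedged sketch under that assumption (the filter initialization and the relu placement are guesses):

class ConvPoolLayer:
    def __init__(self, mi, mo, fw=5, fh=5):
        # filter shape for tf.nn.conv2d: (height, width, in_maps, out_maps)
        sz = (fw, fh, mi, mo)
        W0 = (np.random.randn(*sz) * np.sqrt(2.0 / np.prod(sz[:-1]))).astype(np.float32)
        self.W = tf.Variable(W0)
        self.b = tf.Variable(np.zeros(mo, dtype=np.float32))
        self.params = [self.W, self.b]

    def forward(self, X):
        # convolution with 'SAME' padding keeps the spatial size,
        # so the 2x2 pooling below is what halves outw and outh
        conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME')
        conv_out = tf.nn.bias_add(conv_out, self.b)
        pool_out = tf.nn.max_pool(conv_out, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')
        return tf.nn.relu(pool_out)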
def fit(self, X, Y, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False): K = len(set(Y)) # won't work later b/c we turn it into indicator # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) # Y = Y.astype(np.int32) Xvalid, Yvalid = X[-1000:], Y[-1000:] Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate X, Y = X[:-1000], Y[:-1000] # initialize hidden layers N, D = X.shape self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) # collect params for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params # set up theano functions and variables tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') act = self.forward(tfX) rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=act, labels=tfT ) ) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) n_batches = N // batch_sz costs = [] init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid}) costs.append(c) p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid}) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, learning_rate=10e-7, mu=0.99, decay=0.999, reg=10e-12, eps=10e-10, epochs=400, batch_sz=100, show_fig=False): learning_rate = np.float32(learning_rate) mu = np.float32(mu) decay = np.float32(decay) reg = np.float32(reg) eps = np.float32(eps) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = Y.astype(np.int32) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] # initialize hidden layers N, D = X.shape K = len(set(Y)) self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params # for momentum dparams = [ theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params ] # for rmsprop cache = [ theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params ] # set up theano functions and variables thX = T.fmatrix('X') thY = T.ivector('Y') pY = self.th_forward(thX) rcost = reg * T.sum([(p * p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) # actual prediction function self.predict_op = theano.function(inputs=[thX], outputs=prediction) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = [ (c, decay * c + (np.float32(1) - decay) * T.grad(cost, p) * T.grad(cost, p)) for p, c in zip(self.params, cache) ] + [ (p, p + mu * dp - learning_rate * T.grad(cost, p) / T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams) ] + [(dp, mu * dp - learning_rate * T.grad(cost, p) / T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams)] # momentum only # updates = [ # (p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams) # ] + [ # (dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, dparams) # ] train_op = theano.function(inputs=[thX, thY], updates=updates) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_size=30, epochs=10, display_cost=False): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = Y.astype(np.int32) # create a validation set: Xvalid, Yvalid = X[-1000:,], Y[-1000:] X, Y = X[:-1000,], Y[:-1000] # initialize convpool layers: N, c, height, width = X.shape mi = c outh = height outw = width self.convpool_layers = [] for mo, fh, fw in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fh, fw) self.convpool_layers.append(layer) # output volume height and width # after the current convpool layer: outh = (outh - fh + 1) // 2 outw = (outw - fh + 1) // 2 mi = mo # initialize mlp layers: K = len(set(Y)) self.hidden_layers = [] # size must be the same as output of last convpool layer: M1 = self.convpool_layer_sizes[-1][0]*outh*outw count = 0 # will be used to id hidden layers for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # the last layer - softmax output: W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_output') self.b = theano.shared(b, 'b_output') # collect params: self.params = [] for layer in self.convpool_layers: self.params += layer.params for h_layer in self.hidden_layers: self.params += h_layer.params self.params += [self.W, self.b] # set up theano functions and variables: thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') pY = self.forward(thX) # the forward func will be defined cost = -T.mean(T.log(pY[T.arange(pY.shape[0]), thY])) # add the regularization term to the cost: reg_term = reg*T.sum([(p*p).sum() for p in self.params]) cost += reg_term prediction = self.th_predict(thX) # theano function to make the actual calculation of cost # and get the prediction: cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = rmsprop(cost, self.params, lr, mu, decay, eps) train_op = theano.function( inputs=[thX, thY], updates=updates, outputs=cost, ) # the training loop: n_batches = N // batch_size train_costs = [] valid_costs = [] t0 = datetime.now() for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_size:(j+1)*batch_size, :] Ybatch = Y[j*batch_size:(j+1)*batch_size] train_cost = train_op(Xbatch, Ybatch) train_costs.append(train_cost) if j % 20 == 0: cost_val, prediction_val = cost_predict_op(Xvalid, Yvalid) error = error_rate(prediction_val, Yvalid) print('\ni: %d, j: %d, valid_cost: %.3f, error: %.3f' % (i, j, cost_val, error)) valid_costs.append(cost_val) print('\nElapsed time: ', datetime.now() - t0) if display_cost: plt.plot(train_costs) plt.title('Cost on Training Set') plt.xlabel('iterations') plt.show() plt.plot(valid_costs) plt.title('Cost on Validation Set') plt.xlabel('iterations') plt.show()
def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False): K = len(set(Y)) #make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) #for cauculating error rate Yvalid_flat = Yvalid Yvalid = y2indicator(Yvalid).astype(np.float32) #initialize hidden layers N, D = X.shape self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) #collect param for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params #set up function and variable tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') act = self.forward(tfX) rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_sum( tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfT)) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) n_batches = N // batch_sz costs = [] init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={ tfX: Xvalid, tfT: Yvalid }) costs.append(c) p = session.run(prediction, feed_dict={ tfX: Xvalid, tfT: Yvalid }) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, eps=1e-8, epochs=10, batch_sz=100, show_fig=False): # downcast learning_rate = np.float32(learning_rate) mu = np.float32(mu) decay = np.float32(decay) reg = np.float32(reg) eps = np.float32(eps) X = X.astype(np.float32) Xvalid = Xvalid.astype(np.float32) Y = Y.astype(np.int32) Yvalid = Yvalid.astype(np.int32) # initialize hidden layers N, D = X.shape K = len(set(Y)) self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params # set up theano functions and variables thX = T.fmatrix('X') thY = T.ivector('Y') pY = self.th_forward(thX) rcost = reg * T.sum([(p * p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) # actual prediction function self.predict_op = theano.function(inputs=[thX], outputs=prediction) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps) train_op = theano.function(inputs=[thX, thY], updates=updates) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)] Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)] train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, lr=10e-5, mu=0.99, reg=10e-7, decay=0.99999, eps=10e-3, batch_sz=30, epochs=100, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = Y.astype(np.int32) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] # initialize convpool layers N, c, d, d = X.shape mi = c outw = d outh = d self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = (outw - fw + 1) / 2 outh = (outh - fh + 1) / 2 mi = mo # initialize mlp layers K = len(set(Y)) self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for c in self.convpool_layers: self.params += c.params for h in self.hidden_layers: self.params += h.params # for momentum dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params] # for rmsprop cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params] # set up theano functions and variables thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') pY = self.forward(thX) rcost = reg*T.sum([(p*p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.predict(thX) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) # updates = [ # (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache) # ] + [ # (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams) # ] + [ # (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c + eps)) for p, c, dp in zip(self.params, cache, dparams) # ] # momentum only updates = [ (p, p + mu*dp - lr*T.grad(cost, p)) for p, dp in zip(self.params, dparams) ] + [ (dp, mu*dp - lr*T.grad(cost, p)) for p, dp in zip(self.params, dparams) ] train_op = theano.function( inputs=[thX, thY], updates=updates ) n_batches = N / batch_sz costs = [] for i in xrange(epochs): X, Y = shuffle(X, Y) for j in xrange(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print "i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e if show_fig: plt.plot(costs) plt.show()
def fit(self, X, Y, lr=1e-3, mu=0.99, reg=10e-4, decay=0.99999, eps=10e-3, batch_sz=30, epochs=3, show_fig=True):
    lr = np.float32(lr)
    mu = np.float32(mu)
    reg = np.float32(reg)
    decay = np.float32(decay)
    eps = np.float32(eps)
    K = len(set(Y))  # number of unique values in Y

    # create a validation set
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = y2indicator(Y).astype(np.float32)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]  # last 1000 samples
    X, Y = X[:-1000], Y[:-1000]            # keep the remainder for training
    Yvalid_flat = np.argmax(Yvalid, axis=1)  # for error calculation

    # initialize ConvPool layers
    N, d, d, c = X.shape
    mi = c  # input feature maps = color channels
    outw = d
    outh = d
    self.convpool_layers = []  # save convpool layers in a list
    for mo, fw, fh in self.convpool_layer_sizes:
        layer = ConvPoolLayer(mi, mo, fw, fh)  # initialize layer
        self.convpool_layers.append(layer)
        # integer-divide by 2 because of the pooling layer,
        # so the flattened size below stays an int
        outw = outw // 2
        outh = outh // 2
        mi = mo

    # initialize hidden layers
    self.hidden_layers = []
    M1 = self.convpool_layer_sizes[-1][0] * outw * outh
    count = 0  # these ids are passed into the hidden layers
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count = count + 1

    # initialize logistic regression layer
    W, b = init_weight_and_bias(M1, K)
    self.W = tf.Variable(W, 'W_logreg')
    self.b = tf.Variable(b, 'b_logreg')

    self.params = [self.W, self.b]
    for h in self.convpool_layers:
        self.params = self.params + h.params
    for h in self.hidden_layers:
        self.params = self.params + h.params

    # define TensorFlow functions and variables
    tfX = tf.placeholder(tf.float32, shape=(None, d, d, c))
    tfY = tf.placeholder(tf.float32, shape=(None, K))
    act = self.forward(tfX)

    # regularization cost
    rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
    # final cost: activations vs. indicator matrix of targets
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=act, labels=tfY)) + rcost

    # prediction
    prediction = self.predict(tfX)

    # train function
    # train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)
    train_op = tf.train.AdamOptimizer(lr).minimize(cost)

    # number of batches (integer division so range() works)
    n_batches = N // batch_sz
    # cost history
    costs = []
    # initialize all variables
    init = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                if j % 20 == 0:
                    c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    costs.append(c)
                    # prediction on the validation set
                    p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    # error rate
                    e = error_rate(Yvalid_flat, p)
                    print('i', i, 'j', j, 'n_batches', n_batches, 'cost', c, 'error_rate', e)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X_train, labels_train, X_val, labels_val, learning_rate=1e-4, mu=0.9, decay=0.99, lambda_=1e-3, epochs=5, batch_sz=200, show_fig=False):
    K = len(set(labels_train))

    # Correct datatypes
    X_train, X_val = X_train.astype(np.float32), X_val.astype(np.float32)
    Y_train, Y_val = y2indicator(labels_train).astype(np.float32), y2indicator(labels_val).astype(np.float32)

    # Initialize convpool layers
    N, width, height, c = X_train.shape
    mi = c
    outw = width
    outh = height
    self.convpool_layers = []
    for mo, fw, fh in self.convpool_layer_sizes:
        cp = ConvPoolLayer(mi, mo, fw, fh)
        self.convpool_layers.append(cp)
        outw = outw // 2
        outh = outh // 2
        mi = mo

    # Initialize hidden layers
    self.hidden_layers = []
    M1 = mi * outw * outh  # flattened output size of the last convpool layer
    count = 0
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1

    # Initialize output layer (cast to float32 so it matches the rest of the graph)
    W, b = init_weight_and_bias(M1, K)
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))

    # Collect params for later use
    self.params = [self.W, self.b]
    for cp in self.convpool_layers:
        self.params += cp.params
    for h in self.hidden_layers:
        self.params += h.params

    # Set up tensorflow functions and variables
    tf_X = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')
    tf_Y = tf.placeholder(tf.float32, shape=(None, K), name='Y')
    logits = self.forward(tf_X)

    reg_cost = lambda_ * sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf_Y
        )
    ) + reg_cost

    train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)
    predict_op = self.predict(tf_X)

    n_batches = N // batch_sz
    costs = []
    best_val_error = 1
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            X_train, Y_train = shuffle(X_train, Y_train)
            for j in range(n_batches):
                Xbatch = X_train[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y_train[j*batch_sz:(j*batch_sz+batch_sz)]

                session.run(train_op, feed_dict={tf_X: Xbatch, tf_Y: Ybatch})

                if j % 20 == 0:
                    c = session.run(cost, feed_dict={tf_X: X_val, tf_Y: Y_val})
                    costs.append(c)

                    labels_val_pred = session.run(predict_op, feed_dict={tf_X: X_val})
                    e = error_rate(labels_val, labels_val_pred)
                    print("i:", i, "j:", j, '/', n_batches, "cost:", c, "error_rate:", e)

                    if e < best_val_error:
                        best_val_error = e

    print("best_val_error:", best_val_error)

    if show_fig:
        plt.plot(costs)
        plt.show()
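# --- Hypothetical helper: a TensorFlow 1.x ConvPoolLayer matching the halving used above ---
# The TensorFlow fit() methods in this file assume a ConvPoolLayer whose convolution uses 'SAME'
# padding and whose 2x2 / stride-2 max pool halves each spatial dimension (out = in // 2, assuming
# the image dimensions are divisible by 2 at every stage). The original class is not shown; the
# weight scaling and ReLU nonlinearity below are assumptions, not the author's exact code.
import numpy as np
import tensorflow as tf


class ConvPoolLayer(object):
    def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):
        # filter shape convention for tf.nn.conv2d: (height, width, in_channels, out_channels)
        sz = (fw, fh, mi, mo)
        W0 = (np.random.randn(*sz) * np.sqrt(2.0 / np.prod(sz[:-1]))).astype(np.float32)
        self.W = tf.Variable(W0)
        self.b = tf.Variable(np.zeros(mo, dtype=np.float32))
        self.poolsz = poolsz
        self.params = [self.W, self.b]

    def forward(self, X):
        # 'SAME' convolution keeps the spatial size; the pool then halves it
        conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME')
        conv_out = tf.nn.bias_add(conv_out, self.b)
        pool_out = tf.nn.max_pool(
            conv_out,
            ksize=[1, self.poolsz[0], self.poolsz[1], 1],
            strides=[1, self.poolsz[0], self.poolsz[1], 1],
            padding='SAME'
        )
        return tf.nn.relu(pool_out)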
def fit(self, X, Y, learning_rate=1e-3, mu=0.9, decay=0.9, reg=0, eps=1e-10, epochs=100, batch_sz=30, show_fig=False):
    learning_rate = np.float32(learning_rate)
    mu = np.float32(mu)
    decay = np.float32(decay)
    reg = np.float32(reg)
    eps = np.float32(eps)

    # make a validation set
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    # initialize hidden layers
    N, D = X.shape
    K = len(set(Y))
    self.hidden_layers = []
    M1 = D
    count = 0
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1
    W, b = init_weight_and_bias(M1, K)
    self.W = theano.shared(W, 'W_logreg')
    self.b = theano.shared(b, 'b_logreg')

    # collect params for later use
    self.params = [self.W, self.b]
    for h in self.hidden_layers:
        self.params += h.params

    # set up theano functions and variables
    thX = T.fmatrix('X')
    thY = T.ivector('Y')
    pY = self.th_forward(thX)

    rcost = reg*T.sum([(p*p).sum() for p in self.params])
    cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
    prediction = self.th_predict(thX)

    # actual prediction function
    self.predict_op = theano.function(inputs=[thX], outputs=prediction)
    cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

    updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps)
    train_op = theano.function(
        inputs=[thX, thY],
        updates=updates
    )

    n_batches = N // batch_sz
    costs = []
    for i in range(epochs):
        X, Y = shuffle(X, Y)
        for j in range(n_batches):
            Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
            Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

            train_op(Xbatch, Ybatch)

            if j % 20 == 0:
                c, p = cost_predict_op(Xvalid, Yvalid)
                costs.append(c)
                e = error_rate(Yvalid, p)
                print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

    if show_fig:
        plt.plot(costs)
        plt.show()
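# --- Hypothetical helper: the rmsprop() update builder called in the fit() above ---
# rmsprop() is imported from elsewhere and is not shown in this file. Judging by the inline update
# lists in the other Theano fit() methods, it most likely builds RMSProp-with-momentum updates as
# sketched below: decay the per-parameter cache of squared gradients, then take a momentum-smoothed
# step scaled by 1/sqrt(cache + eps). The cache initialization to ones is an assumption.
import numpy as np
import theano
import theano.tensor as T


def rmsprop(cost, params, lr, mu, decay, eps):
    grads = T.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        # per-parameter cache of squared gradients and momentum/velocity term
        cache = theano.shared(np.ones_like(p.get_value(), dtype=np.float32))
        velocity = theano.shared(np.zeros_like(p.get_value(), dtype=np.float32))

        new_cache = decay * cache + (np.float32(1.0) - decay) * g * g
        new_velocity = mu * velocity - lr * g / T.sqrt(new_cache + eps)

        updates.append((cache, new_cache))
        updates.append((velocity, new_velocity))
        updates.append((p, p + new_velocity))
    return updates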
def fit(self, X, Y, lr=1e-6, mu=0.99, decay=0.999, reg=1e-11, eps=1e-9, epochs=300, batch_sz=100, show_fig=False):
    lr = np.float32(lr)
    mu = np.float32(mu)
    decay = np.float32(decay)
    reg = np.float32(reg)
    eps = np.float32(eps)

    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)
    Xvalid = X[-1000:]  # last 1000
    Yvalid = Y[-1000:]  # last 1000
    X = X[:-1000]       # all but the last 1000
    Y = Y[:-1000]       # all but the last 1000

    N, D = X.shape
    K = len(set(Y))
    self.hidden_layers = []
    M1 = D
    count = 0
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1
    W, b = init_weight_and_bias(M1, K)
    self.W = theano.shared(W, 'W_logreg')
    self.b = theano.shared(b, 'b_logreg')

    self.params = [self.W, self.b]
    for h in self.hidden_layers:
        self.params += h.params

    dparams = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]
    cache = [theano.shared(np.zeros(p.get_value().shape, dtype=np.float32)) for p in self.params]

    thX = T.fmatrix('X')
    thY = T.ivector('Y')
    pY = self.forward(thX)

    rcost = reg*T.sum([(p*p).sum() for p in self.params])
    cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
    prediction = self.predict(thX)

    cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

    updates = [
        (c, decay*c + (np.float32(1)-decay)*T.grad(cost, p)*T.grad(cost, p)) for p, c in zip(self.params, cache)
    ] + [
        (p, p + mu*dp - lr*T.grad(cost, p)/T.sqrt(c+eps)) for p, c, dp in zip(self.params, cache, dparams)
    ] + [
        (dp, mu*dp - lr*T.grad(cost, p)/T.sqrt(c+eps)) for p, c, dp in zip(self.params, cache, dparams)
    ]

    train_op = theano.function(
        inputs=[thX, thY],
        updates=updates
    )

    n_batches = N // batch_sz
    costs = []
    for i in range(epochs):
        X, Y = shuffle(X, Y)
        for j in range(n_batches):
            Xbatch = X[j*batch_sz:(j+1)*batch_sz]
            Ybatch = Y[j*batch_sz:(j+1)*batch_sz]

            train_op(Xbatch, Ybatch)

            if j % 20 == 0:
                c, p = cost_predict_op(Xvalid, Yvalid)
                costs.append(c)
                e = error_rate(Yvalid, p)
                print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

    if show_fig:
        plt.plot(costs)
        plt.show()
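# --- Hypothetical helper: a Theano HiddenLayer with the forward() used implicitly above ---
# The Theano fit() methods rely on a HiddenLayer whose params are shared W and b tensors and whose
# forward() applies an elementwise nonlinearity. The original class is not shown here; ReLU and the
# fan-in weight scaling below are assumptions (the original may use tanh instead).
import numpy as np
import theano
import theano.tensor as T


class HiddenLayer(object):
    def __init__(self, M1, M2, an_id):
        self.id = an_id
        self.M1 = M1
        self.M2 = M2
        W = (np.random.randn(M1, M2) / np.sqrt(M1)).astype(np.float32)
        b = np.zeros(M2, dtype=np.float32)
        self.W = theano.shared(W, 'W_%s' % self.id)
        self.b = theano.shared(b, 'b_%s' % self.id)
        self.params = [self.W, self.b]

    def forward(self, X):
        # affine transform followed by the (assumed) ReLU nonlinearity
        return T.nnet.relu(X.dot(self.W) + self.b)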
def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True):
    lr = np.float32(lr)
    mu = np.float32(mu)
    reg = np.float32(reg)
    decay = np.float32(decay)
    eps = np.float32(eps)
    K = len(set(Y))

    # prepare the training data and the externally supplied validation targets
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = y2indicator(Y).astype(np.float32)

    Yvalid = y2indicator(Yvalid).astype(np.float32)
    Yvalid_flat = np.argmax(Yvalid, axis=1)  # for calculating error rate

    # initialize convpool layers
    N, width, height, c = X.shape
    mi = c
    outw = width
    outh = height
    self.convpool_layers = []
    for mo, fw, fh in self.convpool_layer_sizes:
        layer = ConvPoolLayer(mi, mo, fw, fh)
        self.convpool_layers.append(layer)
        outw = outw // 2
        outh = outh // 2
        mi = mo

    # initialize mlp layers
    self.hidden_layers = []
    M1 = self.convpool_layer_sizes[-1][0]*outw*outh  # size must match the flattened output of the last convpool layer
    count = 0
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1

    # logistic regression layer
    W, b = init_weight_and_bias(M1, K)
    self.W = tf.Variable(W.astype(np.float32), name='W_logreg')
    self.b = tf.Variable(b.astype(np.float32), name='b_logreg')

    # collect params for later use
    self.params = [self.W, self.b]
    for h in self.convpool_layers:
        self.params += h.params
    for h in self.hidden_layers:
        self.params += h.params

    # set up tensorflow functions and variables
    tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')
    tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
    act = self.forward(tfX)

    rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=act,
            labels=tfY
        )
    ) + rcost
    prediction = self.predict(tfX)

    train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)

    n_batches = N // batch_sz
    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                if j % 20 == 0:
                    c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    costs.append(c)

                    p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    e = error_rate(Yvalid_flat, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)

    if show_fig:
        plt.plot(costs)
        plt.show()
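# --- Hypothetical usage sketch for the fit() above, which expects an external validation split ---
# The class name (CNN), its constructor keywords, and the data loader below are illustrative
# placeholders, not taken from the original code. convpool_layer_sizes is assumed to hold
# (feature_maps, filter_width, filter_height) triples and hidden_layer_sizes the fully-connected
# layer sizes, mirroring how fit() reads them.
from sklearn.model_selection import train_test_split


def main():
    # placeholder loader: X of shape (N, width, height, channels), Y integer class labels
    X, Y = load_image_data()
    Xtrain, Xvalid, Ytrain, Yvalid = train_test_split(X, Y, test_size=1000)

    model = CNN(
        convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)],
        hidden_layer_sizes=[500, 300],
    )
    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, epochs=5, show_fig=True)


if __name__ == '__main__':
    main()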
def fit(self, X, Y, learning_rate=1e-4, mu=0.9, decay=0.9, epochs=15, batch_sz=100, display_cost=False, save_params=False):
    # cast everything to np.float32 so the tf computation runs with a consistent dtype
    learning_rate = np.float32(learning_rate)
    mu = np.float32(mu)
    decay = np.float32(decay)

    # create a validation set:
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    # initialize hidden layers:
    N, D = X.shape
    K = len(set(Y))
    self.hidden_layers = []
    M1 = D
    count = 0
    # iterate over self.hidden_layer_sizes, threading each layer's input size through M1:
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2
        count += 1

    # the last_hidden_layer -> output_layer weights and bias:
    W, b = init_weight_and_bias(M1, K)
    self.W = tf.Variable(W, name='W%s' % count)
    self.b = tf.Variable(b, name='b%s' % count)

    # collect all the network's parameters:
    self.params = [self.W, self.b]
    for h in self.hidden_layers:
        self.params += h.parameters

    # define tensorflow placeholders:
    tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
    tfT = tf.placeholder(tf.int32, shape=(None,), name='T')

    # the logits output of the network (dropout applied during training):
    Y_logits = self.forward_train(tfX)

    # define the expression for the cost:
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=Y_logits, labels=tfT))

    # define the tensorflow train function:
    train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)
    predict_op = self.predict(tfX)

    # validation cost is calculated separately since nothing is dropped at prediction time
    Y_logits_valid = self.forward_predict(tfX)
    cost_valid = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=Y_logits_valid, labels=tfT))

    n_batches = N // batch_sz
    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        # initialize all tf variables:
        print('\nInitializing variables...')
        session.run(init)

        print('\nPerforming batch SGD with RMSProp and momentum...')
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j + 1) * batch_sz, :]
                Ybatch = Y[j * batch_sz:(j + 1) * batch_sz]

                session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                if j % 20 == 0:
                    c = session.run(cost_valid, feed_dict={tfX: Xvalid, tfT: Yvalid})
                    costs.append(c)

                    prediction = session.run(predict_op, feed_dict={tfX: Xvalid, tfT: Yvalid})
                    error = error_rate(Yvalid, prediction)
                    print('\ni: %d, j: %d, cost: %.6f, error: %.6f' % (i, j, c, error))

        # make the final prediction:
        prediction = session.run(predict_op, feed_dict={tfX: Xvalid})
        final_error = error_rate(Yvalid, prediction)
        print('\nFinal validation error: %.6f' % final_error)

        if save_params:
            # save each hidden layer's W and b as .npy files named by layer id:
            for h in self.hidden_layers:
                p_type = 'W'
                for p in h.parameters:
                    p = p.eval()
                    name = p_type + str(h.id)
                    np.save(name, p)
                    p_type = 'b'
            # last hidden layer -> output layer parameters:
            np.save('W%s' % count, self.W.eval())
            np.save('b%s' % count, self.b.eval())

    if display_cost:
        plt.plot(costs)
        plt.show()
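# --- Hypothetical sketch: the dropout train/predict forward passes assumed by the fit() above ---
# fit() calls self.forward_train (dropout applied) for the training cost and self.forward_predict
# (no dropout) for validation and prediction. The original methods are not shown in this file, so
# the keep-probabilities and the use of tf.nn.dropout below are assumptions about how the two
# passes differ; h.forward is assumed to exist on the HiddenLayer class.
def forward_train(self, X):
    # apply dropout to the input and to every hidden activation during training
    Z = tf.nn.dropout(X, keep_prob=0.8)      # hypothetical input keep rate
    for h in self.hidden_layers:
        Z = h.forward(Z)
        Z = tf.nn.dropout(Z, keep_prob=0.5)  # hypothetical hidden keep rate
    return tf.matmul(Z, self.W) + self.b     # logits; the cost op applies the softmax


def forward_predict(self, X):
    # no dropout at prediction time; use the full network deterministically
    Z = X
    for h in self.hidden_layers:
        Z = h.forward(Z)
    return tf.matmul(Z, self.W) + self.b


def predict(self, X):
    # most likely class per row of X
    return tf.argmax(self.forward_predict(X), axis=1)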