def __init__(self, M1, M2, an_id):
    """Set up one layer whose parameters live in TensorFlow variables.

    Args:
        M1: First size argument forwarded to ``init_weight_bias``
            (presumably the layer's input width -- confirm against that
            helper).
        M2: Second size argument forwarded to ``init_weight_bias``
            (presumably the layer's output width).
        an_id: Accepted as part of the constructor signature; this
            implementation does not read it.
    """
    self.M1, self.M2 = M1, M2

    weights, bias = init_weight_bias(M1, M2)
    # Both arrays are cast to float32 before being wrapped as variables.
    self.W = tf.Variable(weights.astype(np.float32))
    self.b = tf.Variable(bias.astype(np.float32))

    # Exposed as a list so callers can gather every layer's parameters.
    self.param = [self.W, self.b]
def __init__(self, M1, M2, an_id):
    """Set up one layer whose parameters live in Theano shared variables.

    Args:
        M1: First size argument forwarded to ``init_weight_bias``
            (presumably the layer's input width -- confirm against that
            helper).
        M2: Second size argument forwarded to ``init_weight_bias``
            (presumably the layer's output width).
        an_id: Identifier stored on the instance and embedded in the
            names of the shared variables.
    """
    self.id = an_id
    self.M1, self.M2 = M1, M2

    weights, bias = init_weight_bias(M1, M2)
    # The layer id is baked into each variable name ("W_<id>", "b_<id>").
    self.W = theano.shared(weights, name='W_%s' % self.id)
    self.b = theano.shared(bias, name='b_%s' % self.id)

    # Exposed as a list so callers can gather every layer's parameters.
    self.param = [self.W, self.b]
def fit(self, X, Y, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3,
        epochs=10, batch_size=100, show_fig=False):
    """Build the TensorFlow graph for this network and train it.

    The data is shuffled once, the last 1000 rows are held out, and the
    remaining rows are used for mini-batch training with
    ``tf.train.RMSPropOptimizer``. Every 20th batch the held-out cost and
    error rate are computed and printed.

    Args:
        X: Feature array; it is cast to float32 and indexed as 2-D.
        Y: Label array; it is passed through ``indicator`` (presumably a
            one-hot encoder -- confirm against that helper).
        learning_rate: Step size handed to the optimizer.
        mu: Momentum handed to the optimizer.
        decay: RMSProp decay handed to the optimizer.
        reg: Multiplier on the summed ``tf.nn.l2_loss`` of all parameters.
        epochs: Number of passes over the training rows.
        batch_size: Rows per mini-batch; a trailing partial batch is
            dropped because the batch count uses floor division.
        show_fig: When true, plot the recorded held-out costs at the end.
    """
    # Hyperparameters are coerced to float32 scalars up front.
    learning_rate, mu, decay, reg = (
        np.float32(value) for value in (learning_rate, mu, decay, reg)
    )

    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    K = len(set(Y))  # number of distinct labels, taken before encoding
    Y = indicator(Y).astype(np.float32)

    # The final 1000 shuffled rows form the held-out evaluation split.
    train_X, test_X = X[:-1000, :], X[-1000:, :]
    train_Y, test_Y = Y[:-1000, :], Y[-1000:, :]
    test_Y_flat = np.argmax(test_Y, axis=1)
    N, D = train_X.shape

    # Chain the hidden layers: each layer's output size feeds the next one.
    self.hidden_layers = []
    M1 = D
    for count, M2 in enumerate(self.hidden_layer_size):
        self.hidden_layers.append(Hiddenlayer(M1, M2, count))
        M1 = M2

    # Final layer mapping the last hidden size to the K classes.
    W, b = init_weight_bias(M1, K)
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))

    # Gather every trainable parameter, output layer first.
    self.param = [self.W, self.b]
    for layer in self.hidden_layers:
        self.param.extend(layer.param)

    # Graph inputs.
    tf_X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    tf_Y = tf.placeholder(tf.float32, shape=(None, K), name='T')

    # Loss = mean softmax cross-entropy + scaled L2 penalty.
    action = self.forward(tf_X)
    penalties = [tf.nn.l2_loss(p) for p in self.param]
    rcost = reg * sum(penalties)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=action, labels=tf_Y)
    cost_fun = tf.reduce_mean(cross_entropy) + rcost

    optimizer = tf.train.RMSPropOptimizer(
        learning_rate, decay=decay, momentum=mu)
    train_op = optimizer.minimize(cost_fun)
    predict_fun = self.predict(tf_X)

    num_batch = N // batch_size
    costs = []
    init = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            # Each epoch draws a fresh permutation of the training split.
            shuffle_X, shuffle_Y = shuffle(train_X, train_Y)
            for j in range(num_batch):
                start = j * batch_size
                stop = start + batch_size
                x = shuffle_X[start:stop, :]
                y = shuffle_Y[start:stop, :]
                session.run(train_op, feed_dict={tf_X: x, tf_Y: y})

                if j % 20 != 0:
                    continue

                # Periodic evaluation on the held-out rows.
                c = session.run(
                    cost_fun, feed_dict={tf_X: test_X, tf_Y: test_Y})
                p = session.run(predict_fun, feed_dict={tf_X: test_X})
                error = error_rate(test_Y_flat, p)
                print("i:", i, "j:", j, "nb:", num_batch,
                      "cost:", c, "error_rate:", error)
                costs.append(c)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=1e-3, mu=0.99, decay=0.999, reg=1e-3,
        epochs=10, batch_sz=100, show_fig=False):
    """Build the TensorFlow graph for this network and train it.

    The data is shuffled once and the last 1000 rows become a validation
    set. The remaining rows are reshuffled at the start of every epoch and
    consumed in mini-batches by ``tf.train.RMSPropOptimizer``. Every 20th
    batch the validation cost and error rate are computed and printed.

    Args:
        X: Feature array; it is cast to float32 and its shape is unpacked
            as ``(N, D)``.
        Y: Label array; it is passed through ``y2indicator`` (presumably a
            one-hot encoder -- confirm against that helper).
        learning_rate: Step size handed to the optimizer.
        mu: Momentum handed to the optimizer.
        decay: RMSProp decay handed to the optimizer.
        reg: Multiplier on the summed ``tf.nn.l2_loss`` of all parameters.
        epochs: Number of passes over the training rows.
        batch_sz: Rows per mini-batch; a trailing partial batch is dropped
            because the batch count uses floor division.
        show_fig: When true, plot the recorded validation costs at the end.
    """
    K = len(set(Y))  # number of distinct labels, taken before encoding

    # Shuffle once, then carve the last 1000 rows off as validation data.
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = y2indicator(Y).astype(np.float32)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    Yvalid_flat = np.argmax(Yvalid, axis=1)  # labels for the error rate
    Xtrain, Ytrain = X[:-1000], Y[:-1000]

    # Chain the hidden layers: each layer's output size feeds the next one.
    N, D = Xtrain.shape
    self.hidden_layers = []
    M1 = D
    for count, M2 in enumerate(self.hidden_layer_sizes):
        self.hidden_layers.append(HiddenLayer(M1, M2, count))
        M1 = M2

    # Final layer mapping the last hidden size to the K classes.
    W, b = init_weight_bias(M1, K)
    self.W = tf.Variable(W.astype(np.float32))
    self.b = tf.Variable(b.astype(np.float32))

    # Gather every trainable parameter, output layer first.
    self.params = [self.W, self.b]
    for layer in self.hidden_layers:
        self.params.extend(layer.params)

    # Graph inputs.
    tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
    tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')

    # Loss = mean softmax cross-entropy + scaled L2 penalty.
    act = self.forward(tfX)
    penalties = [tf.nn.l2_loss(p) for p in self.params]
    rcost = reg * sum(penalties)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=act, labels=tfT)
    cost = tf.reduce_mean(cross_entropy) + rcost

    prediction = self.predict(tfX)
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate, decay=decay, momentum=mu)
    train_op = optimizer.minimize(cost)

    n_batches = N // batch_sz
    costs = []
    init = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            # Reshuffle the (already shuffled) training arrays each epoch.
            Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
            for j in range(n_batches):
                start = j * batch_sz
                stop = start + batch_sz
                Xbatch = Xtrain[start:stop]
                Ybatch = Ytrain[start:stop]
                session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                if j % 20 != 0:
                    continue

                # Periodic evaluation on the validation rows.
                c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                costs.append(c)
                p = session.run(
                    prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})
                e = error_rate(Yvalid_flat, p)
                print('i: ', i, 'j: ', j, 'num of batch: ', n_batches,
                      'cost: ', c, 'error rate: ', e)

    if show_fig:
        plt.plot(costs)
        plt.show()
def fit(self, X, Y, learning_rate=1e-3, mu=0.9, decay=0.9, reg=0,
        eps=1e-10, epochs=100, batch_size=30, show_fig=False):
    """Compile the Theano training functions for this network and train it.

    The data is shuffled once, the last 1000 rows are held out as a
    validation set, and the remaining rows are reshuffled every epoch and
    consumed in mini-batches. Parameter updates come from the ``rmsprop``
    helper. Every 20th batch the validation cost and error rate are
    computed and printed. The compiled prediction function is kept on the
    instance as ``self.predict_opt``.

    Args:
        X: Feature array; it is cast to float32 and indexed as 2-D.
        Y: Label array; it is cast to int32 and used to index the
            per-class outputs of ``self.th_forward``.
        learning_rate: Forwarded to ``rmsprop``.
        mu: Forwarded to ``rmsprop``.
        decay: Forwarded to ``rmsprop``.
        reg: Multiplier on the summed squared parameters.
        eps: Forwarded to ``rmsprop``.
        epochs: Number of passes over the training rows.
        batch_size: Rows per mini-batch; a trailing partial batch is
            dropped because the batch count uses floor division.
        show_fig: When true, plot the recorded validation costs at the end.
    """
    # Hyperparameters are coerced to float32 scalars up front.
    learning_rate, mu, decay, reg, eps = (
        np.float32(value) for value in (learning_rate, mu, decay, reg, eps)
    )

    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)

    # The final 1000 shuffled rows form the validation split.
    train_X, valid_X = X[:-1000, :], X[-1000:, :]
    train_Y, valid_Y = Y[:-1000], Y[-1000:]

    N, D = train_X.shape
    K = len(set(train_Y))  # class count taken from the training labels

    # Chain the hidden layers: each layer's output size feeds the next one.
    self.hidden_layers = []
    M1 = D
    for count, M2 in enumerate(self.hidden_layer_sizes):
        self.hidden_layers.append(HiddenLayer(M1, M2, count))
        M1 = M2

    # Final layer mapping the last hidden size to the K classes.
    W, b = init_weight_bias(M1, K)
    self.W = theano.shared(W, 'W_out')
    self.b = theano.shared(b, 'b_out')

    # Gather every trainable parameter, output layer first.
    self.param = [self.W, self.b]
    for layer in self.hidden_layers:
        self.param.extend(layer.param)

    # Symbolic inputs.
    thX = T.fmatrix('X')
    thY = T.ivector('Y')

    # Loss = mean negative log of the target-class output + scaled penalty.
    pY = self.th_forward(thX)
    rcost = reg * T.sum([(p * p).sum() for p in self.param])
    target_outputs = pY[T.arange(thY.shape[0]), thY]
    cost = -T.mean(T.log(target_outputs)) + rcost
    prediction = self.th_predict(thX)

    # Compiled functions: prediction only, cost+prediction, training step.
    self.predict_opt = theano.function(inputs=[thX], outputs=prediction)
    cost_predict_opt = theano.function(
        inputs=[thX, thY], outputs=[cost, prediction])
    updates = rmsprop(cost, self.param, learning_rate, mu, decay, eps)
    train_op = theano.function(inputs=[thX, thY], updates=updates)

    num_batch = N // batch_size
    cost_array = []
    for i in range(epochs):
        # Each epoch draws a fresh permutation of the training split.
        shuffle_X, shuffle_Y = shuffle(train_X, train_Y)
        for j in range(num_batch):
            start = j * batch_size
            stop = start + batch_size
            x = shuffle_X[start:stop, :]
            y = shuffle_Y[start:stop]
            train_op(x, y)

            if j % 20 != 0:
                continue

            # Periodic evaluation on the validation rows.
            c, p = cost_predict_opt(valid_X, valid_Y)
            cost_array.append(c)
            e = error_rate(valid_Y, p)
            print("i:", i, "j:", j, "nb:", num_batch,
                  "cost:", c, "error rate:", e)

    if show_fig:
        plt.plot(cost_array)
        plt.show()
def fit(self, X, Y, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10,
        batch_sz=30, epochs=3, show_fig=True):
    """Build the TensorFlow graph for this conv net and train it.

    The data is shuffled once and the last 1000 samples become a
    validation set. The remaining samples are reshuffled at the start of
    every epoch and consumed in mini-batches by
    ``tf.train.RMSPropOptimizer``. Every 20th batch the validation cost and
    error rate are computed and printed.

    Args:
        X: Input array whose shape unpacks as
            ``(N, width, height, channels)``; it is cast to float32.
        Y: Label array; it is passed through ``y2indicator`` (presumably a
            one-hot encoder -- confirm against that helper).
        lr: Step size handed to the optimizer.
        mu: Momentum handed to the optimizer.
        reg: Multiplier on the summed ``tf.nn.l2_loss`` of all parameters.
        decay: RMSProp decay handed to the optimizer.
        eps: Cast to float32 but not otherwise used by this method.
        batch_sz: Samples per mini-batch; a trailing partial batch is
            dropped because the batch count uses floor division.
        epochs: Number of passes over the training samples.
        show_fig: When true, plot the recorded validation costs at the end.
    """
    lr = np.float32(lr)
    mu = np.float32(mu)
    reg = np.float32(reg)
    decay = np.float32(decay)
    eps = np.float32(eps)
    K = len(set(Y))  # number of distinct labels, taken before encoding

    # Shuffle once, then carve the last 1000 samples off as validation data.
    X, Y = shuffle(X, Y)
    X = X.astype(np.float32)
    Y = y2indicator(Y).astype(np.float32)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]
    Yvalid_flat = np.argmax(Yvalid, axis=1)  # labels for the error rate

    # Initialize the conv/pool layers. The spatial size is halved per layer
    # (this assumes each ConvPoolLayer downsamples by 2 -- confirm against
    # that class).
    N, width, height, n_channels = X.shape
    mi = n_channels
    outw = width
    outh = height
    self.convpool_layers = []
    # Read the layer spec from the instance, matching the lookup used for
    # M1 below; the bare name would raise NameError without a global.
    for mo, fw, fh in self.convpool_layer_sizes:
        layer = ConvPoolLayer(mi, mo, fw, fh)
        self.convpool_layers.append(layer)
        outw = outw // 2
        outh = outh // 2
        mi = mo

    # Initialize the dense layers; the first one consumes the flattened
    # output of the last conv/pool layer.
    self.hidden_layers = []
    M1 = self.convpool_layer_sizes[-1][0] * outw * outh
    for count, M2 in enumerate(self.hidden_layer_sizes):
        h = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(h)
        M1 = M2

    # Final logistic-regression layer. The name must be passed by keyword:
    # the second positional argument of tf.Variable is ``trainable``.
    W, b = init_weight_bias(M1, K)
    self.W = tf.Variable(W, name='W_logreg')
    self.b = tf.Variable(b, name='b_logreg')

    # Collect params for later use.
    self.params = [self.W, self.b]
    for layer in self.convpool_layers:
        self.params += layer.params
    for h in self.hidden_layers:
        self.params += h.params

    # Set up TensorFlow functions and variables.
    tfX = tf.placeholder(
        tf.float32, shape=(None, width, height, n_channels), name='X')
    tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
    act = self.forward(tfX)

    # Loss = mean softmax cross-entropy + scaled L2 penalty.
    rcost = reg * sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=act,
            labels=tfY
        )
    ) + rcost
    prediction = self.predict(tfX)

    train_op = tf.train.RMSPropOptimizer(
        lr, decay=decay, momentum=mu).minimize(cost)

    n_batches = N // batch_sz
    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in range(epochs):
            # Reshuffle the (already shuffled) training arrays each epoch.
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz)]
                Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]

                session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                if j % 20 == 0:
                    # Periodic evaluation on the validation samples.
                    c = session.run(
                        cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    costs.append(c)

                    p = session.run(
                        prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                    e = error_rate(Yvalid_flat, p)
                    print('i: ', i, 'j: ', j, 'num of batch: ', n_batches,
                          'cost: ', c, 'error rate: ', e)

    if show_fig:
        plt.plot(costs)
        plt.show()