def q_2_4():
    print("******RUNNING TITANIC DATA SET*****")
    data, test_data, feature_names, class_names = load_titanic_data()
    data = preprocess_titanic(data, True)
    perm = np.random.RandomState(seed=20).permutation(data.shape[0])
    data = data[perm]
    data, valid = data[:800], data[800:]
    idy = data.shape[1] - 1
    type_map, categories_map = gen_maps(data)

    classifier = DecisionTree(type_map, categories_map)
    classifier.fit(data, 4, 10)
    train_predictions = classifier.predict(data)
    train_actual = extract_column(data, idy)
    valid_predictions = classifier.predict(valid)
    valid_actual = extract_column(valid, idy)
    print("Decision Tree training error rate: ", error_rate(train_predictions, train_actual))
    print("Decision Tree validation error rate: ", error_rate(valid_predictions, valid_actual))

    classifier = RandomForest(300, 300, 2, type_map, categories_map, 20)
    classifier.fit(data, 10, 10)
    train_predictions = classifier.predict(data)
    train_actual = extract_column(data, idy)
    valid_predictions = classifier.predict(valid)
    valid_actual = extract_column(valid, idy)
    print("Random Forest training error rate: ", error_rate(train_predictions, train_actual))
    print("Random Forest validation error rate: ", error_rate(valid_predictions, valid_actual))

def fit(self, X, Y, learning_rate=10e-7, reg=0, epochs=120000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    self.W = np.random.randn(D) / np.sqrt(D)
    self.b = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        pY = self.forward(X)

        # gradient descent step
        self.W -= learning_rate * (np.dot(X.T, (pY - Y)) + reg * self.W)
        self.b -= learning_rate * ((pY - Y).sum() + reg * self.b)

        if i % 20 == 0:
            pYvalid = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print "Epoch: {}".format(i)
            if e < best_validation_error:
                best_validation_error = e
    print "best validation error: {}".format(best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.show()

def main():
    df, X, y = preprocess_data()
    X_train, X_test, y_train, y_test = train_test_splitter(X=X, y=y, ratio=0.8)

    logistic_regressor = LogisticRegressor(alpha=0.05, c=0.01, T=1000, random_seed=0, intercept=True)
    losses = logistic_regressor.fit(X_train, y_train)
    plot_losses(losses=losses, savefig=True)

    train_error = error_rate(y_train, logistic_regressor.predict(X_train))
    test_error = error_rate(y_test, logistic_regressor.predict(X_test))
    print('Training Error Rate: %f' % train_error)
    print('Test Error Rate: %f' % test_error)

def main():
    X, T = get_facialexpression(balance_ones=True)
    # X, T = np.shuffle(X, T)
    label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
    # klass = 3 error_rate = 0.0
    # klass = 4 error_rate = 0.0
    # klass = 5 error_rate = 0.0
    # klass = 0
    klass = 4

    N, D = X.shape
    X = np.concatenate((np.ones((N, 1)), X), axis=1)
    T = T.astype(np.int32)
    X = X.astype(np.float32)

    # Fix for forecasting on one image
    T = class1detect(T, detect=klass)
    D += 1

    # params
    lr = 5e-7
    max_iteration = 150
    W = np.random.randn(D) / np.sqrt(D)

    cost = []
    error = []
    for i in xrange(max_iteration):
        Y = forward(W, X)
        cost.append(cross_entropy(T, Y))
        error.append(error_rate(T, Y))
        W += lr * X.T.dot(T - Y)
        if i % 5 == 0:
            print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

    print "Final weight:", W
    print T
    print np.round(Y)

    plt.title('logistic regression ' + label_map[klass])
    plt.xlabel('iterations')
    plt.ylabel('cross entropy')
    plt.plot(cost)
    plt.show()

    plt.title('logistic regression ' + label_map[klass])
    plt.xlabel('iterations')
    plt.ylabel('error rate')
    plt.plot(error)
    plt.show()

def fit(self, X, Y, learning_rate=1e-6, reg=0, epochs=12000, show_figure=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
    X, Y = X[:-1000, :], Y[:-1000]

    K = len(set(Y))
    N, D = X.shape
    Yind_valid = np.zeros((1000, K), dtype=np.int32)
    Yind = np.zeros((N, K), dtype=np.int32)
    Yind_valid[np.arange(1000), Yvalid] = 1
    Yind[np.arange(N), Y] = 1

    self.W = np.random.randn(D, K) / np.sqrt(D + K)
    self.b = 0

    costs = []
    best_validation_error = 1
    for i in xrange(epochs):
        for j in xrange(N):
            xj = X[j, :].T
            yj = Y[j]
            yp = np.argmax((self.W.T).dot(xj), axis=0)

            # gradient descent step (perceptron-style column update)
            self.W[:, yj] += (xj + reg * self.W[:, yj])
            self.W[:, yp] -= (xj + reg * self.W[:, yp])
            # self.b -= learning_rate * ((pY - Y).sum() + reg * self.b)

        if i % 20 == 0:
            # import code
            # code.interact(local=dict(globals(), **locals()))
            pYvalid = self.forward(Xvalid)
            # c = sigmoid_cost(Yvalid, pYvalid)
            c = cross_entropy(Yind_valid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, pYvalid)
            sys.stdout.write("i:%s\tcost:%.4f\terror:%.4f\t\r" % (format(i, '04d'), c, e))
            sys.stdout.flush()
            # print "i", i, "cost:", c, "error", e
            if e < best_validation_error:
                best_validation_error = e
    print "best_validation_error:", best_validation_error

    if show_figure:
        plt.plot(costs)
        plt.show()

def _train_store_prediction(self, sess, batch_x, batch_y, name, prediction_path):
    loss, prediction = sess.run([self.loss, self.predicter],
                                feed_dict={self.x: batch_x, self.y: batch_y})
    logging.info("Verification error= {:.1f}%, loss= {:.4f}".format(
        utils.error_rate(prediction, batch_y), loss))

    img = utils.combine_img_prediction(batch_x, batch_y, prediction)
    utils.save_image(img, "%s/%s.jpg" % (prediction_path, name))
    return

def fit(self, X, Y, learning_rate=1e-8, reg=1e-12, epochs=10000, show_fig=False):
    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes

    X, Y = shuffle(X, Y)
    X_valid, Y_valid = X[-1000:], Y[-1000:]
    T_valid = one_hot_encoder(Y_valid)
    X, Y = X[:-1000], Y[:-1000]
    T = one_hot_encoder(Y)

    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for epoch in range(epochs):
        Y_hat, Z = self.forward(X)

        # Weight updates ----------------------
        Y_hat_T = Y_hat - T
        self.W2 -= learning_rate * (Z.T.dot(Y_hat_T) + reg * self.W2)
        self.b2 -= learning_rate * (Y_hat_T.sum() + reg * self.b2)
        val = Y_hat_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
        self.W1 -= learning_rate * (X.T.dot(val) + reg * self.W1)
        self.b1 -= learning_rate * (val.sum() + reg * self.b1)
        # -------------------------------------

        if epoch % 10 == 0:
            Y_hat_valid, _ = self.forward(X_valid)
            c = cross_entropy(T_valid, Y_hat_valid)
            costs.append(c)
            e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
            print("epoch:", epoch, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.title('Validation cost')
        plt.show()

    print("Final train classification_rate:", self.score(X, Y))

def weather_predictor(df, do_print=False):
    """
    Creates a DecisionTreeClassifier that predicts the weather tag using the
    data from the trail dataset; if do_print is True, prints summary results.
    """
    df = df.dropna()
    X = df.loc[:, 'Total':'DAY_OF_WEEK']  # (df.columns != 'weather')
    y = df['weather']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    y_train = y_train.to_numpy(dtype=str)
    y_test = y_test.to_numpy(dtype=str)
    if do_print:
        print('weather_predictor')
        print('training set error rate: ' + str(100 * utils.error_rate(y_train, y_pred_train)) + '%')
        print('test set error rate: ' + str(100 * utils.error_rate(y_test, y_pred_test)) + '%')

def fit(self, X, Y, learning_rate=10e-8, reg=10e-8, epochs=10000, show_figure=False):
    X, Y = shuffle(X, Y)
    K = len(set(Y))
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    Tvalid = y2indicator(Yvalid, K)
    X, Y = X[:-1000], Y[:-1000]
    N, D = X.shape
    T = y2indicator(Y, K)

    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
    self.b2 = np.zeros(K)

    costs = []
    best_validation_error = 1
    for i in xrange(epochs):
        pY, Z = self.forward(X)

        # gradient descent step
        self.W2 -= learning_rate * (Z.T.dot(pY - T) + reg * self.W2)
        self.b2 -= learning_rate * ((pY - T).sum(axis=0) + reg * self.b2)
        self.W1 -= learning_rate * (X.T.dot((pY - T).dot(self.W2.T) * Z * (1 - Z)) + reg * self.W1)
        self.b1 -= learning_rate * (((pY - T).dot(self.W2.T) * Z * (1 - Z)).sum(axis=0) + reg * self.b1)

        if i % 10 == 0:
            pYvalid, Zvalid = self.forward(Xvalid)
            c = cost(Tvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
            print "i", i, "cost:", c, "error", e
            if e < best_validation_error:
                best_validation_error = e
    print "best_validation_error:", best_validation_error

    if show_figure:
        plt.plot(costs)
        plt.show()

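# Several of the fit() methods in this section call a y2indicator(Y, K) helper to turn
# integer class labels into an N x K one-hot indicator matrix. The utility itself is not
# included here; the following is a minimal sketch consistent with how it is called
# above, and should be read as an assumption rather than the original implementation.
import numpy as np

def y2indicator(Y, K):
    # Build an (N, K) matrix with a 1 in column Y[n] of row n.
    N = len(Y)
    ind = np.zeros((N, K), dtype=np.int32)
    ind[np.arange(N), np.asarray(Y).astype(np.int32)] = 1
    return ind
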
def fit(self, X, Y, learning_rate=5 * 10e-7, reg=1.0, epochs=10000, show_fig=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]

    N, D = X.shape
    self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
    self.b2 = 0

    costs = []
    best_validation_error = 1
    for i in range(epochs):
        # forward propagation
        pY, Z = self.forward(X)

        # gradient descent step
        pY_Y = pY - Y
        self.W2 -= learning_rate * (Z.T.dot(pY_Y) + reg * self.W2)
        self.b2 -= learning_rate * ((pY_Y).sum() + reg * self.b2)

        # relu
        # dZ = np.outer(pY_Y, self.W2) * (Z > 0)
        # tanh
        dZ = np.outer(pY_Y, self.W2) * (1 - Z * Z)
        self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)  # hidden-layer bias update

        if i % 20 == 0:
            pYvalid, _ = self.forward(Xvalid)
            c = sigmoid_cost(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, np.round(pYvalid))
            print(f'i: {i} cost: {c} error: {e}')
            if e < best_validation_error:
                best_validation_error = e
    print(f'Best validation error : {best_validation_error}')
    print(f'Score is : {self.score(Xvalid, Yvalid)}')

    if show_fig:
        plt.plot(costs)
        plt.show()

def fit(self, X, Y, learning_rate=10e-6, reg=10e-7, epochs=1000, show_figure=False):
    X, Y = shuffle(X, Y)
    x_valid = X[-10:]
    y_valid = Y[-10:]
    t_valid = utils.y2indicator(y_valid)
    x = X[:-10]
    y = Y[:-10]
    t = utils.y2indicator(y)

    N, D = x.shape
    K = len(set(y))
    self.W1 = np.random.randn(D, self.M)
    self.b1 = np.random.randn(self.M)
    self.W2 = np.random.randn(self.M, K)
    self.b2 = np.random.randn(K)

    costs = []
    for i in range(epochs):
        pY, Z = self.forward(x)

        # Updating weights
        D = pY - t
        self.W2 -= learning_rate * (Z.T.dot(D) + reg * self.W2)
        self.b2 -= learning_rate * (D.sum() + reg * self.b2)
        dZ = D.dot(self.W2.T) * Z * (1 - Z)
        self.W1 -= learning_rate * (x.T.dot(dZ) + reg * self.W1)
        self.b1 -= learning_rate * (dZ.sum() + reg * self.b1)

        if i % 10 == 0:
            pY_valid, _ = self.forward(x_valid)
            c = utils.cost(t_valid, pY_valid)
            costs.append(c)
            e = utils.error_rate(y_valid, np.argmax(pY_valid, axis=1))
            print("i:", i, " cost: ", c, " error: ", e)

    if show_figure:
        plt.plot(costs)
        plt.show()

def kaggle():
    data, test_data, feature_names, class_names = load_titanic_data()
    data = preprocess_titanic(data, True)
    test = preprocess_titanic(test_data, False)
    type_map, categories_map = gen_maps(data)

    classifier = DecisionTree(type_map, categories_map)
    classifier.fit(data, 4, 10)
    predictions = classifier.predict(test)
    pred_train = classifier.predict(data)
    actual = extract_column(data, 9)
    print(error_rate(pred_train, actual))
    results_to_csv(predictions.flatten())

def fit(self, X, y, plot_cost=False):
    X_train, Y_train, X_test, Y_test = get_train_test(X, y, percent_train=0.7)
    n, d = X_train.shape
    k = Y_train.shape[1]

    self.W1, self.b1 = init_weight_bias(d, self.hidden_layer_sizes[0])
    self.W2, self.b2 = init_weight_bias(self.hidden_layer_sizes[0], k)

    costs = []
    best_validation_error = 1
    if self.batch_size == 'auto':
        self.batch_size = min(200, n)
    num_batches = int(n / self.batch_size)

    for i in range(self.max_iter):
        X_shuffled, Y_shuffled = shuffle(X_train, Y_train)
        for j in range(num_batches):
            # slice batches from the shuffled copies so the per-epoch shuffle takes effect
            X_temp = X_shuffled[j * self.batch_size:j * self.batch_size + self.batch_size, :]
            Y_temp = Y_shuffled[j * self.batch_size:j * self.batch_size + self.batch_size, :]

            Ypred, Z1 = self.forward(X_temp)

            pY_t = Ypred - Y_temp
            self.W2 -= self.learning_rate_init * (Z1.T.dot(pY_t))
            self.b2 -= self.learning_rate_init * (pY_t.sum(axis=0))
            dZ = pY_t.dot(self.W2.T) * (Z1 > 0)
            self.W1 -= self.learning_rate_init * X_temp.T.dot(dZ)
            self.b1 -= self.learning_rate_init * dZ.sum(axis=0)

        if (i % 2) == 0:
            pY_test, _ = self.forward(X_test)
            c = cost(Y_test, pY_test)
            costs.append(c)
            e = error_rate(Y_test.argmax(axis=1), pY_test.argmax(axis=1))
            print('Iteration', i, 'Cost:', c, 'Error Rate:', e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if plot_cost:
        plt.plot(costs)
        plt.show()

def fit(self, X, Y, learning_rate=1e-8, reg=1e-12, epochs=10000, show_fig=False):
    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes

    X, Y = shuffle(X, Y)
    X_valid, Y_valid = X[-1000:], Y[-1000:]
    T_valid = one_hot_encoder(Y_valid)
    X, Y = X[:-1000], Y[:-1000]
    T = one_hot_encoder(Y)

    self.W = np.random.randn(D, K) / np.sqrt(D)
    self.b = np.zeros(K)

    costs = []
    best_validation_error = 1
    for epoch in range(epochs):
        Y_hat = self.forward(X)
        self.W -= learning_rate * (self.dJ_dw(T, Y_hat, X) + reg * self.W)
        self.b -= learning_rate * (self.dJ_db(T, Y_hat) + reg * self.b)

        if epoch % 100 == 0:
            Y_hat_valid = self.forward(X_valid)
            c = cross_entropy(T_valid, Y_hat_valid)
            costs.append(c)
            e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
            print("epoch:", epoch, "cost:", c, "error:", e)
            if e < best_validation_error:
                best_validation_error = e
    print("best_validation_error:", best_validation_error)

    if show_fig:
        plt.plot(costs)
        plt.title('Validation cost')
        plt.show()

    print("Final train classification_rate:", self.score(X, Y))

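# The softmax snippets above also call one_hot_encoder() and cross_entropy() helpers whose
# definitions are not part of this collection. A minimal sketch of both, matching the call
# sites (targets first, predicted probabilities second), is given below; treat it as an
# assumption about the missing utility module, not the original code.
import numpy as np

def one_hot_encoder(Y):
    # Infer K from the labels and build an (N, K) indicator matrix.
    Y = np.asarray(Y).astype(np.int32)
    N, K = len(Y), len(set(Y))
    T = np.zeros((N, K))
    T[np.arange(N), Y] = 1
    return T

def cross_entropy(T, Y_hat):
    # Mean negative log-likelihood of the targets under the predicted probabilities.
    return -np.mean(T * np.log(Y_hat + 1e-12))
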
def main():
    user_action = 3
    X, T = get_ecommerce(user_action=user_action)
    # X, T = np.shuffle(X, T)

    N, D = X.shape
    X = np.concatenate((np.ones((N, 1)), X), axis=1)
    T = T.astype(np.int32)
    X = X.astype(np.float32)
    D += 1

    # params
    lr = 5e-4
    max_iteration = 1000
    W = np.random.randn(D) / np.sqrt(D)

    cost = []
    error = []
    for i in xrange(max_iteration):
        Y = forward(W, X)
        cost.append(cross_entropy(T, Y))
        error.append(error_rate(T, Y))
        W += lr * X.T.dot(T - Y)
        if i % 5 == 0:
            print "i=%d\tcost=%.3f\terror=%.3f" % (i, cost[-1], error[-1])

    print "Final weight:", W

    plt.title('logistic regression user_action=%d' % (user_action))
    plt.xlabel('iterations')
    plt.ylabel('cross entropy')
    plt.plot(cost)
    plt.show()

    plt.title('logistic regression user_action=%d' % (user_action))
    plt.xlabel('iterations')
    plt.ylabel('error rate')
    plt.plot(error)
    plt.show()

def fit(self, X, Y, learning_rate=1e-6, reg=0, epochs=12000, show_figure=False):
    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-1000:, :], Y[-1000:]
    X, Y = X[:-1000, :], Y[:-1000]

    N, D = X.shape
    self.W = np.random.randn(D) / np.sqrt(D)
    self.b = 0

    costs = []
    best_validation_error = 1
    for i in xrange(epochs):
        pY = self.forward(X)

        # gradient descent step
        self.W -= learning_rate * (X.T.dot(pY - Y) + reg * self.W)
        self.b -= learning_rate * ((pY - Y).sum() + reg * self.b)

        if i % 20 == 0:
            pYvalid = self.forward(Xvalid)
            # c = sigmoid_cost(Yvalid, pYvalid)
            c = cross_entropy(Yvalid, pYvalid)
            costs.append(c)
            e = error_rate(Yvalid, pYvalid)
            sys.stdout.write("i:%s\tcost:%.4f\terror:%.4f\t\r" % (format(i, '04d'), c, e))
            sys.stdout.flush()
            # print "i", i, "cost:", c, "error", e
            if e < best_validation_error:
                best_validation_error = e
    print "best_validation_error:", best_validation_error

    if show_figure:
        plt.plot(costs)
        plt.show()

def main():
    # file_loc = '/media/avemuri/DEV/Data/deeplearning/mnist/train.csv'
    file_loc = 'D:/dev/data/face_emotion_recognizer/fer2013.csv'
    X_train, Y_train, X_test, Y_test = get_data(file_name=file_loc)

    pca = PCA(n_components=400)
    pca.fit(X_train)
    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)

    T_train = one_hot_encoder(Y_train)
    T_test = one_hot_encoder(Y_test)

    D = X_train.shape[1]  # number of features
    K = len(set(Y_train))  # number of classes

    decay_rate = 0.999
    eps = 1e-10
    epochs = 100
    n_batches = 10
    batch_size = X_train.shape[0] // n_batches
    print_time = n_batches
    M = 300
    learning_rate = 1e-6
    reg = 1e-8

    W1_init = np.random.randn(D, M) / np.sqrt(D)
    b1_init = np.zeros(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M)
    b2_init = np.zeros(K)

    thX = th.matrix('X')
    thT = th.matrix('Y')
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # RMSprop caches are shared variables shaped like the parameters; they have to be
    # updated inside the train function so the running average persists across calls
    cache_W1 = theano.shared(np.ones_like(W1_init), 'cache_w1')
    cache_b1 = theano.shared(np.ones_like(b1_init), 'cache_b1')
    cache_W2 = theano.shared(np.ones_like(W2_init), 'cache_w2')
    cache_b2 = theano.shared(np.ones_like(b2_init), 'cache_b2')

    # forward model
    thZ = th.nnet.relu(thX.dot(W1) + b1)
    # thZ[thZ < 0] = 0
    # Z = np.tanh(X.dot(self.W1) + self.b1)
    thY = th.nnet.softmax(thZ.dot(W2) + b2)

    # Cost: negative log-likelihood plus (not minus) the L2 penalty
    cost = (-(thT * th.log(thY)).sum() +
            reg * ((W1 * W1).sum() + (b1 * b1).sum() + (W2 * W2).sum() + (b2 * b2).sum()))

    # Prediction
    prediction = th.argmax(thY, axis=1)

    # Updates
    dJ_dW1 = th.grad(cost, W1)
    dJ_db1 = th.grad(cost, b1)
    dJ_dW2 = th.grad(cost, W2)
    dJ_db2 = th.grad(cost, b2)

    new_cache_W1 = decay_rate * cache_W1 + (1 - decay_rate) * dJ_dW1 * dJ_dW1
    new_cache_b1 = decay_rate * cache_b1 + (1 - decay_rate) * dJ_db1 * dJ_db1
    new_cache_W2 = decay_rate * cache_W2 + (1 - decay_rate) * dJ_dW2 * dJ_dW2
    new_cache_b2 = decay_rate * cache_b2 + (1 - decay_rate) * dJ_db2 * dJ_db2

    update_W1 = W1 - learning_rate * dJ_dW1 / (th.sqrt(new_cache_W1) + eps)
    update_b1 = b1 - learning_rate * dJ_db1 / (th.sqrt(new_cache_b1) + eps)
    update_W2 = W2 - learning_rate * dJ_dW2 / (th.sqrt(new_cache_W2) + eps)
    update_b2 = b2 - learning_rate * dJ_db2 / (th.sqrt(new_cache_b2) + eps)

    train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, update_W1), (b1, update_b1), (W2, update_W2), (b2, update_b2),
                 (cache_W1, new_cache_W1), (cache_b1, new_cache_b1),
                 (cache_W2, new_cache_W2), (cache_b2, new_cache_b2)])
    get_prediction = theano.function(inputs=[thX, thT], outputs=[cost, prediction])

    costs = []
    for epoch in range(epochs):
        X_shuffled, T_shuffled = shuffle(X_train, T_train)
        for batch in range(n_batches):
            # Get the batch
            X_batch = X_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
            Y_batch = T_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
            train(X_batch, Y_batch)

            if batch % print_time == 0:
                c, pred = get_prediction(X_test, T_test)
                err = error_rate(Y_test, pred)
                print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]" % (epoch, batch, c, err))
                costs.append(c)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()

def fit(self, Xin, Yin, learning_rate=10e-7, reg=10e-8, epochs=10000, show_figure=False):
    Nvalid = 500
    N, D = Xin.shape
    K = len(np.unique(Yin))

    Xin, Yin = shuffle(Xin, Yin)
    # hold out the last Nvalid samples for validation
    Xtrain, Ytrain = Xin[:-Nvalid, :], Yin[:-Nvalid]
    Xvalid, Yvalid = Xin[-Nvalid:, :], Yin[-Nvalid:]
    Ttrain, Tvalid = y2indicator(Ytrain, K), y2indicator(Yvalid, K)

    # Initialize Wi, bi
    W1_init = np.random.randn(D, self.M) / np.sqrt(D + self.M)
    b1_init = np.random.randn(self.M) / np.sqrt(self.M)
    W2_init = np.random.randn(self.M, K) / np.sqrt(K + self.M)
    b2_init = np.random.randn(K) / np.sqrt(K)

    # Theano shared
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # Theano variables
    thX = T.matrix('X')
    thT = T.matrix('T')
    thZ = sigmoid(thX.dot(W1) + b1)
    thY = T.nnet.softmax(thZ.dot(W2) + b2)

    # Theano updatables
    costs = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)

    W1_update = W1 - learning_rate * (T.grad(costs, W1) + reg * W1)
    b1_update = b1 - learning_rate * (T.grad(costs, b1) + reg * b1)
    W2_update = W2 - learning_rate * (T.grad(costs, W2) + reg * W2)
    b2_update = b2 - learning_rate * (T.grad(costs, b2) + reg * b2)

    self._train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update), (W2, W2_update), (b2, b2_update)],
    )
    self._predict = theano.function(
        inputs=[thX, thT],
        outputs=[costs, prediction],
    )

    train_costs = []
    train_errors = []
    valid_costs = []
    valid_errors = []
    for i in xrange(epochs):
        self._train(Xtrain, Ttrain)
        if i % 10 == 0:
            ctrain, pYtrain = self._predict(Xtrain, Ttrain)
            err = error_rate(Ytrain, pYtrain)
            train_costs.append(ctrain)
            train_errors.append(err)

            cvalid, pYvalid = self._predict(Xvalid, Tvalid)
            err = error_rate(Yvalid, pYvalid)
            valid_costs.append(cvalid)
            valid_errors.append(err)
            print "i=%d\tc=%.3f\terr=%.3f\t" % (i, cvalid, err)

    cvalid, pYvalid = self._predict(Xvalid, Tvalid)
    err = error_rate(Yvalid, pYvalid)
    valid_costs.append(cvalid)
    valid_errors.append(err)
    print "i=%d\tc=%.3f\terr=%.3f\t" % (epochs, cvalid, err)

    print "Final train classification rate", classification_rate(Ytrain, pYtrain)
    print "Final valid classification rate", classification_rate(Yvalid, pYvalid)

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(valid_costs, label='valid cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(valid_errors, label='valid error')
    plt.legend([legend1, legend2])
    plt.show()

def fit(self, X, Y, learning_rate=1e-8, reg=1e-12, epochs=10000, n_batches=10, show_fig=False):
    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes

    X, Y = shuffle(X, Y)
    X_valid, Y_valid = X[-1000:], Y[-1000:]
    T_valid = one_hot_encoder(Y_valid)
    X, Y = X[:-1000], Y[:-1000]
    batch_size = X.shape[0] // n_batches
    T = one_hot_encoder(Y)

    self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
    self.b1 = np.zeros(self.M)
    self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
    self.b2 = np.zeros(K)

    # 1st moment
    mW1 = 0
    mb1 = 0
    mW2 = 0
    mb2 = 0
    # 2nd moment
    vW1 = 0
    vb1 = 0
    vW2 = 0
    vb2 = 0
    # hyperparams
    beta1 = 0.9
    beta2 = 0.999
    eps = 1e-8

    costs = []
    t = 1
    for epoch in range(epochs):
        X_shuffled, T_shuffled = shuffle(X, T)
        for ibatch in range(n_batches):
            # Get the batch
            X_batch = X_shuffled[ibatch * batch_size:(ibatch + 1) * batch_size, :]
            Y_batch = T_shuffled[ibatch * batch_size:(ibatch + 1) * batch_size, :]

            Y_hat, Z = self.forward(X_batch)

            # Gradients ----------------------
            Y_hat_T = Y_hat - Y_batch
            dJ_dW2 = Z.T.dot(Y_hat_T) + reg * self.W2
            dJ_db2 = Y_hat_T.sum(axis=0) + reg * self.b2
            val = (Y_hat - Y_batch).dot(self.W2.T) * (Z > 0)  # relu
            # val = Y_hat_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
            dJ_dW1 = X_batch.T.dot(val) + reg * self.W1
            dJ_db1 = val.sum(axis=0) + reg * self.b1

            # Mean (1st moment)
            mW2 = beta1 * mW2 + (1 - beta1) * dJ_dW2
            mb2 = beta1 * mb2 + (1 - beta1) * dJ_db2
            mW1 = beta1 * mW1 + (1 - beta1) * dJ_dW1
            mb1 = beta1 * mb1 + (1 - beta1) * dJ_db1

            # Velocity terms (2nd moment)
            vW2 = beta2 * vW2 + (1 - beta2) * dJ_dW2 * dJ_dW2
            vb2 = beta2 * vb2 + (1 - beta2) * dJ_db2 * dJ_db2
            vW1 = beta2 * vW1 + (1 - beta2) * dJ_dW1 * dJ_dW1
            vb1 = beta2 * vb1 + (1 - beta2) * dJ_db1 * dJ_db1

            # Bias correction
            correction1 = 1 - beta1 ** t
            hat_mW2 = mW2 / correction1
            hat_mb2 = mb2 / correction1
            hat_mW1 = mW1 / correction1
            hat_mb1 = mb1 / correction1

            correction2 = 1 - beta2 ** t
            hat_vW2 = vW2 / correction2
            hat_vb2 = vb2 / correction2
            hat_vW1 = vW1 / correction2
            hat_vb1 = vb1 / correction2

            # Parameter updates
            self.W2 -= learning_rate * hat_mW2 / (np.sqrt(hat_vW2) + eps)
            self.b2 -= learning_rate * hat_mb2 / (np.sqrt(hat_vb2) + eps)
            self.W1 -= learning_rate * hat_mW1 / (np.sqrt(hat_vW1) + eps)
            self.b1 -= learning_rate * hat_mb1 / (np.sqrt(hat_vb1) + eps)
            # -------------------------------------

            Y_hat_valid, _ = self.forward(X_valid)
            c = cross_entropy(T_valid, Y_hat_valid)
            costs.append(c)
            if ibatch % n_batches == 0:
                e = error_rate(Y_valid, np.argmax(Y_hat_valid, axis=1))
                print("epoch:", epoch, " cost:", c, " error:", e)

            t += 1

    if show_fig:
        plt.plot(costs)
        plt.title('Validation cost')
        plt.show()

    print("Final train classification_rate:", self.score(X, Y))

def fit(self, X, Y, activation=tf.nn.relu, learning_rate=1e-8, reg=1e-12,
        epochs=10000, n_batches=10, decay_rate=0.9, show_fig=False):
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)

    X, Y = shuffle(X, Y)
    X_valid, Y_valid = X[-1000:], Y[-1000:]
    T_valid = one_hot_encoder(Y_valid)
    X, Y = X[:-1000], Y[:-1000]
    T = one_hot_encoder(Y)

    eps = 1e-10
    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes
    batch_size = X.shape[0] // n_batches
    print_time = n_batches // 1

    M1 = D
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, activation_fn=activation)
        self.layers.append(h)
        M1 = M2
    # the final layer
    h = HiddenLayer(M1, K, activation_fn=tf.nn.softmax)
    self.layers.append(h)

    for layer in self.layers:
        self.params += layer.params

    tfX = tf.placeholder(tf.float32, shape=(None, D), name='tfX')
    tfT = tf.placeholder(tf.float32, shape=(None, K), name='tfT')
    tfY = self.forward(tfX)

    predict_op = tf.argmax(tfY, axis=1)
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=tfY, labels=tfT))
    train_op = tf.train.RMSPropOptimizer(learning_rate, decay=0.99, momentum=0.9).minimize(cost)

    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for epoch in range(epochs):
            X_shuffled, T_shuffled = shuffle(X, T)
            for batch in range(n_batches):
                # Get the batch
                X_batch = X_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
                Y_batch = T_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
                session.run(train_op, feed_dict={tfX: X_batch, tfT: Y_batch})

                if batch % print_time == 0:
                    test_cost = session.run(cost, feed_dict={tfX: X_valid, tfT: T_valid})
                    prediction = session.run(predict_op, feed_dict={tfX: X_valid})
                    err = error_rate(Y_valid, prediction)
                    # print(prediction.shape)
                    print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]"
                          % (epoch, batch, test_cost, err))
                    costs.append(test_cost)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()

def fit(self, X, Y, learning_rate=10e-4, reg=10e-8, epochs=10000, show_figure=False):
    Nvalid = 1000
    N, D = X.shape
    K = len(np.unique(Y))

    X, Y = shuffle(X, Y)
    Xvalid, Yvalid = X[-Nvalid:, :], Y[-Nvalid:]
    X, Y = X[:-Nvalid, :], Y[:-Nvalid]

    # Initialize hidden layers
    self.hidden_layers = []
    M1 = D
    for count, M2 in enumerate(self.hidden_layer_sizes):
        hidden_layer = HiddenLayer(M1, M2, count)
        self.hidden_layers.append(hidden_layer)
        M1 = M2

    # final layer
    W, b = init_weight_and_bias(M1, K)
    self.W = theano.shared(W, 'W_logreg')
    self.b = theano.shared(b, 'b_logreg')

    # collect parameters for later use
    self.params = []
    for h in self.hidden_layers:
        self.params += h.params
    self.params += [self.W, self.b]

    # Theano variables
    thX = T.fmatrix('X')
    thY = T.ivector('Y')
    pY = self.th_forward(thX)

    costs = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY]))
    prediction = self.th_predict(thX)

    # actual prediction functions and variables
    self.predict_op = theano.function(inputs=[thX], outputs=prediction)
    cost_predict_op = theano.function(inputs=[thX, thY], outputs=[costs, prediction])

    # Streamline initializations
    updates = [
        (p, p - learning_rate * (T.grad(costs, p) + reg * p)) for p in self.params
    ]
    train_op = theano.function(
        inputs=[thX, thY],
        updates=updates,
        allow_input_downcast=True,
    )

    batch_sz = 200
    n_batches = N / batch_sz

    costs = []
    for i in xrange(epochs):
        X, Y = shuffle(X, Y)
        for j in range(n_batches):
            Xbatch = X[j * batch_sz:(j * batch_sz + batch_sz), :]
            Ybatch = Y[j * batch_sz:(j * batch_sz + batch_sz)]
            train_op(Xbatch.astype(np.float32), Ybatch.astype(np.int32))

            if j % 100 == 0:
                c, p = cost_predict_op(Xvalid.astype(np.float32), Yvalid.astype(np.int32))
                costs.append(c)
                err = error_rate(Yvalid, p)
                print "i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f\t" % (i, j, n_batches, c, err)

    print "i:%d\tj:%d\tnb:%d\tc:%.3f\terr:%.3f\t" % (i, batch_sz, n_batches, c, err)
    print "Final error rate", err

    if show_figure:
        plt.plot(costs)
        plt.show()

def main():
    # file_loc = '/media/avemuri/DEV/Data/deeplearning/mnist/train.csv'
    file_loc = 'D:/dev/data/mnist/train.csv'
    X_train, Y_train, X_test, Y_test = get_data(file_name=file_loc, split_train_test=True)

    pca = PCA(n_components=400)
    pca.fit(X_train)
    X_train = pca.transform(X_train)
    # Y = Y_train
    T_train = one_hot_encoder(Y_train)
    X_test = pca.transform(X_test)
    T_test = one_hot_encoder(Y_test)

    #######################################################
    D = X_train.shape[1]  # number of features
    K = len(set(Y_train))  # number of classes
    M = 300
    reg = 0.00001
    batch_size = 500
    n_batches = X_train.shape[0] // batch_size
    learning_rate = 0.00004
    epochs = 10

    W1_init = np.random.randn(D, M) / np.sqrt(D)
    b1_init = np.zeros(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M)
    b2_init = np.zeros(K)

    # Define all variables
    X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    T = tf.placeholder(tf.float32, shape=(None, K), name='Y')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))

    # Model definition
    Z = tf.nn.relu(tf.matmul(X, W1) + b1)
    Y_hat = tf.matmul(Z, W2) + b2

    # Cost
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=Y_hat, labels=T))

    # Optimization
    train = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                      decay=0.99, momentum=0.9).minimize(cost)

    # Predictions
    predic_op = tf.argmax(Y_hat, axis=1)

    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for epoch in range(epochs):
            X_shuffled, T_shuffled = shuffle(X_train, T_train)
            for batch in range(n_batches):
                # Get the batch
                X_batch = X_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
                Y_batch = T_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
                session.run(train, feed_dict={X: X_batch, T: Y_batch})

                if batch % 10 == 0:
                    c = session.run(cost, feed_dict={X: X_test, T: T_test})
                    Y_test_predictions = session.run(predic_op, feed_dict={X: X_test})
                    err = error_rate(Y_test, Y_test_predictions)
                    print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]"
                          % (epoch, batch, c, err))
                    costs.append(c)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()

def score(self, X, Y):
    prediction = self.predict(X)
    return np.round(1 - error_rate(Y, prediction), 4)

# =======================================================
MAX_DEPTH = 5

rf_source = skl_ens.RandomForestClassifier(n_estimators=NB_TREE,
                                           max_depth=MAX_DEPTH,
                                           oob_score=True)
rf_target = skl_ens.RandomForestClassifier(n_estimators=NB_TREE,
                                           max_depth=MAX_DEPTH,
                                           oob_score=True,
                                           class_weight=None)

rf_source.fit(X_source, y_source)
rf_source_score_target = rf_source.score(X_target_095, y_target_095)
print("Error rate of rf_source on target data: ",
      error_rate(rf_source_score_target))

rf_target.fit(X_target_005, y_target_005)
rf_target_score_target = rf_target.score(X_target_095, y_target_095)
print("Error rate of rf_target (5%) on target data (95%): ",
      error_rate(rf_target_score_target))

# for i in range(SIZE_TEST):
#     print('Test no.', i)
#
#     rf_source.fit(X_source, y_source)
#     rf_source_score_target = rf_source[i].score(X_target_095, y_target_095)
#     print("Error rate of rf_source on target data: ",
#           error_rate(rf_source_score_target))
#
#

def score(self, X, Y):
    prediction = self.predict(X)
    return 1 - error_rate(Y, prediction)

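# Every snippet in this section assumes an error_rate(targets, predictions) utility. The
# two score() methods above imply that it returns the fraction of misclassified samples
# (so 1 - error_rate is the accuracy). A minimal sketch under that assumption, not the
# original helper:
import numpy as np

def error_rate(targets, predictions):
    # Fraction of positions where the predicted label differs from the target label.
    return np.mean(np.asarray(targets) != np.asarray(predictions))
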
def _output_minibatch_stats(self, sess, summary_writer, step, batch_x, batch_y):
    # Calculate batch loss and accuracy
    summary_str, loss, acc, predictions = sess.run(
        [self.summary_op, self.loss, self.accuracy, self.predicter],
        feed_dict={self.x: batch_x, self.y: batch_y})
    summary_writer.add_summary(summary_str, step)
    summary_writer.flush()
    logging.info(
        "Iter {:}, Minibatch Loss= {:.4f}, Training Accuracy= {:.4f}, Minibatch error= {:.1f}%".format(
            step, loss, acc, utils.error_rate(predictions, batch_y)))

def main():
    X, Y = get_ecommerce(user_action=None)
    X, Y = shuffle(X, Y)

    # Running variables
    learning_rate = 5e-4
    max_iterations = 10000

    # Define dimensions
    N, D = X.shape
    M = 5
    K = len(np.unique(Y))

    Ntrain = N - 100
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Ytrain_ind = y2indicator(Ytrain, K)

    Ntest = 100
    Xtest, Ytest = X[-Ntest:, :], Y[-Ntest:]
    Ytest_ind = y2indicator(Ytest, K)

    W1_init = np.random.randn(D, M) / np.sqrt(M + D)
    b1_init = np.random.randn(M) / np.sqrt(M)
    W2_init = np.random.randn(M, K) / np.sqrt(M + K)
    b2_init = np.random.randn(K) / np.sqrt(K)

    # Define theano shared
    W1 = theano.shared(W1_init, 'W1')
    b1 = theano.shared(b1_init, 'b1')
    W2 = theano.shared(W2_init, 'W2')
    b2 = theano.shared(b2_init, 'b2')

    # Define constant tensor matrices
    thX = T.matrix('X')
    thT = T.matrix('T')

    # Define cost
    thZ = sigmoid(thX.dot(W1) + b1)
    thY = softmax(thZ.dot(W2) + b2)
    cost = -(thT * T.log(thY) + (1 - thT) * T.log(1 - thY)).sum()
    prediction = T.argmax(thY, axis=1)

    # Define updates
    W1_update = W1 - learning_rate * T.grad(cost, W1)
    b1_update = b1 - learning_rate * T.grad(cost, b1)
    W2_update = W2 - learning_rate * T.grad(cost, W2)
    b2_update = b2 - learning_rate * T.grad(cost, b2)

    train = theano.function(
        inputs=[thX, thT],
        updates=[(W1, W1_update), (b1, b1_update), (W2, W2_update), (b2, b2_update)],
    )
    predict = theano.function(
        inputs=[thX, thT],
        outputs=[cost, prediction],
    )

    LL = []
    train_errors = []
    test_errors = []
    train_costs = []
    test_costs = []
    for i in xrange(max_iterations):
        train(Xtrain, Ytrain_ind)
        if i % 10 == 0:
            c, pYtrain = predict(Xtrain, Ytrain_ind)
            err = error_rate(Ytrain, pYtrain)
            train_costs.append(c)
            train_errors.append(err)

            c, pYtest = predict(Xtest, Ytest_ind)
            err = error_rate(Ytest, pYtest)
            test_costs.append(c)
            test_errors.append(err)
            print "i=%d\tc=%.3f\terr=%.3f\t" % (i, c, err)

    print "i=%d\tc=%.3f\terr=%.3f\t" % (max_iterations, c, err)
    print "Final train classification rate", classification_rate(Ytrain, pYtrain)
    print "Final test classification rate", classification_rate(Ytest, pYtest)

    plt.title('Multi layer perceptron: Costs')
    plt.xlabel('iterations')
    plt.ylabel('costs')
    legend1, = plt.plot(train_costs, label='train cost')
    legend2, = plt.plot(test_costs, label='test cost')
    plt.legend([legend1, legend2])
    plt.show()

    plt.title('Multi layer perceptron: Error rates')
    plt.xlabel('iterations')
    plt.ylabel('error rates')
    legend1, = plt.plot(train_errors, label='train error')
    legend2, = plt.plot(test_errors, label='test error')
    plt.legend([legend1, legend2])
    plt.show()

def main():
    # file_loc = '/media/avemuri/DEV/Data/deeplearning/mnist/train.csv'
    file_loc = 'D:/dev/data/mnist/train.csv'
    X_train, Y_train, X_test, Y_test = get_data(file_name=file_loc, split_train_test=True)

    pca = PCA(n_components=400)
    pca.fit(X_train)
    X = pca.transform(X_train)
    Y = Y_train
    T = one_hot_encoder(Y)
    X_test = pca.transform(X_test)
    T_test = one_hot_encoder(Y_test)

    #######################################################
    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes
    M = 300
    reg = 0.00001
    batch_size = 500
    n_batches = X.shape[0] // batch_size
    learning_rate = 0.0004
    epochs = 1000
    print_time = epochs // 10

    W_init = np.random.randn(D, K) / np.sqrt(D)
    b_init = np.zeros(K)

    thX = Th.matrix('X')
    thT = Th.matrix('T')
    W = theano.shared(W_init, 'W')
    b = theano.shared(b_init, 'b')

    # Forward model
    thY = Th.nnet.softmax(thX.dot(W) + b)

    # Cost: negative log-likelihood plus (not minus) the L2 penalty
    cost = -(thT * Th.log(thY)).sum() + reg * ((W * W).sum() + (b * b).sum())

    # Predictions
    prediction = Th.argmax(thY, axis=1)

    update_W = W - learning_rate * Th.grad(cost, W)
    update_b = b - learning_rate * Th.grad(cost, b)

    train = theano.function(inputs=[thX, thT], updates=[(W, update_W), (b, update_b)])
    get_prediction = theano.function(inputs=[thX, thT], outputs=[cost, prediction])

    costs = []
    for epoch in range(epochs):
        X_shuffled, T_shuffled = shuffle(X, T)
        for batch in range(n_batches):
            # Get the batch
            X_batch = X_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
            Y_batch = T_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
            train(X_batch, Y_batch)

            if batch % print_time == 0:
                test_cost, prediction = get_prediction(X_test, T_test)
                err = error_rate(Y_test, prediction)
                print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]"
                      % (epoch, batch, test_cost, err))
                costs.append(test_cost)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()

def fit(self, X, Y, activation=th.nnet.relu, learning_rate=1e-8, reg=1e-12,
        epochs=10000, n_batches=10, decay_rate=0.9, show_fig=False):
    X = X.astype(np.float32)
    Y = Y.astype(np.int32)

    X, Y = shuffle(X, Y)
    X_valid, Y_valid = X[-1000:], Y[-1000:]
    T_valid = one_hot_encoder(Y_valid)
    X, Y = X[:-1000], Y[:-1000]
    T = one_hot_encoder(Y)

    self.rng = theano.tensor.shared_randomstreams.RandomStreams()

    eps = 1e-10
    D = X.shape[1]  # number of features
    K = len(set(Y))  # number of classes
    batch_size = X.shape[0] // n_batches
    print_time = n_batches // 1

    M1 = D
    for M2 in self.hidden_layer_sizes:
        h = HiddenLayer(M1, M2, activation_fn=activation)
        self.layers.append(h)
        M1 = M2
    # the final layer
    h = HiddenLayer(M1, K, activation_fn=th.nnet.softmax)
    self.layers.append(h)

    for layer in self.layers:
        self.params += layer.params

    dparams = [theano.shared(np.zeros_like(p.get_value())) for p in self.params]
    cache = [theano.shared(np.zeros_like(p.get_value())) for p in self.params]

    thX = th.matrix('X')
    thT = th.matrix('T')
    thY_train = self.forward_train(thX)

    # Cost
    regularization_cost = reg * th.mean([(p * p).sum() for p in self.params])
    # cost = -th.mean(th.log(thY[th.arange(thT.shape[0]), thT]))  # + regularization_cost
    cost_train = -th.mean(thT * th.log(thY_train)) + regularization_cost

    # Gradient
    grads = th.grad(cost_train, self.params)
    update_params = [(p, p - learning_rate * (decay_rate * v + (1 - decay_rate) * g + reg * p))
                     for g, v, p in zip(grads, dparams, self.params)]
    update_velocity = [(v, decay_rate * v + (1 - decay_rate) * g)
                       for g, v in zip(grads, dparams)]
    # updates = [(p, p - learning_rate * g) for g, p in zip(grads, self.params)]
    updates = update_params + update_velocity
    train_op = theano.function(inputs=[thX, thT], updates=updates)

    thY_predict = self.forward_predict(thX)
    cost_predict = -th.mean(thT * th.log(thY_predict)) + regularization_cost

    # Predictions
    prediction = th.argmax(thY_predict, axis=1)
    cost_predict_op = theano.function(inputs=[thX, thT], outputs=[cost_predict, prediction])

    costs = []
    for epoch in range(epochs):
        X_shuffled, T_shuffled = shuffle(X, T)
        for batch in range(n_batches):
            # Get the batch
            X_batch = X_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
            Y_batch = T_shuffled[batch * batch_size:(batch + 1) * batch_size, :]
            train_op(X_batch, Y_batch)

            if batch % print_time == 0:
                test_cost, prediction = cost_predict_op(X_valid, T_valid)
                err = error_rate(Y_valid, prediction)
                # print(prediction.shape)
                print("epoch [%d], batch [%d] : cost=[%.3f], error=[%.3f]"
                      % (epoch, batch, test_cost, err))
                costs.append(test_cost)

    plt.plot(costs)
    plt.title('Validation cost')
    plt.show()

def fit(self, X, Y, learning_rate=10e-5, epochs=200, reg=10e-8, batch_sz=200,
        show_fig=False, activation=tf.tanh):
    X, Y = shuffle(X, Y)
    K = len(np.unique(Y))
    T = y2indicator(Y, K).astype(np.float32)

    Xvalid, Yvalid, Tvalid = X[-1000:, :], Y[-1000:], T[-1000:, :]
    Xtrain, Ytrain, Ttrain = X[:-1000, :], Y[:-1000], T[:-1000, :]
    N, D = Xtrain.shape

    # Variable initialization
    W1, b1 = init_weight_and_bias(D, self.M)
    W2, b2 = init_weight_and_bias(self.M, K)
    self.W1 = tf.Variable(W1.astype(np.float32), 'W1')
    self.b1 = tf.Variable(b1.astype(np.float32), 'b1')
    self.W2 = tf.Variable(W2.astype(np.float32), 'W2')
    self.b2 = tf.Variable(b2.astype(np.float32), 'b2')
    self.params = [self.W1, self.b1, self.W2, self.b2]

    # Define placeholders
    X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    T = tf.placeholder(tf.float32, shape=(None, K), name='Y')

    Z = activation(tf.matmul(X, self.W1) + self.b1)
    Yish = tf.matmul(Z, self.W2) + self.b2

    rcost = reg * tf.reduce_sum([tf.nn.l2_loss(p) for p in self.params])
    cost = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=T, logits=Yish)) + rcost
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    self.predict_op = tf.argmax(Yish, 1)

    n_batches = N // batch_sz
    costs = []
    errors = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        for i in xrange(epochs):
            # shuffle the indicator matrix together with X and Y so batches stay aligned
            Xtrain, Ytrain, Ttrain = shuffle(Xtrain, Ytrain, Ttrain)
            for j in xrange(n_batches):
                Xbatch = Xtrain[j * batch_sz:(j + 1) * batch_sz, :]
                Ybatch = Ytrain[j * batch_sz:(j + 1) * batch_sz]
                Tbatch = Ttrain[j * batch_sz:(j + 1) * batch_sz, :]
                session.run(train_op, feed_dict={X: Xbatch, T: Tbatch})

                if j % 10 == 0:
                    c = session.run(cost, feed_dict={X: Xvalid, T: Tvalid})
                    pYvalid = session.run(self.predict_op, feed_dict={X: Xvalid})
                    err = error_rate(Yvalid, pYvalid)
                    print "i:%d\tj:%d\tc:%.3f\terr:%.3f\t" % (i, j, c, err)
                    costs.append(c)
                    errors.append(err)

    if show_fig:
        plt.title('costs')
        plt.plot(costs)
        plt.show()
        plt.title('error rate')
        plt.plot(errors)
        plt.show()